//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
// both before and after the DAG is legalized.
//
// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
// primarily intended to handle simplification opportunities that are implicit
// in the LLVM IR and exposed by the various codegen lowering phases.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <string>
#include <tuple>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "dagcombine"

STATISTIC(NodesCombined   , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of load sliced");
STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");

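// Note: these STATISTIC counters are only tallied in builds with LLVM
// statistics enabled and are reported through the -stats machinery.
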
static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
                 cl::desc("Enable DAG combiner's use of IR alias analysis"));

static cl::opt<bool>
UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
        cl::desc("Enable DAG combiner's use of TBAA"));

#ifndef NDEBUG
static cl::opt<std::string>
CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
                   cl::desc("Only use DAG-combiner alias analysis in this"
                            " function"));
#endif

/// Hidden option to stress test load slicing, i.e., when this option
/// is enabled, load slicing bypasses most of its profitability guards.
static cl::opt<bool>
StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                  cl::desc("Bypass the profitability model of load slicing"),
                  cl::init(false));

static cl::opt<bool>
  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                    cl::desc("DAG combiner may split indexing from loads"));

namespace {

  class DAGCombiner {
    SelectionDAG &DAG;
    const TargetLowering &TLI;
    CombineLevel Level;
    CodeGenOpt::Level OptLevel;
    bool LegalOperations = false;
    bool LegalTypes = false;
    bool ForCodeSize;

    /// Worklist of all of the nodes that need to be simplified.
    ///
    /// This must behave as a stack -- new nodes to process are pushed onto the
    /// back and when processing we pop off of the back.
    ///
    /// The worklist will not contain duplicates but may contain null entries
    /// due to nodes being deleted from the underlying DAG.
    SmallVector<SDNode *, 64> Worklist;

    /// Mapping from an SDNode to its position on the worklist.
    ///
    /// This is used to find and remove nodes from the worklist (by nulling
    /// them) when they are deleted from the underlying DAG. It relies on
    /// stable indices of nodes within the worklist.
    DenseMap<SDNode *, unsigned> WorklistMap;

    /// Set of nodes which have been combined (at least once).
    ///
    /// This is used to allow us to reliably add any operands of a DAG node
    /// which have not yet been combined to the worklist.
    SmallPtrSet<SDNode *, 32> CombinedNodes;

    // AA - Used for DAG load/store alias analysis.
    AliasAnalysis *AA;

    /// When an instruction is simplified, add all users of the instruction to
    /// the worklist because they might get more simplified now.
    void AddUsersToWorklist(SDNode *N) {
      for (SDNode *Node : N->uses())
        AddToWorklist(Node);
    }

    /// Call the node-specific routine that folds each particular type of node.
    SDValue visit(SDNode *N);

  public:
    DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
        : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
          OptLevel(OL), AA(AA) {
      ForCodeSize = DAG.getMachineFunction().getFunction().optForSize();

      MaximumLegalStoreInBits = 0;
      for (MVT VT : MVT::all_valuetypes())
        if (EVT(VT).isSimple() && VT != MVT::Other &&
            TLI.isTypeLegal(EVT(VT)) &&
            VT.getSizeInBits() >= MaximumLegalStoreInBits)
          MaximumLegalStoreInBits = VT.getSizeInBits();
    }

    /// Add to the worklist making sure its instance is at the back (next to be
    /// processed.)
    void AddToWorklist(SDNode *N) {
      assert(N->getOpcode() != ISD::DELETED_NODE &&
             "Deleted Node added to Worklist");

      // Skip handle nodes as they can't usefully be combined and confuse the
      // zero-use deletion strategy.
      if (N->getOpcode() == ISD::HANDLENODE)
        return;

      if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
        Worklist.push_back(N);
    }

    /// Remove all instances of N from the worklist.
    void removeFromWorklist(SDNode *N) {
      CombinedNodes.erase(N);

      auto It = WorklistMap.find(N);
      if (It == WorklistMap.end())
        return; // Not in the worklist.

      // Null out the entry rather than erasing it to avoid a linear operation.
      Worklist[It->second] = nullptr;
      WorklistMap.erase(It);
    }

    void deleteAndRecombine(SDNode *N);
    bool recursivelyDeleteUnusedNodes(SDNode *N);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                      bool AddTo = true);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
      return CombineTo(N, &Res, 1, AddTo);
    }

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                      bool AddTo = true) {
      SDValue To[] = { Res0, Res1 };
      return CombineTo(N, To, 2, AddTo);
    }

    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

  private:
    unsigned MaximumLegalStoreInBits;

    /// Check the specified integer node value to see if it can be simplified
    /// or if things it uses can be simplified by bit propagation.
    /// If so, return true.
    bool SimplifyDemandedBits(SDValue Op) {
      unsigned BitWidth = Op.getScalarValueSizeInBits();
      APInt Demanded = APInt::getAllOnesValue(BitWidth);
      return SimplifyDemandedBits(Op, Demanded);
    }
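
    // Illustrative note (sketch): for an i32 value, the wrapper above is
    // equivalent to SimplifyDemandedBits(Op, APInt::getAllOnesValue(32)),
    // i.e. it assumes every bit of Op may be observed by some user.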

    /// Check the specified vector node value to see if it can be simplified or
    /// if things it uses can be simplified as it only uses some of the
    /// elements. If so, return true.
    bool SimplifyDemandedVectorElts(SDValue Op) {
      unsigned NumElts = Op.getValueType().getVectorNumElements();
      APInt Demanded = APInt::getAllOnesValue(NumElts);
      return SimplifyDemandedVectorElts(Op, Demanded);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
    bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
                                    bool AssumeSingleUse = false);

    bool CombineToPreIndexedLoadStore(SDNode *N);
    bool CombineToPostIndexedLoadStore(SDNode *N);
    SDValue SplitIndexingFromLoad(LoadSDNode *LD);
    bool SliceUpLoad(SDNode *N);

    // Scalars have size 0 to distinguish from singleton vectors.
    SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
    bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
    bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);

    /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
    /// load.
    ///
    /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
    /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
    /// \param EltNo index of the vector element to load.
    /// \param OriginalLoad load that EVE came from to be replaced.
    /// \returns EVE on success, SDValue() on failure.
    SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
                                         SDValue EltNo,
                                         LoadSDNode *OriginalLoad);
    void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
    SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
    SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue PromoteIntBinOp(SDValue Op);
    SDValue PromoteIntShiftOp(SDValue Op);
    SDValue PromoteExtend(SDValue Op);
    bool PromoteLoad(SDValue Op);

    /// Call the node-specific routine that knows how to fold each
    /// particular type of node. If that doesn't do anything, try the
    /// target-specific DAG combines.
    SDValue combine(SDNode *N);

    // Visitation implementation - Implement dag node combining for different
    // node types. The semantics are as follows:
    // Return Value:
    //   SDValue.getNode() == 0 - No change was made
    //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
    //   otherwise              - N should be replaced by the returned Operand.
    //
    SDValue visitTokenFactor(SDNode *N);
    SDValue visitMERGE_VALUES(SDNode *N);
    SDValue visitADD(SDNode *N);
    SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitSUB(SDNode *N);
    SDValue visitADDSAT(SDNode *N);
    SDValue visitSUBSAT(SDNode *N);
    SDValue visitADDC(SDNode *N);
    SDValue visitUADDO(SDNode *N);
    SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitSUBC(SDNode *N);
    SDValue visitUSUBO(SDNode *N);
    SDValue visitADDE(SDNode *N);
    SDValue visitADDCARRY(SDNode *N);
    SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
    SDValue visitSUBE(SDNode *N);
    SDValue visitSUBCARRY(SDNode *N);
    SDValue visitMUL(SDNode *N);
    SDValue useDivRem(SDNode *N);
    SDValue visitSDIV(SDNode *N);
    SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitUDIV(SDNode *N);
    SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitREM(SDNode *N);
    SDValue visitMULHU(SDNode *N);
    SDValue visitMULHS(SDNode *N);
    SDValue visitSMUL_LOHI(SDNode *N);
    SDValue visitUMUL_LOHI(SDNode *N);
    SDValue visitSMULO(SDNode *N);
    SDValue visitUMULO(SDNode *N);
    SDValue visitIMINMAX(SDNode *N);
    SDValue visitAND(SDNode *N);
    SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitOR(SDNode *N);
    SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitXOR(SDNode *N);
    SDValue SimplifyVBinOp(SDNode *N);
    SDValue visitSHL(SDNode *N);
    SDValue visitSRA(SDNode *N);
    SDValue visitSRL(SDNode *N);
    SDValue visitFunnelShift(SDNode *N);
    SDValue visitRotate(SDNode *N);
    SDValue visitABS(SDNode *N);
    SDValue visitBSWAP(SDNode *N);
    SDValue visitBITREVERSE(SDNode *N);
    SDValue visitCTLZ(SDNode *N);
    SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTTZ(SDNode *N);
    SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTPOP(SDNode *N);
    SDValue visitSELECT(SDNode *N);
    SDValue visitVSELECT(SDNode *N);
    SDValue visitSELECT_CC(SDNode *N);
    SDValue visitSETCC(SDNode *N);
    SDValue visitSETCCCARRY(SDNode *N);
    SDValue visitSIGN_EXTEND(SDNode *N);
    SDValue visitZERO_EXTEND(SDNode *N);
    SDValue visitANY_EXTEND(SDNode *N);
    SDValue visitAssertExt(SDNode *N);
    SDValue visitSIGN_EXTEND_INREG(SDNode *N);
    SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitTRUNCATE(SDNode *N);
    SDValue visitBITCAST(SDNode *N);
    SDValue visitBUILD_PAIR(SDNode *N);
    SDValue visitFADD(SDNode *N);
    SDValue visitFSUB(SDNode *N);
    SDValue visitFMUL(SDNode *N);
    SDValue visitFMA(SDNode *N);
    SDValue visitFDIV(SDNode *N);
    SDValue visitFREM(SDNode *N);
    SDValue visitFSQRT(SDNode *N);
    SDValue visitFCOPYSIGN(SDNode *N);
    SDValue visitFPOW(SDNode *N);
    SDValue visitSINT_TO_FP(SDNode *N);
    SDValue visitUINT_TO_FP(SDNode *N);
    SDValue visitFP_TO_SINT(SDNode *N);
    SDValue visitFP_TO_UINT(SDNode *N);
    SDValue visitFP_ROUND(SDNode *N);
    SDValue visitFP_ROUND_INREG(SDNode *N);
    SDValue visitFP_EXTEND(SDNode *N);
    SDValue visitFNEG(SDNode *N);
    SDValue visitFABS(SDNode *N);
    SDValue visitFCEIL(SDNode *N);
    SDValue visitFTRUNC(SDNode *N);
    SDValue visitFFLOOR(SDNode *N);
    SDValue visitFMINNUM(SDNode *N);
    SDValue visitFMAXNUM(SDNode *N);
    SDValue visitFMINIMUM(SDNode *N);
    SDValue visitFMAXIMUM(SDNode *N);
    SDValue visitBRCOND(SDNode *N);
    SDValue visitBR_CC(SDNode *N);
    SDValue visitLOAD(SDNode *N);

    SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
    SDValue replaceStoreOfFPConstant(StoreSDNode *ST);

    SDValue visitSTORE(SDNode *N);
    SDValue visitINSERT_VECTOR_ELT(SDNode *N);
    SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
    SDValue visitBUILD_VECTOR(SDNode *N);
    SDValue visitCONCAT_VECTORS(SDNode *N);
    SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
    SDValue visitVECTOR_SHUFFLE(SDNode *N);
    SDValue visitSCALAR_TO_VECTOR(SDNode *N);
    SDValue visitINSERT_SUBVECTOR(SDNode *N);
    SDValue visitMLOAD(SDNode *N);
    SDValue visitMSTORE(SDNode *N);
    SDValue visitMGATHER(SDNode *N);
    SDValue visitMSCATTER(SDNode *N);
    SDValue visitFP_TO_FP16(SDNode *N);
    SDValue visitFP16_TO_FP(SDNode *N);

    SDValue visitFADDForFMACombine(SDNode *N);
    SDValue visitFSUBForFMACombine(SDNode *N);
    SDValue visitFMULForFMADistributiveCombine(SDNode *N);

    SDValue XformToShuffleWithZero(SDNode *N);
    SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                           SDValue N1, SDNodeFlags Flags);

    SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);

    SDValue foldSelectOfConstants(SDNode *N);
    SDValue foldVSelectOfConstants(SDNode *N);
    SDValue foldBinOpIntoSelect(SDNode *BO);
    bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
    SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
    SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
    SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                             SDValue N2, SDValue N3, ISD::CondCode CC,
                             bool NotExtCompare = false);
    SDValue convertSelectOfFPConstantsToLoadOffset(
        const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
        ISD::CondCode CC);
    SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
                                   SDValue N2, SDValue N3, ISD::CondCode CC);
    SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                              const SDLoc &DL);
    SDValue unfoldMaskedMerge(SDNode *N);
    SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
    SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                          const SDLoc &DL, bool foldBooleans);
    SDValue rebuildSetCC(SDValue N);

    bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                           SDValue &CC) const;
    bool isOneUseSetCC(SDValue N) const;

    SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                       unsigned HiOp);
    SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
    SDValue CombineExtLoad(SDNode *N);
    SDValue CombineZExtLogicopShiftLoad(SDNode *N);
    SDValue combineRepeatedFPDivisors(SDNode *N);
    SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
    SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
    SDValue BuildSDIV(SDNode *N);
    SDValue BuildSDIVPow2(SDNode *N);
    SDValue BuildUDIV(SDNode *N);
    SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
    SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
    SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                               bool DemandHighBits = true);
    SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
    SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                              SDValue InnerPos, SDValue InnerNeg,
                              unsigned PosOpcode, unsigned NegOpcode,
                              const SDLoc &DL);
    SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
    SDValue MatchLoadCombine(SDNode *N);
    SDValue ReduceLoadWidth(SDNode *N);
    SDValue ReduceLoadOpStoreWidth(SDNode *N);
    SDValue TransformFPLoadStorePair(SDNode *N);
    SDValue convertBuildVecZextToZext(SDNode *N);
    SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
    SDValue reduceBuildVecToShuffle(SDNode *N);
    SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                  ArrayRef<int> VectorMask, SDValue VecIn1,
                                  SDValue VecIn2, unsigned LeftIdx);
    SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);

    /// Walk up chain skipping non-aliasing memory nodes,
    /// looking for aliasing nodes and adding them to the Aliases vector.
    void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                          SmallVectorImpl<SDValue> &Aliases);

    /// Return true if there is any possibility that the two addresses overlap.
    bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;

    /// Walk up chain skipping non-aliasing memory nodes, looking for a better
    /// chain (aliasing node.)
    SDValue FindBetterChain(SDNode *N, SDValue Chain);

    /// Try to replace a store and any possibly adjacent stores on
    /// consecutive chains with better chains. Return true only if St is
    /// replaced.
    ///
    /// Notice that other chains may still be replaced even if the function
    /// returns false.
    bool findBetterNeighborChains(StoreSDNode *St);

    // Helper for findBetterNeighborChains. Walk up the store chain and add
    // additional chained stores that do not overlap and can be parallelized.
    bool parallelizeChainedStores(StoreSDNode *St);

    /// Holds a pointer to an LSBaseSDNode as well as information on where it
    /// is located in a sequence of memory operations connected by a chain.
    struct MemOpLink {
      // Ptr to the mem node.
      LSBaseSDNode *MemNode;

      // Offset from the base ptr.
      int64_t OffsetFromBase;

      MemOpLink(LSBaseSDNode *N, int64_t Offset)
          : MemNode(N), OffsetFromBase(Offset) {}
    };

    /// This is a helper function for visitMUL to check the profitability
    /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
    /// MulNode is the original multiply, AddNode is (add x, c1),
    /// and ConstNode is c2.
    bool isMulAddWithConstProfitable(SDNode *MulNode,
                                     SDValue &AddNode,
                                     SDValue &ConstNode);

    /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
    /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
    /// the type of the loaded value to be extended.
    bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                          EVT LoadResultTy, EVT &ExtVT);

    /// Helper function to calculate whether the given Load/Store can have its
    /// width reduced to ExtVT.
    bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
                           EVT &MemVT, unsigned ShAmt = 0);

    /// Used by BackwardsPropagateMask to find suitable loads.
    bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
                           SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                           ConstantSDNode *Mask, SDNode *&NodeToMask);
    /// Attempt to propagate a given AND node back to load leaves so that they
    /// can be combined into narrow loads.
    bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);

    /// Helper function for MergeConsecutiveStores which merges the
    /// component store chains.
    SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
                                unsigned NumStores);

    /// This is a helper function for MergeConsecutiveStores. When the
    /// source elements of the consecutive stores are all constants or
    /// all extracted vector elements, try to merge them into one
    /// larger store introducing bitcasts if necessary. \return True
    /// if a merged store was created.
    bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                         EVT MemVT, unsigned NumStores,
                                         bool IsConstantSrc, bool UseVector,
                                         bool UseTrunc);

    /// This is a helper function for MergeConsecutiveStores. Stores
    /// that potentially may be merged with St are placed in
    /// StoreNodes. RootNode is a chain predecessor to all store
    /// candidates.
    void getStoreMergeCandidates(StoreSDNode *St,
                                 SmallVectorImpl<MemOpLink> &StoreNodes,
                                 SDNode *&Root);

    /// Helper function for MergeConsecutiveStores. Checks if
    /// candidate stores have indirect dependency through their
    /// operands. RootNode is the predecessor to all stores calculated
    /// by getStoreMergeCandidates and is used to prune the dependency check.
    /// \return True if safe to merge.
    bool checkMergeStoreCandidatesForDependencies(
        SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
        SDNode *RootNode);

    /// Merge consecutive store operations into a wide store.
    /// This optimization uses wide integers or vectors when possible.
    /// \return true if stores were merged into a merged store (the
    /// affected nodes are stored as a prefix in \p StoreNodes).
    bool MergeConsecutiveStores(StoreSDNode *St);

    /// Try to transform a truncation where C is a constant:
    ///   (trunc (and X, C)) -> (and (trunc X), (trunc C))
    ///
    /// \p N needs to be a truncation and its first operand an AND. Other
    /// requirements are checked by the function (e.g. that trunc is
    /// single-use) and if missed an empty SDValue is returned.
    SDValue distributeTruncateThroughAnd(SDNode *N);

    /// Helper function to determine whether the target supports the operation
    /// given by \p Opcode for type \p VT, that is, whether the operation
    /// is legal or custom before legalizing operations, and whether it is
    /// legal (but not custom) after legalization.
    bool hasOperation(unsigned Opcode, EVT VT) {
      if (LegalOperations)
        return TLI.isOperationLegal(Opcode, VT);
      return TLI.isOperationLegalOrCustom(Opcode, VT);
    }
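
    // Illustrative use (a sketch, not code from this file):
    //   if (hasOperation(ISD::ABS, VT))
    //     return DAG.getNode(ISD::ABS, DL, VT, X);
    // i.e. a combine only forms nodes the target can handle at this stage.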

  public:
    /// Runs the dag combiner on all nodes in the work list.
    void Run(CombineLevel AtLevel);

    SelectionDAG &getDAG() const { return DAG; }

    /// Returns a type large enough to hold any valid shift amount - before type
    /// legalization these can be huge.
    EVT getShiftAmountTy(EVT LHSTy) {
      assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
      return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
    }

    /// This method returns true if we are running before type legalization or
    /// if the specified VT is legal.
    bool isTypeLegal(const EVT &VT) {
      if (!LegalTypes) return true;
      return TLI.isTypeLegal(VT);
    }

    /// Convenience wrapper around TargetLowering::getSetCCResultType.
    EVT getSetCCResultType(EVT VT) const {
      return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    }

    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                         SDValue OrigLoad, SDValue ExtLoad,
                         ISD::NodeType ExtType);
  };

/// This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
class WorklistRemover : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistRemover(DAGCombiner &dc)
      : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  void NodeDeleted(SDNode *N, SDNode *E) override {
    DC.removeFromWorklist(N);
  }
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// TargetLowering::DAGCombinerInfo implementation
//===----------------------------------------------------------------------===//

void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
  ((DAGCombiner*)DC)->AddToWorklist(N);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
}

void TargetLowering::DAGCombinerInfo::
CommitTargetLoweringOpt(const TargetLoweringOpt &TLO) {
  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
}

//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//

void DAGCombiner::deleteAndRecombine(SDNode *N) {
  removeFromWorklist(N);

  // If the operands of this node are only used by the node, they will now be
  // dead. Make sure to re-visit them and recursively delete dead nodes.
  for (const SDValue &Op : N->ops())
    // For an operand generating multiple values, one of the values may
    // become dead allowing further simplification (e.g. split index
    // arithmetic from an indexed load).
    if (Op->hasOneUse() || Op->getNumValues() > 1)
      AddToWorklist(Op.getNode());

  DAG.DeleteNode(N);
}

/// Return 1 if we can compute the negated form of the specified expression for
/// the same cost as the expression itself, 2 if we can compute the negated
/// form more cheaply than the expression itself, and 0 if it cannot be negated
/// for free.
static char isNegatibleForFree(SDValue Op, bool LegalOperations,
                               const TargetLowering &TLI,
                               const TargetOptions *Options,
                               unsigned Depth = 0) {
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) return 2;

  // Don't allow anything with multiple uses unless we know it is free.
  EVT VT = Op.getValueType();
  const SDNodeFlags Flags = Op->getFlags();
  if (!Op.hasOneUse())
    if (!(Op.getOpcode() == ISD::FP_EXTEND &&
          TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
      return 0;

  // Don't recurse exponentially.
  if (Depth > 6) return 0;

  switch (Op.getOpcode()) {
  default: return 0;
  case ISD::ConstantFP: {
    if (!LegalOperations)
      return 1;

    // Don't invert constant FP values after legalization unless the target
    // says the negated constant is legal.
    return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
           TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
  }
  case ISD::FADD:
    if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
      return 0;

    // After operation legalization, it might not be legal to create new FSUBs.
    if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
      return 0;

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
                                    Options, Depth + 1))
      return V;
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
                              Depth + 1);
  case ISD::FSUB:
    // We can't turn -(A-B) into B-A when we honor signed zeros.
    if (!Options->NoSignedZerosFPMath &&
        !Flags.hasNoSignedZeros())
      return 0;

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return 1;

  case ISD::FMUL:
  case ISD::FDIV:
    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
                                    Options, Depth + 1))
      return V;

    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
                              Depth + 1);

  case ISD::FP_EXTEND:
  case ISD::FP_ROUND:
  case ISD::FSIN:
    return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
                              Depth + 1);
  }
}
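
// Typical caller pattern (a sketch, not code from this spot in the file):
// a fold such as (fsub A, B) -> (fadd A, (fneg B)) is only attempted when
// the negation is at least free, e.g.
//   if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
//     /* use */ GetNegatedExpression(N1, DAG, LegalOperations);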

/// If isNegatibleForFree returns true, return the newly negated expression.
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                    bool LegalOperations, unsigned Depth = 0) {
  const TargetOptions &Options = DAG.getTarget().Options;
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);

  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");

  const SDNodeFlags Flags = Op.getNode()->getFlags();

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown code");
  case ISD::ConstantFP: {
    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
  }
  case ISD::FADD:
    assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
      return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1), Flags);
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, Depth+1),
                       Op.getOperand(0), Flags);
  case ISD::FSUB:
    // fold (fneg (fsub 0, B)) -> B
    if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
      if (N0CFP->isZero())
        return Op.getOperand(1);

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(0), Flags);

  case ISD::FMUL:
  case ISD::FDIV:
    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1), Flags);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       Op.getOperand(0),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, Depth+1), Flags);

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, Depth+1));
  case ISD::FP_ROUND:
    return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, Depth+1),
                       Op.getOperand(1));
  }
}

// APInts must be the same size for most operations, this helper
// function zero extends the shorter of the pair so that they match.
// We provide an Offset so that we can create bitwidths that won't overflow.
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
  LHS = LHS.zextOrSelf(Bits);
  RHS = RHS.zextOrSelf(Bits);
}
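
// Worked example: with an 8-bit LHS, a 16-bit RHS, and Offset == 1, both
// operands are zero-extended to max(8, 16) + 1 == 17 bits, leaving headroom
// so a following shift or carry cannot overflow.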

// Return true if this node is a setcc, or is a select_cc
// that selects between the target values used for true and false, making it
// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
// the appropriate nodes based on the type of node we are checking. This
// simplifies life a bit for the callers.
bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                                    SDValue &CC) const {
  if (N.getOpcode() == ISD::SETCC) {
    LHS = N.getOperand(0);
    RHS = N.getOperand(1);
    CC  = N.getOperand(2);
    return true;
  }

  if (N.getOpcode() != ISD::SELECT_CC ||
      !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
      !TLI.isConstFalseVal(N.getOperand(3).getNode()))
    return false;

  if (TLI.getBooleanContents(N.getValueType()) ==
      TargetLowering::UndefinedBooleanContent)
    return false;

  LHS = N.getOperand(0);
  RHS = N.getOperand(1);
  CC  = N.getOperand(4);
  return true;
}
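
// Example: (select_cc lhs, rhs, T, F, cc) is reported as equivalent to
// (setcc lhs, rhs, cc) when T and F are the target's canonical true and
// false values.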

/// Return true if this is a SetCC-equivalent operation with only one use.
/// If this is true, it allows the users to invert the operation for free when
/// it is profitable to do so.
bool DAGCombiner::isOneUseSetCC(SDValue N) const {
  SDValue N0, N1, N2;
  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
    return true;
  return false;
}

// Returns the SDNode if it is a constant float BuildVector
// or constant float.
static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
  if (isa<ConstantFPSDNode>(N))
    return N.getNode();
  if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
    return N.getNode();
  return nullptr;
}

// Determines if it is a constant integer or a build vector of constant
// integers (and undefs).
// Do not permit build vector implicit truncation.
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
    return !(Const->isOpaque() && NoOpaques);
  if (N.getOpcode() != ISD::BUILD_VECTOR)
    return false;
  unsigned BitWidth = N.getScalarValueSizeInBits();
  for (const SDValue &Op : N->op_values()) {
    if (Op.isUndef())
      continue;
    ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
    if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
        (Const->isOpaque() && NoOpaques))
      return false;
  }
  return true;
}
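
// Example: (build_vector 0, 1, undef, 3) with i32 elements qualifies, while
// an operand whose APInt width differs from the vector's scalar size (an
// implicitly truncated constant) causes the check above to fail.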

// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
// undef's.
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
  if (V.getOpcode() != ISD::BUILD_VECTOR)
    return false;
  return isConstantOrConstantVector(V, NoOpaques) ||
         ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
}

SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                                    SDValue N1, SDNodeFlags Flags) {
  // Don't reassociate reductions.
  if (Flags.hasVectorReduction())
    return SDValue();

  EVT VT = N0.getValueType();
  if (N0.getOpcode() == Opc && !N0->getFlags().hasVectorReduction()) {
    if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
      if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
        // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
          return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
        return SDValue();
      }
      if (N0.hasOneUse()) {
        // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
        // use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
      }
    }
  }

  if (N1.getOpcode() == Opc && !N1->getFlags().hasVectorReduction()) {
    if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
      if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
        // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
          return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
        return SDValue();
      }
      if (N1.hasOneUse()) {
        // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
        // use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
      }
    }
  }

  return SDValue();
}
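
// Worked instance of the first fold above (sketch):
//   (add (add x, 1), 2) -> (add x, (add 1, 2)) -> (add x, 3)
// so the two constants collapse into a single operand.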

SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                               bool AddTo) {
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
             To[0].getNode()->dump(&DAG);
             dbgs() << " and " << NumTo - 1 << " other values\n");
  for (unsigned i = 0, e = NumTo; i != e; ++i)
    assert((!To[i].getNode() ||
            N->getValueType(i) == To[i].getValueType()) &&
           "Cannot combine value to value of different type!");

  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To);
  if (AddTo) {
    // Push the new nodes and any users onto the worklist
    for (unsigned i = 0, e = NumTo; i != e; ++i) {
      if (To[i].getNode()) {
        AddToWorklist(To[i].getNode());
        AddUsersToWorklist(To[i].getNode());
      }
    }
  }

  // Finally, if the node is now dead, remove it from the graph. The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (N->use_empty())
    deleteAndRecombine(N);
  return SDValue(N, 0);
}

void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  // Replace all uses. If any nodes become isomorphic to other nodes and
  // are deleted, make sure to remove them from our worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);

  // Push the new node and any (possibly new) users onto the worklist.
  AddToWorklist(TLO.New.getNode());
  AddUsersToWorklist(TLO.New.getNode());

  // Finally, if the node is now dead, remove it from the graph. The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (TLO.Old.getNode()->use_empty())
    deleteAndRecombine(TLO.Old.getNode());
}

/// Check the specified integer node value to see if it can be simplified or if
/// things it uses can be simplified by bit propagation. If so, return true.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  KnownBits Known;
  if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
             dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}

/// Check the specified vector node value to see if it can be simplified or
/// if things it uses can be simplified as it only uses some of the elements.
/// If so, return true.
bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
                                             bool AssumeSingleUse) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  APInt KnownUndef, KnownZero;
  if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO,
                                      0, AssumeSingleUse))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
             dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}

void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  SDLoc DL(Load);
  EVT VT = Load->getValueType(0);
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));

  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
             Trunc.getNode()->dump(&DAG); dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
  deleteAndRecombine(Load);
  AddToWorklist(Trunc.getNode());
}

SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  Replace = false;
  SDLoc DL(Op);
  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
    LoadSDNode *LD = cast<LoadSDNode>(Op);
    EVT MemVT = LD->getMemoryVT();
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    Replace = true;
    return DAG.getExtLoad(ExtType, DL, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());
  }

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default: break;
  case ISD::AssertSext:
    if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::AssertZext:
    if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::Constant: {
    unsigned ExtOpc =
        Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, DL, PVT, Op);
  }
  }

  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    return SDValue();
  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
}

SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
    return SDValue();
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
                     DAG.getValueType(OldVT));
}

SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
}

/// Promote the specified integer binary operation if the target indicates it
/// is beneficial. e.g. On x86, it's usually better to promote i16 operations
/// to i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);

    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
    SDLoc DL(Op);

    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));

    // We are always replacing N0/N1's use in N and only need
    // additional replacements if there are additional uses.
    Replace0 &= !N0->hasOneUse();
    Replace1 &= (N0 != N1) && !N1->hasOneUse();

    // Combine Op here so it is preserved past replacements.
    CombineTo(Op.getNode(), RV);

    // If operands have a use ordering, make sure we deal with
    // predecessor first.
    if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
      std::swap(N0, N1);
      std::swap(NN0, NN1);
    }

    if (Replace0) {
      AddToWorklist(NN0.getNode());
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    }
    if (Replace1) {
      AddToWorklist(NN1.getNode());
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
    }
    return Op;
  }
  return SDValue();
}

/// Promote the specified integer shift operation if the target indicates it
/// is beneficial. e.g. On x86, it's usually better to promote i16 operations
/// to i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    bool Replace = false;
    SDValue N0 = Op.getOperand(0);
    SDValue N1 = Op.getOperand(1);
    if (Opc == ISD::SRA)
      N0 = SExtPromoteOperand(N0, PVT);
    else if (Opc == ISD::SRL)
      N0 = ZExtPromoteOperand(N0, PVT);
    else
      N0 = PromoteOperand(N0, PVT, Replace);

    if (!N0.getNode())
      return SDValue();

    SDLoc DL(Op);
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));

    AddToWorklist(N0.getNode());
    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

    // Deal with Op being deleted.
    if (Op && Op.getOpcode() != ISD::DELETED_NODE)
      return RV;
  }
  return SDValue();
}

SDValue DAGCombiner::PromoteExtend(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");
    // fold (aext (aext x)) -> (aext x)
    // fold (aext (zext x)) -> (zext x)
    // fold (aext (sext x)) -> (sext x)
    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
  }
  return SDValue();
}

bool DAGCombiner::PromoteLoad(SDValue Op) {
  if (!LegalOperations)
    return false;

  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return false;

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    SDLoc DL(Op);
    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT MemVT = LD->getMemoryVT();
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD
                                                     : LD->getExtensionType();
    SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   MemVT, LD->getMemOperand());
    SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);

    LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
               Result.getNode()->dump(&DAG); dbgs() << '\n');
    WorklistRemover DeadNodes(*this);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    deleteAndRecombine(N);
    AddToWorklist(Result.getNode());
    return true;
  }
  return false;
}

/// Recursively delete a node which has no uses and any operands for
/// which it is the only use.
///
/// Note that this both deletes the nodes and removes them from the worklist.
/// It also adds any nodes that have had a user deleted to the worklist as
/// they may now have only one use and be subject to other combines.
bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
  if (!N->use_empty())
    return false;

  SmallSetVector<SDNode *, 16> Nodes;
  Nodes.insert(N);
  do {
    N = Nodes.pop_back_val();
    if (!N)
      continue;

    if (N->use_empty()) {
      for (const SDValue &ChildN : N->op_values())
        Nodes.insert(ChildN.getNode());

      removeFromWorklist(N);
      DAG.DeleteNode(N);
    } else {
      AddToWorklist(N);
    }
  } while (!Nodes.empty());
  return true;
}

//===----------------------------------------------------------------------===//
// Main DAG Combiner implementation
//===----------------------------------------------------------------------===//

void DAGCombiner::Run(CombineLevel AtLevel) {
  // Set the instance variables, so that the various visit routines may use
  // them.
  Level = AtLevel;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  // Add all the dag nodes to the worklist.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While the worklist isn't empty, find a node and try to combine it.
  while (!WorklistMap.empty()) {
    SDNode *N;
    // The Worklist holds the SDNodes in order, but it may contain null entries.
    do {
      N = Worklist.pop_back_val();
    } while (!N);

    bool GoodWorklistEntry = WorklistMap.erase(N);
    (void)GoodWorklistEntry;
    assert(GoodWorklistEntry &&
           "Found a worklist entry without a corresponding map entry!");

    // If N has no uses, it is dead. Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (Level == AfterLegalizeDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      for (SDNode *LN : UpdatedNodes) {
        AddToWorklist(LN);
        AddUsersToWorklist(LN);
      }
      if (!NIsValid)
        continue;
    }

    LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
    CombinedNodes.insert(N);
    for (const SDValue &ChildN : N->op_values())
      if (!CombinedNodes.count(ChildN.getNode()))
        AddToWorklist(ChildN.getNode());

    SDValue RV = combine(N);

    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used. Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));

    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      DAG.ReplaceAllUsesWith(N, &RV);
    }

    // Push the new node and any users onto the worklist
    AddToWorklist(RV.getNode());
    AddUsersToWorklist(RV.getNode());

    // Finally, if the node is now dead, remove it from the graph. The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node. This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load), update the root.
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}

SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::SADDSAT:
  case ISD::UADDSAT:            return visitADDSAT(N);
  case ISD::SSUBSAT:
  case ISD::USUBSAT:            return visitSUBSAT(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::UADDO:              return visitUADDO(N);
  case ISD::SUBC:               return visitSUBC(N);
  case ISD::USUBO:              return visitUSUBO(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::ADDCARRY:           return visitADDCARRY(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::SUBCARRY:           return visitSUBCARRY(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  case ISD::SREM:
  case ISD::UREM:               return visitREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SMULO:              return visitSMULO(N);
  case ISD::UMULO:              return visitUMULO(N);
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:               return visitIMINMAX(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  case ISD::ROTR:
  case ISD::ROTL:               return visitRotate(N);
  case ISD::FSHL:
  case ISD::FSHR:               return visitFunnelShift(N);
  case ISD::ABS:                return visitABS(N);
  case ISD::BSWAP:              return visitBSWAP(N);
  case ISD::BITREVERSE:         return visitBITREVERSE(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::VSELECT:            return visitVSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  case ISD::AssertSext:
  case ISD::AssertZext:         return visitAssertExt(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BITCAST:            return visitBITCAST(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FSQRT:              return visitFSQRT(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::FPOW:               return visitFPOW(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::FFLOOR:             return visitFFLOOR(N);
  case ISD::FMINNUM:            return visitFMINNUM(N);
  case ISD::FMAXNUM:            return visitFMAXNUM(N);
  case ISD::FMINIMUM:           return visitFMINIMUM(N);
  case ISD::FMAXIMUM:           return visitFMAXIMUM(N);
  case ISD::FCEIL:              return visitFCEIL(N);
  case ISD::FTRUNC:             return visitFTRUNC(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
1589  case ISD::MGATHER: return visitMGATHER(N);
1590  case ISD::MLOAD: return visitMLOAD(N);
1591  case ISD::MSCATTER: return visitMSCATTER(N);
1592  case ISD::MSTORE: return visitMSTORE(N);
1593  case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1594  case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1595  }
1596  return SDValue();
1597 }
1598 
1599 SDValue DAGCombiner::combine(SDNode *N) {
1600  SDValue RV = visit(N);
1601 
1602  // If nothing happened, try a target-specific DAG combine.
1603  if (!RV.getNode()) {
1605  "Node was deleted but visit returned NULL!");
1606 
1607  if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1608  TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1609 
1610  // Expose the DAG combiner to the target combiner impls.
1611  TargetLowering::DAGCombinerInfo
1612  DagCombineInfo(DAG, Level, false, this);
1613 
1614  RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1615  }
1616  }
1617 
1618  // If still nothing happened, try promoting the operation.
1619  if (!RV.getNode()) {
1620  switch (N->getOpcode()) {
1621  default: break;
1622  case ISD::ADD:
1623  case ISD::SUB:
1624  case ISD::MUL:
1625  case ISD::AND:
1626  case ISD::OR:
1627  case ISD::XOR:
1628  RV = PromoteIntBinOp(SDValue(N, 0));
1629  break;
1630  case ISD::SHL:
1631  case ISD::SRA:
1632  case ISD::SRL:
1633  RV = PromoteIntShiftOp(SDValue(N, 0));
1634  break;
1635  case ISD::SIGN_EXTEND:
1636  case ISD::ZERO_EXTEND:
1637  case ISD::ANY_EXTEND:
1638  RV = PromoteExtend(SDValue(N, 0));
1639  break;
1640  case ISD::LOAD:
1641  if (PromoteLoad(SDValue(N, 0)))
1642  RV = SDValue(N, 0);
1643  break;
1644  }
1645  }
1646 
1647  // If N is a commutative binary node, try to eliminate it if the commuted
1648  // version is already present in the DAG.
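 // e.g. if (add b, a) already exists in the DAG, a request to combine
 // (add a, b) can simply return that node instead of creating a duplicate.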
1649  if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1650  N->getNumValues() == 1) {
1651  SDValue N0 = N->getOperand(0);
1652  SDValue N1 = N->getOperand(1);
1653 
1654  // Constant operands are canonicalized to RHS.
1655  if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1656  SDValue Ops[] = {N1, N0};
1657  SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1658  N->getFlags());
1659  if (CSENode)
1660  return SDValue(CSENode, 0);
1661  }
1662  }
1663 
1664  return RV;
1665 }
1666 
1667 /// Given a node, return its input chain if it has one, otherwise return a null
1668 /// sd operand.
1669 static SDValue getInputChainForNode(SDNode *N) {
1670  if (unsigned NumOps = N->getNumOperands()) {
1671  if (N->getOperand(0).getValueType() == MVT::Other)
1672  return N->getOperand(0);
1673  if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1674  return N->getOperand(NumOps-1);
1675  for (unsigned i = 1; i < NumOps-1; ++i)
1676  if (N->getOperand(i).getValueType() == MVT::Other)
1677  return N->getOperand(i);
1678  }
1679  return SDValue();
1680 }
1681 
1682 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1683  // If N has two operands, where one has an input chain equal to the other,
1684  // the 'other' chain is redundant.
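 // e.g. in TokenFactor (t1, ch) where t1 is a load whose input chain is ch,
 // t1's token result already orders everything after ch, so t1 suffices.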
1685  if (N->getNumOperands() == 2) {
1686  if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1687  return N->getOperand(0);
1688  if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1689  return N->getOperand(1);
1690  }
1691 
1692  // Don't simplify token factors if optnone.
1693  if (OptLevel == CodeGenOpt::None)
1694  return SDValue();
1695 
1696  SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1697  SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1698  SmallPtrSet<SDNode*, 16> SeenOps;
1699  bool Changed = false; // If we should replace this token factor.
1700 
1701  // Start out with this token factor.
1702  TFs.push_back(N);
1703 
1704  // Iterate through token factors. The TFs list grows when new token
1705  // factors are encountered.
1706  for (unsigned i = 0; i < TFs.size(); ++i) {
1707  SDNode *TF = TFs[i];
1708 
1709  // Check each of the operands.
1710  for (const SDValue &Op : TF->op_values()) {
1711  switch (Op.getOpcode()) {
1712  case ISD::EntryToken:
1713  // Entry tokens don't need to be added to the list. They are
1714  // redundant.
1715  Changed = true;
1716  break;
1717 
1718  case ISD::TokenFactor:
1719  if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1720  // Queue up for processing.
1721  TFs.push_back(Op.getNode());
1722  // Clean up in case the token factor is removed.
1723  AddToWorklist(Op.getNode());
1724  Changed = true;
1725  break;
1726  }
1727  LLVM_FALLTHROUGH;
1728 
1729  default:
1730  // Only add if it isn't already in the list.
1731  if (SeenOps.insert(Op.getNode()).second)
1732  Ops.push_back(Op);
1733  else
1734  Changed = true;
1735  break;
1736  }
1737  }
1738  }
1739 
1740  // Remove nodes that are chained to another node in the list. Do so
1741  // by walking up chains breadth-first, stopping when we've seen
1742  // another operand. In general we must climb to the EntryNode, but we can exit
1743  // early if we find all remaining work is associated with just one operand as
1744  // no further pruning is possible.
1745 
1746  // List of nodes to search through and original Ops from which they originate.
1747  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1748  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1749  SmallPtrSet<SDNode *, 16> SeenChains;
1750  bool DidPruneOps = false;
1751 
1752  unsigned NumLeftToConsider = 0;
1753  for (const SDValue &Op : Ops) {
1754  Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1755  OpWorkCount.push_back(1);
1756  }
1757 
1758  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1759  // If this is an Op, we can remove the op from the list. Re-mark any
1760  // search associated with it as coming from the current OpNumber.
1761  if (SeenOps.count(Op) != 0) {
1762  Changed = true;
1763  DidPruneOps = true;
1764  unsigned OrigOpNumber = 0;
1765  while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1766  OrigOpNumber++;
1767  assert((OrigOpNumber != Ops.size()) &&
1768  "expected to find TokenFactor Operand");
1769  // Re-mark worklist from OrigOpNumber to OpNumber
1770  for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1771  if (Worklist[i].second == OrigOpNumber) {
1772  Worklist[i].second = OpNumber;
1773  }
1774  }
1775  OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1776  OpWorkCount[OrigOpNumber] = 0;
1777  NumLeftToConsider--;
1778  }
1779  // Add if it's a new chain
1780  if (SeenChains.insert(Op).second) {
1781  OpWorkCount[OpNumber]++;
1782  Worklist.push_back(std::make_pair(Op, OpNumber));
1783  }
1784  };
1785 
1786  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1787  // We need to consider at least 2 Ops to prune.
1788  if (NumLeftToConsider <= 1)
1789  break;
1790  auto CurNode = Worklist[i].first;
1791  auto CurOpNumber = Worklist[i].second;
1792  assert((OpWorkCount[CurOpNumber] > 0) &&
1793  "Node should not appear in worklist");
1794  switch (CurNode->getOpcode()) {
1795  case ISD::EntryToken:
1796  // Hitting EntryToken is the only way for the search to terminate
1797  // without hitting another operand's search.
1798  // Prevent this operand from being marked
1799  // as considered.
1800  NumLeftToConsider++;
1801  break;
1802  case ISD::TokenFactor:
1803  for (const SDValue &Op : CurNode->op_values())
1804  AddToWorklist(i, Op.getNode(), CurOpNumber);
1805  break;
1806  case ISD::CopyFromReg:
1807  case ISD::CopyToReg:
1808  AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1809  break;
1810  default:
1811  if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1812  AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1813  break;
1814  }
1815  OpWorkCount[CurOpNumber]--;
1816  if (OpWorkCount[CurOpNumber] == 0)
1817  NumLeftToConsider--;
1818  }
1819 
1820  // If we've changed things around then replace token factor.
1821  if (Changed) {
1822  SDValue Result;
1823  if (Ops.empty()) {
1824  // The entry token is the only possible outcome.
1825  Result = DAG.getEntryNode();
1826  } else {
1827  if (DidPruneOps) {
1828  SmallVector<SDValue, 8> PrunedOps;
1829  // Keep only ops that were not reached from another op's chain walk; a reached op is already ordered by that other op.
1830  for (const SDValue &Op : Ops) {
1831  if (SeenChains.count(Op.getNode()) == 0)
1832  PrunedOps.push_back(Op);
1833  }
1834  Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
1835  } else {
1836  Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
1837  }
1838  }
1839  return Result;
1840  }
1841  return SDValue();
1842 }
1843 
1844 /// MERGE_VALUES can always be eliminated.
1845 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1846  WorklistRemover DeadNodes(*this);
1847  // Replacing results may cause a different MERGE_VALUES to suddenly
1848  // be CSE'd with N, and carry its uses with it. Iterate until no
1849  // uses remain, to ensure that the node can be safely deleted.
1850  // First add the users of this node to the work list so that they
1851  // can be tried again once they have new operands.
1852  AddUsersToWorklist(N);
1853  do {
1854  // Do as a single replacement to avoid rewalking use lists.
1855  SmallVector<SDValue, 8> Ops;
1856  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1857  Ops.push_back(N->getOperand(i));
1858  DAG.ReplaceAllUsesWith(N, Ops.data());
1859  } while (!N->use_empty());
1860  deleteAndRecombine(N);
1861  return SDValue(N, 0); // Return N so it doesn't get rechecked!
1862 }
1863 
1864 /// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
1865 /// ConstantSDNode pointer; otherwise return nullptr.
1866 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1867  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1868  return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1869 }
1870 
1871 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
1872  assert(ISD::isBinaryOp(BO) && "Unexpected binary operator");
1873 
1874  // Don't do this unless the old select is going away. We want to eliminate the
1875  // binary operator, not replace a binop with a select.
1876  // TODO: Handle ISD::SELECT_CC.
1877  unsigned SelOpNo = 0;
1878  SDValue Sel = BO->getOperand(0);
1879  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
1880  SelOpNo = 1;
1881  Sel = BO->getOperand(1);
1882  }
1883 
1884  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1885  return SDValue();
1886 
1887  SDValue CT = Sel.getOperand(1);
1888  if (!isConstantOrConstantVector(CT, true) &&
1889  !isConstantFPBuildVectorOrConstantFP(CT))
1890  return SDValue();
1891 
1892  SDValue CF = Sel.getOperand(2);
1893  if (!isConstantOrConstantVector(CF, true) &&
1894  !isConstantFPBuildVectorOrConstantFP(CF))
1895  return SDValue();
1896 
1897  // Bail out if any constants are opaque because we can't constant fold those.
1898  // The exception is "and" and "or" with either 0 or -1 in which case we can
1899  // propagate non constant operands into select. I.e.:
1900  // and (select Cond, 0, -1), X --> select Cond, 0, X
1901  // or X, (select Cond, -1, 0) --> select Cond, -1, X
1902  auto BinOpcode = BO->getOpcode();
1903  bool CanFoldNonConst =
1904  (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
1905  (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
1906  (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
1907 
1908  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
1909  if (!CanFoldNonConst &&
1910  !isConstantOrConstantVector(CBO, true) &&
1911  !isConstantFPBuildVectorOrConstantFP(CBO))
1912  return SDValue();
1913 
1914  EVT VT = Sel.getValueType();
1915 
1916  // In the case of a shift, the value and the shift amount may have different
1917  // VTs. For instance, on x86 the shift amount is i8 regardless of the LHS
1918  // type. Bail out if we have swapped operands and the value types do not
1919  // match. NB: x86 is fine if the operands are not swapped and the shift
1920  // amount VT is no bigger than that of the shifted value.
1921  // TODO: it is possible to check for a shift operation, correct the VTs, and still perform the optimization on x86 if needed.
1922  if (SelOpNo && VT != CBO.getValueType())
1923  return SDValue();
1924 
1925  // We have a select-of-constants followed by a binary operator with a
1926  // constant. Eliminate the binop by pulling the constant math into the select.
1927  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
1928  SDLoc DL(Sel);
1929  SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
1930  : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
1931  if (!CanFoldNonConst && !NewCT.isUndef() &&
1932  !isConstantOrConstantVector(NewCT, true) &&
1933  !isConstantFPBuildVectorOrConstantFP(NewCT))
1934  return SDValue();
1935 
1936  SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
1937  : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
1938  if (!CanFoldNonConst && !NewCF.isUndef() &&
1939  !isConstantOrConstantVector(NewCF, true) &&
1940  !isConstantFPBuildVectorOrConstantFP(NewCF))
1941  return SDValue();
1942 
1943  return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
1944 }
1945 
1946 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
1947  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
1948  "Expecting add or sub");
1949 
1950  // Match a constant operand and a zext operand for the math instruction:
1951  // add Z, C
1952  // sub C, Z
1953  bool IsAdd = N->getOpcode() == ISD::ADD;
1954  SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
1955  SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
1956  auto *CN = dyn_cast<ConstantSDNode>(C);
1957  if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
1958  return SDValue();
1959 
1960  // Match the zext operand as a setcc of a boolean.
1961  if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
1962  Z.getOperand(0).getValueType() != MVT::i1)
1963  return SDValue();
1964 
1965  // Match the compare as: setcc (X & 1), 0, eq.
1966  SDValue SetCC = Z.getOperand(0);
1967  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
1968  if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
1969  SetCC.getOperand(0).getOpcode() != ISD::AND ||
1970  !isOneConstant(SetCC.getOperand(0).getOperand(1)))
1971  return SDValue();
1972 
1973  // We are adding/subtracting a constant and an inverted low bit. Turn that
1974  // into a subtract/add of the low bit with incremented/decremented constant:
1975  // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
1976  // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
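 // Check: if (X & 1) == 0, both forms of the add case yield C + 1; if
 // (X & 1) == 1, both yield C. The sub case is analogous.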
1977  EVT VT = C.getValueType();
1978  SDLoc DL(N);
1979  SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
1980  SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
1981  DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
1982  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
1983 }
1984 
1985 /// Try to fold a 'not' of a shifted sign-bit used in an add/sub with a
1986 /// constant operand into a shift and add with a different constant.
1987 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
1988  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
1989  "Expecting add or sub");
1990 
1991  // We need a constant operand for the add/sub, and the other operand is a
1992  // logical shift right: add (srl), C or sub C, (srl).
1993  bool IsAdd = N->getOpcode() == ISD::ADD;
1994  SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
1995  SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
1996  ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
1997  if (!C || ShiftOp.getOpcode() != ISD::SRL)
1998  return SDValue();
1999 
2000  // The shift must be of a 'not' value.
2001  SDValue Not = ShiftOp.getOperand(0);
2002  if (!Not.hasOneUse() || !isBitwiseNot(Not))
2003  return SDValue();
2004 
2005  // The shift must be moving the sign bit to the least-significant-bit.
2006  EVT VT = ShiftOp.getValueType();
2007  SDValue ShAmt = ShiftOp.getOperand(1);
2008  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2009  if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1)
2010  return SDValue();
2011 
2012  // Eliminate the 'not' by adjusting the shift and add/sub constant:
2013  // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2014  // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
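 // Check: srl (not X), 31 computes 1 - s where s is X's sign bit, while
 // sra (X, 31) computes -s, so both sides of the add case equal C + 1 - s.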
2015  SDLoc DL(N);
2016  auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2017  SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2018  APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
2019  return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
2020 }
2021 
2022 SDValue DAGCombiner::visitADD(SDNode *N) {
2023  SDValue N0 = N->getOperand(0);
2024  SDValue N1 = N->getOperand(1);
2025  EVT VT = N0.getValueType();
2026  SDLoc DL(N);
2027 
2028  // fold vector ops
2029  if (VT.isVector()) {
2030  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2031  return FoldedVOp;
2032 
2033  // fold (add x, 0) -> x, vector edition
2034  if (ISD::isBuildVectorAllZeros(N1.getNode()))
2035  return N0;
2036  if (ISD::isBuildVectorAllZeros(N0.getNode()))
2037  return N1;
2038  }
2039 
2040  // fold (add x, undef) -> undef
2041  if (N0.isUndef())
2042  return N0;
2043 
2044  if (N1.isUndef())
2045  return N1;
2046 
2047  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2048  // canonicalize constant to RHS
2049  if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2050  return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2051  // fold (add c1, c2) -> c1+c2
2052  return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
2053  N1.getNode());
2054  }
2055 
2056  // fold (add x, 0) -> x
2057  if (isNullConstant(N1))
2058  return N0;
2059 
2060  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2061  // fold ((c1-A)+c2) -> (c1+c2)-A
2062  if (N0.getOpcode() == ISD::SUB &&
2063  isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2064  // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
2065  return DAG.getNode(ISD::SUB, DL, VT,
2066  DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2067  N0.getOperand(1));
2068  }
2069 
2070  // add (sext i1 X), 1 -> zext (not i1 X)
2071  // We don't transform this pattern:
2072  // add (zext i1 X), -1 -> sext (not i1 X)
2073  // because most (?) targets generate better code for the zext form.
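 // Check: sext i1 X is 0 or -1, so the add yields 1 or 0, which matches
 // zext (not X): 1 when X == 0 and 0 when X == 1.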
2074  if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2075  isOneOrOneSplat(N1)) {
2076  SDValue X = N0.getOperand(0);
2077  if ((!LegalOperations ||
2078  (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2079  TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2080  X.getScalarValueSizeInBits() == 1) {
2081  SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2082  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2083  }
2084  }
2085 
2086  // Undo the add -> or combine to merge constant offsets from a frame index.
2087  if (N0.getOpcode() == ISD::OR &&
2088  isa<FrameIndexSDNode>(N0.getOperand(0)) &&
2089  isa<ConstantSDNode>(N0.getOperand(1)) &&
2090  DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2091  SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
2092  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2093  }
2094  }
2095 
2096  if (SDValue NewSel = foldBinOpIntoSelect(N))
2097  return NewSel;
2098 
2099  // reassociate add
2100  if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2101  return RADD;
2102 
2103  // fold ((0-A) + B) -> B-A
2104  if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2105  return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2106 
2107  // fold (A + (0-B)) -> A-B
2108  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2109  return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2110 
2111  // fold (A+(B-A)) -> B
2112  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2113  return N1.getOperand(0);
2114 
2115  // fold ((B-A)+A) -> B
2116  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2117  return N0.getOperand(0);
2118 
2119  // fold (A+(B-(A+C))) to (B-C)
2120  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2121  N0 == N1.getOperand(1).getOperand(0))
2122  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2123  N1.getOperand(1).getOperand(1));
2124 
2125  // fold (A+(B-(C+A))) to (B-C)
2126  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2127  N0 == N1.getOperand(1).getOperand(1))
2128  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2129  N1.getOperand(1).getOperand(0));
2130 
2131  // fold (A+((B-A)+or-C)) to (B+or-C)
2132  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2133  N1.getOperand(0).getOpcode() == ISD::SUB &&
2134  N0 == N1.getOperand(0).getOperand(1))
2135  return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2136  N1.getOperand(1));
2137 
2138  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2139  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2140  SDValue N00 = N0.getOperand(0);
2141  SDValue N01 = N0.getOperand(1);
2142  SDValue N10 = N1.getOperand(0);
2143  SDValue N11 = N1.getOperand(1);
2144 
2145  if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2146  return DAG.getNode(ISD::SUB, DL, VT,
2147  DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2148  DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2149  }
2150 
2151  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2152  return V;
2153 
2154  if (SDValue V = foldAddSubOfSignBit(N, DAG))
2155  return V;
2156 
2157  if (SimplifyDemandedBits(SDValue(N, 0)))
2158  return SDValue(N, 0);
2159 
2160  // fold (a+b) -> (a|b) iff a and b share no bits.
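 // e.g. (X & 0xF0) + (Y & 0x0F) can become (X & 0xF0) | (Y & 0x0F), since
 // no bit position can generate a carry.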
2161  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2162  DAG.haveNoCommonBitsSet(N0, N1))
2163  return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2164 
2165  // fold (add (xor a, -1), 1) -> (sub 0, a)
2166  if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
2167  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2168  N0.getOperand(0));
2169 
2170  if (SDValue Combined = visitADDLike(N0, N1, N))
2171  return Combined;
2172 
2173  if (SDValue Combined = visitADDLike(N1, N0, N))
2174  return Combined;
2175 
2176  return SDValue();
2177 }
2178 
2179 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2180  unsigned Opcode = N->getOpcode();
2181  SDValue N0 = N->getOperand(0);
2182  SDValue N1 = N->getOperand(1);
2183  EVT VT = N0.getValueType();
2184  SDLoc DL(N);
2185 
2186  // fold vector ops
2187  if (VT.isVector()) {
2188  // TODO SimplifyVBinOp
2189 
2190  // fold (add_sat x, 0) -> x, vector edition
2191  if (ISD::isBuildVectorAllZeros(N1.getNode()))
2192  return N0;
2193  if (ISD::isBuildVectorAllZeros(N0.getNode()))
2194  return N1;
2195  }
2196 
2197  // fold (add_sat x, undef) -> -1
2198  if (N0.isUndef() || N1.isUndef())
2199  return DAG.getAllOnesConstant(DL, VT);
2200 
2201  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2202  // canonicalize constant to RHS
2203  if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2204  return DAG.getNode(Opcode, DL, VT, N1, N0);
2205  // fold (add_sat c1, c2) -> c3
2206  return DAG.FoldConstantArithmetic(Opcode, DL, VT, N0.getNode(),
2207  N1.getNode());
2208  }
2209 
2210  // fold (add_sat x, 0) -> x
2211  if (isNullConstant(N1))
2212  return N0;
2213 
2214  // If it cannot overflow, transform into an add.
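 // e.g. in i8, uaddsat (and X, 15), 16 can never exceed 31, so the
 // saturation point is unreachable and a plain add is equivalent.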
2215  if (Opcode == ISD::UADDSAT)
2216  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2217  return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2218 
2219  return SDValue();
2220 }
2221 
2222 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2223  bool Masked = false;
2224 
2225  // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
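 // e.g. legalization may have wrapped a carry bit as (and (zext C), 1);
 // peeling these wrappers exposes the underlying flag-producing node.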
2226  while (true) {
2227  if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2228  V = V.getOperand(0);
2229  continue;
2230  }
2231 
2232  if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2233  Masked = true;
2234  V = V.getOperand(0);
2235  continue;
2236  }
2237 
2238  break;
2239  }
2240 
2241  // If this is not a carry, return.
2242  if (V.getResNo() != 1)
2243  return SDValue();
2244 
2245  if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2246  V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2247  return SDValue();
2248 
2249  // If the result is masked, then no matter what kind of bool it is we can
2250  // return. If it isn't, then we need to make sure the bool type is either 0 or
2251  // 1 and not other values.
2252  if (Masked ||
2253  TLI.getBooleanContents(V.getValueType()) ==
2254  TargetLowering::ZeroOrOneBooleanContent)
2255  return V;
2256 
2257  return SDValue();
2258 }
2259 
2260 SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
2261  EVT VT = N0.getValueType();
2262  SDLoc DL(LocReference);
2263 
2264  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2265  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2266  isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2267  return DAG.getNode(ISD::SUB, DL, VT, N0,
2268  DAG.getNode(ISD::SHL, DL, VT,
2269  N1.getOperand(0).getOperand(1),
2270  N1.getOperand(1)));
2271 
2272  if (N1.getOpcode() == ISD::AND) {
2273  SDValue AndOp0 = N1.getOperand(0);
2274  unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
2275  unsigned DestBits = VT.getScalarSizeInBits();
2276 
2277  // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
2278  // and similar xforms where the inner op is either ~0 or 0.
2279  if (NumSignBits == DestBits && isOneOrOneSplat(N1->getOperand(1)))
2280  return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
2281  }
2282 
2283  // add (sext i1), X -> sub X, (zext i1)
2284  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2285  N0.getOperand(0).getValueType() == MVT::i1 &&
2286  !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
2287  SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2288  return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2289  }
2290 
2291  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2292  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2293  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2294  if (TN->getVT() == MVT::i1) {
2295  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2296  DAG.getConstant(1, DL, VT));
2297  return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2298  }
2299  }
2300 
2301  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2302  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2303  N1.getResNo() == 0)
2304  return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2305  N0, N1.getOperand(0), N1.getOperand(2));
2306 
2307  // (add X, Carry) -> (addcarry X, 0, Carry)
2308  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2309  if (SDValue Carry = getAsCarry(TLI, N1))
2310  return DAG.getNode(ISD::ADDCARRY, DL,
2311  DAG.getVTList(VT, Carry.getValueType()), N0,
2312  DAG.getConstant(0, DL, VT), Carry);
2313 
2314  return SDValue();
2315 }
2316 
2317 SDValue DAGCombiner::visitADDC(SDNode *N) {
2318  SDValue N0 = N->getOperand(0);
2319  SDValue N1 = N->getOperand(1);
2320  EVT VT = N0.getValueType();
2321  SDLoc DL(N);
2322 
2323  // If the flag result is dead, turn this into an ADD.
2324  if (!N->hasAnyUseOfValue(1))
2325  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2326  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2327 
2328  // canonicalize constant to RHS.
2329  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2330  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2331  if (N0C && !N1C)
2332  return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2333 
2334  // fold (addc x, 0) -> x + no carry out
2335  if (isNullConstant(N1))
2336  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2337  DL, MVT::Glue));
2338 
2339  // If it cannot overflow, transform into an add.
2340  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2341  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2342  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2343 
2344  return SDValue();
2345 }
2346 
2347 static SDValue flipBoolean(SDValue V, const SDLoc &DL, EVT VT,
2348  SelectionDAG &DAG, const TargetLowering &TLI) {
2349  SDValue Cst;
2350  switch (TLI.getBooleanContents(VT)) {
2351  case TargetLowering::ZeroOrOneBooleanContent:
2352  case TargetLowering::UndefinedBooleanContent:
2353  Cst = DAG.getConstant(1, DL, VT);
2354  break;
2355  case TargetLowering::ZeroOrNegativeOneBooleanContent:
2356  Cst = DAG.getConstant(-1, DL, VT);
2357  break;
2358  }
2359 
2360  return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
2361 }
2362 
2363 static bool isBooleanFlip(SDValue V, EVT VT, const TargetLowering &TLI) {
2364  if (V.getOpcode() != ISD::XOR) return false;
2365  ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2366  if (!Const) return false;
2367 
2368  switch(TLI.getBooleanContents(VT)) {
2369  case TargetLowering::ZeroOrOneBooleanContent:
2370  return Const->isOne();
2371  case TargetLowering::ZeroOrNegativeOneBooleanContent:
2372  return Const->isAllOnesValue();
2373  case TargetLowering::UndefinedBooleanContent:
2374  return (Const->getAPIntValue() & 0x01) == 1;
2375  }
2376  llvm_unreachable("Unsupported boolean content");
2377 }
2378 
2379 SDValue DAGCombiner::visitUADDO(SDNode *N) {
2380  SDValue N0 = N->getOperand(0);
2381  SDValue N1 = N->getOperand(1);
2382  EVT VT = N0.getValueType();
2383  if (VT.isVector())
2384  return SDValue();
2385 
2386  EVT CarryVT = N->getValueType(1);
2387  SDLoc DL(N);
2388 
2389  // If the flag result is dead, turn this into an ADD.
2390  if (!N->hasAnyUseOfValue(1))
2391  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2392  DAG.getUNDEF(CarryVT));
2393 
2394  // canonicalize constant to RHS.
2395  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2396  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2397  if (N0C && !N1C)
2398  return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
2399 
2400  // fold (uaddo x, 0) -> x + no carry out
2401  if (isNullConstant(N1))
2402  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2403 
2404  // If it cannot overflow, transform into an add.
2405  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2406  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2407  DAG.getConstant(0, DL, CarryVT));
2408 
2409  // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2410  if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2411  SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2412  DAG.getConstant(0, DL, VT),
2413  N0.getOperand(0));
2414  return CombineTo(N, Sub,
2415  flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
2416  }
2417 
2418  if (SDValue Combined = visitUADDOLike(N0, N1, N))
2419  return Combined;
2420 
2421  if (SDValue Combined = visitUADDOLike(N1, N0, N))
2422  return Combined;
2423 
2424  return SDValue();
2425 }
2426 
2427 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2428  auto VT = N0.getValueType();
2429 
2430  // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2431  // If Y + 1 cannot overflow.
2432  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2433  SDValue Y = N1.getOperand(0);
2434  SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2435  if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2436  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2437  N1.getOperand(2));
2438  }
2439 
2440  // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2441  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2442  if (SDValue Carry = getAsCarry(TLI, N1))
2443  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2444  DAG.getConstant(0, SDLoc(N), VT), Carry);
2445 
2446  return SDValue();
2447 }
2448 
2449 SDValue DAGCombiner::visitADDE(SDNode *N) {
2450  SDValue N0 = N->getOperand(0);
2451  SDValue N1 = N->getOperand(1);
2452  SDValue CarryIn = N->getOperand(2);
2453 
2454  // canonicalize constant to RHS
2455  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2456  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2457  if (N0C && !N1C)
2458  return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2459  N1, N0, CarryIn);
2460 
2461  // fold (adde x, y, false) -> (addc x, y)
2462  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2463  return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2464 
2465  return SDValue();
2466 }
2467 
2468 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2469  SDValue N0 = N->getOperand(0);
2470  SDValue N1 = N->getOperand(1);
2471  SDValue CarryIn = N->getOperand(2);
2472  SDLoc DL(N);
2473 
2474  // canonicalize constant to RHS
2475  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2476  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2477  if (N0C && !N1C)
2478  return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2479 
2480  // fold (addcarry x, y, false) -> (uaddo x, y)
2481  if (isNullConstant(CarryIn)) {
2482  if (!LegalOperations ||
2483  TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2484  return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2485  }
2486 
2487  EVT CarryVT = CarryIn.getValueType();
2488 
2489  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2490  if (isNullConstant(N0) && isNullConstant(N1)) {
2491  EVT VT = N0.getValueType();
2492  SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2493  AddToWorklist(CarryExt.getNode());
2494  return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2495  DAG.getConstant(1, DL, VT)),
2496  DAG.getConstant(0, DL, CarryVT));
2497  }
2498 
2499  // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry.
2500  if (isBitwiseNot(N0) && isNullConstant(N1) &&
2501  isBooleanFlip(CarryIn, CarryVT, TLI)) {
2502  SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(),
2503  DAG.getConstant(0, DL, N0.getValueType()),
2504  N0.getOperand(0), CarryIn.getOperand(0));
2505  return CombineTo(N, Sub,
2506  flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
2507  }
2508 
2509  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2510  return Combined;
2511 
2512  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2513  return Combined;
2514 
2515  return SDValue();
2516 }
2517 
2518 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
2519  SDNode *N) {
2520  // Iff the flag result is dead:
2521  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
2522  if ((N0.getOpcode() == ISD::ADD ||
2523  (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
2524  isNullConstant(N1) && !N->hasAnyUseOfValue(1))
2525  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
2526  N0.getOperand(0), N0.getOperand(1), CarryIn);
2527 
2528  /**
2529  * When one of the addcarry arguments is itself a carry, we may be facing
2530  * a diamond carry propagation, in which case we try to transform the DAG
2531  * to ensure linear carry propagation if that is possible.
2532  *
2533  * We are trying to get:
2534  * (addcarry X, 0, (addcarry A, B, Z):Carry)
2535  */
2536  if (auto Y = getAsCarry(TLI, N1)) {
2537  /**
2538  *             (uaddo A, B)
2539  *              /       \
2540  *           Carry      Sum
2541  *             |          \
2542  *             |    (addcarry *, 0, Z)
2543  *             |       /
2544  *              \   Carry
2545  *               |   /
2546  *            (addcarry X, *, *)
2547  */
2548  if (Y.getOpcode() == ISD::UADDO &&
2549  CarryIn.getResNo() == 1 &&
2550  CarryIn.getOpcode() == ISD::ADDCARRY &&
2551  isNullConstant(CarryIn.getOperand(1)) &&
2552  CarryIn.getOperand(0) == Y.getValue(0)) {
2553  auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
2554  Y.getOperand(0), Y.getOperand(1),
2555  CarryIn.getOperand(2));
2556  AddToWorklist(NewY.getNode());
2557  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2558  DAG.getConstant(0, SDLoc(N), N0.getValueType()),
2559  NewY.getValue(1));
2560  }
2561  }
2562 
2563  return SDValue();
2564 }
2565 
2566 // Since it may not be valid to emit a fold to zero for vector initializers,
2567 // check if we can before folding.
2568 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2569  SelectionDAG &DAG, bool LegalOperations) {
2570  if (!VT.isVector())
2571  return DAG.getConstant(0, DL, VT);
2572  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2573  return DAG.getConstant(0, DL, VT);
2574  return SDValue();
2575 }
2576 
2577 SDValue DAGCombiner::visitSUB(SDNode *N) {
2578  SDValue N0 = N->getOperand(0);
2579  SDValue N1 = N->getOperand(1);
2580  EVT VT = N0.getValueType();
2581  SDLoc DL(N);
2582 
2583  // fold vector ops
2584  if (VT.isVector()) {
2585  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2586  return FoldedVOp;
2587 
2588  // fold (sub x, 0) -> x, vector edition
2589  if (ISD::isBuildVectorAllZeros(N1.getNode()))
2590  return N0;
2591  }
2592 
2593  // fold (sub x, x) -> 0
2594  // FIXME: Refactor this and xor and other similar operations together.
2595  if (N0 == N1)
2596  return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
2597  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2598  DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
2599  // fold (sub c1, c2) -> c1-c2
2600  return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
2601  N1.getNode());
2602  }
2603 
2604  if (SDValue NewSel = foldBinOpIntoSelect(N))
2605  return NewSel;
2606 
2607  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
2608 
2609  // fold (sub x, c) -> (add x, -c)
2610  if (N1C) {
2611  return DAG.getNode(ISD::ADD, DL, VT, N0,
2612  DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
2613  }
2614 
2615  if (isNullOrNullSplat(N0)) {
2616  unsigned BitWidth = VT.getScalarSizeInBits();
2617  // Right-shifting everything out but the sign bit followed by negation is
2618  // the same as flipping arithmetic/logical shift type without the negation:
2619  // -(X >>u 31) -> (X >>s 31)
2620  // -(X >>s 31) -> (X >>u 31)
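 // Check: both shifts by width-1 isolate the sign bit s; the srl form
 // yields s (0 or 1) and the sra form yields -s (0 or -1), so negating
 // one produces the other.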
2621  if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
2622  ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
2623  if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
2624  auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
2625  if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
2626  return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
2627  }
2628  }
2629 
2630  // 0 - X --> 0 if the sub is NUW.
2631  if (N->getFlags().hasNoUnsignedWrap())
2632  return N0;
2633 
2634  if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
2635  // N1 is either 0 or the minimum signed value. If the sub is NSW, then
2636  // N1 must be 0 because negating the minimum signed value is undefined.
2637  if (N->getFlags().hasNoSignedWrap())
2638  return N0;
2639 
2640  // 0 - X --> X if X is 0 or the minimum signed value.
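 // (0 - INT_MIN wraps back to INT_MIN in two's complement, and 0 - 0 == 0.)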
2641  return N1;
2642  }
2643  }
2644 
2645  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
2646  if (isAllOnesOrAllOnesSplat(N0))
2647  return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
2648 
2649  // fold (A - (0-B)) -> A+B
2650  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2651  return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
2652 
2653  // fold A-(A-B) -> B
2654  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
2655  return N1.getOperand(1);
2656 
2657  // fold (A+B)-A -> B
2658  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
2659  return N0.getOperand(1);
2660 
2661  // fold (A+B)-B -> A
2662  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
2663  return N0.getOperand(0);
2664 
2665  // fold C2-(A+C1) -> (C2-C1)-A
2666  if (N1.getOpcode() == ISD::ADD) {
2667  SDValue N11 = N1.getOperand(1);
2668  if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
2669  isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
2670  SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
2671  return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
2672  }
2673  }
2674 
2675  // fold ((A+(B+or-C))-B) -> A+or-C
2676  if (N0.getOpcode() == ISD::ADD &&
2677  (N0.getOperand(1).getOpcode() == ISD::SUB ||
2678  N0.getOperand(1).getOpcode() == ISD::ADD) &&
2679  N0.getOperand(1).getOperand(0) == N1)
2680  return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
2681  N0.getOperand(1).getOperand(1));
2682 
2683  // fold ((A+(C+B))-B) -> A+C
2684  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
2685  N0.getOperand(1).getOperand(1) == N1)
2686  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
2687  N0.getOperand(1).getOperand(0));
2688 
2689  // fold ((A-(B-C))-C) -> A-B
2690  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
2691  N0.getOperand(1).getOperand(1) == N1)
2692  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2693  N0.getOperand(1).getOperand(0));
2694 
2695  // fold (A-(B-C)) -> A+(C-B)
2696  if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
2697  return DAG.getNode(ISD::ADD, DL, VT, N0,
2698  DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
2699  N1.getOperand(0)));
2700 
2701  // fold (X - (-Y * Z)) -> (X + (Y * Z))
2702  if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
2703  if (N1.getOperand(0).getOpcode() == ISD::SUB &&
2704  isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
2705  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
2706  N1.getOperand(0).getOperand(1),
2707  N1.getOperand(1));
2708  return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
2709  }
2710  if (N1.getOperand(1).getOpcode() == ISD::SUB &&
2711  isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
2712  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
2713  N1.getOperand(0),
2714  N1.getOperand(1).getOperand(1));
2715  return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
2716  }
2717  }
2718 
2719  // If either operand of a sub is undef, the result is undef
2720  if (N0.isUndef())
2721  return N0;
2722  if (N1.isUndef())
2723  return N1;
2724 
2725  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2726  return V;
2727 
2728  if (SDValue V = foldAddSubOfSignBit(N, DAG))
2729  return V;
2730 
2731  // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
2732  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
2733  if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
2734  SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
2735  SDValue S0 = N1.getOperand(0);
2736  if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
2737  unsigned OpSizeInBits = VT.getScalarSizeInBits();
2738  if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
2739  if (C->getAPIntValue() == (OpSizeInBits - 1))
2740  return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
2741  }
2742  }
2743  }
2744 
2745  // If the relocation model supports it, consider symbol offsets.
2746  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
2747  if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2748  // fold (sub Sym, c) -> Sym-c
2749  if (N1C && GA->getOpcode() == ISD::GlobalAddress)
2750  return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
2751  GA->getOffset() -
2752  (uint64_t)N1C->getSExtValue());
2753  // fold (sub Sym+c1, Sym+c2) -> c1-c2
2754  if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
2755  if (GA->getGlobal() == GB->getGlobal())
2756  return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
2757  DL, VT);
2758  }
2759 
2760  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
2761  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2762  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2763  if (TN->getVT() == MVT::i1) {
2764  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2765  DAG.getConstant(1, DL, VT));
2766  return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
2767  }
2768  }
2769 
2770  // Prefer an add for more folding potential and possibly better codegen:
2771  // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
2772  if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
2773  SDValue ShAmt = N1.getOperand(1);
2774  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2775  if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) {
2776  SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
2777  return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
2778  }
2779  }
2780 
2781  return SDValue();
2782 }
2783 
2784 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
2785  SDValue N0 = N->getOperand(0);
2786  SDValue N1 = N->getOperand(1);
2787  EVT VT = N0.getValueType();
2788  SDLoc DL(N);
2789 
2790  // fold vector ops
2791  if (VT.isVector()) {
2792  // TODO SimplifyVBinOp
2793 
2794  // fold (sub_sat x, 0) -> x, vector edition
2795  if (ISD::isBuildVectorAllZeros(N1.getNode()))
2796  return N0;
2797  }
2798 
2799  // fold (sub_sat x, undef) -> 0
2800  if (N0.isUndef() || N1.isUndef())
2801  return DAG.getConstant(0, DL, VT);
2802 
2803  // fold (sub_sat x, x) -> 0
2804  if (N0 == N1)
2805  return DAG.getConstant(0, DL, VT);
2806 
2807  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2808  DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
2809  // fold (sub_sat c1, c2) -> c3
2810  return DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, N0.getNode(),
2811  N1.getNode());
2812  }
2813 
2814  // fold (sub_sat x, 0) -> x
2815  if (isNullConstant(N1))
2816  return N0;
2817 
2818  return SDValue();
2819 }
2820 
2821 SDValue DAGCombiner::visitSUBC(SDNode *N) {
2822  SDValue N0 = N->getOperand(0);
2823  SDValue N1 = N->getOperand(1);
2824  EVT VT = N0.getValueType();
2825  SDLoc DL(N);
2826 
2827  // If the flag result is dead, turn this into an SUB.
2828  if (!N->hasAnyUseOfValue(1))
2829  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2830  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2831 
2832  // fold (subc x, x) -> 0 + no borrow
2833  if (N0 == N1)
2834  return CombineTo(N, DAG.getConstant(0, DL, VT),
2835  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2836 
2837  // fold (subc x, 0) -> x + no borrow
2838  if (isNullConstant(N1))
2839  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2840 
2841  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2842  if (isAllOnesConstant(N0))
2843  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2844  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2845 
2846  return SDValue();
2847 }
2848 
2849 SDValue DAGCombiner::visitUSUBO(SDNode *N) {
2850  SDValue N0 = N->getOperand(0);
2851  SDValue N1 = N->getOperand(1);
2852  EVT VT = N0.getValueType();
2853  if (VT.isVector())
2854  return SDValue();
2855 
2856  EVT CarryVT = N->getValueType(1);
2857  SDLoc DL(N);
2858 
2859  // If the flag result is dead, turn this into an SUB.
2860  if (!N->hasAnyUseOfValue(1))
2861  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2862  DAG.getUNDEF(CarryVT));
2863 
2864  // fold (usubo x, x) -> 0 + no borrow
2865  if (N0 == N1)
2866  return CombineTo(N, DAG.getConstant(0, DL, VT),
2867  DAG.getConstant(0, DL, CarryVT));
2868 
2869  // fold (usubo x, 0) -> x + no borrow
2870  if (isNullConstant(N1))
2871  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2872 
2873  // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2874  if (isAllOnesConstant(N0))
2875  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2876  DAG.getConstant(0, DL, CarryVT));
2877 
2878  return SDValue();
2879 }
2880 
2881 SDValue DAGCombiner::visitSUBE(SDNode *N) {
2882  SDValue N0 = N->getOperand(0);
2883  SDValue N1 = N->getOperand(1);
2884  SDValue CarryIn = N->getOperand(2);
2885 
2886  // fold (sube x, y, false) -> (subc x, y)
2887  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2888  return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
2889 
2890  return SDValue();
2891 }
2892 
2893 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
2894  SDValue N0 = N->getOperand(0);
2895  SDValue N1 = N->getOperand(1);
2896  SDValue CarryIn = N->getOperand(2);
2897 
2898  // fold (subcarry x, y, false) -> (usubo x, y)
2899  if (isNullConstant(CarryIn)) {
2900  if (!LegalOperations ||
2901  TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
2902  return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
2903  }
2904 
2905  return SDValue();
2906 }
2907 
2908 SDValue DAGCombiner::visitMUL(SDNode *N) {
2909  SDValue N0 = N->getOperand(0);
2910  SDValue N1 = N->getOperand(1);
2911  EVT VT = N0.getValueType();
2912 
2913  // fold (mul x, undef) -> 0
2914  if (N0.isUndef() || N1.isUndef())
2915  return DAG.getConstant(0, SDLoc(N), VT);
2916 
2917  bool N0IsConst = false;
2918  bool N1IsConst = false;
2919  bool N1IsOpaqueConst = false;
2920  bool N0IsOpaqueConst = false;
2921  APInt ConstValue0, ConstValue1;
2922  // fold vector ops
2923  if (VT.isVector()) {
2924  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2925  return FoldedVOp;
2926 
2927  N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
2928  N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
2929  assert((!N0IsConst ||
2930  ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
2931  "Splat APInt should be element width");
2932  assert((!N1IsConst ||
2933  ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
2934  "Splat APInt should be element width");
2935  } else {
2936  N0IsConst = isa<ConstantSDNode>(N0);
2937  if (N0IsConst) {
2938  ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
2939  N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
2940  }
2941  N1IsConst = isa<ConstantSDNode>(N1);
2942  if (N1IsConst) {
2943  ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
2944  N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
2945  }
2946  }
2947 
2948  // fold (mul c1, c2) -> c1*c2
2949  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
2950  return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
2951  N0.getNode(), N1.getNode());
2952 
2953  // canonicalize constant to RHS (vector doesn't have to splat)
2954  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2955  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2956  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
2957  // fold (mul x, 0) -> 0
2958  if (N1IsConst && ConstValue1.isNullValue())
2959  return N1;
2960  // fold (mul x, 1) -> x
2961  if (N1IsConst && ConstValue1.isOneValue())
2962  return N0;
2963 
2964  if (SDValue NewSel = foldBinOpIntoSelect(N))
2965  return NewSel;
2966 
2967  // fold (mul x, -1) -> 0-x
2968  if (N1IsConst && ConstValue1.isAllOnesValue()) {
2969  SDLoc DL(N);
2970  return DAG.getNode(ISD::SUB, DL, VT,
2971  DAG.getConstant(0, DL, VT), N0);
2972  }
2973  // fold (mul x, (1 << c)) -> x << c
2974  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
2975  DAG.isKnownToBeAPowerOfTwo(N1) &&
2976  (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
2977  SDLoc DL(N);
2978  SDValue LogBase2 = BuildLogBase2(N1, DL);
2979  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
2980  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
2981  return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
2982  }
2983  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
2984  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
2985  unsigned Log2Val = (-ConstValue1).logBase2();
2986  SDLoc DL(N);
2987  // FIXME: If the input is something that is easily negated (e.g. a
2988  // single-use add), we should put the negate there.
2989  return DAG.getNode(ISD::SUB, DL, VT,
2990  DAG.getConstant(0, DL, VT),
2991  DAG.getNode(ISD::SHL, DL, VT, N0,
2992  DAG.getConstant(Log2Val, DL,
2993  getShiftAmountTy(N0.getValueType()))));
2994  }
2995 
2996  // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
2997  // mul x, (2^N + 1) --> add (shl x, N), x
2998  // mul x, (2^N - 1) --> sub (shl x, N), x
2999  // Examples: x * 33 --> (x << 5) + x
3000  // x * 15 --> (x << 4) - x
3001  // x * -33 --> -((x << 5) + x)
3002  // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3003  if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
3004  // TODO: We could handle more general decomposition of any constant by
3005  // having the target set a limit on number of ops and making a
3006  // callback to determine that sequence (similar to sqrt expansion).
3007  unsigned MathOp = ISD::DELETED_NODE;
3008  APInt MulC = ConstValue1.abs();
3009  if ((MulC - 1).isPowerOf2())
3010  MathOp = ISD::ADD;
3011  else if ((MulC + 1).isPowerOf2())
3012  MathOp = ISD::SUB;
3013 
3014  if (MathOp != ISD::DELETED_NODE) {
3015  unsigned ShAmt = MathOp == ISD::ADD ? (MulC - 1).logBase2()
3016  : (MulC + 1).logBase2();
3017  assert(ShAmt > 0 && ShAmt < VT.getScalarSizeInBits() &&
3018  "Not expecting multiply-by-constant that could have simplified");
3019  SDLoc DL(N);
3020  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N0,
3021  DAG.getConstant(ShAmt, DL, VT));
3022  SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
3023  if (ConstValue1.isNegative())
3024  R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3025  return R;
3026  }
3027  }
3028 
3029  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3030  if (N0.getOpcode() == ISD::SHL &&
3031  isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3032  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3033  SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3034  if (isConstantOrConstantVector(C3))
3035  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3036  }
3037 
3038  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3039  // use.
3040  {
3041  SDValue Sh(nullptr, 0), Y(nullptr, 0);
3042 
3043  // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
3044  if (N0.getOpcode() == ISD::SHL &&
3045  isConstantOrConstantVector(N0.getOperand(1)) &&
3046  N0.getNode()->hasOneUse()) {
3047  Sh = N0; Y = N1;
3048  } else if (N1.getOpcode() == ISD::SHL &&
3049  isConstantOrConstantVector(N1.getOperand(1)) &&
3050  N1.getNode()->hasOneUse()) {
3051  Sh = N1; Y = N0;
3052  }
3053 
3054  if (Sh.getNode()) {
3055  SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3056  return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3057  }
3058  }
3059 
3060  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3061  if (isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3062  N0.getOpcode() == ISD::ADD &&
3063  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true) &&
3064  isMulAddWithConstProfitable(N, N0, N1))
3065  return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3066  DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3067  N0.getOperand(0), N1),
3068  DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3069  N0.getOperand(1), N1));
3070 
3071  // reassociate mul
3072  if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3073  return RMUL;
3074 
3075  return SDValue();
3076 }
3077 
3078 /// Return true if divmod libcall is available.
3079 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3080  const TargetLowering &TLI) {
3081  RTLIB::Libcall LC;
3082  EVT NodeType = Node->getValueType(0);
3083  if (!NodeType.isSimple())
3084  return false;
3085  switch (NodeType.getSimpleVT().SimpleTy) {
3086  default: return false; // No libcall for vector types.
3087  case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
3088  case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
3089  case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
3090  case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3091  case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
3092  }
3093 
3094  return TLI.getLibcallName(LC) != nullptr;
3095 }
3096 
3097 /// Issue divrem if both quotient and remainder are needed.
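/// For example, if a function computes both X / Y and X % Y, a single
/// SDIVREM (or UDIVREM) node can feed both uses instead of two operations.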
3098 SDValue DAGCombiner::useDivRem(SDNode *Node) {
3099  if (Node->use_empty())
3100  return SDValue(); // This is a dead node, leave it alone.
3101 
3102  unsigned Opcode = Node->getOpcode();
3103  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
3104  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
3105 
3106  // DivMod libcalls can still work on non-legal types, since they are lowered to library calls.
3107  EVT VT = Node->getValueType(0);
3108  if (VT.isVector() || !VT.isInteger())
3109  return SDValue();
3110 
3111  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
3112  return SDValue();
3113 
3114  // If DIVREM is going to get expanded into a libcall,
3115  // but there is no libcall available, then don't combine.
3116  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
3117  !isDivRemLibcallAvailable(Node, isSigned, TLI))
3118  return SDValue();
3119 
3120  // If div is legal, it's better to do the normal expansion
3121  unsigned OtherOpcode = 0;
3122  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
3123  OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
3124  if (TLI.isOperationLegalOrCustom(Opcode, VT))
3125  return SDValue();
3126  } else {
3127  OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3128  if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
3129  return SDValue();
3130  }
3131 
3132  SDValue Op0 = Node->getOperand(0);
3133  SDValue Op1 = Node->getOperand(1);
3134  SDValue combined;
3135  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
3136  UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
3137  SDNode *User = *UI;
3138  if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
3139  User->use_empty())
3140  continue;
3141  // Convert the other matching node(s), too;
3142  // otherwise, the DIVREM may get target-legalized into something
3143  // target-specific that we won't be able to recognize.
3144  unsigned UserOpc = User->getOpcode();
3145  if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
3146  User->getOperand(0) == Op0 &&
3147  User->getOperand(1) == Op1) {
3148  if (!combined) {
3149  if (UserOpc == OtherOpcode) {
3150  SDVTList VTs = DAG.getVTList(VT, VT);
3151  combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
3152  } else if (UserOpc == DivRemOpc) {
3153  combined = SDValue(User, 0);
3154  } else {
3155  assert(UserOpc == Opcode);
3156  continue;
3157  }
3158  }
3159  if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
3160  CombineTo(User, combined);
3161  else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
3162  CombineTo(User, combined.getValue(1));
3163  }
3164  }
3165  return combined;
3166 }
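
// A hedged aside: the payoff of this combine, sketched at the C level. When
// the same two operands feed both a division and a remainder, one combined
// computation (std::div here stands in for the DIVREM node or libcall; the
// helper name is illustrative, not part of this file) yields both results.
#include <cstdlib>
static void divrem_sketch(int A, int B, int &Quot, int &Rem) {
  std::div_t D = std::div(A, B); // one operation produces quotient and remainder
  Quot = D.quot;
  Rem = D.rem;
}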
3167 
3168 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
3169  SDValue N0 = N->getOperand(0);
3170  SDValue N1 = N->getOperand(1);
3171  EVT VT = N->getValueType(0);
3172  SDLoc DL(N);
3173 
3174  unsigned Opc = N->getOpcode();
3175  bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
3176  ConstantSDNode *N1C = isConstOrConstSplat(N1);
3177 
3178  // X / undef -> undef
3179  // X % undef -> undef
3180  // X / 0 -> undef
3181  // X % 0 -> undef
3182  // NOTE: This includes vectors where any divisor element is zero/undef.
3183  if (DAG.isUndef(Opc, {N0, N1}))
3184  return DAG.getUNDEF(VT);
3185 
3186  // undef / X -> 0
3187  // undef % X -> 0
3188  if (N0.isUndef())
3189  return DAG.getConstant(0, DL, VT);
3190 
3191  // 0 / X -> 0
3192  // 0 % X -> 0
3193  ConstantSDNode *N0C = isConstOrConstSplat(N0);
3194  if (N0C && N0C->isNullValue())
3195  return N0;
3196 
3197  // X / X -> 1
3198  // X % X -> 0
3199  if (N0 == N1)
3200  return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
3201 
3202  // X / 1 -> X
3203  // X % 1 -> 0
3204  // If this is a boolean op (single-bit element type), we can't have
3205  // division-by-zero or remainder-by-zero, so assume the divisor is 1.
3206  // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
3207  // it's a 1.
3208  if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
3209  return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
3210 
3211  return SDValue();
3212 }
3213 
3214 SDValue DAGCombiner::visitSDIV(SDNode *N) {
3215  SDValue N0 = N->getOperand(0);
3216  SDValue N1 = N->getOperand(1);
3217  EVT VT = N->getValueType(0);
3218  EVT CCVT = getSetCCResultType(VT);
3219 
3220  // fold vector ops
3221  if (VT.isVector())
3222  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3223  return FoldedVOp;
3224 
3225  SDLoc DL(N);
3226 
3227  // fold (sdiv c1, c2) -> c1/c2
3228  ConstantSDNode *N0C = isConstOrConstSplat(N0);
3229  ConstantSDNode *N1C = isConstOrConstSplat(N1);
3230  if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
3231  return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
3232  // fold (sdiv X, -1) -> 0-X
3233  if (N1C && N1C->isAllOnesValue())
3234  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
3235  // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
3236  if (N1C && N1C->getAPIntValue().isMinSignedValue())
3237  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3238  DAG.getConstant(1, DL, VT),
3239  DAG.getConstant(0, DL, VT));
3240 
3241  if (SDValue V = simplifyDivRem(N, DAG))
3242  return V;
3243 
3244  if (SDValue NewSel = foldBinOpIntoSelect(N))
3245  return NewSel;
3246 
3247  // If we know the sign bits of both operands are zero, strength reduce to a
3248  // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
3249  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3250  return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
3251 
3252  if (SDValue V = visitSDIVLike(N0, N1, N)) {
3253  // If the corresponding remainder node exists, update its users with
3254  // (Dividend - (Quotient * Divisor)).
3255  if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
3256  { N0, N1 })) {
3257  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3258  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3259  AddToWorklist(Mul.getNode());
3260  AddToWorklist(Sub.getNode());
3261  CombineTo(RemNode, Sub);
3262  }
3263  return V;
3264  }
3265 
3266  // sdiv, srem -> sdivrem
3267  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3268  // true. Otherwise, we break the simplification logic in visitREM().
3269  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3270  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3271  if (SDValue DivRem = useDivRem(N))
3272  return DivRem;
3273 
3274  return SDValue();
3275 }
3276 
3277 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3278  SDLoc DL(N);
3279  EVT VT = N->getValueType(0);
3280  EVT CCVT = getSetCCResultType(VT);
3281  unsigned BitWidth = VT.getScalarSizeInBits();
3282 
3283  // Helper for determining whether a value is a power-of-2 constant scalar or
3284  // a vector of such elements.
3285  auto IsPowerOfTwo = [](ConstantSDNode *C) {
3286  if (C->isNullValue() || C->isOpaque())
3287  return false;
3288  if (C->getAPIntValue().isPowerOf2())
3289  return true;
3290  if ((-C->getAPIntValue()).isPowerOf2())
3291  return true;
3292  return false;
3293  };
3294 
3295  // fold (sdiv X, pow2) -> simple ops after legalize
3296  // FIXME: We check for the exact bit here because the generic lowering gives
3297  // better results in that case. The target-specific lowering should learn how
3298  // to handle exact sdivs efficiently.
3299  if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
3300  // Target-specific implementation of sdiv x, pow2.
3301  if (SDValue Res = BuildSDIVPow2(N))
3302  return Res;
3303 
3304  // Create constants that are functions of the shift amount value.
3305  EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
3306  SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
3307  SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
3308  C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
3309  SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
3310  if (!isConstantOrConstantVector(Inexact))
3311  return SDValue();
3312 
3313  // Splat the sign bit into the register
3314  SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
3315  DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
3316  AddToWorklist(Sign.getNode());
3317 
3318  // Add (N0 < 0) ? abs2 - 1 : 0;
3319  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
3320  AddToWorklist(Srl.getNode());
3321  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
3322  AddToWorklist(Add.getNode());
3323  SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
3324  AddToWorklist(Sra.getNode());
3325 
3326  // Special case: (sdiv X, 1) -> X
3327  // Special case: (sdiv X, -1) -> 0-X
3328  SDValue One = DAG.getConstant(1, DL, VT);
3329  SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
3330  SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
3331  SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
3332  SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
3333  Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
3334 
3335  // If dividing by a positive value, we're done. Otherwise, the result must
3336  // be negated.
3337  SDValue Zero = DAG.getConstant(0, DL, VT);
3338  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
3339 
3340  // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
3341  SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
3342  SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
3343  return Res;
3344  }
3345 
3346  // If integer divide is expensive and we satisfy the requirements, emit an
3347  // alternate sequence. Targets may check function attributes for size/speed
3348  // trade-offs.
3349  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3350  if (isConstantOrConstantVector(N1) &&
3351  !TLI.isIntDivCheap(N->getValueType(0), Attr))
3352  if (SDValue Op = BuildSDIV(N))
3353  return Op;
3354 
3355  return SDValue();
3356 }
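
// A standalone sketch of the power-of-two sequence built above, assuming a
// 32-bit two's-complement int, an arithmetic right shift for signed values,
// and 0 < K < 32 (K = log2 of the divisor). The bias makes the arithmetic
// shift round toward zero, matching sdiv semantics for negative dividends.
static int sdiv_pow2_sketch(int X, unsigned K) {
  int Sign = X >> 31;                          // splat the sign bit
  unsigned Bias = (unsigned)Sign >> (32 - K);  // (X < 0) ? (1 << K) - 1 : 0
  return (X + (int)Bias) >> K;                 // == X / (1 << K)
}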
3357 
3358 SDValue DAGCombiner::visitUDIV(SDNode *N) {
3359  SDValue N0 = N->getOperand(0);
3360  SDValue N1 = N->getOperand(1);
3361  EVT VT = N->getValueType(0);
3362  EVT CCVT = getSetCCResultType(VT);
3363 
3364  // fold vector ops
3365  if (VT.isVector())
3366  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3367  return FoldedVOp;
3368 
3369  SDLoc DL(N);
3370 
3371  // fold (udiv c1, c2) -> c1/c2
3372  ConstantSDNode *N0C = isConstOrConstSplat(N0);
3373  ConstantSDNode *N1C = isConstOrConstSplat(N1);
3374  if (N0C && N1C)
3375  if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
3376  N0C, N1C))
3377  return Folded;
3378  // fold (udiv X, -1) -> select(X == -1, 1, 0)
3379  if (N1C && N1C->getAPIntValue().isAllOnesValue())
3380  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3381  DAG.getConstant(1, DL, VT),
3382  DAG.getConstant(0, DL, VT));
3383 
3384  if (SDValue V = simplifyDivRem(N, DAG))
3385  return V;
3386 
3387  if (SDValue NewSel = foldBinOpIntoSelect(N))
3388  return NewSel;
3389 
3390  if (SDValue V = visitUDIVLike(N0, N1, N)) {
3391  // If the corresponding remainder node exists, update its users with
3392  // (Dividend - (Quotient * Divisor)).
3393  if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
3394  { N0, N1 })) {
3395  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3396  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3397  AddToWorklist(Mul.getNode());
3398  AddToWorklist(Sub.getNode());
3399  CombineTo(RemNode, Sub);
3400  }
3401  return V;
3402  }
3403 
3404  // udiv, urem -> udivrem
3405  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3406  // true. Otherwise, we break the simplification logic in visitREM().
3407  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3408  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3409  if (SDValue DivRem = useDivRem(N))
3410  return DivRem;
3411 
3412  return SDValue();
3413 }
3414 
3415 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3416  SDLoc DL(N);
3417  EVT VT = N->getValueType(0);
3418 
3419  // fold (udiv x, (1 << c)) -> x >>u c
3420  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3421  DAG.isKnownToBeAPowerOfTwo(N1)) {
3422  SDValue LogBase2 = BuildLogBase2(N1, DL);
3423  AddToWorklist(LogBase2.getNode());
3424 
3425  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3426  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3427  AddToWorklist(Trunc.getNode());
3428  return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3429  }
3430 
3431  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
3432  if (N1.getOpcode() == ISD::SHL) {
3433  SDValue N10 = N1.getOperand(0);
3434  if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
3435  DAG.isKnownToBeAPowerOfTwo(N10)) {
3436  SDValue LogBase2 = BuildLogBase2(N10, DL);
3437  AddToWorklist(LogBase2.getNode());
3438 
3439  EVT ADDVT = N1.getOperand(1).getValueType();
3440  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
3441  AddToWorklist(Trunc.getNode());
3442  SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
3443  AddToWorklist(Add.getNode());
3444  return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
3445  }
3446  }
3447 
3448  // fold (udiv x, c) -> alternate
3449  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3450  if (isConstantOrConstantVector(N1) &&
3451  !TLI.isIntDivCheap(N->getValueType(0), Attr))
3452  if (SDValue Op = BuildUDIV(N))
3453  return Op;
3454 
3455  return SDValue();
3456 }
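
// The two shift folds above, sketched in plain C for unsigned 32-bit values
// (assuming every shift amount stays below 32):
static unsigned udiv_pow2_sketch(unsigned X, unsigned C) {
  return X >> C;        // == X / (1u << C)
}
static unsigned udiv_shl_sketch(unsigned X, unsigned C, unsigned Y) {
  return X >> (C + Y);  // == X / ((1u << C) << Y) when C + Y < 32
}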
3457 
3458 // handles ISD::SREM and ISD::UREM
3459 SDValue DAGCombiner::visitREM(SDNode *N) {
3460  unsigned Opcode = N->getOpcode();
3461  SDValue N0 = N->getOperand(0);
3462  SDValue N1 = N->getOperand(1);
3463  EVT VT = N->getValueType(0);
3464  EVT CCVT = getSetCCResultType(VT);
3465 
3466  bool isSigned = (Opcode == ISD::SREM);
3467  SDLoc DL(N);
3468 
3469  // fold (rem c1, c2) -> c1%c2
3470  ConstantSDNode *N0C = isConstOrConstSplat(N0);
3471  ConstantSDNode *N1C = isConstOrConstSplat(N1);
3472  if (N0C && N1C)
3473  if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
3474  return Folded;
3475  // fold (urem X, -1) -> select(X == -1, 0, X)
3476  if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
3477  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3478  DAG.getConstant(0, DL, VT), N0);
3479 
3480  if (SDValue V = simplifyDivRem(N, DAG))
3481  return V;
3482 
3483  if (SDValue NewSel = foldBinOpIntoSelect(N))
3484  return NewSel;
3485 
3486  if (isSigned) {
3487  // If we know the sign bits of both operands are zero, strength reduce to a
3488  // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
3489  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3490  return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
3491  } else {
3492  SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
3493  if (DAG.isKnownToBeAPowerOfTwo(N1)) {
3494  // fold (urem x, pow2) -> (and x, pow2-1)
3495  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3496  AddToWorklist(Add.getNode());
3497  return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3498  }
3499  if (N1.getOpcode() == ISD::SHL &&
3500  DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
3501  // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
3502  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3503  AddToWorklist(Add.getNode());
3504  return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3505  }
3506  }
3507 
3508  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3509 
3510  // If X/C can be simplified by the division-by-constant logic, lower
3511  // X%C to the equivalent of X-X/C*C.
3512  // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
3513  // speculative DIV must not cause a DIVREM conversion. We guard against this
3514  // by skipping the simplification if isIntDivCheap(). When div is not cheap,
3515  // combine will not return a DIVREM. Regardless, checking cheapness here
3516  // makes sense since the simplification results in fatter code.
3517  if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
3518  SDValue OptimizedDiv =
3519  isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
3520  if (OptimizedDiv.getNode()) {
3521  // If the equivalent Div node also exists, update its users.
3522  unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3523  if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
3524  { N0, N1 }))
3525  CombineTo(DivNode, OptimizedDiv);
3526  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
3527  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3528  AddToWorklist(OptimizedDiv.getNode());
3529  AddToWorklist(Mul.getNode());
3530  return Sub;
3531  }
3532  }
3533 
3534  // sdiv, srem -> sdivrem / udiv, urem -> udivrem
3535  if (SDValue DivRem = useDivRem(N))
3536  return DivRem.getValue(1);
3537 
3538  return SDValue();
3539 }
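
// Both unsigned-remainder rewrites above as a C-level sketch (assuming a
// nonzero divisor): a power-of-two remainder is just a mask, and otherwise
// the remainder can be recovered from an already-optimized division.
static unsigned urem_pow2_sketch(unsigned X, unsigned Pow2) {
  return X & (Pow2 - 1);  // == X % Pow2 when Pow2 is a power of two
}
static unsigned urem_via_div_sketch(unsigned X, unsigned C) {
  unsigned Q = X / C;     // stands in for the strength-reduced divide
  return X - Q * C;       // == X % C
}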
3540 
3541 SDValue DAGCombiner::visitMULHS(SDNode *N) {
3542  SDValue N0 = N->getOperand(0);
3543  SDValue N1 = N->getOperand(1);
3544  EVT VT = N->getValueType(0);
3545  SDLoc DL(N);
3546 
3547  if (VT.isVector()) {
3548  // fold (mulhs x, 0) -> 0
3549  if (ISD::isBuildVectorAllZeros(N1.getNode()))
3550  return N1;
3551  if (ISD::isBuildVectorAllZeros(N0.getNode()))
3552  return N0;
3553  }
3554 
3555  // fold (mulhs x, 0) -> 0
3556  if (isNullConstant(N1))
3557  return N1;
3558  // fold (mulhs x, 1) -> (sra x, size(x)-1)
3559  if (isOneConstant(N1))
3560  return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
3561  DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
3562  getShiftAmountTy(N0.getValueType())));
3563 
3564  // fold (mulhs x, undef) -> 0
3565  if (N0.isUndef() || N1.isUndef())
3566  return DAG.getConstant(0, DL, VT);
3567 
3568  // If the type twice as wide is legal, transform the mulhs to a wider multiply
3569  // plus a shift.
3570  if (VT.isSimple() && !VT.isVector()) {
3571  MVT Simple = VT.getSimpleVT();
3572  unsigned SimpleSize = Simple.getSizeInBits();
3573  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3574  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3575  N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
3576  N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
3577  N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3578  N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3579  DAG.getConstant(SimpleSize, DL,
3580  getShiftAmountTy(N1.getValueType())));
3581  return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3582  }
3583  }
3584 
3585  return SDValue();
3586 }
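
// The widening fallback above for scalar i32, sketched with fixed-width C++
// types: sign-extend both operands, multiply in 64 bits, then take the high
// half with a logical shift and a truncation, mirroring the SRL + TRUNCATE.
#include <cstdint>
static int32_t mulhs32_sketch(int32_t A, int32_t B) {
  int64_t Wide = (int64_t)A * (int64_t)B;
  return (int32_t)((uint64_t)Wide >> 32);  // high 32 bits of the product
}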
3587 
3588 SDValue DAGCombiner::visitMULHU(SDNode *N) {
3589  SDValue N0 = N->getOperand(0);
3590  SDValue N1 = N->getOperand(1);
3591  EVT VT = N->getValueType(0);
3592  SDLoc DL(N);
3593 
3594  if (VT.isVector()) {
3595  // fold (mulhu x, 0) -> 0
3596  if (ISD::isBuildVectorAllZeros(N1.getNode()))
3597  return N1;
3598  if (ISD::isBuildVectorAllZeros(N0.getNode()))
3599  return N0;
3600  }
3601 
3602  // fold (mulhu x, 0) -> 0
3603  if (isNullConstant(N1))
3604  return N1;
3605  // fold (mulhu x, 1) -> 0
3606  if (isOneConstant(N1))
3607  return DAG.getConstant(0, DL, N0.getValueType());
3608  // fold (mulhu x, undef) -> 0
3609  if (N0.isUndef() || N1.isUndef())
3610  return DAG.getConstant(0, DL, VT);
3611 
3612  // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
3613  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3614  DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
3615  SDLoc DL(N);
3616  unsigned NumEltBits = VT.getScalarSizeInBits();
3617  SDValue LogBase2 = BuildLogBase2(N1, DL);
3618  SDValue SRLAmt = DAG.getNode(
3619  ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
3620  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3621  SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
3622  return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3623  }
3624 
3625  // If the type twice as wide is legal, transform the mulhu to a wider multiply
3626  // plus a shift.
3627  if (VT.isSimple() && !VT.isVector()) {
3628  MVT Simple = VT.getSimpleVT();
3629  unsigned SimpleSize = Simple.getSizeInBits();
3630  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3631  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3632  N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
3633  N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
3634  N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3635  N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3636  DAG.getConstant(SimpleSize, DL,
3637  getShiftAmountTy(N1.getValueType())));
3638  return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3639  }
3640  }
3641 
3642  return SDValue();
3643 }
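
// The unsigned counterpart plus the power-of-two fold above, as a sketch
// with fixed-width C++ types; the shift identity assumes 0 < C < 32.
#include <cstdint>
static uint32_t mulhu32_sketch(uint32_t A, uint32_t B) {
  return (uint32_t)(((uint64_t)A * B) >> 32);
}
static uint32_t mulhu_pow2_sketch(uint32_t X, unsigned C) {
  return X >> (32 - C);  // == mulhu32_sketch(X, 1u << C)
}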
3644 
3645 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
3646 /// give the opcodes for the two computations that are being performed. Return
3647 /// the combined value if a simplification was made, or a null SDValue otherwise.
3648 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
3649  unsigned HiOp) {
3650  // If the high half is not needed, just compute the low half.
3651  bool HiExists = N->hasAnyUseOfValue(1);
3652  if (!HiExists && (!LegalOperations ||
3653  TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
3654  SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3655  return CombineTo(N, Res, Res);
3656  }
3657 
3658  // If the low half is not needed, just compute the high half.
3659  bool LoExists = N->hasAnyUseOfValue(0);
3660  if (!LoExists && (!LegalOperations ||
3661  TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
3662  SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3663  return CombineTo(N, Res, Res);
3664  }
3665 
3666  // If both halves are used, return as it is.
3667  if (LoExists && HiExists)
3668  return SDValue();
3669 
3670  // If the two computed results can be simplified separately, separate them.
3671  if (LoExists) {
3672  SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3673  AddToWorklist(Lo.getNode());
3674  SDValue LoOpt = combine(Lo.getNode());
3675  if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
3676  (!LegalOperations ||
3677  TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
3678  return CombineTo(N, LoOpt, LoOpt);
3679  }
3680 
3681  if (HiExists) {
3682  SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3683  AddToWorklist(Hi.getNode());
3684  SDValue HiOpt = combine(Hi.getNode());
3685  if (HiOpt.getNode() && HiOpt != Hi &&
3686  (!LegalOperations ||
3687  TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
3688  return CombineTo(N, HiOpt, HiOpt);
3689  }
3690 
3691  return SDValue();
3692 }
3693 
3694 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
3695  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
3696  return Res;
3697 
3698  EVT VT = N->getValueType(0);
3699  SDLoc DL(N);
3700 
3701  // If the type twice as wide is legal, transform the smul_lohi to a wider
3702  // multiply plus a shift.
3703  if (VT.isSimple() && !VT.isVector()) {
3704  MVT Simple = VT.getSimpleVT();
3705  unsigned SimpleSize = Simple.getSizeInBits();
3706  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3707  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3708  SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
3709  SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
3710  Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3711  // Compute the high part as N1.
3712  Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3713  DAG.getConstant(SimpleSize, DL,
3714  getShiftAmountTy(Lo.getValueType())));
3715  Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3716  // Compute the low part as N0.
3717  Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3718  return CombineTo(N, Lo, Hi);
3719  }
3720  }
3721 
3722  return SDValue();
3723 }
3724 
3725 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
3726  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
3727  return Res;
3728 
3729  EVT VT = N->getValueType(0);
3730  SDLoc DL(N);
3731 
3732  // If the type twice as wide is legal, transform the umul_lohi to a wider
3733  // multiply plus a shift.
3734  if (VT.isSimple() && !VT.isVector()) {
3735  MVT Simple = VT.getSimpleVT();
3736  unsigned SimpleSize = Simple.getSizeInBits();
3737  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3738  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3739  SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
3740  SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
3741  Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3742  // Compute the high part as N1.
3743  Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3744  DAG.getConstant(SimpleSize, DL,
3745  getShiftAmountTy(Lo.getValueType())));
3746  Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3747  // Compute the low part as N0.
3748  Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3749  return CombineTo(N, Lo, Hi);
3750  }
3751  }
3752 
3753  return SDValue();
3754 }
3755 
3756 SDValue DAGCombiner::visitSMULO(SDNode *N) {
3757  // (smulo x, 2) -> (saddo x, x)
3758  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3759  if (C2->getAPIntValue() == 2)
3760  return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
3761  N->getOperand(0), N->getOperand(0));
3762 
3763  return SDValue();
3764 }
3765 
3766 SDValue DAGCombiner::visitUMULO(SDNode *N) {
3767  // (umulo x, 2) -> (uaddo x, x)
3768  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3769  if (C2->getAPIntValue() == 2)
3770  return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
3771  N->getOperand(0), N->getOperand(0));
3772 
3773  return SDValue();
3774 }
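
// Both folds rest on X * 2 == X + X, overflow flag included. A sketch using
// the GCC/Clang checked-arithmetic builtin (a toolchain assumption, not
// something this file requires):
static bool umulo_by_two_sketch(unsigned X, unsigned &Res) {
  return __builtin_uadd_overflow(X, X, &Res);  // same overflow as X * 2u
}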
3775 
3776 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
3777  SDValue N0 = N->getOperand(0);
3778  SDValue N1 = N->getOperand(1);
3779  EVT VT = N0.getValueType();
3780 
3781  // fold vector ops
3782  if (VT.isVector())
3783  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3784  return FoldedVOp;
3785 
3786  // fold operation with constant operands.
3787  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3788  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3789  if (N0C && N1C)
3790  return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
3791 
3792  // canonicalize constant to RHS
3793  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3794  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3795  return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
3796 
3797  // If sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
3798  // Only do this if the current op isn't legal and the flipped is.
3799  unsigned Opcode = N->getOpcode();
3800  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3801  if (!TLI.isOperationLegal(Opcode, VT) &&
3802  (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
3803  (N1.isUndef() || DAG.SignBitIsZero(N1))) {
3804  unsigned AltOpcode;
3805  switch (Opcode) {
3806  case ISD::SMIN: AltOpcode = ISD::UMIN; break;
3807  case ISD::SMAX: AltOpcode = ISD::UMAX; break;
3808  case ISD::UMIN: AltOpcode = ISD::SMIN; break;
3809  case ISD::UMAX: AltOpcode = ISD::SMAX; break;
3810  default: llvm_unreachable("Unknown MINMAX opcode");
3811  }
3812  if (TLI.isOperationLegal(AltOpcode, VT))
3813  return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
3814  }
3815 
3816  return SDValue();
3817 }
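
// The signed/unsigned flip above at the C level: once both operands are
// known non-negative, signed and unsigned ordering agree, so either min/max
// form may be used. A sketch assuming 32-bit values with clear sign bits:
static unsigned smin_as_umin_sketch(unsigned A, unsigned B) {
  // precondition: A < 0x80000000u && B < 0x80000000u
  return A < B ? A : B;  // == (unsigned)((int)A < (int)B ? (int)A : (int)B)
}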
3818 
3819 /// If this is a bitwise logic instruction and both operands have the same
3820 /// opcode, try to sink the other opcode after the logic instruction.
3821 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
3822  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3823  EVT VT = N0.getValueType();
3824  unsigned LogicOpcode = N->getOpcode();
3825  unsigned HandOpcode = N0.getOpcode();
3826  assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
3827  LogicOpcode == ISD::XOR) && "Expected logic opcode");
3828  assert(HandOpcode == N1.getOpcode() && "Bad input!");
3829 
3830  // Bail early if none of these transforms apply.
3831  if (N0.getNumOperands() == 0)
3832  return SDValue();
3833 
3834  // FIXME: We should check number of uses of the operands to not increase
3835  // the instruction count for all transforms.
3836 
3837  // Handle size-changing casts.
3838  SDValue X = N0.getOperand(0);
3839  SDValue Y = N1.getOperand(0);
3840  EVT XVT = X.getValueType();
3841  SDLoc DL(N);
3842  if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
3843  HandOpcode == ISD::SIGN_EXTEND) {
3844  // If both operands have other uses, this transform would create extra
3845  // instructions without eliminating anything.
3846  if (!N0.hasOneUse() && !N1.hasOneUse())
3847  return SDValue();
3848  // We need matching integer source types.
3849  if (XVT != Y.getValueType())
3850  return SDValue();
3851  // Don't create an illegal op during or after legalization. Don't ever
3852  // create an unsupported vector op.
3853  if ((VT.isVector() || LegalOperations) &&
3854  !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
3855  return SDValue();
3856  // Avoid infinite looping with PromoteIntBinOp.
3857  // TODO: Should we apply desirable/legal constraints to all opcodes?
3858  if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
3859  !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
3860  return SDValue();
3861  // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
3862  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
3863  return DAG.getNode(HandOpcode, DL, VT, Logic);
3864  }
3865 
3866  // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
3867  if (HandOpcode == ISD::TRUNCATE) {
3868  // If both operands have other uses, this transform would create extra
3869  // instructions without eliminating anything.
3870  if (!N0.hasOneUse() && !N1.hasOneUse())
3871  return SDValue();
3872  // We need matching source types.
3873  if (XVT != Y.getValueType())
3874  return SDValue();
3875  // Don't create an illegal op during or after legalization.
3876  if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
3877  return SDValue();
3878  // Be extra careful sinking truncate. If it's free, there's no benefit in
3879  // widening a binop. Also, don't create a logic op on an illegal type.
3880  if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
3881  return SDValue();
3882  if (!TLI.isTypeLegal(XVT))
3883  return SDValue();
3884  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
3885  return DAG.getNode(HandOpcode, DL, VT, Logic);
3886  }
3887 
3888  // For binops SHL/SRL/SRA/AND:
3889  // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
3890  if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
3891  HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
3892  N0.getOperand(1) == N1.getOperand(1)) {
3893  // If either operand has other uses, this transform is not an improvement.
3894  if (!N0.hasOneUse() || !N1.hasOneUse())
3895  return SDValue();
3896  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
3897  return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
3898  }
3899 
3900  // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
3901  if (HandOpcode == ISD::BSWAP) {
3902  // If either operand has other uses, this transform is not an improvement.
3903  if (!N0.hasOneUse() || !N1.hasOneUse())
3904  return SDValue();
3905  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
3906  return DAG.getNode(HandOpcode, DL, VT, Logic);
3907  }
3908 
3909  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
3910  // Only perform this optimization up until type legalization, before
3911  // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
3912  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
3913  // we don't want to undo this promotion.
3914  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
3915  // on scalars.
3916  if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
3917  Level <= AfterLegalizeTypes) {
3918  // Input types must be integer and the same.
3919  if (XVT.isInteger() && XVT == Y.getValueType()) {
3920  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
3921  return DAG.getNode(HandOpcode, DL, VT, Logic);
3922  }
3923  }
3924 
3925  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
3926  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
3927  // If both shuffles use the same mask, and both shuffle within a single
3928  // vector, then it is worthwhile to move the swizzle after the operation.
3929  // The type-legalizer generates this pattern when loading illegal
3930  // vector types from memory. In many cases this allows additional shuffle
3931  // optimizations.
3932  // There are other cases where moving the shuffle after the xor/and/or
3933  // is profitable even if shuffles don't perform a swizzle.
3934  // If both shuffles use the same mask, and both shuffles have the same first
3935  // or second operand, then it might still be profitable to move the shuffle
3936  // after the xor/and/or operation.
3937  if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
3938  auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
3939  auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
3940  assert(X.getValueType() == Y.getValueType() &&
3941  "Inputs to shuffles are not the same type");
3942 
3943  // Check that both shuffles use the same mask. The masks are known to be of
3944  // the same length because the result vector type is the same.
3945  // Check also that shuffles have only one use to avoid introducing extra
3946  // instructions.
3947  if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
3948  !SVN0->getMask().equals(SVN1->getMask()))
3949  return SDValue();
3950 
3951  // Don't try to fold this node if it requires introducing a
3952  // build vector of all zeros that might be illegal at this stage.
3953  SDValue ShOp = N0.getOperand(1);
3954  if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
3955  ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3956 
3957  // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
3958  if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
3959  SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
3960  N0.getOperand(0), N1.getOperand(0));
3961  return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
3962  }
3963 
3964  // Don't try to fold this node if it requires introducing a
3965  // build vector of all zeros that might be illegal at this stage.
3966  ShOp = N0.getOperand(0);
3967  if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
3968  ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3969 
3970  // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
3971  if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
3972  SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
3973  N1.getOperand(1));
3974  return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
3975  }
3976  }
3977 
3978  return SDValue();
3979 }
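
// One concrete instance of the reassociation above, sketched in C++: a
// logic op on two zero-extended values equals the zero extension of the
// logic op on the narrow values, so the cast can be sunk below the logic op.
#include <cstdint>
static uint64_t and_of_zext_sketch(uint32_t X, uint32_t Y) {
  return (uint64_t)X & (uint64_t)Y;  // == (uint64_t)(X & Y)
}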
3980 
3981 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
3982 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
3983  const SDLoc &DL) {
3984  SDValue LL, LR, RL, RR, N0CC, N1CC;
3985  if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
3986  !isSetCCEquivalent(N1, RL, RR, N1CC))
3987  return SDValue();
3988 
3989  assert(N0.getValueType() == N1.getValueType() &&
3990  "Unexpected operand types for bitwise logic op");
3991  assert(LL.getValueType() == LR.getValueType() &&
3992  RL.getValueType() == RR.getValueType() &&
3993  "Unexpected operand types for setcc");
3994 
3995  // If we're here post-legalization or the logic op type is not i1, the logic
3996  // op type must match a setcc result type. Also, all folds require new
3997  // operations on the left and right operands, so those types must match.
3998  EVT VT = N0.getValueType();
3999  EVT OpVT = LL.getValueType();
4000  if (LegalOperations || VT.getScalarType() != MVT::i1)
4001  if (VT != getSetCCResultType(OpVT))
4002  return SDValue();
4003  if (OpVT != RL.getValueType())
4004  return SDValue();
4005 
4006  ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
4007  ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
4008  bool IsInteger = OpVT.isInteger();
4009  if (LR == RR && CC0 == CC1 && IsInteger) {
4010  bool IsZero = isNullOrNullSplat(LR);
4011  bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
4012 
4013  // All bits clear?
4014  bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
4015  // All sign bits clear?
4016  bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
4017  // Any bits set?
4018  bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
4019  // Any sign bits set?
4020  bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
4021 
4022  // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
4023  // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
4024  // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
4025  // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
4026  if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
4027  SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
4028  AddToWorklist(Or.getNode());
4029  return DAG.getSetCC(DL, VT, Or, LR, CC1);
4030  }
4031 
4032  // All bits set?
4033  bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
4034  // All sign bits set?
4035  bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
4036  // Any bits clear?
4037  bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
4038  // Any sign bits clear?
4039  bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
4040 
4041  // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
4042  // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
4043  // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
4044  // (or (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
4045  if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
4046  SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
4047  AddToWorklist(And.getNode());
4048  return DAG.getSetCC(DL, VT, And, LR, CC1);
4049  }
4050  }
4051 
4052  // TODO: What is the 'or' equivalent of this fold?
4053  // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
4054  if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
4055  IsInteger && CC0 == ISD::SETNE &&
4056  ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
4057  (isAllOnesConstant(LR) && isNullConstant(RR)))) {
4058  SDValue One = DAG.getConstant(1, DL, OpVT);
4059  SDValue Two = DAG.getConstant(2, DL, OpVT);
4060  SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
4061  AddToWorklist(Add.getNode());
4062  return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
4063  }
4064 
4065  // Try more general transforms if the predicates match and the only user of
4066  // the compares is the 'and' or 'or'.
4067  if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
4068  N0.hasOneUse() && N1.hasOneUse()) {
4069  // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
4070  // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
4071  if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
4072  SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
4073  SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
4074  SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
4075  SDValue Zero = DAG.getConstant(0, DL, OpVT);
4076  return DAG.getSetCC(DL, VT, Or, Zero, CC1);
4077  }
4078  }
4079 
4080  // Canonicalize equivalent operands to LL == RL.
4081  if (LL == RR && LR == RL) {
4082  CC1 = ISD::getSetCCSwappedOperands(CC1);
4083  std::swap(RL, RR);
4084  }
4085 
4086  // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4087  // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4088  if (LL == RL && LR == RR) {
4089  ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
4090  : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
4091  if (NewCC != ISD::SETCC_INVALID &&
4092  (!LegalOperations ||
4093  (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
4094  TLI.isOperationLegal(ISD::SETCC, OpVT))))
4095  return DAG.getSetCC(DL, VT, LL, LR, NewCC);
4096  }
4097 
4098  return SDValue();
4099 }
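
// Two of the identities above as plain C booleans; the second uses an
// unsigned add so the +1 wraps the way the ISD::ADD node does.
static bool all_bits_clear_sketch(unsigned X, unsigned Y) {
  return (X | Y) == 0;            // == ((X == 0) && (Y == 0))
}
static bool ne_zero_ne_all_ones_sketch(int X) {
  return (unsigned)X + 1u >= 2u;  // == ((X != 0) && (X != -1))
}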
4100 
4101 /// This contains all DAGCombine rules which reduce two values combined by
4102 /// an And operation to a single value. This makes them reusable in the context
4103 /// of visitSELECT(). Rules involving constants are not included as
4104 /// visitSELECT() already handles those cases.
4105 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
4106  EVT VT = N1.getValueType();
4107  SDLoc DL(N);
4108 
4109  // fold (and x, undef) -> 0
4110  if (N0.isUndef() || N1.isUndef())
4111  return DAG.getConstant(0, DL, VT);
4112 
4113  if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
4114  return V;
4115 
4116  if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
4117  VT.getSizeInBits() <= 64) {
4118  if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4119  if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
4120  // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
4121  // immediate for an add, but it is legal if its top c2 bits are set,
4122  // transform the ADD so the immediate doesn't need to be materialized
4123  // in a register.
4124  APInt ADDC = ADDI->getAPIntValue();
4125  APInt SRLC = SRLI->getAPIntValue();
4126  if (ADDC.getMinSignedBits() <= 64 &&
4127  SRLC.ult(VT.getSizeInBits()) &&
4128  !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4129  APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
4130  SRLC.getZExtValue());
4131  if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
4132  ADDC |= Mask;
4133  if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4134  SDLoc DL0(N0);
4135  SDValue NewAdd =
4136  DAG.getNode(ISD::ADD, DL0, VT,
4137  N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
4138  CombineTo(N0.getNode(), NewAdd);
4139  // Return N so it doesn't get rechecked!
4140  return SDValue(N, 0);
4141  }
4142  }
4143  }
4144  }
4145  }
4146  }
4147 
4148  // Reduce bit extract of low half of an integer to the narrower type.
4149  // (and (srl i64:x, K), KMask) ->
4150  // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
4151  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
4152  if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
4153  if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4154  unsigned Size = VT.getSizeInBits();
4155  const APInt &AndMask = CAnd->getAPIntValue();
4156  unsigned ShiftBits = CShift->getZExtValue();
4157 
4158  // Bail out, this node will probably disappear anyway.
4159  if (ShiftBits == 0)
4160  return SDValue();
4161 
4162  unsigned MaskBits = AndMask.countTrailingOnes();
4163  EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
4164 
4165  if (AndMask.isMask() &&
4166  // Required bits must not span the two halves of the integer and
4167  // must fit in the half size type.
4168  (ShiftBits + MaskBits <= Size / 2) &&
4169  TLI.isNarrowingProfitable(VT, HalfVT) &&
4170  TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
4171  TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
4172  TLI.isTruncateFree(VT, HalfVT) &&
4173  TLI.isZExtFree(HalfVT, VT)) {
4174  // The isNarrowingProfitable is to avoid regressions on PPC and
4175  // AArch64 which match a few 64-bit bit insert / bit extract patterns
4176  // on downstream users of this. Those patterns could probably be
4177  // extended to handle extensions mixed in.
4178 
4179  SDLoc SL(N0);
4180  assert(MaskBits <= Size);
4181 
4182  // Extracting the highest bit of the low half.
4183  EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
4184  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
4185  N0.getOperand(0));
4186 
4187  SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
4188  SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
4189  SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
4190  SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
4191  return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
4192  }
4193  }
4194  }
4195  }
4196 
4197  return SDValue();
4198 }
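
// The narrowing rewrite above in C++ terms, assuming the shift amount plus
// the mask width fit in the low 32-bit half of a 64-bit value: the shift and
// mask happen on the truncated value and the result is zero-extended back.
#include <cstdint>
static uint64_t narrow_bit_extract_sketch(uint64_t X, unsigned K,
                                          uint32_t Mask) {
  return (uint64_t)(((uint32_t)X >> K) & Mask);  // == (X >> K) & Mask
}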
4199 
4200 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
4201  EVT LoadResultTy, EVT &ExtVT) {
4202  if (!AndC->getAPIntValue().isMask())
4203  return false;
4204 
4205  unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
4206 
4207  ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4208  EVT LoadedVT = LoadN->getMemoryVT();
4209 
4210  if (ExtVT == LoadedVT &&
4211  (!LegalOperations ||
4212  TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
4213  // ZEXTLOAD will match without needing to change the size of the value being
4214  // loaded.
4215  return true;
4216  }
4217 
4218  // Do not change the width of a volatile load.
4219  if (LoadN->isVolatile())
4220  return false;
4221 
4222  // Do not generate loads of non-round integer types since these can
4223  // be expensive (and would be wrong if the type is not byte sized).
4224  if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
4225  return false;
4226 
4227  if (LegalOperations &&
4228  !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
4229  return false;
4230 
4231  if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
4232  return false;
4233 
4234  return true;
4235 }
4236 
4237 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
4238  ISD::LoadExtType ExtType, EVT &MemVT,
4239  unsigned ShAmt) {
4240  if (!LDST)
4241  return false;
4242  // Only allow byte offsets.
4243  if (ShAmt % 8)
4244  return false;
4245 
4246  // Do not generate loads of non-round integer types since these can
4247  // be expensive (and would be wrong if the type is not byte sized).
4248  if (!MemVT.isRound())
4249  return false;
4250 
4251  // Don't change the width of a volatile load.
4252  if (LDST->isVolatile())
4253  return false;
4254 
4255  // Verify that we are actually reducing a load width here.
4256  if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
4257  return false;
4258 
4259  // Ensure that this isn't going to produce an unsupported unaligned access.
4260  if (ShAmt &&
4261  !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
4262  LDST->getAddressSpace(), ShAmt / 8))
4263  return false;
4264 
4265  // It's not possible to generate a constant of extended or untyped type.
4266  EVT PtrType = LDST->getBasePtr().getValueType();
4267  if (PtrType == MVT::Untyped || PtrType.isExtended())
4268  return false;
4269 
4270  if (isa<LoadSDNode>(LDST)) {
4271  LoadSDNode *Load = cast<LoadSDNode>(LDST);
4272  // Don't transform one with multiple uses, this would require adding a new
4273  // load.
4274  if (!SDValue(Load, 0).hasOneUse())
4275  return false;
4276 
4277  if (LegalOperations &&
4278  !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
4279  return false;
4280 
4281  // For the transform to be legal, the load must produce only two values
4282  // (the value loaded and the chain). Don't transform a pre-increment
4283  // load, for example, which produces an extra value. Otherwise the
4284  // transformation is not equivalent, and the downstream logic to replace
4285  // uses gets things wrong.
4286  if (Load->getNumValues() > 2)
4287  return false;
4288 
4289  // If the load that we're shrinking is an extload and we're not just
4290  // discarding the extension we can't simply shrink the load. Bail.
4291  // TODO: It would be possible to merge the extensions in some cases.
4292  if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
4293  Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4294  return false;
4295 
4296  if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
4297  return false;
4298  } else {
4299  assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
4300  StoreSDNode *Store = cast<StoreSDNode>(LDST);
4301  // Can't write outside the original store
4302  if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4303  return false;
4304 
4305  if (LegalOperations &&
4306  !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
4307  return false;
4308  }
4309  return true;
4310 }
4311 
4312 bool DAGCombiner::SearchForAndLoads(SDNode *N,
4313  SmallVectorImpl<LoadSDNode*> &Loads,
4314  SmallPtrSetImpl<SDNode*> &NodesWithConsts,
4315  ConstantSDNode *Mask,
4316  SDNode *&NodeToMask) {
4317  // Recursively search for the operands, looking for loads which can be
4318  // narrowed.
4319  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
4320  SDValue Op = N->getOperand(i);
4321 
4322  if (Op.getValueType().isVector())
4323  return false;
4324 
4325  // Some constants may need fixing up later if they are too large.
4326  if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4327  if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
4328  (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
4329  NodesWithConsts.insert(N);
4330  continue;
4331  }
4332 
4333  if (!Op.hasOneUse())
4334  return false;
4335 
4336  switch(Op.getOpcode()) {
4337  case ISD::LOAD: {
4338  auto *Load = cast<LoadSDNode>(Op);
4339  EVT ExtVT;
4340  if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
4341  isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
4342 
4343  // ZEXTLOAD is already small enough.
4344  if (Load->getExtensionType() == ISD::ZEXTLOAD &&
4345  ExtVT.bitsGE(Load->getMemoryVT()))
4346  continue;
4347 
4348  // Use LE to convert equal sized loads to zext.
4349  if (ExtVT.bitsLE(Load->getMemoryVT()))
4350  Loads.push_back(Load);
4351 
4352  continue;
4353  }
4354  return false;
4355  }
4356  case ISD::ZERO_EXTEND:
4357  case ISD::AssertZext: {
4358  unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
4359  EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4360  EVT VT = Op.getOpcode() == ISD::AssertZext ?
4361  cast<VTSDNode>(Op.getOperand(1))->getVT() :
4362  Op.getOperand(0).getValueType();
4363 
4364  // We can accept extending nodes if the mask is wider or an equal
4365  // width to the original type.
4366  if (ExtVT.bitsGE(VT))
4367  continue;
4368  break;
4369  }
4370  case ISD::OR:
4371  case ISD::XOR:
4372  case ISD::AND:
4373  if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
4374  NodeToMask))
4375  return false;
4376  continue;
4377  }
4378 
4379  // Allow one node which will be masked along with any loads found.
4380  if (NodeToMask)
4381  return false;
4382 
4383  // Also ensure that the node to be masked only produces one data result.
4384  NodeToMask = Op.getNode();
4385  if (NodeToMask->getNumValues() > 1) {
4386  bool HasValue = false;
4387  for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
4388  MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
4389  if (VT != MVT::Glue && VT != MVT::Other) {
4390  if (HasValue) {
4391  NodeToMask = nullptr;
4392  return false;
4393  }
4394  HasValue = true;
4395  }
4396  }
4397  assert(HasValue && "Node to be masked has no data result?");
4398  }
4399  }
4400  return true;
4401 }
4402 
4403 bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
4404  auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
4405  if (!Mask)
4406  return false;
4407 
4408  if (!Mask->getAPIntValue().isMask())
4409  return false;
4410 
4411  // No need to do anything if the and directly uses a load.
4412  if (isa<LoadSDNode>(N->getOperand(0)))
4413  return false;
4414 
4415  SmallVector<LoadSDNode*, 8> Loads;
4416  SmallPtrSet<SDNode*, 2> NodesWithConsts;
4417  SDNode *FixupNode = nullptr;
4418  if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
4419  if (Loads.size() == 0)
4420  return false;
4421 
4422  LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
4423  SDValue MaskOp = N->getOperand(1);
4424 
4425  // If it exists, fixup the single node we allow in the tree that needs
4426  // masking.
4427  if (FixupNode) {
4428  LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
4429  SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
4430  FixupNode->getValueType(0),
4431  SDValue(FixupNode, 0), MaskOp);
4432  DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
4433  if (And.getOpcode() == ISD::AND)
4434  DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
4435  }
4436 
4437  // Narrow any constants that need it.
4438  for (auto *LogicN : NodesWithConsts) {
4439  SDValue Op0 = LogicN->getOperand(0);
4440  SDValue Op1 = LogicN->getOperand(1);
4441 
4442  if (isa<ConstantSDNode>(Op0))
4443  std::swap(Op0, Op1);
4444 
4445  SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
4446  Op1, MaskOp);
4447 
4448  DAG.UpdateNodeOperands(LogicN, Op0, And);
4449  }
4450 
4451  // Create narrow loads.
4452  for (auto *Load : Loads) {
4453  LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
4454  SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
4455  SDValue(Load, 0), MaskOp);
4456  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
4457  if (And.getOpcode() == ISD::AND)
4458  And = SDValue(
4459  DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
4460  SDValue NewLoad = ReduceLoadWidth(And.getNode());
4461  assert(NewLoad &&
4462  "Shouldn't be masking the load if it can't be narrowed");
4463  CombineTo(Load, NewLoad, NewLoad.getValue(1));
4464  }
4465  DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
4466  return true;
4467  }
4468  return false;
4469 }
4470 
4471 // Unfold
4472 // x & (-1 'logical shift' y)
4473 // To
4474 // (x 'opposite logical shift' y) 'logical shift' y
4475 // if it is better for performance.
4476 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
4477  assert(N->getOpcode() == ISD::AND);
4478 
4479  SDValue N0 = N->getOperand(0);
4480  SDValue N1 = N->getOperand(1);
4481 
4482  // Do we actually prefer shifts over mask?
4483  if (!TLI.preferShiftsToClearExtremeBits(N0))
4484  return SDValue();
4485 
4486  // Try to match (-1 '[outer] logical shift' y)
4487  unsigned OuterShift;
4488  unsigned InnerShift; // The opposite direction to the OuterShift.
4489  SDValue Y; // Shift amount.
4490  auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
4491  if (!M.hasOneUse())
4492  return false;
4493  OuterShift = M->getOpcode();
4494  if (OuterShift == ISD::SHL)
4495  InnerShift = ISD::SRL;
4496  else if (OuterShift == ISD::SRL)
4497  InnerShift = ISD::SHL;
4498  else
4499  return false;
4500  if (!isAllOnesConstant(M->getOperand(0)))
4501  return false;
4502  Y = M->getOperand(1);
4503  return true;
4504  };
4505 
4506  SDValue X;
4507  if (matchMask(N1))
4508  X = N0;
4509  else if (matchMask(N0))
4510  X = N1;
4511  else
4512  return SDValue();
4513 
4514  SDLoc DL(N);
4515  EVT VT = N->getValueType(0);
4516 
4517  // tmp = x 'opposite logical shift' y
4518  SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
4519  // ret = tmp 'logical shift' y
4520  SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
4521 
4522  return T1;
4523 }
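
// The SHL-mask case of the unfold above as a C sketch (32-bit unsigned,
// Y < 32): clearing the low Y bits through a mask is the same as shifting
// them out and back in.
static unsigned clear_low_bits_sketch(unsigned X, unsigned Y) {
  return (X >> Y) << Y;  // == X & (~0u << Y)
}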
4524 
4525 SDValue DAGCombiner::visitAND(SDNode *N) {
4526  SDValue N0 = N->getOperand(0);
4527  SDValue N1 = N->getOperand(1);
4528  EVT VT = N1.getValueType();
4529 
4530  // x & x --> x
4531  if (N0 == N1)
4532  return N0;
4533 
4534  // fold vector ops
4535  if (VT.isVector()) {
4536  if (SDValue FoldedVOp = SimplifyVBinOp(N))
4537  return FoldedVOp;
4538 
4539  // fold (and x, 0) -> 0, vector edition
4540  if (ISD::isBuildVectorAllZeros(N0.getNode()))
4541  // do not return N0, because undef node may exist in N0
4542  return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
4543  SDLoc(N), N0.getValueType());
4544  if (ISD::isBuildVectorAllZeros(N1.getNode()))
4545  // do not return N1, because undef node may exist in N1
4546  return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
4547  SDLoc(N), N1.getValueType());
4548 
4549  // fold (and x, -1) -> x, vector edition
4550  if (ISD::isBuildVectorAllOnes(N0.getNode()))
4551  return N1;
4552  if (ISD::isBuildVectorAllOnes(N1.getNode()))
4553  return N0;
4554  }
4555 
4556  // fold (and c1, c2) -> c1&c2
4557  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4558  ConstantSDNode *N1C = isConstOrConstSplat(N1);
4559  if (N0C && N1C && !N1C->isOpaque())
4560  return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
4561  // canonicalize constant to RHS
4562  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4563  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4564  return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
4565  // fold (and x, -1) -> x
4566  if (isAllOnesConstant(N1))
4567  return N0;
4568  // if (and x, c) is known to be zero, return 0
4569  unsigned BitWidth = VT.getScalarSizeInBits();
4570  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
4571  APInt::getAllOnesValue(BitWidth)))
4572  return DAG.getConstant(0, SDLoc(N), VT);
4573 
4574  if (SDValue NewSel = foldBinOpIntoSelect(N))
4575  return NewSel;
4576 
4577  // reassociate and
4578  if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
4579  return RAND;
4580 
4581  // Try to convert a constant mask AND into a shuffle clear mask.
4582  if (VT.isVector())
4583  if (SDValue Shuffle = XformToShuffleWithZero(N))
4584  return Shuffle;
4585 
4586  // fold (and (or x, C), D) -> D if (C & D) == D
4587  auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
4588  return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
4589  };
4590  if (N0.getOpcode() == ISD::OR &&
4591  ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
4592  return N1;
4593  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
4594  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
4595  SDValue N0Op0 = N0.getOperand(0);
4596  APInt Mask = ~N1C->getAPIntValue();
4597  Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
4598  if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
4599  SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
4600  N0.getValueType(), N0Op0);
4601 
4602  // Replace uses of the AND with uses of the Zero extend node.
4603  CombineTo(N, Zext);
4604 
4605  // We actually want to replace all uses of the any_extend with the
4606  // zero_extend, to avoid duplicating things. This will later cause this
4607  // AND to be folded.
4608  CombineTo(N0.getNode(), Zext);
4609  return SDValue(N, 0); // Return N so it doesn't get rechecked!
4610  }
4611  }
4612  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
4613  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
4614  // already be zero by virtue of the width of the base type of the load.
4615  //
4616  // the 'X' node here can either be nothing or an extract_vector_elt to catch
4617  // more cases.
4618  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4619  N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
4620  N0.getOperand(0).getOpcode() == ISD::LOAD &&
4621  N0.getOperand(0).getResNo() == 0) ||
4622  (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
4623  LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
4624  N0 : N0.getOperand(0) );
4625 
4626  // Get the constant (if applicable) the zero'th operand is being ANDed with.
4627  // This can be a pure constant or a vector splat, in which case we treat the
4628  // vector as a scalar and use the splat value.
4629  APInt Constant = APInt::getNullValue(1);
4630  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
4631  Constant = C->getAPIntValue();
4632  } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
4633  APInt SplatValue, SplatUndef;
4634  unsigned SplatBitSize;
4635  bool HasAnyUndefs;
4636  bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
4637  SplatBitSize, HasAnyUndefs);
4638  if (IsSplat) {
4639  // Undef bits can contribute to a possible optimisation if set, so
4640  // set them.
4641  SplatValue |= SplatUndef;
4642 
4643  // The splat value may be something like "0x00FFFFFF", which means 0 for
4644  // the first vector value and FF for the rest, repeating. We need a mask
4645  // that will apply equally to all members of the vector, so AND all the
4646  // lanes of the constant together.
4647  EVT VT = Vector->getValueType(0);
4648  unsigned BitWidth = VT.getScalarSizeInBits();
4649 
4650  // If the splat value has been compressed to a bitlength lower
4651  // than the size of the vector lane, we need to re-expand it to
4652  // the lane size.
4653  if (BitWidth > SplatBitSize)
4654  for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
4655  SplatBitSize < BitWidth;
4656  SplatBitSize = SplatBitSize * 2)
4657  SplatValue |= SplatValue.shl(SplatBitSize);
4658 
4659  // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
4660  // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
4661  if (SplatBitSize % BitWidth == 0) {
4662  Constant = APInt::getAllOnesValue(BitWidth);
4663  for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
4664  Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
4665  }
4666  }
4667  }
4668 
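  // Editor's illustration (hedged sketch, not LLVM source): the lane-folding
  // arithmetic described above, for a hypothetical splat constant 0x00FFFFFF
  // over 8-bit lanes. ANDing all four lanes together yields 0x00, so no
  // single all-lanes mask exists and 'Constant' must not report all-ones.
  static_assert((((0x00FFFFFFu >> 0) & 0xFFu) & ((0x00FFFFFFu >> 8) & 0xFFu) &
                 ((0x00FFFFFFu >> 16) & 0xFFu) &
                 ((0x00FFFFFFu >> 24) & 0xFFu)) == 0x00u,
                "AND of all 8-bit lanes of splat 0x00FFFFFF is 0");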
4669  // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
4670  // actually legal and isn't going to get expanded, else this is a false
4671  // optimisation.
4672  bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
4673  Load->getValueType(0),
4674  Load->getMemoryVT());
4675 
4676  // Resize the constant to the same size as the original memory access before
4677  // extension. If it is still the AllOnesValue then this AND is completely
4678  // unneeded.
4679  Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
4680 
4681  bool B;
4682  switch (Load->getExtensionType()) {
4683  default: B = false; break;
4684  case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
4685  case ISD::ZEXTLOAD:
4686  case ISD::NON_EXTLOAD: B = true; break;
4687  }
4688 
4689  if (B && Constant.isAllOnesValue()) {
4690  // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
4691  // preserve semantics once we get rid of the AND.
4692  SDValue NewLoad(Load, 0);
4693 
4694  // Fold the AND away. NewLoad may get replaced immediately.
4695  CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
4696 
4697  if (Load->getExtensionType() == ISD::EXTLOAD) {
4698  NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
4699  Load->getValueType(0), SDLoc(Load),
4700  Load->getChain(), Load->getBasePtr(),
4701  Load->getOffset(), Load->getMemoryVT(),
4702  Load->getMemOperand());
4703  // Replace uses of the EXTLOAD with the new ZEXTLOAD.
4704  if (Load->getNumValues() == 3) {
4705  // PRE/POST_INC loads have 3 values.
4706  SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
4707  NewLoad.getValue(2) };
4708  CombineTo(Load, To, 3, true);
4709  } else {
4710  CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
4711  }
4712  }
4713 
4714  return SDValue(N, 0); // Return N so it doesn't get rechecked!
4715  }
4716  }
4717 
4718  // fold (and (load x), 255) -> (zextload x, i8)
4719  // fold (and (extload x, i16), 255) -> (zextload x, i8)
4720  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
4721  if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
4722  (N0.getOpcode() == ISD::ANY_EXTEND &&
4723  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
4724  if (SDValue Res = ReduceLoadWidth(N)) {
4725  LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
4726  ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
4727  AddToWorklist(N);
4728  DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
4729  return SDValue(N, 0);
4730  }
4731  }
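  // Editor's illustration (hedged sketch, not LLVM source): value-level model
  // of the narrowing fold above. Masking a wide load with 255 keeps only the
  // low byte, which is exactly what an i8 zextload of the same address
  // produces on a little-endian target.
  static_assert((0x12345678u & 0xFFu) == 0x78u,
                "(and (i32 load), 255) keeps only the low byte");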
4732 
4733  if (Level >= AfterLegalizeTypes) {
4734  // Attempt to propagate the AND back up to the leaves which, if they're
4735  // loads, can be combined to narrow loads and the AND node can be removed.
4736  // Perform after legalization so that extend nodes will already be
4737  // combined into the loads.
4738  if (BackwardsPropagateMask(N, DAG)) {
4739  return SDValue(N, 0);
4740  }
4741  }
4742 
4743  if (SDValue Combined = visitANDLike(N0, N1, N))
4744  return Combined;
4745 
4746  // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
4747  if (N0.getOpcode() == N1.getOpcode())
4748  if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
4749  return V;
4750 
4751  // Masking the negated extension of a boolean is just the zero-extended
4752  // boolean:
4753  // and (sub 0, zext(bool X)), 1 --> zext(bool X)
4754  // and (sub 0, sext(bool X)), 1 --> zext(bool X)
4755  //
4756  // Note: the SimplifyDemandedBits fold below can make an information-losing
4757  // transform, and then we have no way to find this better fold.
4758  if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
4759  if (isNullOrNullSplat(N0.getOperand(0))) {
4760  SDValue SubRHS = N0.getOperand(1);
4761  if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
4762  SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
4763  return SubRHS;
4764  if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
4765  SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
4766  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
4767  }
4768  }
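  // Editor's illustration (hedged sketch, not LLVM source): the two identities
  // above for bool X = true. sext(true) is all-ones, so (sub 0, sext X) is 1,
  // and masking with 1 leaves zext(X); the zext variant behaves the same way.
  static_assert(((0u - 0xFFFFFFFFu) & 1u) == 1u,
                "(and (sub 0, sext(true)), 1) == zext(true)");
  static_assert(((0u - 1u) & 1u) == 1u,
                "(and (sub 0, zext(true)), 1) == zext(true)");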
4769 
4770  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
4771  // fold (and (sra)) -> (and (srl)) when possible.
4772  if (SimplifyDemandedBits(SDValue(N, 0)))
4773  return SDValue(N, 0);
4774 
4775  // fold (zext_inreg (extload x)) -> (zextload x)
4776  if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
4777  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4778  EVT MemVT = LN0->getMemoryVT();
4779  // If we zero all the possible extended bits, then we can turn this into
4780  // a zextload if we are running before legalize or the operation is legal.
4781  unsigned BitWidth = N1.getScalarValueSizeInBits();
4782  if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
4783  BitWidth - MemVT.getScalarSizeInBits())) &&
4784  ((!LegalOperations && !LN0->isVolatile()) ||
4785  TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
4786  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
4787  LN0->getChain(), LN0->getBasePtr(),
4788  MemVT, LN0->getMemOperand());
4789  AddToWorklist(N);
4790  CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
4791  return SDValue(N, 0); // Return N so it doesn't get rechecked!
4792  }
4793  }
4794  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
4795  if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
4796  N0.hasOneUse()) {
4797  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4798  EVT MemVT = LN0->getMemoryVT();
4799  // If we zero all the possible extended bits, then we can turn this into
4800  // a zextload if we are running before legalize or the operation is legal.
4801  unsigned BitWidth = N1.getScalarValueSizeInBits();
4802  if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
4803  BitWidth - MemVT.getScalarSizeInBits())) &&
4804  ((!LegalOperations && !LN0->isVolatile()) ||
4805  TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
4806  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
4807  LN0->getChain(), LN0->getBasePtr(),
4808  MemVT, LN0->getMemOperand());
4809  AddToWorklist(N);
4810  CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
4811  return SDValue(N, 0); // Return N so it doesn't get rechecked!
4812  }
4813  }
4814  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
4815  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
4816  if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
4817  N0.getOperand(1), false))
4818  return BSwap;
4819  }
4820 
4821  if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
4822  return Shifts;
4823 
4824  return SDValue();
4825 }
4826 
4827 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
4828 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
4829  bool DemandHighBits) {
4830  if (!LegalOperations)
4831  return SDValue();
4832 
4833  EVT VT = N->getValueType(0);
4834  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
4835  return SDValue();
4836  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
4837  return SDValue();
4838 
4839  // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
4840  bool LookPassAnd0 = false;
4841  bool LookPassAnd1 = false;
4842  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
4843  std::swap(N0, N1);
4844  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
4845  std::swap(N0, N1);
4846  if (N0.getOpcode() == ISD::AND) {
4847  if (!N0.getNode()->hasOneUse())
4848  return SDValue();
4849  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4850  // Also handle 0xffff since the LHS is guaranteed to have zeros there.
4851  // This is needed for X86.
4852  if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
4853  N01C->getZExtValue() != 0xFFFF))
4854  return SDValue();
4855  N0 = N0.getOperand(0);
4856  LookPassAnd0 = true;
4857  }
4858 
4859  if (N1.getOpcode() == ISD::AND) {
4860  if (!N1.getNode()->hasOneUse())
4861  return SDValue();
4862  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
4863  if (!N11C || N11C->getZExtValue() != 0xFF)
4864  return SDValue();
4865  N1 = N1.getOperand(0);
4866  LookPassAnd1 = true;
4867  }
4868 
4869  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
4870  std::swap(N0, N1);
4871  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
4872  return SDValue();
4873  if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
4874  return SDValue();
4875 
4876  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4877  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
4878  if (!N01C || !N11C)
4879  return SDValue();
4880  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
4881  return SDValue();
4882 
4883  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
4884  SDValue N00 = N0->getOperand(0);
4885  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
4886  if (!N00.getNode()->hasOneUse())
4887  return SDValue();
4888  ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
4889  if (!N001C || N001C->getZExtValue() != 0xFF)
4890  return SDValue();
4891  N00 = N00.getOperand(0);
4892  LookPassAnd0 = true;
4893  }
4894 
4895  SDValue N10 = N1->getOperand(0);
4896  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
4897  if (!N10.getNode()->hasOneUse())
4898  return SDValue();
4899  ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
4900  // Also allow 0xFFFF since the bits will be shifted out. This is needed
4901  // for X86.
4902  if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
4903  N101C->getZExtValue() != 0xFFFF))
4904  return SDValue();
4905  N10 = N10.getOperand(0);
4906  LookPassAnd1 = true;
4907  }
4908 
4909  if (N00 != N10)
4910  return SDValue();
4911 
4912  // Make sure everything beyond the low halfword gets set to zero since the SRL
4913  // 16 will clear the top bits.
4914  unsigned OpSizeInBits = VT.getSizeInBits();
4915  if (DemandHighBits && OpSizeInBits > 16) {
4916  // If the left-shift isn't masked out then the only way this is a bswap is
4917  // if all bits beyond the low 8 are 0. In that case the entire pattern
4918  // reduces to a left shift anyway: leave it for other parts of the combiner.
4919  if (!LookPassAnd0)
4920  return SDValue();
4921 
4922  // However, if the right shift isn't masked out then it might be because
4923  // it's not needed. See if we can spot that too.
4924  if (!LookPassAnd1 &&
4925  !DAG.MaskedValueIsZero(
4926  N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
4927  return SDValue();
4928  }
4929 
4930  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
4931  if (OpSizeInBits > 16) {
4932  SDLoc DL(N);
4933  Res = DAG.getNode(ISD::SRL, DL, VT, Res,
4934  DAG.getConstant(OpSizeInBits - 16, DL,
4935  getShiftAmountTy(VT)));
4936  }
4937  return Res;
4938 }
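// Editor's illustration (hedged sketch, not LLVM source): a scalar model of
// the pattern matched above. 'IllustrativeBSwap32' is a hypothetical helper,
// defined here only so the identity can be checked at compile time.
namespace {
constexpr uint32_t IllustrativeBSwap32(uint32_t X) {
  return (X >> 24) | ((X >> 8) & 0xFF00u) | ((X << 8) & 0xFF0000u) | (X << 24);
}
} // end anonymous namespace
static_assert((((0x1234u & 0xFFu) << 8) | ((0x1234u >> 8) & 0xFFu)) ==
                  (IllustrativeBSwap32(0x1234u) >> 16),
              "halfword-low pattern equals bswap followed by srl 16");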
4939 
4940 /// Return true if the specified node is an element that makes up a 32-bit
4941 /// packed halfword byteswap.
4942 /// ((x & 0x000000ff) << 8) |
4943 /// ((x & 0x0000ff00) >> 8) |
4944 /// ((x & 0x00ff0000) << 8) |
4945 /// ((x & 0xff000000) >> 8)
4946 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
4947  if (!N.getNode()->hasOneUse())
4948  return false;
4949 
4950  unsigned Opc = N.getOpcode();
4951  if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
4952  return false;
4953 
4954  SDValue N0 = N.getOperand(0);
4955  unsigned Opc0 = N0.getOpcode();
4956  if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
4957  return false;
4958 
4959  ConstantSDNode *N1C = nullptr;
4960  // SHL or SRL: look upstream for AND mask operand
4961  if (Opc == ISD::AND)
4962  N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4963  else if (Opc0 == ISD::AND)
4964  N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4965  if (!N1C)
4966  return false;
4967 
4968  unsigned MaskByteOffset;
4969  switch (N1C->getZExtValue()) {
4970  default:
4971  return false;
4972  case 0xFF: MaskByteOffset = 0; break;
4973  case 0xFF00: MaskByteOffset = 1; break;
4974  case 0xFFFF:
4975  // In case demanded bits didn't clear the bits that will be shifted out.
4976  // This is needed for X86.
4977  if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
4978  MaskByteOffset = 1;
4979  break;
4980  }
4981  return false;
4982  case 0xFF0000: MaskByteOffset = 2; break;
4983  case 0xFF000000: MaskByteOffset = 3; break;
4984  }
4985 
4986  // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
4987  if (Opc == ISD::AND) {
4988  if (MaskByteOffset == 0 || MaskByteOffset == 2) {
4989  // (x >> 8) & 0xff
4990  // (x >> 8) & 0xff0000
4991  if (Opc0 != ISD::SRL)
4992  return false;
4993  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4994  if (!C || C->getZExtValue() != 8)
4995  return false;
4996  } else {
4997  // (x << 8) & 0xff00
4998  // (x << 8) & 0xff000000
4999  if (Opc0 != ISD::SHL)
5000  return false;
5001  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5002  if (!C || C->getZExtValue() != 8)
5003  return false;
5004  }
5005  } else if (Opc == ISD::SHL) {
5006  // (x & 0xff) << 8
5007  // (x & 0xff0000) << 8
5008  if (MaskByteOffset != 0 && MaskByteOffset != 2)
5009  return false;
5010  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5011  if (!C || C->getZExtValue() != 8)
5012  return false;
5013  } else { // Opc == ISD::SRL
5014  // (x & 0xff00) >> 8
5015  // (x & 0xff000000) >> 8
5016  if (MaskByteOffset != 1 && MaskByteOffset != 3)
5017  return false;
5018  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5019  if (!C || C->getZExtValue() != 8)
5020  return false;
5021  }
5022 
5023  if (Parts[MaskByteOffset])
5024  return false;
5025 
5026  Parts[MaskByteOffset] = N0.getOperand(0).getNode();
5027  return true;
5028 }
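// Editor's illustration (hedged sketch, not LLVM source): one element of the
// packed-halfword pattern. Masking byte 1 and shifting right by 8 moves that
// source byte to byte position 0, so it fills Parts[1].
static_assert(((0x00AB00u & 0x0000FF00u) >> 8) == 0xABu,
              "(x & 0xff00) >> 8 extracts byte 1 of x");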
5029 
5030 /// Match a 32-bit packed halfword bswap. That is
5031 /// ((x & 0x000000ff) << 8) |
5032 /// ((x & 0x0000ff00) >> 8) |
5033 /// ((x & 0x00ff0000) << 8) |
5034 /// ((x & 0xff000000) >> 8)
5035 /// => (rotl (bswap x), 16)
5036 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
5037  if (!LegalOperations)
5038  return SDValue();
5039 
5040  EVT VT = N->getValueType(0);
5041  if (VT != MVT::i32)
5042  return SDValue();
5043  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
5044  return SDValue();
5045 
5046  // Look for either
5047  // (or (or (and), (and)), (or (and), (and)))
5048  // (or (or (or (and), (and)), (and)), (and))
5049  if (N0.getOpcode() != ISD::OR)
5050  return SDValue();
5051  SDValue N00 = N0.getOperand(0);
5052  SDValue N01 = N0.getOperand(1);
5053  SDNode *Parts[4] = {};
5054 
5055  if (N1.getOpcode() == ISD::OR &&
5056  N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
5057  // (or (or (and), (and)), (or (and), (and)))
5058  if (!isBSwapHWordElement(N00, Parts))
5059  return SDValue();
5060 
5061  if (!isBSwapHWordElement(N01, Parts))
5062  return SDValue();
5063  SDValue N10 = N1.getOperand(0);
5064  if (!isBSwapHWordElement(N10, Parts))
5065  return SDValue();
5066  SDValue N11 = N1.getOperand(1);
5067  if (!isBSwapHWordElement(N11, Parts))
5068  return SDValue();
5069  } else {
5070  // (or (or (or (and), (and)), (and)), (and))
5071  if (!isBSwapHWordElement(N1, Parts))
5072  return SDValue();
5073  if (!isBSwapHWordElement(N01, Parts))
5074  return SDValue();
5075  if (N00.getOpcode() != ISD::OR)
5076  return SDValue();
5077  SDValue N000 = N00.getOperand(0);
5078  if (!isBSwapHWordElement(N000, Parts))
5079  return SDValue();
5080  SDValue N001 = N00.getOperand(1);
5081  if (!isBSwapHWordElement(N001, Parts))
5082  return SDValue();
5083  }
5084 
5085  // Make sure the parts are all coming from the same node.
5086  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
5087  return SDValue();
5088 
5089  SDLoc DL(N);
5090  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
5091  SDValue(Parts[0], 0));
5092 
5093  // Result of the bswap should be rotated by 16. If it's not legal, then
5094  // do (x << 16) | (x >> 16).
5095  SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
5096  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
5097  return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
5098  if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
5099  return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
5100  return DAG.getNode(ISD::OR, DL, VT,
5101  DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
5102  DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
5103 }
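// Editor's illustration (hedged sketch, not LLVM source), reusing the
// IllustrativeBSwap32 helper introduced after MatchBSwapHWordLow: the
// four-element mask-and-shift pattern above equals rotl(bswap(x), 16),
// expressed here with the shl/srl expansion of the rotate.
static_assert((((0x11223344u & 0x000000FFu) << 8) |
               ((0x11223344u & 0x0000FF00u) >> 8) |
               ((0x11223344u & 0x00FF0000u) << 8) |
               ((0x11223344u & 0xFF000000u) >> 8)) ==
                  ((IllustrativeBSwap32(0x11223344u) << 16) |
                   (IllustrativeBSwap32(0x11223344u) >> 16)),
              "packed halfword bswap equals rotl(bswap x, 16)");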
5104 
5105 /// This contains all DAGCombine rules which reduce two values combined by
5106 /// an Or operation to a single value \see visitANDLike().
5107 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
5108  EVT VT = N1.getValueType();
5109  SDLoc DL(N);
5110 
5111  // fold (or x, undef) -> -1
5112  if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
5113  return DAG.getAllOnesConstant(DL, VT);
5114 
5115  if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
5116  return V;
5117 
5118  // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
5119  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5120  // Don't increase # computations.
5121  (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5122  // We can only do this xform if we know that bits from X that are set in C2
5123  // but not in C1 are already zero. Likewise for Y.
5124  if (const ConstantSDNode *N0O1C =
5125  getAsNonOpaqueConstant(N0.getOperand(1))) {
5126  if (const ConstantSDNode *N1O1C =
5127  getAsNonOpaqueConstant(N1.getOperand(1))) {
5128  // We can only do this xform if we know that bits from X that are set in
5129  // C2 but not in C1 are already zero. Likewise for Y.
5130  const APInt &LHSMask = N0O1C->getAPIntValue();
5131  const APInt &RHSMask = N1O1C->getAPIntValue();
5132 
5133  if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
5134  DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
5135  SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5136  N0.getOperand(0), N1.getOperand(0));
5137  return DAG.getNode(ISD::AND, DL, VT, X,
5138  DAG.getConstant(LHSMask | RHSMask, DL, VT));
5139  }
5140  }
5141  }
5142  }
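  // Editor's illustration (hedged sketch, not LLVM source): the fold above on
  // concrete values X = 0x0B (no bits in C2 = 0xF0) and Y = 0xA0 (no bits in
  // C1 = 0x0F), so the masked-value-is-zero preconditions hold.
  static_assert(((0x0Bu & 0x0Fu) | (0xA0u & 0xF0u)) ==
                    ((0x0Bu | 0xA0u) & (0x0Fu | 0xF0u)),
                "(or (and X,C1), (and Y,C2)) == (and (or X,Y), C1|C2)");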
5143 
5144  // (or (and X, M), (and X, N)) -> (and X, (or M, N))
5145  if (N0.getOpcode() == ISD::AND &&
5146  N1.getOpcode() == ISD::AND &&
5147  N0.getOperand(0) == N1.getOperand(0) &&
5148  // Don't increase # computations.
5149  (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5150  SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5151  N0.getOperand(1), N1.getOperand(1));
5152  return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
5153  }
5154 
5155  return SDValue();
5156 }
5157 
5158 SDValue DAGCombiner::visitOR(SDNode *N) {
5159  SDValue N0 = N->getOperand(0);
5160  SDValue N1 = N->getOperand(1);
5161  EVT VT = N1.getValueType();
5162 
5163  // x | x --> x
5164  if (N0 == N1)
5165  return N0;
5166 
5167  // fold vector ops
5168  if (VT.isVector()) {
5169  if (SDValue FoldedVOp = SimplifyVBinOp(N))
5170  return FoldedVOp;
5171 
5172  // fold (or x, 0) -> x, vector edition
5173  if (ISD::isBuildVectorAllZeros(N0.getNode()))
5174  return N1;
5175  if (ISD::isBuildVectorAllZeros(N1.getNode()))
5176  return N0;
5177 
5178  // fold (or x, -1) -> -1, vector edition
5179  if (ISD::isBuildVectorAllOnes(N0.getNode()))
5180  // do not return N0, because undef node may exist in N0
5181  return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
5182  if (ISD::isBuildVectorAllOnes(N1.getNode()))
5183  // do not return N1, because undef node may exist in N1
5184  return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
5185 
5186  // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
5187  // Do this only if the resulting shuffle is legal.
5188  if (isa<ShuffleVectorSDNode>(N0) &&
5189  isa<ShuffleVectorSDNode>(N1) &&
5190  // Avoid folding a node with illegal type.
5191  TLI.isTypeLegal(VT)) {
5192  bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
5193  bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
5194  bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
5195  bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
5196  // Ensure both shuffles have a zero input.
5197  if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
5198  assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
5199  assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
5200  const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
5201  const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
5202  bool CanFold = true;
5203  int NumElts = VT.getVectorNumElements();
5204  SmallVector<int, 4> Mask(NumElts);
5205 
5206  for (int i = 0; i != NumElts; ++i) {
5207  int M0 = SV0->getMaskElt(i);
5208  int M1 = SV1->getMaskElt(i);
5209 
5210  // Determine if either index is pointing to a zero vector.
5211  bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
5212  bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
5213 
5214  // If one element is zero and the other side is undef, keep undef.
5215  // This also handles the case that both are undef.
5216  if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
5217  Mask[i] = -1;
5218  continue;
5219  }
5220 
5221  // Make sure only one of the elements is zero.
5222  if (M0Zero == M1Zero) {
5223  CanFold = false;
5224  break;
5225  }
5226 
5227  assert((M0 >= 0 || M1 >= 0) && "Undef index!");
5228 
5229  // We have a zero and non-zero element. If the non-zero came from
5230  // SV0 make the index a LHS index. If it came from SV1, make it
5231  // a RHS index. We need to mod by NumElts because we don't care
5232  // which operand it came from in the original shuffles.
5233  Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
5234  }
5235 
5236  if (CanFold) {
5237  SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
5238  SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
5239 
5240  bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
5241  if (!LegalMask) {
5242  std::swap(NewLHS, NewRHS);
5243  ShuffleVectorSDNode::commuteMask(Mask);
5244  LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
5245  }
5246 
5247  if (LegalMask)
5248  return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
5249  }
5250  }
5251  }
5252  }
5253 
5254  // fold (or c1, c2) -> c1|c2
5255  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5256  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
5257  if (N0C && N1C && !N1C->isOpaque())
5258  return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
5259  // canonicalize constant to RHS
5260  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5261  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5262  return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
5263  // fold (or x, 0) -> x
5264  if (isNullConstant(N1))
5265  return N0;
5266  // fold (or x, -1) -> -1
5267  if (isAllOnesConstant(N1))
5268  return N1;
5269 
5270  if (SDValue NewSel = foldBinOpIntoSelect(N))
5271  return NewSel;
5272 
5273  // fold (or x, c) -> c iff (x & ~c) == 0
5274  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
5275  return N1;
5276 
5277  if (SDValue Combined = visitORLike(N0, N1, N))
5278  return Combined;
5279 
5280  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
5281  if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
5282  return BSwap;
5283  if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
5284  return BSwap;
5285 
5286  // reassociate or
5287  if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
5288  return ROR;
5289 
5290  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
5291  // iff (c1 & c2) != 0 or c1/c2 are undef.
5292  auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
5293  return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
5294  };
5295  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
5296  ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
5297  if (SDValue COR = DAG.FoldConstantArithmetic(
5298  ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
5299  SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
5300  AddToWorklist(IOR.getNode());
5301  return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
5302  }
5303  }
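  // Editor's illustration (hedged sketch, not LLVM source): the identity used
  // above, (X & c1) | c2 == (X | c2) & (c1 | c2), on arbitrary sample values
  // X = 0xC, c1 = 0xA, c2 = 0x6.
  static_assert(((0xCu & 0xAu) | 0x6u) == ((0xCu | 0x6u) & (0xAu | 0x6u)),
                "(or (and X,c1), c2) == (and (or X,c2), (or c1,c2))");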
5304 
5305  // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
5306  if (N0.getOpcode() == N1.getOpcode())
5307  if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
5308  return V;
5309 
5310  // See if this is some rotate idiom.
5311  if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
5312  return SDValue(Rot, 0);
5313 
5314  if (SDValue Load = MatchLoadCombine(N))
5315  return Load;
5316 
5317  // Simplify the operands using demanded-bits information.
5318  if (SimplifyDemandedBits(SDValue(N, 0)))
5319  return SDValue(N, 0);
5320 
5321  return SDValue();
5322 }
5323 
5324 static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
5325  if (Op.getOpcode() == ISD::AND &&
5326  DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
5327  Mask = Op.getOperand(1);
5328  return Op.getOperand(0);
5329  }
5330  return Op;
5331 }
5332 
5333 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
5334 static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
5335  SDValue &Mask) {
5336  Op = stripConstantMask(DAG, Op, Mask);
5337  if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
5338  Shift = Op;
5339  return true;
5340  }
5341  return false;
5342 }
5343 
5344 /// Helper function for visitOR to extract the needed side of a rotate idiom
5345 /// from a shl/srl/mul/udiv. This is meant to handle cases where
5346 /// InstCombine merged some outside op with one of the shifts from
5347 /// the rotate pattern.
5348 /// \returns An empty \c SDValue if the needed shift couldn't be extracted.
5349 /// Otherwise, returns an expansion of \p ExtractFrom based on the following
5350 /// patterns:
5351 ///
5352 /// (or (mul v c0) (srl (mul v c1) c2)):
5353 /// expands (mul v c0) -> (shl (mul v c1) c3)
5354 ///
5355 /// (or (udiv v c0) (shl (udiv v c1) c2)):
5356 /// expands (udiv v c0) -> (srl (udiv v c1) c3)
5357 ///
5358 /// (or (shl v c0) (srl (shl v c1) c2)):
5359 /// expands (shl v c0) -> (shl (shl v c1) c3)
5360 ///
5361 /// (or (srl v c0) (shl (srl v c1) c2)):
5362 /// expands (srl v c0) -> (srl (srl v c1) c3)
5363 ///
5364 /// Such that in all cases, c3+c2==bitwidth(op v c1).
5365 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
5366  SDValue ExtractFrom, SDValue &Mask,
5367  const SDLoc &DL) {
5368  assert(OppShift && ExtractFrom && "Empty SDValue");
5369  assert(
5370  (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
5371  "Existing shift must be valid as a rotate half");
5372 
5373  ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
5374  // Preconditions:
5375  // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
5376  //
5377  // Find opcode of the needed shift to be extracted from (op0 v c0).
5378  unsigned Opcode = ISD::DELETED_NODE;
5379  bool IsMulOrDiv = false;
5380  // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
5381  // opcode or its arithmetic (mul or udiv) variant.
5382  auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
5383  IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
5384  if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
5385  return false;
5386  Opcode = NeededShift;
5387  return true;
5388  };
5389  // op0 must be either the needed shift opcode or the mul/udiv equivalent
5390  // that the needed shift can be extracted from.
5391  if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
5392  (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
5393  return SDValue();
5394 
5395  // op0 must be the same opcode on both sides, have the same LHS argument,
5396  // and produce the same value type.
5397  SDValue OppShiftLHS = OppShift.getOperand(0);
5398  EVT ShiftedVT = OppShiftLHS.getValueType();
5399  if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
5400  OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
5401  ShiftedVT != ExtractFrom.getValueType())
5402  return SDValue();
5403 
5404  // Amount of the existing shift.
5405  ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
5406  // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
5407  ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
5408  // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
5409  ConstantSDNode *ExtractFromCst =
5410  isConstOrConstSplat(ExtractFrom.getOperand(1));
5411  // TODO: We should be able to handle non-uniform constant vectors for these values
5412  // Check that we have constant values.
5413  if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
5414  !OppLHSCst || !OppLHSCst->getAPIntValue() ||
5415  !ExtractFromCst || !ExtractFromCst->getAPIntValue())
5416  return SDValue();
5417 
5418  // Compute the shift amount we need to extract to complete the rotate.
5419  const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
5420  if (OppShiftCst->getAPIntValue().ugt(VTWidth))
5421  return SDValue();
5422  APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
5423  // Normalize the bitwidth of the two mul/udiv/shift constant operands.
5424  APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
5425  APInt OppLHSAmt = OppLHSCst->getAPIntValue();
5426  zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
5427 
5428  // Now try extract the needed shift from the ExtractFrom op and see if the
5429  // result matches up with the existing shift's LHS op.
5430  if (IsMulOrDiv) {
5431  // Op to extract from is a mul or udiv by a constant.
5432  // Check:
5433  // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
5434  // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
5435  const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
5436  NeededShiftAmt.getZExtValue());
5437  APInt ResultAmt;
5438  APInt Rem;
5439  APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
5440  if (Rem != 0 || ResultAmt != OppLHSAmt)
5441  return SDValue();
5442  } else {
5443  // Op to extract from is a shift by a constant.
5444  // Check:
5445  // c2 - (bitwidth(op0 v c0) - c1) == c0
5446  if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
5447  ExtractFromAmt.getBitWidth()))
5448  return SDValue();
5449  }
5450 
5451  // Return the expanded shift op that should allow a rotate to be formed.
5452  EVT ShiftVT = OppShift.getOperand(1).getValueType();
5453  EVT ResVT = ExtractFrom.getValueType();
5454  SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
5455  return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
5456 }
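// Editor's illustration (hedged sketch, not LLVM source): the mul expansion
// described above. With c1 = 3 and c3 = 4, c0 = 48 satisfies
// c0 == c1 * (1 << c3), so (mul v 48) can be rewritten as (shl (mul v 3) 4);
// the identity holds for any v, including with unsigned wraparound.
static_assert((7u * 48u) == ((7u * 3u) << 4),
              "(mul v 48) == (shl (mul v 3) 4) for v = 7");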
5457 
5458 // Return true if we can prove that, whenever Neg and Pos are both in the
5459 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
5460 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
5461 //
5462 // (or (shift1 X, Neg), (shift2 X, Pos))
5463 //
5464 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
5465 // in direction shift1 by Neg. The range [0, EltSize) means that we only need
5466 // to consider shift amounts with defined behavior.
5467 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
5468  SelectionDAG &DAG) {
5469  // If EltSize is a power of 2 then:
5470  //
5471  // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
5472  // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
5473  //
5474  // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
5475  // for the stronger condition:
5476  //
5477  // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
5478  //
5479  // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
5480  // we can just replace Neg with Neg' for the rest of the function.
5481  //
5482  // In other cases we check for the even stronger condition:
5483  //
5484  // Neg == EltSize - Pos [B]
5485  //
5486  // for all Neg and Pos. Note that the (or ...) then invokes undefined
5487  // behavior if Pos == 0 (and consequently Neg == EltSize).
5488  //
5489  // We could actually use [A] whenever EltSize is a power of 2, but the
5490  // only extra cases that it would match are those uninteresting ones
5491  // where Neg and Pos are never in range at the same time. E.g. for
5492  // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
5493  // as well as (sub 32, Pos), but:
5494  //
5495  // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
5496  //
5497  // always invokes undefined behavior for 32-bit X.
5498  //
5499  // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
5500  unsigned MaskLoBits = 0;
5501  if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
5502  if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
5503  KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
5504  unsigned Bits = Log2_64(EltSize);
5505  if (NegC->getAPIntValue().getActiveBits() <= Bits &&
5506  ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
5507  Neg = Neg.getOperand(0);
5508  MaskLoBits = Bits;
5509  }
5510  }
5511  }
5512 
5513  // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
5514  if (Neg.getOpcode() != ISD::SUB)
5515  return false;
5516  ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
5517  if (!NegC)
5518  return false;
5519  SDValue NegOp1 = Neg.getOperand(1);
5520 
5521  // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
5522  // Pos'. The truncation is redundant for the purpose of the equality.
5523  if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
5524  if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
5525  KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
5526  if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
5527  ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
5528  MaskLoBits))
5529  Pos = Pos.getOperand(0);
5530  }
5531  }
5532 
5533  // The condition we need is now:
5534  //
5535  // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
5536  //
5537  // If NegOp1 == Pos then we need:
5538  //
5539  // EltSize & Mask == NegC & Mask
5540  //
5541  // (because "x & Mask" is a truncation and distributes through subtraction).
5542  APInt Width;
5543  if (Pos == NegOp1)
5544  Width = NegC->getAPIntValue();
5545 
5546  // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
5547  // Then the condition we want to prove becomes:
5548  //
5549  // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
5550  //
5551  // which, again because "x & Mask" is a truncation, becomes:
5552  //
5553  // NegC & Mask == (EltSize - PosC) & Mask
5554  // EltSize & Mask == (NegC + PosC) & Mask
5555  else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
5556  if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
5557  Width = PosC->getAPIntValue() + NegC->getAPIntValue();
5558  else
5559  return false;
5560  } else
5561  return false;
5562 
5563  // Now we just need to check that EltSize & Mask == Width & Mask.
5564  if (MaskLoBits)
5565  // EltSize & Mask is 0 since Mask is EltSize - 1.
5566  return Width.getLoBits(MaskLoBits) == 0;
5567  return Width == EltSize;
5568 }
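// Editor's illustration (hedged sketch, not LLVM source): the relation this
// routine proves, for EltSize = 32 and Pos = 8 (so Neg = 24). The two
// opposing shifts combine into a rotate by Pos.
static_assert(((0x12345678u >> 8) | (0x12345678u << 24)) == 0x78123456u,
              "(or (srl x, 8), (shl x, 32-8)) == rotr(x, 8)");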
5569 
5570 // A subroutine of MatchRotate used once we have found an OR of two opposite
5571 // shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
5572 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
5573 // former being preferred if supported. InnerPos and InnerNeg are Pos and
5574 // Neg with outer conversions stripped away.
5575 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
5576  SDValue Neg, SDValue InnerPos,
5577  SDValue InnerNeg, unsigned PosOpcode,
5578  unsigned NegOpcode, const SDLoc &DL) {
5579  // fold (or (shl x, (*ext y)),
5580  // (srl x, (*ext (sub 32, y)))) ->
5581  // (rotl x, y) or (rotr x, (sub 32, y))
5582  //
5583  // fold (or (shl x, (*ext (sub 32, y))),
5584  // (srl x, (*ext y))) ->
5585  // (rotr x, y) or (rotl x, (sub 32, y))
5586  EVT VT = Shifted.getValueType();
5587  if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
5588  bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
5589  return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
5590  HasPos ? Pos : Neg).getNode();
5591  }
5592 
5593  return nullptr;
5594 }
5595 
5596 // MatchRotate - Handle an 'or' of two operands. If this is one of the many
5597 // idioms for rotate, and if the target supports rotation instructions, generate
5598 // a rot[lr].
5599 SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
5600  // Must be a legal type. Expanded and promoted types won't work with rotates.
5601  EVT VT = LHS.getValueType();
5602  if (!TLI.isTypeLegal(VT)) return nullptr;
5603 
5604  // The target must have at least one rotate flavor.
5605  bool HasROTL = hasOperation(ISD::ROTL, VT);
5606  bool HasROTR = hasOperation(ISD::ROTR, VT);
5607  if (!HasROTL && !HasROTR) return nullptr;
5608 
5609  // Check for truncated rotate.
5610  if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
5611  LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
5612  assert(LHS.getValueType() == RHS.getValueType());
5613  if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
5614  return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(),
5615  SDValue(Rot, 0)).getNode();
5616  }
5617  }
5618 
5619  // Match "(X shl/srl V1) & V2" where V2 may not be present.
5620  SDValue LHSShift; // The shift.
5621  SDValue LHSMask; // AND value if any.
5622  matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
5623 
5624  SDValue RHSShift; // The shift.
5625  SDValue RHSMask; // AND value if any.
5626  matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
5627 
5628  // If neither side matched a rotate half, bail
5629  if (!LHSShift && !RHSShift)
5630  return nullptr;
5631 
5632  // InstCombine may have combined a constant shl, srl, mul, or udiv with one
5633  // side of the rotate, so try to handle that here. In all cases we need to
5634  // pass the matched shift from the opposite side to compute the opcode and
5635  // needed shift amount to extract. We still want to do this if both sides
5636  // matched a rotate half because one half may be a potential overshift that
5637  // can be broken down (ie if InstCombine merged two shl or srl ops into a
5638  // single one).
5639 
5640  // Have LHS side of the rotate, try to extract the needed shift from the RHS.
5641  if (LHSShift)
5642  if (SDValue NewRHSShift =
5643  extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
5644  RHSShift = NewRHSShift;
5645  // Have RHS side of the rotate, try to extract the needed shift from the LHS.
5646  if (RHSShift)
5647  if (SDValue NewLHSShift =
5648  extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
5649  LHSShift = NewLHSShift;
5650 
5651  // If a side is still missing, nothing else we can do.
5652  if (!RHSShift || !LHSShift)
5653  return nullptr;
5654 
5655  // At this point we've matched or extracted a shift op on each side.
5656 
5657  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
5658  return nullptr; // Not shifting the same value.
5659 
5660  if (LHSShift.getOpcode() == RHSShift.getOpcode())
5661  return nullptr; // Shifts must disagree.
5662 
5663  // Canonicalize shl to left side in a shl/srl pair.
5664  if (RHSShift.getOpcode() == ISD::SHL) {
5665  std::swap(LHS, RHS);
5666  std::swap(LHSShift, RHSShift);
5667  std::swap(LHSMask, RHSMask);
5668  }
5669 
5670  unsigned EltSizeInBits = VT.getScalarSizeInBits();
5671  SDValue LHSShiftArg = LHSShift.getOperand(0);
5672  SDValue LHSShiftAmt = LHSShift.getOperand(1);
5673  SDValue RHSShiftArg = RHSShift.getOperand(0);
5674  SDValue RHSShiftAmt = RHSShift.getOperand(1);
5675 
5676  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
5677  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
5678  auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
5679  ConstantSDNode *RHS) {
5680  return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
5681  };
5682  if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
5683  SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
5684  LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
5685 
5686  // If there is an AND of either shifted operand, apply it to the result.
5687  if (LHSMask.getNode() || RHSMask.getNode()) {
5688  SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
5689  SDValue Mask = AllOnes;
5690 
5691  if (LHSMask.getNode()) {
5692  SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
5693  Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
5694  DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
5695  }
5696  if (RHSMask.getNode()) {
5697  SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
5698  Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
5699  DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
5700  }
5701 
5702  Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
5703  }
5704 
5705  return Rot.getNode();
5706  }
5707 
5708  // If there is a mask here, and we have a variable shift, we can't be sure
5709  // that we're masking out the right stuff.
5710  if (LHSMask.getNode() || RHSMask.getNode())
5711  return nullptr;
5712 
5713  // If the shift amount is sign/zext/any-extended just peel it off.
5714  SDValue LExtOp0 = LHSShiftAmt;
5715  SDValue RExtOp0 = RHSShiftAmt;
5716  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
5717  LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
5718  LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
5719  LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
5720  (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
5721  RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
5722  RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
5723  RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
5724  LExtOp0 = LHSShiftAmt.getOperand(0);
5725  RExtOp0 = RHSShiftAmt.getOperand(0);
5726  }
5727 
5728  SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
5729  LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
5730  if (TryL)
5731  return TryL;
5732 
5733  SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
5734  RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
5735  if (TryR)
5736  return TryR;
5737 
5738  return nullptr;
5739 }
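// Editor's illustration (hedged sketch, not LLVM source): the constant-amount
// case handled by MatchRotateSum above. C1 + C2 == 32, so the shl/srl pair is
// a rotate left by C1.
static_assert(((0x80000001u << 1) | (0x80000001u >> 31)) == 0x00000003u,
              "(or (shl x, 1), (srl x, 31)) == rotl(x, 1)");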
5740 
5741 namespace {
5742 
5743 /// Represents known origin of an individual byte in load combine pattern. The
5744 /// value of the byte is either constant zero or comes from memory.
5745 struct ByteProvider {
5746  // For constant zero providers Load is set to nullptr. For memory providers
5747  // Load represents the node which loads the byte from memory.
5748  // ByteOffset is the offset of the byte in the value produced by the load.
5749  LoadSDNode *Load = nullptr;
5750  unsigned ByteOffset = 0;
5751 
5752  ByteProvider() = default;
5753 
5754  static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
5755  return ByteProvider(Load, ByteOffset);
5756  }
5757 
5758  static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
5759 
5760  bool isConstantZero() const { return !Load; }
5761  bool isMemory() const { return Load; }
5762 
5763  bool operator==(const ByteProvider &Other) const {
5764  return Other.Load == Load && Other.ByteOffset == ByteOffset;
5765  }
5766 
5767 private:
5768  ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
5769  : Load(Load), ByteOffset(ByteOffset) {}
5770 };
5771 
5772 } // end anonymous namespace
5773 
5774 /// Recursively traverses the expression calculating the origin of the requested
5775 /// byte of the given value. Returns None if the provider can't be calculated.
5776 ///
5777 /// For all values except the root of the expression, verifies that the value
5778 /// has exactly one use; if not, returns None. This guarantees that, when the
5779 /// origin of the byte is returned, the values which contribute to the byte
5780 /// are not used outside of this expression.
5781 ///
5782 /// Because the parts of the expression are not allowed to have more than one
5783 /// use, this function iterates over trees, not DAGs. So it never visits the
5784 /// same node more than once.
5785 static const Optional<ByteProvider>
5786 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
5787  bool Root = false) {
5788  // Typical i64 by i8 pattern requires recursion up to 8 calls depth
5789  if (Depth == 10)
5790  return None;
5791 
5792  if (!Root && !Op.hasOneUse())
5793  return None;
5794 
5795  assert(Op.getValueType().isScalarInteger() && "can't handle other types");
5796  unsigned BitWidth = Op.getValueSizeInBits();
5797  if (BitWidth % 8 != 0)
5798  return None;
5799  unsigned ByteWidth = BitWidth / 8;
5800  assert(Index < ByteWidth && "invalid index requested");
5801  (void) ByteWidth;
5802 
5803  switch (Op.getOpcode()) {
5804  case ISD::OR: {
5805  auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
5806  if (!LHS)
5807  return None;
5808  auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
5809  if (!RHS)
5810  return None;
5811 
5812  if (LHS->isConstantZero())
5813  return RHS;
5814  if (RHS->isConstantZero())
5815  return LHS;
5816  return None;
5817  }
5818  case ISD::SHL: {
5819  auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
5820  if (!ShiftOp)
5821  return None;
5822 
5823  uint64_t BitShift = ShiftOp->getZExtValue();
5824  if (BitShift % 8 != 0)
5825  return None;
5826  uint64_t ByteShift = BitShift / 8;
5827 
5828  return Index < ByteShift
5829  ? ByteProvider::getConstantZero()
5830  : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
5831  Depth + 1);
5832  }
5833  case ISD::ANY_EXTEND:
5834  case ISD::SIGN_EXTEND:
5835  case ISD::ZERO_EXTEND: {
5836  SDValue NarrowOp = Op->getOperand(0);
5837  unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
5838  if (NarrowBitWidth % 8 != 0)
5839  return None;
5840  uint64_t NarrowByteWidth = NarrowBitWidth / 8;
5841 
5842  if (Index >= NarrowByteWidth)
5843  return Op.getOpcode() == ISD::ZERO_EXTEND
5844  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
5845  : None;
5846  return calculateByteProvider(NarrowOp, Index, Depth + 1);
5847  }
5848  case ISD::BSWAP:
5849  return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
5850  Depth + 1);
5851  case ISD::LOAD: {
5852  auto L = cast<LoadSDNode>(Op.getNode());
5853  if (L->isVolatile() || L->isIndexed())
5854  return None;
5855 
5856  unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
5857  if (NarrowBitWidth % 8 != 0)
5858  return None;
5859  uint64_t NarrowByteWidth = NarrowBitWidth / 8;
5860 
5861  if (Index >= NarrowByteWidth)
5862  return L->getExtensionType() == ISD::ZEXTLOAD
5863  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
5864  : None;
5865  return ByteProvider::getMemory(L, Index);
5866  }
5867  }
5868 
5869  return None;
5870 }
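// Editor's illustration (hedged sketch, not LLVM source): the SHL rule above
// on concrete bytes. After shifting x = 0x11223344 left by one byte, byte 0
// is a constant zero and byte 3 is byte 2 of the original value.
static_assert(((0x11223344u << 8) & 0xFFu) == 0x00u,
              "byte 0 of (shl x, 8) is constant zero");
static_assert((((0x11223344u << 8) >> 24) & 0xFFu) == 0x22u,
              "byte 3 of (shl x, 8) is byte 2 of x");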
5871 
5872 /// Match a pattern where a wide type scalar value is loaded by several narrow
5873 /// loads and combined by shifts and ors. Fold it into a single load or a load
5874 /// and a BSWAP if the target supports it.
5875 ///
5876 /// Assuming little endian target:
5877 /// i8 *a = ...
5878 /// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
5879 /// =>
5880 /// i32 val = *((i32)a)
5881 ///
5882 /// i8 *a = ...
5883 /// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
5884 /// =>
5885 /// i32 val = BSWAP(*((i32)a))
5886 ///
5887 /// TODO: This rule matches complex patterns with OR node roots and doesn't
5888 /// interact well with the worklist mechanism. When a part of the pattern is
5889 /// updated (e.g. one of the loads) its direct users are put into the worklist,
5890 /// but the root node of the pattern which triggers the load combine is not
5891 /// necessarily a direct user of the changed node. For example, once the address
5892 /// of t28 load is reassociated load combine won't be triggered:
5893 /// t25: i32 = add t4, Constant:i32<2>
5894 /// t26: i64 = sign_extend t25
5895 /// t27: i64 = add t2, t26
5896 /// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
5897 /// t29: i32 = zero_extend t28
5898 /// t32: i32 = shl t29, Constant:i8<8>
5899 /// t33: i32 = or t23, t32
5900 /// As a possible fix visitLoad can check if the load can be a part of a load
5901 /// combine pattern and add corresponding OR roots to the worklist.
5902 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
5903  assert(N->getOpcode() == ISD::OR &&
5904  "Can only match load combining against OR nodes");
5905 
5906  // Handles simple types only
5907  EVT VT = N->getValueType(0);
5908  if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
5909  return SDValue();
5910  unsigned ByteWidth = VT.getSizeInBits() / 8;
5911 
5912  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5913  // Before legalize we can introduce too wide illegal loads which will be later
5914  // split into legal sized loads. This enables us to combine i64 load by i8
5915  // patterns to a couple of i32 loads on 32 bit targets.
5916  if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
5917  return SDValue();
5918 
5919  std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = [](
5920  unsigned BW, unsigned i) { return i; };
5921  std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = [](
5922  unsigned BW, unsigned i) { return BW - i - 1; };
5923 
5924  bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
5925  auto MemoryByteOffset = [&] (ByteProvider P) {
5926  assert(P.isMemory() && "Must be a memory byte provider");
5927  unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
5928  assert(LoadBitWidth % 8 == 0 &&
5929  "can only analyze providers for individual bytes not bit");
5930  unsigned LoadByteWidth = LoadBitWidth / 8;
5931  return IsBigEndianTarget
5932  ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
5933  : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
5934  };
5935 
5936  Optional<BaseIndexOffset> Base;
5937  SDValue Chain;
5938 
5939  SmallPtrSet<LoadSDNode *, 8> Loads;
5940  Optional<ByteProvider> FirstByteProvider;
5941  int64_t FirstOffset = INT64_MAX;
5942 
5943  // Check if all the bytes of the OR we are looking at are loaded from the same
5944  // base address. Collect bytes offsets from Base address in ByteOffsets.
5945  SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
5946  for (unsigned i = 0; i < ByteWidth; i++) {
5947  auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
5948  if (!P || !P->isMemory()) // All the bytes must be loaded from memory
5949  return SDValue();
5950 
5951  LoadSDNode *L = P->Load;
5952  assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
5953  "Must be enforced by calculateByteProvider");
5954  assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
5955 
5956  // All loads must share the same chain
5957  SDValue LChain = L->getChain();
5958  if (!Chain)
5959  Chain = LChain;
5960  else if (Chain != LChain)
5961  return SDValue();
5962 
5963  // Loads must share the same base address
5964  BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
5965  int64_t ByteOffsetFromBase = 0;
5966  if (!Base)
5967  Base = Ptr;
5968  else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
5969  return SDValue();
5970 
5971  // Calculate the offset of the current byte from the base address
5972  ByteOffsetFromBase += MemoryByteOffset(*P);
5973  ByteOffsets[i] = ByteOffsetFromBase;
5974 
5975  // Remember the first byte load
5976  if (ByteOffsetFromBase < FirstOffset) {
5977  FirstByteProvider = P;
5978  FirstOffset = ByteOffsetFromBase;
5979  }
5980 
5981  Loads.insert(L);
5982  }
5983  assert(!Loads.empty() && "All the bytes of the value must be loaded from "
5984  "memory, so there must be at least one load which produces the value");
5985  assert(Base && "Base address of the accessed memory location must be set");
5986  assert(FirstOffset != INT64_MAX && "First byte offset must be set");
5987 
5988  // Check if the bytes of the OR we are looking at match with either big or
5989  // little endian value load
5990  bool BigEndian = true, LittleEndian = true;
5991  for (unsigned i = 0; i < ByteWidth; i++) {
5992  int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
5993  LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i);
5994  BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i);
5995  if (!BigEndian && !LittleEndian)
5996  return SDValue();
5997  }
5998  assert((BigEndian != LittleEndian) && "should be either or");
5999  assert(FirstByteProvider && "must be set");
6000 
6001  // Ensure that the first byte is loaded from zero offset of the first load.
6002  // So the combined value can be loaded from the first load address.
6003  if (MemoryByteOffset(*FirstByteProvider) != 0)
6004  return SDValue();
6005  LoadSDNode *FirstLoad = FirstByteProvider->Load;
6006 
6007  // The node we are looking at matches with the pattern, check if we can
6008  // replace it with a single load and bswap if needed.
6009 
6010  // If the load needs byte swap check if the target supports it
6011  bool NeedsBswap = IsBigEndianTarget != BigEndian;
6012 
6013  // Before legalize we can introduce illegal bswaps which will be later
6014  // converted to an explicit bswap sequence. This way we end up with a single
6015  // load and byte shuffling instead of several loads and byte shuffling.
6016  if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
6017  return SDValue();
6018 
6019  // Check that a load of the wide type is both allowed and fast on the target
6020  bool Fast = false;
6021  bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
6022  VT, FirstLoad->getAddressSpace(),
6023  FirstLoad->getAlignment(), &Fast);
6024  if (!Allowed || !Fast)
6025  return SDValue();
6026 
6027  SDValue NewLoad =
6028  DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
6029  FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
6030 
6031  // Transfer chain users from old loads to the new load.
6032  for (LoadSDNode *L : Loads)
6033  DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
6034 
6035  return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
6036 }
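// Editor's illustration (hedged sketch, not LLVM source): assembling the
// little-endian example from the comment above. Bytes a[0..3] = 0x78, 0x56,
// 0x34, 0x12 combined by shifts and ors give the value a single i32
// little-endian load would produce.
static_assert((0x78u | (0x56u << 8) | (0x34u << 16) | (0x12u << 24)) ==
                  0x12345678u,
              "a[0] | a[1]<<8 | a[2]<<16 | a[3]<<24 == LE i32 load");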
6037 
6038 // If the target has andn, bsl, or a similar bit-select instruction,
6039 // we want to unfold masked merge, with canonical pattern of:
6040 //   |        A        |  |B|
6041 //   ((x ^ y) & m) ^ y
6042 //    |  D  |
6043 // Into:
6044 // (x & m) | (y & ~m)
6045 // If y is a constant, and the 'andn' does not work with immediates,
6046 // we unfold into a different pattern:
6047 // ~(~x & m) & (m | y)
6048 // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
6049 // the very least that breaks andnpd / andnps patterns, and because those
6050 // patterns are simplified in IR and shouldn't be created in the DAG
6051 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
6052  assert(N->getOpcode() == ISD::XOR);
6053 
6054  // Don't touch 'not' (i.e. where y = -1).
6055  if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
6056  return SDValue();
6057 
6058  EVT VT = N->getValueType(0);
6059 
6060  // There are 3 commutable operators in the pattern,
6061  // so we have to deal with 8 possible variants of the basic pattern.
6062  SDValue X, Y, M;
6063  auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
6064  if (And.getOpcode() != ISD::AND || !And.hasOneUse())
6065  return false;
6066  SDValue Xor = And.getOperand(XorIdx);
6067  if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
6068  return false;
6069  SDValue Xor0 = Xor.getOperand(0);
6070  SDValue Xor1 = Xor.getOperand(1);
6071  // Don't touch 'not' (i.e. where y = -1).
6072  if (isAllOnesOrAllOnesSplat(Xor1))
6073  return false;
6074  if (Other == Xor0)
6075  std::swap(Xor0, Xor1);
6076  if (Other != Xor1)
6077  return false;
6078  X = Xor0;
6079  Y = Xor1;
6080  M = And.getOperand(XorIdx ? 0 : 1);
6081  return true;
6082  };
6083 
6084  SDValue N0 = N->getOperand(0);
6085  SDValue N1 = N->getOperand(1);
6086  if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
6087  !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
6088  return SDValue();
6089 
6090  // Don't do anything if the mask is constant. This should not be reachable.
6091  // InstCombine should have already unfolded this pattern, and DAGCombiner
6092  // probably shouldn't produce it, either.
6093  if (isa<ConstantSDNode>(M.getNode()))
6094  return SDValue();
6095 
6096  // We can transform if the target has AndNot
6097  if (!TLI.hasAndNot(M))
6098  return SDValue();
6099 
6100  SDLoc DL(N);
6101 
6102  // If Y is a constant, check that 'andn' works with immediates.
6103  if (!TLI.hasAndNot(Y)) {
6104  assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
6105  // If not, we need to do a bit more work to make sure andn is still used.
6106  SDValue NotX = DAG.getNOT(DL, X, VT);
6107  SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
6108  SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
6109  SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
6110  return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
6111  }
6112 
6113  SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
6114  SDValue NotM = DAG.getNOT(DL, M, VT);
6115  SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
6116 
6117  return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
6118 }
6119 
6120 SDValue DAGCombiner::visitXOR(SDNode *N) {
6121  SDValue N0 = N->getOperand(0);
6122  SDValue N1 = N->getOperand(1);
6123  EVT VT = N0.getValueType();
6124 
6125  // fold vector ops
6126  if (VT.isVector()) {
6127  if (SDValue FoldedVOp = SimplifyVBinOp(N))
6128  return FoldedVOp;
6129 
6130  // fold (xor x, 0) -> x, vector edition
6131  if (ISD::isBuildVectorAllZeros(N0.getNode()))
6132  return N1;
6133  if (ISD::isBuildVectorAllZeros(N1.getNode()))
6134  return N0;
6135  }
6136 
6137  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
6138  SDLoc DL(N);
6139  if (N0.isUndef() && N1.isUndef())
6140  return DAG.getConstant(0, DL, VT);
6141  // fold (xor x, undef) -> undef
6142  if (N0.isUndef())
6143  return N0;
6144  if (N1.isUndef())
6145  return N1;
6146  // fold (xor c1, c2) -> c1^c2
6147  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
6148  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
6149  if (N0C && N1C)
6150  return DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, N0C, N1C);
6151  // canonicalize constant to RHS
6152  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6153  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6154  return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
6155  // fold (xor x, 0) -> x
6156  if (isNullConstant(N1))
6157  return N0;
6158 
6159  if (SDValue NewSel = foldBinOpIntoSelect(N))
6160  return NewSel;
6161 
6162  // reassociate xor
6163  if (SDValue RXOR = ReassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
6164  return RXOR;
6165 
6166  // fold !(x cc y) -> (x !cc y)
6167  unsigned N0Opcode = N0.getOpcode();
6168  SDValue LHS, RHS, CC;
6169  if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
6170  ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
6171  LHS.getValueType().isInteger());
6172  if (!LegalOperations ||
6173  TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
6174  switch (N0Opcode) {
6175  default:
6176  llvm_unreachable("Unhandled SetCC Equivalent!");
6177  case ISD::SETCC:
6178  return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
6179  case ISD::SELECT_CC:
6180  return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
6181  N0.getOperand(3), NotCC);
6182  }
6183  }
6184  }
6185 
6186  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
6187  if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
6188  isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
6189  SDValue V = N0.getOperand(0);
6190  SDLoc DL0(N0);
6191  V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
6192  DAG.getConstant(1, DL0, V.getValueType()));
6193  AddToWorklist(V.getNode());
6194  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
6195  }
6196 
6197  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
6198  if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
6199  (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
6200  SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
6201  if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
6202  unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
6203  LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
6204  RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
6205  AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
6206  return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
6207  }
6208  }
6209  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
6210  if (isAllOnesConstant(N1) && N0.hasOneUse() &&
6211  (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
6212  SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
6213  if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
6214  unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
6215  LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
6216  RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
6217  AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
6218  return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
6219  }
6220  }
6221  // fold (xor (and x, y), y) -> (and (not x), y)
6222  if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
6223  SDValue X = N0.getOperand(0);
6224  SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
6225  AddToWorklist(NotX.getNode());
6226  return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
6227  }
6228 
6229  if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
6230  ConstantSDNode *XorC = isConstOrConstSplat(N1);
6231  ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
6232  unsigned BitWidth = VT.getScalarSizeInBits();
6233  if (XorC && ShiftC) {
6234  // Don't crash on an oversized shift. We cannot guarantee that a bogus
6235  // shift has been simplified to undef.
6236  uint64_t ShiftAmt = ShiftC->getLimitedValue();
6237  if (ShiftAmt < BitWidth) {
6238  APInt Ones = APInt::getAllOnesValue(BitWidth);
6239  Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
6240  if (XorC->getAPIntValue() == Ones) {
6241  // If the xor constant is a shifted -1, do a 'not' before the shift:
6242  // xor (X << ShiftC), XorC --> (not X) << ShiftC
6243  // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
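// For example, on i16: xor (X << 4), 0xFFF0 --> (not X) << 4, since
// 0xFFF0 == (-1 << 4); the low four bits are zero either way.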
6244  SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
6245  return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
6246  }
6247  }
6248  }
6249  }
6250 
6251  // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
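// This matches the classic branchless absolute-value idiom: for i32,
// Y = X >> 31 (arithmetic) is 0 or -1, and (X + Y) ^ Y negates X exactly
// when X is negative.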
6252  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
6253  SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
6254  SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
6255  if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
6256  SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
6257  SDValue S0 = S.getOperand(0);
6258  if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
6259  unsigned OpSizeInBits = VT.getScalarSizeInBits();
6260  if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
6261  if (C->getAPIntValue() == (OpSizeInBits - 1))
6262  return DAG.getNode(ISD::ABS, DL, VT, S0);
6263  }
6264  }
6265  }
6266 
6267  // fold (xor x, x) -> 0
6268  if (N0 == N1)
6269  return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
6270 
6271  // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
6272  // Here is a concrete example of this equivalence:
6273  // i16 x == 14
6274  // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
6275  // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
6276  //
6277  // =>
6278  //
6279  // i16 ~1 == 0b1111111111111110
6280  // i16 rol(~1, 14) == 0b1011111111111111
6281  //
6282  // Some additional tips to help conceptualize this transform:
6283  // - Try to see the operation as placing a single zero in a value of all ones.
6284  // - There is no value of x for which the result could be zero.
6285  // - Values of x larger than the bitwidth are undefined and do not require a
6286  // consistent result.
6287  // - Pushing the zero left requires shifting one-bits in from the right.
6288  // A rotate left of ~1 is a nice way of achieving the desired result.
6289  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
6290  isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
6291  return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
6292  N0.getOperand(1));
6293  }
6294 
6295  // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
6296  if (N0Opcode == N1.getOpcode())
6297  if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
6298  return V;
6299 
6300  // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
6301  if (SDValue MM = unfoldMaskedMerge(N))
6302  return MM;
6303 
6304  // Simplify the expression using non-local knowledge.
6305  if (SimplifyDemandedBits(SDValue(N, 0)))
6306  return SDValue(N, 0);
6307 
6308  return SDValue();
6309 }
6310 
6311 /// Handle transforms common to the three shifts, when the shift amount is a
6312 /// constant.
6313 SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
6314  // Do not turn a 'not' into a regular xor.
6315  if (isBitwiseNot(N->getOperand(0)))
6316  return SDValue();
6317 
6318  SDNode *LHS = N->getOperand(0).getNode();
6319  if (!LHS->hasOneUse()) return SDValue();
6320 
6321  // We want to pull some binops through shifts, so that we have (and (shift))
6322  // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
6323  // thing happens with address calculations, so it's important to canonicalize
6324  // it.
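// For example: shl (add X, 5), 2 --> add (shl X, 2), 20, since
// (X + 5) << 2 == (X << 2) + (5 << 2).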
6325  bool HighBitSet = false; // Can we transform this if the high bit is set?
6326 
6327  switch (LHS->getOpcode()) {
6328  default: return SDValue();
6329  case ISD::OR:
6330  case ISD::XOR:
6331  HighBitSet = false; // We can only transform sra if the high bit is clear.
6332  break;
6333  case ISD::AND:
6334  HighBitSet = true; // We can only transform sra if the high bit is set.
6335  break;
6336  case ISD::ADD:
6337  if (N->getOpcode() != ISD::SHL)
6338  return SDValue(); // only shl(add) not sr[al](add).
6339  HighBitSet = false; // We can only transform sra if the high bit is clear.
6340  break;
6341  }
6342 
6343  // We require the RHS of the binop to be a constant and not opaque as well.
6344  ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
6345  if (!BinOpCst) return SDValue();
6346 
6347  // FIXME: disable this unless the input to the binop is a shift by a constant
6348  // or is a copy/select. Enable other cases once we know they are profitable.
6349  SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
6350  bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
6351  BinOpLHSVal->getOpcode() == ISD::SRA ||
6352  BinOpLHSVal->getOpcode() == ISD::SRL;
6353  bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
6354  BinOpLHSVal->getOpcode() == ISD::SELECT;
6355 
6356  if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
6357  !isCopyOrSelect)
6358  return SDValue();
6359 
6360  if (isCopyOrSelect && N->hasOneUse())
6361  return SDValue();
6362 
6363  EVT VT = N->getValueType(0);
6364 
6365  // If this is a signed shift right, and the high bit is modified by the
6366  // logical operation, do not perform the transformation. The HighBitSet
6367  // boolean indicates the value of the high bit of the constant which would
6368  // cause it to be modified for this operation.
6369  if (N->getOpcode() == ISD::SRA) {
6370  bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
6371  if (BinOpRHSSignSet != HighBitSet)
6372  return SDValue();
6373  }
6374 
6375  if (!TLI.isDesirableToCommuteWithShift(N, Level))
6376  return SDValue();
6377 
6378  // Fold the constants, shifting the binop RHS by the shift amount.
6379  SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
6380  N->getValueType(0),
6381  LHS->getOperand(1), N->getOperand(1));
6382  assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
6383 
6384  // Create the new shift.
6385  SDValue NewShift = DAG.getNode(N->getOpcode(),
6386  SDLoc(LHS->getOperand(0)),
6387  VT, LHS->getOperand(0), N->getOperand(1));
6388 
6389  // Create the new binop.
6390  return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
6391 }
6392 
6393 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
6394  assert(N->getOpcode() == ISD::TRUNCATE);
6395  assert(N->getOperand(0).getOpcode() == ISD::AND);
6396 
6397  // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
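// For example (types illustrative):
//   truncate:i32 (and X:i64, 255) --> and (truncate:i32 X), 255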
6398  if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
6399  SDValue N01 = N->getOperand(0).getOperand(1);
6400  if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
6401  SDLoc DL(N);
6402  EVT TruncVT = N->getValueType(0);
6403  SDValue N00 = N->getOperand(0).getOperand(0);
6404  SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
6405  SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
6406  AddToWorklist(Trunc00.getNode());
6407  AddToWorklist(Trunc01.getNode());
6408  return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
6409  }
6410  }
6411 
6412  return SDValue();
6413 }
6414 
6415 SDValue DAGCombiner::visitRotate(SDNode *N) {
6416  SDLoc dl(N);
6417  SDValue N0 = N->getOperand(0);
6418  SDValue N1 = N->getOperand(1);
6419  EVT VT = N->getValueType(0);
6420  unsigned Bitsize = VT.getScalarSizeInBits();
6421 
6422  // fold (rot x, 0) -> x
6423  if (isNullOrNullSplat(N1))
6424  return N0;
6425 
6426  // fold (rot x, c) -> x iff (c % BitSize) == 0
6427  if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
6428  APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
6429  if (DAG.MaskedValueIsZero(N1, ModuloMask))
6430  return N0;
6431  }
6432 
6433  // fold (rot x, c) -> (rot x, c % BitSize)
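// For example, on i32: rotl X, 37 --> rotl X, 5, since 37 urem 32 == 5.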
6434  if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
6435  if (Cst->getAPIntValue().uge(Bitsize)) {
6436  uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
6437  return DAG.getNode(N->getOpcode(), dl, VT, N0,
6438  DAG.getConstant(RotAmt, dl, N1.getValueType()));
6439  }
6440  }
6441 
6442  // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
6443  if (N1.getOpcode() == ISD::TRUNCATE &&
6444  N1.getOperand(0).getOpcode() == ISD::AND) {
6445  if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
6446  return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
6447  }
6448 
6449  unsigned NextOp = N0.getOpcode();
6450  // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
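// For example, on i32: rotl (rotl X, 3), 5 --> rotl X, 8, and
// rotl (rotr X, 3), 5 --> rotl X, 2 (opposite directions subtract).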
6451  if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
6454  if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
6455  EVT ShiftVT = C1->getValueType(0);
6456  bool SameSide = (N->getOpcode() == NextOp);
6457  unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
6458  if (SDValue CombinedShift =
6459  DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
6460  SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
6461  SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
6462  ISD::SREM, dl, ShiftVT, CombinedShift.getNode(),
6463  BitsizeC.getNode());
6464  return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
6465  CombinedShiftNorm);
6466  }
6467  }
6468  }
6469  return SDValue();
6470 }
6471 
6472 SDValue DAGCombiner::visitSHL(SDNode *N) {
6473  SDValue N0 = N->getOperand(0);
6474  SDValue N1 = N->getOperand(1);
6475  if (SDValue V = DAG.simplifyShift(N0, N1))
6476  return V;
6477 
6478  EVT VT = N0.getValueType();
6479  unsigned OpSizeInBits = VT.getScalarSizeInBits();
6480 
6481  // fold vector ops
6482  if (VT.isVector()) {
6483  if (SDValue FoldedVOp = SimplifyVBinOp(N))
6484  return FoldedVOp;
6485 
6486  BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
6487  // If setcc produces all-one true value then:
6488  // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
6489  if (N1CV && N1CV->isConstant()) {
6490  if (N0.getOpcode() == ISD::AND) {
6491  SDValue N00 = N0->getOperand(0);
6492  SDValue N01 = N0->getOperand(1);
6493  BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
6494 
6495  if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
6496  TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
6497  TargetLowering::ZeroOrNegativeOneBooleanContent) {
6498  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
6499  N01CV, N1CV))
6500  return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
6501  }
6502  }
6503  }
6504  }
6505 
6506  ConstantSDNode *N1C = isConstOrConstSplat(N1);
6507 
6508  // fold (shl c1, c2) -> c1<<c2
6509  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
6510  if (N0C && N1C && !N1C->isOpaque())
6511  return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
6512 
6513  if (SDValue NewSel = foldBinOpIntoSelect(N))
6514  return NewSel;
6515 
6516  // if (shl x, c) is known to be zero, return 0
6517  if (DAG.MaskedValueIsZero(SDValue(N, 0),
6518  APInt::getAllOnesValue(OpSizeInBits)))
6519  return DAG.getConstant(0, SDLoc(N), VT);
6520  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
6521  if (N1.getOpcode() == ISD::TRUNCATE &&
6522  N1.getOperand(0).getOpcode() == ISD::AND) {
6523  if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
6524  return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
6525  }
6526 
6527  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
6528  return SDValue(N, 0);
6529 
6530  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
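// For example, on i32: shl (shl X, 3), 5 --> shl X, 8, while
// shl (shl X, 10), 30 --> 0 because 10 + 30 >= 32 shifts everything out.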
6531  if (N0.getOpcode() == ISD::SHL) {
6532  auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
6533  ConstantSDNode *RHS) {
6534  APInt c1 = LHS->getAPIntValue();
6535  APInt c2 = RHS->getAPIntValue();
6536  zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6537  return (c1 + c2).uge(OpSizeInBits);
6538  };
6539  if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
6540  return DAG.getConstant(0, SDLoc(N), VT);
6541 
6542  auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
6543  ConstantSDNode *RHS) {
6544  APInt c1 = LHS->getAPIntValue();
6545  APInt c2 = RHS->getAPIntValue();
6546  zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6547  return (c1 + c2).ult(OpSizeInBits);
6548  };
6549  if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
6550  SDLoc DL(N);
6551  EVT ShiftVT = N1.getValueType();
6552  SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
6553  return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
6554  }
6555  }
6556 
6557  // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
6558  // For this to be valid, the second form must not preserve any of the bits
6559  // that are shifted out by the inner shift in the first form. This means
6560  // the outer shift size must be >= the number of bits added by the ext.
6561  // As a corollary, we don't care what kind of ext it is.
6562  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
6563  N0.getOpcode() == ISD::ANY_EXTEND ||
6564  N0.getOpcode() == ISD::SIGN_EXTEND) &&
6565  N0.getOperand(0).getOpcode() == ISD::SHL) {
6566  SDValue N0Op0 = N0.getOperand(0);
6567  if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
6568  APInt c1 = N0Op0C1->getAPIntValue();
6569  APInt c2 = N1C->getAPIntValue();
6570  zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6571 
6572  EVT InnerShiftVT = N0Op0.getValueType();
6573  uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
6574  if (c2.uge(OpSizeInBits - InnerShiftSize)) {
6575  SDLoc DL(N0);
6576  APInt Sum = c1 + c2;
6577  if (Sum.uge(OpSizeInBits))
6578  return DAG.getConstant(0, DL, VT);
6579 
6580  return DAG.getNode(
6581  ISD::SHL, DL, VT,
6582  DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
6583  DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
6584  }
6585  }
6586  }
6587 
6588  // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
6589  // Only fold this if the inner zext has no other uses to avoid increasing
6590  // the total number of instructions.
6591  if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
6592  N0.getOperand(0).getOpcode() == ISD::SRL) {
6593  SDValue N0Op0 = N0.getOperand(0);
6594  if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
6595  if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
6596  uint64_t c1 = N0Op0C1->getZExtValue();
6597  uint64_t c2 = N1C->getZExtValue();
6598  if (c1 == c2) {
6599  SDValue NewOp0 = N0.getOperand(0);
6600  EVT CountVT = NewOp0.getOperand(1).getValueType();
6601  SDLoc DL(N);
6602  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
6603  NewOp0,
6604  DAG.getConstant(c2, DL, CountVT));
6605  AddToWorklist(NewSHL.getNode());
6606  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
6607  }
6608  }
6609  }
6610  }
6611 
6612  // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
6613  // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2
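// For example, with the exact flag on the inner shift:
//   shl (srl exact X, 3), 5 --> shl X, 2
//   shl (srl exact X, 5), 3 --> srl X, 2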
6614  if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
6615  N0->getFlags().hasExact()) {
6616  if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
6617  uint64_t C1 = N0C1->getZExtValue();
6618  uint64_t C2 = N1C->getZExtValue();
6619  SDLoc DL(N);
6620  if (C1 <= C2)
6621  return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
6622  DAG.getConstant(C2 - C1, DL, N1.getValueType()));
6623  return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
6624  DAG.getConstant(C1 - C2, DL, N1.getValueType()));
6625  }
6626  }
6627 
6628  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
6629  // (and (srl x, (sub c1, c2)), MASK)
6630  // Only fold this if the inner shift has no other uses -- if it does, folding
6631  // this will increase the total number of instructions.
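// For example, on i8: shl (srl X, 3), 5 --> and (shl X, 2), 0xE0, and
// shl (srl X, 5), 3 --> and (srl X, 2), 0x38 (values illustrative).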
6632  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
6633  TLI.shouldFoldShiftPairToMask(N, Level)) {
6634  if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
6635  uint64_t c1 = N0C1->getZExtValue();
6636  if (c1 < OpSizeInBits) {
6637  uint64_t c2 = N1C->getZExtValue();
6638  APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
6639  SDValue Shift;
6640  if (c2 > c1) {
6641  Mask <<= c2 - c1;
6642  SDLoc DL(N);
6643  Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
6644  DAG.getConstant(c2 - c1, DL, N1.getValueType()));
6645  } else {
6646  Mask.lshrInPlace(c1 - c2);
6647  SDLoc DL(N);
6648  Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
6649  DAG.getConstant(c1 - c2, DL, N1.getValueType()));
6650  }
6651  SDLoc DL(N0);
6652  return DAG.getNode(ISD::AND, DL, VT, Shift,
6653  DAG.getConstant(Mask, DL, VT));
6654  }
6655  }
6656  }
6657 
6658  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
6659  if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
6660  isConstantOrConstantVector(N1, /* No Opaques */ true)) {
6661  SDLoc DL(N);
6662  SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
6663  SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
6664  return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
6665  }
6666 
6667  // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
6668  // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
6669  // Variant of version done on multiply, except mul by a power of 2 is turned
6670  // into a shift.
6671  if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
6672  N0.getNode()->hasOneUse() &&
6673  isConstantOrConstantVector(N1, /* No Opaques */ true) &&
6674  isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
6675  TLI.isDesirableToCommuteWithShift(N, Level)) {
6676  SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
6677  SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
6678  AddToWorklist(Shl0.getNode());
6679  AddToWorklist(Shl1.getNode());
6680  return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
6681  }
6682 
6683  // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
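// For example: shl (mul X, 5), 2 --> mul X, 20.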
6684  if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
6685  isConstantOrConstantVector(N1, /* No Opaques */ true) &&
6686  isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
6687  SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
6688  if (isConstantOrConstantVector(Shl))
6689  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
6690  }
6691 
6692  if (N1C && !N1C->isOpaque())
6693  if (SDValue NewSHL = visitShiftByConstant(N, N1C))
6694  return NewSHL;
6695 
6696  return SDValue();
6697 }
6698 
6699 SDValue DAGCombiner::visitSRA(SDNode *N) {
6700  SDValue N0 = N->getOperand(0);
6701  SDValue N1 = N->getOperand(1);
6702  if (SDValue V = DAG.simplifyShift(N0, N1))
6703  return V;
6704 
6705  EVT VT = N0.getValueType();
6706  unsigned OpSizeInBits = VT.getScalarSizeInBits();
6707 
6708  // Arithmetic shifting an all-sign-bit value is a no-op.
6709  // fold (sra 0, x) -> 0
6710  // fold (sra -1, x) -> -1
6711  if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
6712  return N0;
6713 
6714  // fold vector ops
6715  if (VT.isVector())
6716  if (SDValue FoldedVOp = SimplifyVBinOp(N))
6717  return FoldedVOp;
6718 
6719  ConstantSDNode *N1C = isConstOrConstSplat(N1);
6720 
6721  // fold (sra c1, c2) -> c1 >>s c2
6722  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
6723  if (N0C && N1C && !N1C->isOpaque())
6724  return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
6725 
6726  if (SDValue NewSel = foldBinOpIntoSelect(N))
6727  return NewSel;
6728 
6729  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 if the target
6730  // supports sext_inreg.
6731  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
6732  unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
6733  EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
6734  if (VT.isVector())
6735  ExtVT = EVT::getVectorVT(*DAG.getContext(),
6736  ExtVT, VT.getVectorNumElements());
6737  if ((!LegalOperations ||
6738  TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
6739  return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
6740  N0.getOperand(0), DAG.getValueType(ExtVT));
6741  }
6742 
6743  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
6744  // clamp (add c1, c2) to max shift.
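// For example, on i32: sra (sra X, 10), 5 --> sra X, 15, while
// sra (sra X, 20), 20 --> sra X, 31 (the sum is clamped to bitwidth - 1,
// which preserves the sign-fill semantics).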
6745  if (N0.getOpcode() == ISD::SRA) {
6746  SDLoc DL(N);
6747  EVT ShiftVT = N1.getValueType();
6748  EVT ShiftSVT = ShiftVT.getScalarType();
6749  SmallVector<SDValue, 16> ShiftValues;
6750 
6751  auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
6752  APInt c1 = LHS->getAPIntValue();
6753  APInt c2 = RHS->getAPIntValue();
6754  zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6755  APInt Sum = c1 + c2;
6756  unsigned ShiftSum =
6757  Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
6758  ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
6759  return true;
6760  };
6761  if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
6762  SDValue ShiftValue;
6763  if (VT.isVector())
6764  ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
6765  else
6766  ShiftValue = ShiftValues[0];
6767  return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
6768  }
6769  }
6770 
6771  // fold (sra (shl X, m), (sub result_size, n))
6772  // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
6773  // result_size - n != m.
6774  // If truncate is free for the target, sext(shl) is likely to result in
6775  // better code.
6776  if (N0.getOpcode() == ISD::SHL && N1C) {
6777  // Get the two constants of the shifts, CN0 = m, CN = n.
6778  const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
6779  if (N01C) {
6780  LLVMContext &Ctx = *DAG.getContext();
6781  // Determine what the truncate's result bitsize and type would be.
6782  EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
6783 
6784  if (VT.isVector())
6785  TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
6786 
6787  // Determine the residual right-shift amount.
6788  int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
6789 
6790  // If the shift is not a no-op (in which case this should be just a sign
6791  // extend already), the type truncated to is legal, sign_extend is legal
6792  // on that type, and the truncate to that type is both legal and free,
6793  // perform the transform.
6794  if ((ShiftAmt > 0) &&
6795  TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
6796  TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
6797  TLI.isTruncateFree(VT, TruncVT)) {
6798  SDLoc DL(N);
6799  SDValue Amt = DAG.getConstant(ShiftAmt, DL,
6800  getShiftAmountTy(N0.getOperand(0).getValueType()));
6801  SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
6802  N0.getOperand(0), Amt);
6803  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
6804  Shift);
6805  return DAG.getNode(ISD::SIGN_EXTEND, DL,
6806  N->getValueType(0), Trunc);
6807  }
6808  }
6809  }
6810 
6811  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
6812  if (N1.getOpcode() == ISD::TRUNCATE &&
6813  N1.getOperand(0).getOpcode() == ISD::AND) {
6814  if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
6815  return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
6816  }
6817 
6818  // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
6819  // if c1 is equal to the number of bits the trunc removes
6820  if (N0.getOpcode() == ISD::TRUNCATE &&
6821  (N0.getOperand(0).getOpcode() == ISD::SRL ||
6822  N0.getOperand(0).getOpcode() == ISD::SRA) &&
6823  N0.getOperand(0).hasOneUse() &&
6824  N0.getOperand(0).getOperand(1).hasOneUse() &&
6825  N1C) {
6826  SDValue N0Op0 = N0.getOperand(0);
6827  if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
6828  unsigned LargeShiftVal = LargeShift->getZExtValue();
6829  EVT LargeVT = N0Op0.getValueType();
6830 
6831  if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
6832  SDLoc DL(N);
6833  SDValue Amt =
6834  DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
6835  getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
6836  SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
6837  N0Op0.getOperand(0), Amt);
6838  return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
6839  }
6840  }
6841  }
6842 
6843  // Simplify, based on bits shifted out of the LHS.
6844  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
6845  return SDValue(N, 0);
6846 
6847  // If the sign bit is known to be zero, switch this to a SRL.
6848  if (DAG.SignBitIsZero(N0))
6849  return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
6850 
6851  if (N1C && !N1C->isOpaque())
6852  if (SDValue NewSRA = visitShiftByConstant(N, N1C))
6853  return NewSRA;
6854 
6855  return SDValue();
6856 }
6857 
6858 SDValue DAGCombiner::visitSRL(SDNode *N) {
6859  SDValue N0 = N->getOperand(0);
6860  SDValue N1 = N->getOperand(1);
6861  if (SDValue V = DAG.simplifyShift(N0, N1))
6862  return V;
6863 
6864  EVT VT = N0.getValueType();
6865  unsigned OpSizeInBits = VT.getScalarSizeInBits();
6866 
6867  // fold vector ops
6868  if (VT.isVector())
6869  if (SDValue FoldedVOp = SimplifyVBinOp(N))
6870  return FoldedVOp;
6871 
6871 
6872  ConstantSDNode *N1C = isConstOrConstSplat(N1);
6873 
6874  // fold (srl c1, c2) -> c1 >>u c2
6875  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
6876  if (N0C && N1C && !N1C->isOpaque())
6877  return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
6878 
6879  if (SDValue NewSel = foldBinOpIntoSelect(N))
6880  return NewSel;
6881 
6882  // if (srl x, c) is known to be zero, return 0
6883  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
6884  APInt::getAllOnesValue(OpSizeInBits)))
6885  return DAG.getConstant(0, SDLoc(N), VT);
6886 
6887  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
6888  if (N0.getOpcode() == ISD::SRL) {
6889  auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
6890  ConstantSDNode *RHS) {
6891  APInt c1 = LHS->getAPIntValue();
6892  APInt c2 = RHS->getAPIntValue();
6893  zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6894  return (c1 + c2).uge(OpSizeInBits);
6895  };
6896  if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
6897  return DAG.getConstant(0, SDLoc(N), VT);
6898 
6899  auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
6900  ConstantSDNode *RHS) {
6901  APInt c1 = LHS->getAPIntValue();
6902  APInt c2 = RHS->getAPIntValue();
6903  zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6904  return (c1 + c2).ult(OpSizeInBits);
6905  };
6906  if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
6907  SDLoc DL(N);
6908  EVT ShiftVT = N1.getValueType();
6909  SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
6910  return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
6911  }
6912  }
6913 
6914  // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
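// For example, with X: i64 truncated to i32:
//   srl (trunc (srl X, 32)), 8 --> trunc (srl X, 40)
// (c1 == 32 is exactly the number of bits removed by the truncate).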
6915  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
6916  N0.getOperand(0).getOpcode() == ISD::SRL) {
6917  if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
6918  uint64_t c1 = N001C->getZExtValue();
6919  uint64_t c2 = N1C->getZExtValue();
6920  EVT InnerShiftVT = N0.getOperand(0).getValueType();
6921  EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
6922  uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
6923  // This is only valid if OpSizeInBits + c1 == the size of the inner shift.
6924  if (c1 + OpSizeInBits == InnerShiftSize) {
6925  SDLoc DL(N0);
6926  if (c1 + c2 >= InnerShiftSize)
6927  return DAG.getConstant(0, DL, VT);
6928  return DAG.getNode(ISD::TRUNCATE, DL, VT,
6929  DAG.getNode(ISD::SRL, DL, InnerShiftVT,
6930  N0.getOperand(0).getOperand(0),
6931  DAG.getConstant(c1 + c2, DL,
6932  ShiftCountVT)));
6933  }
6934  }
6935  }
6936 
6937  // fold (srl (shl x, c), c) -> (and x, cst2)
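// For example, on i32: srl (shl X, 24), 24 --> and X, 255.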
6938  if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
6939  isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
6940  SDLoc DL(N);
6941  SDValue Mask =
6942  DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
6943  AddToWorklist(Mask.getNode());
6944  return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
6945  }
6946 
6947  // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
6948  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
6949  // Shifting in all undef bits?
6950  EVT SmallVT = N0.getOperand(0).getValueType();
6951  unsigned BitSize = SmallVT.getScalarSizeInBits();
6952  if (N1C->getZExtValue() >= BitSize)
6953  return DAG.getUNDEF(VT);
6954 
6955  if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
6956  uint64_t ShiftAmt = N1C->getZExtValue();
6957  SDLoc DL0(N0);
6958  SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
6959  N0.getOperand(0),
6960  DAG.getConstant(ShiftAmt, DL0,
6961  getShiftAmountTy(SmallVT)));
6962  AddToWorklist(SmallShift.getNode());
6963  APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
6964  SDLoc DL(N);
6965  return DAG.getNode(ISD::AND, DL, VT,
6966  DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
6967  DAG.getConstant(Mask, DL, VT));
6968  }
6969  }
6970 
6971  // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
6972  // bit, which is unmodified by sra.
6973  if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
6974  if (N0.getOpcode() == ISD::SRA)
6975  return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
6976  }
6977 
6978  // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
6979  if (N1C && N0.getOpcode() == ISD::CTLZ &&
6980  N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
6981  KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
6982 
6983  // If any of the input bits are KnownOne, then the input couldn't be all
6984  // zeros, thus the result of the srl will always be zero.
6985  if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
6986 
6987  // If all of the bits input to the ctlz node are known to be zero, then
6988  // the result of the ctlz is "32" and the result of the shift is one.
6989  APInt UnknownBits = ~Known.Zero;
6990  if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
6991 
6992  // Otherwise, check to see if there is exactly one bit input to the ctlz.
6993  if (UnknownBits.isPowerOf2()) {
6994  // Okay, we know that only the single bit specified by UnknownBits
6995  // could be set on input to the CTLZ node. If this bit is set, the SRL
6996  // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
6997  // to an SRL/XOR pair, which is likely to simplify more.
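// For example, on i32, if only bit 3 of X can be nonzero: ctlz X is
// either 28 or 32, so srl (ctlz X), 5 is 0 or 1, which is exactly
// xor (srl X, 3), 1.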
6998  unsigned ShAmt = UnknownBits.countTrailingZeros();
6999  SDValue Op = N0.getOperand(0);
7000 
7001  if (ShAmt) {
7002  SDLoc DL(N0);
7003  Op = DAG.getNode(ISD::SRL, DL, VT, Op,
7004  DAG.getConstant(ShAmt, DL,
7005  getShiftAmountTy(Op.getValueType())));
7006  AddToWorklist(Op.getNode());
7007  }
7008 
7009  SDLoc DL(N);
7010  return DAG.getNode(ISD::XOR, DL, VT,
7011  Op, DAG.getConstant(1, DL, VT));
7012  }
7013  }
7014 
7015  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
7016  if (N1.getOpcode() == ISD::TRUNCATE &&
7017  N1.getOperand(0).getOpcode() == ISD::AND) {
7018  if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7019  return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
7020  }
7021 
7022  // fold operands of srl based on knowledge that the low bits are not
7023  // demanded.
7024  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
7025  return SDValue(N, 0);
7026 
7027  if (N1C && !N1C->isOpaque())
7028  if (SDValue NewSRL = visitShiftByConstant(N, N1C))
7029  return NewSRL;
7030 
7031  // Attempt to convert a srl of a load into a narrower zero-extending load.
7032  if (SDValue NarrowLoad = ReduceLoadWidth(N))
7033  return NarrowLoad;
7034 
7035  // Here is a common situation. We want to optimize:
7036  //
7037  // %a = ...
7038  // %b = and i32 %a, 2
7039  // %c = srl i32 %b, 1
7040  // brcond i32 %c ...
7041  //
7042  // into
7043  //
7044  // %a = ...
7045  // %b = and %a, 2
7046  // %c = setcc eq %b, 0
7047  // brcond %c ...
7048  //
7049  // However, after the source operand of the SRL is optimized into AND, the
7050  // SRL itself may not be optimized further. Look for it and add the BRCOND
7051  // into the worklist.
7052  if (N->hasOneUse()) {
7053  SDNode *Use = *N->use_begin();
7054  if (Use->getOpcode() == ISD::BRCOND)
7055  AddToWorklist(Use);
7056  else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
7057  // Also look past the truncate.
7058  Use = *Use->use_begin();
7059  if (Use->getOpcode() == ISD::BRCOND)
7060  AddToWorklist(Use);
7061  }
7062  }
7063 
7064  return SDValue();
7065 }
7066 
7067 SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
7068  EVT VT = N->getValueType(0);
7069  SDValue N0 = N->getOperand(0);
7070  SDValue N1 = N->getOperand(1);
7071  SDValue N2 = N->getOperand(2);
7072  bool IsFSHL = N->getOpcode() == ISD::FSHL;
7073  unsigned BitWidth = VT.getScalarSizeInBits();
7074 
7075  // fold (fshl N0, N1, 0) -> N0
7076  // fold (fshr N0, N1, 0) -> N1
7077  if (isPowerOf2_32(BitWidth))
7078  if (DAG.MaskedValueIsZero(
7079  N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
7080  return IsFSHL ? N0 : N1;
7081 
7082  // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
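// For example, on i32: fshl X, Y, 40 --> fshl X, Y, 8.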
7083  if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
7084  if (Cst->getAPIntValue().uge(BitWidth)) {
7085  uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
7086  return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
7087  DAG.getConstant(RotAmt, SDLoc(N), N2.getValueType()));
7088  }
7089  }
7090 
7091  // fold (fshl N0, N0, N2) -> (rotl N0, N2)
7092  // fold (fshr N0, N0, N2) -> (rotr N0, N2)
7093  // TODO: Investigate flipping this rotate if only one is legal. If the funnel
7094  // shift is legal as well, we might be better off avoiding non-constant (BW - N2).
7095  unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
7096  if (N0 == N1 && hasOperation(RotOpc, VT))
7097  return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
7098 
7099  return SDValue();
7100 }
7101 
7102 SDValue DAGCombiner::visitABS(SDNode *N) {
7103  SDValue N0 = N->getOperand(0);
7104  EVT VT = N->getValueType(0);
7105 
7106  // fold (abs c1) -> c2
7107  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7108  return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
7109  // fold (abs (abs x)) -> (abs x)
7110  if (N0.getOpcode() == ISD::ABS)
7111  return N0;
7112  // fold (abs x) -> x iff not-negative
7113  if (DAG.SignBitIsZero(N0))
7114  return N0;
7115  return SDValue();
7116 }
7117 
7118 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
7119  SDValue N0 = N->getOperand(0);
7120  EVT VT = N->getValueType(0);
7121 
7122  // fold (bswap c1) -> c2
7123  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7124  return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
7125  // fold (bswap (bswap x)) -> x
7126  if (N0.getOpcode() == ISD::BSWAP)
7127  return N0->getOperand(0);
7128  return SDValue();
7129 }
7130 
7131 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
7132  SDValue N0 = N->getOperand(0);
7133  EVT VT = N->getValueType(0);
7134 
7135  // fold (bitreverse c1) -> c2
7136  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7137  return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
7138  // fold (bitreverse (bitreverse x)) -> x
7139  if (N0.getOpcode() == ISD::BITREVERSE)
7140  return N0.getOperand(0);
7141  return SDValue();
7142 }
7143 
7144 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
7145  SDValue N0 = N->getOperand(0);
7146  EVT VT = N->getValueType(0);
7147 
7148  // fold (ctlz c1) -> c2
7149  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7150  return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
7151 
7152  // If the value is known never to be zero, switch to the undef version.
7153  if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
7154  if (DAG.isKnownNeverZero(N0))
7155  return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7156  }
7157 
7158  return SDValue();
7159 }
7160 
7161 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
7162  SDValue N0 = N->getOperand(0);
7163  EVT VT = N->getValueType(0);
7164 
7165  // fold (ctlz_zero_undef c1) -> c2
7166  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7167  return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7168  return SDValue();
7169 }
7170 
7171 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
7172  SDValue N0 = N->getOperand(0);
7173  EVT VT = N->getValueType(0);
7174 
7175  // fold (cttz c1) -> c2
7176  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7177  return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
7178 
7179  // If the value is known never to be zero, switch to the undef version.
7180  if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
7181  if (DAG.isKnownNeverZero(N0))
7182  return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7183  }
7184 
7185  return SDValue();
7186 }
7187 
7188 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
7189  SDValue N0 = N->getOperand(0);
7190  EVT VT = N->getValueType(0);
7191 
7192  // fold (cttz_zero_undef c1) -> c2
7193  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7194  return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7195  return SDValue();
7196 }
7197 
7198 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
7199  SDValue N0 = N->getOperand(0);
7200  EVT VT = N->getValueType(0);
7201 
7202  // fold (ctpop c1) -> c2
7203  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7204  return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
7205  return SDValue();
7206 }
7207 
7208 // FIXME: This should be checking for no signed zeros on individual operands, as
7209 // well as no nans.
7210 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS) {
7211  const TargetOptions &Options = DAG.getTarget().Options;
7212  EVT VT = LHS.getValueType();
7213 
7214  return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
7215  DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
7216 }
7217 
7218 /// Generate Min/Max node
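/// For example (illustrative): select (setcc olt X, Y), X, Y --> fminnum X, Y,
/// and select (setcc ogt X, Y), X, Y --> fmaxnum X, Y.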
7219 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
7220  SDValue RHS, SDValue True, SDValue False,
7221  ISD::CondCode CC, const TargetLowering &TLI,
7222  SelectionDAG &DAG) {
7223  if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
7224  return SDValue();
7225 
7226  EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
7227  switch (CC) {
7228  case ISD::SETOLT:
7229  case ISD::SETOLE:
7230  case ISD::SETLT:
7231  case ISD::SETLE:
7232  case ISD::SETULT:
7233  case ISD::SETULE: {
7234  // Since the operands are already known never-NaN here, either fminnum or
7235  // fminnum_ieee is OK. Try the ieee version first, since fminnum is
7236  // expanded in terms of it.
7237  unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
7238  if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
7239  return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
7240 
7241  unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
7242  if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
7243  return DAG.getNode(Opcode, DL, VT, LHS, RHS);
7244  return SDValue();
7245  }
7246  case ISD::SETOGT:
7247  case ISD::SETOGE:
7248  case ISD::SETGT:
7249  case ISD::SETGE:
7250  case ISD::SETUGT:
7251  case ISD::SETUGE: {
7252  unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
7253  if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
7254  return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
7255 
7256  unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
7257  if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
7258  return DAG.getNode(Opcode, DL, VT, LHS, RHS);
7259  return SDValue();
7260  }
7261  default:
7262  return SDValue();
7263  }
7264 }
7265 
7266 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
7267  SDValue Cond = N->getOperand(0);
7268  SDValue N1 = N->getOperand(1);
7269  SDValue N2 = N->getOperand(2);
7270  EVT VT = N->getValueType(0);
7271  EVT CondVT = Cond.getValueType();
7272  SDLoc DL(N);
7273 
7274  if (!VT.isInteger())
7275  return SDValue();
7276 
7277  auto *C1 = dyn_cast<ConstantSDNode>(N1);
7278  auto *C2 = dyn_cast<ConstantSDNode>(N2);
7279  if (!C1 || !C2)
7280  return SDValue();
7281 
7282  // Only do this before legalization to avoid conflicting with target-specific
7283  // transforms in the other direction (create a select from a zext/sext). There
7284  // is also a target-independent combine here in DAGCombiner in the other
7285  // direction for (select Cond, -1, 0) when the condition is not i1.
7286  if (CondVT == MVT::i1 && !LegalOperations) {
7287  if (C1->isNullValue() && C2->isOne()) {
7288  // select Cond, 0, 1 --> zext (!Cond)
7289  SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
7290  if (VT != MVT::i1)
7291  NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
7292  return NotCond;
7293  }
7294  if (C1->isNullValue() && C2->isAllOnesValue()) {
7295  // select Cond, 0, -1 --> sext (!Cond)
7296  SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
7297  if (VT != MVT::i1)
7298  NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
7299  return NotCond;
7300  }
7301  if (C1->isOne() && C2->isNullValue()) {
7302  // select Cond, 1, 0 --> zext (Cond)
7303  if (VT != MVT::i1)
7304  Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
7305  return Cond;
7306  }
7307  if (C1->isAllOnesValue() && C2->isNullValue()) {
7308  // select Cond, -1, 0 --> sext (Cond)
7309  if (VT != MVT::i1)
7310  Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
7311  return Cond;
7312  }
7313 
7314  // For any constants that differ by 1, we can transform the select into an
7315  // extend and add. Use a target hook because some targets may prefer to
7316  // transform in the other direction.
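// For example: select C, 7, 6 --> add (zext C), 6, and
// select C, 6, 7 --> add (sext C), 7.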
7317  if (TLI.convertSelectOfConstantsToMath(VT)) {
7318  if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
7319  // select Cond, C1, C1-1 --> add (zext Cond), C1-1
7320  if (VT != MVT::i1)
7321  Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
7322  return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
7323  }
7324  if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
7325  // select Cond, C1, C1+1 --> add (sext Cond), C1+1
7326  if (VT != MVT::i1)
7327  Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
7328  return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
7329  }
7330  }
7331 
7332  return SDValue();
7333  }
7334 
7335  // fold (select Cond, 0, 1) -> (xor Cond, 1)
7336  // We can't do this reliably if integer based booleans have different contents
7337  // to floating point based booleans. This is because we can't tell whether we
7338  // have an integer-based boolean or a floating-point-based boolean unless we
7339  // can find the SETCC that produced it and inspect its operands. This is
7340  // fairly easy if C is the SETCC node, but it can potentially be
7341  // undiscoverable (or not reasonably discoverable). For example, it could be
7342  // in another basic block or it could require searching a complicated
7343  // expression.
7344  if (CondVT.isInteger() &&
7345  TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
7346  TargetLowering::ZeroOrOneBooleanContent &&
7347  TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
7348  TargetLowering::ZeroOrOneBooleanContent &&
7349  C1->isNullValue() && C2->isOne()) {
7350  SDValue NotCond =
7351  DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
7352  if (VT.bitsEq(CondVT))
7353  return NotCond;
7354  return DAG.getZExtOrTrunc(NotCond, DL, VT);
7355  }
7356 
7357  return SDValue();
7358 }
7359 
7360 SDValue DAGCombiner::visitSELECT(SDNode *N) {
7361  SDValue N0 = N->getOperand(0);
7362  SDValue N1 = N->getOperand(1);
7363  SDValue N2 = N->getOperand(2);
7364  EVT VT = N->getValueType(0);
7365  EVT VT0 = N0.getValueType();
7366  SDLoc DL(N);
7367 
7368  if (SDValue V = DAG.simplifySelect(N0, N1, N2))
7369  return V;
7370 
7371  // fold (select X, X, Y) -> (or X, Y)
7372  // fold (select X, 1, Y) -> (or X, Y)
7373  if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
7374  return DAG.getNode(ISD::OR, DL, VT, N0, N2);
7375 
7376  if (SDValue V = foldSelectOfConstants(N))
7377  return V;
7378 
7379  // fold (select C, 0, X) -> (and (not C), X)
7380  if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
7381  SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
7382  AddToWorklist(NOTNode.getNode());
7383  return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
7384  }
7385  // fold (select C, X, 1) -> (or (not C), X)
7386  if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
7387  SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
7388  AddToWorklist(NOTNode.getNode());
7389  return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
7390  }
7391  // fold (select X, Y, X) -> (and X, Y)
7392  // fold (select X, Y, 0) -> (and X, Y)
7393  if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
7394  return DAG.getNode(ISD::AND, DL, VT, N0, N1);
7395 
7396  // If we can fold this based on the true/false value, do so.
7397  if (SimplifySelectOps(N, N1, N2))
7398  return SDValue(N, 0); // Don't revisit N.
7399 
7400  if (VT0 == MVT::i1) {
7401  // The code in this block deals with the following 2 equivalences:
7402  // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
7403  // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
7404  // The target can specify its preferred form with the
7405  // shouldNormalizeToSelectSequence() callback. However, we always transform
7406  // to the right-hand form if the inner select already exists in the DAG,
7407  // and we always transform to the left-hand form if we know that we can
7408  // further optimize the combination of the conditions.
7409  bool normalizeToSequence =
7410  TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
7411  // select (and Cond0, Cond1), X, Y
7412  // -> select Cond0, (select Cond1, X, Y), Y
7413  if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
7414  SDValue Cond0 = N0->getOperand(0);
7415  SDValue Cond1 = N0->getOperand(1);
7416  SDValue InnerSelect =
7417  DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
7418  if (normalizeToSequence || !InnerSelect.use_empty())
7419  return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
7420  InnerSelect, N2);
7421  }
7422  // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
7423  if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
7424  SDValue Cond0 = N0->getOperand(0);
7425  SDValue Cond1 = N0->getOperand(1);
7426  SDValue InnerSelect =
7427  DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
7428  if (normalizeToSequence || !InnerSelect.use_empty())
7429  return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
7430  InnerSelect);
7431  }
7432 
7433  // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
7434  if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
7435  SDValue N1_0 = N1->getOperand(0);
7436  SDValue N1_1 = N1->getOperand(1);
7437  SDValue N1_2 = N1->getOperand(2);
7438  if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
7439  // Create the actual and node if we can generate good code for it.
7440  if (!normalizeToSequence) {
7441  SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
7442  return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2);
7443  }
7444  // Otherwise see if we can optimize the "and" to a better pattern.
7445  if (SDValue Combined = visitANDLike(N0, N1_0, N))
7446  return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
7447  N2);
7448  }
7449  }
7450  // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
7451  if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
7452  SDValue N2_0 = N2->getOperand(0);
7453  SDValue N2_1 = N2->getOperand(1);
7454  SDValue N2_2 = N2->getOperand(2);
7455  if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
7456  // Create the actual or node if we can generate good code for it.
7457  if (!normalizeToSequence) {
7458  SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
7459  return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2);
7460  }
7461  // Otherwise see if we can optimize to a better pattern.
7462  if (SDValue Combined = visitORLike(N0, N2_0, N))
7463  return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
7464  N2_2);
7465  }
7466  }
7467  }
7468 
7469  if (VT0 == MVT::i1) {
7470  // select (not Cond), N1, N2 -> select Cond, N2, N1
7471  if (isBitwiseNot(N0))
7472  return DAG.getNode(ISD::SELECT, DL, VT, N0->getOperand(0), N2, N1);
7473  }
7474 
7475  // Fold selects based on a setcc into other things, such as min/max/abs.
7476  if (N0.getOpcode() == ISD::SETCC) {
7477  SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
7478  ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7479 
7480  // select (fcmp lt x, y), x, y -> fminnum x, y
7481  // select (fcmp gt x, y), x, y -> fmaxnum x, y
7482  //
7483  // This is OK if we don't care what happens if either operand is a NaN.
7484  if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2))
7485  if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
7486  CC, TLI, DAG))
7487  return FMinMax;
7488 
7489  // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
7490  // This is conservatively limited to pre-legal-operations to give targets
7491  // a chance to reverse the transform if they want to do that. Also, it is
7492  // unlikely that the pattern would be formed late, so it's probably not
7493  // worth going through the other checks.
7494  if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
7495  CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
7496  N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
7497  auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
7498  auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
7499  if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
7500  // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
7501  // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
7502  //
7503  // The IR equivalent of this transform would have this form:
7504  // %a = add %x, C
7505  // %c = icmp ugt %x, ~C
7506  // %r = select %c, -1, %a
7507  // =>
7508  // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
7509  // %u0 = extractvalue %u, 0
7510  // %u1 = extractvalue %u, 1
7511  // %r = select %u1, -1, %u0
7512  SDVTList VTs = DAG.getVTList(VT, VT0);
7513  SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
7514  return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
7515  }
7516  }
7517 
7518  if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
7519  (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)))
7520  return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2,
7521  N0.getOperand(2));
7522 
7523  return SimplifySelect(DL, N0, N1, N2);
7524  }
7525 
7526  return SDValue();
7527 }
7528 
7529 static
7530 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
7531  SDLoc DL(N);
7532  EVT LoVT, HiVT;
7533  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
7534 
7535  // Split the inputs.
7536  SDValue Lo, Hi, LL, LH, RL, RH;
7537  std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
7538  std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
7539 
7540  Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
7541  Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
7542 
7543  return std::make_pair(Lo, Hi);
7544 }
7545 
7546 // This function assumes all the vselect's arguments are CONCAT_VECTOR
7547 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
7548 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
7549  SDLoc DL(N);
7550  SDValue Cond = N->getOperand(0);
7551  SDValue LHS = N->getOperand(1);
7552  SDValue RHS = N->getOperand(2);
7553  EVT VT = N->getValueType(0);
7554  int NumElems = VT.getVectorNumElements();
7555  assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
7556  RHS.getOpcode() == ISD::CONCAT_VECTORS &&
7557  Cond.getOpcode() == ISD::BUILD_VECTOR);
7558 
7559  // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
7560  // binary ones here.
7561  if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
7562  return SDValue();
7563 
7564  // We're sure we have an even number of elements due to the
7565  // concat_vectors we have as arguments to vselect.
7566  // Skip BV elements until we find one that's not an UNDEF, then keep
7567  // looping until we get to half the length of the BV and check that all
7568  // the non-undef elements are the same.
7569  ConstantSDNode *BottomHalf = nullptr;
7570  for (int i = 0; i < NumElems / 2; ++i) {
7571  if (Cond->getOperand(i)->isUndef())
7572  continue;
7573 
7574  if (BottomHalf == nullptr)
7575  BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
7576  else if (Cond->getOperand(i).getNode() != BottomHalf)
7577  return SDValue();
7578  }
7579 
7580  // Do the same for the second half of the BuildVector
7581  ConstantSDNode *TopHalf = nullptr;
7582  for (int i = NumElems / 2; i < NumElems; ++i) {
7583  if (Cond->getOperand(i)->isUndef())
7584  continue;
7585 
7586  if (TopHalf == nullptr)
7587  TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
7588  else if (Cond->getOperand(i).getNode() != TopHalf)
7589  return SDValue();
7590  }
7591 
7592  assert(TopHalf && BottomHalf &&
7593  "One half of the selector was all UNDEFs and the other was all the "
7594  "same value. This should have been addressed before this function.");
7595  return DAG.getNode(
7596  ISD::CONCAT_VECTORS, DL, VT,
7597  BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
7598  TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
7599 }
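
// Illustrative example (not from the original source): with
//   Cond = (build_vector 1, 1, 0, 0)
//   LHS  = (concat_vectors A, B), RHS = (concat_vectors C, D)
// the bottom half of Cond is all-ones and the top half all-zeros, so the
// vselect folds to (concat_vectors A, D) and no select remains.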
7600 
7601 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
7602  if (Level >= AfterLegalizeTypes)
7603  return SDValue();
7604 
7605  MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
7606  SDValue Mask = MSC->getMask();
7607  SDValue Data = MSC->getValue();
7608  SDLoc DL(N);
7609 
7610  // If the MSCATTER data type requires splitting and the mask is provided by a
7611  // SETCC, then split both nodes and their operands before legalization. This
7612  // prevents the type legalizer from unrolling SETCC into scalar comparisons
7613  // and enables future optimizations (e.g. min/max pattern matching on X86).
7614  if (Mask.getOpcode() != ISD::SETCC)
7615  return SDValue();
7616 
7617  // Check if any splitting is required.
7618  if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
7619  TargetLowering::TypeSplitVector)
7620  return SDValue();
7621  SDValue MaskLo, MaskHi;
7622  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
7623 
7624  EVT LoVT, HiVT;
7625  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
7626 
7627  SDValue Chain = MSC->getChain();
7628 
7629  EVT MemoryVT = MSC->getMemoryVT();
7630  unsigned Alignment = MSC->getOriginalAlignment();
7631 
7632  EVT LoMemVT, HiMemVT;
7633  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
7634 
7635  SDValue DataLo, DataHi;
7636  std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
7637 
7638  SDValue Scale = MSC->getScale();
7639  SDValue BasePtr = MSC->getBasePtr();
7640  SDValue IndexLo, IndexHi;
7641  std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
7642 
7643  MachineMemOperand *MMO = DAG.getMachineFunction().
7644  getMachineMemOperand(MSC->getPointerInfo(),
7645  MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
7646  Alignment, MSC->getAAInfo(), MSC->getRanges());
7647 
7648  SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale };
7649  SDValue Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
7650  DataLo.getValueType(), DL, OpsLo, MMO);
7651 
7652  // The order of the scatter operations after the split is well defined: the
7653  // "Hi" part comes after the "Lo" part, so the two operations are chained
7654  // one after another.
7655  SDValue OpsHi[] = { Lo, DataHi, MaskHi, BasePtr, IndexHi, Scale };
7656  return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
7657  DL, OpsHi, MMO);
7658 }
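
// Illustrative example (not from the original source): a scatter of v16i32
// data under a v16i1 SETCC mask on a target that splits v16i32 becomes two
// v8i32 scatters sharing BasePtr and Scale. Note that the "Hi" scatter's
// chain operand is the "Lo" scatter itself, so the two stores are serialized.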
7659 
7660 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
7661  if (Level >= AfterLegalizeTypes)
7662  return SDValue();
7663 
7664  MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
7665  SDValue Mask = MST->getMask();
7666  SDValue Data = MST->getValue();
7667  EVT VT = Data.getValueType();
7668  SDLoc DL(N);
7669 
7670  // If the MSTORE data type requires splitting and the mask is provided by a
7671  // SETCC, then split both nodes and their operands before legalization. This
7672  // prevents the type legalizer from unrolling SETCC into scalar comparisons
7673  // and enables future optimizations (e.g. min/max pattern matching on X86).
7674  if (Mask.getOpcode() == ISD::SETCC) {
7675  // Check if any splitting is required.
7676  if (TLI.getTypeAction(*DAG.getContext(), VT) !=
7677  TargetLowering::TypeSplitVector)
7678  return SDValue();
7679 
7680  SDValue MaskLo, MaskHi, Lo, Hi;
7681  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
7682 
7683  SDValue Chain = MST->getChain();
7684  SDValue Ptr = MST->getBasePtr();
7685 
7686  EVT MemoryVT = MST->getMemoryVT();
7687  unsigned Alignment = MST->getOriginalAlignment();
7688 
7689  // If the alignment is equal to the vector size,
7690  // take half of it for the second half.
7691  unsigned SecondHalfAlignment =
7692  (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;
7693 
7694  EVT LoMemVT, HiMemVT;
7695  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
7696 
7697  SDValue DataLo, DataHi;
7698  std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
7699 
7700  MachineMemOperand *MMO = DAG.getMachineFunction().
7701  getMachineMemOperand(MST->getPointerInfo(),
7702  MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
7703  Alignment, MST->getAAInfo(), MST->getRanges());
7704 
7705  Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
7706  MST->isTruncatingStore(),
7707  MST->isCompressingStore());
7708 
7709  Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
7710  MST->isCompressingStore());
7711  unsigned HiOffset = LoMemVT.getStoreSize();
7712 
7713  MMO = DAG.getMachineFunction().getMachineMemOperand(
7714  MST->getPointerInfo().getWithOffset(HiOffset),
7715  MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment,
7716  MST->getAAInfo(), MST->getRanges());
7717 
7718  Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
7719  MST->isTruncatingStore(),
7720  MST->isCompressingStore());
7721 
7722  AddToWorklist(Lo.getNode());
7723  AddToWorklist(Hi.getNode());
7724 
7725  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
7726  }
7727  return SDValue();
7728 }
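
// Worked alignment example (not from the original source): for a v8i32 store
// (32 bytes) with Alignment == 32, the high half begins HiOffset == 16 bytes
// in, so SecondHalfAlignment == 32 / 2 == 16. With Alignment == 8 the offset
// of 16 is already a multiple of 8 and the original alignment is kept.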
7729 
7730 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
7731  if (Level >= AfterLegalizeTypes)
7732  return SDValue();
7733 
7734  MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
7735  SDValue Mask = MGT->getMask();
7736  SDLoc DL(N);
7737 
7738  // If the MGATHER result requires splitting and the mask is provided by a
7739  // SETCC, then split both nodes and their operands before legalization. This
7740  // prevents the type legalizer from unrolling SETCC into scalar comparisons
7741  // and enables future optimizations (e.g. min/max pattern matching on X86).
7742 
7743  if (Mask.getOpcode() != ISD::SETCC)
7744  return SDValue();
7745 
7746  EVT VT = N->getValueType(0);
7747 
7748  // Check if any splitting is required.
7749  if (TLI.getTypeAction(*DAG.getContext(), VT) !=
7750  TargetLowering::TypeSplitVector)
7751  return SDValue();
7752 
7753  SDValue MaskLo, MaskHi, Lo, Hi;
7754  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
7755 
7756  SDValue PassThru = MGT->getPassThru();
7757  SDValue PassThruLo, PassThruHi;
7758  std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
7759 
7760  EVT LoVT, HiVT;
7761  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
7762 
7763  SDValue Chain = MGT->getChain();
7764  EVT MemoryVT = MGT->getMemoryVT();
7765  unsigned Alignment = MGT->getOriginalAlignment();
7766 
7767  EVT LoMemVT, HiMemVT;
7768  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
7769 
7770  SDValue Scale = MGT->getScale();
7771  SDValue BasePtr = MGT->getBasePtr();
7772  SDValue Index = MGT->getIndex();
7773  SDValue IndexLo, IndexHi;
7774  std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
7775 
7776  MachineMemOperand *MMO = DAG.getMachineFunction().
7777  getMachineMemOperand(MGT->getPointerInfo(),
7778  MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
7779  Alignment, MGT->getAAInfo(), MGT->getRanges());
7780 
7781  SDValue OpsLo[] = { Chain, PassThruLo, MaskLo, BasePtr, IndexLo, Scale };
7782  Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
7783  MMO);
7784 
7785  SDValue OpsHi[] = { Chain, PassThruHi, MaskHi, BasePtr, IndexHi, Scale };
7786  Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
7787  MMO);
7788 
7789  AddToWorklist(Lo.getNode());
7790  AddToWorklist(Hi.getNode());
7791 
7792  // Build a factor node to remember that this load is independent of the
7793  // other one.
7794  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
7795  Hi.getValue(1));
7796 
7797  // Legalized the chain result - switch anything that used the old chain to
7798  // use the new one.
7799  DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
7800 
7801  SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
7802 
7803  SDValue RetOps[] = { GatherRes, Chain };
7804  return DAG.getMergeValues(RetOps, DL);
7805 }
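
// Note (illustrative, not from the original source): unlike the scatter split
// above, the two half-gathers are independent loads, so both use the original
// chain and a TokenFactor merges their chain outputs; the RAUW on value #1
// then reroutes users of the old gather's chain to that TokenFactor.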
7806 
7807 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
7808  if (Level >= AfterLegalizeTypes)
7809  return SDValue();
7810 
7811  MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
7812  SDValue Mask = MLD->getMask();
7813  SDLoc DL(N);
7814 
7815  // If the MLOAD result requires splitting and the mask is provided by a
7816  // SETCC, then split both nodes and their operands before legalization. This
7817  // prevents the type legalizer from unrolling SETCC into scalar comparisons
7818  // and enables future optimizations (e.g. min/max pattern matching on X86).
7819  if (Mask.getOpcode() == ISD::SETCC) {
7820  EVT VT = N->getValueType(0);
7821 
7822  // Check if any splitting is required.
7823  if (TLI.getTypeAction(*DAG.getContext(), VT) !=
7824  TargetLowering::TypeSplitVector)
7825  return SDValue();
7826 
7827  SDValue MaskLo, MaskHi, Lo, Hi;
7828  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
7829 
7830  SDValue PassThru = MLD->getPassThru();
7831  SDValue PassThruLo, PassThruHi;
7832  std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
7833 
7834  EVT LoVT, HiVT;
7835  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
7836 
7837  SDValue Chain = MLD->getChain();
7838  SDValue Ptr = MLD->getBasePtr();
7839  EVT MemoryVT = MLD->getMemoryVT();
7840  unsigned Alignment = MLD->getOriginalAlignment();
7841 
7842  // If the alignment is equal to the vector size,
7843  // take half of it for the second half.
7844  unsigned SecondHalfAlignment =
7845  (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
7846  Alignment/2 : Alignment;
7847 
7848  EVT LoMemVT, HiMemVT;
7849  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
7850 
7851  MachineMemOperand *MMO = DAG.getMachineFunction().
7852  getMachineMemOperand(MLD->getPointerInfo(),
7853  MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
7854  Alignment, MLD->getAAInfo(), MLD->getRanges());
7855 
7856  Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, PassThruLo, LoMemVT,
7857  MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
7858 
7859  Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
7860  MLD->isExpandingLoad());
7861  unsigned HiOffset = LoMemVT.getStoreSize();
7862 
7863  MMO = DAG.getMachineFunction().getMachineMemOperand(
7864  MLD->getPointerInfo().getWithOffset(HiOffset),
7865  MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment,
7866  MLD->getAAInfo(), MLD->getRanges());
7867 
7868  Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, PassThruHi, HiMemVT,
7869  MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
7870 
7871  AddToWorklist(Lo.getNode());
7872  AddToWorklist(Hi.getNode());
7873 
7874  // Build a factor node to remember that this load is independent of the
7875  // other one.
7876  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
7877  Hi.getValue(1));
7878 
7879  // Legalized the chain result - switch anything that used the old chain to
7880  // use the new one.
7881  DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
7882 
7883  SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
7884 
7885  SDValue RetOps[] = { LoadRes, Chain };
7886  return DAG.getMergeValues(RetOps, DL);
7887  }
7888  return SDValue();
7889 }
7890 
7891 /// A vector select of 2 constant vectors can be simplified to math/logic to
7892 /// avoid a variable select instruction and possibly avoid constant loads.
7893 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
7894  SDValue Cond = N->getOperand(0);
7895  SDValue N1 = N->getOperand(1);
7896  SDValue N2 = N->getOperand(2);
7897  EVT VT = N->getValueType(0);
7898  if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
7899  !TLI.convertSelectOfConstantsToMath(VT) ||
7900  !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
7901  !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
7902  return SDValue();
7903 
7904  // Check if we can use the condition value to increment/decrement a single
7905  // constant value. This simplifies a select to an add and removes a constant
7906  // load/materialization from the general case.
7907  bool AllAddOne = true;
7908  bool AllSubOne = true;
7909  unsigned Elts = VT.getVectorNumElements();
7910  for (unsigned i = 0; i != Elts; ++i) {
7911  SDValue N1Elt = N1.getOperand(i);
7912  SDValue N2Elt = N2.getOperand(i);
7913  if (N1Elt.isUndef() || N2Elt.isUndef())
7914  continue;
7915 
7916  const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
7917  const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
7918  if (C1 != C2 + 1)
7919  AllAddOne = false;
7920  if (C1 != C2 - 1)
7921  AllSubOne = false;
7922  }
7923 
7924  // Further simplifications for the extra-special cases where the constants are
7925  // all 0 or all -1 should be implemented as folds of these patterns.
7926  SDLoc DL(N);
7927  if (AllAddOne || AllSubOne) {
7928  // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
7929  // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
7930  auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
7931  SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
7932  return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
7933  }
7934 
7935  // The general case for select-of-constants:
7936  // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
7937  // ...but that only makes sense if a vselect is slower than 2 logic ops, so
7938  // leave that to a machine-specific pass.
7939  return SDValue();
7940 }
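
// Illustrative example (not from the original source):
//   (vselect Cond:v4i1, (build_vector 3,3,3,3), (build_vector 2,2,2,2))
// has C1 == C2 + 1 in every lane, so it becomes
//   (add (zext Cond to v4i32), (build_vector 2,2,2,2))
// trading a variable select plus one constant vector for a zext and an add.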
7941 
7942 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
7943  SDValue N0 = N->getOperand(0);
7944  SDValue N1 = N->getOperand(1);
7945  SDValue N2 = N->getOperand(2);
7946  SDLoc DL(N);
7947 
7948  if (SDValue V = DAG.simplifySelect(N0, N1, N2))
7949  return V;
7950 
7951  // Canonicalize integer abs.
7952  // vselect (setg[te] X, 0), X, -X ->
7953  // vselect (setgt X, -1), X, -X ->
7954  // vselect (setl[te] X, 0), -X, X ->
7955  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
7956  if (N0.getOpcode() == ISD::SETCC) {
7957  SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
7958  ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7959  bool isAbs = false;
7960  bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
7961 
7962  if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
7963  (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
7964  N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
7965  isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
7966  else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
7967  N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
7968  isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
7969 
7970  if (isAbs) {
7971  EVT VT = LHS.getValueType();
7972  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
7973  return DAG.getNode(ISD::ABS, DL, VT, LHS);
7974 
7975  SDValue Shift = DAG.getNode(
7976  ISD::SRA, DL, VT, LHS,
7977  DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
7978  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
7979  AddToWorklist(Shift.getNode());
7980  AddToWorklist(Add.getNode());
7981  return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
7982  }
7983 
7984  // vselect x, y (fcmp lt x, y) -> fminnum x, y
7985  // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
7986  //
7987  // This is OK if we don't care about what happens if either operand is a
7988  // NaN.
7989  //
7990  EVT VT = N->getValueType(0);
7991  if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N0.getOperand(0), N0.getOperand(1))) {
7992  ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7993  if (SDValue FMinMax = combineMinNumMaxNum(
7994  DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
7995  return FMinMax;
7996  }
7997 
7998  // If this select has a condition (setcc) with narrower operands than the
7999  // select, try to widen the compare to match the select width.
8000  // TODO: This should be extended to handle any constant.
8001  // TODO: This could be extended to handle non-loading patterns, but that
8002  // requires thorough testing to avoid regressions.
8003  if (isNullOrNullSplat(RHS)) {
8004  EVT NarrowVT = LHS.getValueType();
8005  EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
8006  EVT SetCCVT = getSetCCResultType(LHS.getValueType());
8007  unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
8008  unsigned WideWidth = WideVT.getScalarSizeInBits();
8009  bool IsSigned = isSignedIntSetCC(CC);
8010  auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
8011  if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
8012  SetCCWidth != 1 && SetCCWidth < WideWidth &&
8013  TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
8014  TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
8015  // Both compare operands can be widened for free. The LHS can use an
8016  // extended load, and the RHS is a constant:
8017  // vselect (ext (setcc load(X), C)), N1, N2 -->
8018  // vselect (setcc extload(X), C'), N1, N2
8019  auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
8020  SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
8021  SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
8022  EVT WideSetCCVT = getSetCCResultType(WideVT);
8023  SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
8024  return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
8025  }
8026  }
8027  }
8028 
8029  if (SimplifySelectOps(N, N1, N2))
8030  return SDValue(N, 0); // Don't revisit N.
8031 
8032  // Fold (vselect (build_vector all_ones), N1, N2) -> N1
8033  if (ISD::isBuildVectorAllOnes(N0.getNode()))
8034  return N1;
8035  // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
8036  if (ISD::isBuildVectorAllZeros(N0.getNode()))
8037  return N2;
8038 
8039  // The ConvertSelectToConcatVector function assumes both the above
8040  // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
8041  // and addressed.
8042  if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
8043  N2.getOpcode() == ISD::CONCAT_VECTORS &&
8044  ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
8045  if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
8046  return CV;
8047  }
8048 
8049  if (SDValue V = foldVSelectOfConstants(N))
8050  return V;
8051 
8052  return SDValue();
8053 }
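
// Worked example of the abs expansion above (not from the original source),
// for v4i32 lanes:
//   Y   = (sra X, 31)          ; lane-wise splat of the sign bit: 0 or -1
//   Abs = (xor (add X, Y), Y)  ; for negative X: ~(X - 1) == -X
// so each lane yields |X| without a select.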
8054 
8055 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
8056  SDValue N0 = N->getOperand(0);
8057  SDValue N1 = N->getOperand(1);
8058  SDValue N2 = N->getOperand(2);
8059  SDValue N3 = N->getOperand(3);
8060  SDValue N4 = N->getOperand(4);
8061  ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
8062 
8063  // fold select_cc lhs, rhs, x, x, cc -> x
8064  if (N2 == N3)
8065  return N2;
8066 
8067  // Determine if the condition we're dealing with is constant
8068  if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
8069  CC, SDLoc(N), false)) {
8070  AddToWorklist(SCC.getNode());
8071 
8072  if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
8073  if (!SCCC->isNullValue())
8074  return N2; // cond always true -> true val
8075  else
8076  return N3; // cond always false -> false val
8077  } else if (SCC->isUndef()) {
8078  // When the condition is UNDEF, just return the first operand. This is
8079  // consistent with DAG creation: no setcc node is created in this case.
8080  return N2;
8081  } else if (SCC.getOpcode() == ISD::SETCC) {
8082  // Fold to a simpler select_cc
8083  return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
8084  SCC.getOperand(0), SCC.getOperand(1), N2, N3,
8085  SCC.getOperand(2));
8086  }
8087  }
8088 
8089  // If we can fold this based on the true/false value, do so.
8090  if (SimplifySelectOps(N, N2, N3))
8091  return SDValue(N, 0); // Don't revisit N.
8092 
8093  // fold select_cc into other things, such as min/max/abs
8094  return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
8095 }
8096 
8097 SDValue DAGCombiner::visitSETCC(SDNode *N) {
8098  // setcc is very commonly used as an argument to brcond. This pattern
8099  // also lends itself to numerous combines and, as a result, it is desirable
8100  // to keep the argument to a brcond as a setcc as much as possible.
8101  bool PreferSetCC =
8102  N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
8103 
8104  SDValue Combined = SimplifySetCC(
8105  N->getValueType(0), N->getOperand(0), N->getOperand(1),
8106  cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
8107 
8108  if (!Combined)
8109  return SDValue();
8110 
8111  // If we prefer to have a setcc and the combined node is not one, try our
8112  // best to recreate one using rebuildSetCC.
8113  if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
8114  SDValue NewSetCC = rebuildSetCC(Combined);
8115 
8116  // We don't have anything interesting to combine to.
8117  if (NewSetCC.getNode() == N)
8118  return SDValue();
8119 
8120  if (NewSetCC)
8121  return NewSetCC;
8122  }
8123 
8124  return Combined;
8125 }
8126 
8127 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
8128  SDValue LHS = N->getOperand(0);
8129  SDValue RHS = N->getOperand(1);
8130  SDValue Carry = N->getOperand(2);
8131  SDValue Cond = N->getOperand(3);
8132 
8133  // If Carry is false, fold to a regular SETCC.
8134  if (isNullConstant(Carry))
8135  return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
8136 
8137  return SDValue();
8138 }
8139 
8140 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
8141 /// a build_vector of constants.
8142 /// This function is called by the DAGCombiner when visiting sext/zext/aext
8143 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
8144 /// Vector extends are not folded if operations are legal; this is to
8145 /// avoid introducing illegal build_vector dag nodes.
8146 static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
8147  SelectionDAG &DAG, bool LegalTypes) {
8148  unsigned Opcode = N->getOpcode();
8149  SDValue N0 = N->getOperand(0);
8150  EVT VT = N->getValueType(0);
8151 
8152  assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
8153  Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
8154  Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
8155  && "Expected EXTEND dag node in input!");
8156 
8157  // fold (sext c1) -> c1
8158  // fold (zext c1) -> c1
8159  // fold (aext c1) -> c1
8160  if (isa<ConstantSDNode>(N0))
8161  return DAG.getNode(Opcode, SDLoc(N), VT, N0);
8162 
8163  // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
8164  // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
8165  // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
8166  EVT SVT = VT.getScalarType();
8167  if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
8168  ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
8169  return SDValue();
8170 
8171  // We can fold this node into a build_vector.
8172  unsigned VTBits = SVT.getSizeInBits();
8173  unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
8174  SmallVector<SDValue, 8> Elts;
8175  unsigned NumElts = VT.getVectorNumElements();
8176  SDLoc DL(N);
8177 
8178  // For zero-extensions, UNDEF elements still guarantee to have the upper
8179  // bits set to zero.
8180  bool IsZext =
8181  Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
8182 
8183  for (unsigned i = 0; i != NumElts; ++i) {
8184  SDValue Op = N0.getOperand(i);
8185  if (Op.isUndef()) {
8186  Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
8187  continue;
8188  }
8189 
8190  SDLoc DL(Op);
8191  // Get the constant value and if needed trunc it to the size of the type.
8192  // Nodes like build_vector might have constants wider than the scalar type.
8193  APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
8194  if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
8195  Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
8196  else
8197  Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
8198  }
8199 
8200  return DAG.getBuildVector(VT, DL, Elts);
8201 }
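
// Illustrative example (not from the original source):
//   (v4i32 zext (v4i16 build_vector 0xFFFF, undef, 2, 3))
// folds to
//   (v4i32 build_vector 0x0000FFFF, 0, 2, 3)
// where the undef lane becomes 0 because zext guarantees zeroed high bits.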
8202 
8203 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
8204 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
8205 // transformation. Returns true if extensions are possible and the
8206 // above-mentioned transformation is profitable.
8207 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
8208  unsigned ExtOpc,
8209  SmallVectorImpl<SDNode *> &ExtendNodes,
8210  const TargetLowering &TLI) {
8211  bool HasCopyToRegUses = false;
8212  bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
8213  for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
8214  UE = N0.getNode()->use_end();
8215  UI != UE; ++UI) {
8216  SDNode *User = *UI;
8217  if (User == N)
8218  continue;
8219  if (UI.getUse().getResNo() != N0.getResNo())
8220  continue;
8221  // FIXME: Only extend SETCC N, N and SETCC N, c for now.
8222  if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
8223  ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
8224  if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
8225  // Sign bits will be lost after a zext.
8226  return false;
8227  bool Add = false;
8228  for (unsigned i = 0; i != 2; ++i) {
8229  SDValue UseOp = User->getOperand(i);
8230  if (UseOp == N0)
8231  continue;
8232  if (!isa<ConstantSDNode>(UseOp))
8233  return false;
8234  Add = true;
8235  }
8236  if (Add)
8237  ExtendNodes.push_back(User);
8238  continue;
8239  }
8240  // If truncates aren't free and there are users we can't
8241  // extend, it isn't worthwhile.
8242  if (!isTruncFree)
8243  return false;
8244  // Remember if this value is live-out.
8245  if (User->getOpcode() == ISD::CopyToReg)
8246  HasCopyToRegUses = true;
8247  }
8248 
8249  if (HasCopyToRegUses) {
8250  bool BothLiveOut = false;
8251  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
8252  UI != UE; ++UI) {
8253  SDUse &Use = UI.getUse();
8254  if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
8255  BothLiveOut = true;
8256  break;
8257  }
8258  }
8259  if (BothLiveOut)
8260  // Both unextended and extended values are live out. There had better be
8261  // a good reason for the transformation.
8262  return ExtendNodes.size();
8263  }
8264  return true;
8265 }
8266 
8267 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
8268  SDValue OrigLoad, SDValue ExtLoad,
8269  ISD::NodeType ExtType) {
8270  // Extend SetCC uses if necessary.
8271  SDLoc DL(ExtLoad);
8272  for (SDNode *SetCC : SetCCs) {
8273  SmallVector<SDValue, 4> Ops;
8274 
8275  for (unsigned j = 0; j != 2; ++j) {
8276  SDValue SOp = SetCC->getOperand(j);
8277  if (SOp == OrigLoad)
8278  Ops.push_back(ExtLoad);
8279  else
8280  Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
8281  }
8282 
8283  Ops.push_back(SetCC->getOperand(2));
8284  CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
8285  }
8286 }
8287 
8288 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
8289 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
8290  SDValue N0 = N->getOperand(0);
8291  EVT DstVT = N->getValueType(0);
8292  EVT SrcVT = N0.getValueType();
8293 
8294  assert((N->getOpcode() == ISD::SIGN_EXTEND ||
8295  N->getOpcode() == ISD::ZERO_EXTEND) &&
8296  "Unexpected node type (not an extend)!");
8297 
8298  // fold (sext (load x)) to multiple smaller sextloads; same for zext.
8299  // For example, on a target with legal v4i32, but illegal v8i32, turn:
8300  // (v8i32 (sext (v8i16 (load x))))
8301  // into:
8302  // (v8i32 (concat_vectors (v4i32 (sextload x)),
8303  // (v4i32 (sextload (x + 16)))))
8304  // Where uses of the original load, i.e.:
8305  // (v8i16 (load x))
8306  // are replaced with:
8307  // (v8i16 (truncate
8308  // (v8i32 (concat_vectors (v4i32 (sextload x)),
8309  // (v4i32 (sextload (x + 16)))))))
8310  //
8311  // This combine is only applicable to illegal, but splittable, vectors.
8312  // All legal types, and illegal non-vector types, are handled elsewhere.
8313  // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
8314  //
8315  if (N0->getOpcode() != ISD::LOAD)
8316  return SDValue();
8317 
8318  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8319 
8320  if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
8321  !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
8322  !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
8323  return SDValue();
8324 
8325  SmallVector<SDNode *, 4> SetCCs;
8326  if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
8327  return SDValue();
8328 
8329  ISD::LoadExtType ExtType =
8330  N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
8331 
8332  // Try to split the vector types to get down to legal types.
8333  EVT SplitSrcVT = SrcVT;
8334  EVT SplitDstVT = DstVT;
8335  while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
8336  SplitSrcVT.getVectorNumElements() > 1) {
8337  SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
8338  SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
8339  }
8340 
8341  if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
8342  return SDValue();
8343 
8344  SDLoc DL(N);
8345  const unsigned NumSplits =
8346  DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
8347  const unsigned Stride = SplitSrcVT.getStoreSize();
8348  SmallVector<SDValue, 4> Loads;
8349  SmallVector<SDValue, 4> Chains;
8350 
8351  SDValue BasePtr = LN0->getBasePtr();
8352  for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
8353  const unsigned Offset = Idx * Stride;
8354  const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
8355 
8356  SDValue SplitLoad = DAG.getExtLoad(
8357  ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
8358  LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
8359  LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
8360 
8361  BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
8362  DAG.getConstant(Stride, DL, BasePtr.getValueType()));
8363 
8364  Loads.push_back(SplitLoad.getValue(0));
8365  Chains.push_back(SplitLoad.getValue(1));
8366  }
8367 
8368  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
8369  SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
8370 
8371  // Simplify TF.
8372  AddToWorklist(NewChain.getNode());
8373 
8374  CombineTo(N, NewValue);
8375 
8376  // Replace uses of the original load (before extension)
8377  // with a truncate of the concatenated sextloaded vectors.
8378  SDValue Trunc =
8379  DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
8380  ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
8381  CombineTo(N0.getNode(), Trunc, NewChain);
8382  return SDValue(N, 0); // Return N so it doesn't get rechecked!
8383 }
8384 
8385 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
8386 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
8387 SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
8388  assert(N->getOpcode() == ISD::ZERO_EXTEND);
8389  EVT VT = N->getValueType(0);
8390 
8391  // and/or/xor
8392  SDValue N0 = N->getOperand(0);
8393  if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
8394  N0.getOpcode() == ISD::XOR) ||
8395  N0.getOperand(1).getOpcode() != ISD::Constant ||
8396  (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
8397  return SDValue();
8398 
8399  // shl/shr
8400  SDValue N1 = N0->getOperand(0);
8401  if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
8402  N1.getOperand(1).getOpcode() != ISD::Constant ||
8403  (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
8404  return SDValue();
8405 
8406  // load
8407  if (!isa<LoadSDNode>(N1.getOperand(0)))
8408  return SDValue();
8409  LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
8410  EVT MemVT = Load->getMemoryVT();
8411  if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
8412  Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
8413  return SDValue();
8414 
8415 
8416  // If the shift op is SHL, the logic op must be AND, otherwise the result
8417  // will be wrong.
8418  if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
8419  return SDValue();
8420 
8421  if (!N0.hasOneUse() || !N1.hasOneUse())
8422  return SDValue();
8423 
8424  SmallVector<SDNode*, 4> SetCCs;
8425  if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
8426  ISD::ZERO_EXTEND, SetCCs, TLI))
8427  return SDValue();
8428 
8429  // Actually do the transformation.
8430  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
8431  Load->getChain(), Load->getBasePtr(),
8432  Load->getMemoryVT(), Load->getMemOperand());
8433 
8434  SDLoc DL1(N1);
8435  SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
8436  N1.getOperand(1));
8437 
8438  APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
8439  Mask = Mask.zext(VT.getSizeInBits());
8440  SDLoc DL0(N0);
8441  SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
8442  DAG.getConstant(Mask, DL0, VT));
8443 
8444  ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
8445  CombineTo(N, And);
8446  if (SDValue(Load, 0).hasOneUse()) {
8447  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
8448  } else {
8449  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
8450  Load->getValueType(0), ExtLoad);
8451  CombineTo(Load, Trunc, ExtLoad.getValue(1));
8452  }
8453  return SDValue(N,0); // Return N so it doesn't get rechecked!
8454 }
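
// Illustrative example (not from the original source) of why SHL pairs only
// with AND above: for an i8 load zero-extended to i32,
//   (zext (or (shl (load x), 4), C))
// differs from (or (shl (zextload x), 4), (zext C)) because the narrow shl
// discards the top 4 loaded bits while the wide shl keeps them at bits 8-11.
// An AND with (zext C) re-clears those high bits, so the fold stays correct.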
8455 
8456 /// If we're narrowing or widening the result of a vector select and the final
8457 /// size is the same size as a setcc (compare) feeding the select, then try to
8458 /// apply the cast operation to the select's operands because matching vector
8459 /// sizes for a select condition and other operands should be more efficient.
8460 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
8461  unsigned CastOpcode = Cast->getOpcode();
8462  assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
8463  CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
8464  CastOpcode == ISD::FP_ROUND) &&
8465  "Unexpected opcode for vector select narrowing/widening");
8466 
8467  // We only do this transform before legal ops because the pattern may be
8468  // obfuscated by target-specific operations after legalization. Do not create
8469  // an illegal select op, however, because that may be difficult to lower.
8470  EVT VT = Cast->getValueType(0);
8471  if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
8472  return SDValue();
8473 
8474  SDValue VSel = Cast->getOperand(0);
8475  if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
8476  VSel.getOperand(0).getOpcode() != ISD::SETCC)
8477  return SDValue();
8478 
8479  // Does the setcc have the same vector size as the casted select?
8480  SDValue SetCC = VSel.getOperand(0);
8481  EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
8482  if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
8483  return SDValue();
8484 
8485  // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
8486  SDValue A = VSel.getOperand(1);
8487  SDValue B = VSel.getOperand(2);
8488  SDValue CastA, CastB;
8489  SDLoc DL(Cast);
8490  if (CastOpcode == ISD::FP_ROUND) {
8491  // FP_ROUND (fptrunc) has an extra flag operand to pass along.
8492  CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
8493  CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
8494  } else {
8495  CastA = DAG.getNode(CastOpcode, DL, VT, A);
8496  CastB = DAG.getNode(CastOpcode, DL, VT, B);
8497  }
8498  return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
8499 }
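
// Illustrative example (not from the original source):
//   (v4i32 trunc (v4i64 vselect (v4i32 setcc X, Y, cc), A, B))
// has a cast result (v4i32) with the same total size as the setcc, so the
// truncate is pushed into the select arms:
//   (v4i32 vselect (setcc X, Y, cc), (trunc A), (trunc B))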
8500 
8501 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
8502 // fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
8503 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
8504  const TargetLowering &TLI, EVT VT,
8505  bool LegalOperations, SDNode *N,
8506  SDValue N0, ISD::LoadExtType ExtLoadType) {
8507  SDNode *N0Node = N0.getNode();
8508  bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
8509  : ISD::isZEXTLoad(N0Node);
8510  if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
8511  !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
8512  return {};
8513 
8514  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8515  EVT MemVT = LN0->getMemoryVT();
8516  if ((LegalOperations || LN0->isVolatile() || VT.isVector()) &&
8517  !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
8518  return {};
8519 
8520  SDValue ExtLoad =
8521  DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
8522  LN0->getBasePtr(), MemVT, LN0->getMemOperand());
8523  Combiner.CombineTo(N, ExtLoad);
8524  DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
8525  return SDValue(N, 0); // Return N so it doesn't get rechecked!
8526 }
8527 
8528 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
8529 // Only generate vector extloads when 1) they're legal, and 2) they are
8530 // deemed desirable by the target.
8531 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
8532  const TargetLowering &TLI, EVT VT,
8533  bool LegalOperations, SDNode *N, SDValue N0,
8534  ISD::LoadExtType ExtLoadType,
8535  ISD::NodeType ExtOpc) {
8536  if (!ISD::isNON_EXTLoad(N0.getNode()) ||
8537  !ISD::isUNINDEXEDLoad(N0.getNode()) ||
8538  ((LegalOperations || VT.isVector() ||
8539  cast<LoadSDNode>(N0)->isVolatile()) &&
8540  !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
8541  return {};
8542 
8543  bool DoXform = true;
8544  SmallVector<SDNode *, 4> SetCCs;
8545  if (!N0.hasOneUse())
8546  DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
8547  if (VT.isVector())
8548  DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
8549  if (!DoXform)
8550  return {};
8551 
8552  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8553  SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
8554  LN0->getBasePtr(), N0.getValueType(),
8555  LN0->getMemOperand());
8556  Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
8557  // If the load value is used only by N, replace it via CombineTo N.
8558  bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
8559  Combiner.CombineTo(N, ExtLoad);
8560  if (NoReplaceTrunc) {
8561  DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
8562  } else {
8563  SDValue Trunc =
8564  DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
8565  Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
8566  }
8567  return SDValue(N, 0); // Return N so it doesn't get rechecked!
8568 }
8569 
8570 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
8571  bool LegalOperations) {
8572  assert((N->getOpcode() == ISD::SIGN_EXTEND ||
8573  N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
8574 
8575  SDValue SetCC = N->getOperand(0);
8576  if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
8577  !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
8578  return SDValue();
8579 
8580  SDValue X = SetCC.getOperand(0);
8581  SDValue Ones = SetCC.getOperand(1);
8582  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
8583  EVT VT = N->getValueType(0);
8584  EVT XVT = X.getValueType();
8585  // setge X, C is canonicalized to setgt, so we do not need to match that
8586  // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
8587  // not require the 'not' op.
8588  if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
8589  // Invert and smear/shift the sign bit:
8590  // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
8591  // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
8592  SDLoc DL(N);
8593  SDValue NotX = DAG.getNOT(DL, X, VT);
8594  SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
8595  auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
8596  return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
8597  }
8598  return SDValue();
8599 }
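
// Worked example (not from the original source): for X:i32 == 0x80000000,
//   sext i1 (setgt X, -1) == sext(false) == 0
// and the replacement gives (sra (not X), 31) == (sra 0x7FFFFFFF, 31) == 0.
// For non-negative X, (not X) has the sign bit set and the arithmetic shift
// smears it to -1, again matching sext(true).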
8600 
8601 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
8602  SDValue N0 = N->getOperand(0);
8603  EVT VT = N->getValueType(0);
8604  SDLoc DL(N);
8605 
8606  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
8607  return Res;
8608 
8609  // fold (sext (sext x)) -> (sext x)
8610  // fold (sext (aext x)) -> (sext x)
8611  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
8612  return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
8613 
8614  if (N0.getOpcode() == ISD::TRUNCATE) {
8615  // fold (sext (truncate (load x))) -> (sext (smaller load x))
8616  // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
8617  if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
8618  SDNode *oye = N0.getOperand(0).getNode();
8619  if (NarrowLoad.getNode() != N0.getNode()) {
8620  CombineTo(N0.getNode(), NarrowLoad);
8621  // CombineTo deleted the truncate, if needed, but not what's under it.
8622  AddToWorklist(oye);
8623  }
8624  return SDValue(N, 0); // Return N so it doesn't get rechecked!
8625  }
8626 
8627  // See if the value being truncated is already sign extended. If so, just
8628  // eliminate the trunc/sext pair.
8629  SDValue Op = N0.getOperand(0);
8630  unsigned OpBits = Op.getScalarValueSizeInBits();
8631  unsigned MidBits = N0.getScalarValueSizeInBits();
8632  unsigned DestBits = VT.getScalarSizeInBits();
8633  unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
8634 
8635  if (OpBits == DestBits) {
8636  // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
8637  // bits, it is already ready.
8638  if (NumSignBits > DestBits-MidBits)
8639  return Op;
8640  } else if (OpBits < DestBits) {
8641  // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
8642  // bits, just sext from i32.
8643  if (NumSignBits > OpBits-MidBits)
8644  return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
8645  } else {
8646  // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
8647  // bits, just truncate to i32.
8648  if (NumSignBits > OpBits-MidBits)
8649  return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
8650  }
8651 
8652  // fold (sext (truncate x)) -> (sextinreg x).
8653  if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
8654  N0.getValueType())) {
8655  if (OpBits < DestBits)
8656  Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
8657  else if (OpBits > DestBits)
8658  Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
8659  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
8660  DAG.getValueType(N0.getValueType()));
8661  }
8662  }
8663 
8664  // Try to simplify (sext (load x)).
8665  if (SDValue foldedExt =
8666  tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
8667  ISD::SEXTLOAD, ISD::SIGN_EXTEND))
8668  return foldedExt;
8669 
8670  // fold (sext (load x)) to multiple smaller sextloads.
8671  // Only on illegal but splittable vectors.
8672  if (SDValue ExtLoad = CombineExtLoad(N))
8673  return ExtLoad;
8674 
8675  // Try to simplify (sext (sextload x)).
8676  if (SDValue foldedExt = tryToFoldExtOfExtload(
8677  DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
8678  return foldedExt;
8679 
8680  // fold (sext (and/or/xor (load x), cst)) ->
8681  // (and/or/xor (sextload x), (sext cst))
8682  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
8683  N0.getOpcode() == ISD::XOR) &&
8684  isa<LoadSDNode>(N0.getOperand(0)) &&
8685  N0.getOperand(1).getOpcode() == ISD::Constant &&
8686  (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
8687  LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
8688  EVT MemVT = LN00->getMemoryVT();
8689  if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
8690  LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
8691  SmallVector<SDNode*, 4> SetCCs;
8692  bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
8693  ISD::SIGN_EXTEND, SetCCs, TLI);
8694  if (DoXform) {
8695  SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
8696  LN00->getChain(), LN00->getBasePtr(),
8697  LN00->getMemoryVT(),
8698  LN00->getMemOperand());
8699  APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
8700  Mask = Mask.sext(VT.getSizeInBits());
8701  SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
8702  ExtLoad, DAG.getConstant(Mask, DL, VT));
8703  ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
8704  bool NoReplaceTruncAnd = !N0.hasOneUse();
8705  bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
8706  CombineTo(N, And);
8707  // If N0 has multiple uses, change other uses as well.
8708  if (NoReplaceTruncAnd) {
8709  SDValue TruncAnd =
8710  DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
8711  CombineTo(N0.getNode(), TruncAnd);
8712  }
8713  if (NoReplaceTrunc) {
8714  DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
8715  } else {
8716  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
8717  LN00->getValueType(0), ExtLoad);
8718  CombineTo(LN00, Trunc, ExtLoad.getValue(1));
8719  }
8720  return SDValue(N,0); // Return N so it doesn't get rechecked!
8721  }
8722  }
8723  }
8724 
8725  if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
8726  return V;
8727 
8728  if (N0.getOpcode() == ISD::SETCC) {
8729  SDValue N00 = N0.getOperand(0);
8730  SDValue N01 = N0.getOperand(1);
8731  ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
8732  EVT N00VT = N0.getOperand(0).getValueType();
8733 
8734  // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
8735  // Only do this before legalize for now.
8736  if (VT.isVector() && !LegalOperations &&
8737  TLI.getBooleanContents(N00VT) ==
8738  TargetLowering::ZeroOrNegativeOneBooleanContent) {
8739  // On some architectures (such as SSE/NEON/etc) the SETCC result type is
8740  // of the same size as the compared operands. Only optimize sext(setcc())
8741  // if this is the case.
8742  EVT SVT = getSetCCResultType(N00VT);
8743 
8744  // If we already have the desired type, don't change it.
8745  if (SVT != N0.getValueType()) {
8746  // We know that the # elements of the results is the same as the
8747  // # elements of the compare (and the # elements of the compare result
8748  // for that matter). Check to see that they are the same size. If so,
8749  // we know that the element size of the sext'd result matches the
8750  // element size of the compare operands.
8751  if (VT.getSizeInBits() == SVT.getSizeInBits())
8752  return DAG.getSetCC(DL, VT, N00, N01, CC);
8753 
8754  // If the desired elements are smaller or larger than the source
8755  // elements, we can use a matching integer vector type and then
8756  // truncate/sign extend.
8757  EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
8758  if (SVT == MatchingVecType) {
8759  SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
8760  return DAG.getSExtOrTrunc(VsetCC, DL, VT);
8761  }
8762  }
8763  }
8764 
8765  // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
8766  // Here, T can be 1 or -1, depending on the type of the setcc and
8767  // getBooleanContents().
8768  unsigned SetCCWidth = N0.getScalarValueSizeInBits();
8769 
8770  // To determine the "true" side of the select, we need to know the high bit
8771  // of the value returned by the setcc if it evaluates to true.
8772  // If the type of the setcc is i1, then the true case of the select is just
8773  // sext(i1 1), that is, -1.
8774  // If the type of the setcc is larger (say, i8) then the value of the high
8775  // bit depends on getBooleanContents(), so ask TLI for a real "true" value
8776  // of the appropriate width.
8777  SDValue ExtTrueVal = (SetCCWidth == 1)
8778  ? DAG.getAllOnesConstant(DL, VT)
8779  : DAG.getBoolConstant(true, DL, VT, N00VT);
8780  SDValue Zero = DAG.getConstant(0, DL, VT);
8781  if (SDValue SCC =
8782  SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
8783  return SCC;
8784 
8785  if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
8786  EVT SetCCVT = getSetCCResultType(N00VT);
8787  // Don't do this transform for i1 because there's a select transform
8788  // that would reverse it.
8789  // TODO: We should not do this transform at all without a target hook
8790  // because a sext is likely cheaper than a select?
8791  if (SetCCVT.getScalarSizeInBits() != 1 &&
8792  (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
8793  SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
8794  return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
8795  }
8796  }
8797  }
8798 
8799  // fold (sext x) -> (zext x) if the sign bit is known zero.
8800  if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
8801  DAG.SignBitIsZero(N0))
8802  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
8803 
8804  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
8805  return NewVSel;
8806 
8807  return SDValue();
8808 }
8809 
8810 // isTruncateOf - If N is a truncate of some other value, return true and record
8811 // the value being truncated in Op and which of Op's bits are zero/one in Known.
8812 // This function computes KnownBits to avoid a duplicated call to
8813 // computeKnownBits in the caller.
8814 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
8815  KnownBits &Known) {
8816  if (N->getOpcode() == ISD::TRUNCATE) {
8817  Op = N->getOperand(0);
8818  Known = DAG.computeKnownBits(Op);
8819  return true;
8820  }
8821 
8822  if (N.getOpcode() != ISD::SETCC ||
8823  N.getValueType().getScalarType() != MVT::i1 ||
8824  cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
8825  return false;
8826 
8827  SDValue Op0 = N->getOperand(0);
8828  SDValue Op1 = N->getOperand(1);
8829  assert(Op0.getValueType() == Op1.getValueType());
8830 
8831  if (isNullOrNullSplat(Op0))
8832  Op = Op1;
8833  else if (isNullOrNullSplat(Op1))
8834  Op = Op0;
8835  else
8836  return false;
8837 
8838  Known = DAG.computeKnownBits(Op);
8839 
8840  return (Known.Zero | 1).isAllOnesValue();
8841 }
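
// Illustrative example (not from the original source): if computeKnownBits
// proves that X:i32 has all bits but bit 0 known zero, then
//   (i1 setcc X, 0, setne)
// is 1 iff bit 0 of X is 1, i.e. it behaves exactly like (i1 truncate X);
// the (Known.Zero | 1).isAllOnesValue() check verifies precisely this.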
8842 
8843 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
8844  SDValue N0 = N->getOperand(0);
8845  EVT VT = N->getValueType(0);
8846 
8847  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
8848  return Res;
8849 
8850  // fold (zext (zext x)) -> (zext x)
8851  // fold (zext (aext x)) -> (zext x)
8852  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
8853  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
8854  N0.getOperand(0));
8855 
8856  // fold (zext (truncate x)) -> (zext x) or
8857  // (zext (truncate x)) -> (truncate x)
8858  // This is valid when the truncated bits of x are already zero.
8859  SDValue Op;
8860  KnownBits Known;
8861  if (isTruncateOf(DAG, N0, Op, Known)) {
8862  APInt TruncatedBits =
8863  (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
8864  APInt(Op.getScalarValueSizeInBits(), 0) :
8865  APInt::getBitsSet(Op.getScalarValueSizeInBits(),
8866  N0.getScalarValueSizeInBits(),
8867  std::min(Op.getScalarValueSizeInBits(),
8868  VT.getScalarSizeInBits()));
8869  if (TruncatedBits.isSubsetOf(Known.Zero))
8870  return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
8871  }
8872 
8873  // fold (zext (truncate x)) -> (and x, mask)
8874  if (N0.getOpcode() == ISD::TRUNCATE) {
8875  // fold (zext (truncate (load x))) -> (zext (smaller load x))
8876  // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
8877  if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
8878  SDNode *oye = N0.getOperand(0).getNode();
8879  if (NarrowLoad.getNode() != N0.getNode()) {
8880  CombineTo(N0.getNode(), NarrowLoad);
8881  // CombineTo deleted the truncate, if needed, but not what's under it.
8882  AddToWorklist(oye);
8883  }
8884  return SDValue(N, 0); // Return N so it doesn't get rechecked!
8885  }
8886 
8887  EVT SrcVT = N0.getOperand(0).getValueType();
8888  EVT MinVT = N0.getValueType();
8889 
8890  // Try to mask before the extension to avoid having to generate a larger mask,
8891  // possibly over several sub-vectors.
8892  if (SrcVT.bitsLT(VT) && VT.isVector()) {
8893  if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
8894  TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
8895  SDValue Op = N0.getOperand(0);
8896  Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
8897  AddToWorklist(Op.getNode());
8898  SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
8899  // Transfer the debug info; the new node is equivalent to N0.
8900  DAG.transferDbgValues(N0, ZExtOrTrunc);
8901  return ZExtOrTrunc;
8902  }
8903  }
8904 
8905  if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
8906  SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
8907  AddToWorklist(Op.getNode());
8908  SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
8909  // We may safely transfer the debug info describing the truncate node over
8910  // to the equivalent and operation.
8911  DAG.transferDbgValues(N0, And);
8912  return And;
8913  }
8914  }
8915 
8916  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
8917  // if either of the casts is not free.
8918  if (N0.getOpcode() == ISD::AND &&
8919  N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
8920  N0.getOperand(1).getOpcode() == ISD::Constant &&
8921  (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
8922  N0.getValueType()) ||
8923  !TLI.isZExtFree(N0.getValueType(), VT))) {
8924  SDValue X = N0.getOperand(0).getOperand(0);
8925  X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
8926  APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
8927  Mask = Mask.zext(VT.getSizeInBits());
8928  SDLoc DL(N);
8929  return DAG.getNode(ISD::AND, DL, VT,
8930  X, DAG.getConstant(Mask, DL, VT));
8931  }
8932 
8933  // Try to simplify (zext (load x)).
8934  if (SDValue foldedExt =
8935  tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
8936  ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
8937  return foldedExt;
8938 
8939  // fold (zext (load x)) to multiple smaller zextloads.
8940  // Only on illegal but splittable vectors.
8941  if (SDValue ExtLoad = CombineExtLoad(N))
8942  return ExtLoad;
8943 
8944  // fold (zext (and/or/xor (load x), cst)) ->
8945  // (and/or/xor (zextload x), (zext cst))
8946  // Unless (and (load x) cst) will match as a zextload already and has
8947  // additional users.
8948  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
8949  N0.getOpcode() == ISD::XOR) &&
8950  isa<LoadSDNode>(N0.getOperand(0)) &&
8951  N0.getOperand(1).getOpcode() == ISD::Constant &&
8952  (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
8953  LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
8954  EVT MemVT = LN00->getMemoryVT();
8955  if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
8956  LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
8957  bool DoXform = true;
8958  SmallVector<SDNode*, 4> SetCCs;
8959  if (!N0.hasOneUse()) {
8960  if (N0.getOpcode() == ISD::AND) {
8961  auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
8962  EVT LoadResultTy = AndC->getValueType(0);
8963  EVT ExtVT;
8964  if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
8965  DoXform = false;
8966  }
8967  }
8968  if (DoXform)
8969  DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
8970  ISD::ZERO_EXTEND, SetCCs, TLI);
8971  if (DoXform) {
8972  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
8973  LN00->getChain(), LN00->getBasePtr(),
8974  LN00->getMemoryVT(),
8975  LN00->getMemOperand());
8976  APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
8977  Mask = Mask.zext(VT.getSizeInBits());
8978  SDLoc DL(N);
8979  SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
8980  ExtLoad, DAG.getConstant(Mask, DL, VT));
8981  ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
8982  bool NoReplaceTruncAnd = !N0.hasOneUse();
8983  bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
8984  CombineTo(N, And);
8985  // If N0 has multiple uses, change other uses as well.
8986  if (NoReplaceTruncAnd) {
8987  SDValue TruncAnd =
8988  DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
8989  CombineTo(N0.getNode(), TruncAnd);
8990  }
8991  if (NoReplaceTrunc) {
8992  DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
8993  } else {
8994  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
8995  LN00->getValueType(0), ExtLoad);
8996  CombineTo(LN00, Trunc, ExtLoad.getValue(1));
8997  }
8998  return SDValue(N,0); // Return N so it doesn't get rechecked!
8999  }
9000  }
9001  }
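// For instance (an assumed example): when a zextload from i32 to i64 is
// legal, (zext i64 (and (load i32 %p), 0xFF00)) is rewritten to
// (and (zextload i32 %p to i64), 0xFF00), folding the extension into the
// load and widening the mask to i64.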
9002 
9003  // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
9004  // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
9005  if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
9006  return ZExtLoad;
9007 
9008  // Try to simplify (zext (zextload x)).
9009  if (SDValue foldedExt = tryToFoldExtOfExtload(
9010  DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
9011  return foldedExt;
9012 
9013  if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
9014  return V;
9015 
9016  if (N0.getOpcode() == ISD::SETCC) {
9017  // Only do this before legalize for now.
9018  if (!LegalOperations && VT.isVector() &&
9019  N0.getValueType().getVectorElementType() == MVT::i1) {
9020  EVT N00VT = N0.getOperand(0).getValueType();
9021  if (getSetCCResultType(N00VT) == N0.getValueType())
9022  return SDValue();
9023 
9024  // We know that the # elements of the result is the same as the #
9025  // elements of the compare (and the # elements of the compare result for
9026  // that matter). Check to see that they are the same size. If so, we know
9027  // that the element size of the zext'd result matches the element size of
9028  // the compare operands.
9029  SDLoc DL(N);
9030  SDValue VecOnes = DAG.getConstant(1, DL, VT);
9031  if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
9032  // zext(setcc) -> (and (vsetcc), (1, 1, ...)) for vectors.
9033  SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
9034  N0.getOperand(1), N0.getOperand(2));
9035  return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
9036  }
9037 
9038  // If the desired elements are smaller or larger than the source
9039  // elements we can use a matching integer vector type and then
9040  // truncate/sign extend.
9041  EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
9042  SDValue VsetCC =
9043  DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
9044  N0.getOperand(1), N0.getOperand(2));
9045  return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
9046  VecOnes);
9047  }
9048 
9049  // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
9050  SDLoc DL(N);
9051  if (SDValue SCC = SimplifySelectCC(
9052  DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
9053  DAG.getConstant(0, DL, VT),
9054  cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
9055  return SCC;
9056  }
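// Sketch of the vector setcc case above (hypothetical types): for
// (zext (setcc v4i32 %a, %b, ult) : v4i1) to v4i32, the destination v4i32
// matches the 128-bit size of the compare operands, so the combine emits
// (and (setcc v4i32 %a, %b, ult), (splat 1)): a full-width compare masked
// down to 0/1 per lane.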
9057 
9058  // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
9059  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
9060  isa<ConstantSDNode>(N0.getOperand(1)) &&
9061  N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
9062  N0.hasOneUse()) {
9063  SDValue ShAmt = N0.getOperand(1);
9064  unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
9065  if (N0.getOpcode() == ISD::SHL) {
9066  SDValue InnerZExt = N0.getOperand(0);
9067  // If the original shl may be shifting out bits, do not perform this
9068  // transformation.
9069  unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
9070  InnerZExt.getOperand(0).getValueSizeInBits();
9071  if (ShAmtVal > KnownZeroBits)
9072  return SDValue();
9073  }
9074 
9075  SDLoc DL(N);
9076 
9077  // Ensure that the shift amount is wide enough for the shifted value.
9078  if (VT.getSizeInBits() >= 256)
9079  ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
9080 
9081  return DAG.getNode(N0.getOpcode(), DL, VT,
9082  DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
9083  ShAmt);
9084  }
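// Worked instance of the shift fold (illustrative): with %x : i8 zero
// extended to i16, the top 8 bits are known zero, so any shift amount of at
// most 8 cannot shift set bits out, and
// (zext i32 (shl (zext i8 %x to i16), 5)) can safely become
// (shl (zext ... to i32), 5).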
9085 
9086  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
9087  return NewVSel;
9088 
9089  return SDValue();
9090 }
9091 
9092 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
9093  SDValue N0 = N->getOperand(0);
9094  EVT VT = N->getValueType(0);
9095 
9096  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
9097  return Res;
9098 
9099  // fold (aext (aext x)) -> (aext x)
9100  // fold (aext (zext x)) -> (zext x)
9101  // fold (aext (sext x)) -> (sext x)
9102  if (N0.getOpcode() == ISD::ANY_EXTEND ||
9103  N0.getOpcode() == ISD::ZERO_EXTEND ||
9104  N0.getOpcode() == ISD::SIGN_EXTEND)
9105  return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
9106 
9107  // fold (aext (truncate (load x))) -> (aext (smaller load x))
9108  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
9109  if (N0.getOpcode() == ISD::TRUNCATE) {
9110  if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
9111  SDNode *oye = N0.getOperand(0).getNode();
9112  if (NarrowLoad.getNode() != N0.getNode()) {
9113  CombineTo(N0.getNode(), NarrowLoad);
9114  // CombineTo deleted the truncate, if needed, but not what's under it.
9115  AddToWorklist(oye);
9116  }
9117  return SDValue(N, 0); // Return N so it doesn't get rechecked!
9118  }
9119  }
9120 
9121  // fold (aext (truncate x))
9122  if (N0.getOpcode() == ISD::TRUNCATE)
9123  return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
9124 
9125  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
9126  // if the trunc is not free.
9127  if (N0.getOpcode() == ISD::AND &&
9128  N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
9129  N0.getOperand(1).getOpcode() == ISD::Constant &&
9130  !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
9131  N0.getValueType())) {
9132  SDLoc DL(N);
9133  SDValue X = N0.getOperand(0).getOperand(0);
9134  X = DAG.getAnyExtOrTrunc(X, DL, VT);
9135  APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9136  Mask = Mask.zext(VT.getSizeInBits());
9137  return DAG.getNode(ISD::AND, DL, VT,
9138  X, DAG.getConstant(Mask, DL, VT));
9139  }
9140 
9141  // fold (aext (load x)) -> (aext (truncate (extload x)))
9142  // None of the supported targets knows how to perform load and any_ext
9143  // on vectors in one instruction. We only perform this transformation on
9144  // scalars.
9145  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
9146  ISD::isUNINDEXEDLoad(N0.getNode()) &&
9147  TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
9148  bool DoXform = true;
9149  SmallVector<SDNode*, 4> SetCCs;
9150  if (!N0.hasOneUse())
9151  DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
9152  TLI);
9153  if (DoXform) {
9154  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9155  SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
9156  LN0->getChain(),
9157  LN0->getBasePtr(), N0.getValueType(),
9158  LN0->getMemOperand());
9159  ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
9160  // If the load value is used only by N, replace it via CombineTo N.
9161  bool NoReplaceTrunc = N0.hasOneUse();
9162  CombineTo(N, ExtLoad);
9163  if (NoReplaceTrunc) {
9164  DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9165  } else {
9166  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
9167  N0.getValueType(), ExtLoad);
9168  CombineTo(LN0, Trunc, ExtLoad.getValue(1));
9169  }
9170  return SDValue(N, 0); // Return N so it doesn't get rechecked!
9171  }
9172  }
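// Sketch (assumed types): (aext i32 (load i16 %p)) becomes an extload from
// i16 to i32; if the original i16 load had other users, they are rewired to
// (trunc (extload ...)) so only one memory access remains.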
9173 
9174  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
9175  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
9176  // fold (aext ( extload x)) -> (aext (truncate (extload x)))
9177  if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
9178  ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
9179  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9180  ISD::LoadExtType ExtType = LN0->getExtensionType();
9181  EVT MemVT = LN0->getMemoryVT();
9182  if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
9183  SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
9184  VT, LN0->getChain(), LN0->getBasePtr(),
9185  MemVT, LN0->getMemOperand());
9186  CombineTo(N, ExtLoad);
9187  DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9188  return SDValue(N, 0); // Return N so it doesn't get rechecked!
9189  }
9190  }
9191 
9192  if (N0.getOpcode() == ISD::SETCC) {
9193  // For vectors:
9194  // aext(setcc) -> vsetcc
9195  // aext(setcc) -> truncate(vsetcc)
9196  // aext(setcc) -> aext(vsetcc)
9197  // Only do this before legalize for now.
9198  if (VT.isVector() && !LegalOperations) {
9199  EVT N00VT = N0.getOperand(0).getValueType();
9200  if (getSetCCResultType(N00VT) == N0.getValueType())
9201  return SDValue();
9202 
9203  // We know that the # elements of the result is the same as the
9204  // # elements of the compare (and the # elements of the compare result
9205  // for that matter). Check to see that they are the same size. If so,
9206  // we know that the element size of the ext'd result matches the
9207  // element size of the compare operands.
9208  if (VT.getSizeInBits() == N00VT.getSizeInBits())
9209  return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
9210  N0.getOperand(1),
9211  cast<CondCodeSDNode>(N0.getOperand(2))->get());
9212 
9213  // If the desired elements are smaller or larger than the source
9214  // elements we can use a matching integer vector type and then
9215  // truncate/any extend
9216  EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
9217  SDValue VsetCC =
9218  DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
9219  N0.getOperand(1),
9220  cast<CondCodeSDNode>(N0.getOperand(2))->get());
9221  return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
9222  }
9223 
9224  // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
9225  SDLoc DL(N);
9226  if (SDValue SCC = SimplifySelectCC(
9227  DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
9228  DAG.getConstant(0, DL, VT),
9229  cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
9230  return SCC;
9231  }
9232 
9233  return SDValue();
9234 }
9235 
9236 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
9237  unsigned Opcode = N->getOpcode();
9238  SDValue N0 = N->getOperand(0);
9239  SDValue N1 = N->getOperand(1);
9240  EVT AssertVT = cast<VTSDNode>(N1)->getVT();
9241 
9242  // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
9243  if (N0.getOpcode() == Opcode &&
9244  AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
9245  return N0;
9246 
9247  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
9248  N0.getOperand(0).getOpcode() == Opcode) {
9249  // We have an assert, truncate, assert sandwich. Make one stronger assert
9250  // by applying the smallest asserted type to the larger source type.
9251  // This eliminates the later assert:
9252  // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
9253  // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
9254  SDValue BigA = N0.getOperand(0);
9255  EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
9256  assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
9257  "Asserting zero/sign-extended bits to a type larger than the "
9258  "truncated destination does not provide information");
9259 
9260  SDLoc DL(N);
9261  EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
9262  SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
9263  SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
9264  BigA.getOperand(0), MinAssertVTVal);
9265  return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
9266  }
9267 
9268  // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
9269  // than X, just move the AssertZext in front of the truncate and drop the
9270  // AssertSext.
9271  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
9272  N0.getOperand(0).getOpcode() == ISD::AssertSext &&
9273  Opcode == ISD::AssertZext) {
9274  SDValue BigA = N0.getOperand(0);
9275  EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
9276  assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
9277  "Asserting zero/sign-extended bits to a type larger than the "
9278  "truncated destination does not provide information");
9279 
9280  if (AssertVT.bitsLT(BigA_AssertVT)) {
9281  SDLoc DL(N);
9282  SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
9283  BigA.getOperand(0), N1);
9284  return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
9285  }
9286  }
9287 
9288  return SDValue();
9289 }
9290 
9291 /// If the result of a wider load is shifted right by N bits and then
9292 /// truncated to a narrower type, where N is a multiple of the number of bits
9293 /// in the narrower type, transform it to a narrower load from address + N /
9294 /// (number of bits of the new type). Also narrow the load if the result is
9295 /// masked with an AND to effectively produce a smaller type. If the result is
9296 /// to be extended, also fold the extension to form an extending load.
9297 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
9298  unsigned Opc = N->getOpcode();
9299 
9300  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
9301  SDValue N0 = N->getOperand(0);
9302  EVT VT = N->getValueType(0);
9303  EVT ExtVT = VT;
9304 
9305  // This transformation isn't valid for vector loads.
9306  if (VT.isVector())
9307  return SDValue();
9308 
9309  unsigned ShAmt = 0;
9310  bool HasShiftedOffset = false;
9311  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
9312  // extended to VT.
9313  if (Opc == ISD::SIGN_EXTEND_INREG) {
9314  ExtType = ISD::SEXTLOAD;
9315  ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
9316  } else if (Opc == ISD::SRL) {
9317  // Another special-case: SRL is basically zero-extending a narrower value,
9318  // or it may be shifting a higher subword, half or byte into the lowest
9319  // bits.
9320  ExtType = ISD::ZEXTLOAD;
9321  N0 = SDValue(N, 0);
9322 
9323  auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
9324  auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
9325  if (!N01 || !LN0)
9326  return SDValue();
9327 
9328  uint64_t ShiftAmt = N01->getZExtValue();
9329  uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
9330  if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
9331  ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
9332  else
9333  ExtVT = EVT::getIntegerVT(*DAG.getContext(),
9334  VT.getSizeInBits() - ShiftAmt);
9335  } else if (Opc == ISD::AND) {
9336  // An AND with a constant mask is the same as a truncate + zero-extend.
9337  auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
9338  if (!AndC)
9339  return SDValue();
9340 
9341  const APInt &Mask = AndC->getAPIntValue();
9342  unsigned ActiveBits = 0;
9343  if (Mask.isMask()) {
9344  ActiveBits = Mask.countTrailingOnes();
9345  } else if (Mask.isShiftedMask()) {
9346  ShAmt = Mask.countTrailingZeros();
9347  APInt ShiftedMask = Mask.lshr(ShAmt);
9348  ActiveBits = ShiftedMask.countTrailingOnes();
9349  HasShiftedOffset = true;
9350  } else
9351  return SDValue();
9352 
9353  ExtType = ISD::ZEXTLOAD;
9354  ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
9355  }
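// Example of the shifted-mask case (illustrative numbers): a mask of 0xFF00
// gives ShAmt = 8 and ActiveBits = 8, so ExtVT becomes i8 and the narrowed
// zextload reads the byte at offset 1 on a little-endian target;
// HasShiftedOffset records that the result must be shifted left by 8 again.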
9356 
9357  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
9358  SDValue SRL = N0;
9359  if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
9360  ShAmt = ConstShift->getZExtValue();
9361  unsigned EVTBits = ExtVT.getSizeInBits();
9362  // Is the shift amount a multiple of the size of ExtVT?
9363  if ((ShAmt & (EVTBits-1)) == 0) {
9364  N0 = N0.getOperand(0);
9365  // Is the load width a multiple of the size of ExtVT?
9366  if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
9367  return SDValue();
9368  }
9369 
9370  // At this point, we must have a load or else we can't do the transform.
9371  if (!isa<LoadSDNode>(N0)) return SDValue();
9372 
9373  auto *LN0 = cast<LoadSDNode>(N0);
9374 
9375  // Because an SRL must be assumed to *need* to zero-extend the high bits
9376  // (as opposed to anyext the high bits), we can't combine the zextload
9377  // lowering of SRL and an sextload.
9378  if (LN0->getExtensionType() == ISD::SEXTLOAD)
9379  return SDValue();
9380 
9381  // If the shift amount is larger than the input type then we're not
9382  // accessing any of the loaded bytes. If the load was a zextload/extload
9383  // then the result of the shift+trunc is zero/undef (handled elsewhere).
9384  if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
9385  return SDValue();
9386 
9387  // If the SRL is only used by a masking AND, we may be able to adjust
9388  // the ExtVT to make the AND redundant.
9389  SDNode *Mask = *(SRL->use_begin());
9390  if (Mask->getOpcode() == ISD::AND &&
9391  isa<ConstantSDNode>(Mask->getOperand(1))) {
9392  const APInt &ShiftMask =
9393  cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
9394  if (ShiftMask.isMask()) {
9395  EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
9396  ShiftMask.countTrailingOnes());
9397  // If the mask is smaller, recompute the type.
9398  if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
9399  TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
9400  ExtVT = MaskedVT;
9401  }
9402  }
9403  }
9404  }
9405 
9406  // If the load is shifted left (and the result isn't shifted back right),
9407  // we can fold the truncate through the shift.
9408  unsigned ShLeftAmt = 0;
9409  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
9410  ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
9411  if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
9412  ShLeftAmt = N01->getZExtValue();
9413  N0 = N0.getOperand(0);
9414  }
9415  }
9416 
9417  // If we haven't found a load, we can't narrow it.
9418  if (!isa<LoadSDNode>(N0))
9419  return SDValue();
9420 
9421  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9422  if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
9423  return SDValue();
9424 
9425  auto AdjustBigEndianShift = [&](unsigned ShAmt) {
9426  unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
9427  unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
9428  return LVTStoreBits - EVTStoreBits - ShAmt;
9429  };
9430 
9431  // For big endian targets, we need to adjust the offset to the pointer to
9432  // load the correct bytes.
9433  if (DAG.getDataLayout().isBigEndian())
9434  ShAmt = AdjustBigEndianShift(ShAmt);
9435 
9436  EVT PtrType = N0.getOperand(1).getValueType();
9437  uint64_t PtrOff = ShAmt / 8;
9438  unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
9439  SDLoc DL(LN0);
9440  // The original load itself didn't wrap, so an offset within it doesn't.
9441  SDNodeFlags Flags;
9442  Flags.setNoUnsignedWrap(true);
9443  SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
9444  PtrType, LN0->getBasePtr(),
9445  DAG.getConstant(PtrOff, DL, PtrType),
9446  Flags);
9447  AddToWorklist(NewPtr.getNode());
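// Numeric sketch of the offset math (assumed values): narrowing an i32 load
// shifted right by 16 down to an i16 gives PtrOff = 16 / 8 = 2 on
// little-endian targets; on big-endian targets AdjustBigEndianShift yields
// 32 - 16 - 16 = 0, so the halfword is read from the start of the word.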
9448 
9449  SDValue Load;
9450  if (ExtType == ISD::NON_EXTLOAD)
9451  Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
9452  LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
9453  LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
9454  else
9455  Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
9456  LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
9457  NewAlign, LN0->getMemOperand()->getFlags(),
9458  LN0->getAAInfo());
9459 
9460  // Replace the old load's chain with the new load's chain.
9461  WorklistRemover DeadNodes(*this);
9462  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
9463 
9464  // Shift the result left, if we've swallowed a left shift.
9465  SDValue Result = Load;
9466  if (ShLeftAmt != 0) {
9467  EVT ShImmTy = getShiftAmountTy(Result.getValueType());
9468  if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
9469  ShImmTy = VT;
9470  // If the shift amount is as large as the result size (but, presumably,
9471  // no larger than the source) then the useful bits of the result are
9472  // zero; we can't simply return the shortened shift, because the result
9473  // of that operation is undefined.
9474  SDLoc DL(N0);
9475  if (ShLeftAmt >= VT.getSizeInBits())
9476  Result = DAG.getConstant(0, DL, VT);
9477  else
9478  Result = DAG.getNode(ISD::SHL, DL, VT,
9479  Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
9480  }
9481 
9482  if (HasShiftedOffset) {
9483  // Recalculate the shift amount after it has been altered to calculate
9484  // the offset.
9485  if (DAG.getDataLayout().isBigEndian())
9486  ShAmt = AdjustBigEndianShift(ShAmt);
9487 
9488  // We're using a shifted mask, so the load now has an offset. This means
9489  // that data has been loaded into lower bytes than it would have been
9490  // before, so we need to shl the loaded data into the correct position in
9491  // the register.
9492  SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
9493  Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
9494  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
9495  }
9496 
9497  // Return the new loaded value.
9498  return Result;
9499 }
9500 
9501 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
9502  SDValue N0 = N->getOperand(0);
9503  SDValue N1 = N->getOperand(1);
9504  EVT VT = N->getValueType(0);
9505  EVT EVT = cast<VTSDNode>(N1)->getVT();
9506  unsigned VTBits = VT.getScalarSizeInBits();
9507  unsigned EVTBits = EVT.getScalarSizeInBits();
9508 
9509  if (N0.isUndef())
9510  return DAG.getUNDEF(VT);
9511 
9512  // fold (sext_in_reg c1) -> c1
9513  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9514  return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
9515 
9516  // If the input is already sign extended, just drop the extension.
9517  if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
9518  return N0;
9519 
9520  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
9521  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
9522  EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
9523  return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
9524  N0.getOperand(0), N1);
9525 
9526  // fold (sext_in_reg (sext x)) -> (sext x)
9527  // fold (sext_in_reg (aext x)) -> (sext x)
9528  // if x is small enough or if we know that x has more than 1 sign bit and the
9529  // sign_extend_inreg is extending from one of them.
9530  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
9531  SDValue N00 = N0.getOperand(0);
9532  unsigned N00Bits = N00.getScalarValueSizeInBits();
9533  if ((N00Bits <= EVTBits ||
9534  (N00Bits - DAG.ComputeNumSignBits(N00)) < EVTBits) &&
9535  (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
9536  return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
9537  }
9538 
9539  // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
9540  if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
9541  N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
9542  N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
9543  N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
9544  if (!LegalOperations ||
9545  TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
9546  return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
9547  N0.getOperand(0));
9548  }
9549 
9550  // fold (sext_in_reg (zext x)) -> (sext x)
9551  // iff we are extending the source sign bit.
9552  if (N0.getOpcode() == ISD::ZERO_EXTEND) {
9553  SDValue N00 = N0.getOperand(0);
9554  if (N00.getScalarValueSizeInBits() == EVTBits &&
9555  (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
9556  return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
9557  }
9558 
9559  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
9560  if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
9561  return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
9562 
9563  // fold operands of sext_in_reg based on knowledge that the top bits are not
9564  // demanded.
9565  if (SimplifyDemandedBits(SDValue(N, 0)))
9566  return SDValue(N, 0);
9567 
9568  // fold (sext_in_reg (load x)) -> (smaller sextload x)
9569  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
9570  if (SDValue NarrowLoad = ReduceLoadWidth(N))
9571  return NarrowLoad;
9572 
9573  // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
9574  // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
9575  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
9576  if (N0.getOpcode() == ISD::SRL) {
9577  if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
9578  if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
9579  // We can turn this into an SRA iff the input to the SRL is already sign
9580  // extended enough.
9581  unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
9582  if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
9583  return DAG.getNode(ISD::SRA, SDLoc(N), VT,
9584  N0.getOperand(0), N0.getOperand(1));
9585  }
9586  }
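// For example (sketch): (sext_in_reg (srl X:i32, 24), i8) moves the top
// byte into the low byte; 32 - (24 + 8) = 0 bits remain above it, so the
// srl can always be replaced by (sra X, 24), which sign extends for free.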
9587 
9588  // fold (sext_inreg (extload x)) -> (sextload x)
9589  // If sextload is not supported by target, we can only do the combine when
9590  // load has one use. Doing otherwise can block folding the extload with other
9591  // extends that the target does support.
9592  if (ISD::isEXTLoad(N0.getNode()) &&
9593  ISD::isUNINDEXEDLoad(N0.getNode()) &&
9594  EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
9595  ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() &&
9596  N0.hasOneUse()) ||
9597  TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
9598  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9599  SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
9600  LN0->getChain(),
9601  LN0->getBasePtr(), EVT,
9602  LN0->getMemOperand());
9603  CombineTo(N, ExtLoad);
9604  CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
9605  AddToWorklist(ExtLoad.getNode());
9606  return SDValue(N, 0); // Return N so it doesn't get rechecked!
9607  }
9608  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
9609  if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
9610  N0.hasOneUse() &&
9611  EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
9612  ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
9613  TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
9614  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9615  SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
9616  LN0->getChain(),
9617  LN0->getBasePtr(), EVT,
9618  LN0->getMemOperand());
9619  CombineTo(N, ExtLoad);
9620  CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
9621  return SDValue(N, 0); // Return N so it doesn't get rechecked!
9622  }
9623 
9624  // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
9625  if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
9626  if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
9627  N0.getOperand(1), false))
9628  return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
9629  BSwap, N1);
9630  }
9631 
9632  return SDValue();
9633 }
9634 
9635 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
9636  SDValue N0 = N->getOperand(0);
9637  EVT VT = N->getValueType(0);
9638 
9639  if (N0.isUndef())
9640  return DAG.getUNDEF(VT);
9641 
9642  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
9643  return Res;
9644 
9645  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
9646  return SDValue(N, 0);
9647 
9648  return SDValue();
9649 }
9650 
9651 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
9652  SDValue N0 = N->getOperand(0);
9653  EVT VT = N->getValueType(0);
9654 
9655  if (N0.isUndef())
9656  return DAG.getUNDEF(VT);
9657 
9658  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
9659  return Res;
9660 
9661  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
9662  return SDValue(N, 0);
9663 
9664  return SDValue();
9665 }
9666 
9667 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
9668  SDValue N0 = N->getOperand(0);
9669  EVT VT = N->getValueType(0);
9670  bool isLE = DAG.getDataLayout().isLittleEndian();
9671 
9672  // noop truncate
9673  if (N0.getValueType() == N->getValueType(0))
9674  return N0;
9675 
9676  // fold (truncate (truncate x)) -> (truncate x)
9677  if (N0.getOpcode() == ISD::TRUNCATE)
9678  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
9679 
9680  // fold (truncate c1) -> c1
9681  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
9682  SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
9683  if (C.getNode() != N)
9684  return C;
9685  }
9686 
9687  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
9688  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
9689  N0.getOpcode() == ISD::SIGN_EXTEND ||
9690  N0.getOpcode() == ISD::ANY_EXTEND) {
9691  // if the source is smaller than the dest, we still need an extend.
9692  if (N0.getOperand(0).getValueType().bitsLT(VT))
9693  return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
9694  // if the source is larger than the dest, then we just need the truncate.
9695  if (N0.getOperand(0).getValueType().bitsGT(VT))
9696  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
9697  // if the source and dest are the same type, we can drop both the extend
9698  // and the truncate.
9699  return N0.getOperand(0);
9700  }
9701 
9702  // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
9703  if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
9704  return SDValue();
9705 
9706  // Fold extract-and-trunc into a narrow extract. For example:
9707  // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
9708  // i32 y = TRUNCATE(i64 x)
9709  // -- becomes --
9710  // v16i8 b = BITCAST (v2i64 val)
9711  // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
9712  //
9713  // Note: We only run this optimization after type legalization (which often
9714  // creates this pattern) and before operation legalization after which
9715  // we need to be more careful about the vector instructions that we generate.
9716  if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
9717  LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
9718  EVT VecTy = N0.getOperand(0).getValueType();
9719  EVT ExTy = N0.getValueType();
9720  EVT TrTy = N->getValueType(0);
9721 
9722  unsigned NumElem = VecTy.getVectorNumElements();
9723  unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
9724 
9725  EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
9726  assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
9727 
9728  SDValue EltNo = N0->getOperand(1);
9729  if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
9730  int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
9731  EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
9732  int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
9733 
9734  SDLoc DL(N);
9735  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
9736  DAG.getBitcast(NVT, N0.getOperand(0)),
9737  DAG.getConstant(Index, DL, IndexTy));
9738  }
9739  }
9740 
9741  // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
9742  if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
9743  EVT SrcVT = N0.getValueType();
9744  if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
9745  TLI.isTruncateFree(SrcVT, VT)) {
9746  SDLoc SL(N0);
9747  SDValue Cond = N0.getOperand(0);
9748  SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
9749  SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
9750  return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
9751  }
9752  }
9753 
9754  // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
9755  if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
9756  (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
9757  TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
9758  SDValue Amt = N0.getOperand(1);
9759  KnownBits Known = DAG.computeKnownBits(Amt);
9760  unsigned Size = VT.getScalarSizeInBits();
9761  if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
9762  SDLoc SL(N);
9763  EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
9764 
9765  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
9766  if (AmtVT != Amt.getValueType()) {
9767  Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
9768  AddToWorklist(Amt.getNode());
9769  }
9770  return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
9771  }
9772  }
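// Sketch with assumed operands: if %amt is known to fit in 5 bits (say
// %amt = (and %a, 31)), then (trunc (shl i64 %x, %amt) to i32) becomes
// (shl (trunc i64 %x to i32), %amt), shrinking the shift itself.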
9773 
9774  // Fold a series of buildvector, bitcast, and truncate if possible.
9775  // For example fold
9776  // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
9777  // (2xi32 (buildvector x, y)).
9778  if (Level == AfterLegalizeVectorOps && VT.isVector() &&
9779  N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
9780  N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
9781  N0.getOperand(0).hasOneUse()) {
9782  SDValue BuildVect = N0.getOperand(0);
9783  EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
9784  EVT TruncVecEltTy = VT.getVectorElementType();
9785 
9786  // Check that the element types match.
9787  if (BuildVectEltTy == TruncVecEltTy) {
9788  // Now we only need to compute the offset of the truncated elements.
9789  unsigned BuildVecNumElts = BuildVect.getNumOperands();
9790  unsigned TruncVecNumElts = VT.getVectorNumElements();
9791  unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
9792 
9793  assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
9794  "Invalid number of elements");
9795 
9796  SmallVector<SDValue, 8> Opnds;
9797  for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
9798  Opnds.push_back(BuildVect.getOperand(i));
9799 
9800  return DAG.getBuildVector(VT, SDLoc(N), Opnds);
9801  }
9802  }
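// In the example from the comment above, BuildVecNumElts = 4 and
// TruncVecNumElts = 2, so TruncEltOffset = 2 and the loop keeps build_vector
// operands 0 and 2, i.e. one x and one y.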
9803 
9804  // See if we can simplify the input to this truncate through knowledge that
9805  // only the low bits are being used.
9806  // For example "trunc (or (shl x, 8), y)" -> trunc y
9807  // Currently we only perform this optimization on scalars because vectors
9808  // may have different active low bits.
9809  if (!VT.isVector()) {
9810  APInt Mask =
9811  APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
9812  if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
9813  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
9814  }
9815 
9816  // fold (truncate (load x)) -> (smaller load x)
9817  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
9818  if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
9819  if (SDValue Reduced = ReduceLoadWidth(N))
9820  return Reduced;
9821 
9822  // Handle the case where the load remains an extending load even
9823  // after truncation.
9824  if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
9825  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9826  if (!LN0->isVolatile() &&
9827  LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
9828  SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
9829  VT, LN0->getChain(), LN0->getBasePtr(),
9830  LN0->getMemoryVT(),
9831  LN0->getMemOperand());
9832  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
9833  return NewLoad;
9834  }
9835  }
9836  }
9837 
9838  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...),
9839  // where ... are all 'undef'.
9840  if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
9841  SmallVector<EVT, 8> VTs;
9842  SDValue V;
9843  unsigned Idx = 0;
9844  unsigned NumDefs = 0;
9845 
9846  for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
9847  SDValue X = N0.getOperand(i);
9848  if (!X.isUndef()) {
9849  V = X;
9850  Idx = i;
9851  NumDefs++;
9852  }
9853  // Stop if more than one member is non-undef.
9854  if (NumDefs > 1)
9855  break;
9856  VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
9857  VT.getVectorElementType(),
9858  X.getValueType().getVectorNumElements()));
9859  }
9860 
9861  if (NumDefs == 0)
9862  return DAG.getUNDEF(VT);
9863 
9864  if (NumDefs == 1) {
9865  assert(V.getNode() && "The single defined operand is empty!");
9866  SmallVector<SDValue, 8> Opnds;
9867  for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
9868  if (i != Idx) {
9869  Opnds.push_back(DAG.getUNDEF(VTs[i]));
9870  continue;
9871  }
9872  SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
9873  AddToWorklist(NV.getNode());
9874  Opnds.push_back(NV);
9875  }
9876  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
9877  }
9878  }
9879 
9880  // Fold truncate of a bitcast of a vector to an extract of the low vector
9881  // element.
9882  //
9883  // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
9884  if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
9885  SDValue VecSrc = N0.getOperand(0);
9886  EVT SrcVT = VecSrc.getValueType();
9887  if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
9888  (!LegalOperations ||
9889  TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
9890  SDLoc SL(N);
9891 
9892  EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
9893  unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
9894  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
9895  VecSrc, DAG.getConstant(Idx, SL, IdxVT));
9896  }
9897  }
9898 
9899  // Simplify the operands using demanded-bits information.
9900  if (!VT.isVector() &&
9901  SimplifyDemandedBits(SDValue(N, 0)))
9902  return SDValue(N, 0);
9903 
9904  // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
9905  // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
9906  // When the adde's carry is not used.
9907  if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
9908  N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
9909  (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) {
9910  SDLoc SL(N);
9911  auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
9912  auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
9913  auto VTs = DAG.getVTList(VT, N0->getValueType(1));
9914  return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
9915  }
9916 
9917  // fold (truncate (extract_subvector(ext x))) ->
9918  // (extract_subvector x)
9919  // TODO: This can be generalized to cover cases where the truncate and extract
9920  // do not fully cancel each other out.
9921  if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
9922  SDValue N00 = N0.getOperand(0);
9923  if (N00.getOpcode() == ISD::SIGN_EXTEND ||
9924  N00.getOpcode() == ISD::ZERO_EXTEND ||
9925  N00.getOpcode() == ISD::ANY_EXTEND) {
9926  if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
9927  VT.getVectorElementType())
9928  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
9929  N00.getOperand(0), N0.getOperand(1));
9930  }
9931  }
9932 
9933  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
9934  return NewVSel;
9935 
9936  // Narrow a suitable binary operation with a non-opaque constant operand by
9937  // moving it ahead of the truncate. This is limited to pre-legalization
9938  // because targets may prefer a wider type during later combines and invert
9939  // this transform.
9940  switch (N0.getOpcode()) {
9941  case ISD::ADD:
9942  case ISD::SUB:
9943  case ISD::MUL:
9944  case ISD::AND:
9945  case ISD::OR:
9946  case ISD::XOR:
9947  if (!LegalOperations && N0.hasOneUse() &&
9948  (isConstantOrConstantVector(N0.getOperand(0), true) ||
9949  isConstantOrConstantVector(N0.getOperand(1), true))) {
9950  // TODO: We already restricted this to pre-legalization, but for vectors
9951  // we are extra cautious to not create an unsupported operation.
9952  // Target-specific changes are likely needed to avoid regressions here.
9953  if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
9954  SDLoc DL(N);
9955  SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
9956  SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
9957  return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
9958  }
9959  }
9960  }
9961 
9962  return SDValue();
9963 }
9964 
9965 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
9966  SDValue Elt = N->getOperand(i);
9967  if (Elt.getOpcode() != ISD::MERGE_VALUES)
9968  return Elt.getNode();
9969  return Elt.getOperand(Elt.getResNo()).getNode();
9970 }
9971 
9972 /// build_pair (load, load) -> load
9973 /// if load locations are consecutive.
9974 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
9975  assert(N->getOpcode() == ISD::BUILD_PAIR);
9976 
9977  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
9978  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
9979 
9980  // A BUILD_PAIR always has the least significant part in elt 0 and the
9981  // most significant part in elt 1. So when combining into one large load, we
9982  // need to consider the endianness.
9983  if (DAG.getDataLayout().isBigEndian())
9984  std::swap(LD1, LD2);
9985 
9986  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
9987  LD1->getAddressSpace() != LD2->getAddressSpace())
9988  return SDValue();
9989  EVT LD1VT = LD1->getValueType(0);
9990  unsigned LD1Bytes = LD1VT.getStoreSize();
9991  if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
9992  DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
9993  unsigned Align = LD1->getAlignment();
9994  unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
9995  VT.getTypeForEVT(*DAG.getContext()));
9996 
9997  if (NewAlign <= Align &&
9998  (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
9999  return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
10000  LD1->getPointerInfo(), Align);
10001  }
10002 
10003  return SDValue();
10004 }
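// Sketch (assumed layout): on a little-endian target,
// (build_pair (load i32 %p), (load i32 %p+4)) becomes a single
// (load i64 %p) when the i64 load is legal and the wider natural alignment
// is still satisfied.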
10005 
10006 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
10007  // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
10008  // and Lo parts; on big-endian machines it doesn't.
10009  return DAG.getDataLayout().isBigEndian() ? 1 : 0;
10010 }
10011 
10012 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
10013  const TargetLowering &TLI) {
10014  // If this is not a bitcast to an FP type or if the target doesn't have
10015  // IEEE754-compliant FP logic, we're done.
10016  EVT VT = N->getValueType(0);
10017  if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
10018  return SDValue();
10019 
10020  // TODO: Handle cases where the integer constant is a different scalar
10021  // bitwidth to the FP.
10022  SDValue N0 = N->getOperand(0);
10023  EVT SourceVT = N0.getValueType();
10024  if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
10025  return SDValue();
10026 
10027  unsigned FPOpcode;
10028  APInt SignMask;
10029  switch (N0.getOpcode()) {
10030  case ISD::AND:
10031  FPOpcode = ISD::FABS;
10032  SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
10033  break;
10034  case ISD::XOR:
10035  FPOpcode = ISD::FNEG;
10036  SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
10037  break;
10038  case ISD::OR:
10039  FPOpcode = ISD::FABS;
10040  SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
10041  break;
10042  default:
10043  return SDValue();
10044  }
10045 
10046  // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
10047  // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
10048  // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
10049  // fneg (fabs X)
10050  SDValue LogicOp0 = N0.getOperand(0);
10051  ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
10052  if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
10053  LogicOp0.getOpcode() == ISD::BITCAST &&
10054  LogicOp0.getOperand(0).getValueType() == VT) {
10055  SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
10056  NumFPLogicOpsConv++;
10057  if (N0.getOpcode() == ISD::OR)
10058  return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
10059  return FPOp;
10060  }
10061 
10062  return SDValue();
10063 }
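// Concrete sketch of the three cases (illustrative f32 constants):
//   (bitcast (and (bitcast f32 %x to i32), 0x7fffffff) to f32) -> (fabs %x)
//   (bitcast (xor (bitcast f32 %x to i32), 0x80000000) to f32) -> (fneg %x)
//   (bitcast (or  (bitcast f32 %x to i32), 0x80000000) to f32)
//       -> (fneg (fabs %x))
// where 0x80000000 is the f32 sign mask.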
10064 
10065 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
10066  SDValue N0 = N->getOperand(0);
10067  EVT VT = N->getValueType(0);
10068 
10069  if (N0.isUndef())
10070  return DAG.getUNDEF(VT);
10071 
10072  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
10073  // Only do this before legalize types, since we might create an illegal
10074  // scalar type. Even if we knew we wouldn't create an illegal scalar type
10075  // we can only do this before legalize ops, since the target may be
10076  // depending on the bitcast.
10077  // First check to see if this is all constant.
10078  if (!LegalTypes &&
10079  N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
10080  VT.isVector() && cast<BuildVectorSDNode>(N0)->isConstant())
10081  return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
10082  VT.getVectorElementType());
10083 
10084  // If the input is a constant, let getNode fold it.
10085  if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
10086  // If we can't allow illegal operations, we need to check that this is just
10087  // an fp -> int or int -> fp conversion and that the resulting operation will
10088  // be legal.
10089  if (!LegalOperations ||
10090  (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
10091  TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
10092  (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
10093  TLI.isOperationLegal(ISD::Constant, VT))) {
10094  SDValue C = DAG.getBitcast(VT, N0);
10095  if (C.getNode() != N)
10096  return C;
10097  }
10098  }
10099 
10100  // (conv (conv x, t1), t2) -> (conv x, t2)
10101  if (N0.getOpcode() == ISD::BITCAST)
10102  return DAG.getBitcast(VT, N0.getOperand(0));
10103 
10104  // fold (conv (load x)) -> (load (conv*)x)
10105  // If the resultant load doesn't need a higher alignment than the original!
10106  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
10107  // Do not remove the cast if the types differ in endian layout.
10108  TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
10109  TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
10110  // If the load is volatile, we only want to change the load type if the
10111  // resulting load is legal. Otherwise we might increase the number of
10112  // memory accesses. We don't care if the original type was legal or not
10113  // as we assume software couldn't rely on the number of accesses of an
10114  // illegal type.
10115  ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
10116  TLI.isOperationLegal(ISD::LOAD, VT)) &&
10117  TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
10118  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10119  unsigned OrigAlign = LN0->getAlignment();
10120 
10121  bool Fast = false;
10122  if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
10123  LN0->getAddressSpace(), OrigAlign, &Fast) &&
10124  Fast) {
10125  SDValue Load =
10126  DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
10127  LN0->getPointerInfo(), OrigAlign,
10128  LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
10129  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
10130  return Load;
10131  }
10132  }
10133 
10134  if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
10135  return V;
10136 
10137  // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
10138  // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
10139  //
10140  // For ppc_fp128:
10141  // fold (bitcast (fneg x)) ->
10142  // flipbit = signbit
10143  // (xor (bitcast x) (build_pair flipbit, flipbit))
10144  //
10145  // fold (bitcast (fabs x)) ->
10146  // flipbit = (and (extract_element (bitcast x), 0), signbit)
10147  // (xor (bitcast x) (build_pair flipbit, flipbit))
10148  // This often reduces constant pool loads.
10149  if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
10150  (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
10151  N0.getNode()->hasOneUse() && VT.isInteger() &&
10152  !VT.isVector() && !N0.getValueType().isVector()) {
10153  SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
10154  AddToWorklist(NewConv.getNode());
10155 
10156  SDLoc DL(N);
10157  if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
10158  assert(VT.getSizeInBits() == 128);
10159  SDValue SignBit = DAG.getConstant(
10160  APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
10161  SDValue FlipBit;
10162  if (N0.getOpcode() == ISD::FNEG) {
10163  FlipBit = SignBit;
10164  AddToWorklist(FlipBit.getNode());
10165  } else {
10166  assert(N0.getOpcode() == ISD::FABS);
10167  SDValue Hi =
10168  DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
10169  DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
10170  SDLoc(NewConv)));
10171  AddToWorklist(Hi.getNode());
10172  FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
10173  AddToWorklist(FlipBit.getNode());
10174  }
10175  SDValue FlipBits =
10176  DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
10177  AddToWorklist(FlipBits.getNode());
10178  return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
10179  }
10180  APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
10181  if (N0.getOpcode() == ISD::FNEG)
10182  return DAG.getNode(ISD::XOR, DL, VT,
10183  NewConv, DAG.getConstant(SignBit, DL, VT));
10184  assert(N0.getOpcode() == ISD::FABS);
10185  return DAG.getNode(ISD::AND, DL, VT,
10186  NewConv, DAG.getConstant(~SignBit, DL, VT));
10187  }
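// For the common scalar case (sketch): (bitcast (fneg f64 %x) to i64)
// becomes (xor (bitcast %x to i64), 0x8000000000000000), and fabs becomes
// an and with the complement of that sign mask.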
10188 
10189  // fold (bitconvert (fcopysign cst, x)) ->
10190  // (or (and (bitconvert x), sign), (and cst, (not sign)))
10191  // Note that we don't handle (copysign x, cst) because this can always be
10192  // folded to an fneg or fabs.
10193  //
10194  // For ppc_fp128:
10195  // fold (bitcast (fcopysign cst, x)) ->
10196  // flipbit = (and (extract_element
10197  // (xor (bitcast cst), (bitcast x)), 0),
10198  // signbit)
10199  // (xor (bitcast cst) (build_pair flipbit, flipbit))
10200  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
10201  isa<ConstantFPSDNode>(N0.getOperand(0)) &&
10202  VT.isInteger() && !VT.isVector()) {
10203  unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
10204  EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
10205  if (isTypeLegal(IntXVT)) {
10206  SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
10207  AddToWorklist(X.getNode());
10208 
10209  // If X has a different width than the result/lhs, sext it or truncate it.
10210  unsigned VTWidth = VT.getSizeInBits();
10211  if (OrigXWidth < VTWidth) {
10212  X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
10213  AddToWorklist(X.getNode());
10214  } else if (OrigXWidth > VTWidth) {
10215  // To get the sign bit in the right place, we have to shift it right
10216  // before truncating.
10217  SDLoc DL(X);
10218  X = DAG.getNode(ISD::SRL, DL,
10219  X.getValueType(), X,
10220  DAG.getConstant(OrigXWidth-VTWidth, DL,
10221  X.getValueType()));
10222  AddToWorklist(X.getNode());
10223  X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
10224  AddToWorklist(X.getNode());
10225  }
10226 
10227  if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
10228  APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
10229  SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
10230  AddToWorklist(Cst.getNode());
10231  SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
10232  AddToWorklist(X.getNode());
10233  SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
10234  AddToWorklist(XorResult.getNode());
10235  SDValue XorResult64 = DAG.getNode(
10236  ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
10237  DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
10238  SDLoc(XorResult)));
10239  AddToWorklist(XorResult64.getNode());
10240  SDValue FlipBit =
10241  DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
10242  DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
10243  AddToWorklist(FlipBit.getNode());
10244  SDValue FlipBits =
10245  DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
10246  AddToWorklist(FlipBits.getNode());
10247  return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
10248  }
10249  APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
10250  X = DAG.getNode(ISD::AND, SDLoc(X), VT,
10251  X, DAG.getConstant(SignBit, SDLoc(X), VT));
10252  AddToWorklist(X.getNode());
10253 
10254  SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
10255  Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
10256  Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
10257  AddToWorklist(Cst.getNode());
10258 
10259  return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
10260  }
10261  }
10262 
10263  // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
10264  if (N0.getOpcode() == ISD::BUILD_PAIR)
10265  if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
10266  return CombineLD;
10267 
10268  // Remove double bitcasts from shuffles - this is often a legacy of
10269  // XformToShuffleWithZero being used to combine bitmaskings (of
10270  // float vectors bitcast to integer vectors) into shuffles.
10271  // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
10272  if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
10273  N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
10274  VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
10275  !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
10276  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
10277 
10278  // If operands are a bitcast, peek through if it casts the original VT.
10279  // If operands are a constant, just bitcast back to original VT.
10280  auto PeekThroughBitcast = [&](SDValue Op) {
10281  if (Op.getOpcode() == ISD::BITCAST &&
10282  Op.getOperand(0).getValueType() == VT)
10283  return SDValue(Op.getOperand(0));
10284  if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
10285  ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
10286  return DAG.getBitcast(VT, Op);
10287  return SDValue();
10288  };
10289 
10290  // FIXME: If either input vector is bitcast, try to convert the shuffle to
10291  // the result type of this bitcast. This would eliminate at least one
10292  // bitcast. See the transform in InstCombine.
10293  SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
10294  SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
10295  if (!(SV0 && SV1))
10296  return SDValue();
10297 
10298  int MaskScale =
10299  VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
10300  SmallVector<int, 8> NewMask;
10301  for (int M : SVN->getMask())
10302  for (int i = 0; i != MaskScale; ++i)
10303  NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
10304 
10305  bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
10306  if (!LegalMask) {
10307  std::swap(SV0, SV1);
10308  ShuffleVectorSDNode::commuteMask(NewMask);
10309  LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
10310  }
10311 
10312  if (LegalMask)
10313  return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
10314  }
10315 
10316  return SDValue();
10317 }
10318 
10319 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
10320  EVT VT = N->getValueType(0);
10321  return CombineConsecutiveLoads(N, VT);
10322 }
10323 
10324 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
10325 /// operands. DstEltVT indicates the destination element value type.
10326 SDValue DAGCombiner::
10327 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
10328  EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
10329 
10330  // If this is already the right type, we're done.
10331  if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
10332 
10333  unsigned SrcBitSize = SrcEltVT.getSizeInBits();
10334  unsigned DstBitSize = DstEltVT.getSizeInBits();
10335 
10336  // If this is a conversion of N elements of one type to N elements of another
10337  // type, convert each element. This handles FP<->INT cases.
10338  if (SrcBitSize == DstBitSize) {
10339  SmallVector<SDValue, 8> Ops;
10340  for (SDValue Op : BV->op_values()) {
10341  // If the vector element type is not legal, the BUILD_VECTOR operands
10342  // are promoted and implicitly truncated. Make that explicit here.
10343  if (Op.getValueType() != SrcEltVT)
10344  Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
10345  Ops.push_back(DAG.getBitcast(DstEltVT, Op));
10346  AddToWorklist(Ops.back().getNode());
10347  }
10348  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
10349  BV->getValueType(0).getVectorNumElements());
10350  return DAG.getBuildVector(VT, SDLoc(BV), Ops);
10351  }
10352 
10353  // Otherwise, we're growing or shrinking the elements. To avoid having to
10354  // handle annoying details of growing/shrinking FP values, we convert them to
10355  // int first.
10356  if (SrcEltVT.isFloatingPoint()) {
10357  // Convert the input float vector to an int vector where the elements are
10358  // the same size.
10359  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
10360  BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
10361  SrcEltVT = IntVT;
10362  }
10363 
10364  // Now we know the input is an integer vector. If the output is a FP type,
10365  // convert to integer first, then to FP of the right size.
10366  if (DstEltVT.isFloatingPoint()) {
10367  EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
10368  SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
10369 
10370  // Next, convert to FP elements of the same size.
10371  return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
10372  }
10373 
10374  SDLoc DL(BV);
10375 
10376  // Okay, we know the src/dst types are both integers of differing types.
10377  // Handling growing first.
10378  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
10379  if (SrcBitSize < DstBitSize) {
10380  unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
10381 
10382  SmallVector<SDValue, 8> Ops;
10383  for (unsigned i = 0, e = BV->getNumOperands(); i != e;
10384  i += NumInputsPerOutput) {
10385  bool isLE = DAG.getDataLayout().isLittleEndian();
10386  APInt NewBits = APInt(DstBitSize, 0);
10387  bool EltIsUndef = true;
10388  for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
10389  // Shift the previously computed bits over.
10390  NewBits <<= SrcBitSize;
10391  SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
10392  if (Op.isUndef()) continue;
10393  EltIsUndef = false;
10394 
10395  NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
10396  zextOrTrunc(SrcBitSize).zext(DstBitSize);
10397  }
10398 
10399  if (EltIsUndef)
10400  Ops.push_back(DAG.getUNDEF(DstEltVT));
10401  else
10402  Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
10403  }
10404 
10405  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
10406  return DAG.getBuildVector(VT, DL, Ops);
10407  }
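  // Worked example of the growing case (illustrative): on a little-endian
  // target, packing <4 x i16> pairs into <2 x i32> puts the lower-indexed
  // element in the low bits, e.g. lanes <0x1234, 0x5678> form 0x56781234.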
10408 
10409  // Finally, this must be the case where we are shrinking elements: each input
10410  // turns into multiple outputs.
10411  unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
10412  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
10413  NumOutputsPerInput*BV->getNumOperands());
10414  SmallVector<SDValue, 8> Ops;
10415 
10416  for (const SDValue &Op : BV->op_values()) {
10417  if (Op.isUndef()) {
10418  Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
10419  continue;
10420  }
10421 
10422  APInt OpVal = cast<ConstantSDNode>(Op)->
10423  getAPIntValue().zextOrTrunc(SrcBitSize);
10424 
10425  for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
10426  APInt ThisVal = OpVal.trunc(DstBitSize);
10427  Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
10428  OpVal.lshrInPlace(DstBitSize);
10429  }
10430 
10431  // For big endian targets, swap the order of the pieces of each element.
10432  if (DAG.getDataLayout().isBigEndian())
10433  std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
10434  }
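  // Worked example of the shrinking case (illustrative): splitting an i32
  // lane 0x56781234 into i16 pieces yields <0x1234, 0x5678> on little-endian
  // targets; the std::reverse above turns that into <0x5678, 0x1234> for
  // big-endian ones.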
10435 
10436  return DAG.getBuildVector(VT, DL, Ops);
10437 }
10438 
10439 static bool isContractable(SDNode *N) {
10440  SDNodeFlags F = N->getFlags();
10441  return F.hasAllowContract() || F.hasAllowReassociation();
10442 }
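// Note (illustrative): these flags normally come from the frontend; e.g.
// clang attaches 'contract' under -ffp-contract=fast and 'reassoc' when
// reassociation is allowed by the fast-math settings.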
10443 
10444 /// Try to perform FMA combining on a given FADD node.
10445 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
10446  SDValue N0 = N->getOperand(0);
10447  SDValue N1 = N->getOperand(1);
10448  EVT VT = N->getValueType(0);
10449  SDLoc SL(N);
10450 
10451  const TargetOptions &Options = DAG.getTarget().Options;
10452 
10453  // Floating-point multiply-add with intermediate rounding.
10454  bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
10455 
10456  // Floating-point multiply-add without intermediate rounding.
10457  bool HasFMA =
10458  TLI.isFMAFasterThanFMulAndFAdd(VT) &&
10459  (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
10460 
10461  // No valid opcode, do not combine.
10462  if (!HasFMAD && !HasFMA)
10463  return SDValue();
10464 
10465  SDNodeFlags Flags = N->getFlags();
10466  bool CanFuse = Options.UnsafeFPMath || isContractable(N);
10467  bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
10468  CanFuse || HasFMAD);
10469  // If the addition is not contractable, do not combine.
10470  if (!AllowFusionGlobally && !isContractable(N))
10471  return SDValue();
10472 
10473  const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
10474  if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
10475  return SDValue();
10476 
10477  // Always prefer FMAD to FMA for precision.
10478  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
10479  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
10480 
10481  // Is the node an FMUL and contractable either due to global flags or
10482  // SDNodeFlags.
10483  auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
10484  if (N.getOpcode() != ISD::FMUL)
10485  return false;
10486  return AllowFusionGlobally || isContractable(N.getNode());
10487  };
10488  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
10489  // prefer to fold the multiply with fewer uses.
10490  if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
10491  if (N0.getNode()->use_size() > N1.getNode()->use_size())
10492  std::swap(N0, N1);
10493  }
10494 
10495  // fold (fadd (fmul x, y), z) -> (fma x, y, z)
10496  if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
10497  return DAG.getNode(PreferredFusedOpcode, SL, VT,
10498  N0.getOperand(0), N0.getOperand(1), N1, Flags);
10499  }
10500 
10501  // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
10502  // Note: Commutes FADD operands.
10503  if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
10504  return DAG.getNode(PreferredFusedOpcode, SL, VT,
10505  N1.getOperand(0), N1.getOperand(1), N0, Flags);
10506  }
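  // For example (illustrative): (fadd (fmul a, b), c) becomes (fma a, b, c);
  // with ISD::FMA the product feeds the add unrounded, so the result is
  // rounded once instead of twice.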
10507 
10508  // Look through FP_EXTEND nodes to do more combining.
10509 
10510  // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
10511  if (N0.getOpcode() == ISD::FP_EXTEND) {
10512  SDValue N00 = N0.getOperand(0);
10513  if (isContractableFMUL(N00) &&
10514  TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10515  return DAG.getNode(PreferredFusedOpcode, SL, VT,
10516  DAG.getNode(ISD::FP_EXTEND, SL, VT,
10517  N00.getOperand(0)),
10518  DAG.getNode(ISD::FP_EXTEND, SL, VT,
10519  N00.getOperand(1)), N1, Flags);
10520  }
10521  }
10522 
10523  // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
10524  // Note: Commutes FADD operands.
10525  if (N1.getOpcode() == ISD::FP_EXTEND) {
10526  SDValue N10 = N1.getOperand(0);
10527  if (isContractableFMUL(N10) &&
10528  TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
10529  return DAG.getNode(PreferredFusedOpcode, SL, VT,
10530  DAG.getNode(ISD::FP_EXTEND, SL, VT,
10531  N10.getOperand(0)),
10532  DAG.getNode(ISD::FP_EXTEND, SL, VT,
10533  N10.getOperand(1)), N0, Flags);
10534  }
10535  }
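  // For example (illustrative): if x and y are f32 and the add is f64,
  // (fadd (fpext (fmul x, y)), z) becomes (fma (fpext x), (fpext y), z),
  // assuming the target reports the extension as foldable via
  // isFPExtFoldable.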
10536 
10537  // More folding opportunities when target permits.
10538  if (Aggressive) {
10539  // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
10540  if (CanFuse &&
10541  N0.getOpcode() == PreferredFusedOpcode &&
10542  N0.getOperand(2).getOpcode() == ISD::FMUL &&
10543  N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
10544  return DAG.getNode(PreferredFusedOpcode, SL, VT,
10545  N0.getOperand(0), N0.getOperand(1),
10546  DAG.getNode(PreferredFusedOpcode, SL, VT,
10547  N0.getOperand(2).getOperand(0),
10548  N0.getOperand(2).getOperand(1),
10549  N1, Flags), Flags);
10550  }
10551 
10552  // fold (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z, (fma u, v, x))
10553  if (CanFuse &&
10554  N1->getOpcode() == PreferredFusedOpcode &&
10555  N1.getOperand(2).getOpcode() == ISD::FMUL &&
10556  N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
10557  return DAG.getNode(PreferredFusedOpcode, SL, VT,
10558  N1.getOperand(0), N1.getOperand(1),
10559  DAG.getNode(PreferredFusedOpcode, SL, VT,
10560  N1.getOperand(2).getOperand(0),
10561  N1.getOperand(2).getOperand(1),
10562  N0, Flags), Flags);
10563  }
10564 
10565 
10566  // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
10567  // -> (fma x, y, (fma (fpext u), (fpext v), z))
10568  auto FoldFAddFMAFPExtFMul = [&] (
10569  SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
10570  SDNodeFlags Flags) {
10571  return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
10572  DAG.getNode(PreferredFusedOpcode, SL, VT,
10573  DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
10574  DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
10575  Z, Flags), Flags);
10576  };
10577  if (N0.getOpcode() == PreferredFusedOpcode) {
10578  SDValue N02 = N0.getOperand(2);
10579  if (N02.getOpcode() == ISD::FP_EXTEND) {
10580  SDValue N020 = N02.getOperand(0);
10581  if (isContractableFMUL(N020) &&
10582  TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
10583  return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
10584  N020.getOperand(0), N020.getOperand(1),
10585  N1, Flags);
10586  }
10587  }
10588  }
10589 
10590  // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
10591  // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
10592  // FIXME: This turns two single-precision and one double-precision
10593  // operation into two double-precision operations, which might not be
10594  // interesting for all targets, especially GPUs.
10595  auto FoldFAddFPExtFMAFMul = [&] (
10596  SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
10597  SDNodeFlags Flags) {
10598  return DAG.getNode(PreferredFusedOpcode, SL, VT,
10599  DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
10600  DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
10601  DAG.getNode(PreferredFusedOpcode, SL, VT,
10602  DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
10603  DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
10604  Z, Flags), Flags);
10605  };
10606  if (N0.getOpcode() == ISD::FP_EXTEND) {
10607  SDValue N00 = N0.getOperand(0);
10608  if (N00.getOpcode() == PreferredFusedOpcode) {
10609  SDValue N002 = N00.getOperand(2);
10610  if (isContractableFMUL(N002) &&
10611  TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10612  return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
10613  N002.getOperand(0), N002.getOperand(1),
10614  N1, Flags);
10615  }
10616  }
10617  }
10618 
10619  // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
10620  // -> (fma y, z, (fma (fpext u), (fpext v), x))
10621  if (N1.getOpcode() == PreferredFusedOpcode) {
10622  SDValue N12 = N1.getOperand(2);
10623  if (N12.getOpcode() == ISD::FP_EXTEND) {
10624  SDValue N120 = N12.getOperand(0);
10625  if (isContractableFMUL(N120) &&
10626  TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
10627  return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
10628  N120.getOperand(0), N120.getOperand(1),
10629  N0, Flags);
10630  }
10631  }
10632  }
10633 
10634  // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
10635  // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
10636  // FIXME: This turns two single-precision and one double-precision
10637  // operation into two double-precision operations, which might not be
10638  // interesting for all targets, especially GPUs.
10639  if (N1.getOpcode() == ISD::FP_EXTEND) {
10640  SDValue N10 = N1.getOperand(0);
10641  if (N10.getOpcode() == PreferredFusedOpcode) {
10642  SDValue N102 = N10.getOperand(2);
10643  if (isContractableFMUL(N102) &&
10644  TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
10645  return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
10646  N102.getOperand(0), N102.getOperand(1),
10647  N0, Flags);
10648  }
10649  }
10650  }
10651  }
10652 
10653  return SDValue();
10654 }
10655 
10656 /// Try to perform FMA combining on a given FSUB node.
10657 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
10658  SDValue N0 = N->getOperand(0);
10659  SDValue N1 = N->getOperand(1);
10660  EVT VT = N->getValueType(0);
10661  SDLoc SL(N);
10662 
10663  const TargetOptions &Options = DAG.getTarget().Options;
10664  // Floating-point multiply-add with intermediate rounding.
10665  bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
10666 
10667  // Floating-point multiply-add without intermediate rounding.
10668  bool HasFMA =
10669  TLI.isFMAFasterThanFMulAndFAdd(VT) &&
10670  (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
10671 
10672  // No valid opcode, do not combine.
10673  if (!HasFMAD && !HasFMA)
10674  return SDValue();
10675 
10676  const SDNodeFlags Flags = N->getFlags();
10677  bool CanFuse = Options.UnsafeFPMath || isContractable(N);
10678  bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
10679  CanFuse || HasFMAD);
10680 
10681  // If the subtraction is not contractable, do not combine.
10682  if (!AllowFusionGlobally && !isContractable(N))
10683  return SDValue();
10684 
10685  const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
10686  if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
10687  return SDValue();
10688 
10689  // Always prefer FMAD to FMA for precision.
10690  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
10691  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
10692 
10693  // Is the node an FMUL and contractable either due to global flags or
10694  // SDNodeFlags.
10695  auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
10696  if (N.getOpcode() != ISD::FMUL)
10697  return false;
10698  return AllowFusionGlobally || isContractable(N.getNode());
10699  };
10700 
10701  // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
10702  if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
10703  return DAG.getNode(PreferredFusedOpcode, SL, VT,
10704  N0.getOperand(0), N0.getOperand(1),
10705  DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
10706  }
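  // For example (illustrative): (fsub (fmul a, b), c) becomes
  // (fma a, b, (fneg c)); the subtraction is absorbed by negating the addend.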
10707 
10708  // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
10709  // Note: Commutes FSUB operands.
10710  if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
10711  return DAG.getNode(PreferredFusedOpcode, SL, VT,
10712  DAG.getNode(ISD::FNEG, SL, VT,
10713  N1.getOperand(0)),
10714  N1.getOperand(1), N0, Flags);
10715  }
10716 
10717  // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
10718  if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
10719  (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
10720  SDValue N00 = N0.getOperand(0).getOperand(0);
10721  SDValue N01 = N0.getOperand(0).getOperand(1);
10722  return DAG.getNode(PreferredFusedOpcode, SL, VT,
10723  DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
10724  DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
10725  }
10726 
10727  // Look through FP_EXTEND nodes to do more combining.
10728 
10729  // fold (fsub (fpext (fmul x, y)), z)
10730  // -> (fma (fpext x), (fpext y), (fneg z))
10731  if (N0.getOpcode() == ISD::FP_EXTEND) {
10732  SDValue N00 = N0.getOperand(0);
10733  if (isContractableFMUL(N00) &&
10734  TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10735  return DAG.getNode(PreferredFusedOpcode, SL, VT,
10736  DAG.getNode(ISD::FP_EXTEND, SL, VT,
10737  N00.getOperand(0)),
10738  DAG.getNode(ISD::FP_EXTEND, SL, VT,
10739  N00.getOperand(1)),
10740  DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
10741  }
10742  }
10743 
10744  // fold (fsub x, (fpext (fmul y, z)))
10745  // -> (fma (fneg (fpext y)), (fpext z), x)
10746  // Note: Commutes FSUB operands.
10747  if (N1.getOpcode() == ISD::FP_EXTEND) {
10748  SDValue N10 = N1.getOperand(0);
10749  if (isContractableFMUL(N10) &&
10750  TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
10751  return DAG.getNode(PreferredFusedOpcode, SL, VT,
10752  DAG.getNode(ISD::FNEG, SL, VT,
10753  DAG.getNode(ISD::FP_EXTEND, SL, VT,
10754  N10.getOperand(0))),
10755  DAG.getNode(ISD::FP_EXTEND, SL, VT,
10756  N10.getOperand(1)),
10757  N0, Flags);
10758  }
10759  }
10760 
10761  // fold (fsub (fpext (fneg (fmul x, y))), z)
10762  // -> (fneg (fma (fpext x), (fpext y), z))
10763  // Note: This could be removed with appropriate canonicalization of the
10764  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
10765  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
10766  // us from implementing the canonicalization in visitFSUB.
10767  if (N0.getOpcode() == ISD::FP_EXTEND) {
10768  SDValue N00 = N0.getOperand(0);
10769  if (N00.getOpcode() == ISD::FNEG) {
10770  SDValue N000 = N00.getOperand(0);
10771  if (isContractableFMUL(N000) &&
10772  TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10773  return DAG.getNode(ISD::FNEG, SL, VT,
10774  DAG.getNode(PreferredFusedOpcode, SL, VT,
10775  DAG.getNode(ISD::FP_EXTEND, SL, VT,
10776  N000.getOperand(0)),
10777  DAG.getNode(ISD::FP_EXTEND, SL, VT,
10778  N000.getOperand(1)),
10779  N1, Flags));
10780  }
10781  }
10782  }
10783 
10784  // fold (fsub (fneg (fpext (fmul x, y))), z)
10785  // -> (fneg (fma (fpext x), (fpext y), z))
10786  // Note: This could be removed with appropriate canonicalization of the
10787  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
10788  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
10789  // us from implementing the canonicalization in visitFSUB.
10790  if (N0.getOpcode() == ISD::FNEG) {
10791  SDValue N00 = N0.getOperand(0);
10792  if (N00.getOpcode() == ISD::FP_EXTEND) {
10793  SDValue N000 = N00.getOperand(0);
10794  if (isContractableFMUL(N000) &&
10795  TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) {
10796  return DAG.getNode(ISD::FNEG, SL, VT,
10797  DAG.getNode(PreferredFusedOpcode, SL, VT,
10798  DAG.getNode(ISD::FP_EXTEND, SL, VT,
10799  N000.getOperand(0)),
10800  DAG.getNode(ISD::FP_EXTEND, SL, VT,
10801  N000.getOperand(1)),
10802  N1, Flags));
10803  }
10804  }
10805  }
10806 
10807  // More folding opportunities when target permits.
10808  if (Aggressive) {
10809  // fold (fsub (fma x, y, (fmul u, v)), z)
10810  // -> (fma x, y, (fma u, v, (fneg z)))
10811  if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
10812  isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
10813  N0.getOperand(2)->hasOneUse()) {
10814  return DAG.getNode(PreferredFusedOpcode, SL, VT,
10815  N0.getOperand(0), N0.getOperand(1),
10816  DAG.getNode(PreferredFusedOpcode, SL, VT,
10817  N0.getOperand(2).getOperand(0),
10818  N0.getOperand(2).getOperand(1),
10819  DAG.getNode(ISD::FNEG, SL, VT,
10820  N1), Flags), Flags);
10821  }
10822 
10823  // fold (fsub x, (fma y, z, (fmul u, v)))
10824  // -> (fma (fneg y), z, (fma (fneg u), v, x))
10825  if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
10826  isContractableFMUL(N1.getOperand(2))) {
10827  SDValue N20 = N1.getOperand(2).getOperand(0);
10828  SDValue N21 = N1.getOperand(2).getOperand(1);
10829  return DAG.getNode(PreferredFusedOpcode, SL, VT,
10830  DAG.getNode(ISD::FNEG, SL, VT,
10831  N1.getOperand(0)),
10832  N1.getOperand(1),
10833  DAG.getNode(PreferredFusedOpcode, SL, VT,
10834  DAG.getNode(ISD::FNEG, SL, VT, N20),
10835  N21, N0, Flags), Flags);
10836  }
10837 
10838 
10839  // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
10840  // -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
10841  if (N0.getOpcode() == PreferredFusedOpcode) {
10842  SDValue N02 = N0.getOperand(2);
10843  if (N02.getOpcode() == ISD::FP_EXTEND) {
10844  SDValue N020 = N02.getOperand(0);
10845  if (isContractableFMUL(N020) &&
10846  TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
10847  return DAG.getNode(PreferredFusedOpcode, SL, VT,
10848  N0.getOperand(0), N0.getOperand(1),
10849  DAG.getNode(PreferredFusedOpcode, SL, VT,
10850  DAG.getNode(ISD::FP_EXTEND, SL, VT,
10851  N020.getOperand(0)),
10852  DAG.getNode(ISD::FP_EXTEND, SL, VT,
10853  N020.getOperand(1)),
10854  DAG.getNode(ISD::FNEG, SL, VT,
10855  N1), Flags), Flags);
10856  }
10857  }
10858  }
10859 
10860  // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
10861  // -> (fma (fpext x), (fpext y),
10862  // (fma (fpext u), (fpext v), (fneg z)))
10863  // FIXME: This turns two single-precision and one double-precision
10864  // operation into two double-precision operations, which might not be
10865  // interesting for all targets, especially GPUs.
10866  if (N0.getOpcode() == ISD::FP_EXTEND) {
10867  SDValue N00 = N0.getOperand(0);
10868  if (N00.getOpcode() == PreferredFusedOpcode) {
10869  SDValue N002 = N00.getOperand(2);
10870  if (isContractableFMUL(N002) &&
10871  TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10872  return DAG.getNode(PreferredFusedOpcode, SL, VT,
10873  DAG.getNode(ISD::FP_EXTEND, SL, VT,
10874  N00.getOperand(0)),
10875  DAG.getNode(ISD::FP_EXTEND, SL, VT,
10876  N00.getOperand(1)),
10877  DAG.getNode(PreferredFusedOpcode, SL, VT,
10878  DAG.getNode(ISD::FP_EXTEND, SL, VT,
10879  N002.getOperand(0)),
10880  DAG.getNode(ISD::FP_EXTEND, SL, VT,
10881  N002.getOperand(1)),
10882  DAG.getNode(ISD::FNEG, SL, VT,
10883  N1), Flags), Flags);
10884  }
10885  }
10886  }
10887 
10888  // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
10889  // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
10890  if (N1.getOpcode() == PreferredFusedOpcode &&
10891  N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
10892  SDValue N120 = N1.getOperand(2).getOperand(0);
10893  if (isContractableFMUL(N120) &&
10894  TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
10895  SDValue N1200 = N120.getOperand(0);
10896  SDValue N1201 = N120.getOperand(1);
10897  return DAG.getNode(PreferredFusedOpcode, SL, VT,
10898  DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
10899  N1.getOperand(1),
10900  DAG.getNode(PreferredFusedOpcode, SL, VT,
10901  DAG.getNode(ISD::FNEG, SL, VT,
10902  DAG.getNode(ISD::FP_EXTEND, SL,
10903  VT, N1200)),
10904  DAG.getNode(ISD::FP_EXTEND, SL, VT,
10905  N1201),
10906  N0, Flags), Flags);
10907  }
10908  }
10909 
10910  // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
10911  // -> (fma (fneg (fpext y)), (fpext z),
10912  // (fma (fneg (fpext u)), (fpext v), x))
10913  // FIXME: This turns two single-precision and one double-precision
10914  // operation into two double-precision operations, which might not be
10915  // interesting for all targets, especially GPUs.
10916  if (N1.getOpcode() == ISD::FP_EXTEND &&
10917  N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
10918  SDValue CvtSrc = N1.getOperand(0);
10919  SDValue N100 = CvtSrc.getOperand(0);
10920  SDValue N101 = CvtSrc.getOperand(1);
10921  SDValue N102 = CvtSrc.getOperand(2);
10922  if (isContractableFMUL(N102) &&
10923  TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) {
10924  SDValue N1020 = N102.getOperand(0);
10925  SDValue N1021 = N102.getOperand(1);
10926  return DAG.getNode(PreferredFusedOpcode, SL, VT,
10927  DAG.getNode(ISD::FNEG, SL, VT,
10928  DAG.getNode(ISD::FP_EXTEND, SL, VT,
10929  N100)),
10930  DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
10931  DAG.getNode(PreferredFusedOpcode, SL, VT,
10932  DAG.getNode(ISD::FNEG, SL, VT,
10933  DAG.getNode(ISD::FP_EXTEND, SL,
10934  VT, N1020)),
10935  DAG.getNode(ISD::FP_EXTEND, SL, VT,
10936  N1021),
10937  N0, Flags), Flags);
10938  }
10939  }
10940  }
10941 
10942  return SDValue();
10943 }
10944 
10945 /// Try to perform FMA combining on a given FMUL node based on the distributive
10946 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
10947 /// subtraction instead of addition).
10948 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
10949  SDValue N0 = N->getOperand(0);
10950  SDValue N1 = N->getOperand(1);
10951  EVT VT = N->getValueType(0);
10952  SDLoc SL(N);
10953  const SDNodeFlags Flags = N->getFlags();
10954 
10955  assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
10956 
10957  const TargetOptions &Options = DAG.getTarget().Options;
10958 
10959  // The transforms below are incorrect when x == 0 and y == inf, because the
10960  // intermediate multiplication produces a nan.
10961  if (!Options.NoInfsFPMath)
10962  return SDValue();
10963 
10964  // Floating-point multiply-add without intermediate rounding.
10965  bool HasFMA =
10966  (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
10967  TLI.isFMAFasterThanFMulAndFAdd(VT) &&
10968  (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
10969 
10970  // Floating-point multiply-add with intermediate rounding. This can result
10971  // in a less precise result due to the changed rounding order.
10972  bool HasFMAD = Options.UnsafeFPMath &&
10973  (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
10974 
10975  // No valid opcode, do not combine.
10976  if (!HasFMAD && !HasFMA)
10977  return SDValue();
10978 
10979  // Always prefer FMAD to FMA for precision.
10980  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
10981  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
10982 
10983  // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
10984  // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
10985  auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
10986  if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
10987  if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
10988  if (C->isExactlyValue(+1.0))
10989  return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
10990  Y, Flags);
10991  if (C->isExactlyValue(-1.0))
10992  return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
10993  DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
10994  }
10995  }
10996  return SDValue();
10997  };
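  // For example (illustrative): (fmul (fadd x, 1.0), y) expands as
  // (x + 1.0) * y = x*y + y, which is exactly (fma x, y, y).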
10998 
10999  if (SDValue FMA = FuseFADD(N0, N1, Flags))
11000  return FMA;
11001  if (SDValue FMA = FuseFADD(N1, N0, Flags))
11002  return FMA;
11003 
11004  // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
11005  // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
11006  // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
11007  // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
11008  auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
11009  if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
11010  if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
11011  if (C0->isExactlyValue(+1.0))
11012  return DAG.getNode(PreferredFusedOpcode, SL, VT,
11013  DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
11014  Y, Flags);
11015  if (C0->isExactlyValue(-1.0))
11016  return DAG.getNode(PreferredFusedOpcode, SL, VT,
11017  DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
11018  DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
11019  }
11020  if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
11021  if (C1->isExactlyValue(+1.0))
11022  return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11023  DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
11024  if (C1->isExactlyValue(-1.0))
11025  return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11026  Y, Flags);
11027  }
11028  }
11029  return SDValue();
11030  };
11031 
11032  if (SDValue FMA = FuseFSUB(N0, N1, Flags))
11033  return FMA;
11034  if (SDValue FMA = FuseFSUB(N1, N0, Flags))
11035  return FMA;
11036 
11037  return SDValue();
11038 }
11039 
11040 SDValue DAGCombiner::visitFADD(SDNode *N) {
11041  SDValue N0 = N->getOperand(0);
11042  SDValue N1 = N->getOperand(1);
11043  bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
11044  bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
11045  EVT VT = N->getValueType(0);
11046  SDLoc DL(N);
11047  const TargetOptions &Options = DAG.getTarget().Options;
11048  const SDNodeFlags Flags = N->getFlags();
11049 
11050  // fold vector ops
11051  if (VT.isVector())
11052  if (SDValue FoldedVOp = SimplifyVBinOp(N))
11053  return FoldedVOp;
11054 
11055  // fold (fadd c1, c2) -> c1 + c2
11056  if (N0CFP && N1CFP)
11057  return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
11058 
11059  // canonicalize constant to RHS
11060  if (N0CFP && !N1CFP)
11061  return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
11062 
11063  // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
11064  ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
11065  if (N1C && N1C->isZero())
11066  if (N1C->isNegative() || Options.UnsafeFPMath || Flags.hasNoSignedZeros())
11067  return N0;
11068 
11069  if (SDValue NewSel = foldBinOpIntoSelect(N))
11070  return NewSel;
11071 
11072  // fold (fadd A, (fneg B)) -> (fsub A, B)
11073  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
11074  isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
11075  return DAG.getNode(ISD::FSUB, DL, VT, N0,
11076  GetNegatedExpression(N1, DAG, LegalOperations), Flags);
11077 
11078  // fold (fadd (fneg A), B) -> (fsub B, A)
11079  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
11080  isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
11081  return DAG.getNode(ISD::FSUB, DL, VT, N1,
11082  GetNegatedExpression(N0, DAG, LegalOperations), Flags);
11083 
11084  auto isFMulNegTwo = [](SDValue FMul) {
11085  if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
11086  return false;
11087  auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
11088  return C && C->isExactlyValue(-2.0);
11089  };
11090 
11091  // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
11092  if (isFMulNegTwo(N0)) {
11093  SDValue B = N0.getOperand(0);
11094  SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
11095  return DAG.getNode(ISD::FSUB, DL, VT, N1, Add, Flags);
11096  }
11097  // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
11098  if (isFMulNegTwo(N1)) {
11099  SDValue B = N1.getOperand(0);
11100  SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
11101  return DAG.getNode(ISD::FSUB, DL, VT, N0, Add, Flags);
11102  }
11103 
11104  // No FP constant should be created after legalization as Instruction
11105  // Selection pass has a hard time dealing with FP constants.
11106  bool AllowNewConst = (Level < AfterLegalizeDAG);
11107 
11108  // If 'unsafe math' or nnan is enabled, fold lots of things.
11109  if ((Options.UnsafeFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
11110  // If allowed, fold (fadd (fneg x), x) -> 0.0
11111  if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
11112  return DAG.getConstantFP(0.0, DL, VT);
11113 
11114  // If allowed, fold (fadd x, (fneg x)) -> 0.0
11115  if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
11116  return DAG.getConstantFP(0.0, DL, VT);
11117  }
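  // The cancellation above needs nnan (or unsafe math): for x = +infinity,
  // (fadd x, (fneg x)) is inf + (-inf) = NaN, not 0.0.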
11118 
11119  // If 'unsafe math' or reassoc and nsz, fold lots of things.
11120  // TODO: break out portions of the transformations below for which Unsafe is
11121  // considered and which do not require both nsz and reassoc
11122  if ((Options.UnsafeFPMath ||
11123  (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
11124  AllowNewConst) {
11125  // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
11126  if (N1CFP && N0.getOpcode() == ISD::FADD &&
11127  isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
11128  SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
11129  return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
11130  }
11131 
11132  // We can fold chains of FADD's of the same value into multiplications.
11133  // This transform is not safe in general because we are reducing the number
11134  // of rounding steps.
11135  if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
11136  if (N0.getOpcode() == ISD::FMUL) {
11137  bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
11138  bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
11139 
11140  // (fadd (fmul x, c), x) -> (fmul x, c+1)
11141  if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
11142  SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
11143  DAG.getConstantFP(1.0, DL, VT), Flags);
11144  return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
11145  }
11146 
11147  // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
11148  if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
11149  N1.getOperand(0) == N1.getOperand(1) &&
11150  N0.getOperand(0) == N1.getOperand(0)) {
11151  SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
11152  DAG.getConstantFP(2.0, DL, VT), Flags);
11153  return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
11154  }
11155  }
11156 
11157  if (N1.getOpcode() == ISD::FMUL) {
11158  bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
11159  bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
11160 
11161  // (fadd x, (fmul x, c)) -> (fmul x, c+1)
11162  if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
11163  SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
11164  DAG.getConstantFP(1.0, DL, VT), Flags);
11165  return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
11166  }
11167 
11168  // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
11169  if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
11170  N0.getOperand(0) == N0.getOperand(1) &&
11171  N1.getOperand(0) == N0.getOperand(0)) {
11172  SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
11173  DAG.getConstantFP(2.0, DL, VT), Flags);
11174  return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
11175  }
11176  }
11177 
11178  if (N0.getOpcode() == ISD::FADD) {
11179  bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
11180  // (fadd (fadd x, x), x) -> (fmul x, 3.0)
11181  if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
11182  (N0.getOperand(0) == N1)) {
11183  return DAG.getNode(ISD::FMUL, DL, VT,
11184  N1, DAG.getConstantFP(3.0, DL, VT), Flags);
11185  }
11186  }
11187 
11188  if (N1.getOpcode() == ISD::FADD) {
11189  bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
11190  // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
11191  if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
11192  N1.getOperand(0) == N0) {
11193  return DAG.getNode(ISD::FMUL, DL, VT,
11194  N0, DAG.getConstantFP(3.0, DL, VT), Flags);
11195  }
11196  }
11197 
11198  // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
11199  if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
11200  N0.getOperand(0) == N0.getOperand(1) &&
11201  N1.getOperand(0) == N1.getOperand(1) &&
11202  N0.getOperand(0) == N1.getOperand(0)) {
11203  return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
11204  DAG.getConstantFP(4.0, DL, VT), Flags);
11205  }
11206  }
11207  } // enable-unsafe-fp-math
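  // For example (illustrative): ((x + x) + (x + x)) is rewritten to
  // (fmul x, 4.0), trading three additions for a single multiply.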
11208 
11209  // FADD -> FMA combines:
11210  if (SDValue Fused = visitFADDForFMACombine(N)) {
11211  AddToWorklist(Fused.getNode());
11212  return Fused;
11213  }
11214  return SDValue();
11215 }
11216 
11217 SDValue DAGCombiner::visitFSUB(SDNode *N) {
11218  SDValue N0 = N->getOperand(0);
11219  SDValue N1 = N->getOperand(1);
11220  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
11221  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
11222  EVT VT = N->getValueType(0);
11223  SDLoc DL(N);
11224  const TargetOptions &Options = DAG.getTarget().Options;
11225  const SDNodeFlags Flags = N->getFlags();
11226 
11227  // fold vector ops
11228  if (VT.isVector())
11229  if (SDValue FoldedVOp = SimplifyVBinOp(N))
11230  return FoldedVOp;
11231 
11232  // fold (fsub c1, c2) -> c1-c2
11233  if (N0CFP && N1CFP)
11234  return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
11235 
11236  if (SDValue NewSel = foldBinOpIntoSelect(N))
11237  return NewSel;
11238 
11239  // (fsub A, 0) -> A
11240  if (N1CFP && N1CFP->isZero()) {
11241  if (!N1CFP->isNegative() || Options.UnsafeFPMath ||
11242  Flags.hasNoSignedZeros()) {
11243  return N0;
11244  }
11245  }
11246 
11247  if (N0 == N1) {
11248  // (fsub x, x) -> 0.0
11249  if (Options.UnsafeFPMath || Flags.hasNoNaNs())
11250  return DAG.getConstantFP(0.0f, DL, VT);
11251  }
11252 
11253  // (fsub -0.0, N1) -> -N1
11254  if (N0CFP && N0CFP->isZero()) {
11255  if (N0CFP->isNegative() ||
11256  (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
11257  if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
11258  return GetNegatedExpression(N1, DAG, LegalOperations);
11259  if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
11260  return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
11261  }
11262  }
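  // The sign checks above matter because (fsub +0.0, +0.0) is +0.0 while
  // (fneg +0.0) is -0.0; only an exact -0.0 LHS or nsz makes the fold to a
  // plain negation safe.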
11263 
11264  if ((Options.UnsafeFPMath ||
11265  (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))
11266  && N1.getOpcode() == ISD::FADD) {
11267  // X - (X + Y) -> -Y
11268  if (N0 == N1->getOperand(0))
11269  return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
11270  // X - (Y + X) -> -Y
11271  if (N0 == N1->getOperand(1))
11272  return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags);
11273  }
11274 
11275  // fold (fsub A, (fneg B)) -> (fadd A, B)
11276  if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
11277  return DAG.getNode(ISD::FADD, DL, VT, N0,
11278  GetNegatedExpression(N1, DAG, LegalOperations), Flags);
11279 
11280  // FSUB -> FMA combines:
11281  if (SDValue Fused = visitFSUBForFMACombine(N)) {
11282  AddToWorklist(Fused.getNode());
11283  return Fused;
11284  }
11285 
11286  return SDValue();
11287 }
11288 
11289 SDValue DAGCombiner::visitFMUL(SDNode *N) {
11290  SDValue N0 = N->getOperand(0);
11291  SDValue N1 = N->getOperand(1);
11292  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
11293  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
11294  EVT VT = N->getValueType(0);
11295  SDLoc DL(N);
11296  const TargetOptions &Options = DAG.getTarget().Options;
11297  const SDNodeFlags Flags = N->getFlags();
11298 
11299  // fold vector ops
11300  if (VT.isVector()) {
11301  // This just handles C1 * C2 for vectors. Other vector folds are below.
11302  if (SDValue FoldedVOp = SimplifyVBinOp(N))
11303  return FoldedVOp;
11304  }
11305 
11306  // fold (fmul c1, c2) -> c1*c2
11307  if (N0CFP && N1CFP)
11308  return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
11309 
11310  // canonicalize constant to RHS
11311  if (isConstantFPBuildVectorOrConstantFP(N0) &&
11312  !isConstantFPBuildVectorOrConstantFP(N1))
11313  return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
11314 
11315  // fold (fmul A, 1.0) -> A
11316  if (N1CFP && N1CFP->isExactlyValue(1.0))
11317  return N0;
11318 
11319  if (SDValue NewSel = foldBinOpIntoSelect(N))
11320  return NewSel;
11321 
11322  if (Options.UnsafeFPMath ||
11323  (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
11324  // fold (fmul A, 0) -> 0
11325  if (N1CFP && N1CFP->isZero())
11326  return N1;
11327  }
11328 
11329  if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
11330  // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
11331  if (isConstantFPBuildVectorOrConstantFP(N1) &&
11332  N0.getOpcode() == ISD::FMUL) {
11333  SDValue N00 = N0.getOperand(0);
11334  SDValue N01 = N0.getOperand(1);
11335  // Avoid an infinite loop by making sure that N00 is not a constant
11336  // (the inner multiply has not been constant folded yet).
11337  if (isConstantFPBuildVectorOrConstantFP(N01) &&
11338  !isConstantFPBuildVectorOrConstantFP(N00)) {
11339  SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
11340  return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
11341  }
11342  }
11343 
11344  // Match a special-case: we convert X * 2.0 into fadd.
11345  // fmul (fadd X, X), C -> fmul X, 2.0 * C
11346  if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
11347  N0.getOperand(0) == N0.getOperand(1)) {
11348  const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
11349  SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
11350  return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
11351  }
11352  }
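  // For example (illustrative): (fmul (fadd x, x), 3.0) is rewritten to
  // (fmul x, 6.0), folding the implicit factor of 2.0 from x + x into the
  // constant.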
11353 
11354  // fold (fmul X, 2.0) -> (fadd X, X)
11355  if (N1CFP && N1CFP->isExactlyValue(+2.0))
11356  return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
11357 
11358  // fold (fmul X, -1.0) -> (fneg X)
11359  if (N1CFP && N1CFP->isExactlyValue(-1.0))
11360  if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
11361  return DAG.getNode(ISD::FNEG, DL, VT, N0);
11362 
11363  // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
11364  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
11365  if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
11366  // Both can be negated for free, check to see if at least one is cheaper
11367  // negated.
11368  if (LHSNeg == 2 || RHSNeg == 2)
11369  return DAG.getNode(ISD::FMUL, DL, VT,
11370  GetNegatedExpression(N0, DAG, LegalOperations),
11371  GetNegatedExpression(N1, DAG, LegalOperations),
11372  Flags);
11373  }
11374  }
11375 
11376  // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
11377  // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
11378  if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
11379  (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
11380  TLI.isOperationLegal(ISD::FABS, VT)) {
11381  SDValue Select = N0, X = N1;
11382  if (Select.getOpcode() != ISD::SELECT)
11383  std::swap(Select, X);
11384 
11385  SDValue Cond = Select.getOperand(0);
11386  auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
11387  auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
11388 
11389  if (TrueOpnd && FalseOpnd &&
11390  Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
11391  isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
11392  cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
11393  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11394  switch (CC) {
11395  default: break;
11396  case ISD::SETOLT:
11397  case ISD::SETULT:
11398  case ISD::SETOLE:
11399  case ISD::SETULE:
11400  case ISD::SETLT:
11401  case ISD::SETLE:
11402  std::swap(TrueOpnd, FalseOpnd);
11403  LLVM_FALLTHROUGH;
11404  case ISD::SETOGT:
11405  case ISD::SETUGT:
11406  case ISD::SETOGE:
11407  case ISD::SETUGE:
11408  case ISD::SETGT:
11409  case ISD::SETGE:
11410  if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
11411  TLI.isOperationLegal(ISD::FNEG, VT))
11412  return DAG.getNode(ISD::FNEG, DL, VT,
11413  DAG.getNode(ISD::FABS, DL, VT, X));
11414  if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
11415  return DAG.getNode(ISD::FABS, DL, VT, X);
11416 
11417  break;
11418  }
11419  }
11420  }
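  // For example (illustrative): x * (x > 0.0 ? 1.0 : -1.0) computes |x| and
  // becomes (fabs x); nnan/nsz are required because x = NaN or x = +0.0
  // would otherwise distinguish the two forms.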
11421 
11422  // FMUL -> FMA combines:
11423  if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
11424  AddToWorklist(Fused.getNode());
11425  return Fused;
11426  }
11427 
11428  return SDValue();
11429 }
11430 
11431 SDValue DAGCombiner::visitFMA(SDNode *N) {
11432  SDValue N0 = N->getOperand(0);
11433  SDValue N1 = N->getOperand(1);
11434  SDValue N2 = N->getOperand(2);
11435  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11436  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
11437  EVT VT = N->getValueType(0);
11438  SDLoc DL(N);
11439  const TargetOptions &Options = DAG.getTarget().Options;
11440 
11441  // FMA nodes have flags that propagate to the created nodes.
11442  const SDNodeFlags Flags = N->getFlags();
11443  bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);
11444 
11445  // Constant fold FMA.
11446  if (isa<ConstantFPSDNode>(N0) &&
11447  isa<ConstantFPSDNode>(N1) &&
11448  isa<ConstantFPSDNode>(N2)) {
11449  return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
11450  }
11451 
11452  if (UnsafeFPMath) {
11453  if (N0CFP && N0CFP->isZero())
11454  return N2;
11455  if (N1CFP && N1CFP->isZero())
11456  return N2;
11457  }
11458  // TODO: The FMA node should have flags that propagate to these nodes.
11459  if (N0CFP && N0CFP->isExactlyValue(1.0))
11460  return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
11461  if (N1CFP && N1CFP->isExactlyValue(1.0))
11462  return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
11463 
11464  // Canonicalize (fma c, x, y) -> (fma x, c, y)
11465  if (isConstantFPBuildVectorOrConstantFP(N0) &&
11466  !isConstantFPBuildVectorOrConstantFP(N1))
11467  return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
11468 
11469  if (UnsafeFPMath) {
11470  // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
11471  if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
11472  isConstantFPBuildVectorOrConstantFP(N1) &&
11473  isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
11474  return DAG.getNode(ISD::FMUL, DL, VT, N0,
11475  DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
11476  Flags), Flags);
11477  }
11478 
11479  // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
11480  if (N0.getOpcode() == ISD::FMUL &&
11481  isConstantFPBuildVectorOrConstantFP(N1) &&
11482  isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
11483  return DAG.getNode(ISD::FMA, DL, VT,
11484  N0.getOperand(0),
11485  DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
11486  Flags),
11487  N2);
11488  }
11489  }
11490 
11491  // (fma x, 1, y) -> (fadd x, y)
11492  // (fma x, -1, y) -> (fadd (fneg x), y)
11493  if (N1CFP) {
11494  if (N1CFP->isExactlyValue(1.0))
11495  // TODO: The FMA node should have flags that propagate to this node.
11496  return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
11497 
11498  if (N1CFP->isExactlyValue(-1.0) &&
11499  (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
11500  SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
11501  AddToWorklist(RHSNeg.getNode());
11502  // TODO: The FMA node should have flags that propagate to this node.
11503  return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
11504  }
11505 
11506  // fma (fneg x), K, y -> fma x, -K, y
11507  if (N0.getOpcode() == ISD::FNEG &&
11508  (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
11509  (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT)))) {
11510  return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
11511  DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
11512  }
11513  }
11514 
11515  if (UnsafeFPMath) {
11516  // (fma x, c, x) -> (fmul x, (c+1))
11517  if (N1CFP && N0 == N2) {
11518  return DAG.getNode(ISD::FMUL, DL, VT, N0,
11519  DAG.getNode(ISD::FADD, DL, VT, N1,
11520  DAG.getConstantFP(1.0, DL, VT), Flags),
11521  Flags);
11522  }
11523 
11524  // (fma x, c, (fneg x)) -> (fmul x, (c-1))
11525  if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
11526  return DAG.getNode(ISD::FMUL, DL, VT, N0,
11527  DAG.getNode(ISD::FADD, DL, VT, N1,
11528  DAG.getConstantFP(-1.0, DL, VT), Flags),
11529  Flags);
11530  }
11531  }
11532 
11533  return SDValue();
11534 }
11535 
11536 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
11537 // reciprocal.
11538 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
11539 // Notice that this is not always beneficial. One reason is different targets
11540 // may have different costs for FDIV and FMUL, so sometimes the cost of two
11541 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
11542 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
11543 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
11544  bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
11545  const SDNodeFlags Flags = N->getFlags();
11546  if (!UnsafeMath && !Flags.hasAllowReciprocal())
11547  return SDValue();
11548 
11549  // Skip if current node is a reciprocal.
11550  SDValue N0 = N->getOperand(0);
11551  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11552  if (N0CFP && N0CFP->isExactlyValue(1.0))
11553  return SDValue();
11554 
11555  // Exit early if the target does not want this transform or if there can't
11556  // possibly be enough uses of the divisor to make the transform worthwhile.
11557  SDValue N1 = N->getOperand(1);
11558  unsigned MinUses = TLI.combineRepeatedFPDivisors();
11559  if (!MinUses || N1->use_size() < MinUses)
11560  return SDValue();
11561 
11562  // Find all FDIV users of the same divisor.
11563  // Use a set because duplicates may be present in the user list.
11564  SetVector<SDNode *> Users;
11565  for (auto *U : N1->uses()) {
11566  if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
11567  // This division is eligible for optimization only if global unsafe math
11568  // is enabled or if this division allows reciprocal formation.
11569  if (UnsafeMath || U->getFlags().hasAllowReciprocal())
11570  Users.insert(U);
11571  }
11572  }
11573 
11574  // Now that we have the actual number of divisor uses, make sure it meets
11575  // the minimum threshold specified by the target.
11576  if (Users.size() < MinUses)
11577  return SDValue();
11578 
11579  EVT VT = N->getValueType(0);
11580  SDLoc DL(N);
11581  SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
11582  SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
11583 
11584  // Dividend / Divisor -> Dividend * Reciprocal
11585  for (auto *U : Users) {
11586  SDValue Dividend = U->getOperand(0);
11587  if (Dividend != FPOne) {
11588  SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
11589  Reciprocal, Flags);
11590  CombineTo(U, NewNode);
11591  } else if (U != Reciprocal.getNode()) {
11592  // In the absence of fast-math-flags, this user node is always the
11593  // same node as Reciprocal, but with FMF they may be different nodes.
11594  CombineTo(U, Reciprocal);
11595  }
11596  }
11597  return SDValue(N, 0); // N was replaced.
11598 }
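// Worked example of the rewrite above (illustrative): given "a / D; b / D;
// c / D" and a target threshold of at most three uses, a single
// "t = 1.0 / D" is emitted and each division becomes a multiply:
// "a * t; b * t; c * t".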
11599 
11600 SDValue DAGCombiner::visitFDIV(SDNode *N) {
11601  SDValue N0 = N->getOperand(0);
11602  SDValue N1 = N->getOperand(1);
11603  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11604  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
11605  EVT VT = N->getValueType(0);
11606  SDLoc DL(N);
11607  const TargetOptions &Options = DAG.getTarget().Options;
11608  SDNodeFlags Flags = N->getFlags();
11609 
11610  // fold vector ops
11611  if (VT.isVector())
11612  if (SDValue FoldedVOp = SimplifyVBinOp(N))
11613  return FoldedVOp;
11614 
11615  // fold (fdiv c1, c2) -> c1/c2
11616  if (N0CFP && N1CFP)
11617  return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
11618 
11619  if (SDValue NewSel = foldBinOpIntoSelect(N))
11620  return NewSel;
11621 
11622  if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
11623  // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
11624  if (N1CFP) {
11625  // Compute the reciprocal 1.0 / c2.
11626  const APFloat &N1APF = N1CFP->getValueAPF();
11627  APFloat Recip(N1APF.getSemantics(), 1); // 1.0
11628  APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
11629  // Only do the transform if the reciprocal is a legal fp immediate that
11630  // isn't too nasty (eg NaN, denormal, ...).
11631  if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
11632  (!LegalOperations ||
11633  // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
11634  // backend)... we should handle this gracefully after Legalize.
11635  // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
11636  TLI.isOperationLegal(ISD::ConstantFP, VT) ||
11637  TLI.isFPImmLegal(Recip, VT)))
11638  return DAG.getNode(ISD::FMUL, DL, VT, N0,
11639  DAG.getConstantFP(Recip, DL, VT), Flags);
11640  }
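  // For example (illustrative): x / 4.0 becomes x * 0.25 exactly (opOK),
  // while x / 3.0 becomes x * 0.33333... and is merely opInexact; both are
  // accepted. A reciprocal that underflows to a denormal raises additional
  // status bits and is rejected.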
11641 
11642  // If this FDIV is part of a reciprocal square root, it may be folded
11643  // into a target-specific square root estimate instruction.
11644  if (N1.getOpcode() == ISD::FSQRT) {
11645  if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
11646  return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
11647  }
11648  } else if (N1.getOpcode() == ISD::FP_EXTEND &&
11649  N1.getOperand(0).getOpcode() == ISD::FSQRT) {
11650  if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
11651  Flags)) {
11652  RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
11653  AddToWorklist(RV.getNode());
11654  return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
11655  }
11656  } else if (N1.getOpcode() == ISD::FP_ROUND &&
11657  N1.getOperand(0).getOpcode() == ISD::FSQRT) {
11658  if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
11659  Flags)) {
11660  RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
11661  AddToWorklist(RV.getNode());
11662  return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
11663  }
11664  } else if (N1.getOpcode() == ISD::FMUL) {
11665  // Look through an FMUL. Even though this won't remove the FDIV directly,
11666  // it's still worthwhile to get rid of the FSQRT if possible.
11667  SDValue SqrtOp;
11668  SDValue OtherOp;
11669  if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
11670  SqrtOp = N1.getOperand(0);
11671  OtherOp = N1.getOperand(1);
11672  } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
11673  SqrtOp = N1.getOperand(1);
11674  OtherOp = N1.getOperand(0);
11675  }
11676  if (SqrtOp.getNode()) {
11677  // We found a FSQRT, so try to make this fold:
11678  // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
11679  if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
11680  RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
11681  AddToWorklist(RV.getNode());
11682  return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
11683  }
11684  }
11685  }
11686 
11687  // Fold into a reciprocal estimate and multiply instead of a real divide.
11688  if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
11689  AddToWorklist(RV.getNode());
11690  return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
11691  }
11692  }
11693 
11694  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
11695  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
11696  if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
11697  // Both can be negated for free, check to see if at least one is cheaper
11698  // negated.
11699  if (LHSNeg == 2 || RHSNeg == 2)
11700  return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
11701  GetNegatedExpression(N0, DAG, LegalOperations),
11702  GetNegatedExpression(N1, DAG, LegalOperations),
11703  Flags);
11704  }
11705  }
11706 
11707  if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
11708  return CombineRepeatedDivisors;
11709 
11710  return SDValue();
11711 }
11712 
11713 SDValue DAGCombiner::visitFREM(SDNode *N) {
11714  SDValue N0 = N->getOperand(0);
11715  SDValue N1 = N->getOperand(1);
11716  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11717  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
11718  EVT VT = N->getValueType(0);
11719 
11720  // fold (frem c1, c2) -> fmod(c1,c2)
11721  if (N0CFP && N1CFP)
11722  return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
11723 
11724  if (SDValue NewSel = foldBinOpIntoSelect(N))
11725  return NewSel;
11726 
11727  return SDValue();
11728 }
11729 
11730 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
11731  SDNodeFlags Flags = N->getFlags();
11732  if (!DAG.getTarget().Options.UnsafeFPMath &&
11733  !Flags.hasApproximateFuncs())
11734  return SDValue();
11735 
11736  SDValue N0 = N->getOperand(0);
11737  if (TLI.isFsqrtCheap(N0, DAG))
11738  return SDValue();
11739 
11740  // FSQRT nodes have flags that propagate to the created nodes.
11741  return buildSqrtEstimate(N0, Flags);
11742 }
11743 
11744 /// copysign(x, fp_extend(y)) -> copysign(x, y)
11745 /// copysign(x, fp_round(y)) -> copysign(x, y)
11746 static bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
11747  SDValue N1 = N->getOperand(1);
11748  if ((N1.getOpcode() == ISD::FP_EXTEND ||
11749  N1.getOpcode() == ISD::FP_ROUND)) {
11750  // Do not optimize out type conversion of f128 type yet.
11751  // For some targets like x86_64, configuration is changed to keep one f128
11752  // value in one SSE register, but instruction selection cannot handle
11753  // FCOPYSIGN on SSE registers yet.
11754  EVT N1VT = N1->getValueType(0);
11755  EVT N1Op0VT = N1->getOperand(0).getValueType();
11756  return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
11757  }
11758  return false;
11759 }
11760 
11761 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
11762  SDValue N0 = N->getOperand(0);
11763  SDValue N1 = N->getOperand(1);
11764  bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
11765  bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
11766  EVT VT = N->getValueType(0);
11767 
11768  if (N0CFP && N1CFP) // Constant fold
11769  return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
11770 
11771  if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1)) {
11772  const APFloat &V = N1C->getValueAPF();
11773  // copysign(x, c1) -> fabs(x) iff ispos(c1)
11774  // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
11775  if (!V.isNegative()) {
11776  if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
11777  return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
11778  } else {
11779  if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
11780  return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
11781  DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
11782  }
11783  }
11784 
11785  // copysign(fabs(x), y) -> copysign(x, y)
11786  // copysign(fneg(x), y) -> copysign(x, y)
11787  // copysign(copysign(x,z), y) -> copysign(x, y)
11788  if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
11789  N0.getOpcode() == ISD::FCOPYSIGN)
11790  return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
11791 
11792  // copysign(x, abs(y)) -> abs(x)
11793  if (N1.getOpcode() == ISD::FABS)
11794  return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
11795 
11796  // copysign(x, copysign(y,z)) -> copysign(x, z)
11797  if (N1.getOpcode() == ISD::FCOPYSIGN)
11798  return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
11799 
11800  // copysign(x, fp_extend(y)) -> copysign(x, y)
11801  // copysign(x, fp_round(y)) -> copysign(x, y)
11802  if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
11803  return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
11804 
11805  return SDValue();
11806 }
11807 
11808 SDValue DAGCombiner::visitFPOW(SDNode *N) {
11809  ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
11810  if (!ExponentC)
11811  return SDValue();
11812 
11813  // Try to convert x ** (1/3) into cube root.
11814  // TODO: Handle the various flavors of long double.
11815  // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
11816  // Some range near 1/3 should be fine.
11817  EVT VT = N->getValueType(0);
11818  if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
11819  (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
11820  // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
11821  // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
11822  // pow(-val, 1/3) = nan; cbrt(-val) = -cbrt(val).
11823  // For regular numbers, rounding may cause the results to differ.
11824  // Therefore, we require { nsz ninf nnan afn } for this transform.
11825  // TODO: We could select out the special cases if we don't have nsz/ninf.
11826  SDNodeFlags Flags = N->getFlags();
11827  if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
11828  !Flags.hasApproximateFuncs())
11829  return SDValue();
11830 
11831  // Do not create a cbrt() libcall if the target does not have it, and do not
11832  // turn a pow that has lowering support into a cbrt() libcall.
11833  if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
11834  (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
11835  DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
11836  return SDValue();
11837 
11838  return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
11839  }
11840 
11841  // Try to convert x ** (1/4) into square roots.
11842  // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
11843  // TODO: This could be extended (using a target hook) to handle smaller
11844  // power-of-2 fractional exponents.
11845  if (ExponentC->getValueAPF().isExactlyValue(0.25)) {
11846  // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
11847  // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
11848  // For regular numbers, rounding may cause the results to differ.
11849  // Therefore, we require { nsz ninf afn } for this transform.
11850  // TODO: We could select out the special cases if we don't have nsz/ninf.
11851  SDNodeFlags Flags = N->getFlags();
11852  if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() ||
11853  !Flags.hasApproximateFuncs())
11854  return SDValue();
11855 
11856  // Don't double the number of libcalls. We are trying to inline fast code.
11857  if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
11858  return SDValue();
11859 
11860  // Assume that libcalls are the smallest code.
11861  // TODO: This restriction should probably be lifted for vectors.
11862  if (DAG.getMachineFunction().getFunction().optForSize())
11863  return SDValue();
11864 
11865  // pow(X, 0.25) --> sqrt(sqrt(X))
11866  SDLoc DL(N);
11867  SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
11868  return DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
11869  }
11870 
11871  return SDValue();
11872 }
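// [Editorial illustration, not part of the original file.] The pow(x, 0.25)
// fold above is only valid under the listed fast-math flags because the two
// forms disagree on special values, e.g. pow(-inf, 0.25) = +inf while
// sqrt(sqrt(-inf)) = NaN. A source-level sketch with hypothetical names:
#include <cmath>
static float powQuarter(float x) {
  return std::pow(x, 0.25f); // before: one pow libcall
}
static float foldedQuarter(float x) {
  return std::sqrt(std::sqrt(x)); // after: two sqrts, assuming legal FSQRT
}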
11873 
11874 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
11875  const TargetLowering &TLI) {
11876  // This optimization is guarded by a function attribute because it may produce
11877  // unexpected results. I.e., programs may be relying on the platform-specific
11878  // undefined behavior when the float-to-int conversion overflows.
11879  const Function &F = DAG.getMachineFunction().getFunction();
11880  Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
11881  if (StrictOverflow.getValueAsString().equals("false"))
11882  return SDValue();
11883 
11884  // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
11885  // replacing casts with a libcall. We also must be allowed to ignore -0.0
11886  // because FTRUNC will return -0.0 for inputs in (-1.0, -0.0), but using
11887  // integer conversions would return +0.0.
11888  // FIXME: We should be able to use node-level FMF here.
11889  // TODO: If strict math, should we use FABS (+ range check for signed cast)?
11890  EVT VT = N->getValueType(0);
11891  if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
11892  !DAG.getTarget().Options.NoSignedZerosFPMath)
11893  return SDValue();
11894 
11895  // fptosi/fptoui round towards zero, so converting from FP to integer and
11896  // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
11897  SDValue N0 = N->getOperand(0);
11898  if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
11899  N0.getOperand(0).getValueType() == VT)
11900  return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
11901 
11902  if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
11903  N0.getOperand(0).getValueType() == VT)
11904  return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
11905 
11906  return SDValue();
11907 }
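// [Editorial illustration, not part of the original file.] The source pattern
// matched by foldFPToIntToFP above; `roundTowardZero` is a hypothetical name.
// Because fptosi rounds toward zero and overflow is UB anyway, the whole
// round-trip collapses to a single ftrunc when FTRUNC is legal and -0.0 may
// be ignored:
static double roundTowardZero(double x) {
  // [s]itofp (fpto[s]i x) --> ftrunc x
  return static_cast<double>(static_cast<long long>(x));
}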
11908 
11909 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
11910  SDValue N0 = N->getOperand(0);
11911  EVT VT = N->getValueType(0);
11912  EVT OpVT = N0.getValueType();
11913 
11914  // fold (sint_to_fp c1) -> c1fp
11915  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
11916  // ...but only if the target supports immediate floating-point values
11917  (!LegalOperations ||
11918  TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
11919  return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
11920 
11921  // If the input is a legal type, and SINT_TO_FP is not legal on this target,
11922  // but UINT_TO_FP is legal on this target, try to convert.
11923  if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
11924  hasOperation(ISD::UINT_TO_FP, OpVT)) {
11925  // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
11926  if (DAG.SignBitIsZero(N0))
11927  return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
11928  }
11929 
11930  // The next optimizations are desirable only if SELECT_CC can be lowered.
11931  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
11932  // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
11933  if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
11934  !VT.isVector() &&
11935  (!LegalOperations ||
11936  TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
11937  SDLoc DL(N);
11938  SDValue Ops[] =
11939  { N0.getOperand(0), N0.getOperand(1),
11940  DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
11941  N0.getOperand(2) };
11942  return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
11943  }
11944 
11945  // fold (sint_to_fp (zext (setcc x, y, cc))) ->
11946  // (select_cc x, y, 1.0, 0.0, cc)
11947  if (N0.getOpcode() == ISD::ZERO_EXTEND &&
11948  N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
11949  (!LegalOperations ||
11950  TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
11951  SDLoc DL(N);
11952  SDValue Ops[] =
11953  { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
11954  DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
11955  N0.getOperand(0).getOperand(2) };
11956  return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
11957  }
11958  }
11959 
11960  if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
11961  return FTrunc;
11962 
11963  return SDValue();
11964 }
11965 
11966 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
11967  SDValue N0 = N->getOperand(0);
11968  EVT VT = N->getValueType(0);
11969  EVT OpVT = N0.getValueType();
11970 
11971  // fold (uint_to_fp c1) -> c1fp
11972  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
11973  // ...but only if the target supports immediate floating-point values
11974  (!LegalOperations ||
11975  TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
11976  return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
11977 
11978  // If the input is a legal type, and UINT_TO_FP is not legal on this target,
11979  // but SINT_TO_FP is legal on this target, try to convert.
11980  if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
11981  hasOperation(ISD::SINT_TO_FP, OpVT)) {
11982  // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
11983  if (DAG.SignBitIsZero(N0))
11984  return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
11985  }
11986 
11987  // The next optimizations are desirable only if SELECT_CC can be lowered.
11988  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
11989  // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0, cc)
11990  if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
11991  (!LegalOperations ||
11992  TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
11993  SDLoc DL(N);
11994  SDValue Ops[] =
11995  { N0.getOperand(0), N0.getOperand(1),
11996  DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
11997  N0.getOperand(2) };
11998  return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
11999  }
12000  }
12001 
12002  if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
12003  return FTrunc;
12004 
12005  return SDValue();
12006 }
12007 
12008 // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
12009 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
12010  SDValue N0 = N->getOperand(0);
12011  EVT VT = N->getValueType(0);
12012 
12013  if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
12014  return SDValue();
12015 
12016  SDValue Src = N0.getOperand(0);
12017  EVT SrcVT = Src.getValueType();
12018  bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
12019  bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
12020 
12021  // We can safely assume the conversion won't overflow the output range,
12022  // because (for example) (uint8_t)18293.f is undefined behavior.
12023 
12024  // Since we can assume the conversion won't overflow, our decision as to
12025  // whether the input will fit in the float should depend on the minimum
12026  // of the input range and output range.
12027 
12028  // This means this is also safe for a signed input and unsigned output, since
12029  // a negative input would lead to undefined behavior.
12030  unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
12031  unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
12032  unsigned ActualSize = std::min(InputSize, OutputSize);
12033  const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
12034 
12035  // We can only fold away the float conversion if the input range can be
12036  // represented exactly in the float range.
12037  if (APFloat::semanticsPrecision(sem) >= ActualSize) {
12038  if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
12039  unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
12040  : ISD::ZERO_EXTEND;
12041  return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
12042  }
12043  if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
12044  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
12045  return DAG.getBitcast(VT, Src);
12046  }
12047  return SDValue();
12048 }
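// [Editorial illustration, not part of the original file.] A worked instance
// of the precision check above. float (IEEE binary32) has a 24-bit
// significand, so every int16_t survives the trip through float:
#include <cstdint>
static int32_t throughFloat(int16_t x) {
  // InputSize = 16 - 1 = 15, OutputSize = 32 - 1 = 31, ActualSize = 15,
  // semanticsPrecision(binary32) = 24 >= 15, and 32 > 16, so the pair of
  // conversions folds to a plain sign extension: (int32_t)x.
  return static_cast<int32_t>(static_cast<float>(x));
}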
12049 
12050 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
12051  SDValue N0 = N->getOperand(0);
12052  EVT VT = N->getValueType(0);
12053 
12054  // fold (fp_to_sint c1fp) -> c1
12055  if (isConstantFPBuildVectorOrConstantFP(N0))
12056  return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
12057 
12058  return FoldIntToFPToInt(N, DAG);
12059 }
12060 
12061 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
12062  SDValue N0 = N->getOperand(0);
12063  EVT VT = N->getValueType(0);
12064 
12065  // fold (fp_to_uint c1fp) -> c1
12066  if (isConstantFPBuildVectorOrConstantFP(N0))
12067  return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
12068 
12069  return FoldIntToFPToInt(N, DAG);
12070 }
12071 
12072 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
12073  SDValue N0 = N->getOperand(0);
12074  SDValue N1 = N->getOperand(1);
12075  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12076  EVT VT = N->getValueType(0);
12077 
12078  // fold (fp_round c1fp) -> c1fp
12079  if (N0CFP)
12080  return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
12081 
12082  // fold (fp_round (fp_extend x)) -> x
12083  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
12084  return N0.getOperand(0);
12085 
12086  // fold (fp_round (fp_round x)) -> (fp_round x)
12087  if (N0.getOpcode() == ISD::FP_ROUND) {
12088  const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
12089  const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
12090 
12091  // Skip this folding if it results in an fp_round from f80 to f16.
12092  //
12093  // f80 to f16 always generates an expensive (and as yet, unimplemented)
12094  // libcall to __truncxfhf2 instead of selecting native f16 conversion
12095  // instructions from f32 or f64. Moreover, the first (value-preserving)
12096  // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
12097  // x86.
12098  if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
12099  return SDValue();
12100 
12101  // If the first fp_round isn't a value-preserving truncation, it might
12102  // introduce a tie in the second fp_round that wouldn't occur in the
12103  // single-step fp_round we want to fold to.
12104  // In other words, double rounding isn't the same as rounding.
12105  // Also, this is a value-preserving truncation iff both fp_rounds are.
12106  if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
12107  SDLoc DL(N);
12108  return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
12109  DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
12110  }
12111  }
12112 
12113  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
12114  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
12115  SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
12116  N0.getOperand(0), N1);
12117  AddToWorklist(Tmp.getNode());
12118  return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
12119  Tmp, N0.getOperand(1));
12120  }
12121 
12122  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
12123  return NewVSel;
12124 
12125  return SDValue();
12126 }
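// [Editorial illustration, not part of the original file.] The guard above
// exists because rounding twice (e.g. f64 -> f32 -> f16) is not always the
// same as rounding once (f64 -> f16): the first rounding can create a tie
// that the second rounding then resolves differently from a single direct
// rounding. The copysign fold, by contrast, is always safe, since narrowing
// never changes the sign:
#include <cmath>
static float narrowedCopysign(double x, double y) {
  // fp_round(copysign(x, y)) == copysign(fp_round(x), y)
  return static_cast<float>(std::copysign(x, y));
}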
12127 
12128 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
12129  SDValue N0 = N->getOperand(0);
12130  EVT VT = N->getValueType(0);
12131  EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
12132  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12133 
12134  // fold (fp_round_inreg c1fp) -> c1fp
12135  if (N0CFP && isTypeLegal(EVT)) {
12136  SDLoc DL(N);
12137  SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
12138  return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
12139  }
12140 
12141  return SDValue();
12142 }
12143 
12144 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
12145  SDValue N0 = N->getOperand(0);
12146  EVT VT = N->getValueType(0);
12147 
12148  // If this is fp_round(fpextend), don't fold it; allow ourselves to be folded.
12149  if (N->hasOneUse() &&
12150  N->use_begin()->getOpcode() == ISD::FP_ROUND)
12151  return SDValue();
12152 
12153  // fold (fp_extend c1fp) -> c1fp
12154  if (isConstantFPBuildVectorOrConstantFP(N0))
12155  return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
12156 
12157  // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
12158  if (N0.getOpcode() == ISD::FP16_TO_FP &&
12159  TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
12160  return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
12161 
12162  // Turn fp_extend(fp_round(X, 1)) -> X since the fp_round doesn't affect the
12163  // value of X.
12164  if (N0.getOpcode() == ISD::FP_ROUND
12165  && N0.getConstantOperandVal(1) == 1) {
12166  SDValue In = N0.getOperand(0);
12167  if (In.getValueType() == VT) return In;
12168  if (VT.bitsLT(In.getValueType()))
12169  return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
12170  In, N0.getOperand(1));
12171  return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
12172  }
12173 
12174  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
12175  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
12176  TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
12177  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12178  SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
12179  LN0->getChain(),
12180  LN0->getBasePtr(), N0.getValueType(),
12181  LN0->getMemOperand());
12182  CombineTo(N, ExtLoad);
12183  CombineTo(N0.getNode(),
12184  DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
12185  N0.getValueType(), ExtLoad,
12186  DAG.getIntPtrConstant(1, SDLoc(N0))),
12187  ExtLoad.getValue(1));
12188  return SDValue(N, 0); // Return N so it doesn't get rechecked!
12189  }
12190 
12191  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
12192  return NewVSel;
12193 
12194  return SDValue();
12195 }
12196 
12197 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
12198  SDValue N0 = N->getOperand(0);
12199  EVT VT = N->getValueType(0);
12200 
12201  // fold (fceil c1) -> fceil(c1)
12202  if (isConstantFPBuildVectorOrConstantFP(N0))
12203  return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
12204 
12205  return SDValue();
12206 }
12207 
12208 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
12209  SDValue N0 = N->getOperand(0);
12210  EVT VT = N->getValueType(0);
12211 
12212  // fold (ftrunc c1) -> ftrunc(c1)
12213  if (isConstantFPBuildVectorOrConstantFP(N0))
12214  return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
12215 
12216  // fold ftrunc (known rounded int x) -> x
12217  // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
12218  // likely to be generated to extract an integer from a rounded floating value.
12219  switch (N0.getOpcode()) {
12220  default: break;
12221  case ISD::FRINT:
12222  case ISD::FTRUNC:
12223  case ISD::FNEARBYINT:
12224  case ISD::FFLOOR:
12225  case ISD::FCEIL:
12226  return N0;
12227  }
12228 
12229  return SDValue();
12230 }
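// [Editorial illustration, not part of the original file.] The switch above
// relies on idempotence: once a value has been rounded to an integer by any
// of the listed rounding nodes, truncation is a no-op. Hypothetical sketch:
#include <cmath>
static double truncOfFloor(double x) {
  // ftrunc (ffloor x) -> ffloor x; floor(x) is already an integral value.
  return std::trunc(std::floor(x));
}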
12231 
12232 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
12233  SDValue N0 = N->getOperand(0);
12234  EVT VT = N->getValueType(0);
12235 
12236  // fold (ffloor c1) -> ffloor(c1)
12237  if (isConstantFPBuildVectorOrConstantFP(N0))
12238  return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
12239 
12240  return SDValue();
12241 }
12242 
12243 // FIXME: FNEG and FABS have a lot in common; refactor.
12244 SDValue DAGCombiner::visitFNEG(SDNode *N) {
12245  SDValue N0 = N->getOperand(0);
12246  EVT VT = N->getValueType(0);
12247 
12248  // Constant fold FNEG.
12249  if (isConstantFPBuildVectorOrConstantFP(N0))
12250  return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
12251 
12252  if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
12253  &DAG.getTarget().Options))
12254  return GetNegatedExpression(N0, DAG, LegalOperations);
12255 
12256  // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
12257  // constant pool values.
12258  if (!TLI.isFNegFree(VT) &&
12259  N0.getOpcode() == ISD::BITCAST &&
12260  N0.getNode()->hasOneUse()) {
12261  SDValue Int = N0.getOperand(0);
12262  EVT IntVT = Int.getValueType();
12263  if (IntVT.isInteger() && !IntVT.isVector()) {
12264  APInt SignMask;
12265  if (N0.getValueType().isVector()) {
12266  // For a vector, get a mask such as 0x80... per scalar element
12267  // and splat it.
12268  SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
12269  SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
12270  } else {
12271  // For a scalar, just generate 0x80...
12272  SignMask = APInt::getSignMask(IntVT.getSizeInBits());
12273  }
12274  SDLoc DL0(N0);
12275  Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
12276  DAG.getConstant(SignMask, DL0, IntVT));
12277  AddToWorklist(Int.getNode());
12278  return DAG.getBitcast(VT, Int);
12279  }
12280  }
12281 
12282  // (fneg (fmul c, x)) -> (fmul -c, x)
12283  if (N0.getOpcode() == ISD::FMUL &&
12284  (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
12285  ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
12286  if (CFP1) {
12287  APFloat CVal = CFP1->getValueAPF();
12288  CVal.changeSign();
12289  if (Level >= AfterLegalizeDAG &&
12290  (TLI.isFPImmLegal(CVal, VT) ||
12291  TLI.isOperationLegal(ISD::ConstantFP, VT)))
12292  return DAG.getNode(
12293  ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
12294  DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
12295  N0->getFlags());
12296  }
12297  }
12298 
12299  return SDValue();
12300 }
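// [Editorial illustration, not part of the original file.] Integer view of
// the fneg(bitconvert(x)) fold above: IEEE negation only flips the sign bit,
// so an XOR with the sign mask replaces an FP constant-pool load. A minimal
// sketch for f32:
#include <cstdint>
#include <cstring>
static float negViaXor(float f) {
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits)); // bitcast f32 -> i32
  bits ^= UINT32_C(0x80000000);         // xor with the 0x80... sign mask
  std::memcpy(&f, &bits, sizeof(bits)); // bitcast i32 -> f32
  return f;                             // flips the sign, even for NaN
}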
12301 
12302 static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
12303  APFloat (*Op)(const APFloat &, const APFloat &)) {
12304  SDValue N0 = N->getOperand(0);
12305  SDValue N1 = N->getOperand(1);
12306  EVT VT = N->getValueType(0);
12307  const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
12308  const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
12309 
12310  if (N0CFP && N1CFP) {
12311  const APFloat &C0 = N0CFP->getValueAPF();
12312  const APFloat &C1 = N1CFP->getValueAPF();
12313  return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
12314  }
12315 
12316  // Canonicalize to constant on RHS.
12317  if (isConstantFPBuildVectorOrConstantFP(N0) &&
12318  !isConstantFPBuildVectorOrConstantFP(N1))
12319  return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
12320 
12321  return SDValue();
12322 }
12323 
12324 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
12325  return visitFMinMax(DAG, N, minnum);
12326 }
12327 
12328 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
12329  return visitFMinMax(DAG, N, maxnum);
12330 }
12331 
12332 SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
12333  return visitFMinMax(DAG, N, minimum);
12334 }
12335 
12336 SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
12337  return visitFMinMax(DAG, N, maximum);
12338 }
12339 
12340 SDValue DAGCombiner::visitFABS(SDNode *N) {
12341  SDValue N0 = N->getOperand(0);
12342  EVT VT = N->getValueType(0);
12343 
12344  // fold (fabs c1) -> fabs(c1)
12345  if (isConstantFPBuildVectorOrConstantFP(N0))
12346  return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
12347 
12348  // fold (fabs (fabs x)) -> (fabs x)
12349  if (N0.getOpcode() == ISD::FABS)
12350  return N->getOperand(0);
12351 
12352  // fold (fabs (fneg x)) -> (fabs x)
12353  // fold (fabs (fcopysign x, y)) -> (fabs x)
12354  if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
12355  return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
12356 
12357  // fabs(bitcast(x)) -> bitcast(x & ~sign) to avoid constant pool loads.
12358  if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) {
12359  SDValue Int = N0.getOperand(0);
12360  EVT IntVT = Int.getValueType();
12361  if (IntVT.isInteger() && !IntVT.isVector()) {
12362  APInt SignMask;
12363  if (N0.getValueType().isVector()) {
12364  // For a vector, get a mask such as 0x7f... per scalar element
12365  // and splat it.
12366  SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
12367  SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
12368  } else {
12369  // For a scalar, just generate 0x7f...
12370  SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
12371  }
12372  SDLoc DL(N0);
12373  Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
12374  DAG.getConstant(SignMask, DL, IntVT));
12375  AddToWorklist(Int.getNode());
12376  return DAG.getBitcast(N->getValueType(0), Int);
12377  }
12378  }
12379 
12380  return SDValue();
12381 }
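// [Editorial illustration, not part of the original file.] The fabs
// counterpart of the XOR trick above: clear the sign bit with an AND of the
// inverted sign mask:
#include <cstdint>
#include <cstring>
static float absViaAnd(float f) {
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits)); // bitcast f32 -> i32
  bits &= UINT32_C(0x7fffffff);         // and with ~(sign mask) = 0x7f...
  std::memcpy(&f, &bits, sizeof(bits)); // bitcast i32 -> f32
  return f;
}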
12382 
12383 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
12384  SDValue Chain = N->getOperand(0);
12385  SDValue N1 = N->getOperand(1);
12386  SDValue N2 = N->getOperand(2);
12387 
12388  // If N is a constant we could fold this into a fallthrough or unconditional
12389  // branch. However that doesn't happen very often in normal code, because
12390  // Instcombine/SimplifyCFG should have handled the available opportunities.
12391  // If we did this folding here, it would be necessary to update the
12392  // MachineBasicBlock CFG, which is awkward.
12393 
12394  // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
12395  // on the target.
12396  if (N1.getOpcode() == ISD::SETCC &&
12397  TLI.isOperationLegalOrCustom(ISD::BR_CC,
12398  N1.getOperand(0).getValueType())) {
12399  return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
12400  Chain, N1.getOperand(2),
12401  N1.getOperand(0), N1.getOperand(1), N2);
12402  }
12403 
12404  if (N1.hasOneUse()) {
12405  if (SDValue NewN1 = rebuildSetCC(N1))
12406  return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
12407  }
12408 
12409  return SDValue();
12410 }
12411 
12412 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
12413  if (N.getOpcode() == ISD::SRL ||
12414  (N.getOpcode() == ISD::TRUNCATE &&
12415  (N.getOperand(0).hasOneUse() &&
12416  N.getOperand(0).getOpcode() == ISD::SRL))) {
12417  // Look past the truncate.
12418  if (N.getOpcode() == ISD::TRUNCATE)
12419  N = N.getOperand(0);
12420 
12421  // Match this pattern so that we can generate simpler code:
12422  //
12423  // %a = ...
12424  // %b = and i32 %a, 2
12425  // %c = srl i32 %b, 1
12426  // brcond i32 %c ...
12427  //
12428  // into
12429  //
12430  // %a = ...
12431  // %b = and i32 %a, 2
12432  // %c = setcc eq %b, 0
12433  // brcond %c ...
12434  //
12435  // This applies only when the AND constant value has one bit set and the
12436  // SRL constant is equal to the log2 of the AND constant. The back-end is
12437  // smart enough to convert the result into a TEST/JMP sequence.
12438  SDValue Op0 = N.getOperand(0);
12439  SDValue Op1 = N.getOperand(1);
12440 
12441  if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
12442  SDValue AndOp1 = Op0.getOperand(1);
12443 
12444  if (AndOp1.getOpcode() == ISD::Constant) {
12445  const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
12446 
12447  if (AndConst.isPowerOf2() &&
12448  cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
12449  SDLoc DL(N);
12450  return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
12451  Op0, DAG.getConstant(0, DL, Op0.getValueType()),
12452  ISD::SETNE);
12453  }
12454  }
12455  }
12456  }
12457 
12458  // Transform br(xor(x, y)) -> br(x != y)
12459  // Transform br(xor(xor(x,y), 1)) -> br (x == y)
12460  if (N.getOpcode() == ISD::XOR) {
12461  // Because we may call this on a speculatively constructed
12462  // SimplifiedSetCC Node, we need to simplify this node first.
12463  // Ideally this should be folded into SimplifySetCC and not
12464  // here. For now, grab a handle to N so we don't lose it from
12465  // replacements internal to the visit.
12466  HandleSDNode XORHandle(N);
12467  while (N.getOpcode() == ISD::XOR) {
12468  SDValue Tmp = visitXOR(N.getNode());
12469  // No simplification done.
12470  if (!Tmp.getNode())
12471  break;
12472  // Returning N is a form of in-visit replacement that may have
12473  // invalidated N. Grab the value from the handle.
12474  if (Tmp.getNode() == N.getNode())
12475  N = XORHandle.getValue();
12476  else // Node simplified. Try simplifying again.
12477  N = Tmp;
12478  }
12479 
12480  if (N.getOpcode() != ISD::XOR)
12481  return N;
12482 
12483  SDNode *TheXor = N.getNode();
12484 
12485  SDValue Op0 = TheXor->getOperand(0);
12486  SDValue Op1 = TheXor->getOperand(1);
12487 
12488  if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
12489  bool Equal = false;
12490  if (isOneConstant(Op1) && Op0.hasOneUse() &&
12491  Op0.getOpcode() == ISD::XOR) {
12492  TheXor = Op0.getNode();
12493  Equal = true;
12494  }
12495 
12496  EVT SetCCVT = N.getValueType();
12497  if (LegalTypes)
12498  SetCCVT = getSetCCResultType(SetCCVT);
12499  // Replace the uses of XOR with SETCC
12500  return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
12501  Equal ? ISD::SETEQ : ISD::SETNE);
12502  }
12503  }
12504 
12505  return SDValue();
12506 }
12507 
12508 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
12509 //
12510 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
12511  CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
12512  SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
12513 
12514  // If N is a constant we could fold this into a fallthrough or unconditional
12515  // branch. However that doesn't happen very often in normal code, because
12516  // Instcombine/SimplifyCFG should have handled the available opportunities.
12517  // If we did this folding here, it would be necessary to update the
12518  // MachineBasicBlock CFG, which is awkward.
12519 
12520  // Use SimplifySetCC to simplify SETCC's.
12521  SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
12522  CondLHS, CondRHS, CC->get(), SDLoc(N),
12523  false);
12524  if (Simp.getNode()) AddToWorklist(Simp.getNode());
12525 
12526  // fold to a simpler setcc
12527  if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
12528  return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
12529  N->getOperand(0), Simp.getOperand(2),
12530  Simp.getOperand(0), Simp.getOperand(1),
12531  N->getOperand(4));
12532 
12533  return SDValue();
12534 }
12535 
12536 /// Return true if 'Use' is a load or a store that uses N as its base pointer
12537 /// and that N may be folded in the load / store addressing mode.
12538 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
12539  SelectionDAG &DAG,
12540  const TargetLowering &TLI) {
12541  EVT VT;
12542  unsigned AS;
12543 
12544  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
12545  if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
12546  return false;
12547  VT = LD->getMemoryVT();
12548  AS = LD->getAddressSpace();
12549  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
12550  if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
12551  return false;
12552  VT = ST->getMemoryVT();
12553  AS = ST->getAddressSpace();
12554  } else
12555  return false;
12556 
12557  TargetLowering::AddrMode AM;
12558  if (N->getOpcode() == ISD::ADD) {
12559  ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
12560  if (Offset)
12561  // [reg +/- imm]
12562  AM.BaseOffs = Offset->getSExtValue();
12563  else
12564  // [reg +/- reg]
12565  AM.Scale = 1;
12566  } else if (N->getOpcode() == ISD::SUB) {
12567  ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
12568  if (Offset)
12569  // [reg +/- imm]
12570  AM.BaseOffs = -Offset->getSExtValue();
12571  else
12572  // [reg +/- reg]
12573  AM.Scale = 1;
12574  } else
12575  return false;
12576 
12577  return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
12578  VT.getTypeForEVT(*DAG.getContext()), AS);
12579 }
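// [Editorial note, not part of the original file.] Roughly, the helper above
// maps the pointer arithmetic onto TargetLowering::AddrMode like this
// (values are illustrative):
//
//   load (add p, 16)  =>  AM.BaseOffs = 16    // [reg + imm]
//   load (sub p, 16)  =>  AM.BaseOffs = -16   // [reg - imm]
//   load (add p, q)   =>  AM.Scale = 1        // [reg + reg]
//
// isLegalAddressingMode() then reports whether the target can encode that
// form directly, i.e. whether folding the add/sub into the access is free.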
12580 
12581 /// Try turning a load/store into a pre-indexed load/store when the base
12582 /// pointer is an add or subtract and it has other uses besides the load/store.
12583 /// After the transformation, the new indexed load/store has effectively folded
12584 /// the add/subtract in and all of its other uses are redirected to the
12585 /// new load/store.
12586 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
12587  if (Level < AfterLegalizeDAG)
12588  return false;
12589 
12590  bool isLoad = true;
12591  SDValue Ptr;
12592  EVT VT;
12593  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
12594  if (LD->isIndexed())
12595  return false;
12596  VT = LD->getMemoryVT();
12597  if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
12598  !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
12599  return false;
12600  Ptr = LD->getBasePtr();
12601  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
12602  if (ST->isIndexed())
12603  return false;
12604  VT = ST->getMemoryVT();
12605  if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
12606  !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
12607  return false;
12608  Ptr = ST->getBasePtr();
12609  isLoad = false;
12610  } else {
12611  return false;
12612  }
12613 
12614  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
12615  // out. There is no reason to make this a preinc/predec.
12616  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
12617  Ptr.getNode()->hasOneUse())
12618  return false;
12619 
12620  // Ask the target to do addressing mode selection.
12621  SDValue BasePtr;
12622  SDValue Offset;
12623  ISD::MemIndexedMode AM = ISD::UNINDEXED;
12624  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
12625  return false;
12626 
12627  // Backends without true r+i pre-indexed forms may need to pass a
12628  // constant base with a variable offset so that constant coercion
12629  // will work with the patterns in canonical form.
12630  bool Swapped = false;
12631  if (isa<ConstantSDNode>(BasePtr)) {
12632  std::swap(BasePtr, Offset);
12633  Swapped = true;
12634  }
12635 
12636  // Don't create an indexed load / store with zero offset.
12637  if (isNullConstant(Offset))
12638  return false;
12639 
12640  // Try turning it into a pre-indexed load / store except when:
12641  // 1) The new base ptr is a frame index.
12642  // 2) If N is a store and the new base ptr is either the same as or is a
12643  // predecessor of the value being stored.
12644  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
12645  // that would create a cycle.
12646  // 4) All uses are load / store ops that use it as old base ptr.
12647 
12648  // Check #1. Preinc'ing a frame index would require copying the stack pointer
12649  // (plus the implicit offset) to a register to preinc anyway.
12650  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
12651  return false;
12652 
12653  // Check #2.
12654  if (!isLoad) {
12655  SDValue Val = cast<StoreSDNode>(N)->getValue();
12656  if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
12657  return false;
12658  }
12659 
12660  // Caches for hasPredecessorHelper.
12661  SmallPtrSet<const SDNode *, 32> Visited;
12662  SmallVector<const SDNode *, 16> Worklist;
12663  Worklist.push_back(N);
12664 
12665  // If the offset is a constant, there may be other adds of constants that
12666  // can be folded with this one. We should do this to avoid having to keep
12667  // a copy of the original base pointer.
12668  SmallVector<SDNode *, 16> OtherUses;
12669  if (isa<ConstantSDNode>(Offset))
12670  for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
12671  UE = BasePtr.getNode()->use_end();
12672  UI != UE; ++UI) {
12673  SDUse &Use = UI.getUse();
12674  // Skip the use that is Ptr and uses of other results from BasePtr's
12675  // node (important for nodes that return multiple results).
12676  if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
12677  continue;
12678 
12679  if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
12680  continue;
12681 
12682  if (Use.getUser()->getOpcode() != ISD::ADD &&
12683  Use.getUser()->getOpcode() != ISD::SUB) {
12684  OtherUses.clear();
12685  break;
12686  }
12687 
12688  SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
12689  if (!isa<ConstantSDNode>(Op1)) {
12690  OtherUses.clear();
12691  break;
12692  }
12693 
12694  // FIXME: In some cases, we can be smarter about this.
12695  if (Op1.getValueType() != Offset.getValueType()) {
12696  OtherUses.clear();
12697  break;
12698  }
12699 
12700  OtherUses.push_back(Use.getUser());
12701  }
12702 
12703  if (Swapped)
12704  std::swap(BasePtr, Offset);
12705 
12706  // Now check for #3 and #4.
12707  bool RealUse = false;
12708 
12709  for (SDNode *Use : Ptr.getNode()->uses()) {
12710  if (Use == N)
12711  continue;
12712  if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
12713  return false;
12714 
12715  // If Ptr may be folded in addressing mode of other use, then it's
12716  // not profitable to do this transformation.
12717  if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
12718  RealUse = true;
12719  }
12720 
12721  if (!RealUse)
12722  return false;
12723 
12724  SDValue Result;
12725  if (isLoad)
12726  Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
12727  BasePtr, Offset, AM);
12728  else
12729  Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
12730  BasePtr, Offset, AM);
12731  ++PreIndexedNodes;
12732  ++NodesCombined;
12733  LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
12734  Result.getNode()->dump(&DAG); dbgs() << '\n');
12735  WorklistRemover DeadNodes(*this);
12736  if (isLoad) {
12737  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
12738  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
12739  } else {
12740  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
12741  }
12742 
12743  // Finally, since the node is now dead, remove it from the graph.
12744  deleteAndRecombine(N);
12745 
12746  if (Swapped)
12747  std::swap(BasePtr, Offset);
12748 
12749  // Replace other uses of BasePtr that can be updated to use Ptr
12750  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
12751  unsigned OffsetIdx = 1;
12752  if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
12753  OffsetIdx = 0;
12754  assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
12755  BasePtr.getNode() && "Expected BasePtr operand");
12756 
12757  // We need to replace ptr0 in the following expression:
12758  // x0 * offset0 + y0 * ptr0 = t0
12759  // knowing that
12760  // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
12761  //
12762  // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
12763  // indexed load/store and the expression that needs to be re-written.
12764  //
12765  // Therefore, we have:
12766  // t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
12767 
12768  ConstantSDNode *CN =
12769  cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
12770  int X0, X1, Y0, Y1;
12771  const APInt &Offset0 = CN->getAPIntValue();
12772  APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
12773 
12774  X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
12775  Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
12776  X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
12777  Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
12778 
12779  unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
12780 
12781  APInt CNV = Offset0;
12782  if (X0 < 0) CNV = -CNV;
12783  if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
12784  else CNV = CNV - Offset1;
12785 
12786  SDLoc DL(OtherUses[i]);
12787 
12788  // We can now generate the new expression.
12789  SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
12790  SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
12791 
12792  SDValue NewUse = DAG.getNode(Opcode,
12793  DL,
12794  OtherUses[i]->getValueType(0), NewOp1, NewOp2);
12795  DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
12796  deleteAndRecombine(OtherUses[i]);
12797  }
12798 
12799  // Replace the uses of Ptr with uses of the updated base value.
12800  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
12801  deleteAndRecombine(Ptr.getNode());
12802  AddToWorklist(Result.getNode());
12803 
12804  return true;
12805 }
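// [Editorial note, not part of the original file.] Shape of the pre-indexed
// rewrite performed above, on a target with a [base, #imm]! write-back form
// (e.g. ARM/AArch64):
//
//   t = add p, 4              t', st = store x, [p, #4]!   // pre-indexed
//   store x, t          ==>
//   ... other uses of t       ... redirected to t', the write-back value
//
// The add is folded into the memory access, and the remaining users of the
// old add consume the updated base produced by the new indexed node.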
12806 
12807 /// Try to combine a load/store with an add/sub of the base pointer node into
12808 /// a post-indexed load/store. The transformation effectively folds the
12809 /// add/subtract into the new indexed load/store, and all of its other uses
12810 /// are redirected to the new load/store.
12811 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
12812  if (Level < AfterLegalizeDAG)
12813  return false;
12814 
12815  bool isLoad = true;
12816  SDValue Ptr;
12817  EVT VT;
12818  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
12819  if (LD->isIndexed())
12820  return false;
12821  VT = LD->getMemoryVT();
12822  if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
12823  !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
12824  return false;
12825  Ptr = LD->getBasePtr();
12826  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
12827  if (ST->isIndexed())
12828  return false;
12829  VT = ST->getMemoryVT();
12830  if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
12831  !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
12832  return false;
12833  Ptr = ST->getBasePtr();
12834  isLoad = false;
12835  } else {
12836  return false;
12837  }
12838 
12839  if (Ptr.getNode()->hasOneUse())
12840  return false;
12841 
12842  for (SDNode *Op : Ptr.getNode()->uses()) {
12843  if (Op == N ||
12844  (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
12845  continue;
12846 
12847  SDValue BasePtr;
12848  SDValue Offset;
12849  ISD::MemIndexedMode AM = ISD::UNINDEXED;
12850  if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
12851  // Don't create an indexed load / store with zero offset.
12852  if (isNullConstant(Offset))
12853  continue;
12854 
12855  // Try turning it into a post-indexed load / store except when
12856  // 1) All uses are load / store ops that use it as base ptr (and
12857  // it may be folded as addressing mode).
12858  // 2) Op must be independent of N, i.e. Op is neither a predecessor
12859  // nor a successor of N. Otherwise, if Op is folded that would
12860  // create a cycle.
12861 
12862  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
12863  continue;
12864 
12865  // Check for #1.
12866  bool TryNext = false;
12867  for (SDNode *Use : BasePtr.getNode()->uses()) {
12868  if (Use == Ptr.getNode())
12869  continue;
12870 
12871  // If all the uses are load / store addresses, then don't do the
12872  // transformation.
12873  if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
12874  bool RealUse = false;
12875  for (SDNode *UseUse : Use->uses()) {
12876  if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
12877  RealUse = true;
12878  }
12879 
12880  if (!RealUse) {
12881  TryNext = true;
12882  break;
12883  }
12884  }
12885  }
12886 
12887  if (TryNext)
12888  continue;
12889 
12890  // Check for #2.
12891  SmallPtrSet<const SDNode *, 32> Visited;
12892  SmallVector<const SDNode *, 8> Worklist;
12893  // Ptr is predecessor to both N and Op.
12894  Visited.insert(Ptr.getNode());
12895  Worklist.push_back(N);
12896  Worklist.push_back(Op);
12897  if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
12898  !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) {
12899  SDValue Result = isLoad
12900  ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
12901  BasePtr, Offset, AM)
12902  : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
12903  BasePtr, Offset, AM);
12904  ++PostIndexedNodes;
12905  ++NodesCombined;
12906  LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
12907  dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
12908  dbgs() << '\n');
12909  WorklistRemover DeadNodes(*this);
12910  if (isLoad) {
12911  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
12912  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
12913  } else {
12914  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
12915  }
12916 
12917  // Finally, since the node is now dead, remove it from the graph.
12918  deleteAndRecombine(N);
12919 
12920  // Replace the uses of Use with uses of the updated base value.
12921  DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
12922  Result.getValue(isLoad ? 1 : 0));
12923  deleteAndRecombine(Op);
12924  return true;
12925  }
12926  }
12927  }
12928 
12929  return false;
12930 }
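// [Editorial note, not part of the original file.] Post-indexed variant of
// the same idea:
//
//   v = load p                v, t' = load [p], #4   // post-indexed
//   t = add p, 4        ==>
//   ... uses of t             ... redirected to t'
//
// The access uses the old base and the increment is applied afterwards; this
// is why Op must be independent of N, or folding would create a cycle.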
12931 
12932 /// Return the base-pointer arithmetic from an indexed \p LD.
12933 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
12934  ISD::MemIndexedMode AM = LD->getAddressingMode();
12935  assert(AM != ISD::UNINDEXED);
12936  SDValue BP = LD->getOperand(1);
12937  SDValue Inc = LD->getOperand(2);
12938 
12939  // Some backends use TargetConstants for load offsets, but don't expect
12940  // TargetConstants in general ADD nodes. We can convert these constants into
12941  // regular Constants (if the constant is not opaque).
12942  assert((Inc.getOpcode() != ISD::TargetConstant ||
12943  !cast<ConstantSDNode>(Inc)->isOpaque()) &&
12944  "Cannot split out indexing using opaque target constants");
12945  if (Inc.getOpcode() == ISD::TargetConstant) {
12946  ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
12947  Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
12948  ConstInc->getValueType(0));
12949  }
12950 
12951  unsigned Opc =
12952  (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
12953  return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
12954 }
12955 
12956 static inline int numVectorEltsOrZero(EVT T) {
12957  return T.isVector() ? T.getVectorNumElements() : 0;
12958 }
12959 
12960 bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
12961  Val = ST->getValue();
12962  EVT STType = Val.getValueType();
12963  EVT STMemType = ST->getMemoryVT();
12964  if (STType == STMemType)
12965  return true;
12966  if (isTypeLegal(STMemType))
12967  return false; // fail.
12968  if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
12969  TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
12970  Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
12971  return true;
12972  }
12973  if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
12974  STType.isInteger() && STMemType.isInteger()) {
12975  Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
12976  return true;
12977  }
12978  if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
12979  Val = DAG.getBitcast(STMemType, Val);
12980  return true;
12981  }
12982  return false; // fail.
12983 }
12984 
12985 bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
12986  EVT LDMemType = LD->getMemoryVT();
12987  EVT LDType = LD->getValueType(0);
12988  assert(Val.getValueType() == LDMemType &&
12989  "Attempting to extend value of non-matching type");
12990  if (LDType == LDMemType)
12991  return true;
12992  if (LDMemType.isInteger() && LDType.isInteger()) {
12993  switch (LD->getExtensionType()) {
12994  case ISD::NON_EXTLOAD:
12995  Val = DAG.getBitcast(LDType, Val);
12996  return true;
12997  case ISD::EXTLOAD:
12998  Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
12999  return true;
13000  case ISD::SEXTLOAD:
13001  Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
13002  return true;
13003  case ISD::ZEXTLOAD:
13004  Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
13005  return true;
13006  }
13007  }
13008  return false;
13009 }
13010 
13011 SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
13012  if (OptLevel == CodeGenOpt::None || LD->isVolatile())
13013  return SDValue();
13014  SDValue Chain = LD->getOperand(0);
13015  StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
13016  if (!ST || ST->isVolatile())
13017  return SDValue();
13018 
13019  EVT LDType = LD->getValueType(0);
13020  EVT LDMemType = LD->getMemoryVT();
13021  EVT STMemType = ST->getMemoryVT();
13022  EVT STType = ST->getValue().getValueType();
13023 
13024  BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
13025  BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
13026  int64_t Offset;
13027  if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
13028  return SDValue();
13029 
13030  // Normalize for endianness. After this, Offset=0 will denote that the least
13031  // significant bit in the loaded value maps to the least significant bit in
13032  // the stored value. With Offset=n (for n > 0) the loaded value starts at the
13033  // n:th least significant byte of the stored value.
13034  if (DAG.getDataLayout().isBigEndian())
13035  Offset = (STMemType.getStoreSizeInBits() -
13036  LDMemType.getStoreSizeInBits()) / 8 - Offset;
13037 
13038  // Check that the stored value covers all bits that are loaded.
13039  bool STCoversLD =
13040  (Offset >= 0) &&
13041  (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());
13042 
13043  auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
13044  if (LD->isIndexed()) {
13045  bool IsSub = (LD->getAddressingMode() == ISD::PRE_DEC ||
13046  LD->getAddressingMode() == ISD::POST_DEC);
13047  unsigned Opc = IsSub ? ISD::SUB : ISD::ADD;
13048  SDValue Idx = DAG.getNode(Opc, SDLoc(LD), LD->getOperand(1).getValueType(),
13049  LD->getOperand(1), LD->getOperand(2));
13050  SDValue Ops[] = {Val, Idx, Chain};
13051  return CombineTo(LD, Ops, 3);
13052  }
13053  return CombineTo(LD, Val, Chain);
13054  };
13055 
13056  if (!STCoversLD)
13057  return SDValue();
13058 
13059  // Memory as copy space (potentially masked).
13060  if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
13061  // Simple case: Direct non-truncating forwarding
13062  if (LDType.getSizeInBits() == LDMemType.getSizeInBits())
13063  return ReplaceLd(LD, ST->getValue(), Chain);
13064  // Can we model the truncate and extension with an and mask?
13065  if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
13066  !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
13067  // Mask to size of LDMemType
13068  auto Mask =
13069  DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(),
13070  STMemType.getSizeInBits()),
13071  SDLoc(ST), STType);
13072  auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
13073  return ReplaceLd(LD, Val, Chain);
13074  }
13075  }
13076 
13077  // TODO: Deal with nonzero offset.
13078  if (LD->getBasePtr().isUndef() || Offset != 0)
13079  return SDValue();
13080  // Model necessary truncations / extensions.
13081  SDValue Val;
13082  // Truncate Value To Stored Memory Size.
13083  do {
13084  if (!getTruncatedStoreValue(ST, Val))
13085  continue;
13086  if (!isTypeLegal(LDMemType))
13087  continue;
13088  if (STMemType != LDMemType) {
13089  // TODO: Support vectors? This requires extract_subvector/bitcast.
13090  if (!STMemType.isVector() && !LDMemType.isVector() &&
13091  STMemType.isInteger() && LDMemType.isInteger())
13092  Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
13093  else
13094  continue;
13095  }
13096  if (!extendLoadedValueToExtension(LD, Val))
13097  continue;
13098  return ReplaceLd(LD, Val, Chain);
13099  } while (false);
13100 
13101  // On failure, cleanup dead nodes we may have created.
13102  if (Val->use_empty())
13103  deleteAndRecombine(Val.getNode());
13104  return SDValue();
13105 }
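// [Editorial illustration, not part of the original file.] Store-to-load
// forwarding as performed above, sketched at the C++ level for a
// little-endian target (hypothetical helpers):
#include <cstdint>
#include <cstring>
static uint16_t throughMemory(uint32_t v) {
  uint32_t mem = v;                   // store i32 v to a slot
  uint16_t lo;
  std::memcpy(&lo, &mem, sizeof(lo)); // narrow load from the same address
  return lo;
}
static uint16_t forwarded(uint32_t v) {
  // The fold's result: no memory round-trip, just a truncation (modeled in
  // the DAG as a truncate or an AND mask of the stored value).
  return static_cast<uint16_t>(v);
}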
13106 
13107 SDValue DAGCombiner::visitLOAD(SDNode *N) {
13108  LoadSDNode *LD = cast<LoadSDNode>(N);
13109  SDValue Chain = LD->getChain();
13110  SDValue Ptr = LD->getBasePtr();
13111 
13112  // If load is not volatile and there are no uses of the loaded value (and
13113  // the updated indexed value in case of indexed loads), change uses of the
13114  // chain value into uses of the chain input (i.e. delete the dead load).
13115  if (!LD->isVolatile()) {
13116  if (N->getValueType(1) == MVT::Other) {
13117  // Unindexed loads.
13118  if (!N->hasAnyUseOfValue(0)) {
13119  // It's not safe to use the two value CombineTo variant here. e.g.
13120  // v1, chain2 = load chain1, loc
13121  // v2, chain3 = load chain2, loc
13122  // v3 = add v2, c
13123  // Now we replace use of chain2 with chain1. This makes the second load
13124  // isomorphic to the one we are deleting, and thus makes this load live.
13125  LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
13126  dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
13127  dbgs() << "\n");
13128  WorklistRemover DeadNodes(*this);
13129  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
13130  AddUsersToWorklist(Chain.getNode());
13131  if (N->use_empty())
13132  deleteAndRecombine(N);
13133 
13134  return SDValue(N, 0); // Return N so it doesn't get rechecked!
13135  }
13136  } else {
13137  // Indexed loads.
13138  assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
13139 
13140  // If this load has an opaque TargetConstant offset, then we cannot split
13141  // the indexing into an add/sub directly (that TargetConstant may not be
13142  // valid for a different type of node, and we cannot convert an opaque
13143  // target constant into a regular constant).
13144  bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
13145  cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();
13146 
13147  if (!N->hasAnyUseOfValue(0) &&
13148  ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
13149  SDValue Undef = DAG.getUNDEF(N->getValueType(0));
13150  SDValue Index;
13151  if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
13152  Index = SplitIndexingFromLoad(LD);
13153  // Try to fold the base pointer arithmetic into subsequent loads and
13154  // stores.
13155  AddUsersToWorklist(N);
13156  } else
13157  Index = DAG.getUNDEF(N->getValueType(1));
13158  LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
13159  dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
13160  dbgs() << " and 2 other values\n");
13161  WorklistRemover DeadNodes(*this);
13162  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
13163  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
13164  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
13165  deleteAndRecombine(N);
13166  return SDValue(N, 0); // Return N so it doesn't get rechecked!
13167  }
13168  }
13169  }
13170 
13171  // If this load is directly stored, replace the load value with the stored
13172  // value.
13173  if (auto V = ForwardStoreValueToDirectLoad(LD))
13174  return V;
13175 
13176  // Try to infer better alignment information than the load already has.
13177  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
13178  if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
13179  if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
13180  SDValue NewLoad = DAG.getExtLoad(
13181  LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
13182  LD->getPointerInfo(), LD->getMemoryVT(), Align,
13183  LD->getMemOperand()->getFlags(), LD->getAAInfo());
13184  // NewLoad will always be N as we are only refining the alignment
13185  assert(NewLoad.getNode() == N);
13186  (void)NewLoad;
13187  }
13188  }
13189  }
13190 
13191  if (LD->isUnindexed()) {
13192  // Walk up chain skipping non-aliasing memory nodes.
13193  SDValue BetterChain = FindBetterChain(N, Chain);
13194 
13195  // If there is a better chain.
13196  if (Chain != BetterChain) {
13197  SDValue ReplLoad;
13198 
13199  // Replace the chain to avoid dependency.
13200  if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
13201  ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
13202  BetterChain, Ptr, LD->getMemOperand());
13203  } else {
13204  ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
13205  LD->getValueType(0),
13206  BetterChain, Ptr, LD->getMemoryVT(),
13207  LD->getMemOperand());
13208  }
13209 
13210  // Create token factor to keep old chain connected.
13211  SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
13212  MVT::Other, Chain, ReplLoad.getValue(1));
13213 
13214  // Replace uses with load result and token factor
13215  return CombineTo(N, ReplLoad.getValue(0), Token);
13216  }
13217  }
13218 
13219  // Try transforming N to an indexed load.
13220  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
13221  return SDValue(N, 0);
13222 
13223  // Try to slice up N to more direct loads if the slices are mapped to
13224  // different register banks or pairing can take place.
13225  if (SliceUpLoad(N))
13226  return SDValue(N, 0);
13227 
13228  return SDValue();
13229 }
13230 
13231 namespace {
13232 
13233 /// Helper structure used to slice a load into smaller loads.
13234 /// Basically a slice is obtained from the following sequence:
13235 /// Origin = load Ty1, Base
13236 /// Shift = srl Ty1 Origin, CstTy Amount
13237 /// Inst = trunc Shift to Ty2
13238 ///
13239 /// Then, it will be rewritten into:
13240 /// Slice = load SliceTy, Base + SliceOffset
13241 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
13242 ///
13243 /// SliceTy is deduced from the number of bits that are actually used to
13244 /// build Inst.
13245 struct LoadedSlice {
13246  /// Helper structure used to compute the cost of a slice.
13247  struct Cost {
13248  /// Are we optimizing for code size.
13249  bool ForCodeSize;
13250 
13251  /// Various costs.
13252  unsigned Loads = 0;
13253  unsigned Truncates = 0;
13254  unsigned CrossRegisterBanksCopies = 0;
13255  unsigned ZExts = 0;
13256  unsigned Shift = 0;
13257 
13258  Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {}
13259 
13260  /// Get the cost of one isolated slice.
13261  Cost(const LoadedSlice &LS, bool ForCodeSize = false)
13262  : ForCodeSize(ForCodeSize), Loads(1) {
13263  EVT TruncType = LS.Inst->getValueType(0);
13264  EVT LoadedType = LS.getLoadedType();
13265  if (TruncType != LoadedType &&
13266  !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
13267  ZExts = 1;
13268  }
13269 
13270  /// Account for slicing gain in the current cost.
13271  /// Slicing provides a few gains, like removing a shift or a
13272  /// truncate. This method allows the cost of the original
13273  /// load to grow with the gain from this slice.
13274  void addSliceGain(const LoadedSlice &LS) {
13275  // Each slice saves a truncate.
13276  const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
13277  if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
13278  LS.Inst->getValueType(0)))
13279  ++Truncates;
13280  // If there is a shift amount, this slice gets rid of it.
13281  if (LS.Shift)
13282  ++Shift;
13283  // If this slice can merge a cross register bank copy, account for it.
13284  if (LS.canMergeExpensiveCrossRegisterBankCopy())
13285  ++CrossRegisterBanksCopies;
13286  }
13287 
13288  Cost &operator+=(const Cost &RHS) {
13289  Loads += RHS.Loads;
13290  Truncates += RHS.Truncates;
13291  CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
13292  ZExts += RHS.ZExts;
13293  Shift += RHS.Shift;
13294  return *this;
13295  }
13296 
13297  bool operator==(const Cost &RHS) const {
13298  return Loads == RHS.Loads && Truncates == RHS.Truncates &&
13299  CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
13300  ZExts == RHS.ZExts && Shift == RHS.Shift;
13301  }
13302 
13303  bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
13304 
13305  bool operator<(const Cost &RHS) const {
13306  // Assume cross register bank copies are as expensive as loads.
13307  // FIXME: Do we want some more target hooks?
13308  unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
13309  unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
13310  // Unless we are optimizing for code size, consider the
13311  // expensive operation first.
13312  if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
13313  return ExpensiveOpsLHS < ExpensiveOpsRHS;
13314  return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
13315  (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
13316  }
13317 
13318  bool operator>(const Cost &RHS) const { return RHS < *this; }
13319 
13320  bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
13321 
13322  bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
13323  };
13324 
13325  // The last instruction that represents the slice. This should be a
13326  // truncate instruction.
13327  SDNode *Inst;
13328 
13329  // The original load instruction.
13330  LoadSDNode *Origin;
13331 
13332  // The right shift amount in bits from the original load.
13333  unsigned Shift;
13334 
13335  // The DAG from which Origin came.
13336  // This is used to get some contextual information about legal types, etc.
13337  SelectionDAG *DAG;
13338 
13339  LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
13340  unsigned Shift = 0, SelectionDAG *DAG = nullptr)
13341  : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
13342 
13343  /// Get the bits used in a chunk of bits \p BitWidth large.
13344  /// \return Result is \p BitWidth bits wide, with used bits set to 1 and
13345  /// unused bits set to 0.
13346  APInt getUsedBits() const {
13347  // Reproduce the trunc(lshr) sequence:
13348  // - Start from the truncated value.
13349  // - Zero extend to the desired bit width.
13350  // - Shift left.
13351  assert(Origin && "No original load to compare against.");
13352  unsigned BitWidth = Origin->getValueSizeInBits(0);
13353  assert(Inst && "This slice is not bound to an instruction");
13354  assert(Inst->getValueSizeInBits(0) <= BitWidth &&
13355  "Extracted slice is bigger than the whole type!");
13356  APInt UsedBits(Inst->getValueSizeInBits(0), 0);
13357  UsedBits.setAllBits();
13358  UsedBits = UsedBits.zext(BitWidth);
13359  UsedBits <<= Shift;
13360  return UsedBits;
13361  }
13362 
13363  /// Get the size of the slice to be loaded in bytes.
13364  unsigned getLoadedSize() const {
13365  unsigned SliceSize = getUsedBits().countPopulation();
13366  assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
13367  return SliceSize / 8;
13368  }
13369 
13370  /// Get the type that will be loaded for this slice.
13371  /// Note: This may not be the final type for the slice.
13372  EVT getLoadedType() const {
13373  assert(DAG && "Missing context");
13374  LLVMContext &Ctxt = *DAG->getContext();
13375  return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
13376  }
13377 
13378  /// Get the alignment of the load used for this slice.
13379  unsigned getAlignment() const {
13380  unsigned Alignment = Origin->getAlignment();
13381  unsigned Offset = getOffsetFromBase();
13382  if (Offset != 0)
13383  Alignment = MinAlign(Alignment, Alignment + Offset);
13384  return Alignment;
13385  }
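 
    // For example (hypothetical values): an 8-byte aligned origin load with a
    // slice at byte offset 2 yields MinAlign(8, 8 + 2) == 2 for the new load.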
13386 
13387  /// Check if this slice can be rewritten with legal operations.
13388  bool isLegal() const {
13389  // An invalid slice is not legal.
13390  if (!Origin || !Inst || !DAG)
13391  return false;
13392 
13393  // Offsets are only used for indexed loads; we do not handle those.
13394  if (!Origin->getOffset().isUndef())
13395  return false;
13396 
13397  const TargetLowering &TLI = DAG->getTargetLoweringInfo();
13398 
13399  // Check that the type is legal.
13400  EVT SliceType = getLoadedType();
13401  if (!TLI.isTypeLegal(SliceType))
13402  return false;
13403 
13404  // Check that the load is legal for this type.
13405  if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
13406  return false;
13407 
13408  // Check that the offset can be computed.
13409  // 1. Check its type.
13410  EVT PtrType = Origin->getBasePtr().getValueType();
13411  if (PtrType == MVT::Untyped || PtrType.isExtended())
13412  return false;
13413 
13414  // 2. Check that it fits in the immediate.
13415  if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
13416  return false;
13417 
13418  // 3. Check that the computation is legal.
13419  if (!TLI.isOperationLegal(ISD::ADD, PtrType))
13420  return false;
13421 
13422  // Check that the zext is legal if it needs one.
13423  EVT TruncateType = Inst->getValueType(0);
13424  if (TruncateType != SliceType &&
13425  !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
13426  return false;
13427 
13428  return true;
13429  }
13430 
13431  /// Get the offset in bytes of this slice in the original chunk of
13432  /// bits.
13433  /// \pre DAG != nullptr.
13434  uint64_t getOffsetFromBase() const {
13435  assert(DAG && "Missing context.");
13436  bool IsBigEndian = DAG->getDataLayout().isBigEndian();
13437  assert(!(Shift & 0x7) && "Shifts not aligned on bytes are not supported.");
13438  uint64_t Offset = Shift / 8;
13439  unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
13440  assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
13441  "The size of the original loaded type is not a multiple of a"
13442  " byte.");
13443  // If Offset is bigger than TySizeInBytes, it means we are loading all
13444  // zeros. This should have been optimized earlier in the process.
13445  assert(TySizeInBytes > Offset &&
13446  "Invalid shift amount for given loaded size");
13447  if (IsBigEndian)
13448  Offset = TySizeInBytes - Offset - getLoadedSize();
13449  return Offset;
13450  }
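 
    // For example (hypothetical slice): an i8 slice of an i32 load with
    // Shift == 16 sits at byte offset 2 on little-endian targets and at
    // byte offset 4 - 2 - 1 == 1 on big-endian targets.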
13451 
13452  /// Generate the sequence of instructions to load the slice
13453  /// represented by this object and redirect the uses of this slice to
13454  /// this new sequence of instructions.
13455  /// \pre this->Inst and this->Origin are valid instructions and this
13456  /// object has passed the legality check: LoadedSlice::isLegal returned true.
13457  /// \return The last instruction of the sequence used to load the slice.
13458  SDValue loadSlice() const {
13459  assert(Inst && Origin && "Unable to replace a non-existing slice.");
13460  const SDValue &OldBaseAddr = Origin->getBasePtr();
13461  SDValue BaseAddr = OldBaseAddr;
13462  // Get the offset in that chunk of bytes w.r.t. the endianness.
13463  int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
13464  assert(Offset >= 0 && "Offset too big to fit in int64_t!");
13465  if (Offset) {
13466  // BaseAddr = BaseAddr + Offset.
13467  EVT ArithType = BaseAddr.getValueType();
13468  SDLoc DL(Origin);
13469  BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
13470  DAG->getConstant(Offset, DL, ArithType));
13471  }
13472 
13473  // Create the type of the loaded slice according to its size.
13474  EVT SliceType = getLoadedType();
13475 
13476  // Create the load for the slice.
13477  SDValue LastInst =
13478  DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
13479  Origin->getPointerInfo().getWithOffset(Offset),
13480  getAlignment(), Origin->getMemOperand()->getFlags());
13481  // If the final type is not the same as the loaded type, this means that
13482  // we have to pad with zero. Create a zero extend for that.
13483  EVT FinalType = Inst->getValueType(0);
13484  if (SliceType != FinalType)
13485  LastInst =
13486  DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
13487  return LastInst;
13488  }
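 
    // Sketch of the emitted nodes for an i8 slice at byte offset 2 feeding an
    // i32 user (hypothetical values):
    //   NewAddr = (add BaseAddr, 2)
    //   Slice   = (i8 load NewAddr)
    //   Result  = (zero_extend Slice to i32)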
13489 
13490  /// Check if this slice can be merged with an expensive cross register
13491  /// bank copy. E.g.,
13492  /// i = load i32
13493  /// f = bitcast i32 i to float
13494  bool canMergeExpensiveCrossRegisterBankCopy() const {
13495  if (!Inst || !Inst->hasOneUse())
13496  return false;
13497  SDNode *Use = *Inst->use_begin();
13498  if (Use->getOpcode() != ISD::BITCAST)
13499  return false;
13500  assert(DAG && "Missing context");
13501  const TargetLowering &TLI = DAG->getTargetLoweringInfo();
13502  EVT ResVT = Use->getValueType(0);
13503  const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
13504  const TargetRegisterClass *ArgRC =
13505  TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
13506  if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
13507  return false;
13508 
13509  // At this point, we know that we perform a cross-register-bank copy.
13510  // Check if it is expensive.
13511  const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
13512  // Assume bitcasts are cheap, unless the two register classes do not
13513  // explicitly share a common subclass.
13514  if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
13515  return false;
13516 
13517  // Check if it will be merged with the load.
13518  // 1. Check the alignment constraint.
13519  unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
13520  ResVT.getTypeForEVT(*DAG->getContext()));
13521 
13522  if (RequiredAlignment > getAlignment())
13523  return false;
13524 
13525  // 2. Check that the load is a legal operation for that type.
13526  if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
13527  return false;
13528 
13529  // 3. Check that we do not have a zext in the way.
13530  if (Inst->getValueType(0) != getLoadedType())
13531  return false;
13532 
13533  return true;
13534  }
13535 };
13536 
13537 } // end anonymous namespace
13538 
13539 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
13540 /// \p UsedBits looks like 0..0 1..1 0..0.
13541 static bool areUsedBitsDense(const APInt &UsedBits) {
13542  // If all the bits are one, this is dense!
13543  if (UsedBits.isAllOnesValue())
13544  return true;
13545 
13546  // Get rid of the unused bits on the right.
13547  APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
13548  // Get rid of the unused bits on the left.
13549  if (NarrowedUsedBits.countLeadingZeros())
13550  NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
13551  // Check that the chunk of bits is completely used.
13552  return NarrowedUsedBits.isAllOnesValue();
13553 }
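 
// For example (hypothetical masks): 0x00FF0000 is dense, whereas 0x00FF00FF
// is not, because the cleared bits 8..15 leave a hole inside the run.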
13554 
13555 /// Check whether or not \p First and \p Second are next to each other
13556 /// in memory. This means that there is no hole between the bits loaded
13557 /// by \p First and the bits loaded by \p Second.
13558 static bool areSlicesNextToEachOther(const LoadedSlice &First,
13559  const LoadedSlice &Second) {
13560  assert(First.Origin == Second.Origin && First.Origin &&
13561  "Unable to match different memory origins.");
13562  APInt UsedBits = First.getUsedBits();
13563  assert((UsedBits & Second.getUsedBits()) == 0 &&
13564  "Slices are not supposed to overlap.");
13565  UsedBits |= Second.getUsedBits();
13566  return areUsedBitsDense(UsedBits);
13567 }
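 
// For example (hypothetical slices): two i8 slices of the same i32 load at
// shifts 0 and 8 use bits 0x000000FF and 0x0000FF00; their union 0x0000FFFF
// is dense, so the slices are adjacent.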
13568 
13569 /// Adjust the \p GlobalLSCost according to the target
13570 /// pairing capabilities and the layout of the slices.
13571 /// \pre \p GlobalLSCost should account for at least as many loads as
13572 /// there are slices in \p LoadedSlices.
13573 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
13574  LoadedSlice::Cost &GlobalLSCost) {
13575  unsigned NumberOfSlices = LoadedSlices.size();
13576  // If there are fewer than two elements, no pairing is possible.
13577  if (NumberOfSlices < 2)
13578  return;
13579 
13580  // Sort the slices so that elements that are likely to be next to each
13581  // other in memory are next to each other in the list.
13582  llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
13583  assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
13584  return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
13585  });
13586  const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
13587  // First (resp. Second) is the first (resp. second) potential candidate
13588  // to be placed in a paired load.
13589  const LoadedSlice *First = nullptr;
13590  const LoadedSlice *Second = nullptr;
13591  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
13592  // Set the beginning of the pair.
13593  First = Second) {
13594  Second = &LoadedSlices[CurrSlice];
13595 
13596  // If First is NULL, it means we start a new pair.
13597  // Get to the next slice.
13598  if (!First)
13599  continue;
13600 
13601  EVT LoadedType = First->getLoadedType();
13602 
13603  // If the types of the slices are different, we cannot pair them.
13604  if (LoadedType != Second->getLoadedType())
13605  continue;
13606 
13607  // Check if the target supplies paired loads for this type.
13608  unsigned RequiredAlignment = 0;
13609  if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
13610  // Move to the next pair; this type is hopeless.
13611  Second = nullptr;
13612  continue;
13613  }
13614  // Check if we meet the alignment requirement.
13615  if (RequiredAlignment > First->getAlignment())
13616  continue;
13617 
13618  // Check that both loads are next to each other in memory.
13619  if (!areSlicesNextToEachOther(*First, *Second))
13620  continue;
13621 
13622  assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
13623  --GlobalLSCost.Loads;
13624  // Move to the next pair.
13625  Second = nullptr;
13626  }
13627 }
13628 
13629 /// Check the profitability of all involved LoadedSlice.
13630 /// Currently, it is considered profitable if there are exactly two
13631 /// involved slices (1) which are (2) next to each other in memory, and
13632 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
13633 ///
13634 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
13635 /// the elements themselves.
13636 ///
13637 /// FIXME: When the cost model will be mature enough, we can relax
13638 /// constraints (1) and (2).
13639 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
13640  const APInt &UsedBits, bool ForCodeSize) {
13641  unsigned NumberOfSlices = LoadedSlices.size();
13642  if (StressLoadSlicing)
13643  return NumberOfSlices > 1;
13644 
13645  // Check (1).
13646  if (NumberOfSlices != 2)
13647  return false;
13648 
13649  // Check (2).
13650  if (!areUsedBitsDense(UsedBits))
13651  return false;
13652 
13653  // Check (3).
13654  LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
13655  // The original code has one big load.
13656  OrigCost.Loads = 1;
13657  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
13658  const LoadedSlice &LS = LoadedSlices[CurrSlice];
13659  // Accumulate the cost of all the slices.
13660  LoadedSlice::Cost SliceCost(LS, ForCodeSize);
13661  GlobalSlicingCost += SliceCost;
13662 
13663  // Account as cost in the original configuration the gain obtained
13664  // with the current slices.
13665  OrigCost.addSliceGain(LS);
13666  }
13667 
13668  // If the target supports paired load, adjust the cost accordingly.
13669  adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
13670  return OrigCost > GlobalSlicingCost;
13671 }
13672 
13673 /// If the given load, \p LI, is used only by trunc or trunc(lshr)
13674 /// operations, split it into the various pieces being extracted.
13675 ///
13676 /// This sort of thing is introduced by SROA.
13677 /// This slicing takes care not to insert overlapping loads.
13678 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
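///
/// For example (a sketch; the exact nodes depend on the target):
///   b = i32 load p
///   x = trunc b to i16
///   y = trunc (srl b, 16) to i16
/// may become two independent i16 loads, from p and from p + 2 on a
/// little-endian target.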
13679 bool DAGCombiner::SliceUpLoad(SDNode *N) {
13680  if (Level < AfterLegalizeDAG)
13681  return false;
13682 
13683  LoadSDNode *LD = cast<LoadSDNode>(N);
13684  if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
13685  !LD->getValueType(0).isInteger())
13686  return false;
13687 
13688  // Keep track of already used bits to detect overlapping values.
13689  // In that case, we will just abort the transformation.
13690  APInt UsedBits(LD->getValueSizeInBits(0), 0);
13691 
13692  SmallVector<LoadedSlice, 4> LoadedSlices;
13693 
13694  // Check if this load is used as several smaller chunks of bits.
13695  // Basically, look for uses in trunc or trunc(lshr) and record a new chain
13696  // of computation for each trunc.
13697  for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
13698  UI != UIEnd; ++UI) {
13699  // Skip the uses of the chain.
13700  if (UI.getUse().getResNo() != 0)
13701  continue;
13702 
13703  SDNode *User = *UI;
13704  unsigned Shift = 0;
13705 
13706  // Check if this is a trunc(lshr).
13707  if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
13708  isa<ConstantSDNode>(User->getOperand(1))) {
13709  Shift = User->getConstantOperandVal(1);
13710  User = *User->use_begin();
13711  }
13712 
13713  // At this point, User is a truncate iff we encountered trunc or
13714  // trunc(lshr).
13715  if (User->getOpcode() != ISD::TRUNCATE)
13716  return false;
13717 
13718  // The width of the type must be a power of 2, no smaller than 8 bits.
13719  // Otherwise the load cannot be represented in LLVM IR.
13720  // Moreover, if we shifted by a non-multiple of 8 bits, the slice
13721  // would straddle byte boundaries. We do not support that.
13722  unsigned Width = User->getValueSizeInBits(0);
13723  if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
13724  return false;
13725 
13726  // Build the slice for this chain of computations.
13727  LoadedSlice LS(User, LD, Shift, &DAG);
13728  APInt CurrentUsedBits = LS.getUsedBits();
13729 
13730  // Check if this slice overlaps with another.
13731  if ((CurrentUsedBits & UsedBits) != 0)
13732  return false;
13733  // Update the bits used globally.
13734  UsedBits |= CurrentUsedBits;
13735 
13736  // Check if the new slice would be legal.
13737  if (!LS.isLegal())
13738  return false;
13739 
13740  // Record the slice.
13741  LoadedSlices.push_back(LS);
13742  }
13743 
13744  // Abort slicing if it does not seem to be profitable.
13745  if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
13746  return false;
13747 
13748  ++SlicedLoads;
13749 
13750  // Rewrite each chain to use an independent load.
13751  // By construction, each chain can be represented by a unique load.
13752 
13753  // Prepare the argument for the new token factor for all the slices.
13754  SmallVector<SDValue, 8> ArgChains;
13755  for (SmallVectorImpl<LoadedSlice>::const_iterator
13756  LSIt = LoadedSlices.begin(),
13757  LSItEnd = LoadedSlices.end();
13758  LSIt != LSItEnd; ++LSIt) {
13759  SDValue SliceInst = LSIt->loadSlice();
13760  CombineTo(LSIt->Inst, SliceInst, true);
13761  if (SliceInst.getOpcode() != ISD::LOAD)
13762  SliceInst = SliceInst.getOperand(0);
13763  assert(SliceInst->getOpcode() == ISD::LOAD &&
13764  "It takes more than a zext to get to the loaded slice!!");
13765  ArgChains.push_back(SliceInst.getValue(1));
13766  }
13767 
13768  SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
13769  ArgChains);
13770  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
13771  AddToWorklist(Chain.getNode());
13772  return true;
13773 }
13774 
13775 /// Check to see if V is (and load (ptr), imm), where the load has
13776 /// specific bytes cleared out. If so, return the byte size being masked out
13777 /// and the shift amount.
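///
/// Worked example (hypothetical values): for an i32
/// (and (load p), 0xFFFF00FF), the inverted mask is 0x0000FF00, so exactly
/// one byte is cleared and {1, 1} is returned: one byte masked out, starting
/// at byte offset 1.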
13778 static std::pair<unsigned, unsigned>
13779 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
13780  std::pair<unsigned, unsigned> Result(0, 0);
13781 
13782  // Check for the structure we're looking for.
13783  if (V->getOpcode() != ISD::AND ||
13784  !isa<ConstantSDNode>(V->getOperand(1)) ||
13785  !ISD::isNormalLoad(V->getOperand(0).getNode()))
13786  return Result;
13787 
13788  // Check the chain and pointer.
13789  LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
13790  if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
13791 
13792  // This only handles simple types.
13793  if (V.getValueType() != MVT::i16 &&
13794  V.getValueType() != MVT::i32 &&
13795  V.getValueType() != MVT::i64)
13796  return Result;
13797 
13798  // Check the constant mask. Invert it so that the bits being masked out are
13799  // 1 and the bits being kept are 0. Use getSExtValue so that leading bits
13800  // follow the sign bit for uniformity.
13801  uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
13802  unsigned NotMaskLZ = countLeadingZeros(NotMask);
13803  if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
13804  unsigned NotMaskTZ = countTrailingZeros(NotMask);
13805  if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
13806  if (NotMaskLZ == 64) return Result; // All zero mask.
13807 
13808  // See if we have a contiguous run of bits. If so, we have 0*1+0*
13809  if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
13810  return Result;
13811 
13812  // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
13813  if (V.getValueType() != MVT::i64 && NotMaskLZ)
13814  NotMaskLZ -= 64-V.getValueSizeInBits();
13815 
13816  unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
13817  switch (MaskedBytes) {
13818  case 1:
13819  case 2:
13820  case 4: break;
13821  default: return Result; // All one mask, or 5-byte mask.
13822  }
13823 
13824  // Verify that the run of set bits starts at a multiple of the mask width so
13825  // that the access is aligned the same as the access width.
13826  if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
13827 
13828  // For narrowing to be valid, it must be the case that the load is the
13829  // memory operation immediately preceding the store.
13830  if (LD == Chain.getNode())
13831  ; // ok.
13832  else if (Chain->getOpcode() == ISD::TokenFactor &&
13833  SDValue(LD, 1).hasOneUse()) {
13834  // LD has only one chain use, so there are no indirect dependencies.
13835  bool isOk = false;
13836  for (const SDValue &ChainOp : Chain->op_values())
13837  if (ChainOp.getNode() == LD) {
13838  isOk = true;
13839  break;
13840  }
13841  if (!isOk)
13842  return Result;
13843  } else
13844  return Result; // Fail.
13845 
13846  Result.first = MaskedBytes;
13847  Result.second = NotMaskTZ/8;
13848  return Result;
13849 }
13850 
13851 /// Check to see if IVal is something that provides a value as specified by
13852 /// MaskInfo. If so, replace the specified store with a narrower store of
13853 /// truncated IVal.
13854 static SDNode *
13855 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
13856  SDValue IVal, StoreSDNode *St,
13857  DAGCombiner *DC) {
13858  unsigned NumBytes = MaskInfo.first;
13859  unsigned ByteShift = MaskInfo.second;
13860  SelectionDAG &DAG = DC->getDAG();
13861 
13862  // Check to see if IVal is all zeros in the part being masked in by the 'or'
13863  // that uses this. If not, this is not a replacement.
13864  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
13865  ByteShift*8, (ByteShift+NumBytes)*8);
13866  if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
13867 
13868  // Check that it is legal on the target to do this. It is legal if the new
13869  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
13870  // legalization.
13871  MVT VT = MVT::getIntegerVT(NumBytes*8);
13872  if (!DC->isTypeLegal(VT))
13873  return nullptr;
13874 
13875  // Okay, we can do this! Replace the 'St' store with a store of IVal that is
13876  // shifted by ByteShift and truncated down to NumBytes.
13877  if (ByteShift) {
13878  SDLoc DL(IVal);
13879  IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
13880  DAG.getConstant(ByteShift*8, DL,
13881  DC->getShiftAmountTy(IVal.getValueType())));
13882  }
13883 
13884  // Figure out the offset for the store and the alignment of the access.
13885  unsigned StOffset;
13886  unsigned NewAlign = St->getAlignment();
13887 
13888  if (DAG.getDataLayout().isLittleEndian())
13889  StOffset = ByteShift;
13890  else
13891  StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
13892 
13893  SDValue Ptr = St->getBasePtr();
13894  if (StOffset) {
13895  SDLoc DL(IVal);
13896  Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
13897  Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
13898  NewAlign = MinAlign(NewAlign, StOffset);
13899  }
13900 
13901  // Truncate down to the new size.
13902  IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
13903 
13904  ++OpsNarrowed;
13905  return DAG
13906  .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
13907  St->getPointerInfo().getWithOffset(StOffset), NewAlign)
13908  .getNode();
13909 }
13910 
13911 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
13912 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
13913 /// narrowing the load and store if it would end up being a win for performance
13914 /// or code size.
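///
/// For example (a sketch; little-endian, hypothetical values):
///   store (or (i32 load p), 0x00FF0000), p
/// can be narrowed to an i8 load / or / store at p + 2 with the immediate
/// 0xFF.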
13915 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
13916  StoreSDNode *ST = cast<StoreSDNode>(N);
13917  if (ST->isVolatile())
13918  return SDValue();
13919 
13920  SDValue Chain = ST->getChain();
13921  SDValue Value = ST->getValue();
13922  SDValue Ptr = ST->getBasePtr();
13923  EVT VT = Value.getValueType();
13924 
13925  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
13926  return SDValue();
13927 
13928  unsigned Opc = Value.getOpcode();
13929 
13930  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
13931  // is a byte mask indicating a consecutive number of bytes, check to see if
13932  // Y is known to provide just those bytes. If so, we try to replace the
13933  // load + replace + store sequence with a single (narrower) store, which makes
13934  // the load dead.
13935  if (Opc == ISD::OR) {
13936  std::pair<unsigned, unsigned> MaskedLoad;
13937  MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
13938  if (MaskedLoad.first)
13939  if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
13940  Value.getOperand(1), ST,this))
13941  return SDValue(NewST, 0);
13942 
13943  // Or is commutative, so try swapping X and Y.
13944  MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
13945  if (MaskedLoad.first)
13946  if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
13947  Value.getOperand(0), ST,this))
13948  return SDValue(NewST, 0);
13949  }
13950 
13951  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
13952  Value.getOperand(1).getOpcode() != ISD::Constant)
13953  return SDValue();
13954 
13955  SDValue N0 = Value.getOperand(0);
13956  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
13957  Chain == SDValue(N0.getNode(), 1)) {
13958  LoadSDNode *LD = cast<LoadSDNode>(N0);
13959  if (LD->getBasePtr() != Ptr ||
13960  LD->getPointerInfo().getAddrSpace() !=
13961  ST->getPointerInfo().getAddrSpace())
13962  return SDValue();
13963 
13964  // Find the type to narrow the load / op / store to.
13965  SDValue N1 = Value.getOperand(1);
13966  unsigned BitWidth = N1.getValueSizeInBits();
13967  APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
13968  if (Opc == ISD::AND)
13969  Imm ^= APInt::getAllOnesValue(BitWidth);
13970  if (Imm == 0 || Imm.isAllOnesValue())
13971  return SDValue();
13972  unsigned ShAmt = Imm.countTrailingZeros();
13973  unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
13974  unsigned NewBW = NextPowerOf2(MSB - ShAmt);
13975  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
13976  // The narrowing should be profitable, the load/store operation should be
13977  // legal (or custom) and the store size should be equal to the NewVT width.
13978  while (NewBW < BitWidth &&
13979  (NewVT.getStoreSizeInBits() != NewBW ||
13980  !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
13981  !TLI.isNarrowingProfitable(VT, NewVT))) {
13982  NewBW = NextPowerOf2(NewBW);
13983  NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
13984  }
13985  if (NewBW >= BitWidth)
13986  return SDValue();
13987 
13988  // If the lowest bit that changed does not start at a NewBW bit boundary,
13989  // start at the previous one.
13990  if (ShAmt % NewBW)
13991  ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
13992  APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
13993  std::min(BitWidth, ShAmt + NewBW));
13994  if ((Imm & Mask) == Imm) {
13995  APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
13996  if (Opc == ISD::AND)
13997  NewImm ^= APInt::getAllOnesValue(NewBW);
13998  uint64_t PtrOff = ShAmt / 8;
13999  // For big endian targets, we need to adjust the offset to the pointer to
14000  // load the correct bytes.
14001  if (DAG.getDataLayout().isBigEndian())
14002  PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
14003 
14004  unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
14005  Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
14006  if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
14007  return SDValue();
14008 
14009  SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
14010  Ptr.getValueType(), Ptr,
14011  DAG.getConstant(PtrOff, SDLoc(LD),
14012  Ptr.getValueType()));
14013  SDValue NewLD =
14014  DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
14015  LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
14016  LD->getMemOperand()->getFlags(), LD->getAAInfo());
14017  SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
14018  DAG.getConstant(NewImm, SDLoc(Value),
14019  NewVT));
14020  SDValue NewST =
14021  DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
14022  ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
14023 
14024  AddToWorklist(NewPtr.getNode());
14025  AddToWorklist(NewLD.getNode());
14026  AddToWorklist(NewVal.getNode());
14027  WorklistRemover DeadNodes(*this);
14028  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
14029  ++OpsNarrowed;
14030  return NewST;
14031  }
14032  }
14033 
14034  return SDValue();
14035 }
14036 
14037 /// For a given floating point load / store pair, if the load value isn't used
14038 /// by any other operations, then consider transforming the pair to integer
14039 /// load / store operations if the target deems the transformation profitable.
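///
/// For example (a sketch): (store (f32 load p), q) may become
/// (store (i32 load p), q) when the target finds the integer pair cheaper
/// and the alignment allows it.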
14040 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
14041  StoreSDNode *ST = cast<StoreSDNode>(N);
14042  SDValue Chain = ST->getChain();
14043  SDValue Value = ST->getValue();
14044  if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
14045  Value.hasOneUse() &&
14046  Chain == SDValue(Value.getNode(), 1)) {
14047  LoadSDNode *LD = cast<LoadSDNode>(Value);
14048  EVT VT = LD->getMemoryVT();
14049  if (!VT.isFloatingPoint() ||
14050  VT != ST->getMemoryVT() ||
14051  LD->isNonTemporal() ||
14052  ST->isNonTemporal() ||
14053  LD->getPointerInfo().getAddrSpace() != 0 ||
14054  ST->getPointerInfo().getAddrSpace() != 0)
14055  return SDValue();
14056 
14057  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
14058  if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
14059  !TLI.isOperationLegal(ISD::STORE, IntVT) ||
14060  !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
14061  !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
14062  return SDValue();
14063 
14064  unsigned LDAlign = LD->getAlignment();
14065  unsigned STAlign = ST->getAlignment();
14066  Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
14067  unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
14068  if (LDAlign < ABIAlign || STAlign < ABIAlign)
14069  return SDValue();
14070 
14071  SDValue NewLD =
14072  DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
14073  LD->getPointerInfo(), LDAlign);
14074 
14075  SDValue NewST =
14076  DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
14077  ST->getPointerInfo(), STAlign);
14078 
14079  AddToWorklist(NewLD.getNode());
14080  AddToWorklist(NewST.getNode());
14081  WorklistRemover DeadNodes(*this);
14082  DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
14083  ++LdStFP2Int;
14084  return NewST;
14085  }
14086 
14087  return SDValue();
14088 }
14089 
14090 // This is a helper function for visitMUL to check the profitability
14091 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
14092 // MulNode is the original multiply, AddNode is (add x, c1),
14093 // and ConstNode is c2.
14094 //
14095 // If the (add x, c1) has multiple uses, we could increase
14096 // the number of adds if we make this transformation.
14097 // It would only be worth doing this if we can remove a
14098 // multiply in the process. Check for that here.
14099 // To illustrate:
14100 // (A + c1) * c3
14101 // (A + c2) * c3
14102 // We're checking for cases where we have common "c3 * A" expressions.
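// Concretely (hypothetical constants): (A + 1) * 5 and (A + 2) * 5 become
// (A * 5) + 5 and (A * 5) + 10, so the single multiply A * 5 is shared.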
14103 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
14104  SDValue &AddNode,
14105  SDValue &ConstNode) {
14106  APInt Val;
14107 
14108  // If the add only has one use, this would be OK to do.
14109  if (AddNode.getNode()->hasOneUse())
14110  return true;
14111 
14112  // Walk all the users of the constant with which we're multiplying.
14113  for (SDNode *Use : ConstNode->uses()) {
14114  if (Use == MulNode) // This use is the one we're on right now. Skip it.
14115  continue;
14116 
14117  if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
14118  SDNode *OtherOp;
14119  SDNode *MulVar = AddNode.getOperand(0).getNode();
14120 
14121  // OtherOp is what we're multiplying against the constant.
14122  if (Use->getOperand(0) == ConstNode)
14123  OtherOp = Use->getOperand(1).getNode();
14124  else
14125  OtherOp = Use->getOperand(0).getNode();
14126 
14127  // Check to see if multiply is with the same operand of our "add".
14128  //
14129  // ConstNode = CONST
14130  // Use = ConstNode * A <-- visiting Use. OtherOp is A.
14131  // ...
14132  // AddNode = (A + c1) <-- MulVar is A.
14133  // = AddNode * ConstNode <-- current visiting instruction.
14134  //
14135  // If we make this transformation, we will have a common
14136  // multiply (ConstNode * A) that we can save.
14137  if (OtherOp == MulVar)
14138  return true;
14139 
14140  // Now check to see if a future expansion will give us a common
14141  // multiply.
14142  //
14143  // ConstNode = CONST
14144  // AddNode = (A + c1)
14145  // ... = AddNode * ConstNode <-- current visiting instruction.
14146  // ...
14147  // OtherOp = (A + c2)
14148  // Use = OtherOp * ConstNode <-- visiting Use.
14149  //
14150  // If we make this transformation, we will have a common
14151  // multiply (CONST * A) after we also do the same transformation
14152  // to the "Use" instruction.
14153  if (OtherOp->getOpcode() == ISD::ADD &&
14154  DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
14155  OtherOp->getOperand(0).getNode() == MulVar)
14156  return true;
14157  }
14158  }
14159 
14160  // Didn't find a case where this would be profitable.
14161  return false;
14162 }
14163 
14164 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
14165  unsigned NumStores) {
14166  SmallVector<SDValue, 8> Chains;
14167  SmallPtrSet<const SDNode *, 8> Visited;
14168  SDLoc StoreDL(StoreNodes[0].MemNode);
14169 
14170  for (unsigned i = 0; i < NumStores; ++i) {
14171  Visited.insert(StoreNodes[i].MemNode);
14172  }
14173 
14174  // Don't include chain inputs that are themselves among the merged stores.
14175  for (unsigned i = 0; i < NumStores; ++i) {
14176  if (Visited.count(StoreNodes[i].MemNode->getChain().getNode()) == 0)
14177  Chains.push_back(StoreNodes[i].MemNode->getChain());
14178  }
14179 
14180  assert(Chains.size() > 0 && "Chain should have generated a chain");
14181  return DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, Chains);
14182 }
14183 
14184 bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
14185  SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
14186  bool IsConstantSrc, bool UseVector, bool UseTrunc) {
14187  // Make sure we have something to merge.
14188  if (NumStores < 2)
14189  return false;
14190 
14191  // The latest Node in the DAG.
14192  SDLoc DL(StoreNodes[0].MemNode);
14193 
14194  int64_t ElementSizeBits = MemVT.getStoreSizeInBits();
14195  unsigned SizeInBits = NumStores * ElementSizeBits;
14196  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
14197 
14198  EVT StoreTy;
14199  if (UseVector) {
14200  unsigned Elts = NumStores * NumMemElts;
14201  // Get the type for the merged vector store.
14202  StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
14203  } else
14204  StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
14205 
14206  SDValue StoredVal;
14207  if (UseVector) {
14208  if (IsConstantSrc) {
14209  SmallVector<SDValue, 8> BuildVector;
14210  for (unsigned I = 0; I != NumStores; ++I) {
14211  StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
14212  SDValue Val = St->getValue();
14213  // If constant is of the wrong type, convert it now.
14214  if (MemVT != Val.getValueType()) {
14215  Val = peekThroughBitcasts(Val);
14216  // Deal with constants of wrong size.
14217  if (ElementSizeBits != Val.getValueSizeInBits()) {
14218  EVT IntMemVT =
14219  EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
14220  if (isa<ConstantFPSDNode>(Val)) {
14221  // Not clear how to truncate FP values.
14222  return false;
14223  } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
14224  Val = DAG.getConstant(C->getAPIntValue()
14225  .zextOrTrunc(Val.getValueSizeInBits())
14226  .zextOrTrunc(ElementSizeBits),
14227  SDLoc(C), IntMemVT);
14228  }
14229  // Make sure the value uses the correctly sized type via a bitcast.
14230  Val = DAG.getBitcast(MemVT, Val);
14231  }
14232  BuildVector.push_back(Val);
14233  }
14234  StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
14235  : ISD::BUILD_VECTOR,
14236  DL, StoreTy, BuildVector);
14237  } else {
14238  SmallVector<SDValue, 8> Ops;
14239  for (unsigned i = 0; i < NumStores; ++i) {
14240  StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
14241  SDValue Val = peekThroughBitcasts(St->getValue());
14242  // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
14243  // type MemVT. If the underlying value is not the correct
14244  // type, but it is an extraction of an appropriate vector we
14245  // can recast Val to be of the correct type. This may require
14246  // converting between EXTRACT_VECTOR_ELT and
14247  // EXTRACT_SUBVECTOR.
14248  if ((MemVT != Val.getValueType()) &&
14249  (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
14250  Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
14251  EVT MemVTScalarTy = MemVT.getScalarType();
14252  // We may need to add a bitcast here to get types to line up.
14253  if (MemVTScalarTy != Val.getValueType().getScalarType()) {
14254  Val = DAG.getBitcast(MemVT, Val);
14255  } else {
14256  unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
14257  : ISD::EXTRACT_VECTOR_ELT;
14258  SDValue Vec = Val.getOperand(0);
14259  SDValue Idx = Val.getOperand(1);
14260  Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
14261  }
14262  }
14263  Ops.push_back(Val);
14264  }
14265 
14266  // Build the extracted vector elements back into a vector.
14267  StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
14268  : ISD::BUILD_VECTOR,
14269  DL, StoreTy, Ops);
14270  }
14271  } else {
14272  // We should always use a vector store when merging extracted vector
14273  // elements, so this path implies a store of constants.
14274  assert(IsConstantSrc && "Merged vector elements should use vector store");
14275 
14276  APInt StoreInt(SizeInBits, 0);
14277 
14278  // Construct a single integer constant which is made of the smaller
14279  // constant inputs.
14280  bool IsLE = DAG.getDataLayout().isLittleEndian();
14281  for (unsigned i = 0; i < NumStores; ++i) {
14282  unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
14283  StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
14284 
14285  SDValue Val = St->getValue();
14286  Val = peekThroughBitcasts(Val);
14287  StoreInt <<= ElementSizeBits;
14288  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
14289  StoreInt |= C->getAPIntValue()
14290  .zextOrTrunc(ElementSizeBits)
14291  .zextOrTrunc(SizeInBits);
14292  } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
14293  StoreInt |= C->getValueAPF()
14294  .bitcastToAPInt()
14295  .zextOrTrunc(ElementSizeBits)
14296  .zextOrTrunc(SizeInBits);
14297  // If fp truncation is necessary give up for now.
14298  if (MemVT.getSizeInBits() != ElementSizeBits)
14299  return false;
14300  } else {
14301  llvm_unreachable("Invalid constant element type");
14302  }
14303  }
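 
    // For example (hypothetical values, little endian): merging two i16
    // stores of 0x1234 (lower address) and 0x5678 produces the single i32
    // constant 0x56781234.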
14304 
14305  // Create the new Load and Store operations.
14306  StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
14307  }
14308 
14309  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
14310  SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
14311 
14312  // Make sure we use a truncating store where that is necessary for legality.
14313  SDValue NewStore;
14314  if (!UseTrunc) {
14315  NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
14316  FirstInChain->getPointerInfo(),
14317  FirstInChain->getAlignment());
14318  } else { // Must be realized as a trunc store
14319  EVT LegalizedStoredValTy =
14320  TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
14321  unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
14322  ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
14323  SDValue ExtendedStoreVal =
14324  DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
14325  LegalizedStoredValTy);
14326  NewStore = DAG.getTruncStore(
14327  NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
14328  FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
14329  FirstInChain->getAlignment(),
14330  FirstInChain->getMemOperand()->getFlags());
14331  }
14332 
14333  // Replace all merged stores with the new store.
14334  for (unsigned i = 0; i < NumStores; ++i)
14335  CombineTo(StoreNodes[i].MemNode, NewStore);
14336 
14337  AddToWorklist(NewChain.getNode());
14338  return true;
14339 }
14340 
14341 void DAGCombiner::getStoreMergeCandidates(
14342  StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
14343  SDNode *&RootNode) {
14344  // This holds the base pointer, index, and the offset in bytes from the base
14345  // pointer.
14346  BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
14347  EVT MemVT = St->getMemoryVT();
14348 
14349  SDValue Val = peekThroughBitcasts(St->getValue());
14350  // We must have a base and an offset.
14351  if (!BasePtr.getBase().getNode())
14352  return;
14353 
14354  // Do not handle stores to undef base pointers.
14355  if (BasePtr.getBase().isUndef())
14356  return;
14357 
14358  bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
14359  bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
14360  Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
14361  bool IsLoadSrc = isa<LoadSDNode>(Val);
14362  BaseIndexOffset LBasePtr;
14363  // Match on loadbaseptr if relevant.
14364  EVT LoadVT;
14365  if (IsLoadSrc) {
14366  auto *Ld = cast<LoadSDNode>(Val);
14367  LBasePtr = BaseIndexOffset::match(Ld, DAG);
14368  LoadVT = Ld->getMemoryVT();
14369  // Load and store should be the same type.
14370  if (MemVT != LoadVT)
14371  return;
14372  // Loads must only have one use.
14373  if (!Ld->hasNUsesOfValue(1, 0))
14374  return;
14375  // The memory operands must not be volatile.
14376  if (Ld->isVolatile() || Ld->isIndexed())
14377  return;
14378  }
14379  auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
14380  int64_t &Offset) -> bool {
14381  if (Other->isVolatile() || Other->isIndexed())
14382  return false;
14383  SDValue Val = peekThroughBitcasts(Other->getValue());
14384  // Allow merging constants of different types as integers.
14385  bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
14386  : Other->getMemoryVT() != MemVT;
14387  if (IsLoadSrc) {
14388  if (NoTypeMatch)
14389  return false;
14390  // The Load's Base Ptr must also match
14391  if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
14392  auto LPtr = BaseIndexOffset::match(OtherLd, DAG);
14393  if (LoadVT != OtherLd->getMemoryVT())
14394  return false;
14395  // Loads must only have one use.
14396  if (!OtherLd->hasNUsesOfValue(1, 0))
14397  return false;
14398  // The memory operands must not be volatile.
14399  if (OtherLd->isVolatile() || OtherLd->isIndexed())
14400  return false;
14401  if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
14402  return false;
14403  } else
14404  return false;
14405  }
14406  if (IsConstantSrc) {
14407  if (NoTypeMatch)
14408  return false;
14409  if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val)))
14410  return false;
14411  }
14412  if (IsExtractVecSrc) {
14413  // Do not merge truncated stores here.
14414  if (Other->isTruncatingStore())
14415  return false;
14416  if (!MemVT.bitsEq(Val.getValueType()))
14417  return false;
14418  if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
14419  Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
14420  return false;
14421  }
14422  Ptr = BaseIndexOffset::match(Other, DAG);
14423  return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
14424  };
14425 
14426  // We are looking for a root node which is an ancestor to all mergeable
14427  // stores. We search up through a load, to our root, and then down
14428  // through all children. For instance, we will find Store{1,2,3} if
14429  // St is Store1, Store2, or Store3 where the root is not a load,
14430  // which is always true for non-volatile ops. TODO: Expand
14431  // the search to find all valid candidates through multiple layers of loads.
14432  //
14433  // Root
14434  // |-------|-------|
14435  // Load Load Store3
14436  // | |
14437  // Store1 Store2
14438  //
14439  // FIXME: We should be able to climb and
14440  // descend TokenFactors to find candidates as well.
14441 
14442  RootNode = St->getChain().getNode();
14443 
14444  if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
14445  RootNode = Ldn->getChain().getNode();
14446  for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
14447  if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
14448  for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
14449  if (I2.getOperandNo() == 0)
14450  if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
14451  BaseIndexOffset Ptr;
14452  int64_t PtrDiff;
14453  if (CandidateMatch(OtherST, Ptr, PtrDiff))
14454  StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
14455  }
14456  } else
14457  for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
14458  if (I.getOperandNo() == 0)
14459  if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
14460  BaseIndexOffset Ptr;
14461  int64_t PtrDiff;
14462  if (CandidateMatch(OtherST, Ptr, PtrDiff))
14463  StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
14464  }
14465 }
14466 
14467 // We need to check that merging these stores does not cause a loop in
14468 // the DAG. Any store candidate may depend on another candidate
14469 // indirectly through its operand (we already consider dependencies
14470 // through the chain). Check in parallel by searching up from
14471 // non-chain operands of candidates.
14472 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
14473  SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
14474  SDNode *RootNode) {
14475  // FIXME: We should be able to truncate a full search of
14476  // predecessors by doing a BFS and keeping tabs on the originating
14477  // stores from which worklist nodes come, in a similar way to
14478  // TokenFactor simplification.
14479 
14480  SmallPtrSet<const SDNode *, 32> Visited;
14481  SmallVector<const SDNode *, 8> Worklist;
14482 
14483  // RootNode is a predecessor to all candidates so we need not search
14484  // past it. Add RootNode (peeking through TokenFactors). Do not count
14485  // these towards the size check.
14486 
14487  Worklist.push_back(RootNode);
14488  while (!Worklist.empty()) {
14489  auto N = Worklist.pop_back_val();
14490  if (!Visited.insert(N).second)
14491  continue; // Already present in Visited.
14492  if (N->getOpcode() == ISD::TokenFactor) {
14493  for (SDValue Op : N->ops())
14494  Worklist.push_back(Op.getNode());
14495  }
14496  }
14497 
14498  // Don't count pruning nodes towards max.
14499  unsigned int Max = 1024 + Visited.size();
14500  // Search Ops of store candidates.
14501  for (unsigned i = 0; i < NumStores; ++i) {
14502  SDNode *N = StoreNodes[i].MemNode;
14503  // Of the 4 Store Operands:
14504  // * Chain (Op 0) -> We have already considered these
14505  // in candidate selection and can be
14506  // safely ignored
14507  // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
14508  // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
14509  // but aren't necessarily from the same base node, so
14510  // cycles are possible (e.g. via an indexed store).
14511  // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
14512  // non-indexed stores). Not constant on all targets (e.g. ARM)
14513  // and so can participate in a cycle.
14514  for (unsigned j = 1; j < N->getNumOperands(); ++j)
14515  Worklist.push_back(N->getOperand(j).getNode());
14516  }
14517  // Search through DAG. We can stop early if we find a store node.
14518  for (unsigned i = 0; i < NumStores; ++i)
14519  if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
14520  Max))
14521  return false;
14522  return true;
14523 }
14524 
14525 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
14526  if (OptLevel == CodeGenOpt::None)
14527  return false;
14528 
14529  EVT MemVT = St->getMemoryVT();
14530  int64_t ElementSizeBytes = MemVT.getStoreSize();
14531  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
14532 
14533  if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
14534  return false;
14535 
14536  bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
14537  Attribute::NoImplicitFloat);
14538 
14539  // This function cannot currently deal with non-byte-sized memory sizes.
14540  if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
14541  return false;
14542 
14543  if (!MemVT.isSimple())
14544  return false;
14545 
14546  // Perform an early exit check. Do not bother looking at stored values that
14547  // are not constants, loads, or extracted vector elements.
14548  SDValue StoredVal = peekThroughBitcasts(St->getValue());
14549  bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
14550  bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
14551  isa<ConstantFPSDNode>(StoredVal);
14552  bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
14553  StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
14554 
14555  if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
14556  return false;
14557 
14558  SmallVector<MemOpLink, 8> StoreNodes;
14559  SDNode *RootNode;
14560  // Find potential store merge candidates by searching through the chain sub-DAG.
14561  getStoreMergeCandidates(St, StoreNodes, RootNode);
14562 
14563  // Check if there is anything to merge.
14564  if (StoreNodes.size() < 2)
14565  return false;
14566 
14567  // Sort the memory operands according to their distance from the
14568  // base pointer.
14569  llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
14570  return LHS.OffsetFromBase < RHS.OffsetFromBase;
14571  });
14572 
14573  // Store Merge attempts to merge the lowest stores first. This generally
14574  // works out since, if successful, the remaining stores are checked
14575  // after the first collection of stores is merged. However, in the
14576  // case that a non-mergeable store is found first, e.g., {p[-2],
14577  // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
14578  // mergeable cases. To prevent this, we prune such stores from the
14579  // front of StoreNodes here.
14580 
14581  bool RV = false;
14582  while (StoreNodes.size() > 1) {
14583  unsigned StartIdx = 0;
14584  while ((StartIdx + 1 < StoreNodes.size()) &&
14585  StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
14586  StoreNodes[StartIdx + 1].OffsetFromBase)
14587  ++StartIdx;
14588 
14589  // Bail if we don't have enough candidates to merge.
14590  if (StartIdx + 1 >= StoreNodes.size())
14591  return RV;
14592 
14593  if (StartIdx)
14594  StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
14595 
14596  // Scan the memory operations on the chain and find the first
14597  // non-consecutive store memory address.
14598  unsigned NumConsecutiveStores = 1;
14599  int64_t StartAddress = StoreNodes[0].OffsetFromBase;
14600  // Check that the addresses are consecutive starting from the second
14601  // element in the list of stores.
14602  for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
14603  int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
14604  if (CurrAddress - StartAddress != (ElementSizeBytes * i))
14605  break;
14606  NumConsecutiveStores = i + 1;
14607  }
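 
    // For example (hypothetical offsets): with ElementSizeBytes == 4 and
    // offsets {0, 4, 8, 20}, the first three stores are consecutive, so
    // NumConsecutiveStores becomes 3.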
14608 
14609  if (NumConsecutiveStores < 2) {
14610  StoreNodes.erase(StoreNodes.begin(),
14611  StoreNodes.begin() + NumConsecutiveStores);
14612  continue;
14613  }
14614 
14615  // The node with the lowest store address.
14616  LLVMContext &Context = *DAG.getContext();
14617  const DataLayout &DL = DAG.getDataLayout();
14618 
14619  // Store the constants into memory as one consecutive store.
14620  if (IsConstantSrc) {
14621  while (NumConsecutiveStores >= 2) {
14622  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
14623  unsigned FirstStoreAS = FirstInChain->getAddressSpace();
14624  unsigned FirstStoreAlign = FirstInChain->getAlignment();
14625  unsigned LastLegalType = 1;
14626  unsigned LastLegalVectorType = 1;
14627  bool LastIntegerTrunc = false;
14628  bool NonZero = false;
14629  unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
14630  for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
14631  StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
14632  SDValue StoredVal = ST->getValue();
14633  bool IsElementZero = false;
14634  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
14635  IsElementZero = C->isNullValue();
14636  else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
14637  IsElementZero = C->getConstantFPValue()->isNullValue();
14638  if (IsElementZero) {
14639  if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
14640  FirstZeroAfterNonZero = i;
14641  }
14642  NonZero |= !IsElementZero;
14643 
14644  // Find a legal type for the constant store.
14645  unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
14646  EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
14647  bool IsFast = false;
14648 
14649  // Break early when size is too large to be legal.
14650  if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
14651  break;
14652 
14653  if (TLI.isTypeLegal(StoreTy) &&
14654  TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
14655  TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14656  FirstStoreAlign, &IsFast) &&
14657  IsFast) {
14658  LastIntegerTrunc = false;
14659  LastLegalType = i + 1;
14660  // Or check whether a truncstore is legal.
14661  } else if (TLI.getTypeAction(Context, StoreTy) ==
14662  TargetLowering::TypePromoteInteger) {
14663  EVT LegalizedStoredValTy =
14664  TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
14665  if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
14666  TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
14667  TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14668  FirstStoreAlign, &IsFast) &&
14669  IsFast) {
14670  LastIntegerTrunc = true;
14671  LastLegalType = i + 1;
14672  }
14673  }
14674 
14675  // We only use vectors if the constant is known to be zero or the
14676  // target allows it and the function is not marked with the
14677  // noimplicitfloat attribute.
14678  if ((!NonZero ||
14679  TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
14680  !NoVectors) {
14681  // Find a legal type for the vector store.
14682  unsigned Elts = (i + 1) * NumMemElts;
14683  EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
14684  if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
14685  TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
14686  TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
14687  FirstStoreAlign, &IsFast) &&
14688  IsFast)
14689  LastLegalVectorType = i + 1;
14690  }
14691  }
14692 
14693  bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
14694  unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
14695 
14696  // Check if we found a legal integer type that creates a meaningful
14697  // merge.
14698  if (NumElem < 2) {
14699  // We know that candidate stores are in order and of correct
14700  // shape. While there is no mergeable sequence from the
14701  // beginning, one may start later in the sequence. The only
14702  // reason a merge of size N could have failed where another of
14703  // the same size would not have, is if the alignment has
14704  // improved or we've dropped a non-zero value. Drop as many
14705  // candidates as we can here.
14706  unsigned NumSkip = 1;
14707  while (
14708  (NumSkip < NumConsecutiveStores) &&
14709  (NumSkip < FirstZeroAfterNonZero) &&
14710  (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
14711  NumSkip++;
14712 
14713  StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
14714  NumConsecutiveStores -= NumSkip;
14715  continue;
14716  }
14717 
14718  // Check that we can merge these candidates without causing a cycle.
14719  if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
14720  RootNode)) {
14721  StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
14722  NumConsecutiveStores -= NumElem;
14723  continue;
14724  }
14725 
14726  RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true,
14727  UseVector, LastIntegerTrunc);
14728 
14729  // Remove merged stores for next iteration.
14730  StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
14731  NumConsecutiveStores -= NumElem;
14732  }
14733  continue;
14734  }
14735 
14736  // When extracting multiple vector elements, try to store them
14737  // in one vector store rather than a sequence of scalar stores.
14738  if (IsExtractVecSrc) {
14739  // Loop on Consecutive Stores on success.
14740  while (NumConsecutiveStores >= 2) {
14741  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
14742  unsigned FirstStoreAS = FirstInChain->getAddressSpace();
14743  unsigned FirstStoreAlign = FirstInChain->getAlignment();
14744  unsigned NumStoresToMerge = 1;
14745  for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
14746  // Find a legal type for the vector store.
14747  unsigned Elts = (i + 1) * NumMemElts;
14748  EVT Ty =
14749  EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
14750  bool IsFast;
14751 
14752  // Break early when size is too large to be legal.
14753  if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
14754  break;
14755 
14756  if (TLI.isTypeLegal(Ty) &&
14757  TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
14758  TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
14759  FirstStoreAlign, &IsFast) &&
14760  IsFast)
14761  NumStoresToMerge = i + 1;
14762  }
14763 
14764  // Check if we found a legal vector type creating a meaningful
14765  // merge.
14766  if (NumStoresToMerge < 2) {
14767  // We know that candidate stores are in order and of correct
14768  // shape. While there is no mergeable sequence from the
14769  // beginning, one may start later in the sequence. The only
14770  // reason a merge of size N could have failed where another of
14771  // the same size would not have, is if the alignment has
14772  // improved. Drop as many candidates as we can here.
14773  unsigned NumSkip = 1;
14774  while (
14775  (NumSkip < NumConsecutiveStores) &&
14776  (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
14777  NumSkip++;
14778 
14779  StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
14780  NumConsecutiveStores -= NumSkip;
14781  continue;
14782  }
14783 
14784  // Check that we can merge these candidates without causing a cycle.
14785  if (!checkMergeStoreCandidatesForDependencies(
14786  StoreNodes, NumStoresToMerge, RootNode)) {
14787  StoreNodes.erase(StoreNodes.begin(),
14788  StoreNodes.begin() + NumStoresToMerge);
14789  NumConsecutiveStores -= NumStoresToMerge;
14790  continue;
14791  }
14792 
14793  RV |= MergeStoresOfConstantsOrVecElts(
14794  StoreNodes, MemVT, NumStoresToMerge, false, true, false);
14795 
14796  StoreNodes.erase(StoreNodes.begin(),
14797  StoreNodes.begin() + NumStoresToMerge);
14798  NumConsecutiveStores -= NumStoresToMerge;
14799  }
14800  continue;
14801  }
14802 
14803  // Below we handle the case of multiple consecutive stores that
14804  // come from multiple consecutive loads. We merge them into a single
14805  // wide load and a single wide store.
14806 
14807  // Look for load nodes which are used by the stored values.
14808  SmallVector<MemOpLink, 8> LoadNodes;
14809 
14810  // Find acceptable loads. Loads need to have the same chain (token factor);
14811  // they must not be zext, volatile, or indexed, and must be consecutive.
14812  BaseIndexOffset LdBasePtr;
14813 
14814  for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
14815  StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
14816  SDValue Val = peekThroughBitcasts(St->getValue());
14817  LoadSDNode *Ld = cast<LoadSDNode>(Val);
14818 
14819  BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
14820  // If this is not the first ptr that we check.
14821  int64_t LdOffset = 0;
14822  if (LdBasePtr.getBase().getNode()) {
14823  // The base ptr must be the same.
14824  if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
14825  break;
14826  } else {
14827  // Check that all other base pointers are the same as this one.
14828  LdBasePtr = LdPtr;
14829  }
14830 
14831  // We found a potential memory operand to merge.
14832  LoadNodes.push_back(MemOpLink(Ld, LdOffset));
14833  }
14834 
14835  while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
14836  // If we have load/store pair instructions and we only have two values,
14837  // don't bother merging.
14838  unsigned RequiredAlignment;
14839  if (LoadNodes.size() == 2 &&
14840  TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
14841  StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
14842  StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
14843  LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
14844  break;
14845  }
14846  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
14847  unsigned FirstStoreAS = FirstInChain->getAddressSpace();
14848  unsigned FirstStoreAlign = FirstInChain->getAlignment();
14849  LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
14850  unsigned FirstLoadAS = FirstLoad->getAddressSpace();
14851  unsigned FirstLoadAlign = FirstLoad->getAlignment();
14852 
14853  // Scan the memory operations on the chain and find the first
14854  // non-consecutive load memory address. These variables hold the index in
14855  // the load node array.
14856 
14857  unsigned LastConsecutiveLoad = 1;
14858 
14859  // These variables refer to sizes, not indices, in the array.
14860  unsigned LastLegalVectorType = 1;
14861  unsigned LastLegalIntegerType = 1;
14862  bool isDereferenceable = true;
14863  bool DoIntegerTruncate = false;
14864  StartAddress = LoadNodes[0].OffsetFromBase;
14865  SDValue FirstChain = FirstLoad->getChain();
14866  for (unsigned i = 1; i < LoadNodes.size(); ++i) {
14867  // All loads must share the same chain.
14868  if (LoadNodes[i].MemNode->getChain() != FirstChain)
14869  break;
14870 
14871  int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
14872  if (CurrAddress - StartAddress != (ElementSizeBytes * i))
14873  break;
14874  LastConsecutiveLoad = i;
14875 
14876  if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
14877  isDereferenceable = false;
14878 
14879  // Find a legal type for the vector store.
14880  unsigned Elts = (i + 1) * NumMemElts;
14881  EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
14882 
14883  // Break early when size is too large to be legal.
14884  if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
14885  break;
14886 
14887  bool IsFastSt, IsFastLd;
14888  if (TLI.isTypeLegal(StoreTy) &&
14889  TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
14890  TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14891  FirstStoreAlign, &IsFastSt) &&
14892  IsFastSt &&
14893  TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
14894  FirstLoadAlign, &IsFastLd) &&
14895  IsFastLd) {
14896  LastLegalVectorType = i + 1;
14897  }
14898 
14899  // Find a legal type for the integer store.
14900  unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
14901  StoreTy = EVT::getIntegerVT(Context, SizeInBits);
14902  if (TLI.isTypeLegal(StoreTy) &&
14903  TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
14904  TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14905  FirstStoreAlign, &IsFastSt) &&
14906  IsFastSt &&
14907  TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
14908  FirstLoadAlign, &IsFastLd) &&
14909  IsFastLd) {
14910  LastLegalIntegerType = i + 1;
14911  DoIntegerTruncate = false;
14912  // Or check whether a truncstore and extload is legal.
14913  } else if (TLI.getTypeAction(Context, StoreTy) ==
14914  TargetLowering::TypePromoteInteger) {
14915  EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
14916  if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
14917  TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
14918  TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy,
14919  StoreTy) &&
14920  TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
14921  StoreTy) &&
14922  TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
14923  TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14924  FirstStoreAlign, &IsFastSt) &&
14925  IsFastSt &&
14926  TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
14927  FirstLoadAlign, &IsFastLd) &&
14928  IsFastLd) {
14929  LastLegalIntegerType = i + 1;
14930  DoIntegerTruncate = true;
14931  }
14932  }
14933  }
14934 
14935  // Only use vector types if the vector type is larger than the integer
14936  // type. If they are the same, use integers.
14937  bool UseVectorTy =
14938  LastLegalVectorType > LastLegalIntegerType && !NoVectors;
14939  unsigned LastLegalType =
14940  std::max(LastLegalVectorType, LastLegalIntegerType);
14941 
14942  // We add +1 here because the LastXXX variables hold the last index,
14943  // while NumElem is a count of elements.
14944  unsigned NumElem =
14945  std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
14946  NumElem = std::min(LastLegalType, NumElem);
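  // A worked instance of the index-vs-count math above (hypothetical
  // values): with NumConsecutiveStores == 6, LastConsecutiveLoad == 3
  // (loads 0..3 are consecutive) and LastLegalType == 4, NumElem becomes
  // min(6, 3 + 1) = 4 and then min(4, 4) = 4 elements merged this round.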
14947 
14948  if (NumElem < 2) {
14949  // We know that candidate stores are in order and of correct
14950  // shape. While there is no mergeable sequence from the
14951  // beginning one may start later in the sequence. The only
14952  // reason a merge of size N could have failed where another of
14953  // the same size would not have is if the alignment of either
14954  // the load or store has improved. Drop as many candidates as we
14955  // can here.
14956  unsigned NumSkip = 1;
14957  while ((NumSkip < LoadNodes.size()) &&
14958  (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
14959  (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
14960  NumSkip++;
14961  StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
14962  LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
14963  NumConsecutiveStores -= NumSkip;
14964  continue;
14965  }
14966 
14967  // Check that we can merge these candidates without causing a cycle.
14968  if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
14969  RootNode)) {
14970  StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
14971  LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
14972  NumConsecutiveStores -= NumElem;
14973  continue;
14974  }
14975 
14976  // Find if it is better to use vectors or integers to load and store
14977  // to memory.
14978  EVT JointMemOpVT;
14979  if (UseVectorTy) {
14980  // Find a legal type for the vector store.
14981  unsigned Elts = NumElem * NumMemElts;
14982  JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
14983  } else {
14984  unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
14985  JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
14986  }
14987 
14988  SDLoc LoadDL(LoadNodes[0].MemNode);
14989  SDLoc StoreDL(StoreNodes[0].MemNode);
14990 
14991  // The merged loads are required to have the same incoming chain, so
14992  // using the first's chain is acceptable.
14993 
14994  SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
14995  AddToWorklist(NewStoreChain.getNode());
14996 
14997  MachineMemOperand::Flags MMOFlags =
14998  isDereferenceable ? MachineMemOperand::MODereferenceable
14999  : MachineMemOperand::MONone;
15000 
15001  SDValue NewLoad, NewStore;
15002  if (UseVectorTy || !DoIntegerTruncate) {
15003  NewLoad =
15004  DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
15005  FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
15006  FirstLoadAlign, MMOFlags);
15007  NewStore = DAG.getStore(
15008  NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
15009  FirstInChain->getPointerInfo(), FirstStoreAlign);
15010  } else { // This must be the truncstore/extload case
15011  EVT ExtendedTy =
15012  TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
15013  NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
15014  FirstLoad->getChain(), FirstLoad->getBasePtr(),
15015  FirstLoad->getPointerInfo(), JointMemOpVT,
15016  FirstLoadAlign, MMOFlags);
15017  NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
15018  FirstInChain->getBasePtr(),
15019  FirstInChain->getPointerInfo(),
15020  JointMemOpVT, FirstInChain->getAlignment(),
15021  FirstInChain->getMemOperand()->getFlags());
15022  }
15023 
15024  // Transfer chain users from old loads to the new load.
15025  for (unsigned i = 0; i < NumElem; ++i) {
15026  LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
15027  DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
15028  SDValue(NewLoad.getNode(), 1));
15029  }
15030 
15031  // Replace all of the stores with the new store. Recursively remove the
15032  // corresponding value if it is no longer used.
15033  for (unsigned i = 0; i < NumElem; ++i) {
15034  SDValue Val = StoreNodes[i].MemNode->getOperand(1);
15035  CombineTo(StoreNodes[i].MemNode, NewStore);
15036  if (Val.getNode()->use_empty())
15037  recursivelyDeleteUnusedNodes(Val.getNode());
15038  }
15039 
15040  RV = true;
15041  StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15042  LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
15043  NumConsecutiveStores -= NumElem;
15044  }
15045  }
15046  return RV;
15047 }
15048 
15049 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
15050  SDLoc SL(ST);
15051  SDValue ReplStore;
15052 
15053  // Replace the chain to avoid dependency.
15054  if (ST->isTruncatingStore()) {
15055  ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
15056  ST->getBasePtr(), ST->getMemoryVT(),
15057  ST->getMemOperand());
15058  } else {
15059  ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
15060  ST->getMemOperand());
15061  }
15062 
15063  // Create token to keep both nodes around.
15064  SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
15065  MVT::Other, ST->getChain(), ReplStore);
15066 
15067  // Make sure the new and old chains are cleaned up.
15068  AddToWorklist(Token.getNode());
15069 
15070  // Don't add users to work list.
15071  return CombineTo(ST, Token, false);
15072 }
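// A schematic sketch of the rewiring above (informal node notation; not
// actual SelectionDAG syntax):
//
//   before: ST = store(Chain = OldChain, Val, Ptr)
//   after:  ReplStore = store(Chain = BetterChain, Val, Ptr)
//           Token     = TokenFactor(OldChain, ReplStore)
//
// Chain users of ST are switched over to Token, so both the old ordering
// and the new store remain reachable.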
15073 
15074 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
15075  SDValue Value = ST->getValue();
15076  if (Value.getOpcode() == ISD::TargetConstantFP)
15077  return SDValue();
15078 
15079  SDLoc DL(ST);
15080 
15081  SDValue Chain = ST->getChain();
15082  SDValue Ptr = ST->getBasePtr();
15083 
15084  const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
15085 
15086  // NOTE: If the original store is volatile, this transform must not increase
15087  // the number of stores. For example, on x86-32 an f64 can be stored in one
15088  // processor operation but an i64 (which is not legal) requires two. So the
15089  // transform should not be done in this case.
15090 
15091  SDValue Tmp;
15092  switch (CFP->getSimpleValueType(0).SimpleTy) {
15093  default:
15094  llvm_unreachable("Unknown FP type");
15095  case MVT::f16: // We don't do this for these yet.
15096  case MVT::f80:
15097  case MVT::f128:
15098  case MVT::ppcf128:
15099  return SDValue();
15100  case MVT::f32:
15101  if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
15102  TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
15104  Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
15105  bitcastToAPInt().getZExtValue(), SDLoc(CFP),
15106  MVT::i32);
15107  return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
15108  }
15109 
15110  return SDValue();
15111  case MVT::f64:
15112  if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
15113  !ST->isVolatile()) ||
15114  TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
15116  Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
15117  getZExtValue(), SDLoc(CFP), MVT::i64);
15118  return DAG.getStore(Chain, DL, Tmp,
15119  Ptr, ST->getMemOperand());
15120  }
15121 
15122  if (!ST->isVolatile() &&
15123  TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
15124  // Many FP stores are not made apparent until after legalize, e.g. for
15125  // argument passing. Since this is so common, custom legalize the
15126  // 64-bit integer store into two 32-bit stores.
15127  uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
15128  SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
15129  SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
15130  if (DAG.getDataLayout().isBigEndian())
15131  std::swap(Lo, Hi);
15132 
15133  unsigned Alignment = ST->getAlignment();
15134  MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
15135  AAMDNodes AAInfo = ST->getAAInfo();
15136 
15137  SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
15138  ST->getAlignment(), MMOFlags, AAInfo);
15139  Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
15140  DAG.getConstant(4, DL, Ptr.getValueType()));
15141  Alignment = MinAlign(Alignment, 4U);
15142  SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
15143  ST->getPointerInfo().getWithOffset(4),
15144  Alignment, MMOFlags, AAInfo);
15145  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
15146  St0, St1);
15147  }
15148 
15149  return SDValue();
15150  }
15151 }
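// Illustrative results of the transform above (informal DAG notation):
//
//   (store f32 1.0, Ptr) -> (store i32 0x3F800000, Ptr)
//   (store f64 1.0, Ptr) -> (store i64 0x3FF0000000000000, Ptr)
//
// and, when only i32 stores are available, the f64 case becomes two i32
// stores of the halves at Ptr and Ptr+4, swapped on big-endian targets.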
15152 
15153 SDValue DAGCombiner::visitSTORE(SDNode *N) {
15154  StoreSDNode *ST = cast<StoreSDNode>(N);
15155  SDValue Chain = ST->getChain();
15156  SDValue Value = ST->getValue();
15157  SDValue Ptr = ST->getBasePtr();
15158 
15159  // If this is a store of a bit convert, store the input value if the
15160  // resultant store does not need a higher alignment than the original.
15161  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
15162  ST->isUnindexed()) {
15163  EVT SVT = Value.getOperand(0).getValueType();
15164  // If the store is volatile, we only want to change the store type if the
15165  // resulting store is legal. Otherwise we might increase the number of
15166  // memory accesses. We don't care if the original type was legal or not
15167  // as we assume software couldn't rely on the number of accesses of an
15168  // illegal type.
15169  if (((!LegalOperations && !ST->isVolatile()) ||
15170  TLI.isOperationLegal(ISD::STORE, SVT)) &&
15171  TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
15172  unsigned OrigAlign = ST->getAlignment();
15173  bool Fast = false;
15174  if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
15175  ST->getAddressSpace(), OrigAlign, &Fast) &&
15176  Fast) {
15177  return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
15178  ST->getPointerInfo(), OrigAlign,
15179  ST->getMemOperand()->getFlags(), ST->getAAInfo());
15180  }
15181  }
15182  }
15183 
15184  // Turn 'store undef, Ptr' -> nothing.
15185  if (Value.isUndef() && ST->isUnindexed())
15186  return Chain;
15187 
15188  // Try to infer better alignment information than the store already has.
15189  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
15190  if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
15191  if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
15192  SDValue NewStore =
15193  DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
15194  ST->getMemoryVT(), Align,
15195  ST->getMemOperand()->getFlags(), ST->getAAInfo());
15196  // NewStore will always be N as we are only refining the alignment
15197  assert(NewStore.getNode() == N);
15198  (void)NewStore;
15199  }
15200  }
15201  }
15202 
15203  // Try transforming a pair of floating point load / store ops to integer
15204  // load / store ops.
15205  if (SDValue NewST = TransformFPLoadStorePair(N))
15206  return NewST;
15207 
15208  if (ST->isUnindexed()) {
15209  // Walk up chain skipping non-aliasing memory nodes, on this store and any
15210  // adjacent stores.
15211  if (findBetterNeighborChains(ST)) {
15212  // replaceStoreChain uses CombineTo, which handles all of the worklist
15213  // manipulation. Return the original node to not do anything else.
15214  return SDValue(ST, 0);
15215  }
15216  Chain = ST->getChain();
15217  }
15218 
15219  // FIXME: is there such a thing as a truncating indexed store?
15220  if (ST->isTruncatingStore() && ST->isUnindexed() &&
15221  Value.getValueType().isInteger() &&
15222  (!isa<ConstantSDNode>(Value) ||
15223  !cast<ConstantSDNode>(Value)->isOpaque())) {
15224  // See if we can simplify the input to this truncstore with knowledge that
15225  // only the low bits are being used. For example:
15226  // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
15227  SDValue Shorter = DAG.GetDemandedBits(
15228  Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
15229  ST->getMemoryVT().getScalarSizeInBits()));
15230  AddToWorklist(Value.getNode());
15231  if (Shorter.getNode())
15232  return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
15233  Ptr, ST->getMemoryVT(), ST->getMemOperand());
15234 
15235  // Otherwise, see if we can simplify the operation with
15236  // SimplifyDemandedBits, which only works if the value has a single use.
15237  if (SimplifyDemandedBits(
15238  Value,
15239  APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
15240  ST->getMemoryVT().getScalarSizeInBits()))) {
15241  // Re-visit the store if anything changed and the store hasn't been merged
15242  // with another node (N is deleted). SimplifyDemandedBits will add Value's
15243  // node back to the worklist if necessary, but we also need to re-visit
15244  // the Store node itself.
15245  if (N->getOpcode() != ISD::DELETED_NODE)
15246  AddToWorklist(N);
15247  return SDValue(N, 0);
15248  }
15249  }
15250 
15251  // If this is a load followed by a store to the same location, then the store
15252  // is dead/noop.
15253  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
15254  if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
15255  ST->isUnindexed() && !ST->isVolatile() &&
15256  // There can't be any side effects between the load and store, such as
15257  // a call or store.
15258  Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
15259  // The store is dead, remove it.
15260  return Chain;
15261  }
15262  }
15263 
15264  if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
15265  if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
15266  !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
15267  ST->getMemoryVT() == ST1->getMemoryVT()) {
15268  // If this is a store followed by a store with the same value to the same
15269  // location, then the store is dead/noop.
15270  if (ST1->getValue() == Value) {
15271  // The store is dead, remove it.
15272  return Chain;
15273  }
15274 
15275  // If this store overwrites a preceding store to the same location,
15276  // and no other node is chained to that earlier store, we can
15277  // effectively drop it. Do not remove stores to undef as they may be
15278  // used as data sinks.
15279  if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
15280  !ST1->getBasePtr().isUndef()) {
15281  // ST1 is fully overwritten and can be elided. Combine with its chain
15282  // value.
15283  CombineTo(ST1, ST1->getChain());
15284  return SDValue();
15285  }
15286  }
15287  }
15288 
15289  // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
15290  // truncating store. We can do this even if this is already a truncstore.
15291  if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
15292  && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
15293  TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
15294  ST->getMemoryVT())) {
15295  return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
15296  Ptr, ST->getMemoryVT(), ST->getMemOperand());
15297  }
15298 
15299  // Always perform this optimization before types are legal. If the target
15300  // prefers, also try this after legalization to catch stores that were created
15301  // by intrinsics or other nodes.
15302  if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) {
15303  while (true) {
15304  // There can be multiple store sequences on the same chain.
15305  // Keep trying to merge store sequences until we are unable to do so
15306  // or until we merge the last store on the chain.
15307  bool Changed = MergeConsecutiveStores(ST);
15308  if (!Changed) break;
15309  // Return N, as the merge only uses CombineTo and no worklist
15310  // cleanup is necessary.
15311  if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
15312  return SDValue(N, 0);
15313  }
15314  }
15315 
15316  // Try transforming N to an indexed store.
15317  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
15318  return SDValue(N, 0);
15319 
15320  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
15321  //
15322  // Make sure to do this only after attempting to merge stores in order to
15323  // avoid changing the types of some subset of stores due to visit order,
15324  // preventing their merging.
15325  if (isa<ConstantFPSDNode>(ST->getValue())) {
15326  if (SDValue NewSt = replaceStoreOfFPConstant(ST))
15327  return NewSt;
15328  }
15329 
15330  if (SDValue NewSt = splitMergedValStore(ST))
15331  return NewSt;
15332 
15333  return ReduceLoadOpStoreWidth(N);
15334 }
15335 
15336 /// For the store instruction sequence below, the F and I values
15337 /// are bundled together as an i64 value before being stored into memory.
15338 /// Sometimes it is more efficient to generate separate stores for F and I,
15339 /// which can remove the bitwise instructions or sink them to colder places.
15340 ///
15341 /// (store (or (zext (bitcast F to i32) to i64),
15342 /// (shl (zext I to i64), 32)), addr) -->
15343 /// (store F, addr) and (store I, addr+4)
15344 ///
15345 /// Similarly, splitting for other merged store can also be beneficial, like:
15346 /// For pair of {i32, i32}, i64 store --> two i32 stores.
15347 /// For pair of {i32, i16}, i64 store --> two i32 stores.
15348 /// For pair of {i16, i16}, i32 store --> two i16 stores.
15349 /// For pair of {i16, i8}, i32 store --> two i16 stores.
15350 /// For pair of {i8, i8}, i16 store --> two i8 stores.
15351 ///
15352 /// We allow each target to determine specifically which kind of splitting is
15353 /// supported.
15354 ///
15355 /// The store patterns are commonly seen from the simple code snippet below
15356 /// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
15357 /// void goo(const std::pair<int, float> &);
15358 /// hoo() {
15359 /// ...
15360 /// goo(std::make_pair(tmp, ftmp));
15361 /// ...
15362 /// }
15363 ///
15364 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
15365  if (OptLevel == CodeGenOpt::None)
15366  return SDValue();
15367 
15368  SDValue Val = ST->getValue();
15369  SDLoc DL(ST);
15370 
15371  // Match OR operand.
15372  if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
15373  return SDValue();
15374 
15375  // Match SHL operand and get Lower and Higher parts of Val.
15376  SDValue Op1 = Val.getOperand(0);
15377  SDValue Op2 = Val.getOperand(1);
15378  SDValue Lo, Hi;
15379  if (Op1.getOpcode() != ISD::SHL) {
15380  std::swap(Op1, Op2);
15381  if (Op1.getOpcode() != ISD::SHL)
15382  return SDValue();
15383  }
15384  Lo = Op2;
15385  Hi = Op1.getOperand(0);
15386  if (!Op1.hasOneUse())
15387  return SDValue();
15388 
15389  // Match shift amount to HalfValBitSize.
15390  unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
15391  ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
15392  if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
15393  return SDValue();
15394 
15395  // Lo and Hi are zero-extended from integers whose size is at most
15396  // HalfValBitSize (e.g. i32 zero-extended to i64).
15397  if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
15398  !Lo.getOperand(0).getValueType().isScalarInteger() ||
15399  Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
15400  Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
15401  !Hi.getOperand(0).getValueType().isScalarInteger() ||
15402  Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
15403  return SDValue();
15404 
15405  // Use the EVT of low and high parts before bitcast as the input
15406  // of target query.
15407  EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
15408  ? Lo.getOperand(0).getValueType()
15409  : Lo.getValueType();
15410  EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
15411  ? Hi.getOperand(0).getValueType()
15412  : Hi.getValueType();
15413  if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
15414  return SDValue();
15415 
15416  // Start to split store.
15417  unsigned Alignment = ST->getAlignment();
15418  MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
15419  AAMDNodes AAInfo = ST->getAAInfo();
15420 
15421  // Change the sizes of Lo and Hi's value types to HalfValBitSize.
15422  EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
15423  Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
15424  Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
15425 
15426  SDValue Chain = ST->getChain();
15427  SDValue Ptr = ST->getBasePtr();
15428  // Lower value store.
15429  SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
15430  ST->getAlignment(), MMOFlags, AAInfo);
15431  Ptr =
15432  DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
15433  DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
15434  // Higher value store.
15435  SDValue St1 =
15436  DAG.getStore(St0, DL, Hi, Ptr,
15437  ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
15438  Alignment / 2, MMOFlags, AAInfo);
15439  return St1;
15440 }
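// A worked instance of the split above (little-endian, hypothetical
// alignment): for an i64 store with alignment 8, St0 writes the low i32
// at Ptr with alignment 8 and St1 writes the high i32 at Ptr + 4 with
// alignment 4 (Alignment / 2), chained on St0.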
15441 
15442 /// Convert a disguised subvector insertion into a shuffle:
15443 /// insert_vector_elt V, (bitcast X from vector type), IdxC -->
15444 /// bitcast(shuffle (bitcast V), (extended X), Mask)
15445 /// Note: We do not use an insert_subvector node because that requires a legal
15446 /// subvector type.
15447 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
15448  SDValue InsertVal = N->getOperand(1);
15449  if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
15450  !InsertVal.getOperand(0).getValueType().isVector())
15451  return SDValue();
15452 
15453  SDValue SubVec = InsertVal.getOperand(0);
15454  SDValue DestVec = N->getOperand(0);
15455  EVT SubVecVT = SubVec.getValueType();
15456  EVT VT = DestVec.getValueType();
15457  unsigned NumSrcElts = SubVecVT.getVectorNumElements();
15458  unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
15459  unsigned NumMaskVals = ExtendRatio * NumSrcElts;
15460 
15461  // Step 1: Create a shuffle mask that implements this insert operation. The
15462  // vector that we are inserting into will be operand 0 of the shuffle, so
15463  // those elements are just 'i'. The inserted subvector is in the first
15464  // positions of operand 1 of the shuffle. Example:
15465  // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
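  // Working the example through the mask construction below: VT = v4i32
  // (128 bits), SubVecVT = v2i16 (32 bits) and InsIndex = 2 give
  // ExtendRatio = 4, NumSrcElts = 2 and NumMaskVals = 8; only i = 4 and 5
  // satisfy i / NumSrcElts == InsIndex, producing {0,1,2,3,8,9,6,7}.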
15466  SmallVector<int, 16> Mask(NumMaskVals);
15467  for (unsigned i = 0; i != NumMaskVals; ++i) {
15468  if (i / NumSrcElts == InsIndex)
15469  Mask[i] = (i % NumSrcElts) + NumMaskVals;
15470  else
15471  Mask[i] = i;
15472  }
15473 
15474  // Bail out if the target can not handle the shuffle we want to create.
15475  EVT SubVecEltVT = SubVecVT.getVectorElementType();
15476  EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
15477  if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
15478  return SDValue();
15479 
15480  // Step 2: Create a wide vector from the inserted source vector by appending
15481  // undefined elements. This is the same size as our destination vector.
15482  SDLoc DL(N);
15483  SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
15484  ConcatOps[0] = SubVec;
15485  SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
15486 
15487  // Step 3: Shuffle in the padded subvector.
15488  SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
15489  SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
15490  AddToWorklist(PaddedSubV.getNode());
15491  AddToWorklist(DestVecBC.getNode());
15492  AddToWorklist(Shuf.getNode());
15493  return DAG.getBitcast(VT, Shuf);
15494 }
15495 
15496 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
15497  SDValue InVec = N->getOperand(0);
15498  SDValue InVal = N->getOperand(1);
15499  SDValue EltNo = N->getOperand(2);
15500  SDLoc DL(N);
15501 
15502  // If the inserted element is an UNDEF, just use the input vector.
15503  if (InVal.isUndef())
15504  return InVec;
15505 
15506  EVT VT = InVec.getValueType();
15507  unsigned NumElts = VT.getVectorNumElements();
15508 
15509  // Remove redundant insertions:
15510  // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
15511  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15512  InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
15513  return InVec;
15514 
15515  auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
15516  if (!IndexC) {
15517  // If this is a variable insert into an undef vector, it might be better to splat:
15518  // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
15519  if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
15520  SmallVector<SDValue, 8> Ops(NumElts, InVal);
15521  return DAG.getBuildVector(VT, DL, Ops);
15522  }
15523  return SDValue();
15524  }
15525 
15526  // We must know which element is being inserted for folds below here.
15527  unsigned Elt = IndexC->getZExtValue();
15528  if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
15529  return Shuf;
15530 
15531  // Canonicalize insert_vector_elt dag nodes.
15532  // Example:
15533  // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
15534  // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
15535  //
15536  // Do this only if the child insert_vector node has one use; also
15537  // do this only if indices are both constants and Idx1 < Idx0.
15538  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
15539  && isa<ConstantSDNode>(InVec.getOperand(2))) {
15540  unsigned OtherElt = InVec.getConstantOperandVal(2);
15541  if (Elt < OtherElt) {
15542  // Swap nodes.
15543  SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
15544  InVec.getOperand(0), InVal, EltNo);
15545  AddToWorklist(NewOp.getNode());
15546  return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
15547  VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
15548  }
15549  }
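  // For example, assuming constant indices and a one-use inner insert:
  //   (insert_vector_elt (insert_vector_elt A, x, 3), y, 1)
  //   --> (insert_vector_elt (insert_vector_elt A, y, 1), x, 3)
  // so chains of inserts become sorted with the larger index outermost.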
15550 
15551  // If we can't generate a legal BUILD_VECTOR, exit
15552  if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
15553  return SDValue();
15554 
15555  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
15556  // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
15557  // vector elements.
15558  SmallVector<SDValue, 8> Ops;
15559  // Do not combine these two vectors if the output vector will not replace
15560  // the input vector.
15561  if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
15562  Ops.append(InVec.getNode()->op_begin(),
15563  InVec.getNode()->op_end());
15564  } else if (InVec.isUndef()) {
15565  Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
15566  } else {
15567  return SDValue();
15568  }
15569  assert(Ops.size() == NumElts && "Unexpected vector size");
15570 
15571  // Insert the element
15572  if (Elt < Ops.size()) {
15573  // All the operands of BUILD_VECTOR must have the same type;
15574  // we enforce that here.
15575  EVT OpVT = Ops[0].getValueType();
15576  Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
15577  }
15578 
15579  // Return the new vector
15580  return DAG.getBuildVector(VT, DL, Ops);
15581 }
15582 
15583 SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
15584  SDValue EltNo,
15585  LoadSDNode *OriginalLoad) {
15586  assert(!OriginalLoad->isVolatile());
15587 
15588  EVT ResultVT = EVE->getValueType(0);
15589  EVT VecEltVT = InVecVT.getVectorElementType();
15590  unsigned Align = OriginalLoad->getAlignment();
15591  unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
15592  VecEltVT.getTypeForEVT(*DAG.getContext()));
15593 
15594  if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
15595  return SDValue();
15596 
15597  ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
15598  ISD::EXTLOAD : ISD::NON_EXTLOAD;
15599  if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
15600  return SDValue();
15601 
15602  Align = NewAlign;
15603 
15604  SDValue NewPtr = OriginalLoad->getBasePtr();
15605  SDValue Offset;
15606  EVT PtrType = NewPtr.getValueType();
15607  MachinePointerInfo MPI;
15608  SDLoc DL(EVE);
15609  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
15610  int Elt = ConstEltNo->getZExtValue();
15611  unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
15612  Offset = DAG.getConstant(PtrOff, DL, PtrType);
15613  MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
15614  } else {
15615  Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
15616  Offset = DAG.getNode(
15617  ISD::MUL, DL, PtrType, Offset,
15618  DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
15619  MPI = OriginalLoad->getPointerInfo();
15620  }
15621  NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
15622 
15623  // The replacement we need to do here is a little tricky: we need to
15624  // replace an extractelement of a load with a load.
15625  // Use ReplaceAllUsesOfValuesWith to do the replacement.
15626  // Note that this replacement assumes that the extractelement is the only
15627  // use of the load; that's okay because we don't want to perform this
15628  // transformation in other cases anyway.
15629  SDValue Load;
15630  SDValue Chain;
15631  if (ResultVT.bitsGT(VecEltVT)) {
15632  // If the result type of vextract is wider than the load, then issue an
15633  // extending load instead.
15634  ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
15635  VecEltVT)
15636  ? ISD::ZEXTLOAD
15637  : ISD::EXTLOAD;
15638  Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
15639  OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
15640  Align, OriginalLoad->getMemOperand()->getFlags(),
15641  OriginalLoad->getAAInfo());
15642  Chain = Load.getValue(1);
15643  } else {
15644  Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
15645  MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
15646  OriginalLoad->getAAInfo());
15647  Chain = Load.getValue(1);
15648  if (ResultVT.bitsLT(VecEltVT))
15649  Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
15650  else
15651  Load = DAG.getBitcast(ResultVT, Load);
15652  }
15653  WorklistRemover DeadNodes(*this);
15654  SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
15655  SDValue To[] = { Load, Chain };
15656  DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
15657  // Since we're explicitly calling ReplaceAllUses, add the new node to the
15658  // worklist explicitly as well.
15659  AddToWorklist(Load.getNode());
15660  AddUsersToWorklist(Load.getNode()); // Add users too
15661  // Make sure to revisit this node to clean it up; it will usually be dead.
15662  AddToWorklist(EVE);
15663  ++OpsNarrowed;
15664  return SDValue(EVE, 0);
15665 }
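// An illustrative narrowing (informal notation, assuming 4-byte elements):
//   (extract_vector_elt (load v4f32, %p), 2) -> (load f32, %p + 8)
// For a variable index i, the offset is computed as i * 4 instead.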
15666 
15667 /// Transform a vector binary operation into a scalar binary operation by moving
15668 /// the math/logic after an extract element of a vector.
15669 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
15670  bool LegalOperations) {
15671  SDValue Vec = ExtElt->getOperand(0);
15672  SDValue Index = ExtElt->getOperand(1);
15673  auto *IndexC = dyn_cast<ConstantSDNode>(Index);
15674  if (!IndexC || !ISD::isBinaryOp(Vec.getNode()) || !Vec.hasOneUse())
15675  return SDValue();
15676 
15677  // Targets may want to avoid this to prevent an expensive register transfer.
15678  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15679  if (!TLI.shouldScalarizeBinop(Vec))
15680  return SDValue();
15681 
15682  // Extracting an element of a vector constant is constant-folded, so this
15683  // transform is just replacing a vector op with a scalar op while moving the
15684  // extract.
15685  SDValue Op0 = Vec.getOperand(0);
15686  SDValue Op1 = Vec.getOperand(1);
15687  if (isAnyConstantBuildVector(Op0, true) ||
15688  isAnyConstantBuildVector(Op1, true)) {
15689  // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
15690  // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
15691  SDLoc DL(ExtElt);
15692  EVT VT = ExtElt->getValueType(0);
15693  SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
15694  SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
15695  return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
15696  }
15697 
15698  return SDValue();
15699 }
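// An illustrative scalarization (hypothetical constants):
//   extract_vector_elt (add X, <1,2,3,4>), 2
//   --> add (extract_vector_elt X, 2), 3
// where the constant side folds away because extracting an element of a
// constant build_vector is constant-folded.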
15700 
15701 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
15702  SDValue VecOp = N->getOperand(0);
15703  SDValue Index = N->getOperand(1);
15704  EVT ScalarVT = N->getValueType(0);
15705  EVT VecVT = VecOp.getValueType();
15706  if (VecOp.isUndef())
15707  return DAG.getUNDEF(ScalarVT);
15708 
15709  // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
15710  //
15711  // This only really matters if the index is non-constant since other combines
15712  // on the constant elements already work.
15713  SDLoc DL(N);
15714  if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
15715  Index == VecOp.getOperand(2)) {
15716  SDValue Elt = VecOp.getOperand(1);
15717  return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
15718  }
15719 
15720  // (vextract (scalar_to_vector val), 0) -> val
15721  if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
15722  // Check if the result type doesn't match the inserted element type. A
15723  // SCALAR_TO_VECTOR may truncate the inserted element and the
15724  // EXTRACT_VECTOR_ELT may widen the extracted vector.
15725  SDValue InOp = VecOp.getOperand(0);
15726  if (InOp.getValueType() != ScalarVT) {
15727  assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
15728  return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
15729  }
15730  return InOp;
15731  }
15732 
15733  // extract_vector_elt of out-of-bounds element -> UNDEF
15734  auto *IndexC = dyn_cast<ConstantSDNode>(Index);
15735  unsigned NumElts = VecVT.getVectorNumElements();
15736  if (IndexC && IndexC->getAPIntValue().uge(NumElts))
15737  return DAG.getUNDEF(ScalarVT);
15738 
15739  // extract_vector_elt (build_vector x, y), 1 -> y
15740  if (IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR &&
15741  TLI.isTypeLegal(VecVT) &&
15742  (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
15743  SDValue Elt = VecOp.getOperand(IndexC->getZExtValue());
15744  EVT InEltVT = Elt.getValueType();
15745 
15746  // Sometimes build_vector's scalar input types do not match result type.
15747  if (ScalarVT == InEltVT)
15748  return Elt;
15749 
15750  // TODO: It may be useful to truncate if free if the build_vector implicitly
15751  // converts.
15752  }
15753 
15754  // TODO: These transforms should not require the 'hasOneUse' restriction, but
15755  // there are regressions on multiple targets without it. We can end up with a
15756  // mess of scalar and vector code if we reduce only part of the DAG to scalar.
15757  if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
15758  VecOp.hasOneUse()) {
15759  // The vector index of the LSBs of the source depends on the endianness.
15760  bool IsLE = DAG.getDataLayout().isLittleEndian();
15761  unsigned ExtractIndex = IndexC->getZExtValue();
15762  // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
15763  unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
15764  SDValue BCSrc = VecOp.getOperand(0);
15765  if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
15766  return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
15767 
15768  if (LegalTypes && BCSrc.getValueType().isInteger() &&
15769  BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
15770  // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
15771  // trunc i64 X to i32
15772  SDValue X = BCSrc.getOperand(0);
15773  assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
15774  "Extract element and scalar to vector can't change element type "
15775  "from FP to integer.");
15776  unsigned XBitWidth = X.getValueSizeInBits();
15777  unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
15778  BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
15779 
15780  // An extract element return value type can be wider than its vector
15781  // operand element type. In that case, the high bits are undefined, so
15782  // it's possible that we may need to extend rather than truncate.
15783  if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
15784  assert(XBitWidth % VecEltBitWidth == 0 &&
15785  "Scalar bitwidth must be a multiple of vector element bitwidth");
15786  return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
15787  }
15788  }
15789  }
15790 
15791  if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
15792  return BO;
15793 
15794  // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
15795  // We only perform this optimization before the op legalization phase because
15796  // we may introduce new vector instructions which are not backed by TD
15797  // patterns. For example on AVX, extracting elements from a wide vector
15798  // without using extract_subvector. However, if we can find an underlying
15799  // scalar value, then we can always use that.
15800  if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
15801  auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
15802  // Find the new index to extract from.
15803  int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
15804 
15805  // Extracting an undef index is undef.
15806  if (OrigElt == -1)
15807  return DAG.getUNDEF(ScalarVT);
15808 
15809  // Select the right vector half to extract from.
15810  SDValue SVInVec;
15811  if (OrigElt < (int)NumElts) {
15812  SVInVec = VecOp.getOperand(0);
15813  } else {
15814  SVInVec = VecOp.getOperand(1);
15815  OrigElt -= NumElts;
15816  }
15817 
15818  if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
15819  SDValue InOp = SVInVec.getOperand(OrigElt);
15820  if (InOp.getValueType() != ScalarVT) {
15821  assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
15822  InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
15823  }
15824 
15825  return InOp;
15826  }
15827 
15828  // FIXME: We should handle recursing on other vector shuffles and
15829  // scalar_to_vector here as well.
15830 
15831  if (!LegalOperations ||
15832  // FIXME: Should really be just isOperationLegalOrCustom.
15833  TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
15834  TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
15835  EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
15836  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
15837  DAG.getConstant(OrigElt, DL, IndexTy));
15838  }
15839  }
15840 
15841  // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
15842  // simplify it based on the (valid) extraction indices.
15843  if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
15844  return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15845  Use->getOperand(0) == VecOp &&
15846  isa<ConstantSDNode>(Use->getOperand(1));
15847  })) {
15848  APInt DemandedElts = APInt::getNullValue(NumElts);
15849  for (SDNode *Use : VecOp->uses()) {
15850  auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
15851  if (CstElt->getAPIntValue().ult(NumElts))
15852  DemandedElts.setBit(CstElt->getZExtValue());
15853  }
15854  if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
15855  // We simplified the vector operand of this extract element. If this
15856  // extract is not dead, visit it again so it is folded properly.
15857  if (N->getOpcode() != ISD::DELETED_NODE)
15858  AddToWorklist(N);
15859  return SDValue(N, 0);
15860  }
15861  }
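  // For example, if a v4i32 source is only ever extracted at constant
  // indices 0 and 2, DemandedElts is 0b0101 and the remaining lanes are
  // free to be simplified to undef.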
15862 
15863  // Everything under here is trying to match an extract of a loaded value.
15864  // If the result of the load has to be truncated, then it's not necessarily
15865  // profitable.
15866  bool BCNumEltsChanged = false;
15867  EVT ExtVT = VecVT.getVectorElementType();
15868  EVT LVT = ExtVT;
15869  if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
15870  return SDValue();
15871 
15872  if (VecOp.getOpcode() == ISD::BITCAST) {
15873  // Don't duplicate a load with other uses.
15874  if (!VecOp.hasOneUse())
15875  return SDValue();
15876 
15877  EVT BCVT = VecOp.getOperand(0).getValueType();
15878  if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
15879  return SDValue();
15880  if (NumElts != BCVT.getVectorNumElements())
15881  BCNumEltsChanged = true;
15882  VecOp = VecOp.getOperand(0);
15883  ExtVT = BCVT.getVectorElementType();
15884  }
15885 
15886  // extract (vector load $addr), i --> load $addr + i * size
15887  if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
15888  ISD::isNormalLoad(VecOp.getNode()) &&
15889  !Index->hasPredecessor(VecOp.getNode())) {
15890  auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
15891  if (VecLoad && !VecLoad->isVolatile())
15892  return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
15893  }
15894 
15895  // Perform only after legalization to ensure build_vector / vector_shuffle
15896  // optimizations have already been done.
15897  if (!LegalOperations || !IndexC)
15898  return SDValue();
15899 
15900  // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
15901  // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
15902  // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
15903  int Elt = IndexC->getZExtValue();
15904  LoadSDNode *LN0 = nullptr;
15905  if (ISD::isNormalLoad(VecOp.getNode())) {
15906  LN0 = cast<LoadSDNode>(VecOp);
15907  } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
15908  VecOp.getOperand(0).getValueType() == ExtVT &&
15909  ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
15910  // Don't duplicate a load with other uses.
15911  if (!VecOp.hasOneUse())
15912  return SDValue();
15913 
15914  LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
15915  }
15916  if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
15917  // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
15918  // =>
15919  // (load $addr+1*size)
15920 
15921  // Don't duplicate a load with other uses.
15922  if (!VecOp.hasOneUse())
15923  return SDValue();
15924 
15925  // If the bit convert changed the number of elements, it is unsafe
15926  // to examine the mask.
15927  if (BCNumEltsChanged)
15928  return SDValue();
15929 
15930  // Select the input vector, guarding against an out-of-range extract index.
15931  int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
15932  VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
15933 
15934  if (VecOp.getOpcode() == ISD::BITCAST) {
15935  // Don't duplicate a load with other uses.
15936  if (!VecOp.hasOneUse())
15937  return SDValue();
15938 
15939  VecOp = VecOp.getOperand(0);
15940  }
15941  if (ISD::isNormalLoad(VecOp.getNode())) {
15942  LN0 = cast<LoadSDNode>(VecOp);
15943  Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
15944  Index = DAG.getConstant(Elt, DL, Index.getValueType());
15945  }
15946  }
15947 
15948  // Make sure we found a non-volatile load and the extractelement is
15949  // the only use.
15950  if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
15951  return SDValue();
15952 
15953  // If Idx was -1 above, Elt is going to be -1, so just return undef.
15954  if (Elt == -1)
15955  return DAG.getUNDEF(LVT);
15956 
15957  return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
15958 }
15959 
15960 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
15961 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
15962  // We perform this optimization post type-legalization because
15963  // the type-legalizer often scalarizes integer-promoted vectors.
15964  // Performing this optimization before may create bit-casts which
15965  // will be type-legalized to complex code sequences.
15966  // We perform this optimization only before the operation legalizer because we
15967  // may introduce illegal operations.
15968  if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
15969  return SDValue();
15970 
15971  unsigned NumInScalars = N->getNumOperands();
15972  SDLoc DL(N);
15973  EVT VT = N->getValueType(0);
15974 
15975  // Check to see if this is a BUILD_VECTOR of a bunch of values
15976  // which come from any_extend or zero_extend nodes. If so, we can create
15977  // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
15978  // optimizations. We do not handle sign-extend because we can't fill the sign
15979  // using shuffles.
15980  EVT SourceType = MVT::Other;
15981  bool AllAnyExt = true;
15982 
15983  for (unsigned i = 0; i != NumInScalars; ++i) {
15984  SDValue In = N->getOperand(i);
15985  // Ignore undef inputs.
15986  if (In.isUndef()) continue;
15987 
15988  bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
15989  bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
15990 
15991  // Abort if the element is not an extension.
15992  if (!ZeroExt && !AnyExt) {
15993  SourceType = MVT::Other;
15994  break;
15995  }
15996 
15997  // The input is a ZeroExt or AnyExt. Check the original type.
15998  EVT InTy = In.getOperand(0).getValueType();
15999 
16000  // Check that all of the widened source types are the same.
16001  if (SourceType == MVT::Other)
16002  // First time.
16003  SourceType = InTy;
16004  else if (InTy != SourceType) {
16005  // Multiple incoming types. Abort.
16006  SourceType = MVT::Other;
16007  break;
16008  }
16009 
16010  // Check if all of the extends are ANY_EXTENDs.
16011  AllAnyExt &= AnyExt;
16012  }
16013 
16014  // In order to have valid types, all of the inputs must be extended from the
16015  // same source type and all of the inputs must be any or zero extend.
16016  // Scalar sizes must be a power of two.
16017  EVT OutScalarTy = VT.getScalarType();
16018  bool ValidTypes = SourceType != MVT::Other &&
16019  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
16020  isPowerOf2_32(SourceType.getSizeInBits());
16021 
16022  // Create a new simpler BUILD_VECTOR sequence which other optimizations can
16023  // turn into a single shuffle instruction.
16024  if (!ValidTypes)
16025  return SDValue();
16026 
16027  bool isLE = DAG.getDataLayout().isLittleEndian();
16028  unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
16029  assert(ElemRatio > 1 && "Invalid element size ratio");
16030  SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
16031  DAG.getConstant(0, DL, SourceType);
16032 
16033  unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
16034  SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
16035 
16036  // Populate the new build_vector
16037  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
16038  SDValue Cast = N->getOperand(i);
16039  assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
16040  Cast.getOpcode() == ISD::ZERO_EXTEND ||
16041  Cast.isUndef()) && "Invalid cast opcode");
16042  SDValue In;
16043  if (Cast.isUndef())
16044  In = DAG.getUNDEF(SourceType);
16045  else
16046  In = Cast->getOperand(0);
16047  unsigned Index = isLE ? (i * ElemRatio) :
16048  (i * ElemRatio + (ElemRatio - 1));
16049 
16050  assert(Index < Ops.size() && "Invalid index");
16051  Ops[Index] = In;
16052  }
16053 
16054  // The type of the new BUILD_VECTOR node.
16055  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
16056  assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
16057  "Invalid vector size");
16058  // Check if the new vector type is legal.
16059  if (!isTypeLegal(VecVT) ||
16060  (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
16061  !TLI.isOperationCustom(ISD::BUILD_VECTOR, VecVT)))
16062  return SDValue();
16063 
16064  // Make the new BUILD_VECTOR.
16065  SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
16066 
16067  // The new BUILD_VECTOR node has the potential to be further optimized.
16068  AddToWorklist(BV.getNode());
16069  // Bitcast to the desired type.
16070  return DAG.getBitcast(VT, BV);
16071 }
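// An illustrative rewrite (little-endian, hypothetical types):
//   (v2i32 build_vector (zext i16 a), (zext i16 b))
//   --> (v2i32 bitcast (v4i16 build_vector a, 0, b, 0))
// With all-any_extend inputs, the filler elements are undef instead of 0.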
16072 
16073 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
16074  ArrayRef<int> VectorMask,
16075  SDValue VecIn1, SDValue VecIn2,
16076  unsigned LeftIdx) {
16077  MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
16078  SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
16079 
16080  EVT VT = N->getValueType(0);
16081  EVT InVT1 = VecIn1.getValueType();
16082  EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
16083 
16084  unsigned Vec2Offset = 0;
16085  unsigned NumElems = VT.getVectorNumElements();
16086  unsigned ShuffleNumElems = NumElems;
16087 
16088  // In case both input vectors are extracted from the same base
16089  // vector, we do not need an extra addend (Vec2Offset) while
16090  // computing the shuffle mask.
16091  if (!VecIn2 || !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
16092  !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
16093  !(VecIn1.getOperand(0) == VecIn2.getOperand(0)))
16094  Vec2Offset = InVT1.getVectorNumElements();
16095 
16096  // We can't generate a shuffle node with mismatched input and output types.
16097  // Try to make the types match the type of the output.
16098  if (InVT1 != VT || InVT2 != VT) {
16099  if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
16100  // If the output vector length is a multiple of both input lengths,
16101  // we can concatenate them and pad the rest with undefs.
16102  unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
16103  assert(NumConcats >= 2 && "Concat needs at least two inputs!");
16104  SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
16105  ConcatOps[0] = VecIn1;
16106  ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
16107  VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
16108  VecIn2 = SDValue();
16109  } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
16110  if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
16111  return SDValue();
16112 
16113  if (!VecIn2.getNode()) {
16114  // If we only have one input vector, and it's twice the size of the
16115  // output, split it in two.
16116  VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
16117  DAG.getConstant(NumElems, DL, IdxTy));
16118  VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
16119  // Since we now have shorter input vectors, adjust the offset of the
16120  // second vector's start.
16121  Vec2Offset = NumElems;
16122  } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
16123  // VecIn1 is wider than the output, and we have another, possibly
16124  // smaller input. Pad the smaller input with undefs, shuffle at the
16125  // input vector width, and extract the output.
16126  // The shuffle type is different than VT, so check legality again.
16127  if (LegalOperations &&
16128  !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
16129  return SDValue();
16130 
16131  // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
16132  // lower it back into a BUILD_VECTOR. So if the inserted type is
16133  // illegal, don't even try.
16134  if (InVT1 != InVT2) {
16135  if (!TLI.isTypeLegal(InVT2))
16136  return SDValue();
16137  VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
16138  DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
16139  }
16140  ShuffleNumElems = NumElems * 2;
16141  } else {
16142  // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
16143  // than VecIn1. We can't handle this for now - this case will disappear
16144  // when we start sorting the vectors by type.
16145  return SDValue();
16146  }
16147  } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
16148  InVT1.getSizeInBits() == VT.getSizeInBits()) {
16149  SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
16150  ConcatOps[0] = VecIn2;
16151  VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
16152  } else {
16153  // TODO: Support cases where the length mismatch isn't exactly by a
16154  // factor of 2.
16155  // TODO: Move this check upwards, so that if we have bad type
16156  // mismatches, we don't create any DAG nodes.
16157  return SDValue();
16158  }
16159  }
16160 
16161  // Initialize mask to undef.
16162  SmallVector<int, 8> Mask(ShuffleNumElems, -1);
16163 
16164  // Only need to run up to the number of elements actually used, not the
16165  // total number of elements in the shuffle - if we are shuffling a wider
16166  // vector, the high lanes should be set to undef.
16167  for (unsigned i = 0; i != NumElems; ++i) {
16168  if (VectorMask[i] <= 0)
16169  continue;
16170 
16171  unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
16172  if (VectorMask[i] == (int)LeftIdx) {
16173  Mask[i] = ExtIndex;
16174  } else if (VectorMask[i] == (int)LeftIdx + 1) {
16175  Mask[i] = Vec2Offset + ExtIndex;
16176  }
16177  }
16178 
16179  // The types of the input vectors may have changed above.
16180  InVT1 = VecIn1.getValueType();
16181 
16182  // If we already have a VecIn2, it should have the same type as VecIn1.
16183  // If we don't, get an undef/zero vector of the appropriate type.
16184  VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
16185  assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
16186 
16187  SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
16188  if (ShuffleNumElems > NumElems)
16189  Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
16190 
16191  return Shuffle;
16192 }
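To make the mask arithmetic in the loop above (lines 16167-16177) concrete, here is a minimal standalone sketch in plain C++ rather than the LLVM API; the VectorMask, extract indices, LeftIdx, and Vec2Offset values are hypothetical, chosen only to illustrate the computation.

#include <cstdio>
#include <vector>

int main() {
  const int LeftIdx = 1, Vec2Offset = 4, NumElems = 4;
  std::vector<int> VectorMask = {1, 2, -1, 1}; // which input each lane uses
  std::vector<int> ExtIndex = {0, 1, 0, 3};    // extract_vector_elt indices
  std::vector<int> Mask(NumElems, -1);         // -1 == undef lane
  for (int i = 0; i != NumElems; ++i) {
    if (VectorMask[i] == LeftIdx)
      Mask[i] = ExtIndex[i];                   // lane taken from VecIn1
    else if (VectorMask[i] == LeftIdx + 1)
      Mask[i] = Vec2Offset + ExtIndex[i];      // lane taken from VecIn2
  }
  for (int M : Mask)
    std::printf("%d ", M);                     // prints: 0 5 -1 3
  return 0;
}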
16193 
16194 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
16195  assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
16196 
16197  // First, determine where the build vector is not undef.
16198  // TODO: We could extend this to handle zero elements as well as undefs.
16199  int NumBVOps = BV->getNumOperands();
16200  int ZextElt = -1;
16201  for (int i = 0; i != NumBVOps; ++i) {
16202  SDValue Op = BV->getOperand(i);
16203  if (Op.isUndef())
16204  continue;
16205  if (ZextElt == -1)
16206  ZextElt = i;
16207  else
16208  return SDValue();
16209  }
16210  // Bail out if there's no non-undef element.
16211  if (ZextElt == -1)
16212  return SDValue();
16213 
16214  // The build vector contains some number of undef elements and exactly
16215  // one other element. That other element must be a zero-extended scalar
16216  // extracted from a vector at a constant index to turn this into a shuffle.
16217  // Also, require that the build vector does not implicitly truncate/extend
16218  // its elements.
16219  // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
16220  EVT VT = BV->getValueType(0);
16221  SDValue Zext = BV->getOperand(ZextElt);
16222  if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
16223  Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
16224  !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
16225  Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
16226  return SDValue();
16227 
16228  // The zero-extend's destination size must be a multiple of the source size,
16229  // and we must be building a vector the same size as the extract's source vector.
16230  SDValue Extract = Zext.getOperand(0);
16231  unsigned DestSize = Zext.getValueSizeInBits();
16232  unsigned SrcSize = Extract.getValueSizeInBits();
16233  if (DestSize % SrcSize != 0 ||
16234  Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
16235  return SDValue();
16236 
16237  // Create a shuffle mask that will combine the extracted element with zeros
16238  // and undefs.
16239  int ZextRatio = DestSize / SrcSize;
16240  int NumMaskElts = NumBVOps * ZextRatio;
16241  SmallVector<int, 32> ShufMask(NumMaskElts, -1);
16242  for (int i = 0; i != NumMaskElts; ++i) {
16243  if (i / ZextRatio == ZextElt) {
16244  // The low bits of the (potentially translated) extracted element map to
16245  // the source vector. The high bits map to zero. We will use a zero vector
16246  // as the 2nd source operand of the shuffle, so use the 1st element of
16247  // that vector (mask value is number-of-elements) for the high bits.
16248  if (i % ZextRatio == 0)
16249  ShufMask[i] = Extract.getConstantOperandVal(1);
16250  else
16251  ShufMask[i] = NumMaskElts;
16252  }
16253 
16254  // Undef elements of the build vector remain undef because we initialize
16255  // the shuffle mask with -1.
16256  }
16257 
16258  // Turn this into a shuffle with zero if that's legal.
16259  EVT VecVT = Extract.getOperand(0).getValueType();
16260  if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(ShufMask, VecVT))
16261  return SDValue();
16262 
16263  // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
16264  // bitcast (shuffle V, ZeroVec, VectorMask)
16265  SDLoc DL(BV);
16266  SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
16267  SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec,
16268  ShufMask);
16269  return DAG.getBitcast(VT, Shuf);
16270 }
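A standalone sketch of the ShufMask construction above, under assumed values: a 4-operand build_vector, ZextRatio = 2 (e.g. i16 zero-extended into i32 lanes), the single non-undef element at index 1, and a hypothetical extract index C = 3. Plain C++, not the LLVM API.

#include <cstdio>
#include <vector>

int main() {
  const int NumBVOps = 4, ZextRatio = 2, ZextElt = 1, C = 3;
  const int NumMaskElts = NumBVOps * ZextRatio;
  std::vector<int> ShufMask(NumMaskElts, -1); // undef lanes stay -1
  for (int i = 0; i != NumMaskElts; ++i) {
    if (i / ZextRatio != ZextElt)
      continue;                              // lanes of undef BV elements
    ShufMask[i] = (i % ZextRatio == 0)
                      ? C                    // low bits from the source
                      : NumMaskElts;         // high bits from the zero vector
  }
  for (int M : ShufMask)
    std::printf("%d ", M); // prints: -1 -1 3 8 -1 -1 -1 -1
  return 0;
}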
16271 
16272 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
16273 // operations. If the types of the vectors we're extracting from allow it,
16274 // turn this into a vector_shuffle node.
16275 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
16276  SDLoc DL(N);
16277  EVT VT = N->getValueType(0);
16278 
16279  // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
16280  if (!isTypeLegal(VT))
16281  return SDValue();
16282 
16283  if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
16284  return V;
16285 
16286  // May only combine to shuffle after legalize if shuffle is legal.
16287  if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
16288  return SDValue();
16289 
16290  bool UsesZeroVector = false;
16291  unsigned NumElems = N->getNumOperands();
16292 
16293  // Record, for each element of the newly built vector, which input vector
16294  // that element comes from. -1 stands for undef, 0 for the zero vector,
16295  // and positive values for the input vectors.
16296  // VectorMask maps each element to its vector number, and VecIn maps vector
16297  // numbers to their initial SDValues.
16298 
16299  SmallVector<int, 8> VectorMask(NumElems, -1);
16300  SmallVector<SDValue, 8> VecIn;
16301  VecIn.push_back(SDValue());
16302 
16303  for (unsigned i = 0; i != NumElems; ++i) {
16304  SDValue Op = N->getOperand(i);
16305 
16306  if (Op.isUndef())
16307  continue;
16308 
16309  // See if we can use a blend with a zero vector.
16310  // TODO: Should we generalize this to a blend with an arbitrary constant
16311  // vector?
16312  if (isNullConstant(Op) || isNullFPConstant(Op)) {
16313  UsesZeroVector = true;
16314  VectorMask[i] = 0;
16315  continue;
16316  }
16317 
16318  // Not an undef or zero. If the input is something other than an
16319  // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
16320  if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
16321  !isa<ConstantSDNode>(Op.getOperand(1)))
16322  return SDValue();
16323  SDValue ExtractedFromVec = Op.getOperand(0);
16324 
16325  APInt ExtractIdx = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
16326  if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
16327  return SDValue();
16328 
16329  // All inputs must have the same element type as the output.
16330  if (VT.getVectorElementType() !=
16331  ExtractedFromVec.getValueType().getVectorElementType())
16332  return SDValue();
16333 
16334  // Have we seen this input vector before?
16335  // The vectors are expected to be tiny (usually 1 or 2 elements), so using
16336  // a map back from SDValues to numbers isn't worth it.
16337  unsigned Idx = std::distance(
16338  VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
16339  if (Idx == VecIn.size())
16340  VecIn.push_back(ExtractedFromVec);
16341 
16342  VectorMask[i] = Idx;
16343  }
16344 
16345  // If we didn't find at least one input vector, bail out.
16346  if (VecIn.size() < 2)
16347  return SDValue();
16348 
16349  // If all the operands of the BUILD_VECTOR extract from the same
16350  // vector, then split the vector efficiently based on the maximum
16351  // vector access index and adjust the VectorMask and
16352  // VecIn accordingly.
16353  if (VecIn.size() == 2) {
16354  unsigned MaxIndex = 0;
16355  unsigned NearestPow2 = 0;
16356  SDValue Vec = VecIn.back();
16357  EVT InVT = Vec.getValueType();
16358  MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
16359  SmallVector<unsigned, 8> IndexVec(NumElems, 0);
16360 
16361  for (unsigned i = 0; i < NumElems; i++) {
16362  if (VectorMask[i] <= 0)
16363  continue;
16364  unsigned Index = N->getOperand(i).getConstantOperandVal(1);
16365  IndexVec[i] = Index;
16366  MaxIndex = std::max(MaxIndex, Index);
16367  }
16368 
16369  NearestPow2 = PowerOf2Ceil(MaxIndex);
16370  if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
16371  NumElems * 2 < NearestPow2) {
16372  unsigned SplitSize = NearestPow2 / 2;
16373  EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
16374  InVT.getVectorElementType(), SplitSize);
16375  if (TLI.isTypeLegal(SplitVT)) {
16376  SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
16377  DAG.getConstant(SplitSize, DL, IdxTy));
16378  SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
16379  DAG.getConstant(0, DL, IdxTy));
16380  VecIn.pop_back();
16381  VecIn.push_back(VecIn1);
16382  VecIn.push_back(VecIn2);
16383 
16384  for (unsigned i = 0; i < NumElems; i++) {
16385  if (VectorMask[i] <= 0)
16386  continue;
16387  VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
16388  }
16389  }
16390  }
16391  }
16392 
16393  // TODO: We want to sort the vectors by descending length, so that adjacent
16394  // pairs have similar length, and the longer vector is always first in the
16395  // pair.
16396 
16397  // TODO: Should this fire if some of the input vectors have illegal types (like
16398  // it does now), or should we let legalization run its course first?
16399 
16400  // Shuffle phase:
16401  // Take pairs of vectors, and shuffle them so that the result has elements
16402  // from these vectors in the correct places.
16403  // For example, given:
16404  // t10: i32 = extract_vector_elt t1, Constant:i64<0>
16405  // t11: i32 = extract_vector_elt t2, Constant:i64<0>
16406  // t12: i32 = extract_vector_elt t3, Constant:i64<0>
16407  // t13: i32 = extract_vector_elt t1, Constant:i64<1>
16408  // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
16409  // We will generate:
16410  // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
16411  // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
16412  SmallVector<SDValue, 4> Shuffles;
16413  for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
16414  unsigned LeftIdx = 2 * In + 1;
16415  SDValue VecLeft = VecIn[LeftIdx];
16416  SDValue VecRight =
16417  (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
16418 
16419  if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
16420  VecRight, LeftIdx))
16421  Shuffles.push_back(Shuffle);
16422  else
16423  return SDValue();
16424  }
16425 
16426  // If we need the zero vector as an "ingredient" in the blend tree, add it
16427  // to the list of shuffles.
16428  if (UsesZeroVector)
16429  Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
16430  : DAG.getConstantFP(0.0, DL, VT));
16431 
16432  // If we only have one shuffle, we're done.
16433  if (Shuffles.size() == 1)
16434  return Shuffles[0];
16435 
16436  // Update the vector mask to point to the post-shuffle vectors.
16437  for (int &Vec : VectorMask)
16438  if (Vec == 0)
16439  Vec = Shuffles.size() - 1;
16440  else
16441  Vec = (Vec - 1) / 2;
16442 
16443  // More than one shuffle. Generate a binary tree of blends, e.g. if from
16444  // the previous step we got the set of shuffles t10, t11, t12, t13, we will
16445  // generate:
16446  // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
16447  // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
16448  // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
16449  // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
16450  // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
16451  // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
16452  // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
16453 
16454  // Make sure the initial size of the shuffle list is even.
16455  if (Shuffles.size() % 2)
16456  Shuffles.push_back(DAG.getUNDEF(VT));
16457 
16458  for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
16459  if (CurSize % 2) {
16460  Shuffles[CurSize] = DAG.getUNDEF(VT);
16461  CurSize++;
16462  }
16463  for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
16464  int Left = 2 * In;
16465  int Right = 2 * In + 1;
16466  SmallVector<int, 8> Mask(NumElems, -1);
16467  for (unsigned i = 0; i != NumElems; ++i) {
16468  if (VectorMask[i] == Left) {
16469  Mask[i] = i;
16470  VectorMask[i] = In;
16471  } else if (VectorMask[i] == Right) {
16472  Mask[i] = i + NumElems;
16473  VectorMask[i] = In;
16474  }
16475  }
16476 
16477  Shuffles[In] =
16478  DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
16479  }
16480  }
16481  return Shuffles[0];
16482 }
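The renumbering loop above (lines 16437-16441) can be exercised in isolation. A standalone sketch with a hypothetical six-element VectorMask and three post-shuffle slots (two pair shuffles plus the zero vector); note that undef (-1) survives unchanged because C++ integer division truncates toward zero, so (-1 - 1) / 2 == -1.

#include <cstdio>
#include <vector>

int main() {
  std::vector<int> VectorMask = {1, 2, 3, 4, 0, -1}; // per-lane source number
  const int NumShuffles = 3; // Shuffles.size() after adding the zero vector
  for (int &Vec : VectorMask)
    Vec = (Vec == 0) ? NumShuffles - 1 // zero vector is the last shuffle slot
                     : (Vec - 1) / 2;  // inputs 2k+1, 2k+2 -> shuffle k
  for (int V : VectorMask)
    std::printf("%d ", V); // prints: 0 0 1 1 2 -1
  return 0;
}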
16483 
16484 // Try to turn a build vector of zero extends of extract vector elts into
16485 // a vector zero extend and possibly an extract subvector.
16486 // TODO: Support sign extend or any extend?
16487 // TODO: Allow undef elements?
16488 // TODO: Don't require the extracts to start at element 0.
16489 SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
16490  if (LegalOperations)
16491  return SDValue();
16492 
16493  EVT VT = N->getValueType(0);
16494 
16495  SDValue Op0 = N->getOperand(0);
16496  auto checkElem = [&](SDValue Op) -> int64_t {
16497  if (Op.getOpcode() == ISD::ZERO_EXTEND &&
16498  Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16499  Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
16500  if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
16501  return C->getZExtValue();
16502  return -1;
16503  };
16504 
16505  // Make sure the first element matches
16506  // (zext (extract_vector_elt X, C))
16507  int64_t Offset = checkElem(Op0);
16508  if (Offset < 0)
16509  return SDValue();
16510 
16511  unsigned NumElems = N->getNumOperands();
16512  SDValue In = Op0.getOperand(0).getOperand(0);
16513  EVT InSVT = In.getValueType().getScalarType();
16514  EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
16515 
16516  // Don't create an illegal input type after type legalization.
16517  if (LegalTypes && !TLI.isTypeLegal(InVT))
16518  return SDValue();
16519 
16520  // Ensure all the elements come from the same vector and are adjacent.
16521  for (unsigned i = 1; i != NumElems; ++i) {
16522  if ((Offset + i) != checkElem(N->getOperand(i)))
16523  return SDValue();
16524  }
16525 
16526  SDLoc DL(N);
16527  In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
16528  Op0.getOperand(0).getOperand(1));
16529  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, In);
16530 }
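A standalone sketch of the adjacency requirement checked above: operand i must be (zext (extract_vector_elt In, Offset + i)). The extract indices below are hypothetical.

#include <cstdio>

int main() {
  const int NumElems = 4;
  const int ExtractIdx[NumElems] = {2, 3, 4, 5}; // per-operand indices
  const int Offset = ExtractIdx[0];
  bool Adjacent = true;
  for (int i = 1; i != NumElems; ++i)
    Adjacent = Adjacent && (ExtractIdx[i] == Offset + i);
  if (Adjacent) // fold is valid: prints with Offset = 2
    std::printf("fold to (zext (extract_subvector In, %d))\n", Offset);
  return 0;
}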
16531 
16532 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
16533  EVT VT = N->getValueType(0);
16534 
16535  // A vector built entirely of undefs is undef.
16536  if (ISD::allOperandsUndef(N))
16537  return DAG.getUNDEF(VT);
16538 
16539  // If this is a splat of a bitcast from another vector, change to a
16540  // concat_vector.
16541  // For example:
16542  // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
16543  // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
16544  //
16545  // If X is a build_vector itself, the concat can become a larger build_vector.
16546  // TODO: Maybe this is useful for non-splat too?
16547  if (!LegalOperations) {
16548  if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
16549  Splat = peekThroughBitcasts(Splat);
16550  EVT SrcVT = Splat.getValueType();
16551  if (SrcVT.isVector()) {
16552  unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
16553  EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
16554  SrcVT.getVectorElementType(), NumElts);
16555  if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
16556  SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
16557  SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
16558  NewVT, Ops);
16559  return DAG.getBitcast(VT, Concat);
16560  }
16561  }
16562  }
16563  }
16564 
16565  // Check if we can express the BUILD_VECTOR via a subvector extract.
16566  if (!LegalTypes && (N->getNumOperands() > 1)) {
16567  SDValue Op0 = N->getOperand(0);
16568  auto checkElem = [&](SDValue Op) -> uint64_t {
16569  if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
16570  (Op0.getOperand(0) == Op.getOperand(0)))
16571  if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
16572  return CNode->getZExtValue();
16573  return -1;
16574  };
16575 
16576  int Offset = checkElem(Op0);
16577  for (unsigned i = 0; i < N->getNumOperands(); ++i) {
16578  if (Offset + i != checkElem(N->getOperand(i))) {
16579  Offset = -1;
16580  break;
16581  }
16582  }
16583 
16584  if ((Offset == 0) &&
16585  (Op0.getOperand(0).getValueType() == N->getValueType(0)))
16586  return Op0.getOperand(0);
16587  if ((Offset != -1) &&
16588  ((Offset % N->getValueType(0).getVectorNumElements()) ==
16589  0)) // IDX must be a multiple of the output size.
16590  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
16591  Op0.getOperand(0), Op0.getOperand(1));
16592  }
16593 
16594  if (SDValue V = convertBuildVecZextToZext(N))
16595  return V;
16596 
16597  if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
16598  return V;
16599 
16600  if (SDValue V = reduceBuildVecToShuffle(N))
16601  return V;
16602 
16603  return SDValue();
16604 }
16605 
16606 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
16607  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16608  EVT OpVT = N->getOperand(0).getValueType();
16609 
16610  // If the operands are legal vectors, leave them alone.
16611  if (TLI.isTypeLegal(OpVT))
16612  return SDValue();
16613 
16614  SDLoc DL(N);
16615  EVT VT = N->getValueType(0);
16616  SmallVector<SDValue, 8> Ops;
16617 
16618  EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
16619  SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
16620 
16621  // Keep track of what we encounter.
16622  bool AnyInteger = false;
16623  bool AnyFP = false;
16624  for (const SDValue &Op : N->ops()) {
16625  if (ISD::BITCAST == Op.getOpcode() &&
16626  !Op.getOperand(0).getValueType().isVector())
16627  Ops.push_back(Op.getOperand(0));
16628  else if (ISD::UNDEF == Op.getOpcode())
16629  Ops.push_back(ScalarUndef);
16630  else
16631  return SDValue();
16632 
16633  // Note whether we encounter an integer or floating point scalar.
16634  // If it's neither, bail out, it could be something weird like x86mmx.
16635  EVT LastOpVT = Ops.back().getValueType();
16636  if (LastOpVT.isFloatingPoint())
16637  AnyFP = true;
16638  else if (LastOpVT.isInteger())
16639  AnyInteger = true;
16640  else
16641  return SDValue();
16642  }
16643 
16644  // If any of the operands is a floating point scalar bitcast to a vector,
16645  // use floating point types throughout, and bitcast everything.
16646  // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
16647  if (AnyFP) {
16648  SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
16649  ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
16650  if (AnyInteger) {
16651  for (SDValue &Op : Ops) {
16652  if (Op.getValueType() == SVT)
16653  continue;
16654  if (Op.isUndef())
16655  Op = ScalarUndef;
16656  else
16657  Op = DAG.getBitcast(SVT, Op);
16658  }
16659  }
16660  }
16661 
16662  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
16663  VT.getSizeInBits() / SVT.getSizeInBits());
16664  return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
16665 }
16666 
16667 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
16668 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
16669 // most two distinct vectors the same size as the result, attempt to turn this
16670 // into a legal shuffle.
16671 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
16672  EVT VT = N->getValueType(0);
16673  EVT OpVT = N->getOperand(0).getValueType();
16674  int NumElts = VT.getVectorNumElements();
16675  int NumOpElts = OpVT.getVectorNumElements();
16676 
16677  SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
16678  SmallVector<int, 8> Mask;
16679 
16680  for (SDValue Op : N->ops()) {
16681  Op = peekThroughBitcasts(Op);
16682 
16683  // UNDEF nodes convert to UNDEF shuffle mask values.
16684  if (Op.isUndef()) {
16685  Mask.append((unsigned)NumOpElts, -1);
16686  continue;
16687  }
16688 
16689  if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
16690  return SDValue();
16691 
16692  // What vector are we extracting the subvector from and at what index?
16693  SDValue ExtVec = Op.getOperand(0);
16694 
16695  // We want the EVT of the original extraction to correctly scale the
16696  // extraction index.
16697  EVT ExtVT = ExtVec.getValueType();
16698  ExtVec = peekThroughBitcasts(ExtVec);
16699 
16700  // UNDEF nodes convert to UNDEF shuffle mask values.
16701  if (ExtVec.isUndef()) {
16702  Mask.append((unsigned)NumOpElts, -1);
16703  continue;
16704  }
16705 
16706  if (!isa<ConstantSDNode>(Op.getOperand(1)))
16707  return SDValue();
16708  int ExtIdx = Op.getConstantOperandVal(1);
16709 
16710  // Ensure that we are extracting a subvector from a vector the same
16711  // size as the result.
16712  if (ExtVT.getSizeInBits() != VT.getSizeInBits())
16713  return SDValue();
16714 
16715  // Scale the subvector index to account for any bitcast.
16716  int NumExtElts = ExtVT.getVectorNumElements();
16717  if (0 == (NumExtElts % NumElts))
16718  ExtIdx /= (NumExtElts / NumElts);
16719  else if (0 == (NumElts % NumExtElts))
16720  ExtIdx *= (NumElts / NumExtElts);
16721  else
16722  return SDValue();
16723 
16724  // At most we can reference 2 inputs in the final shuffle.
16725  if (SV0.isUndef() || SV0 == ExtVec) {
16726  SV0 = ExtVec;
16727  for (int i = 0; i != NumOpElts; ++i)
16728  Mask.push_back(i + ExtIdx);
16729  } else if (SV1.isUndef() || SV1 == ExtVec) {
16730  SV1 = ExtVec;
16731  for (int i = 0; i != NumOpElts; ++i)
16732  Mask.push_back(i + ExtIdx + NumElts);
16733  } else {
16734  return SDValue();
16735  }
16736  }
16737 
16738  if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
16739  return SDValue();
16740 
16741  return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
16742  DAG.getBitcast(VT, SV1), Mask);
16743 }
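A standalone sketch of the extract-index rescaling above, which compensates for a bitcast between the extracted type and the result type; the element counts below are hypothetical.

#include <cstdio>

// Rescale an extract_subvector index from NumExtElts-element space to
// NumElts-element space; -1 means the ratio is incompatible (bail out).
int rescaleExtractIndex(int ExtIdx, int NumExtElts, int NumElts) {
  if (NumExtElts % NumElts == 0)
    return ExtIdx / (NumExtElts / NumElts); // narrower result elements
  if (NumElts % NumExtElts == 0)
    return ExtIdx * (NumElts / NumExtElts); // wider result elements
  return -1;
}

int main() {
  std::printf("%d\n", rescaleExtractIndex(4, 8, 4)); // v8 idx 4 -> v4 idx 2
  std::printf("%d\n", rescaleExtractIndex(1, 2, 4)); // v2 idx 1 -> v4 idx 2
  return 0;
}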
16744 
16745 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
16746  // If we only have one input vector, we don't need to do any concatenation.
16747  if (N->getNumOperands() == 1)
16748  return N->getOperand(0);
16749 
16750  // Check if all of the operands are undefs.
16751  EVT VT = N->getValueType(0);
16752  if (ISD::allOperandsUndef(N))
16753  return DAG.getUNDEF(VT);
16754 
16755  // Optimize concat_vectors where all but the first of the vectors are undef.
16756  if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
16757  return Op.isUndef();
16758  })) {
16759  SDValue In = N->getOperand(0);
16760  assert(In.getValueType().isVector() && "Must concat vectors");
16761 
16762  SDValue Scalar = peekThroughOneUseBitcasts(In);
16763 
16764  // concat_vectors(scalar_to_vector(scalar), undef) ->
16765  // scalar_to_vector(scalar)
16766  if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
16767  Scalar.hasOneUse()) {
16768  EVT SVT = Scalar.getValueType().getVectorElementType();
16769  if (SVT == Scalar.getOperand(0).getValueType())
16770  Scalar = Scalar.getOperand(0);
16771  }
16772 
16773  // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
16774  if (!Scalar.getValueType().isVector()) {
16775  // If the bitcast type isn't legal, it might be a trunc of a legal type;
16776  // look through the trunc so we can still do the transform:
16777  // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
16778  if (Scalar->getOpcode() == ISD::TRUNCATE &&
16779  !TLI.isTypeLegal(Scalar.getValueType()) &&
16780  TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
16781  Scalar = Scalar->getOperand(0);
16782 
16783  EVT SclTy = Scalar.getValueType();
16784 
16785  if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
16786  return SDValue();
16787 
16788  // Bail out if the vector size is not a multiple of the scalar size.
16789  if (VT.getSizeInBits() % SclTy.getSizeInBits())
16790  return SDValue();
16791 
16792  unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
16793  if (VNTNumElms < 2)
16794  return SDValue();
16795 
16796  EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
16797  if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
16798  return SDValue();
16799 
16800  SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
16801  return DAG.getBitcast(VT, Res);
16802  }
16803  }
16804 
16805  // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
16806  // We have already tested above for an UNDEF only concatenation.
16807  // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
16808  // -> (BUILD_VECTOR A, B, ..., C, D, ...)
16809  auto IsBuildVectorOrUndef = [](const SDValue &Op) {
16810  return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
16811  };
16812  if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
16813  SmallVector<SDValue, 8> Opnds;
16814  EVT SVT = VT.getScalarType();
16815 
16816  EVT MinVT = SVT;
16817  if (!SVT.isFloatingPoint()) {
16818  // If the BUILD_VECTOR operands are built from integers, they may have
16819  // different operand types. Get the smallest type and truncate all operands to it.
16820  bool FoundMinVT = false;
16821  for (const SDValue &Op : N->ops())
16822  if (ISD::BUILD_VECTOR == Op.getOpcode()) {
16823  EVT OpSVT = Op.getOperand(0).getValueType();
16824  MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
16825  FoundMinVT = true;
16826  }
16827  assert(FoundMinVT && "Concat vector type mismatch");
16828  }
16829 
16830  for (const SDValue &Op : N->ops()) {
16831  EVT OpVT = Op.getValueType();
16832  unsigned NumElts = OpVT.getVectorNumElements();
16833 
16834  if (ISD::UNDEF == Op.getOpcode())
16835  Opnds.append(NumElts, DAG.getUNDEF(MinVT));
16836 
16837  if (ISD::BUILD_VECTOR == Op.getOpcode()) {
16838  if (SVT.isFloatingPoint()) {
16839  assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
16840  Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
16841  } else {
16842  for (unsigned i = 0; i != NumElts; ++i)
16843  Opnds.push_back(
16844  DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
16845  }
16846  }
16847  }
16848 
16849  assert(VT.getVectorNumElements() == Opnds.size() &&
16850  "Concat vector type mismatch");
16851  return DAG.getBuildVector(VT, SDLoc(N), Opnds);
16852  }
16853 
16854  // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
16855  if (SDValue V = combineConcatVectorOfScalars(N, DAG))
16856  return V;
16857 
16858  // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
16859  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
16860  if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
16861  return V;
16862 
16863  // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
16864  // nodes often generate nop CONCAT_VECTOR nodes.
16865  // Scan the CONCAT_VECTOR operands and look for CONCAT operations that
16866  // place the incoming vectors at the exact same location.
16867  SDValue SingleSource = SDValue();
16868  unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
16869 
16870  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
16871  SDValue Op = N->getOperand(i);
16872 
16873  if (Op.isUndef())
16874  continue;
16875 
16876  // Check if this is the identity extract:
16877  if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
16878  return SDValue();
16879 
16880  // Find the single incoming vector for the extract_subvector.
16881  if (SingleSource.getNode()) {
16882  if (Op.getOperand(0) != SingleSource)
16883  return SDValue();
16884  } else {
16885  SingleSource = Op.getOperand(0);
16886 
16887  // Check that the source type is the same as the type of the result.
16888  // If not, this concat may extend the vector, so we cannot
16889  // optimize it away.
16890  if (SingleSource.getValueType() != N->getValueType(0))
16891  return SDValue();
16892  }
16893 
16894  unsigned IdentityIndex = i * PartNumElem;
16895  ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
16896  // The extract index must be constant.
16897  if (!CS)
16898  return SDValue();
16899 
16900  // Check that we are reading from the identity index.
16901  if (CS->getZExtValue() != IdentityIndex)
16902  return SDValue();
16903  }
16904 
16905  if (SingleSource.getNode())
16906  return SingleSource;
16907 
16908  return SDValue();
16909 }
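A standalone sketch of the identity-concat check in the loop above: operand i must read PartNumElem lanes starting at i * PartNumElem from one source vector. The values below are hypothetical.

#include <cstdio>

int main() {
  const int PartNumElem = 4;          // elements per concat operand
  const int ExtractIndex[2] = {0, 4}; // indices used by the two operands
  bool Identity = true;
  for (int i = 0; i != 2; ++i)
    Identity = Identity && (ExtractIndex[i] == i * PartNumElem);
  std::printf("%s\n", Identity ? "concat is a nop" : "keep concat");
  return 0;
}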
16910 
16911 /// If we are extracting a subvector produced by a wide binary operator try
16912 /// to use a narrow binary operator and/or avoid concatenation and extraction.
16913 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
16914  // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
16915  // some of these bailouts with other transforms.
16916 
16917  // The extract index must be a constant, so we can map it to a concat operand.
16918  auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
16919  if (!ExtractIndexC)
16920  return SDValue();
16921 
16922  // We are looking for an optionally bitcasted wide vector binary operator
16923  // feeding an extract subvector.
16924  SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
16925  if (!ISD::isBinaryOp(BinOp.getNode()))
16926  return SDValue();
16927 
16928  // The binop must be a vector type, so we can extract some fraction of it.
16929  EVT WideBVT = BinOp.getValueType();
16930  if (!WideBVT.isVector())
16931  return SDValue();
16932 
16933  EVT VT = Extract->getValueType(0);
16934  unsigned ExtractIndex = ExtractIndexC->getZExtValue();
16935  assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
16936  "Extract index is not a multiple of the vector length.");
16937 
16938  // Bail out if this is not a proper multiple width extraction.
16939  unsigned WideWidth = WideBVT.getSizeInBits();
16940  unsigned NarrowWidth = VT.getSizeInBits();
16941  if (WideWidth % NarrowWidth != 0)
16942  return SDValue();
16943 
16944  // Bail out if we are extracting a fraction of a single operation. This can
16945  // occur because we potentially looked through a bitcast of the binop.
16946  unsigned NarrowingRatio = WideWidth / NarrowWidth;
16947  unsigned WideNumElts = WideBVT.getVectorNumElements();
16948  if (WideNumElts % NarrowingRatio != 0)
16949  return SDValue();
16950 
16951  // Bail out if the target does not support a narrower version of the binop.
16952  EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
16953  WideNumElts / NarrowingRatio);
16954  unsigned BOpcode = BinOp.getOpcode();
16955  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16956  if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
16957  return SDValue();
16958 
16959  // If extraction is cheap, we don't need to look at the binop operands
16960  // for concat ops. The narrow binop alone makes this transform profitable.
16961  // We can't just reuse the original extract index operand because we may have
16962  // bitcasted.
16963  unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
16964  unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
16965  EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
16966  if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
16967  BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
16968  // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
16969  SDLoc DL(Extract);
16970  SDValue NewExtIndex = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
16971  SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
16972  BinOp.getOperand(0), NewExtIndex);
16973  SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
16974  BinOp.getOperand(1), NewExtIndex);
16975  SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
16976  BinOp.getNode()->getFlags());
16977  return DAG.getBitcast(VT, NarrowBinOp);
16978  }
16979 
16980  // Only handle the case where we are doubling and then halving. A larger ratio
16981  // may require more than two narrow binops to replace the wide binop.
16982  if (NarrowingRatio != 2)
16983  return SDValue();
16984 
16985  // TODO: The motivating case for this transform is an x86 AVX1 target. That
16986  // target has temptingly almost legal versions of bitwise logic ops in 256-bit
16987  // flavors, but no other 256-bit integer support. This could be extended to
16988  // handle any binop, but that may require fixing/adding other folds to avoid
16989  // codegen regressions.
16990  if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
16991  return SDValue();
16992 
16993  // We need at least one concatenation operation of a binop operand to make
16994  // this transform worthwhile. The concat must double the input vector sizes.
16995  // TODO: Should we also handle INSERT_SUBVECTOR patterns?
16996  SDValue LHS = peekThroughBitcasts(BinOp.getOperand(0));
16997  SDValue RHS = peekThroughBitcasts(BinOp.getOperand(1));
16998  bool ConcatL =
16999  LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
17000  bool ConcatR =
17001  RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2;
17002  if (!ConcatL && !ConcatR)
17003  return SDValue();
17004 
17005  // If one of the binop operands was not the result of a concat, we must
17006  // extract a half-sized operand for our new narrow binop.
17007  SDLoc DL(Extract);
17008 
17009  // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
17010  // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, N)
17011  // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, N), YN
17012  SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum))
17013  : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
17014  BinOp.getOperand(0),
17015  DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
17016 
17017  SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum))
17018  : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
17019  BinOp.getOperand(1),
17020  DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
17021 
17022  SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
17023  return DAG.getBitcast(VT, NarrowBinOp);
17024 }
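A standalone sketch of the index bookkeeping above: the extract index is mapped to a concat operand number, then rescaled to the narrow type's element count. The sketch assumes no intervening bitcast, so both element counts match (e.g. extracting a v4i32 half of a v8i32 binop).

#include <cstdio>

int main() {
  const int ExtractIndex = 4;  // extract_subvector index, in elements
  const int NarrowNumElts = 4; // elements in the narrow result type
  const int ConcatOpNum = ExtractIndex / NarrowNumElts; // upper half -> 1
  const int ExtBOIdx = ConcatOpNum * NarrowNumElts;     // rescaled index
  std::printf("ConcatOpNum=%d ExtBOIdx=%d\n", ConcatOpNum, ExtBOIdx);
  return 0; // prints: ConcatOpNum=1 ExtBOIdx=4
}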
17025 
17026 /// If we are extracting a subvector from a wide vector load, convert to a
17027 /// narrow load to eliminate the extraction:
17028 /// (extract_subvector (load wide vector)) --> (load narrow vector)
17029 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
17030  // TODO: Add support for big-endian. The offset calculation must be adjusted.
17031  if (DAG.getDataLayout().isBigEndian())
17032  return SDValue();
17033 
17034  auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
17035  auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
17036  if (!Ld || Ld->getExtensionType() || Ld->isVolatile() || !ExtIdx)
17037  return SDValue();
17038 
17039  // Allow targets to opt-out.
17040  EVT VT = Extract->getValueType(0);
17041  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17042  if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
17043  return SDValue();
17044 
17045  // The narrow load will be offset from the base address of the old load if
17046  // we are extracting from something besides index 0 (little-endian).
17047  SDLoc DL(Extract);
17048  SDValue BaseAddr = Ld->getOperand(1);
17049  unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
17050 
17051  // TODO: Use "BaseIndexOffset" to make this more effective.
17052  SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
17053  MachineFunction &MF = DAG.getMachineFunction();
17054  MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
17055  VT.getStoreSize());
17056  SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
17057  DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
17058  return NewLd;
17059 }
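A standalone sketch of the byte-offset computation above (little-endian only); the values assume extracting the upper v4f32 half, element index 4, of a v8f32 load.

#include <cstdio>

int main() {
  const unsigned ExtIdx = 4;            // extract_subvector element index
  const unsigned EltStoreSizeBytes = 4; // an f32 element stores as 4 bytes
  const unsigned Offset = ExtIdx * EltStoreSizeBytes;
  std::printf("narrow load reads at base + %u bytes\n", Offset); // base + 16
  return 0;
}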
17060 
17061 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
17062  EVT NVT = N->getValueType(0);
17063  SDValue V = N->getOperand(0);
17064 
17065  // Extract from UNDEF is UNDEF.
17066  if (V.isUndef())
17067  return DAG.getUNDEF(NVT);
17068 
17069  if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
17070  if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
17071  return NarrowLoad;
17072 
17073  // Combine:
17074  // (extract_subvec (concat V1, V2, ...), i)
17075  // Into:
17076  // Vi if possible
17077  // Only operand 0 is checked, as 'concat' assumes all inputs have the
17078  // same type.
17079  if (V.getOpcode() == ISD::CONCAT_VECTORS &&
17080  isa<ConstantSDNode>(N->getOperand(1)) &&
17081  V.getOperand(0).getValueType() == NVT) {
17082  unsigned Idx = N->getConstantOperandVal(1);
17083  unsigned NumElems = NVT.getVectorNumElements();
17084  assert((Idx % NumElems) == 0 &&
17085  "IDX in concat is not a multiple of the result vector length.");
17086  return V->getOperand(Idx / NumElems);
17087  }
17088 
17089  V = peekThroughBitcasts(V);
17090 
17091  // If the input is a build vector, try to make a smaller build vector.
17092  if (V.getOpcode() == ISD::BUILD_VECTOR) {
17093  if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
17094  EVT InVT = V.getValueType();
17095  unsigned ExtractSize = NVT.getSizeInBits();
17096  unsigned EltSize = InVT.getScalarSizeInBits();
17097  // Only do this if we won't split any elements.
17098  if (ExtractSize % EltSize == 0) {
17099  unsigned NumElems = ExtractSize / EltSize;
17100  EVT EltVT = InVT.getVectorElementType();
17101  EVT ExtractVT = NumElems == 1 ? EltVT :
17102  EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
17103  if ((Level < AfterLegalizeDAG ||
17104  (NumElems == 1 ||
17105  TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
17106  (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
17107  unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) /
17108  EltSize;
17109  if (NumElems == 1) {
17110  SDValue Src = V->getOperand(IdxVal);
17111  if (EltVT != Src.getValueType())
17112  Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
17113 
17114  return DAG.getBitcast(NVT, Src);
17115  }
17116 
17117  // Extract the pieces from the original build_vector.
17118  SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
17119  makeArrayRef(V->op_begin() + IdxVal,
17120  NumElems));
17121  return DAG.getBitcast(NVT, BuildVec);
17122  }
17123  }
17124  }
17125  }
17126 
17127  if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
17128  // Handle only the simple case where the vector being inserted and the
17129  // vector being extracted are of the same size.
17130  EVT SmallVT = V.getOperand(1).getValueType();
17131  if (!NVT.bitsEq(SmallVT))
17132  return SDValue();
17133 
17134  // Only handle cases where both indexes are constants.
17135  auto *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
17136  auto *InsIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));
17137 
17138  if (InsIdx && ExtIdx) {
17139  // Combine:
17140  // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
17141  // Into:
17142  // indices are equal or bit offsets are equal => V1
17143  // otherwise => (extract_subvec V1, ExtIdx)
17144  if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
17145  ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
17146  return DAG.getBitcast(NVT, V.getOperand(1));
17147  return DAG.getNode(
17148  ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
17149  DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
17150  N->getOperand(1));
17151  }
17152  }
17153 
17154  if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
17155  return NarrowBOp;
17156 
17157  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
17158  return SDValue(N, 0);
17159 
17160  return SDValue();
17161 }
17162 
17163 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
17164 // or a shuffle of a single concat into a simpler shuffle followed by a concat.
17165 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
17166  EVT VT = N->getValueType(0);
17167  unsigned NumElts = VT.getVectorNumElements();
17168 
17169  SDValue N0 = N->getOperand(0);
17170  SDValue N1 = N->getOperand(1);
17171  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
17172 
17173  SmallVector<SDValue, 4> Ops;
17174  EVT ConcatVT = N0.getOperand(0).getValueType();
17175  unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
17176  unsigned NumConcats = NumElts / NumElemsPerConcat;
17177 
17178  // Special case: shuffle(concat(A,B)) can be more efficiently represented
17179  // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
17180  // half vector elements.
17181  if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
17182  std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
17183  SVN->getMask().end(), [](int i) { return i == -1; })) {
17184  N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
17185  makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
17186  N1 = DAG.getUNDEF(ConcatVT);
17187  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
17188  }
17189 
17190  // Look at every vector that's inserted. We're looking for exact
17191  // subvector-sized copies from a concatenated vector.
17192  for (unsigned I = 0; I != NumConcats; ++I) {
17193  // Make sure we're dealing with a copy.
17194  unsigned Begin = I * NumElemsPerConcat;
17195  bool AllUndef = true, NoUndef = true;
17196  for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
17197  if (SVN->getMaskElt(J) >= 0)
17198  AllUndef = false;
17199  else
17200  NoUndef = false;
17201  }
17202 
17203  if (NoUndef) {
17204  if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
17205  return SDValue();
17206 
17207  for (unsigned J = 1; J != NumElemsPerConcat; ++J)
17208  if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
17209  return SDValue();
17210 
17211  unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
17212  if (FirstElt < N0.getNumOperands())
17213  Ops.push_back(N0.getOperand(FirstElt));
17214  else
17215  Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
17216 
17217  } else if (AllUndef) {
17218  Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
17219  } else { // Mixed with general masks and undefs, can't do optimization.
17220  return SDValue();
17221  }
17222  }
17223 
17224  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
17225 }
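A standalone sketch of the copy detection above: each run of NumElemsPerConcat mask values must be consecutive and start at a multiple of NumElemsPerConcat, and the run then names one operand of the concatenated inputs. The mask below is hypothetical.

#include <cstdio>

int main() {
  const int NumElemsPerConcat = 2;
  const int Mask[4] = {2, 3, 6, 7}; // shuffle<2,3,6,7> over v2 concat pieces
  for (int Begin = 0; Begin != 4; Begin += NumElemsPerConcat) {
    bool Copy = (Mask[Begin] % NumElemsPerConcat == 0) &&
                (Mask[Begin + 1] == Mask[Begin] + 1);
    int FirstElt = Mask[Begin] / NumElemsPerConcat;
    std::printf("run %d: %s -> concat operand %d\n",
                Begin / NumElemsPerConcat, Copy ? "copy" : "not a copy",
                FirstElt); // prints operands 1 and 3
  }
  return 0;
}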
17226 
17227 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
17228 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
17229 //
17230 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
17231 // a simplification in some sense, but it isn't appropriate in general: some
17232 // BUILD_VECTORs are substantially cheaper than others. The general case
17233 // of a BUILD_VECTOR requires inserting each element individually (or
17234 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
17235 // all constants is a single constant pool load. A BUILD_VECTOR where each
17236 // element is identical is a splat. A BUILD_VECTOR where most of the operands
17237 // are undef lowers to a small number of element insertions.
17238 //
17239 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
17240 // We don't fold shuffles where one side is a non-zero constant, and we don't
17241 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
17242 // non-constant operands. This seems to work out reasonably well in practice.
17243 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
17244  SelectionDAG &DAG,
17245  const TargetLowering &TLI) {
17246  EVT VT = SVN->getValueType(0);
17247  unsigned NumElts = VT.getVectorNumElements();
17248  SDValue N0 = SVN->getOperand(0);
17249  SDValue N1 = SVN->getOperand(1);
17250 
17251  if (!N0->hasOneUse())
17252  return SDValue();
17253 
17254  // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
17255  // discussed above.
17256  if (!N1.isUndef()) {
17257  if (!N1->hasOneUse())
17258  return SDValue();
17259 
17260  bool N0AnyConst = isAnyConstantBuildVector(N0);
17261  bool N1AnyConst = isAnyConstantBuildVector(N1);
17262  if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
17263  return SDValue();
17264  if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
17265  return SDValue();
17266  }
17267 
17268  // If both inputs are splats of the same value then we can safely merge this
17269  // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
17270  bool IsSplat = false;
17271  auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
17272  auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
17273  if (BV0 && BV1)
17274  if (SDValue Splat0 = BV0->getSplatValue())
17275  IsSplat = (Splat0 == BV1->getSplatValue());
17276 
17278  SmallSet<SDValue, 16> DuplicateOps;
17279  for (int M : SVN->getMask()) {
17280  SDValue Op = DAG.getUNDEF(VT.getScalarType());
17281  if (M >= 0) {
17282  int Idx = M < (int)NumElts ? M : M - NumElts;
17283  SDValue &S = (M < (int)NumElts ? N0 : N1);
17284  if (S.getOpcode() == ISD::BUILD_VECTOR) {
17285  Op = S.getOperand(Idx);
17286  } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
17287  assert(Idx == 0 && "Unexpected SCALAR_TO_VECTOR operand index.");
17288  Op = S.getOperand(0);
17289  } else {
17290  // Operand can't be combined - bail out.
17291  return SDValue();
17292  }
17293  }
17294 
17295  // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
17296  // generating a splat; semantically, this is fine, but it's likely to
17297  // generate low-quality code if the target can't reconstruct an appropriate
17298  // shuffle.
17299  if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
17300  if (!IsSplat && !DuplicateOps.insert(Op).second)
17301  return SDValue();
17302 
17303  Ops.push_back(Op);
17304  }
17305 
17306  // BUILD_VECTOR requires all inputs to be of the same type, find the
17307  // maximum type and extend them all.
17308  EVT SVT = VT.getScalarType();
17309  if (SVT.isInteger())
17310  for (SDValue &Op : Ops)
17311  SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
17312  if (SVT != VT.getScalarType())
17313  for (SDValue &Op : Ops)
17314  Op = TLI.isZExtFree(Op.getValueType(), SVT)
17315  ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
17316  : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
17317  return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
17318 }
17319 
17320 // Match shuffles that can be converted to any_vector_extend_in_reg.
17321 // This is often generated during legalization.
17322 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
17323 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
17324 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
17325  SelectionDAG &DAG,
17326  const TargetLowering &TLI,
17327  bool LegalOperations) {
17328  EVT VT = SVN->getValueType(0);
17329  bool IsBigEndian = DAG.getDataLayout().isBigEndian();
17330 
17331  // TODO Add support for big-endian when we have a test case.
17332  if (!VT.isInteger() || IsBigEndian)
17333  return SDValue();
17334 
17335  unsigned NumElts = VT.getVectorNumElements();
17336  unsigned EltSizeInBits = VT.getScalarSizeInBits();
17337  ArrayRef<int> Mask = SVN->getMask();
17338  SDValue N0 = SVN->getOperand(0);
17339 
17340  // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
17341  auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
17342  for (unsigned i = 0; i != NumElts; ++i) {
17343  if (Mask[i] < 0)
17344  continue;
17345  if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
17346  continue;
17347  return false;
17348  }
17349  return true;
17350  };
17351 
17352  // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
17353  // power-of-2 extensions as they are the most likely.
17354  for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
17355  // Check for non power of 2 vector sizes
17356  if (NumElts % Scale != 0)
17357  continue;
17358  if (!isAnyExtend(Scale))
17359  continue;
17360 
17361  EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
17362  EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
17363  // Never create an illegal type. Only create unsupported operations if we
17364  // are pre-legalization.
17365  if (TLI.isTypeLegal(OutVT))
17366  if (!LegalOperations ||
17367  TLI.isOperationLegal(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
17368  return DAG.getBitcast(VT,
17369  DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
17370  SDLoc(SVN), OutVT, N0));
17371  }
17372 
17373  return SDValue();
17374 }
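A standalone sketch of the isAnyExtend predicate above: for Scale = 2 the mask must look like <0,u,1,u,...>, i.e. lane i is undef, or equals i / Scale when i is a multiple of Scale. The mask below is hypothetical.

#include <cstdio>

bool isAnyExtendMask(const int *Mask, int NumElts, int Scale) {
  for (int i = 0; i != NumElts; ++i) {
    if (Mask[i] < 0)
      continue;                                // undef lane is acceptable
    if (i % Scale == 0 && Mask[i] == i / Scale)
      continue;                                // low part of a widened lane
    return false;
  }
  return true;
}

int main() {
  const int Mask[4] = {0, -1, 1, -1}; // v4i32 <0,u,1,u> -> v2i64 extend_inreg
  std::printf("%s\n", isAnyExtendMask(Mask, 4, 2) ? "match" : "no match");
  return 0;
}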
17375 
17376 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
17377 // each source element of a large type into the lowest elements of a smaller
17378 // destination type. This is often generated during legalization.
17379  // If the source node itself was a '*_extend_vector_inreg' node, then we
17380  // should be able to remove it.
17381 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
17382  SelectionDAG &DAG) {
17383  EVT VT = SVN->getValueType(0);
17384  bool IsBigEndian = DAG.getDataLayout().isBigEndian();
17385 
17386  // TODO Add support for big-endian when we have a test case.
17387  if (!VT.isInteger() || IsBigEndian)
17388  return SDValue();
17389 
17390  SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
17391 
17392  unsigned Opcode = N0.getOpcode();
17393  if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
17394  Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
17395  Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
17396  return SDValue();
17397 
17398  SDValue N00 = N0.getOperand(0);
17399  ArrayRef<int> Mask = SVN->getMask();
17400  unsigned NumElts = VT.getVectorNumElements();
17401  unsigned EltSizeInBits = VT.getScalarSizeInBits();
17402  unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
17403  unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
17404 
17405  if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
17406  return SDValue();
17407  unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
17408 
17409  // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
17410  // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
17411  // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
17412  auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
17413  for (unsigned i = 0; i != NumElts; ++i) {
17414  if (Mask[i] < 0)
17415  continue;
17416  if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
17417  continue;
17418  return false;
17419  }
17420  return true;
17421  };
17422 
17423  // At the moment we just handle the case where we've truncated back to the
17424  // same size as before the extension.
17425  // TODO: handle more extension/truncation cases as cases arise.
17426  if (EltSizeInBits != ExtSrcSizeInBits)
17427  return SDValue();
17428 
17429  // We can remove *extend_vector_inreg only if the truncation happens at
17430  // the same scale as the extension.
17431  if (isTruncate(ExtScale))
17432  return DAG.getBitcast(VT, N00);
17433 
17434  return SDValue();
17435 }
17436 
17437 // Combine shuffles of splat-shuffles of the form:
17438 // shuffle (shuffle V, undef, splat-mask), undef, M
17439 // If splat-mask contains undef elements, we need to be careful about
17440 // introducing undef's in the folded mask which are not the result of composing
17441 // the masks of the shuffles.
17442 static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
17443  ShuffleVectorSDNode *Splat,
17444  SelectionDAG &DAG) {
17445  ArrayRef<int> SplatMask = Splat->getMask();
17446  assert(UserMask.size() == SplatMask.size() && "Mask length mismatch");
17447 
17448  // Prefer simplifying to the splat-shuffle, if possible. This is legal if
17449  // every undef mask element in the splat-shuffle has a corresponding undef
17450  // element in the user-shuffle's mask or if the composition of mask elements
17451  // would result in undef.
17452  // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
17453  // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
17454  // In this case it is not legal to simplify to the splat-shuffle because we
17455  // may be exposing to the users of the shuffle an undef element at index 1
17456  // which was not there before the combine.
17457  // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
17458  // In this case the composition of masks yields SplatMask, so it's ok to
17459  // simplify to the splat-shuffle.
17460  // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
17461  // In this case the composed mask includes all undef elements of SplatMask
17462  // and in addition sets element zero to undef. It is safe to simplify to
17463  // the splat-shuffle.
17464  auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
17465  ArrayRef<int> SplatMask) {
17466  for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
17467  if (UserMask[i] != -1 && SplatMask[i] == -1 &&
17468  SplatMask[UserMask[i]] != -1)
17469  return false;
17470  return true;
17471  };
17472  if (CanSimplifyToExistingSplat(UserMask, SplatMask))
17473  return SDValue(Splat, 0);
17474 
17475  // Create a new shuffle with a mask that is composed of the two shuffles'
17476  // masks.
17477  SmallVector<int, 32> NewMask;
17478  for (int Idx : UserMask)
17479  NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
17480 
17481  return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
17482  Splat->getOperand(0), Splat->getOperand(1),
17483  NewMask);
17484 }
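A standalone sketch of the mask composition at the end of the function, using the second example from the comment above (UserMask=[0,u,2,u], SplatMask=[2,u,2,u]).

#include <cstdio>

int main() {
  const int UserMask[4] = {0, -1, 2, -1};
  const int SplatMask[4] = {2, -1, 2, -1};
  int NewMask[4];
  for (int i = 0; i != 4; ++i) // compose: look the user lane up in the splat
    NewMask[i] = (UserMask[i] == -1) ? -1 : SplatMask[UserMask[i]];
  for (int M : NewMask)
    std::printf("%d ", M); // prints: 2 -1 2 -1, i.e. SplatMask itself
  return 0;
}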
17485 
17486 /// If the shuffle mask is taking exactly one element from the first vector
17487 /// operand and passing through all other elements from the second vector
17488 /// operand, return the index of the mask element that is choosing an element
17489 /// from the first operand. Otherwise, return -1.
17490 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
17491  int MaskSize = Mask.size();
17492  int EltFromOp0 = -1;
17493  // TODO: This does not match if there are undef elements in the shuffle mask.
17494  // Should we ignore undefs in the shuffle mask instead? The trade-off is
17495  // removing an instruction (a shuffle), but losing the knowledge that some
17496  // vector lanes are not needed.
17497  for (int i = 0; i != MaskSize; ++i) {
17498  if (Mask[i] >= 0 && Mask[i] < MaskSize) {
17499  // We're looking for a shuffle of exactly one element from operand 0.
17500  if (EltFromOp0 != -1)
17501  return -1;
17502  EltFromOp0 = i;
17503  } else if (Mask[i] != i + MaskSize) {
17504  // Nothing from operand 1 can change lanes.
17505  return -1;
17506  }
17507  }
17508  return EltFromOp0;
17509 }
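A standalone sketch of the predicate above: for mask <4,5,2,7> of two v4 inputs, only lane 2 takes an element from operand 0 and every other lane passes operand 1 through unmoved, so the function returns 2. The mask is hypothetical.

#include <cstdio>

int indexOfOneElementFromOp0(const int *Mask, int MaskSize) {
  int EltFromOp0 = -1;
  for (int i = 0; i != MaskSize; ++i) {
    if (Mask[i] >= 0 && Mask[i] < MaskSize) {
      if (EltFromOp0 != -1)
        return -1;           // more than one element from operand 0
      EltFromOp0 = i;
    } else if (Mask[i] != i + MaskSize) {
      return -1;             // an operand-1 lane changed position
    }
  }
  return EltFromOp0;
}

int main() {
  const int Mask[4] = {4, 5, 2, 7};
  std::printf("%d\n", indexOfOneElementFromOp0(Mask, 4)); // prints 2
  return 0;
}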
17510 
17511 /// If a shuffle inserts exactly one element from a source vector operand into
17512 /// another vector operand and we can access the specified element as a scalar,
17513 /// then we can eliminate the shuffle.
17514 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
17515  SelectionDAG &DAG) {
17516  // First, check if we are taking one element of a vector and shuffling that
17517  // element into another vector.
17518  ArrayRef<int> Mask = Shuf->getMask();
17519  SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
17520  SDValue Op0 = Shuf->getOperand(0);
17521  SDValue Op1 = Shuf->getOperand(1);
17522  int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
17523  if (ShufOp0Index == -1) {
17524  // Commute mask and check again.
17525  ShuffleVectorSDNode::commuteMask(CommutedMask);
17526  ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
17527  if (ShufOp0Index == -1)
17528  return SDValue();
17529  // Commute operands to match the commuted shuffle mask.
17530  std::swap(Op0, Op1);
17531  Mask = CommutedMask;
17532  }
17533 
17534  // The shuffle inserts exactly one element from operand 0 into operand 1.
17535  // Now see if we can access that element as a scalar via a real insert element
17536  // instruction.
17537  // TODO: We can try harder to locate the element as a scalar. Examples: it
17538  // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
17539  assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
17540  "Shuffle mask value must be from operand 0");
17541  if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
17542  return SDValue();
17543 
17544  auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
17545  if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
17546  return SDValue();
17547 
17548  // There's an existing insertelement with constant insertion index, so we
17549  // don't need to check the legality/profitability of a replacement operation
17550  // that differs at most in the constant value. The target should be able to
17551  // lower any of those in a similar way. If not, legalization will expand this
17552  // to a scalar-to-vector plus shuffle.
17553  //
17554  // Note that the shuffle may move the scalar from the position that the insert
17555  // element used. Therefore, our new insert element occurs at the shuffle's
17556  // mask index value, not the insert's index value.
17557  // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
17558  SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf),
17559  Op0.getOperand(2).getValueType());
17560  return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
17561  Op1, Op0.getOperand(1), NewInsIndex);
17562 }
17563 
17564 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
17565  EVT VT = N->getValueType(0);
17566  unsigned NumElts = VT.getVectorNumElements();
17567 
17568  SDValue N0 = N->getOperand(0);
17569  SDValue N1 = N->getOperand(1);
17570 
17571  assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
17572 
17573  // Canonicalize shuffle undef, undef -> undef
17574  if (N0.isUndef() && N1.isUndef())
17575  return DAG.getUNDEF(VT);
17576 
17577  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
17578 
17579  // Canonicalize shuffle v, v -> v, undef
17580  if (N0 == N1) {
17581  SmallVector<int, 8> NewMask;
17582  for (unsigned i = 0; i != NumElts; ++i) {
17583  int Idx = SVN->getMaskElt(i);
17584  if (Idx >= (int)NumElts) Idx -= NumElts;
17585  NewMask.push_back(Idx);
17586  }
17587  return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
17588  }
17589 
17590  // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
17591  if (N0.isUndef())
17592  return DAG.getCommutedVectorShuffle(*SVN);
17593 
17594  // Remove references to rhs if it is undef
17595  if (N1.isUndef()) {
17596  bool Changed = false;
17597  SmallVector<int, 8> NewMask;
17598  for (unsigned i = 0; i != NumElts; ++i) {
17599  int Idx = SVN->getMaskElt(i);
17600  if (Idx >= (int)NumElts) {
17601  Idx = -1;
17602  Changed = true;
17603  }
17604  NewMask.push_back(Idx);
17605  }
17606  if (Changed)
17607  return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
17608  }
17609 
17610  if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
17611  return InsElt;
17612 
17613  // A shuffle of a single vector that is a splat can always be folded.
17614  if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
17615  if (N1->isUndef() && N0Shuf->isSplat())
17616  return combineShuffleOfSplat(SVN->getMask(), N0Shuf, DAG);
17617 
17618  // If it is a splat, check if the argument vector is another splat or a
17619  // build_vector.
17620  if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
17621  SDNode *V = N0.getNode();
17622 
17623  // If this is a bit convert that changes the element type of the vector but
17624  // not the number of vector elements, look through it. Be careful not to
17625  // look through conversions that change things like v4f32 to v2f64.
17626  if (V->getOpcode() == ISD::BITCAST) {
17627  SDValue ConvInput = V->getOperand(0);
17628  if (ConvInput.getValueType().isVector() &&
17629  ConvInput.getValueType().getVectorNumElements() == NumElts)
17630  V = ConvInput.getNode();
17631  }
17632 
17633  if (V->getOpcode() == ISD::BUILD_VECTOR) {
17634  assert(V->getNumOperands() == NumElts &&
17635  "BUILD_VECTOR has wrong number of operands");
17636  SDValue Base;
17637  bool AllSame = true;
17638  for (unsigned i = 0; i != NumElts; ++i) {
17639  if (!V->getOperand(i).isUndef()) {
17640  Base = V->getOperand(i);
17641  break;
17642  }
17643  }
17644  // Splat of <u, u, u, u>, return <u, u, u, u>
17645  if (!Base.getNode())
17646  return N0;
17647  for (unsigned i = 0; i != NumElts; ++i) {
17648  if (V->getOperand(i) != Base) {
17649  AllSame = false;
17650  break;
17651  }
17652  }
17653  // Splat of <x, x, x, x>, return <x, x, x, x>
17654  if (AllSame)
17655  return N0;
17656 
17657  // Canonicalize any other splat as a build_vector.
17658  const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
17659  SmallVector<SDValue, 8> Ops(NumElts, Splatted);
17660  SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
17661 
17662  // We may have jumped through bitcasts, so the type of the
17663  // BUILD_VECTOR may not match the type of the shuffle.
17664  if (V->getValueType(0) != VT)
17665  NewBV = DAG.getBitcast(VT, NewBV);
17666  return NewBV;
17667  }
17668  }
17669 
17670  // Simplify source operands based on shuffle mask.
17671  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
17672  return SDValue(N, 0);
17673 
17674  // Match shuffles that can be converted to any_vector_extend_in_reg.
17675  if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
17676  return V;
17677 
17678  // Combine "truncate_vector_in_reg" style shuffles.
17679  if (SDValue V = combineTruncationShuffle(SVN, DAG))
17680  return V;
17681 
17682  if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
17683  Level < AfterLegalizeVectorOps &&
17684  (N1.isUndef() ||
17685  (N1.getOpcode() == ISD::CONCAT_VECTORS &&
17686  N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
17687  if (SDValue V = partitionShuffleOfConcats(N, DAG))
17688  return V;
17689  }
17690 
17691  // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
17692  // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
17693  if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
17694  if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
17695  return Res;
17696 
17697  // If this shuffle only has a single input that is a bitcasted shuffle,
17698  // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
17699  // back to their original types.
17700  if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
17701  N1.isUndef() && Level < AfterLegalizeVectorOps &&
17702  TLI.isTypeLegal(VT)) {
17703  auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
17704  if (Scale == 1)
17705  return SmallVector<int, 8>(Mask.begin(), Mask.end());
17706 
17707  SmallVector<int, 8> NewMask;
17708  for (int M : Mask)
17709  for (int s = 0; s != Scale; ++s)
17710  NewMask.push_back(M < 0 ? -1 : Scale * M + s);
17711  return NewMask;
17712  };
17713 
17714  SDValue BC0 = peekThroughOneUseBitcasts(N0);
17715  if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
17716  EVT SVT = VT.getScalarType();
17717  EVT InnerVT = BC0->getValueType(0);
17718  EVT InnerSVT = InnerVT.getScalarType();
17719 
17720  // Determine which shuffle works with the smaller scalar type.
17721  EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
17722  EVT ScaleSVT = ScaleVT.getScalarType();
17723 
17724  if (TLI.isTypeLegal(ScaleVT) &&
17725  0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
17726  0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
17727  int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
17728  int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
17729 
17730  // Scale the shuffle masks to the smaller scalar type.
17731  ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
17732  SmallVector<int, 8> InnerMask =
17733  ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
17734  SmallVector<int, 8> OuterMask =
17735  ScaleShuffleMask(SVN->getMask(), OuterScale);
17736 
17737  // Merge the shuffle masks.
17738  SmallVector<int, 8> NewMask;
17739  for (int M : OuterMask)
17740  NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
17741 
17742  // Test for shuffle mask legality over both commutations.
17743  SDValue SV0 = BC0->getOperand(0);
17744  SDValue SV1 = BC0->getOperand(1);
17745  bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
17746  if (!LegalMask) {
17747  std::swap(SV0, SV1);
17748  ShuffleVectorSDNode::commuteMask(NewMask);
17749  LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
17750  }
17751 
17752  if (LegalMask) {
17753  SV0 = DAG.getBitcast(ScaleVT, SV0);
17754  SV1 = DAG.getBitcast(ScaleVT, SV1);
17755  return DAG.getBitcast(
17756  VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
17757  }
17758  }
17759  }
17760  }
17761 
17762  // Canonicalize shuffles according to rules:
17763  // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
17764  // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
17765  // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
17766  if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
17767  N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
17768  TLI.isTypeLegal(VT)) {
17769  // The incoming shuffle must be of the same type as the result of the
17770  // current shuffle.
17771  assert(N1->getOperand(0).getValueType() == VT &&
17772  "Shuffle types don't match");
17773 
17774  SDValue SV0 = N1->getOperand(0);
17775  SDValue SV1 = N1->getOperand(1);
17776  bool HasSameOp0 = N0 == SV0;
17777  bool IsSV1Undef = SV1.isUndef();
17778  if (HasSameOp0 || IsSV1Undef || N0 == SV1)
17779  // Commute the operands of this shuffle so that next rule
17780  // will trigger.
17781  return DAG.getCommutedVectorShuffle(*SVN);
17782  }
17783 
17784  // Try to fold according to rules:
17785  // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
17786  // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
17787  // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
17788  // Don't try to fold shuffles with illegal type.
17789  // Only fold if this shuffle is the only user of the other shuffle.
17790  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
17791  Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
17792  ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
17793 
17794  // Don't try to fold splats; they're likely to simplify somehow, or they
17795  // might be free.
17796  if (OtherSV->isSplat())
17797  return SDValue();
17798 
17799  // The incoming shuffle must be of the same type as the result of the
17800  // current shuffle.
17801  assert(OtherSV->getOperand(0).getValueType() == VT &&
17802  "Shuffle types don't match");
17803 
17804  SDValue SV0, SV1;
17805  SmallVector<int, 8> Mask;
17806  // Compute the combined shuffle mask for a shuffle with SV0 as the first
17807  // operand, and SV1 as the second operand.
17808  for (unsigned i = 0; i != NumElts; ++i) {
17809  int Idx = SVN->getMaskElt(i);
17810  if (Idx < 0) {
17811  // Propagate Undef.
17812  Mask.push_back(Idx);
17813  continue;
17814  }
17815 
17816  SDValue CurrentVec;
17817  if (Idx < (int)NumElts) {
17818  // This shuffle index refers to the inner shuffle N0. Lookup the inner
17819  // shuffle mask to identify which vector is actually referenced.
17820  Idx = OtherSV->getMaskElt(Idx);
17821  if (Idx < 0) {
17822  // Propagate Undef.
17823  Mask.push_back(Idx);
17824  continue;
17825  }
17826 
17827  CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
17828  : OtherSV->getOperand(1);
17829  } else {
17830  // This shuffle index references an element within N1.
17831  CurrentVec = N1;
17832  }
17833 
17834  // Simple case where 'CurrentVec' is UNDEF.
17835  if (CurrentVec.isUndef()) {
17836  Mask.push_back(-1);
17837  continue;
17838  }
17839 
17840  // Canonicalize the shuffle index. We don't know yet if CurrentVec
17841  // will be the first or second operand of the combined shuffle.
17842  Idx = Idx % NumElts;
17843  if (!SV0.getNode() || SV0 == CurrentVec) {
17844  // Ok. CurrentVec is the left hand side.
17845  // Update the mask accordingly.
17846  SV0 = CurrentVec;
17847  Mask.push_back(Idx);
17848  continue;
17849  }
17850 
17851  // Bail out if we cannot convert the shuffle pair into a single shuffle.
17852  if (SV1.getNode() && SV1 != CurrentVec)
17853  return SDValue();
17854 
17855  // Ok. CurrentVec is the right hand side.
17856  // Update the mask accordingly.
17857  SV1 = CurrentVec;
17858  Mask.push_back(Idx + NumElts);
17859  }
17860 
17861  // Check if all indices in Mask are Undef. If so, propagate Undef.
17862  bool isUndefMask = true;
17863  for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
17864  isUndefMask &= Mask[i] < 0;
17865 
17866  if (isUndefMask)
17867  return DAG.getUNDEF(VT);
17868 
17869  if (!SV0.getNode())
17870  SV0 = DAG.getUNDEF(VT);
17871  if (!SV1.getNode())
17872  SV1 = DAG.getUNDEF(VT);
17873 
17874  // Avoid introducing shuffles with illegal mask.
17875  if (!TLI.isShuffleMaskLegal(Mask, VT)) {
17876  ShuffleVectorSDNode::commuteMask(Mask);
17877 
17878  if (!TLI.isShuffleMaskLegal(Mask, VT))
17879  return SDValue();
17880 
17881  // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
17882  // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
17883  // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
17884  std::swap(SV0, SV1);
17885  }
17886 
17887  // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
17888  // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
17889  // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
17890  return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
17891  }
17892 
17893  return SDValue();
17894 }
17895 
17896 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
17897  SDValue InVal = N->getOperand(0);
17898  EVT VT = N->getValueType(0);
17899 
17900  // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
17901  // with a VECTOR_SHUFFLE and possible truncate.
17902  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
17903  SDValue InVec = InVal->getOperand(0);
17904  SDValue EltNo = InVal->getOperand(1);
17905  auto InVecT = InVec.getValueType();
17906  if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
17907  SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
17908  int Elt = C0->getZExtValue();
17909  NewMask[0] = Elt;
17910  SDValue Val;
17911  // If we have an implicit truncate, do the truncate here as long as it's
17912  // legal; if it's not, fall through to the shuffle lowering below.
17913  if (VT.getScalarType() != InVal.getValueType() &&
17914  InVal.getValueType().isScalarInteger() &&
17915  isTypeLegal(VT.getScalarType())) {
17916  Val =
17917  DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
17918  return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
17919  }
17920  if (VT.getScalarType() == InVecT.getScalarType() &&
17921  VT.getVectorNumElements() <= InVecT.getVectorNumElements() &&
17922  TLI.isShuffleMaskLegal(NewMask, VT)) {
17923  Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec,
17924  DAG.getUNDEF(InVecT), NewMask);
17925  // If the initial vector is the correct size this shuffle is a
17926  // valid result.
17927  if (VT == InVecT)
17928  return Val;
17929  // If not we must truncate the vector.
17930  if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
17931  MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
17932  SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
17933  EVT SubVT =
17934  EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
17935  VT.getVectorNumElements());
17936  Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val,
17937  ZeroIdx);
17938  return Val;
17939  }
17940  }
17941  }
17942  }
17943 
17944  return SDValue();
17945 }
17946 
17947 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
17948  EVT VT = N->getValueType(0);
17949  SDValue N0 = N->getOperand(0);
17950  SDValue N1 = N->getOperand(1);
17951  SDValue N2 = N->getOperand(2);
17952 
17953  // If inserting an UNDEF, just return the original vector.
17954  if (N1.isUndef())
17955  return N0;
17956 
17957  // If this is an insert of an extracted vector into an undef vector, we can
17958  // just use the input to the extract.
17959  if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
17960  N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
17961  return N1.getOperand(0);
17962 
17963  // If we are inserting a bitcast value into an undef, with the same
17964  // number of elements, just use the bitcast input of the extract.
17965  // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
17966  // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
17967  if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
17968  N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
17969  N1.getOperand(0).getOperand(1) == N2 &&
17970  N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
17971  VT.getVectorNumElements() &&
17972  N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
17973  VT.getSizeInBits()) {
17974  return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
17975  }
17976 
17977  // If both N1 and N2 are bitcast values on which insert_subvector
17978  // would make sense, pull the bitcast through.
17979  // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
17980  // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
17981  if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
17982  SDValue CN0 = N0.getOperand(0);
17983  SDValue CN1 = N1.getOperand(0);
17984  EVT CN0VT = CN0.getValueType();
17985  EVT CN1VT = CN1.getValueType();
17986  if (CN0VT.isVector() && CN1VT.isVector() &&
17987  CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
17988  CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
17989  SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
17990  CN0.getValueType(), CN0, CN1, N2);
17991  return DAG.getBitcast(VT, NewINSERT);
17992  }
17993  }
17994 
17995  // Combine INSERT_SUBVECTORs where we are inserting to the same index.
17996  // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
17997  // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
17998  if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
17999  N0.getOperand(1).getValueType() == N1.getValueType() &&
18000  N0.getOperand(2) == N2)
18001  return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
18002  N1, N2);
18003 
18004  // Eliminate an intermediate insert into an undef vector:
18005  // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
18006  // insert_subvector undef, X, N2
18007  if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
18008  N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
18009  return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
18010  N1.getOperand(1), N2);
18011 
18012  if (!isa<ConstantSDNode>(N2))
18013  return SDValue();
18014 
18015  unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
18016 
18017  // Canonicalize insert_subvector dag nodes.
18018  // Example:
18019  // (insert_subvector (insert_subvector A, Idx0), Idx1)
18020  // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
18021  if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
18022  N1.getValueType() == N0.getOperand(1).getValueType() &&
18023  isa<ConstantSDNode>(N0.getOperand(2))) {
18024  unsigned OtherIdx = N0.getConstantOperandVal(2);
18025  if (InsIdx < OtherIdx) {
18026  // Swap nodes.
18027  SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
18028  N0.getOperand(0), N1, N2);
18029  AddToWorklist(NewOp.getNode());
18030  return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
18031  VT, NewOp, N0.getOperand(1), N0.getOperand(2));
18032  }
18033  }
18034 
18035  // If the input vector is a concatenation, and the insert replaces
18036  // one of the pieces, we can optimize into a single concat_vectors.
18037  if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
18038  N0.getOperand(0).getValueType() == N1.getValueType()) {
18039  unsigned Factor = N1.getValueType().getVectorNumElements();
18040 
18041  SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
18042  Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
18043 
18044  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
18045  }
18046 
18047  // Simplify source operands based on insertion.
18048  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
18049  return SDValue(N, 0);
18050 
18051  return SDValue();
18052 }
18053 
18054 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
18055  SDValue N0 = N->getOperand(0);
18056 
18057  // fold (fp_to_fp16 (fp16_to_fp op)) -> op
18058  if (N0->getOpcode() == ISD::FP16_TO_FP)
18059  return N0->getOperand(0);
18060 
18061  return SDValue();
18062 }
18063 
18064 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
18065  SDValue N0 = N->getOperand(0);
18066 
18067  // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
18068  if (N0->getOpcode() == ISD::AND) {
18069  ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
18070  if (AndConst && AndConst->getAPIntValue() == 0xffff) {
18071  return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
18072  N0.getOperand(0));
18073  }
18074  }
18075 
18076  return SDValue();
18077 }
18078 
18079 /// Returns a vector_shuffle if it is able to transform an AND to a vector_shuffle
18080 /// with the destination vector and a zero vector.
18081 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
18082 /// vector_shuffle V, Zero, <0, 4, 2, 4>
18083 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
18084  assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
18085 
18086  EVT VT = N->getValueType(0);
18087  SDValue LHS = N->getOperand(0);
18088  SDValue RHS = peekThroughBitcasts(N->getOperand(1));
18089  SDLoc DL(N);
18090 
18091  // Make sure we're not running after operation legalization where it
18092  // may have custom lowered the vector shuffles.
18093  if (LegalOperations)
18094  return SDValue();
18095 
18096  if (RHS.getOpcode() != ISD::BUILD_VECTOR)
18097  return SDValue();
18098 
18099  EVT RVT = RHS.getValueType();
18100  unsigned NumElts = RHS.getNumOperands();
18101 
18102  // Attempt to create a valid clear mask, splitting the mask into
18103  // sub elements and checking to see if each is
18104  // all zeros or all ones - suitable for shuffle masking.
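 // For example (illustrative, little-endian): AND v2i32 V, <0x0000ffff, -1>
 // splits each i32 into two i16 halves, giving the v4i16 clear mask
 // <0, 5, 2, 3>, i.e. a shuffle of V with zero that keeps sub-elements
 // 0, 2 and 3 and clears sub-element 1.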
18105  auto BuildClearMask = [&](int Split) {
18106  int NumSubElts = NumElts * Split;
18107  int NumSubBits = RVT.getScalarSizeInBits() / Split;
18108 
18109  SmallVector<int, 8> Indices;
18110  for (int i = 0; i != NumSubElts; ++i) {
18111  int EltIdx = i / Split;
18112  int SubIdx = i % Split;
18113  SDValue Elt = RHS.getOperand(EltIdx);
18114  if (Elt.isUndef()) {
18115  Indices.push_back(-1);
18116  continue;
18117  }
18118 
18119  APInt Bits;
18120  if (isa<ConstantSDNode>(Elt))
18121  Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
18122  else if (isa<ConstantFPSDNode>(Elt))
18123  Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
18124  else
18125  return SDValue();
18126 
18127  // Extract the sub element from the constant bit mask.
18128  if (DAG.getDataLayout().isBigEndian()) {
18129  Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
18130  } else {
18131  Bits.lshrInPlace(SubIdx * NumSubBits);
18132  }
18133 
18134  if (Split > 1)
18135  Bits = Bits.trunc(NumSubBits);
18136 
18137  if (Bits.isAllOnesValue())
18138  Indices.push_back(i);
18139  else if (Bits == 0)
18140  Indices.push_back(i + NumSubElts);
18141  else
18142  return SDValue();
18143  }
18144 
18145  // Let's see if the target supports this vector_shuffle.
18146  EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
18147  EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
18148  if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
18149  return SDValue();
18150 
18151  SDValue Zero = DAG.getConstant(0, DL, ClearVT);
18152  return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
18153  DAG.getBitcast(ClearVT, LHS),
18154  Zero, Indices));
18155  };
18156 
18157  // Determine maximum split level (byte level masking).
18158  int MaxSplit = 1;
18159  if (RVT.getScalarSizeInBits() % 8 == 0)
18160  MaxSplit = RVT.getScalarSizeInBits() / 8;
18161 
18162  for (int Split = 1; Split <= MaxSplit; ++Split)
18163  if (RVT.getScalarSizeInBits() % Split == 0)
18164  if (SDValue S = BuildClearMask(Split))
18165  return S;
18166 
18167  return SDValue();
18168 }
18169 
18170 /// Visit a binary vector operation, like ADD.
18171 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
18172  assert(N->getValueType(0).isVector() &&
18173  "SimplifyVBinOp only works on vectors!");
18174 
18175  SDValue LHS = N->getOperand(0);
18176  SDValue RHS = N->getOperand(1);
18177  SDValue Ops[] = {LHS, RHS};
18178 
18179  // See if we can constant fold the vector operation.
18180  if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
18181  N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
18182  return Fold;
18183 
18184  // Type legalization might introduce new shuffles in the DAG.
18185  // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
18186  // -> (shuffle (VBinOp (A, B)), Undef, Mask).
18187  if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
18188  isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
18189  LHS.getOperand(1).isUndef() &&
18190  RHS.getOperand(1).isUndef()) {
18191  ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
18192  ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
18193 
18194  if (SVN0->getMask().equals(SVN1->getMask())) {
18195  EVT VT = N->getValueType(0);
18196  SDValue UndefVector = LHS.getOperand(1);
18197  SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
18198  LHS.getOperand(0), RHS.getOperand(0),
18199  N->getFlags());
18200  AddUsersToWorklist(N);
18201  return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
18202  SVN0->getMask());
18203  }
18204  }
18205 
18206  return SDValue();
18207 }
18208 
18209 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
18210  SDValue N2) {
18211  assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
18212 
18213  SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
18214  cast<CondCodeSDNode>(N0.getOperand(2))->get());
18215 
18216  // If we got a simplified select_cc node back from SimplifySelectCC, then
18217  // break it down into a new SETCC node, and a new SELECT node, and then return
18218  // the SELECT node, since we were called with a SELECT node.
18219  if (SCC.getNode()) {
18220  // Check to see if we got a select_cc back (to turn into setcc/select).
18221  // Otherwise, just return whatever node we got back, like fabs.
18222  if (SCC.getOpcode() == ISD::SELECT_CC) {
18223  SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
18224  N0.getValueType(),
18225  SCC.getOperand(0), SCC.getOperand(1),
18226  SCC.getOperand(4));
18227  AddToWorklist(SETCC.getNode());
18228  return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
18229  SCC.getOperand(2), SCC.getOperand(3));
18230  }
18231 
18232  return SCC;
18233  }
18234  return SDValue();
18235 }
18236 
18237 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
18238 /// being selected between, see if we can simplify the select. Callers of this
18239 /// should assume that TheSelect is deleted if this returns true. As such, they
18240 /// should return the appropriate thing (e.g. the node) back to the top-level of
18241 /// the DAG combiner loop to avoid it being looked at.
18242 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
18243  SDValue RHS) {
18244  // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
18245  // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
18246  if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
18247  if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
18248  // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
18249  SDValue Sqrt = RHS;
18250  ISD::CondCode CC;
18251  SDValue CmpLHS;
18252  const ConstantFPSDNode *Zero = nullptr;
18253 
18254  if (TheSelect->getOpcode() == ISD::SELECT_CC) {
18255  CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
18256  CmpLHS = TheSelect->getOperand(0);
18257  Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
18258  } else {
18259  // SELECT or VSELECT
18260  SDValue Cmp = TheSelect->getOperand(0);
18261  if (Cmp.getOpcode() == ISD::SETCC) {
18262  CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
18263  CmpLHS = Cmp.getOperand(0);
18264  Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
18265  }
18266  }
18267  if (Zero && Zero->isZero() &&
18268  Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
18269  CC == ISD::SETULT || CC == ISD::SETLT)) {
18270  // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
18271  CombineTo(TheSelect, Sqrt);
18272  return true;
18273  }
18274  }
18275  }
18276  // Cannot simplify select with vector condition
18277  if (TheSelect->getOperand(0).getValueType().isVector()) return false;
18278 
18279  // If this is a select from two identical things, try to pull the operation
18280  // through the select.
18281  if (LHS.getOpcode() != RHS.getOpcode() ||
18282  !LHS.hasOneUse() || !RHS.hasOneUse())
18283  return false;
18284 
18285  // If this is a load and the token chain is identical, replace the select
18286  // of two loads with a load through a select of the address to load from.
18287  // This triggers in things like "select bool X, 10.0, 123.0" after the FP
18288  // constants have been dropped into the constant pool.
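 // That is (illustrative), the two loads become a single
 //   load (select Cond, LAddr, RAddr)
 // so exactly one memory access remains on either path.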
18289  if (LHS.getOpcode() == ISD::LOAD) {
18290  LoadSDNode *LLD = cast<LoadSDNode>(LHS);
18291  LoadSDNode *RLD = cast<LoadSDNode>(RHS);
18292 
18293  // Token chains must be identical.
18294  if (LHS.getOperand(0) != RHS.getOperand(0) ||
18295  // Do not let this transformation reduce the number of volatile loads.
18296  LLD->isVolatile() || RLD->isVolatile() ||
18297  // FIXME: If either is a pre/post inc/dec load,
18298  // we'd need to split out the address adjustment.
18299  LLD->isIndexed() || RLD->isIndexed() ||
18300  // If this is an EXTLOAD, the VT's must match.
18301  LLD->getMemoryVT() != RLD->getMemoryVT() ||
18302  // If this is an EXTLOAD, the kind of extension must match.
18303  (LLD->getExtensionType() != RLD->getExtensionType() &&
18304  // The only exception is if one of the extensions is anyext.
18305  LLD->getExtensionType() != ISD::EXTLOAD &&
18306  RLD->getExtensionType() != ISD::EXTLOAD) ||
18307  // FIXME: this discards src value information. This is
18308  // over-conservative. It would be beneficial to be able to remember
18309  // both potential memory locations. Since we are discarding
18310  // src value info, don't do the transformation if the memory
18311  // locations are not in the default address space.
18312  LLD->getPointerInfo().getAddrSpace() != 0 ||
18313  RLD->getPointerInfo().getAddrSpace() != 0 ||
18314  !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
18315  LLD->getBasePtr().getValueType()))
18316  return false;
18317 
18318  // The loads must not depend on one another.
18319  if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
18320  return false;
18321 
18322  // Check that the select condition doesn't reach either load. If so,
18323  // folding this will induce a cycle into the DAG. If not, this is safe to
18324  // xform, so create a select of the addresses.
18325 
18326  SmallPtrSet<const SDNode *, 32> Visited;
18327  SmallVector<const SDNode *, 16> Worklist;
18328 
18329  // Always fail if LLD and RLD are not independent. TheSelect is a
18330  // predecessor to all Nodes in question so we need not search past it.
18331 
18332  Visited.insert(TheSelect);
18333  Worklist.push_back(LLD);
18334  Worklist.push_back(RLD);
18335 
18336  if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
18337  SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
18338  return false;
18339 
18340  SDValue Addr;
18341  if (TheSelect->getOpcode() == ISD::SELECT) {
18342  // We cannot do this optimization if any pair of {RLD, LLD} is a
18343  // predecessor to {RLD, LLD, CondNode}. As we've already compared the
18344  // Loads, we only need to check if CondNode is a successor to one of the
18345  // loads. We can further avoid this if there's no use of their chain
18346  // value.
18347  SDNode *CondNode = TheSelect->getOperand(0).getNode();
18348  Worklist.push_back(CondNode);
18349 
18350  if ((LLD->hasAnyUseOfValue(1) &&
18351  SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
18352  (RLD->hasAnyUseOfValue(1) &&
18353  SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
18354  return false;
18355 
18356  Addr = DAG.getSelect(SDLoc(TheSelect),
18357  LLD->getBasePtr().getValueType(),
18358  TheSelect->getOperand(0), LLD->getBasePtr(),
18359  RLD->getBasePtr());
18360  } else { // Otherwise SELECT_CC
18361  // We cannot do this optimization if any pair of {RLD, LLD} is a
18362  // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
18363  // the Loads, we only need to check if CondLHS/CondRHS is a successor to
18364  // one of the loads. We can further avoid this if there's no use of their
18365  // chain value.
18366 
18367  SDNode *CondLHS = TheSelect->getOperand(0).getNode();
18368  SDNode *CondRHS = TheSelect->getOperand(1).getNode();
18369  Worklist.push_back(CondLHS);
18370  Worklist.push_back(CondRHS);
18371 
18372  if ((LLD->hasAnyUseOfValue(1) &&
18373  SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
18374  (RLD->hasAnyUseOfValue(1) &&
18375  SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
18376  return false;
18377 
18378  Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
18379  LLD->getBasePtr().getValueType(),
18380  TheSelect->getOperand(0),
18381  TheSelect->getOperand(1),
18382  LLD->getBasePtr(), RLD->getBasePtr(),
18383  TheSelect->getOperand(4));
18384  }
18385 
18386  SDValue Load;
18387  // It is safe to replace the two loads if they have different alignments,
18388  // but the new load must be the minimum (most restrictive) alignment of the
18389  // inputs.
18390  unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
18391  MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
18392  if (!RLD->isInvariant())
18393  MMOFlags &= ~MachineMemOperand::MOInvariant;
18394  if (!RLD->isDereferenceable())
18395  MMOFlags &= ~MachineMemOperand::MODereferenceable;
18396  if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
18397  // FIXME: Discards pointer and AA info.
18398  Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
18399  LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
18400  MMOFlags);
18401  } else {
18402  // FIXME: Discards pointer and AA info.
18403  Load = DAG.getExtLoad(
18404  LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
18405  : LLD->getExtensionType(),
18406  SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
18407  MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
18408  }
18409 
18410  // Users of the select now use the result of the load.
18411  CombineTo(TheSelect, Load);
18412 
18413  // Users of the old loads now use the new load's chain. We know the
18414  // old-load value is dead now.
18415  CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
18416  CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
18417  return true;
18418  }
18419 
18420  return false;
18421 }
18422 
18423 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
18424 /// bitwise 'and'.
18425 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
18426  SDValue N1, SDValue N2, SDValue N3,
18427  ISD::CondCode CC) {
18428  // If this is a select where the false operand is zero and the compare is a
18429  // check of the sign bit, see if we can perform the "gzip trick":
18430  // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
18431  // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
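 // As a concrete instance (illustrative, i32): select_cc setlt X, 0, A, 0
 // becomes (and (sra X, 31), A); the arithmetic shift yields all-ones when
 // X is negative and zero otherwise, which masks A directly.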
18432  EVT XType = N0.getValueType();
18433  EVT AType = N2.getValueType();
18434  if (!isNullConstant(N3) || !XType.bitsGE(AType))
18435  return SDValue();
18436 
18437  // If the comparison is testing for a positive value, we have to invert
18438  // the sign bit mask, so only do that transform if the target has a bitwise
18439  // 'and not' instruction (the invert is free).
18440  if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
18441  // (X > -1) ? A : 0
18442  // (X > 0) ? X : 0 <-- This is canonical signed max.
18443  if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
18444  return SDValue();
18445  } else if (CC == ISD::SETLT) {
18446  // (X < 0) ? A : 0
18447  // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
18448  if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
18449  return SDValue();
18450  } else {
18451  return SDValue();
18452  }
18453 
18454  // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
18455  // constant.
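 // E.g. (illustrative, i32): for A == 8 (only bit 3 set), ShCt below is
 // 32 - log2(8) - 1 == 28, so the fold produces (and (srl X, 28), 8),
 // moving the sign bit straight into bit 3.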
18456  EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
18457  auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
18458  if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
18459  unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
18460  SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
18461  SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
18462  AddToWorklist(Shift.getNode());
18463 
18464  if (XType.bitsGT(AType)) {
18465  Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
18466  AddToWorklist(Shift.getNode());
18467  }
18468 
18469  if (CC == ISD::SETGT)
18470  Shift = DAG.getNOT(DL, Shift, AType);
18471 
18472  return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
18473  }
18474 
18475  SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
18476  SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
18477  AddToWorklist(Shift.getNode());
18478 
18479  if (XType.bitsGT(AType)) {
18480  Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
18481  AddToWorklist(Shift.getNode());
18482  }
18483 
18484  if (CC == ISD::SETGT)
18485  Shift = DAG.getNOT(DL, Shift, AType);
18486 
18487  return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
18488 }
18489 
18490 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
18491 /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
18492 /// in it. This may be a win when the constant is not otherwise available
18493 /// because it replaces two constant pool loads with one.
18494 SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
18495  const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
18496  ISD::CondCode CC) {
18497  if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType().isFloatingPoint()))
18498  return SDValue();
18499 
18500  // If we are before legalize types, we want the other legalization to happen
18501  // first (for example, to avoid messing with soft float).
18502  auto *TV = dyn_cast<ConstantFPSDNode>(N2);
18503  auto *FV = dyn_cast<ConstantFPSDNode>(N3);
18504  EVT VT = N2.getValueType();
18505  if (!TV || !FV || !TLI.isTypeLegal(VT))
18506  return SDValue();
18507 
18508  // If a constant can be materialized without loads, this does not make sense.
18509  if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
18510  TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) ||
18511  TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0)))
18512  return SDValue();
18513 
18514  // If both constants have multiple uses, then we won't need to do an extra
18515  // load. The values are likely around in registers for other users.
18516  if (!TV->hasOneUse() && !FV->hasOneUse())
18517  return SDValue();
18518 
18519  Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
18520  const_cast<ConstantFP*>(TV->getConstantFPValue()) };
18521  Type *FPTy = Elts[0]->getType();
18522  const DataLayout &TD = DAG.getDataLayout();
18523 
18524  // Create a ConstantArray of the two constants.
18525  Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
18526  SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
18527  TD.getPrefTypeAlignment(FPTy));
18528  unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
18529 
18530  // Get offsets to the 0 and 1 elements of the array, so we can select between
18531  // them.
18532  SDValue Zero = DAG.getIntPtrConstant(0, DL);
18533  unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
18534  SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
18535  SDValue Cond =
18536  DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
18537  AddToWorklist(Cond.getNode());
18538  SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
18539  AddToWorklist(CstOffset.getNode());
18540  CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
18541  AddToWorklist(CPIdx.getNode());
18542  return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
18543  MachinePointerInfo::getConstantPool(
18544  DAG.getMachineFunction()), Alignment);
18545 }
18546 
18547 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
18548 /// where 'cond' is the comparison specified by CC.
18549 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
18550  SDValue N2, SDValue N3, ISD::CondCode CC,
18551  bool NotExtCompare) {
18552  // (x ? y : y) -> y.
18553  if (N2 == N3) return N2;
18554 
18555  EVT CmpOpVT = N0.getValueType();
18556  EVT VT = N2.getValueType();
18557  auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
18558  auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
18559  auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
18560 
18561  // Determine if the condition we're dealing with is constant.
18562  SDValue SCC = SimplifySetCC(getSetCCResultType(CmpOpVT), N0, N1, CC, DL,
18563  false);
18564  if (SCC.getNode()) AddToWorklist(SCC.getNode());
18565 
18566  if (auto *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
18567  // fold select_cc true, x, y -> x
18568  // fold select_cc false, x, y -> y
18569  return !SCCC->isNullValue() ? N2 : N3;
18570  }
18571 
18572  if (SDValue V =
18573  convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
18574  return V;
18575 
18576  if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
18577  return V;
18578 
18579  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
18580  // where y has a single bit set.
18581  // In plain terms: we can turn the SELECT_CC into an AND
18582  // when the condition can be materialized as an all-ones register. Any
18583  // single bit-test can be materialized as an all-ones register with
18584  // shift-left and shift-right-arith.
18585  if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
18586  N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
18587  SDValue AndLHS = N0->getOperand(0);
18588  auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
18589  if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
18590  // Shift the tested bit over the sign bit.
18591  const APInt &AndMask = ConstAndRHS->getAPIntValue();
18592  SDValue ShlAmt =
18593  DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
18594  getShiftAmountTy(AndLHS.getValueType()));
18595  SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
18596 
18597  // Now arithmetic right shift it all the way over, so the result is either
18598  // all-ones, or zero.
18599  SDValue ShrAmt =
18600  DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
18601  getShiftAmountTy(Shl.getValueType()));
18602  SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
18603 
18604  return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
18605  }
18606  }
18607 
18608  // fold select C, 16, 0 -> shl C, 4
18609  bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
18610  bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
18611 
18612  if ((Fold || Swap) &&
18613  TLI.getBooleanContents(CmpOpVT) ==
18614  TargetLowering::ZeroOrOneBooleanContent &&
18615  (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
18616 
18617  if (Swap) {
18618  CC = ISD::getSetCCInverse(CC, CmpOpVT.isInteger());
18619  std::swap(N2C, N3C);
18620  }
18621 
18622  // If the caller doesn't want us to simplify this into a zext of a compare,
18623  // don't do it.
18624  if (NotExtCompare && N2C->isOne())
18625  return SDValue();
18626 
18627  SDValue Temp, SCC;
18628  // zext (setcc n0, n1)
18629  if (LegalTypes) {
18630  SCC = DAG.getSetCC(DL, getSetCCResultType(CmpOpVT), N0, N1, CC);
18631  if (VT.bitsLT(SCC.getValueType()))
18632  Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
18633  else
18634  Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
18635  } else {
18636  SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
18637  Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
18638  }
18639 
18640  AddToWorklist(SCC.getNode());
18641  AddToWorklist(Temp.getNode());
18642 
18643  if (N2C->isOne())
18644  return Temp;
18645 
18646  // shl setcc result by log2 n2c
18647  return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
18648  DAG.getConstant(N2C->getAPIntValue().logBase2(),
18649  SDLoc(Temp),
18650  getShiftAmountTy(Temp.getValueType())));
18651  }
18652 
18653  // Check to see if this is an integer abs.
18654  // select_cc setg[te] X, 0, X, -X ->
18655  // select_cc setgt X, -1, X, -X ->
18656  // select_cc setl[te] X, 0, -X, X ->
18657  // select_cc setlt X, 1, -X, X ->
18658  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
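 // A quick check of the expansion (illustrative, i32): X = -5 gives
 // Y = -1, add(X, Y) = -6, and xor(-6, -1) = 5; for X = 5, Y = 0 and the
 // result is unchanged.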
18659  if (N1C) {
18660  ConstantSDNode *SubC = nullptr;
18661  if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
18662  (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
18663  N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
18664  SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
18665  else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
18666  (N1C->isOne() && CC == ISD::SETLT)) &&
18667  N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
18668  SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
18669 
18670  if (SubC && SubC->isNullValue() && CmpOpVT.isInteger()) {
18671  SDLoc DL(N0);
18672  SDValue Shift = DAG.getNode(ISD::SRA, DL, CmpOpVT, N0,
18673  DAG.getConstant(CmpOpVT.getSizeInBits() - 1,
18674  DL,
18675  getShiftAmountTy(CmpOpVT)));
18676  SDValue Add = DAG.getNode(ISD::ADD, DL, CmpOpVT, N0, Shift);
18677  AddToWorklist(Shift.getNode());
18678  AddToWorklist(Add.getNode());
18679  return DAG.getNode(ISD::XOR, DL, CmpOpVT, Add, Shift);
18680  }
18681  }
18682 
18683  // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
18684  // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
18685  // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
18686  // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
18687  // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
18688  // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
18689  // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
18690  // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
18691  if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
18692  SDValue ValueOnZero = N2;
18693  SDValue Count = N3;
18694  // If the condition is NE instead of E, swap the operands.
18695  if (CC == ISD::SETNE)
18696  std::swap(ValueOnZero, Count);
18697  // Check if the value on zero is a constant equal to the bits in the type.
18698  if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
18699  if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
18700  // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
18701  // legal, combine to just cttz.
18702  if ((Count.getOpcode() == ISD::CTTZ ||
18703  Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
18704  N0 == Count.getOperand(0) &&
18705  (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
18706  return DAG.getNode(ISD::CTTZ, DL, VT, N0);
18707  // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
18708  // legal, combine to just ctlz.
18709  if ((Count.getOpcode() == ISD::CTLZ ||
18710  Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
18711  N0 == Count.getOperand(0) &&
18712  (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
18713  return DAG.getNode(ISD::CTLZ, DL, VT, N0);
18714  }
18715  }
18716  }
18717 
18718  return SDValue();
18719 }
18720 
18721 /// This is a stub for TargetLowering::SimplifySetCC.
18722 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
18723  ISD::CondCode Cond, const SDLoc &DL,
18724  bool foldBooleans) {
18725  TargetLowering::DAGCombinerInfo
18726  DagCombineInfo(DAG, Level, false, this);
18727  return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
18728 }
18729 
18730 /// Given an ISD::SDIV node expressing a divide by constant, return
18731 /// a DAG expression to select that will generate the same value by multiplying
18732 /// by a magic number.
18733 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
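/// For instance (an illustrative sketch, not from the source): on a 32-bit
/// target, X sdiv 3 can be lowered without a divide as
///   Q = mulhs(X, 0x55555556);    // high half of the 64-bit product
///   Q = Q + ((unsigned)Q >> 31); // round the quotient toward zero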
18734 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
18735  // when optimising for minimum size, we don't want to expand a div to a mul
18736  // and a shift.
18737  if (DAG.getMachineFunction().getFunction().optForMinSize())
18738  return SDValue();
18739 
18740  SmallVector<SDNode *, 8> Built;
18741  if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
18742  for (SDNode *N : Built)
18743  AddToWorklist(N);
18744  return S;
18745  }
18746 
18747  return SDValue();
18748 }
18749 
18750 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
18751 /// DAG expression that will generate the same value by right shifting.
18752 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
18753  ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
18754  if (!C)
18755  return SDValue();
18756 
18757  // Avoid division by zero.
18758  if (C->isNullValue())
18759  return SDValue();
18760 
18761  SmallVector<SDNode *, 8> Built;
18762  if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
18763  for (SDNode *N : Built)
18764  AddToWorklist(N);
18765  return S;
18766  }
18767 
18768  return SDValue();
18769 }
18770 
18771 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
18772 /// expression that will generate the same value by multiplying by a magic
18773 /// number.
18774 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
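/// E.g. (illustrative): on a 32-bit target, X udiv 3 becomes
///   mulhu(X, 0xAAAAAAAB) >> 1
/// since 0xAAAAAAAB == ceil(2^33 / 3), so multiply-high plus one extra
/// shift computes floor(X * ceil(2^33 / 3) / 2^33) == floor(X / 3).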
18775 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
18776  // when optimising for minimum size, we don't want to expand a div to a mul
18777  // and a shift.
18778  if (DAG.getMachineFunction().getFunction().optForMinSize())
18779  return SDValue();
18780 
18781  SmallVector<SDNode *, 8> Built;
18782  if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
18783  for (SDNode *N : Built)
18784  AddToWorklist(N);
18785  return S;
18786  }
18787 
18788  return SDValue();
18789 }
18790 
18791 /// Determines the LogBase2 value for a non-null input value using the
18792 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
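/// For example, for a 32-bit element: LogBase2(16) = (32 - 1) - ctlz(16)
/// = 31 - 27 = 4.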
18793 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
18794  EVT VT = V.getValueType();
18795  unsigned EltBits = VT.getScalarSizeInBits();
18796  SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
18797  SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
18798  SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
18799  return LogBase2;
18800 }
18801 
18802 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
18803 /// For the reciprocal, we need to find the zero of the function:
18804 /// F(X) = 1/X - A [which has a zero at X = 1/A]
18805 /// =>
18806 /// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
18807 /// does not require additional intermediate precision]
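/// (Newton's step spelled out: X_{i+1} = X_i - (1/X_i - A) / (-1/X_i^2)
///   = X_i + X_i (1 - A X_i), matching the iteration used below.)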
18808 SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
18809  if (Level >= AfterLegalizeDAG)
18810  return SDValue();
18811 
18812  // TODO: Handle half and/or extended types?
18813  EVT VT = Op.getValueType();
18814  if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
18815  return SDValue();
18816 
18817  // If estimates are explicitly disabled for this function, we're done.
18818  MachineFunction &MF = DAG.getMachineFunction();
18819  int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
18820  if (Enabled == TLI.ReciprocalEstimate::Disabled)
18821  return SDValue();
18822 
18823  // Estimates may be explicitly enabled for this type with a custom number of
18824  // refinement steps.
18825  int Iterations = TLI.getDivRefinementSteps(VT, MF);
18826  if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
18827  AddToWorklist(Est.getNode());
18828 
18829  if (Iterations) {
18830  EVT VT = Op.getValueType();
18831  SDLoc DL(Op);
18832  SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
18833 
18834  // Newton iterations: Est = Est + Est (1 - Arg * Est)
18835  for (int i = 0; i < Iterations; ++i) {
18836  SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
18837  AddToWorklist(NewEst.getNode());
18838 
18839  NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
18840  AddToWorklist(NewEst.getNode());
18841 
18842  NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
18843  AddToWorklist(NewEst.getNode());
18844 
18845  Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
18846  AddToWorklist(Est.getNode());
18847  }
18848  }
18849  return Est;
18850  }
18851 
18852  return SDValue();
18853 }
18854 
18855 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
18856 /// For the reciprocal sqrt, we need to find the zero of the function:
18857 /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
18858 /// =>
18859 /// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
18860 /// As a result, we precompute A/2 prior to the iteration loop.
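/// (Derivation sketch: F'(X) = -2/X^3, so Newton's step gives
///   X_{i+1} = X_i + (X_i/2)(1 - A X_i^2) = X_i (1.5 - (A/2) X_i^2),
/// which is the form implemented below.)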
18861 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
18862  unsigned Iterations,
18863  SDNodeFlags Flags, bool Reciprocal) {
18864  EVT VT = Arg.getValueType();
18865  SDLoc DL(Arg);
18866  SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
18867 
18868  // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
18869  // this entire sequence requires only one FP constant.
18870  SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
18871  AddToWorklist(HalfArg.getNode());
18872 
18873  HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
18874  AddToWorklist(HalfArg.getNode());
18875 
18876  // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
18877  for (unsigned i = 0; i < Iterations; ++i) {
18878  SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
18879  AddToWorklist(NewEst.getNode());
18880 
18881  NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
18882  AddToWorklist(NewEst.getNode());
18883 
18884  NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
18885  AddToWorklist(NewEst.getNode());
18886 
18887  Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
18888  AddToWorklist(Est.getNode());
18889  }
18890 
18891  // If non-reciprocal square root is requested, multiply the result by Arg.
18892  if (!Reciprocal) {
18893  Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
18894  AddToWorklist(Est.getNode());
18895  }
18896 
18897  return Est;
18898 }
18899 
18900 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
18901 /// For the reciprocal sqrt, we need to find the zero of the function:
18902 /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
18903 /// =>
18904 /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
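/// (This is the same step as the one-constant form, refactored so only the
/// constants -0.5 and -3.0 appear:
///   X_i (1.5 - (A/2) X_i^2) == (-0.5 * X_i) * (A * X_i * X_i - 3.0).)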
18905 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
18906  unsigned Iterations,
18907  SDNodeFlags Flags, bool Reciprocal) {
18908  EVT VT = Arg.getValueType();
18909  SDLoc DL(Arg);
18910  SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
18911  SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
18912 
18913  // This routine must enter the loop below to work correctly
18914  // when (Reciprocal == false).
18915  assert(Iterations > 0);
18916 
18917  // Newton iterations for reciprocal square root:
18918  // E = (E * -0.5) * ((A * E) * E + -3.0)
18919  for (unsigned i = 0; i < Iterations; ++i) {
18920  SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
18921  AddToWorklist(AE.getNode());
18922 
18923  SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
18924  AddToWorklist(AEE.getNode());
18925 
18926  SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
18927  AddToWorklist(RHS.getNode());
18928 
18929  // When calculating a square root at the last iteration build:
18930  // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
18931  // (notice a common subexpression)
18932  SDValue LHS;
18933  if (Reciprocal || (i + 1) < Iterations) {
18934  // RSQRT: LHS = (E * -0.5)
18935  LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
18936  } else {
18937  // SQRT: LHS = (A * E) * -0.5
18938  LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
18939  }
18940  AddToWorklist(LHS.getNode());
18941 
18942  Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
18943  AddToWorklist(Est.getNode());
18944  }
18945 
18946  return Est;
18947 }
18948 
18949 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
18950 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
18951 /// Op can be zero.
18952 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
18953  bool Reciprocal) {
18954  if (Level >= AfterLegalizeDAG)
18955  return SDValue();
18956 
18957  // TODO: Handle half and/or extended types?
18958  EVT VT = Op.getValueType();
18959  if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
18960  return SDValue();
18961 
18962  // If estimates are explicitly disabled for this function, we're done.
18963  MachineFunction &MF = DAG.getMachineFunction();
18964  int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
18965  if (Enabled == TLI.ReciprocalEstimate::Disabled)
18966  return SDValue();
18967 
18968  // Estimates may be explicitly enabled for this type with a custom number of
18969  // refinement steps.
18970  int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
18971 
18972  bool UseOneConstNR = false;
18973  if (SDValue Est =
18974  TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
18975  Reciprocal)) {
18976  AddToWorklist(Est.getNode());
18977 
18978  if (Iterations) {
18979  Est = UseOneConstNR
18980  ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
18981  : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
18982 
18983  if (!Reciprocal) {
18984  // The estimate is now completely wrong if the input was exactly 0.0 or
18985  // possibly a denormal. Force the answer to 0.0 for those cases.
18986  EVT VT = Op.getValueType();
18987  SDLoc DL(Op);
18988  EVT CCVT = getSetCCResultType(VT);
18989  ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
18990  const Function &F = DAG.getMachineFunction().getFunction();
18991  Attribute Denorms = F.getFnAttribute("denormal-fp-math");
18992  if (Denorms.getValueAsString().equals("ieee")) {
18993  // fabs(X) < SmallestNormal ? 0.0 : Est
18994  const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
18995  APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
18996  SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
18997  SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
18998  SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
18999  SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
19000  Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
19001  AddToWorklist(Fabs.getNode());
19002  AddToWorklist(IsDenorm.getNode());
19003  AddToWorklist(Est.getNode());
19004  } else {
19005  // X == 0.0 ? 0.0 : Est
19006  SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
19007  SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
19008  Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
19009  AddToWorklist(IsZero.getNode());
19010  AddToWorklist(Est.getNode());
19011  }
19012  }
19013  }
19014  return Est;
19015  }
19016 
19017  return SDValue();
19018 }
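// Why the zero fixup above is required: sqrt(x) is produced as
// x * rsqrt(x), and rsqrt(0.0) is +infinity, so the product at x == 0 would
// be 0 * inf == NaN instead of 0.0. A scalar sketch of the non-"ieee" select
// path (illustrative helper, assuming the estimate is already refined):
static inline float sqrtFromRsqrtSketch(float X, float RsqrtEst) {
  float Est = X * RsqrtEst;        // sqrt(x) == x * rsqrt(x) for x > 0
  return (X == 0.0f) ? 0.0f : Est; // force the x == 0 case back to 0.0
}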
19019 
19020 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
19021  return buildSqrtEstimateImpl(Op, Flags, true);
19022 }
19023 
19024 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
19025  return buildSqrtEstimateImpl(Op, Flags, false);
19026 }
19027 
19028 /// Return true if there is any possibility that the two addresses overlap.
19029 bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
19030  // If they are the same then they must be aliases.
19031  if (Op0->getBasePtr() == Op1->getBasePtr()) return true;
19032 
19033  // If they are both volatile then they cannot be reordered.
19034  if (Op0->isVolatile() && Op1->isVolatile()) return true;
19035 
19036  // If one operation reads from invariant memory and the other may store, they
19037  // cannot alias. Ideally this would check the equivalent of mayWrite, but
19038  // that only matters for memory nodes other than loads and stores.
19039  if (Op0->isInvariant() && Op1->writeMem())
19040  return false;
19041 
19042  if (Op1->isInvariant() && Op0->writeMem())
19043  return false;
19044 
19045  unsigned NumBytes0 = Op0->getMemoryVT().getStoreSize();
19046  unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize();
19047 
19048  // Check for BaseIndexOffset matching.
19049  BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0, DAG);
19050  BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1, DAG);
19051  int64_t PtrDiff;
19052  if (BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()) {
19053  if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
19054  return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
19055 
19056  // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
19057  // able to calculate their relative offset if at least one arises
19058  // from an alloca. However, these allocas cannot overlap and we
19059  // can infer there is no alias.
19060  if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
19061  if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
19062  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
19063  // If the frame indexes differ and at least one is a non-fixed (alloca)
19064  // object, the stack objects cannot overlap; otherwise be conservative.
19065  if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
19066  !MFI.isFixedObjectIndex(B->getIndex())))
19067  return false;
19068  }
19069 
19070  bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
19071  bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
19072  bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
19073  bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
19074  bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
19075  bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
19076 
19077  // If the bases are of mismatched kinds, or the indices are comparable,
19078  // we can conclude that they do not alias.
19079  if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
19080  (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
19081  (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1))
19082  return false;
19083  }
19084 
19085  // If we know that the accessed objects have relatively large alignment
19086  // compared to the size and offset of the access, we may be able to prove
19087  // that they do not alias. This check is conservative for now to catch
19088  // cases created by splitting vector types.
19089  int64_t SrcValOffset0 = Op0->getSrcValueOffset();
19090  int64_t SrcValOffset1 = Op1->getSrcValueOffset();
19091  unsigned OrigAlignment0 = Op0->getOriginalAlignment();
19092  unsigned OrigAlignment1 = Op1->getOriginalAlignment();
19093  if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
19094  NumBytes0 == NumBytes1 && OrigAlignment0 > NumBytes0) {
19095  int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
19096  int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
19097 
19098  // There is no overlap between these relatively aligned accesses of
19099  // similar size. Return no alias.
19100  if ((OffAlign0 + NumBytes0) <= OffAlign1 ||
19101  (OffAlign1 + NumBytes1) <= OffAlign0)
19102  return false;
19103  }
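  // Worked example with illustrative values: two 8-byte accesses that are
  // both 16-byte aligned at source offsets 0 and 8 give OffAlign0 == 0 and
  // OffAlign1 == 8; since 0 + 8 <= 8, the accesses cannot overlap within any
  // aligned window and we can safely report no alias.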
19104 
19105  bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
19106  ? CombinerGlobalAA
19107  : DAG.getSubtarget().useAA();
19108 #ifndef NDEBUG
19109  if (CombinerAAOnlyFunc.getNumOccurrences() &&
19110  CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
19111  UseAA = false;
19112 #endif
19113 
19114  if (UseAA && AA &&
19115  Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
19116  // Use alias analysis information.
19117  int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
19118  int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset;
19119  int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset;
19120  AliasResult AAResult =
19121  AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0,
19122  UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
19123  MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1,
19124  UseTBAA ? Op1->getAAInfo() : AAMDNodes()) );
19125  if (AAResult == NoAlias)
19126  return false;
19127  }
19128 
19129  // Otherwise we have to assume they alias.
19130  return true;
19131 }
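// The PtrDiff early-out above is a disjoint-interval test against a common
// base: access 0 covers [0, NumBytes0) and access 1 covers
// [PtrDiff, PtrDiff + NumBytes1). A standalone sketch (hypothetical helper):
static inline bool mayOverlapSketch(int64_t PtrDiff, int64_t NumBytes0,
                                    int64_t NumBytes1) {
  bool Disjoint = NumBytes0 <= PtrDiff ||   // access 0 ends before 1 starts
                  PtrDiff + NumBytes1 <= 0; // access 1 ends before 0 starts
  return !Disjoint;
}
// For example, two 4-byte stores at offsets 0 and 4 of the same base give
// PtrDiff == 4 and NumBytes0 == 4, so they are disjoint and cannot alias.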
19132 
19133 /// Walk up chain skipping non-aliasing memory nodes,
19134 /// looking for aliasing nodes and adding them to the Aliases vector.
19135 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
19136  SmallVectorImpl<SDValue> &Aliases) {
19137  SmallVector<SDValue, 8> Chains; // List of chains to visit.
19138  SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
19139 
19140  // Get alias information for node.
19141  bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();
19142 
19143  // Starting off.
19144  Chains.push_back(OriginalChain);
19145  unsigned Depth = 0;
19146 
19147  // Look at each chain and determine if it is an alias. If so, add it to the
19148  // aliases list. If not, then continue up the chain looking for the next
19149  // candidate.
19150  while (!Chains.empty()) {
19151  SDValue Chain = Chains.pop_back_val();
19152 
19153  // For TokenFactor nodes, look at each operand and only continue up the
19154  // chain until we reach the depth limit.
19155  //
19156  // FIXME: The depth check could be made to return the last non-aliasing
19157  // chain we found before we hit a tokenfactor rather than the original
19158  // chain.
19159  if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
19160  Aliases.clear();
19161  Aliases.push_back(OriginalChain);
19162  return;
19163  }
19164 
19165  // Don't bother if we've been here before.
19166  if (!Visited.insert(Chain.getNode()).second)
19167  continue;
19168 
19169  switch (Chain.getOpcode()) {
19170  case ISD::EntryToken:
19171  // Entry token is ideal chain operand, but handled in FindBetterChain.
19172  break;
19173 
19174  case ISD::LOAD:
19175  case ISD::STORE: {
19176  // Get alias information for Chain.
19177  bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
19178  !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();
19179 
19180  // If the chain is an alias then stop here.
19181  if (!(IsLoad && IsOpLoad) &&
19182  isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
19183  Aliases.push_back(Chain);
19184  } else {
19185  // Look further up the chain.
19186  Chains.push_back(Chain.getOperand(0));
19187  ++Depth;
19188  }
19189  break;
19190  }
19191 
19192  case ISD::TokenFactor:
19193  // We have to check each of the operands of the token factor for "small"
19194  // token factors, so we queue them up. Adding the operands to the queue
19195  // (stack) in reverse order maintains the original order and increases the
19196  // likelihood that getNode will find a matching token factor (CSE).
19197  if (Chain.getNumOperands() > 16) {
19198  Aliases.push_back(Chain);
19199  break;
19200  }
19201  for (unsigned n = Chain.getNumOperands(); n;)
19202  Chains.push_back(Chain.getOperand(--n));
19203  ++Depth;
19204  break;
19205 
19206  case ISD::CopyFromReg:
19207  // Forward past CopyFromReg.
19208  Chains.push_back(Chain.getOperand(0));
19209  ++Depth;
19210  break;
19211 
19212  default:
19213  // For all other instructions we will just have to take what we can get.
19214  Aliases.push_back(Chain);
19215  break;
19216  }
19217  }
19218 }
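// A worked example of the walk above, with hypothetical nodes: for a load L
// whose chain is TokenFactor(St1, CopyFromReg(St2)), the worklist visits St1
// and, through the CopyFromReg, St2. Each store that may alias L is appended
// to Aliases; a non-aliasing store is skipped and its own chain operand is
// queued instead, so the search continues upward until it reaches EntryToken
// or the depth limit.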
19219 
19220 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
19221 /// (aliasing node).
19222 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
19223  if (OptLevel == CodeGenOpt::None)
19224  return OldChain;
19225 
19226  // Ops for replacing token factor.
19227  SmallVector<SDValue, 8> Aliases;
19228 
19229  // Accumulate all the aliases to this node.
19230  GatherAllAliases(N, OldChain, Aliases);
19231 
19232  // If no operands then chain to entry token.
19233  if (Aliases.size() == 0)
19234  return DAG.getEntryNode();
19235 
19236  // If a single operand then chain to it. We don't need to revisit it.
19237  if (Aliases.size() == 1)
19238  return Aliases[0];
19239 
19240  // Construct a custom tailored token factor.
19241  return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
19242 }
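// For example, if GatherAllAliases finds exactly two aliasing stores StA and
// StB above N (hypothetical names), N's new chain becomes
// TokenFactor(StA, StB): N stays ordered against its real conflicts but no
// longer serializes behind unrelated memory operations on the old chain.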
19243 
19244 // TODO: Replace with std::monostate when we move to C++17.
19245 struct UnitT { } Unit;
19246 bool operator==(const UnitT &, const UnitT &) { return true; }
19247 bool operator!=(const UnitT &, const UnitT &) { return false; }
19248 
19249 // This function tries to collect a bunch of potentially interesting
19250 // nodes to improve the chains of, all at once. This might seem
19251 // redundant, as this function gets called when visiting every store
19252 // node, so why not let the work be done on each store as it's visited?
19253 //
19254 // I believe this is mainly important because MergeConsecutiveStores
19255 // is unable to deal with merging stores of different sizes, so unless
19256 // we improve the chains of all the potential candidates up-front
19257 // before running MergeConsecutiveStores, it might only see some of
19258 // the nodes that will eventually be candidates, and then not be able
19259 // to go from a partially-merged state to the desired final
19260 // fully-merged state.
19261 
19262 bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
19263  SmallVector<StoreSDNode *, 8> ChainedStores;
19264  StoreSDNode *STChain = St;
19265  // Intervals records which offsets from BaseIndex have been covered. In
19266  // the common case, every store writes to the immediately preceding
19267  // address and is thus merged with the previous interval at insertion time.
19268 
19269  using IMap =
19270  llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
19271  IMap::Allocator A;
19272  IMap Intervals(A);
19273 
19274  // This holds the base pointer, index, and the offset in bytes from the base
19275  // pointer.
19276  const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
19277 
19278  // We must have a base and an offset.
19279  if (!BasePtr.getBase().getNode())
19280  return false;
19281 
19282  // Do not handle stores to undef base pointers.
19283  if (BasePtr.getBase().isUndef())
19284  return false;
19285 
19286  // Add ST's interval.
19287  Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
19288 
19289  while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
19290  // If the chain has more than one use, then we can't reorder the mem ops.
19291  if (!SDValue(Chain, 0)->hasOneUse())
19292  break;
19293  if (Chain->isVolatile() || Chain->isIndexed())
19294  break;
19295 
19296  // Find the base pointer and offset for this memory node.
19297  const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
19298  // Check that the base pointer is the same as the original one.
19299  int64_t Offset;
19300  if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
19301  break;
19302  int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
19303  // Make sure we don't overlap with other intervals by checking the ones to
19304  // the left or right before inserting.
19305  auto I = Intervals.find(Offset);
19306  // If there's a next interval, we should end before it.
19307  if (I != Intervals.end() && I.start() < (Offset + Length))
19308  break;
19309  // If there's a previous interval, we should start after it.
19310  if (I != Intervals.begin() && (--I).stop() <= Offset)
19311  break;
19312  Intervals.insert(Offset, Offset + Length, Unit);
19313 
19314  ChainedStores.push_back(Chain);
19315  STChain = Chain;
19316  }
19317 
19318  // If we didn't find a chained store, exit.
19319  if (ChainedStores.size() == 0)
19320  return false;
19321 
19322  // Improve all chained stores (St and ChainedStores members) starting from
19323  // where the store chain ended and return a single TokenFactor.
19324  SDValue NewChain = STChain->getChain();
19325  SmallVector<SDValue, 8> TFOps;
19326  for (unsigned I = ChainedStores.size(); I;) {
19327  StoreSDNode *S = ChainedStores[--I];
19328  SDValue BetterChain = FindBetterChain(S, NewChain);
19329  S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
19330  S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
19331  TFOps.push_back(SDValue(S, 0));
19332  ChainedStores[I] = S;
19333  }
19334 
19335  // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
19336  SDValue BetterChain = FindBetterChain(St, NewChain);
19337  SDValue NewST;
19338  if (St->isTruncatingStore())
19339  NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
19340  St->getBasePtr(), St->getMemoryVT(),
19341  St->getMemOperand());
19342  else
19343  NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
19344  St->getBasePtr(), St->getMemOperand());
19345 
19346  TFOps.push_back(NewST);
19347 
19348  // If we improved every element of TFOps, then we've lost the dependence on
19349  // NewChain to successors of St and we need to add it back to TFOps. Do so at
19350  // the beginning to keep relative order consistent with FindBetterChains.
19351  auto hasImprovedChain = [&](SDValue ST) -> bool {
19352  return ST->getOperand(0) != NewChain;
19353  };
19354  bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
19355  if (AddNewChain)
19356  TFOps.insert(TFOps.begin(), NewChain);
19357 
19358  SDValue TF = DAG.getNode(ISD::TokenFactor, SDLoc(STChain), MVT::Other, TFOps);
19359  CombineTo(St, TF);
19360 
19361  AddToWorklist(STChain);
19362  // Add TF operands to the worklist in reverse order.
19363  for (auto I = TF->getNumOperands(); I;)
19364  AddToWorklist(TF->getOperand(--I).getNode());
19365  AddToWorklist(TF.getNode());
19366  return true;
19367 }
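// Coverage bookkeeping example with illustrative offsets: walking a chain of
// 4-byte stores at offsets 0, 4 and 8 from BasePtr inserts the half-open
// intervals [0,4), [4,8) and [8,12), which the IntervalMap coalesces into
// [0,12). A later 4-byte store at offset 10 would fail the overlap checks
// against [0,12) and stop the walk, so only the disjoint prefix of the chain
// is parallelized.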
19368 
19369 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
19370  if (OptLevel == CodeGenOpt::None)
19371  return false;
19372 
19373  const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
19374 
19375  // We must have a base and an offset.
19376  if (!BasePtr.getBase().getNode())
19377  return false;
19378 
19379  // Do not handle stores to undef base pointers.
19380  if (BasePtr.getBase().isUndef())
19381  return false;
19382 
19383  // Directly improve a chain of disjoint stores starting at St.
19384  if (parallelizeChainedStores(St))
19385  return true;
19386 
19387  // Improve St's chain.
19388  SDValue BetterChain = FindBetterChain(St, St->getChain());
19389  if (St->getChain() != BetterChain) {
19390  replaceStoreChain(St, BetterChain);
19391  return true;
19392  }
19393  return false;
19394 }
19395 
19396 /// This is the entry point for the file.
19397 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
19398  CodeGenOpt::Level OptLevel) {
19399  /// This is the main entry point to this class.
19400  DAGCombiner(*this, AA, OptLevel).Run(Level);
19401 }
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
bool LegalizeOp(SDNode *N, SmallSetVector< SDNode *, 16 > &UpdatedNodes)
Transforms a SelectionDAG node and any operands to it into a node that is compatible with the target ...
APInt abs() const
Get the absolute value;.
Definition: APInt.h:1800
uint64_t CallInst * C
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:571
bool isInvariant() const
X = FP_ROUND(Y, TRUNC) - Rounding &#39;Y&#39; from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:538
const TargetRegisterClass * getCommonSubClass(const TargetRegisterClass *A, const TargetRegisterClass *B, const MVT::SimpleValueType SVT=MVT::SimpleValueType::Any) const
Find the largest common subclass of A and B.
static bool isBSwapHWordElement(SDValue N, MutableArrayRef< SDNode *> Parts)
Return true if the specified node is an element that makes up a 32-bit packed halfword byteswap...
SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned char TargetFlags=0)
static MVT getIntegerVT(unsigned BitWidth)
std::string & operator+=(std::string &buffer, StringRef string)
Definition: StringRef.h:921
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:877
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:594
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
EVT getValueType() const
Return the ValueType of the referenced return value.
static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG)
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position...
bool isShiftedMask() const
Return true if this APInt value contains a sequence of ones with the remainder zero.
Definition: APInt.h:517
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const SDValue & getOffset() const
iterator_range< use_iterator > uses()
Definition: Value.h:355
static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalTypes)
Try to fold a sext/zext/aext dag node into a ConstantSDNode or a build_vector of constants.
static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG)
virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, const SelectionDAG &DAG) const
Returns if it&#39;s reasonable to merge stores to MemVT size.
static bool isConstant(const MachineInstr &MI)
bool isUndef() const
bool hasNoSignedZeros() const
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant, which is required to be operand #1) half of the integer or float value specified as operand #0.
Definition: ISDOpcodes.h:184
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1563
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:834
static bool areSlicesNextToEachOther(const LoadedSlice &First, const LoadedSlice &Second)
Check whether or not First and Second are next to each other in memory.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:562
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
const TargetLibraryInfo & getLibInfo() const
Definition: SelectionDAG.h:405
LLVMContext & Context
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition: ValueTypes.h:359
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef...
DiagnosticInfoOptimizationBase::Argument NV
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond)
Helper function to make it easier to build SetCC&#39;s if you just have an ISD::CondCode instead of an SD...
Definition: SelectionDAG.h:937
typename SuperClass::const_iterator const_iterator
Definition: SmallVector.h:328
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
SDValue getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:42
virtual bool isFPImmLegal(const APFloat &, EVT) const
Returns true if the target can instruction select the specified FP immediate natively.
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR (an vector value) starting with the ...
Definition: ISDOpcodes.h:358
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:650
This class represents lattice values for constants.
Definition: AllocatorList.h:24
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not...
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:78
SDValue simplifyShift(SDValue X, SDValue Y)
Try to simplify a shift into 1 of its operands or a constant.
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0...
Definition: ISDOpcodes.h:605
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:367
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:260
iterator begin() const
Definition: ArrayRef.h:137
#define LLVM_FALLTHROUGH
Definition: Compiler.h:86
static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG)
Try to fold a &#39;not&#39; shifted sign-bit with add/sub with constant operand into a shift and add with a d...
virtual bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const
Return true if it is profitable to convert a select of FP constants into a constant pool load whose a...
static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG)
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:519
const Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(SDValue V)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
static uint64_t * getMemory(unsigned numWords)
A utility function for allocating memory and checking for allocation failure.
Definition: APInt.cpp:46
const SDValue & getBasePtr() const
bool isNegative() const
Return true if the value is negative.
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:223
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, KnownBits &Known)
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI)
Clients of various APIs that cause global effects on the DAG can optionally implement this interface...
Definition: SelectionDAG.h:280
SDValue makeEquivalentMemoryOrdering(LoadSDNode *Old, SDValue New)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:858
bool operator>(int64_t V1, const APSInt &V2)
Definition: APSInt.h:327
virtual bool enableAggressiveFMAFusion(EVT VT) const
Return true if target always beneficiates from combining into FMA for a given value type...
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:146
const SDValue & getValue() const
virtual bool getPreIndexedAddressParts(SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node&#39;s...
virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const
Hooks for building estimates in place of slower divisions and square roots.
SDVTList getVTList() const
This file contains the declarations for metadata subclasses.
bool isCompressingStore() const
Returns true if the op does a compression to the vector before storing.
virtual bool isFNegFree(EVT VT) const
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, bool isInteger)
Return the result of a logical AND between different comparisons of identical values: ((X op1 Y) & (X...
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
Definition: APInt.h:648
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:253
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit...
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
The two locations do not alias at all.
Definition: AliasAnalysis.h:84
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:251
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1329
detail::ValueMatchesPoly< M > HasValue(M Matcher)
Definition: Error.h:159
const SDValue & getChain() const
unsigned getResNo() const
Convenience function for get().getResNo().
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc, or post-dec.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
unsigned getAlignment() const
unsigned getValueSizeInBits(unsigned ResNo) const
Returns MVT::getSizeInBits(getValueType(ResNo)).
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:321
bool isBitwiseNot(SDValue V)
Returns true if V is a bitwise not operation.
unsigned second
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1186
bool isAllOnesOrAllOnesSplat(SDValue V)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:811
void setAllBits()
Set every bit to 1.
Definition: APInt.h:1390
static uint32_t Concat[]
bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const
Return true if the specified store with truncation is legal on this target.
virtual const TargetRegisterClass * getRegClassFor(MVT VT) const
Return the register class that should be used for the specified value type.
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:141
F(f)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select&#39;s if you just have operands and don&#39;t want to check...
Definition: SelectionDAG.h:950
const fltSemantics & getSemantics() const
Definition: APFloat.h:1155
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:508
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is &#39;desirable&#39; to us...
[US]{MIN/MAX} - Binary minimum or maximum or signed or unsigned integers.
Definition: ISDOpcodes.h:384
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:876
const SDNodeFlags getFlags() const
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
iv Induction Variable Users
Definition: IVUsers.cpp:52
LLVM_READONLY APFloat maximum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2018 maximum semantics.
Definition: APFloat.h:1262
void changeSign()
Definition: APFloat.h:1050
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
Definition: SelectionDAG.h:466
virtual bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem, unsigned AddrSpace) const
Return true if it is expected to be cheaper to do a store of a non-zero vector constant with the give...
bool isOperationLegalOrCustom(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:212
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
void DeleteNode(SDNode *N)
Remove the specified node from the system.
static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG)
bool isConstTrueVal(const SDNode *N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1 at the ...
Definition: ISDOpcodes.h:353
SDValue getConstantPool(const Constant *C, EVT VT, unsigned Align=0, int Offs=0, bool isT=false, unsigned char TargetFlags=0)
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0&#39;s from the most significant bit to the least stopping at the first 1...
Definition: MathExtras.h:189
static SDNode * ShrinkLoadReplaceStoreWithStore(const std::pair< unsigned, unsigned > &MaskInfo, SDValue IVal, StoreSDNode *St, DAGCombiner *DC)
Check to see if IVal is something that provides a value as specified by MaskInfo. ...
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode *> &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators...
unsigned getValueSizeInBits() const
Returns the size of the value in bits.
EntryToken - This is the marker used to indicate the start of a region.
Definition: ISDOpcodes.h:45
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:435
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:39
void ReplaceAllUsesOfValuesWith(const SDValue *From, const SDValue *To, unsigned Num)
Like ReplaceAllUsesOfValueWith, but for multiple values at once.
static Constant * get(ArrayType *T, ArrayRef< Constant *> V)
Definition: Constants.cpp:983
const ConstantFP * getConstantFPValue() const
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1509
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl< SDNode *> &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal on this target.
void transferDbgValues(SDValue From, SDValue To, unsigned OffsetInBits=0, unsigned SizeInBits=0, bool InvalidateDbg=true)
Transfer debug values from one node to another, while optionally generating fragment expressions for ...
virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
bool hasApproximateFuncs() const
virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT) const
Return true if the following transform is beneficial: (store (y (conv x)), y*)) -> (store x...
bool isTruncatingStore() const
Return true if the op does a truncation before store.
int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a square root of the given type based on the function&#39;s at...
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode *> &Visited, SmallVectorImpl< const SDNode *> &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, const SDLoc &DL)
Returns sum of the base pointer and offset.
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
bool operator<=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:324
static bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N)
copysign(x, fp_extend(y)) -> copysign(x, y) copysign(x, fp_round(y)) -> copysign(x, y)
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:210
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:136
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:221
unsigned countTrailingZeros() const
Count the number of trailing zero bits.
Definition: APInt.h:1632
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
The main low level interface to the alias analysis implementation.
static cl::opt< bool > UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), cl::desc("Enable DAG combiner's use of TBAA"))
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
static uint32_t getAlignment(const MCSectionCOFF &Sec)
bool hasOneUse() const
Return true if there is exactly one use of this node.
static bool isContractable(SDNode *N)
A description of a memory reference used in the backend.
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:516
OverflowKind computeOverflowKind(SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 node can overflow.
virtual bool decomposeMulByConstant(EVT VT, SDValue C) const
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG)
void setBit(unsigned BitPosition)
Set a given bit to 1.
Definition: APInt.h:1403
static ManagedStatic< DebugCounter > DC
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
bool operator>=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:325
bool isUndef(unsigned Opcode, ArrayRef< SDValue > Ops)
Return true if the result of this operation is always undefined.
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:451
static cl::opt< std::string > CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, cl::desc("Only use DAG-combiner alias analysis in this" " function"))
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:993
Shift and rotation operations.
Definition: ISDOpcodes.h:410
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant BUI...
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
std::size_t countTrailingOnes(T Value, ZeroBehavior ZB=ZB_Width)
Count the number of ones from the least significant bit to the first zero bit.
Definition: MathExtras.h:478
static cl::opt< bool > Aggressive("aggressive-ext-opt", cl::Hidden, cl::desc("Aggressive extension optimization"))
static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG)
APInt zextOrSelf(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:892
Base class for LoadSDNode and StoreSDNode.
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef...
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth...
Definition: ISDOpcodes.h:393
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
SDValue getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO)
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:191
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
static bool isLoad(int Opcode)
const SDValue & getPassThru() const
CopyToReg - This node has three operands: a chain, a register number to set to this value...
Definition: ISDOpcodes.h:170
op_iterator op_end() const
unsigned getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
#define INT64_MAX
Definition: DataTypes.h:77
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
This file contains the simple types necessary to represent the attributes associated with functions a...
SimpleValueType SimpleTy
LLVM_READONLY APFloat minimum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2018 minimum semantics.
Definition: APFloat.h:1249
virtual bool generateFMAsInMachineCombiner(CodeGenOpt::Level OptLevel) const
unsigned getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:304
bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal or custom on this target.
The memory access is dereferenceable (i.e., doesn&#39;t trap).
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first...
const SDValue & getValue() const
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:460
virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const
Use bitwise logic to make pairs of compares more efficient.
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:401
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
This class is used to represent EVT&#39;s, which are used to parameterize some operations.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:978
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const
zlib-gnu style compression
ELFYAML::ELF_STO Other
Definition: ELFYAML.cpp:784
This file implements a class to represent arbitrary precision integral constant values and operations...
This represents a list of ValueType&#39;s that has been intern&#39;d by a SelectionDAG.
iterator_range< allnodes_iterator > allnodes()
Definition: SelectionDAG.h:449
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it&#39;s free to truncate a value of type FromTy to type ToTy.
This class is used to represent an MSTORE node.
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
unsigned getSizeInBits() const
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:267
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1533
int64_t getSExtValue() const
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const TargetLowering &TLI, SelectionDAG &DAG)
Generate Min/Max node.
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:43
unsigned getScalarSizeInBits() const
Definition: ValueTypes.h:298
virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT) const
Return true if the following transform is beneficial: fold (conv (load x)) -> (load (conv*)x) On arch...
unsigned getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:292
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1575
bool writeMem() const
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:478
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:398
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:478
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:142
SDValue getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO)
bool isOneOrOneSplat(SDValue V)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
bool isKnownNeverZero(SDValue Op) const
Test whether the given SDValue is known to contain non-zero value(s).
const SDValue & getScale() const
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:402
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:429
static bool isBooleanFlip(SDValue V, EVT VT, const TargetLowering &TLI)
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:201
static cl::opt< bool > StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, cl::desc("Bypass the profitability model of load slicing"), cl::init(false))
Hidden option to stress test load slicing, i.e., when this option is enabled, load slicing bypasses m...
bool has(LibFunc F) const
Tests whether a library function is available.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:221
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:852
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:224
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue is known to never be NaN.
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, bool IsTruncating=false, bool IsCompressing=false)
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:959
op_iterator op_begin() const
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification, or lowering of the constant.
Definition: ISDOpcodes.h:125
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:497
ArrayRef< SDUse > ops() const
const SDValue & getMask() const
int64_t getSrcValueOffset() const
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:992
SDValue GetDemandedBits(SDValue V, const APInt &Mask)
See if the specified operand can be simplified with the knowledge that only the bits specified by Mas...
static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS)
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a division of the given type based on the function&#39;s attri...
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
UNDEF - An undefined node.
Definition: ISDOpcodes.h:178
This class is used to represent ISD::STORE nodes.
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:524
bool hasAllowReciprocal() const
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the specified, possibly variable...
Definition: ISDOpcodes.h:327
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
AliasResult
The possible results of an alias query.
Definition: AliasAnalysis.h:78
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
const Value * getValue() const
Return the base address of the memory access.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition: APInt.h:636
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:229
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits...
bool isZero() const
Return true if the value is positive or negative zero.
bool hasAllowContract() const
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:610
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:364
#define P(N)
const SDValue & getBasePtr() const
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition: ValueTypes.h:235
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI)
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
bool isAllOnesValue() const
Determine if all bits are set.
Definition: APInt.h:396
FPOpFusion::FPOpFusionMode AllowFPOpFusion
AllowFPOpFusion - This flag is set by the -fuse-fp-ops=xxx option.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
bool isNegative() const
Definition: APFloat.h:1147
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0&#39;s from the least significant bit to the most stopping at the first 1...
Definition: MathExtras.h:120
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:291
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:429
Machine Value Type.
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG)
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG)
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
Simple binary floating point operators.
Definition: ISDOpcodes.h:283
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:273
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1185
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef< int > Mask)
If the shuffle mask is taking exactly one element from the first vector operand and passing through a...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:42
iterator_range< value_op_iterator > op_values() const
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:92
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:217
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:129
const SDValue & getOperand(unsigned Num) const
struct UnitT Unit
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:934
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL...
Definition: ISDOpcodes.h:332
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:247
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:232
bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
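As a rough sketch of how a combine might use ISD::matchBinaryPredicate (N0 and N1 are assumed to be SDValue operands already in scope, as in a typical visit function; this is illustrative, not code from this file):

  // Accept scalar constants or constant BUILD_VECTORs, and require
  // C0 < C1 (unsigned) for every matched pair of elements.
  auto IsULT = [](llvm::ConstantSDNode *C0, llvm::ConstantSDNode *C1) {
    return C0->getAPIntValue().ult(C1->getAPIntValue());
  };
  bool AllULT = llvm::ISD::matchBinaryPredicate(N0, N1, IsULT,
                                                /*AllowUndefs=*/true);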
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:264
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
bool isMask(unsigned numBits) const
Definition: APInt.h:495
bool isOneValue() const
Determine if this is a value of 1.
Definition: APInt.h:411
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
virtual bool isFPExtFoldable(unsigned Opcode, EVT DestVT, EVT SrcVT) const
Return true if an fpext operation input to an Opcode operation is free (for instance, because half-precision floating-point numbers are implicitly extended to float-precision) for an FMA instruction.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:588
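For example (an illustrative snippet using the APInt helpers listed here):

  #include "llvm/ADT/APInt.h"
  #include <cassert>

  void apintBitDemo() {
    // A 16-bit value with only bit 3 set: 0x0008.
    llvm::APInt OneBit = llvm::APInt::getOneBitSet(16, 3);
    assert(OneBit.getZExtValue() == 0x8);
    // isMask(8): the low 8 bits are ones and everything above is zero.
    assert(llvm::APInt(16, 0xFF).isMask(8));
  }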
CombineLevel
Definition: DAGCombine.h:16
unsigned getPrefTypeAlignment(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:740
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition: APInt.h:443
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:434
void Combine(CombineLevel Level, AliasAnalysis *AA, CodeGenOpt::Level OptLevel)
This iterates over the nodes in the SelectionDAG, folding certain types of nodes together, or eliminating superfluous nodes.
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition: APFloat.h:1130
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This class provides iterator support for SDUse operands that use a specific SDNode.
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
bool optForSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:598
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:58
bool hasNoNaNs() const
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
const APInt & getAPIntValue() const
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift, SDValue &Mask)
Match "(X shl/srl V1) & V2" where V2 may not be present.
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:57
virtual bool hasPairedLoad(EVT, unsigned &) const
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG)
If we are extracting a subvector produced by a wide binary operator try to use a narrow binary operat...
static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N, APFloat(*Op)(const APFloat &, const APFloat &))
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
unsigned getOriginalAlignment() const
Returns the original (base) alignment of the memory access.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
const SDValue & getValue() const
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y)...
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn&#39;t already there.
Definition: SmallSet.h:181
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
static cl::opt< bool > CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, cl::desc("Enable DAG combiner's use of IR alias analysis"))
virtual bool isDesirableToTransformToIntegerOp(unsigned, EVT) const
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::LoadExtType, bool IsExpanding=false)
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:416
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:556
static const Optional< ByteProvider > calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, bool Root=false)
Recursively traverses the expression calculating the origin of the requested byte of the given value...
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
const SDValue & getIndex() const
Extended Value Type.
Definition: ValueTypes.h:34
uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:640
static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType)
iterator erase(const_iterator CI)
Definition: SmallVector.h:445
const SDValue & getBasePtr() const
bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
static bool areUsedBitsDense(const APInt &UsedBits)
Check that all bits set in UsedBits form a dense region, i.e., UsedBits looks like 0...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static void adjustCostForPairing(SmallVectorImpl< LoadedSlice > &LoadedSlices, LoadedSlice::Cost &GlobalLSCost)
Adjust the GlobalLSCost according to the target pairing capabilities and the layout of the slices...
size_t size() const
Definition: SmallVector.h:53
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:566
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1207
bool isVolatile() const
int getDivRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a division of the given type based on the function's attributes...
This class contains a discriminated union of information about pointers in memory operands...
unsigned getNumOperands() const
Return the number of values used by this operation.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
HANDLENODE node - Used as a handle for various purposes.
Definition: ISDOpcodes.h:750
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1238
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value...
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:971
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target&#39;s BooleanContent for type OpVT...
static char isNegatibleForFree(SDValue Op, bool LegalOperations, const TargetLowering &TLI, const TargetOptions *Options, unsigned Depth=0)
Return 1 if we can compute the negated form of the specified expression for the same cost as the expr...
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands...
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V)
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is an EXTLOAD.
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1116
virtual bool shouldScalarizeBinop(SDValue VecOp) const
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
The memory access writes data.
static SDValue combineShuffleOfSplat(ArrayRef< int > UserMask, ShuffleVectorSDNode *Splat, SelectionDAG &DAG)
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type...
bool use_empty() const
Return true if there are no uses of this node.
Representation for a specific memory location.
size_type size() const
Definition: SmallPtrSet.h:93
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:555
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:265
virtual bool IsDesirableToPromoteOp(SDValue, EVT &) const
This method queries the target whether it is beneficial for dag combiner to promote the specified node...
TokenFactor - This node takes multiple tokens as input and produces a single token result...
Definition: ISDOpcodes.h:50
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef...
void dump() const
Dump this node, for debugging.
static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG, const TargetLowering &TLI)
Return true if 'Use' is a load or a store that uses N as its base pointer and that N may be folded in...
Basic Register Allocator
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:404
Helper struct to parse and store a memory address as base + index + offset.
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:298
unsigned getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition: ValueTypes.h:310
void setNoUnsignedWrap(bool b)
static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
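A small usage sketch of the SmallPtrSet operations listed in this section (insert, count, erase), using plain int pointers for illustration:

  #include "llvm/ADT/SmallPtrSet.h"
  #include <cassert>

  void smallPtrSetDemo() {
    int X = 0, Y = 0;
    llvm::SmallPtrSet<int *, 4> Visited;
    // insert() returns {iterator, bool}; the bool reports a new insertion.
    assert(Visited.insert(&X).second);
    assert(!Visited.insert(&X).second); // already present
    assert(Visited.count(&X) == 1 && Visited.count(&Y) == 0);
    assert(Visited.erase(&X)); // present, so erase returns true
  }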
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset=0)
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const
Return true if it is profitable to reduce a load to a smaller type.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl< SDNode *> &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, EVT VT) const
Returns true if we should normalize select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y)) if it is likely that it saves us from materializing N0 and N1 in an integer register.
virtual bool getPostIndexedAddressParts(SDNode *, SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
BlockVerifier::State From
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false...
Definition: SmallPtrSet.h:378
static int numVectorEltsOrZero(EVT T)
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:339
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:265
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provides VTs and return the low/high part...
static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, bool LegalOperations)
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition: ISDOpcodes.h:451
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:222
X = FP_ROUND_INREG(Y, VT) - This operator takes an FP register, and rounds it to a floating point val...
Definition: ISDOpcodes.h:553
const SDValue & getMask() const
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:734
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:523
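For instance (illustrative):

  // Broadcast an 8-bit pattern across 32 bits: 0x01 -> 0x01010101.
  llvm::APInt Splat = llvm::APInt::getSplat(32, llvm::APInt(8, 0x01));
  assert(Splat.getZExtValue() == 0x01010101u);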
bool allOperandsUndef(const SDNode *N)
Return true if the node has at least one operand and all operands of the specified node are ISD::UNDE...
iterator end() const
Definition: ArrayRef.h:138
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:96
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:730
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:644
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimum or maximum on two values, following the IEEE-754 2008 definition.
Definition: ISDOpcodes.h:600
static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG, bool LegalOperations)
Transform a vector binary operation into a scalar binary operation by moving the math/logic after an ...
const SDValue & getPassThru() const
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:644
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:413
static SDValue flipBoolean(SDValue V, const SDLoc &DL, EVT VT, SelectionDAG &DAG, const TargetLowering &TLI)
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition: APFloat.h:1219
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:581
virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable...
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:381
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, EVT DataVT, SelectionDAG &DAG, bool IsCompressedMemory) const
Increments memory address Addr according to the type of the value DataVT that should be stored...
Represents one node in the SelectionDAG.
CondCode getSetCCInverse(CondCode Operation, bool isInteger)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
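Both CondCode helpers are pure functions on ISD::CondCode, so their effect is easy to illustrate (a sketch assuming llvm/CodeGen/ISDOpcodes.h):

  #include "llvm/CodeGen/ISDOpcodes.h"
  #include <cassert>

  void condCodeDemo() {
    using namespace llvm;
    // !(X < Y) is (X >= Y) for integer comparisons.
    assert(ISD::getSetCCInverse(ISD::SETLT, /*isInteger=*/true) == ISD::SETGE);
    // (X < Y) compares the same as (Y > X) with operands swapped.
    assert(ISD::getSetCCSwappedOperands(ISD::SETLT) == ISD::SETGT);
  }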
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1293
static BaseIndexOffset match(const LSBaseSDNode *N, const SelectionDAG &DAG)
Parses tree in Ptr for base, index, offset addresses.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
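A sketch of what the helper expands to; DAG, DL, and Val are assumed to be a SelectionDAG, an SDLoc, and an i32 SDValue already in scope, as in a typical combine (illustrative, not code from this file):

  // (xor Val, -1) via the convenience helper...
  SDValue NotVal = DAG.getNOT(DL, Val, MVT::i32);
  // ...is equivalent to building the XOR against all-ones by hand:
  SDValue AllOnes = DAG.getAllOnesConstant(DL, MVT::i32);
  SDValue ByHand = DAG.getNode(ISD::XOR, DL, MVT::i32, Val, AllOnes);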
static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc)
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:155
static bool Enabled
Definition: Statistic.cpp:51
const Function & getFunction() const
Return the LLVM function that this machine code represents.
static std::pair< SDValue, SDValue > SplitVSETCC(const SDNode *N, SelectionDAG &DAG)
bool isConstFalseVal(const SDNode *N) const
Return true if N is a constant or constant vector equal to the false value from getBooleanContents().
unsigned logBase2() const
Definition: APInt.h:1748
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
MachinePointerInfo getWithOffset(int64_t O) const
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:539
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:941
unsigned countTrailingOnes() const
Count the number of trailing one bits.
Definition: APInt.h:1646
size_t use_size() const
Return the number of uses of this node.
SDNode * isConstantIntBuildVectorOrConstantInt(SDValue N)
Test whether the given value is a constant int or similar node.
static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI)
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT...
Definition: ValueTypes.h:73
EVT getMemoryVT() const
Return the type of the in-memory value.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Class for arbitrary precision integers.
Definition: APInt.h:70
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, SelectionDAG &DAG)
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
If a shuffle inserts exactly one element from a source vector operand into another vector operand and...
virtual bool preferShiftsToClearExtremeBits(SDValue X) const
There are two ways to clear extreme bits (either low or high): Mask: x & (-1 << y) (the instcombine c...
virtual bool hasAndNot(SDValue X) const
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
iterator_range< use_iterator > uses()
static cl::opt< bool > MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), cl::desc("DAG combiner may split indexing from loads"))
static SDNode * getBuildPairElt(SDNode *N, unsigned i)
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, SDNode *Cst1, SDNode *Cst2)
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:420
SDValue peekThroughOneUseBitcasts(SDValue V)
Return the non-bitcasted and one-use source operand of V if it exists.
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:241
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:464
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOperations, unsigned Depth=0)
If isNegatibleForFree returns true, return the newly negated expression.
static use_iterator use_end()
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:468
const SDValue & getBasePtr() const
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:471
static std::pair< unsigned, unsigned > CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain)
Check to see if V is (and load (ptr), imm), where the load has specific bytes cleared out...
virtual bool isFMAFasterThanFMulAndFAdd(EVT) const
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:478
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool equals(StringRef RHS) const
equals - Check for string equality, this is more efficient than compare() when the relative ordering ...
Definition: StringRef.h:169
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques=false)
CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, bool isInteger)
Return the result of a logical OR between different comparisons of identical values: ((X op1 Y) | (X ...
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:312
int getMaskElt(unsigned Idx) const
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG)
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:394
SDValue FoldConstantVectorArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, const SDNodeFlags Flags=SDNodeFlags())
bool hasVectorReduction() const
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:607
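Concretely, hiBit is exclusive (illustrative):

  // Bits [4, 8) set in a 16-bit value: 0x00F0.
  llvm::APInt Block = llvm::APInt::getBitsSet(16, 4, 8);
  assert(Block.getZExtValue() == 0xF0);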
Flags
Flags values. These may be or'd together.
int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a square root of the given type based on the function's attributes...
The memory access reads data.
bool hasNoSignedWrap() const
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:436
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:1969
static mvt_range all_valuetypes()
SimpleValueType Iteration.
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1255
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:185
These are IR-level optimization flags that may be propagated to SDNodes.
Represents a use of a SDNode.
virtual bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to move this shift by a constant amount though its operand...
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:133
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:151
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, SDValue ExtractFrom, SDValue &Mask, const SDLoc &DL)
Helper function for visitOR to extract the needed side of a rotate idiom from a shl/srl/mul/udiv.
const SDValue & getValue() const
virtual bool convertSelectOfConstantsToMath(EVT VT) const
Return true if a select of constants (select Cond, C1, C2) should be transformed into simple math ops...
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1321
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:387
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:149
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:206
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:195
bool isKnownToBeAPowerOfTwo(SDValue Val) const
Test if the given value is known to have exactly one bit set.
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:486
This file provides utility analysis objects describing memory locations.
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:614
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
TargetOptions Options
Definition: TargetMachine.h:97
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const
Return a reciprocal estimate value for the input operand.
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
ISD::CondCode get() const
const ConstantInt * getConstantIntValue() const
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:403
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:73
Flags getFlags() const
Return the raw flags of the source value.
bool optForMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:595
The memory access always returns the same value (or traps).
unsigned InferPtrAlignment(SDValue Ptr) const
Infer alignment of a load / store address.
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations)
virtual bool shouldFoldShiftPairToMask(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to fold a pair of shifts into a mask.
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:581
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG)
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
bool isBinaryOp(const SDNode *N)
Return true if the node is a math/logic binary operator.
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:273
uint32_t Size
Definition: Profile.cpp:47
Same for multiplication.
Definition: ISDOpcodes.h:257
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
const SDValue & getBasePtr() const
static SDValue getInputChainForNode(SDNode *N)
Given a node, return its input chain if it has one, otherwise return a null sd operand.
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
unsigned getOpcode() const
SDValue getValue(unsigned R) const
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts, adds, and multiplies for this target.
static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition: APFloat.h:924
This class is used to represent an MSCATTER node.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:175
static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask)
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
Definition: SelectionDAG.h:457
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
const MachinePointerInfo & getPointerInfo() const
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:345
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, unsigned Alignment=1, bool *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
This class is used to form a handle around another node that is persistent and is updated across invo...
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
unsigned getMinSignedBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1552
SDValue simplifySelect(SDValue Cond, SDValue TVal, SDValue FVal)
Try to simplify a select/vselect into 1 of its operands or a constant.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
This class is used to represent an MLOAD node.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:326
static ConstantSDNode * getAsNonOpaqueConstant(SDValue N)
If N is a ConstantSDNode with isOpaque() == false return it casted to a ConstantSDNode pointer else n...
static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0, unsigned ExtOpc, SmallVectorImpl< SDNode *> &ExtendNodes, const TargetLowering &TLI)
ArrayRef< int > getMask() const
LLVM Value Representation.
Definition: Value.h:73
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:302
static bool isSlicingProfitable(SmallVectorImpl< LoadedSlice > &LoadedSlices, const APInt &UsedBits, bool ForCodeSize)
Check the profitability of all involved LoadedSlice.
unsigned getResNo() const
get the index which selects a specific result in the SDNode
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition: APInt.cpp:1705
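For example, computing quotient and remainder in one call (illustrative):

  llvm::APInt Q(32, 0), R(32, 0);
  // 100 = 7 * 14 + 2
  llvm::APInt::udivrem(llvm::APInt(32, 100), llvm::APInt(32, 7), Q, R);
  assert(Q.getZExtValue() == 14 && R.getZExtValue() == 2);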
const MDNode * getRanges() const
Returns the Ranges that describe the dereference.
A vector that has set insertion semantics.
Definition: SetVector.h:41
bool isTruncatingStore() const
Return true if the op does a truncation before store.
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI)
Return true if divmod libcall is available.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
Definition: SelectionDAG.h:962
This class is used to represent an MGATHER node.
bool hasNoUnsignedWrap() const
SDValue getValueType(EVT)
virtual bool isVectorClearMaskLegal(ArrayRef< int >, EVT) const
Similar to isShuffleMaskLegal.
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:81
virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const
Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
bool isNonTemporal() const
bool isUndef() const
Return true if the node is an UNDEF value.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.h:331
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone...
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
virtual bool mergeStoresAfterLegalization() const
Allow store merging after legalization in addition to before legalization.
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:146
bool hasAllowReassociation() const
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX)
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine, etc.).
const APFloat & getValueAPF() const
unsigned NoSignedZerosFPMath
NoSignedZerosFPMath - This flag is enabled when the -enable-no-signed-zeros-fp-math is specified on t...
bool hasNoInfs() const
virtual const SelectionDAGTargetInfo * getSelectionDAGInfo() const
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations...
Definition: ISDOpcodes.h:306
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:443
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
APInt bitcastToAPInt() const
Definition: APFloat.h:1094
static SDNode * isConstantFPBuildVectorOrConstantFP(SDValue N)
unsigned countLeadingZeros() const
The APInt version of the countLeadingZeros functions in MathExtras.h.
Definition: APInt.h:1596
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition: ValueTypes.h:223
static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG)
If we are extracting a subvector from a wide vector load, convert to a narrow load to eliminate the e...
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:1967
unsigned getNumOperands() const
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:198
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations, those with specific masks.
static bool isVolatile(Instruction *Inst)
Conversion operators.
Definition: ISDOpcodes.h:465
static APInt getNullValue(unsigned numBits)
Get the &#39;0&#39; value.
Definition: APInt.h:569
const SDValue & getOperand(unsigned i) const
bool equalBaseIndex(const BaseIndexOffset &Other, const SelectionDAG &DAG) const
SDNode * getUser()
This returns the SDNode that contains this Use.
unsigned getGatherAllAliasesMaxDepth() const
uint64_t getZExtValue() const
bool isBigEndian() const
Definition: DataLayout.h:222
SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags=SDNodeFlags())
Get the specified node if it's already available, or else return NULL.
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:474
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:126
static void Split(std::vector< std::string > &V, StringRef S)
Splits a string of comma-separated items into a vector of strings.
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:393
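For example:

  // 255 is the largest value that fits in 8 unsigned bits.
  assert(llvm::isUIntN(8, 255));
  assert(!llvm::isUIntN(8, 256));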
#define LLVM_DEBUG(X)
Definition: Debug.h:123
bool hasTargetDAGCombine(ISD::NodeType NT) const
If true, the target has custom DAG combine transformations that it can perform for the specified node...
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
virtual bool hasBitPreservingFPLogic(EVT VT) const
Return true if it is safe to transform an integer-domain bitwise operation into the equivalent floati...
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:584
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:131
const SDValue & getBasePtr() const
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:545
LLVMContext * getContext() const
Definition: SelectionDAG.h:407
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:375
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1227
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
#define T1
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed store is legal on this target.
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:242
const SDValue & getMask() const
bool isNullValue() const
Determine if all bits are clear.
Definition: APInt.h:406
CARRY_FALSE - This node is used when folding other nodes, like ADDC/SUBC, which indicate the carry re...
Definition: ISDOpcodes.h:214
virtual bool isNarrowingProfitable(EVT, EVT) const
Return true if it's profitable to narrow operations of type VT1 to VT2.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:659
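Note the contrast with NextPowerOf2 above: NextPowerOf2 returns a value strictly greater than its argument, while PowerOf2Ceil is the identity on powers of two. For example:

  assert(llvm::NextPowerOf2(64) == 128); // strictly greater
  assert(llvm::PowerOf2Ceil(64) == 64);  // already a power of two
  assert(llvm::PowerOf2Ceil(65) == 128);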
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
This file describes how to lower LLVM code to machine code.
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
virtual bool shouldSplatInsEltVarIndex(EVT) const
Return true if inserting a scalar into a variable element of an undef vector is more efficiently hand...
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques=false)
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:914
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:380
This class is used to represent ISD::LOAD nodes.
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1245
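A short sketch of these range-based wrappers from STLExtras.h (illustrative):

  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/SmallVector.h"
  #include <cassert>

  void rangeHelperDemo() {
    llvm::SmallVector<int, 4> Vals = {1, 2, 3};
    // Both take the whole range instead of a begin/end pair.
    assert(llvm::is_contained(Vals, 2));
    assert(llvm::find(Vals, 3) != Vals.end());
  }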