LLVM 8.0.1
X86InstrInfo.cpp
1 //===-- X86InstrInfo.cpp - X86 Instruction Information --------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the X86 implementation of the TargetInstrInfo class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "X86InstrInfo.h"
15 #include "X86.h"
16 #include "X86InstrBuilder.h"
17 #include "X86InstrFoldTables.h"
18 #include "X86MachineFunctionInfo.h"
19 #include "X86Subtarget.h"
20 #include "X86TargetMachine.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/Sequence.h"
31 #include "llvm/CodeGen/StackMaps.h"
32 #include "llvm/IR/DerivedTypes.h"
33 #include "llvm/IR/Function.h"
34 #include "llvm/IR/LLVMContext.h"
35 #include "llvm/MC/MCAsmInfo.h"
36 #include "llvm/MC/MCExpr.h"
37 #include "llvm/MC/MCInst.h"
39 #include "llvm/Support/Debug.h"
43 
44 using namespace llvm;
45 
46 #define DEBUG_TYPE "x86-instr-info"
47 
48 #define GET_INSTRINFO_CTOR_DTOR
49 #include "X86GenInstrInfo.inc"
50 
51 static cl::opt<bool>
52  NoFusing("disable-spill-fusing",
53  cl::desc("Disable fusing of spill code into instructions"),
54  cl::Hidden);
55 static cl::opt<bool>
56 PrintFailedFusing("print-failed-fuse-candidates",
57  cl::desc("Print instructions that the allocator wants to"
58  " fuse, but the X86 backend currently can't"),
59  cl::Hidden);
60 static cl::opt<bool>
61 ReMatPICStubLoad("remat-pic-stub-load",
62  cl::desc("Re-materialize load from stub in PIC mode"),
63  cl::init(false), cl::Hidden);
64 static cl::opt<unsigned>
65 PartialRegUpdateClearance("partial-reg-update-clearance",
66  cl::desc("Clearance between two register writes "
67  "for inserting XOR to avoid partial "
68  "register update"),
69  cl::init(64), cl::Hidden);
70 static cl::opt<unsigned>
71 UndefRegClearance("undef-reg-clearance",
72  cl::desc("How many idle instructions we would like before "
73  "certain undef register reads"),
74  cl::init(128), cl::Hidden);
75 
76 
77 // Pin the vtable to this file.
78 void X86InstrInfo::anchor() {}
79 
80 X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
81  : X86GenInstrInfo((STI.isTarget64BitLP64() ? X86::ADJCALLSTACKDOWN64
82  : X86::ADJCALLSTACKDOWN32),
83  (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64
84  : X86::ADJCALLSTACKUP32),
85  X86::CATCHRET,
86  (STI.is64Bit() ? X86::RETQ : X86::RETL)),
87  Subtarget(STI), RI(STI.getTargetTriple()) {
88 }
89 
90 bool
91 X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
92  unsigned &SrcReg, unsigned &DstReg,
93  unsigned &SubIdx) const {
94  switch (MI.getOpcode()) {
95  default: break;
96  case X86::MOVSX16rr8:
97  case X86::MOVZX16rr8:
98  case X86::MOVSX32rr8:
99  case X86::MOVZX32rr8:
100  case X86::MOVSX64rr8:
101  if (!Subtarget.is64Bit())
102  // It's not always legal to reference the low 8 bits of the larger
103  // register in 32-bit mode.
104  return false;
105  LLVM_FALLTHROUGH;
106  case X86::MOVSX32rr16:
107  case X86::MOVZX32rr16:
108  case X86::MOVSX64rr16:
109  case X86::MOVSX64rr32: {
110  if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
111  // Be conservative.
112  return false;
113  SrcReg = MI.getOperand(1).getReg();
114  DstReg = MI.getOperand(0).getReg();
115  switch (MI.getOpcode()) {
116  default: llvm_unreachable("Unreachable!");
117  case X86::MOVSX16rr8:
118  case X86::MOVZX16rr8:
119  case X86::MOVSX32rr8:
120  case X86::MOVZX32rr8:
121  case X86::MOVSX64rr8:
122  SubIdx = X86::sub_8bit;
123  break;
124  case X86::MOVSX32rr16:
125  case X86::MOVZX32rr16:
126  case X86::MOVSX64rr16:
127  SubIdx = X86::sub_16bit;
128  break;
129  case X86::MOVSX64rr32:
130  SubIdx = X86::sub_32bit;
131  break;
132  }
133  return true;
134  }
135  }
136  return false;
137 }
138 
139 int X86InstrInfo::getSPAdjust(const MachineInstr &MI) const {
140  const MachineFunction *MF = MI.getParent()->getParent();
141  const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
142 
143  if (isFrameInstr(MI)) {
144  unsigned StackAlign = TFI->getStackAlignment();
145  int SPAdj = alignTo(getFrameSize(MI), StackAlign);
146  SPAdj -= getFrameAdjustment(MI);
147  if (!isFrameSetup(MI))
148  SPAdj = -SPAdj;
149  return SPAdj;
150  }
151 
152  // To know whether a call adjusts the stack, we need information
153  // that is bound to the following ADJCALLSTACKUP pseudo.
154  // Look for the next ADJCALLSTACKUP that follows the call.
155  if (MI.isCall()) {
156  const MachineBasicBlock *MBB = MI.getParent();
157  auto I = ++MachineBasicBlock::const_iterator(MI);
158  for (auto E = MBB->end(); I != E; ++I) {
159  if (I->getOpcode() == getCallFrameDestroyOpcode() ||
160  I->isCall())
161  break;
162  }
163 
164  // If we could not find a frame destroy opcode, then it has already
165  // been simplified, so we don't care.
166  if (I->getOpcode() != getCallFrameDestroyOpcode())
167  return 0;
168 
169  return -(I->getOperand(1).getImm());
170  }
171 
172  // Currently we handle only PUSHes we can reasonably expect to see
173  // in call sequences.
174  switch (MI.getOpcode()) {
175  default:
176  return 0;
177  case X86::PUSH32i8:
178  case X86::PUSH32r:
179  case X86::PUSH32rmm:
180  case X86::PUSH32rmr:
181  case X86::PUSHi32:
182  return 4;
183  case X86::PUSH64i8:
184  case X86::PUSH64r:
185  case X86::PUSH64rmm:
186  case X86::PUSH64rmr:
187  case X86::PUSH64i32:
188  return 8;
189  }
190 }
191 
192 /// Return true and the FrameIndex if the specified
193 /// operand and following operands form a reference to the stack frame.
194 bool X86InstrInfo::isFrameOperand(const MachineInstr &MI, unsigned int Op,
195  int &FrameIndex) const {
196  if (MI.getOperand(Op + X86::AddrBaseReg).isFI() &&
197  MI.getOperand(Op + X86::AddrScaleAmt).isImm() &&
198  MI.getOperand(Op + X86::AddrIndexReg).isReg() &&
199  MI.getOperand(Op + X86::AddrDisp).isImm() &&
200  MI.getOperand(Op + X86::AddrScaleAmt).getImm() == 1 &&
201  MI.getOperand(Op + X86::AddrIndexReg).getReg() == 0 &&
202  MI.getOperand(Op + X86::AddrDisp).getImm() == 0) {
203  FrameIndex = MI.getOperand(Op + X86::AddrBaseReg).getIndex();
204  return true;
205  }
206  return false;
207 }
208 
209 static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
210  switch (Opcode) {
211  default:
212  return false;
213  case X86::MOV8rm:
214  case X86::KMOVBkm:
215  MemBytes = 1;
216  return true;
217  case X86::MOV16rm:
218  case X86::KMOVWkm:
219  MemBytes = 2;
220  return true;
221  case X86::MOV32rm:
222  case X86::MOVSSrm:
223  case X86::VMOVSSZrm:
224  case X86::VMOVSSrm:
225  case X86::KMOVDkm:
226  MemBytes = 4;
227  return true;
228  case X86::MOV64rm:
229  case X86::LD_Fp64m:
230  case X86::MOVSDrm:
231  case X86::VMOVSDrm:
232  case X86::VMOVSDZrm:
233  case X86::MMX_MOVD64rm:
234  case X86::MMX_MOVQ64rm:
235  case X86::KMOVQkm:
236  MemBytes = 8;
237  return true;
238  case X86::MOVAPSrm:
239  case X86::MOVUPSrm:
240  case X86::MOVAPDrm:
241  case X86::MOVUPDrm:
242  case X86::MOVDQArm:
243  case X86::MOVDQUrm:
244  case X86::VMOVAPSrm:
245  case X86::VMOVUPSrm:
246  case X86::VMOVAPDrm:
247  case X86::VMOVUPDrm:
248  case X86::VMOVDQArm:
249  case X86::VMOVDQUrm:
250  case X86::VMOVAPSZ128rm:
251  case X86::VMOVUPSZ128rm:
252  case X86::VMOVAPSZ128rm_NOVLX:
253  case X86::VMOVUPSZ128rm_NOVLX:
254  case X86::VMOVAPDZ128rm:
255  case X86::VMOVUPDZ128rm:
256  case X86::VMOVDQU8Z128rm:
257  case X86::VMOVDQU16Z128rm:
258  case X86::VMOVDQA32Z128rm:
259  case X86::VMOVDQU32Z128rm:
260  case X86::VMOVDQA64Z128rm:
261  case X86::VMOVDQU64Z128rm:
262  MemBytes = 16;
263  return true;
264  case X86::VMOVAPSYrm:
265  case X86::VMOVUPSYrm:
266  case X86::VMOVAPDYrm:
267  case X86::VMOVUPDYrm:
268  case X86::VMOVDQAYrm:
269  case X86::VMOVDQUYrm:
270  case X86::VMOVAPSZ256rm:
271  case X86::VMOVUPSZ256rm:
272  case X86::VMOVAPSZ256rm_NOVLX:
273  case X86::VMOVUPSZ256rm_NOVLX:
274  case X86::VMOVAPDZ256rm:
275  case X86::VMOVUPDZ256rm:
276  case X86::VMOVDQU8Z256rm:
277  case X86::VMOVDQU16Z256rm:
278  case X86::VMOVDQA32Z256rm:
279  case X86::VMOVDQU32Z256rm:
280  case X86::VMOVDQA64Z256rm:
281  case X86::VMOVDQU64Z256rm:
282  MemBytes = 32;
283  return true;
284  case X86::VMOVAPSZrm:
285  case X86::VMOVUPSZrm:
286  case X86::VMOVAPDZrm:
287  case X86::VMOVUPDZrm:
288  case X86::VMOVDQU8Zrm:
289  case X86::VMOVDQU16Zrm:
290  case X86::VMOVDQA32Zrm:
291  case X86::VMOVDQU32Zrm:
292  case X86::VMOVDQA64Zrm:
293  case X86::VMOVDQU64Zrm:
294  MemBytes = 64;
295  return true;
296  }
297 }
298 
299 static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes) {
300  switch (Opcode) {
301  default:
302  return false;
303  case X86::MOV8mr:
304  case X86::KMOVBmk:
305  MemBytes = 1;
306  return true;
307  case X86::MOV16mr:
308  case X86::KMOVWmk:
309  MemBytes = 2;
310  return true;
311  case X86::MOV32mr:
312  case X86::MOVSSmr:
313  case X86::VMOVSSmr:
314  case X86::VMOVSSZmr:
315  case X86::KMOVDmk:
316  MemBytes = 4;
317  return true;
318  case X86::MOV64mr:
319  case X86::ST_FpP64m:
320  case X86::MOVSDmr:
321  case X86::VMOVSDmr:
322  case X86::VMOVSDZmr:
323  case X86::MMX_MOVD64mr:
324  case X86::MMX_MOVQ64mr:
325  case X86::MMX_MOVNTQmr:
326  case X86::KMOVQmk:
327  MemBytes = 8;
328  return true;
329  case X86::MOVAPSmr:
330  case X86::MOVUPSmr:
331  case X86::MOVAPDmr:
332  case X86::MOVUPDmr:
333  case X86::MOVDQAmr:
334  case X86::MOVDQUmr:
335  case X86::VMOVAPSmr:
336  case X86::VMOVUPSmr:
337  case X86::VMOVAPDmr:
338  case X86::VMOVUPDmr:
339  case X86::VMOVDQAmr:
340  case X86::VMOVDQUmr:
341  case X86::VMOVUPSZ128mr:
342  case X86::VMOVAPSZ128mr:
343  case X86::VMOVUPSZ128mr_NOVLX:
344  case X86::VMOVAPSZ128mr_NOVLX:
345  case X86::VMOVUPDZ128mr:
346  case X86::VMOVAPDZ128mr:
347  case X86::VMOVDQA32Z128mr:
348  case X86::VMOVDQU32Z128mr:
349  case X86::VMOVDQA64Z128mr:
350  case X86::VMOVDQU64Z128mr:
351  case X86::VMOVDQU8Z128mr:
352  case X86::VMOVDQU16Z128mr:
353  MemBytes = 16;
354  return true;
355  case X86::VMOVUPSYmr:
356  case X86::VMOVAPSYmr:
357  case X86::VMOVUPDYmr:
358  case X86::VMOVAPDYmr:
359  case X86::VMOVDQUYmr:
360  case X86::VMOVDQAYmr:
361  case X86::VMOVUPSZ256mr:
362  case X86::VMOVAPSZ256mr:
363  case X86::VMOVUPSZ256mr_NOVLX:
364  case X86::VMOVAPSZ256mr_NOVLX:
365  case X86::VMOVUPDZ256mr:
366  case X86::VMOVAPDZ256mr:
367  case X86::VMOVDQU8Z256mr:
368  case X86::VMOVDQU16Z256mr:
369  case X86::VMOVDQA32Z256mr:
370  case X86::VMOVDQU32Z256mr:
371  case X86::VMOVDQA64Z256mr:
372  case X86::VMOVDQU64Z256mr:
373  MemBytes = 32;
374  return true;
375  case X86::VMOVUPSZmr:
376  case X86::VMOVAPSZmr:
377  case X86::VMOVUPDZmr:
378  case X86::VMOVAPDZmr:
379  case X86::VMOVDQU8Zmr:
380  case X86::VMOVDQU16Zmr:
381  case X86::VMOVDQA32Zmr:
382  case X86::VMOVDQU32Zmr:
383  case X86::VMOVDQA64Zmr:
384  case X86::VMOVDQU64Zmr:
385  MemBytes = 64;
386  return true;
387  }
388  return false;
389 }
390 
391 unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
392  int &FrameIndex) const {
393  unsigned Dummy;
394  return X86InstrInfo::isLoadFromStackSlot(MI, FrameIndex, Dummy);
395 }
396 
397 unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
398  int &FrameIndex,
399  unsigned &MemBytes) const {
400  if (isFrameLoadOpcode(MI.getOpcode(), MemBytes))
401  if (MI.getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex))
402  return MI.getOperand(0).getReg();
403  return 0;
404 }
405 
406 unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
407  int &FrameIndex) const {
408  unsigned Dummy;
409  if (isFrameLoadOpcode(MI.getOpcode(), Dummy)) {
410  unsigned Reg;
411  if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
412  return Reg;
413  // Check for post-frame index elimination operations
414  SmallVector<const MachineMemOperand *, 1> Accesses;
415  if (hasLoadFromStackSlot(MI, Accesses)) {
416  FrameIndex =
417  cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
418  ->getFrameIndex();
419  return 1;
420  }
421  }
422  return 0;
423 }
424 
425 unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
426  int &FrameIndex) const {
427  unsigned Dummy;
428  return X86InstrInfo::isStoreToStackSlot(MI, FrameIndex, Dummy);
429 }
430 
431 unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
432  int &FrameIndex,
433  unsigned &MemBytes) const {
434  if (isFrameStoreOpcode(MI.getOpcode(), MemBytes))
435  if (MI.getOperand(X86::AddrNumOperands).getSubReg() == 0 &&
436  isFrameOperand(MI, 0, FrameIndex))
437  return MI.getOperand(X86::AddrNumOperands).getReg();
438  return 0;
439 }
440 
441 unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
442  int &FrameIndex) const {
443  unsigned Dummy;
444  if (isFrameStoreOpcode(MI.getOpcode(), Dummy)) {
445  unsigned Reg;
446  if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
447  return Reg;
448  // Check for post-frame index elimination operations
449  SmallVector<const MachineMemOperand *, 1> Accesses;
450  if (hasStoreToStackSlot(MI, Accesses)) {
451  FrameIndex =
452  cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
453  ->getFrameIndex();
454  return 1;
455  }
456  }
457  return 0;
458 }
459 
460 /// Return true if register is PIC base, i.e. defined by X86::MOVPC32r.
461 static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
462  // Don't waste compile time scanning use-def chains of physregs.
463  if (!TargetRegisterInfo::isVirtualRegister(BaseReg))
464  return false;
465  bool isPICBase = false;
466  for (MachineRegisterInfo::def_instr_iterator I = MRI.def_instr_begin(BaseReg),
467  E = MRI.def_instr_end(); I != E; ++I) {
468  MachineInstr *DefMI = &*I;
469  if (DefMI->getOpcode() != X86::MOVPC32r)
470  return false;
471  assert(!isPICBase && "More than one PIC base?");
472  isPICBase = true;
473  }
474  return isPICBase;
475 }
476 
477 bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
478  AliasAnalysis *AA) const {
479  switch (MI.getOpcode()) {
480  default: break;
481  case X86::MOV8rm:
482  case X86::MOV8rm_NOREX:
483  case X86::MOV16rm:
484  case X86::MOV32rm:
485  case X86::MOV64rm:
486  case X86::LD_Fp64m:
487  case X86::MOVSSrm:
488  case X86::MOVSDrm:
489  case X86::MOVAPSrm:
490  case X86::MOVUPSrm:
491  case X86::MOVAPDrm:
492  case X86::MOVUPDrm:
493  case X86::MOVDQArm:
494  case X86::MOVDQUrm:
495  case X86::VMOVSSrm:
496  case X86::VMOVSDrm:
497  case X86::VMOVAPSrm:
498  case X86::VMOVUPSrm:
499  case X86::VMOVAPDrm:
500  case X86::VMOVUPDrm:
501  case X86::VMOVDQArm:
502  case X86::VMOVDQUrm:
503  case X86::VMOVAPSYrm:
504  case X86::VMOVUPSYrm:
505  case X86::VMOVAPDYrm:
506  case X86::VMOVUPDYrm:
507  case X86::VMOVDQAYrm:
508  case X86::VMOVDQUYrm:
509  case X86::MMX_MOVD64rm:
510  case X86::MMX_MOVQ64rm:
511  // AVX-512
512  case X86::VMOVSSZrm:
513  case X86::VMOVSDZrm:
514  case X86::VMOVAPDZ128rm:
515  case X86::VMOVAPDZ256rm:
516  case X86::VMOVAPDZrm:
517  case X86::VMOVAPSZ128rm:
518  case X86::VMOVAPSZ256rm:
519  case X86::VMOVAPSZ128rm_NOVLX:
520  case X86::VMOVAPSZ256rm_NOVLX:
521  case X86::VMOVAPSZrm:
522  case X86::VMOVDQA32Z128rm:
523  case X86::VMOVDQA32Z256rm:
524  case X86::VMOVDQA32Zrm:
525  case X86::VMOVDQA64Z128rm:
526  case X86::VMOVDQA64Z256rm:
527  case X86::VMOVDQA64Zrm:
528  case X86::VMOVDQU16Z128rm:
529  case X86::VMOVDQU16Z256rm:
530  case X86::VMOVDQU16Zrm:
531  case X86::VMOVDQU32Z128rm:
532  case X86::VMOVDQU32Z256rm:
533  case X86::VMOVDQU32Zrm:
534  case X86::VMOVDQU64Z128rm:
535  case X86::VMOVDQU64Z256rm:
536  case X86::VMOVDQU64Zrm:
537  case X86::VMOVDQU8Z128rm:
538  case X86::VMOVDQU8Z256rm:
539  case X86::VMOVDQU8Zrm:
540  case X86::VMOVUPDZ128rm:
541  case X86::VMOVUPDZ256rm:
542  case X86::VMOVUPDZrm:
543  case X86::VMOVUPSZ128rm:
544  case X86::VMOVUPSZ256rm:
545  case X86::VMOVUPSZ128rm_NOVLX:
546  case X86::VMOVUPSZ256rm_NOVLX:
547  case X86::VMOVUPSZrm: {
548  // Loads from constant pools are trivially rematerializable.
549  if (MI.getOperand(1 + X86::AddrBaseReg).isReg() &&
550  MI.getOperand(1 + X86::AddrScaleAmt).isImm() &&
551  MI.getOperand(1 + X86::AddrIndexReg).isReg() &&
552  MI.getOperand(1 + X86::AddrIndexReg).getReg() == 0 &&
553  MI.isDereferenceableInvariantLoad(AA)) {
554  unsigned BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg();
555  if (BaseReg == 0 || BaseReg == X86::RIP)
556  return true;
557  // Allow re-materialization of PIC load.
558  if (!ReMatPICStubLoad && MI.getOperand(1 + X86::AddrDisp).isGlobal())
559  return false;
560  const MachineFunction &MF = *MI.getParent()->getParent();
561  const MachineRegisterInfo &MRI = MF.getRegInfo();
562  return regIsPICBase(BaseReg, MRI);
563  }
564  return false;
565  }
566 
567  case X86::LEA32r:
568  case X86::LEA64r: {
569  if (MI.getOperand(1 + X86::AddrScaleAmt).isImm() &&
570  MI.getOperand(1 + X86::AddrIndexReg).isReg() &&
571  MI.getOperand(1 + X86::AddrIndexReg).getReg() == 0 &&
572  !MI.getOperand(1 + X86::AddrDisp).isReg()) {
573  // lea fi#, lea GV, etc. are all rematerializable.
574  if (!MI.getOperand(1 + X86::AddrBaseReg).isReg())
575  return true;
576  unsigned BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg();
577  if (BaseReg == 0)
578  return true;
579  // Allow re-materialization of lea PICBase + x.
580  const MachineFunction &MF = *MI.getParent()->getParent();
581  const MachineRegisterInfo &MRI = MF.getRegInfo();
582  return regIsPICBase(BaseReg, MRI);
583  }
584  return false;
585  }
586  }
587 
588  // All other instructions marked M_REMATERIALIZABLE are always trivially
589  // rematerializable.
590  return true;
591 }
592 
593 bool X86InstrInfo::isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
594  MachineBasicBlock::iterator I) const {
595  MachineBasicBlock::iterator E = MBB.end();
596 
597  // To limit compile time, if we are not able to determine the
598  // safety after visiting 4 instructions in each direction, we will assume
599  // it's not safe.
600  MachineBasicBlock::iterator Iter = I;
601  for (unsigned i = 0; Iter != E && i < 4; ++i) {
602  bool SeenDef = false;
603  for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
604  MachineOperand &MO = Iter->getOperand(j);
605  if (MO.isRegMask() && MO.clobbersPhysReg(X86::EFLAGS))
606  SeenDef = true;
607  if (!MO.isReg())
608  continue;
609  if (MO.getReg() == X86::EFLAGS) {
610  if (MO.isUse())
611  return false;
612  SeenDef = true;
613  }
614  }
615 
616  if (SeenDef)
617  // This instruction defines EFLAGS, no need to look any further.
618  return true;
619  ++Iter;
620  // Skip over debug instructions.
621  while (Iter != E && Iter->isDebugInstr())
622  ++Iter;
623  }
624 
625  // It is safe to clobber EFLAGS at the end of a block if no successor has it
626  // live in.
627  if (Iter == E) {
628  for (MachineBasicBlock *S : MBB.successors())
629  if (S->isLiveIn(X86::EFLAGS))
630  return false;
631  return true;
632  }
633 
634  MachineBasicBlock::iterator B = MBB.begin();
635  Iter = I;
636  for (unsigned i = 0; i < 4; ++i) {
637  // If we make it to the beginning of the block, it's safe to clobber
638  // EFLAGS iff EFLAGS is not live-in.
639  if (Iter == B)
640  return !MBB.isLiveIn(X86::EFLAGS);
641 
642  --Iter;
643  // Skip over debug instructions.
644  while (Iter != B && Iter->isDebugInstr())
645  --Iter;
646 
647  bool SawKill = false;
648  for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
649  MachineOperand &MO = Iter->getOperand(j);
650  // A register mask may clobber EFLAGS, but we should still look for a
651  // live EFLAGS def.
652  if (MO.isRegMask() && MO.clobbersPhysReg(X86::EFLAGS))
653  SawKill = true;
654  if (MO.isReg() && MO.getReg() == X86::EFLAGS) {
655  if (MO.isDef()) return MO.isDead();
656  if (MO.isKill()) SawKill = true;
657  }
658  }
659 
660  if (SawKill)
661  // This instruction kills EFLAGS and doesn't redefine it, so
662  // there's no need to look further.
663  return true;
664  }
665 
666  // Conservative answer.
667  return false;
668 }
669 
670 void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
671  MachineBasicBlock::iterator I,
672  unsigned DestReg, unsigned SubIdx,
673  const MachineInstr &Orig,
674  const TargetRegisterInfo &TRI) const {
675  bool ClobbersEFLAGS = false;
676  for (const MachineOperand &MO : Orig.operands()) {
677  if (MO.isReg() && MO.isDef() && MO.getReg() == X86::EFLAGS) {
678  ClobbersEFLAGS = true;
679  break;
680  }
681  }
682 
683  if (ClobbersEFLAGS && !isSafeToClobberEFLAGS(MBB, I)) {
684  // The instruction clobbers EFLAGS. Re-materialize as MOV32ri to avoid side
685  // effects.
686  int Value;
687  switch (Orig.getOpcode()) {
688  case X86::MOV32r0: Value = 0; break;
689  case X86::MOV32r1: Value = 1; break;
690  case X86::MOV32r_1: Value = -1; break;
691  default:
692  llvm_unreachable("Unexpected instruction!");
693  }
694 
695  const DebugLoc &DL = Orig.getDebugLoc();
696  BuildMI(MBB, I, DL, get(X86::MOV32ri))
697  .add(Orig.getOperand(0))
698  .addImm(Value);
699  } else {
700  MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
701  MBB.insert(I, MI);
702  }
703 
704  MachineInstr &NewMI = *std::prev(I);
705  NewMI.substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
706 }
707 
708 /// True if MI has a condition code def, e.g. EFLAGS, that is not marked dead.
709 static bool hasLiveCondCodeDef(MachineInstr &MI) {
710  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
711  MachineOperand &MO = MI.getOperand(i);
712  if (MO.isReg() && MO.isDef() &&
713  MO.getReg() == X86::EFLAGS && !MO.isDead()) {
714  return true;
715  }
716  }
717  return false;
718 }
719 
720 /// Return the shift count for a machine operand, truncated to the bits the
721 /// hardware actually uses.
721 inline static unsigned getTruncatedShiftCount(const MachineInstr &MI,
722  unsigned ShiftAmtOperandIdx) {
723  // The shift count is six bits with the REX.W prefix and five bits without.
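// For example, a 64-bit shift with an immediate of 65 actually shifts by 1,
// because the hardware only looks at the low six bits of the count.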
724  unsigned ShiftCountMask = (MI.getDesc().TSFlags & X86II::REX_W) ? 63 : 31;
725  unsigned Imm = MI.getOperand(ShiftAmtOperandIdx).getImm();
726  return Imm & ShiftCountMask;
727 }
728 
729 /// Check whether the given shift count can be represented as the scale factor
730 /// of a LEA instruction.
731 inline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) {
732  // Left shift instructions can be transformed into load-effective-address
733  // instructions if we can encode them appropriately.
734  // A LEA instruction utilizes a SIB byte to encode its scale factor.
735  // The SIB.scale field is two bits wide which means that we can encode any
736  // shift amount less than 4.
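// For example, a left shift by 3 becomes an LEA with scale 8; a shift by 4
// would require scale 16, which the SIB byte cannot encode.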
737  return ShAmt < 4 && ShAmt > 0;
738 }
739 
740 bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
741  unsigned Opc, bool AllowSP, unsigned &NewSrc,
742  bool &isKill, MachineOperand &ImplicitOp,
743  LiveVariables *LV) const {
744  MachineFunction &MF = *MI.getParent()->getParent();
745  const TargetRegisterClass *RC;
746  if (AllowSP) {
747  RC = Opc != X86::LEA32r ? &X86::GR64RegClass : &X86::GR32RegClass;
748  } else {
749  RC = Opc != X86::LEA32r ?
750  &X86::GR64_NOSPRegClass : &X86::GR32_NOSPRegClass;
751  }
752  unsigned SrcReg = Src.getReg();
753 
754  // For both LEA64 and LEA32 the register already has essentially the right
755  // type (32-bit or 64-bit); we may just need to forbid SP.
756  if (Opc != X86::LEA64_32r) {
757  NewSrc = SrcReg;
758  isKill = Src.isKill();
759  assert(!Src.isUndef() && "Undef op doesn't need optimization");
760 
761  if (TargetRegisterInfo::isVirtualRegister(NewSrc) &&
762  !MF.getRegInfo().constrainRegClass(NewSrc, RC))
763  return false;
764 
765  return true;
766  }
767 
768  // This is for an LEA64_32r and incoming registers are 32-bit. One way or
769  // another we need to add 64-bit registers to the final MI.
770  if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
771  ImplicitOp = Src;
772  ImplicitOp.setImplicit();
773 
774  NewSrc = getX86SubSuperRegister(Src.getReg(), 64);
775  isKill = Src.isKill();
776  assert(!Src.isUndef() && "Undef op doesn't need optimization");
777  } else {
778  // This is a virtual register of the wrong class; we have to create a
779  // temporary 64-bit vreg to feed into the LEA.
780  NewSrc = MF.getRegInfo().createVirtualRegister(RC);
781  MachineInstr *Copy =
782  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(TargetOpcode::COPY))
783  .addReg(NewSrc, RegState::Define | RegState::Undef, X86::sub_32bit)
784  .add(Src);
785 
786  // Which is obviously going to be dead after we're done with it.
787  isKill = true;
788 
789  if (LV)
790  LV->replaceKillInstruction(SrcReg, MI, *Copy);
791  }
792 
793  // We've set all the parameters without issue.
794  return true;
795 }
796 
797 MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
798  unsigned MIOpc, MachineFunction::iterator &MFI, MachineInstr &MI,
799  LiveVariables *LV) const {
800  // We handle 8-bit adds and various 16-bit opcodes in the switch below.
801  bool Is16BitOp = !(MIOpc == X86::ADD8rr || MIOpc == X86::ADD8ri);
802  MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
803  assert((!Is16BitOp || RegInfo.getTargetRegisterInfo()->getRegSizeInBits(
804  *RegInfo.getRegClass(MI.getOperand(0).getReg())) == 16) &&
805  "Unexpected type for LEA transform");
806 
807  // TODO: For a 32-bit target, we need to adjust the LEA variables with
808  // something like this:
809  // Opcode = X86::LEA32r;
810  // InRegLEA = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
811  // OutRegLEA =
812  // Is8BitOp ? RegInfo.createVirtualRegister(&X86::GR32ABCD_RegClass)
813  // : RegInfo.createVirtualRegister(&X86::GR32RegClass);
814  if (!Subtarget.is64Bit())
815  return nullptr;
816 
817  unsigned Opcode = X86::LEA64_32r;
818  unsigned InRegLEA = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
819  unsigned OutRegLEA = RegInfo.createVirtualRegister(&X86::GR32RegClass);
820 
821  // Build and insert into an implicit UNDEF value. This is OK because
822  // we will be shifting and then extracting the lower 8/16-bits.
823  // This has the potential to cause a partial register stall, e.g.:
824  // movw (%rbp,%rcx,2), %dx
825  // leal -65(%rdx), %esi
826  // But testing has shown this *does* help performance in 64-bit mode (at
827  // least on modern x86 machines).
828  MachineBasicBlock::iterator MBBI = MI.getIterator();
829  unsigned Dest = MI.getOperand(0).getReg();
830  unsigned Src = MI.getOperand(1).getReg();
831  bool IsDead = MI.getOperand(0).isDead();
832  bool IsKill = MI.getOperand(1).isKill();
833  unsigned SubReg = Is16BitOp ? X86::sub_16bit : X86::sub_8bit;
834  assert(!MI.getOperand(1).isUndef() && "Undef op doesn't need optimization");
835  BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA);
836  MachineInstr *InsMI =
837  BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
838  .addReg(InRegLEA, RegState::Define, SubReg)
839  .addReg(Src, getKillRegState(IsKill));
840 
841  MachineInstrBuilder MIB =
842  BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(Opcode), OutRegLEA);
843  switch (MIOpc) {
844  default: llvm_unreachable("Unreachable!");
845  case X86::SHL16ri: {
846  unsigned ShAmt = MI.getOperand(2).getImm();
847  MIB.addReg(0).addImm(1ULL << ShAmt)
848  .addReg(InRegLEA, RegState::Kill).addImm(0).addReg(0);
849  break;
850  }
851  case X86::INC16r:
852  addRegOffset(MIB, InRegLEA, true, 1);
853  break;
854  case X86::DEC16r:
855  addRegOffset(MIB, InRegLEA, true, -1);
856  break;
857  case X86::ADD8ri:
858  case X86::ADD16ri:
859  case X86::ADD16ri8:
860  case X86::ADD16ri_DB:
861  case X86::ADD16ri8_DB:
862  addRegOffset(MIB, InRegLEA, true, MI.getOperand(2).getImm());
863  break;
864  case X86::ADD8rr:
865  case X86::ADD16rr:
866  case X86::ADD16rr_DB: {
867  unsigned Src2 = MI.getOperand(2).getReg();
868  bool IsKill2 = MI.getOperand(2).isKill();
869  assert(!MI.getOperand(2).isUndef() && "Undef op doesn't need optimization");
870  unsigned InRegLEA2 = 0;
871  MachineInstr *InsMI2 = nullptr;
872  if (Src == Src2) {
873  // ADD8rr/ADD16rr killed %reg1028, %reg1028
874  // just a single insert_subreg.
875  addRegReg(MIB, InRegLEA, true, InRegLEA, false);
876  } else {
877  if (Subtarget.is64Bit())
878  InRegLEA2 = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
879  else
880  InRegLEA2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
881  // Build and insert into an implicit UNDEF value. This is OK because
882  // we will be shifting and then extracting the lower 8/16-bits.
883  BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA2);
884  InsMI2 = BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(TargetOpcode::COPY))
885  .addReg(InRegLEA2, RegState::Define, SubReg)
886  .addReg(Src2, getKillRegState(IsKill2));
887  addRegReg(MIB, InRegLEA, true, InRegLEA2, true);
888  }
889  if (LV && IsKill2 && InsMI2)
890  LV->replaceKillInstruction(Src2, MI, *InsMI2);
891  break;
892  }
893  }
894 
895  MachineInstr *NewMI = MIB;
896  MachineInstr *ExtMI =
897  BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
898  .addReg(Dest, RegState::Define | getDeadRegState(IsDead))
899  .addReg(OutRegLEA, RegState::Kill, SubReg);
900 
901  if (LV) {
902  // Update live variables.
903  LV->getVarInfo(InRegLEA).Kills.push_back(NewMI);
904  LV->getVarInfo(OutRegLEA).Kills.push_back(ExtMI);
905  if (IsKill)
906  LV->replaceKillInstruction(Src, MI, *InsMI);
907  if (IsDead)
908  LV->replaceKillInstruction(Dest, MI, *ExtMI);
909  }
910 
911  return ExtMI;
912 }
913 
914 /// This method must be implemented by targets that
915 /// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
916 /// may be able to convert a two-address instruction into a true
917 /// three-address instruction on demand. This allows the X86 target (for
918 /// example) to convert ADD and SHL instructions into LEA instructions if they
919 /// would require register copies due to two-addressness.
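/// For example, on a 64-bit target "addl %ebx, %eax" (a two-address ADD32rr)
/// can become "leal (%rax,%rbx), %eax", which reads both sources without
/// tying either of them to the destination.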
920 ///
921 /// This method returns a null pointer if the transformation cannot be
922 /// performed, otherwise it returns the new instruction.
923 ///
924 MachineInstr *
925 X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
926  MachineInstr &MI, LiveVariables *LV) const {
927  // The following opcodes also set the condition code register(s). Only
928  // convert them to an equivalent LEA if the condition code register defs
929  // are dead!
930  if (hasLiveCondCodeDef(MI))
931  return nullptr;
932 
933  MachineFunction &MF = *MI.getParent()->getParent();
934  // All input instructions are two-address instructions. Get the known operands.
935  const MachineOperand &Dest = MI.getOperand(0);
936  const MachineOperand &Src = MI.getOperand(1);
937 
938  // Ideally, operations with undef should be folded before we get here, but we
939  // can't guarantee it. Bail out because optimizing undefs is a waste of time.
940  // Without this, we have to forward undef state to new register operands to
941  // avoid machine verifier errors.
942  if (Src.isUndef())
943  return nullptr;
944  if (MI.getNumOperands() > 2)
945  if (MI.getOperand(2).isReg() && MI.getOperand(2).isUndef())
946  return nullptr;
947 
948  MachineInstr *NewMI = nullptr;
949  bool Is64Bit = Subtarget.is64Bit();
950 
951  unsigned MIOpc = MI.getOpcode();
952  switch (MIOpc) {
953  default: return nullptr;
954  case X86::SHL64ri: {
955  assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
956  unsigned ShAmt = getTruncatedShiftCount(MI, 2);
957  if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr;
958 
959  // LEA can't handle RSP.
960  if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) &&
961  !MF.getRegInfo().constrainRegClass(Src.getReg(),
962  &X86::GR64_NOSPRegClass))
963  return nullptr;
964 
965  NewMI = BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r))
966  .add(Dest)
967  .addReg(0)
968  .addImm(1ULL << ShAmt)
969  .add(Src)
970  .addImm(0)
971  .addReg(0);
972  break;
973  }
974  case X86::SHL32ri: {
975  assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
976  unsigned ShAmt = getTruncatedShiftCount(MI, 2);
977  if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr;
978 
979  unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
980 
981  // LEA can't handle ESP.
982  bool isKill;
983  unsigned SrcReg;
984  MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
985  if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
986  SrcReg, isKill, ImplicitOp, LV))
987  return nullptr;
988 
989  MachineInstrBuilder MIB =
990  BuildMI(MF, MI.getDebugLoc(), get(Opc))
991  .add(Dest)
992  .addReg(0)
993  .addImm(1ULL << ShAmt)
994  .addReg(SrcReg, getKillRegState(isKill))
995  .addImm(0)
996  .addReg(0);
997  if (ImplicitOp.getReg() != 0)
998  MIB.add(ImplicitOp);
999  NewMI = MIB;
1000 
1001  break;
1002  }
1003  case X86::SHL16ri: {
1004  assert(MI.getNumOperands() >= 3 && "Unknown shift instruction!");
1005  unsigned ShAmt = getTruncatedShiftCount(MI, 2);
1006  if (!isTruncatedShiftCountForLEA(ShAmt))
1007  return nullptr;
1008  return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
1009  }
1010  case X86::INC64r:
1011  case X86::INC32r: {
1012  assert(MI.getNumOperands() >= 2 && "Unknown inc instruction!");
1013  unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r :
1014  (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
1015  bool isKill;
1016  unsigned SrcReg;
1017  MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
1018  if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill,
1019  ImplicitOp, LV))
1020  return nullptr;
1021 
1022  MachineInstrBuilder MIB =
1023  BuildMI(MF, MI.getDebugLoc(), get(Opc))
1024  .add(Dest)
1025  .addReg(SrcReg, getKillRegState(isKill));
1026  if (ImplicitOp.getReg() != 0)
1027  MIB.add(ImplicitOp);
1028 
1029  NewMI = addOffset(MIB, 1);
1030  break;
1031  }
1032  case X86::INC16r:
1033  return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
1034  case X86::DEC64r:
1035  case X86::DEC32r: {
1036  assert(MI.getNumOperands() >= 2 && "Unknown dec instruction!");
1037  unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
1038  : (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
1039 
1040  bool isKill;
1041  unsigned SrcReg;
1042  MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
1043  if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill,
1044  ImplicitOp, LV))
1045  return nullptr;
1046 
1047  MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
1048  .add(Dest)
1049  .addReg(SrcReg, getKillRegState(isKill));
1050  if (ImplicitOp.getReg() != 0)
1051  MIB.add(ImplicitOp);
1052 
1053  NewMI = addOffset(MIB, -1);
1054 
1055  break;
1056  }
1057  case X86::DEC16r:
1058  return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
1059  case X86::ADD64rr:
1060  case X86::ADD64rr_DB:
1061  case X86::ADD32rr:
1062  case X86::ADD32rr_DB: {
1063  assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
1064  unsigned Opc;
1065  if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB)
1066  Opc = X86::LEA64r;
1067  else
1068  Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
1069 
1070  bool isKill;
1071  unsigned SrcReg;
1072  MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
1073  if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
1074  SrcReg, isKill, ImplicitOp, LV))
1075  return nullptr;
1076 
1077  const MachineOperand &Src2 = MI.getOperand(2);
1078  bool isKill2;
1079  unsigned SrcReg2;
1080  MachineOperand ImplicitOp2 = MachineOperand::CreateReg(0, false);
1081  if (!classifyLEAReg(MI, Src2, Opc, /*AllowSP=*/ false,
1082  SrcReg2, isKill2, ImplicitOp2, LV))
1083  return nullptr;
1084 
1085  MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc)).add(Dest);
1086  if (ImplicitOp.getReg() != 0)
1087  MIB.add(ImplicitOp);
1088  if (ImplicitOp2.getReg() != 0)
1089  MIB.add(ImplicitOp2);
1090 
1091  NewMI = addRegReg(MIB, SrcReg, isKill, SrcReg2, isKill2);
1092  if (LV && Src2.isKill())
1093  LV->replaceKillInstruction(SrcReg2, MI, *NewMI);
1094  break;
1095  }
1096  case X86::ADD8rr:
1097  case X86::ADD16rr:
1098  case X86::ADD16rr_DB:
1099  return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
1100  case X86::ADD64ri32:
1101  case X86::ADD64ri8:
1102  case X86::ADD64ri32_DB:
1103  case X86::ADD64ri8_DB:
1104  assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
1105  NewMI = addOffset(
1106  BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r)).add(Dest).add(Src),
1107  MI.getOperand(2));
1108  break;
1109  case X86::ADD32ri:
1110  case X86::ADD32ri8:
1111  case X86::ADD32ri_DB:
1112  case X86::ADD32ri8_DB: {
1113  assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
1114  unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
1115 
1116  bool isKill;
1117  unsigned SrcReg;
1118  MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
1119  if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
1120  SrcReg, isKill, ImplicitOp, LV))
1121  return nullptr;
1122 
1123  MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
1124  .add(Dest)
1125  .addReg(SrcReg, getKillRegState(isKill));
1126  if (ImplicitOp.getReg() != 0)
1127  MIB.add(ImplicitOp);
1128 
1129  NewMI = addOffset(MIB, MI.getOperand(2));
1130  break;
1131  }
1132  case X86::ADD8ri:
1133  case X86::ADD16ri:
1134  case X86::ADD16ri8:
1135  case X86::ADD16ri_DB:
1136  case X86::ADD16ri8_DB:
1137  return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV);
1138  case X86::VMOVDQU8Z128rmk:
1139  case X86::VMOVDQU8Z256rmk:
1140  case X86::VMOVDQU8Zrmk:
1141  case X86::VMOVDQU16Z128rmk:
1142  case X86::VMOVDQU16Z256rmk:
1143  case X86::VMOVDQU16Zrmk:
1144  case X86::VMOVDQU32Z128rmk: case X86::VMOVDQA32Z128rmk:
1145  case X86::VMOVDQU32Z256rmk: case X86::VMOVDQA32Z256rmk:
1146  case X86::VMOVDQU32Zrmk: case X86::VMOVDQA32Zrmk:
1147  case X86::VMOVDQU64Z128rmk: case X86::VMOVDQA64Z128rmk:
1148  case X86::VMOVDQU64Z256rmk: case X86::VMOVDQA64Z256rmk:
1149  case X86::VMOVDQU64Zrmk: case X86::VMOVDQA64Zrmk:
1150  case X86::VMOVUPDZ128rmk: case X86::VMOVAPDZ128rmk:
1151  case X86::VMOVUPDZ256rmk: case X86::VMOVAPDZ256rmk:
1152  case X86::VMOVUPDZrmk: case X86::VMOVAPDZrmk:
1153  case X86::VMOVUPSZ128rmk: case X86::VMOVAPSZ128rmk:
1154  case X86::VMOVUPSZ256rmk: case X86::VMOVAPSZ256rmk:
1155  case X86::VMOVUPSZrmk: case X86::VMOVAPSZrmk: {
1156  unsigned Opc;
1157  switch (MIOpc) {
1158  default: llvm_unreachable("Unreachable!");
1159  case X86::VMOVDQU8Z128rmk: Opc = X86::VPBLENDMBZ128rmk; break;
1160  case X86::VMOVDQU8Z256rmk: Opc = X86::VPBLENDMBZ256rmk; break;
1161  case X86::VMOVDQU8Zrmk: Opc = X86::VPBLENDMBZrmk; break;
1162  case X86::VMOVDQU16Z128rmk: Opc = X86::VPBLENDMWZ128rmk; break;
1163  case X86::VMOVDQU16Z256rmk: Opc = X86::VPBLENDMWZ256rmk; break;
1164  case X86::VMOVDQU16Zrmk: Opc = X86::VPBLENDMWZrmk; break;
1165  case X86::VMOVDQU32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
1166  case X86::VMOVDQU32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
1167  case X86::VMOVDQU32Zrmk: Opc = X86::VPBLENDMDZrmk; break;
1168  case X86::VMOVDQU64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
1169  case X86::VMOVDQU64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
1170  case X86::VMOVDQU64Zrmk: Opc = X86::VPBLENDMQZrmk; break;
1171  case X86::VMOVUPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break;
1172  case X86::VMOVUPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break;
1173  case X86::VMOVUPDZrmk: Opc = X86::VBLENDMPDZrmk; break;
1174  case X86::VMOVUPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break;
1175  case X86::VMOVUPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break;
1176  case X86::VMOVUPSZrmk: Opc = X86::VBLENDMPSZrmk; break;
1177  case X86::VMOVDQA32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
1178  case X86::VMOVDQA32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
1179  case X86::VMOVDQA32Zrmk: Opc = X86::VPBLENDMDZrmk; break;
1180  case X86::VMOVDQA64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
1181  case X86::VMOVDQA64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
1182  case X86::VMOVDQA64Zrmk: Opc = X86::VPBLENDMQZrmk; break;
1183  case X86::VMOVAPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break;
1184  case X86::VMOVAPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break;
1185  case X86::VMOVAPDZrmk: Opc = X86::VBLENDMPDZrmk; break;
1186  case X86::VMOVAPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break;
1187  case X86::VMOVAPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break;
1188  case X86::VMOVAPSZrmk: Opc = X86::VBLENDMPSZrmk; break;
1189  }
1190 
1191  NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc))
1192  .add(Dest)
1193  .add(MI.getOperand(2))
1194  .add(Src)
1195  .add(MI.getOperand(3))
1196  .add(MI.getOperand(4))
1197  .add(MI.getOperand(5))
1198  .add(MI.getOperand(6))
1199  .add(MI.getOperand(7));
1200  break;
1201  }
1202  case X86::VMOVDQU8Z128rrk:
1203  case X86::VMOVDQU8Z256rrk:
1204  case X86::VMOVDQU8Zrrk:
1205  case X86::VMOVDQU16Z128rrk:
1206  case X86::VMOVDQU16Z256rrk:
1207  case X86::VMOVDQU16Zrrk:
1208  case X86::VMOVDQU32Z128rrk: case X86::VMOVDQA32Z128rrk:
1209  case X86::VMOVDQU32Z256rrk: case X86::VMOVDQA32Z256rrk:
1210  case X86::VMOVDQU32Zrrk: case X86::VMOVDQA32Zrrk:
1211  case X86::VMOVDQU64Z128rrk: case X86::VMOVDQA64Z128rrk:
1212  case X86::VMOVDQU64Z256rrk: case X86::VMOVDQA64Z256rrk:
1213  case X86::VMOVDQU64Zrrk: case X86::VMOVDQA64Zrrk:
1214  case X86::VMOVUPDZ128rrk: case X86::VMOVAPDZ128rrk:
1215  case X86::VMOVUPDZ256rrk: case X86::VMOVAPDZ256rrk:
1216  case X86::VMOVUPDZrrk: case X86::VMOVAPDZrrk:
1217  case X86::VMOVUPSZ128rrk: case X86::VMOVAPSZ128rrk:
1218  case X86::VMOVUPSZ256rrk: case X86::VMOVAPSZ256rrk:
1219  case X86::VMOVUPSZrrk: case X86::VMOVAPSZrrk: {
1220  unsigned Opc;
1221  switch (MIOpc) {
1222  default: llvm_unreachable("Unreachable!");
1223  case X86::VMOVDQU8Z128rrk: Opc = X86::VPBLENDMBZ128rrk; break;
1224  case X86::VMOVDQU8Z256rrk: Opc = X86::VPBLENDMBZ256rrk; break;
1225  case X86::VMOVDQU8Zrrk: Opc = X86::VPBLENDMBZrrk; break;
1226  case X86::VMOVDQU16Z128rrk: Opc = X86::VPBLENDMWZ128rrk; break;
1227  case X86::VMOVDQU16Z256rrk: Opc = X86::VPBLENDMWZ256rrk; break;
1228  case X86::VMOVDQU16Zrrk: Opc = X86::VPBLENDMWZrrk; break;
1229  case X86::VMOVDQU32Z128rrk: Opc = X86::VPBLENDMDZ128rrk; break;
1230  case X86::VMOVDQU32Z256rrk: Opc = X86::VPBLENDMDZ256rrk; break;
1231  case X86::VMOVDQU32Zrrk: Opc = X86::VPBLENDMDZrrk; break;
1232  case X86::VMOVDQU64Z128rrk: Opc = X86::VPBLENDMQZ128rrk; break;
1233  case X86::VMOVDQU64Z256rrk: Opc = X86::VPBLENDMQZ256rrk; break;
1234  case X86::VMOVDQU64Zrrk: Opc = X86::VPBLENDMQZrrk; break;
1235  case X86::VMOVUPDZ128rrk: Opc = X86::VBLENDMPDZ128rrk; break;
1236  case X86::VMOVUPDZ256rrk: Opc = X86::VBLENDMPDZ256rrk; break;
1237  case X86::VMOVUPDZrrk: Opc = X86::VBLENDMPDZrrk; break;
1238  case X86::VMOVUPSZ128rrk: Opc = X86::VBLENDMPSZ128rrk; break;
1239  case X86::VMOVUPSZ256rrk: Opc = X86::VBLENDMPSZ256rrk; break;
1240  case X86::VMOVUPSZrrk: Opc = X86::VBLENDMPSZrrk; break;
1241  case X86::VMOVDQA32Z128rrk: Opc = X86::VPBLENDMDZ128rrk; break;
1242  case X86::VMOVDQA32Z256rrk: Opc = X86::VPBLENDMDZ256rrk; break;
1243  case X86::VMOVDQA32Zrrk: Opc = X86::VPBLENDMDZrrk; break;
1244  case X86::VMOVDQA64Z128rrk: Opc = X86::VPBLENDMQZ128rrk; break;
1245  case X86::VMOVDQA64Z256rrk: Opc = X86::VPBLENDMQZ256rrk; break;
1246  case X86::VMOVDQA64Zrrk: Opc = X86::VPBLENDMQZrrk; break;
1247  case X86::VMOVAPDZ128rrk: Opc = X86::VBLENDMPDZ128rrk; break;
1248  case X86::VMOVAPDZ256rrk: Opc = X86::VBLENDMPDZ256rrk; break;
1249  case X86::VMOVAPDZrrk: Opc = X86::VBLENDMPDZrrk; break;
1250  case X86::VMOVAPSZ128rrk: Opc = X86::VBLENDMPSZ128rrk; break;
1251  case X86::VMOVAPSZ256rrk: Opc = X86::VBLENDMPSZ256rrk; break;
1252  case X86::VMOVAPSZrrk: Opc = X86::VBLENDMPSZrrk; break;
1253  }
1254 
1255  NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc))
1256  .add(Dest)
1257  .add(MI.getOperand(2))
1258  .add(Src)
1259  .add(MI.getOperand(3));
1260  break;
1261  }
1262  }
1263 
1264  if (!NewMI) return nullptr;
1265 
1266  if (LV) { // Update live variables
1267  if (Src.isKill())
1268  LV->replaceKillInstruction(Src.getReg(), MI, *NewMI);
1269  if (Dest.isDead())
1270  LV->replaceKillInstruction(Dest.getReg(), MI, *NewMI);
1271  }
1272 
1273  MFI->insert(MI.getIterator(), NewMI); // Insert the new inst
1274  return NewMI;
1275 }
1276 
1277 /// This determines which of three possible cases of a three source commute
1278 /// the source indexes correspond to, taking into account any mask operands.
1279 /// None of the cases allows commuting the passthru operand. Returns -1 if the
1280 /// commute isn't possible.
1281 /// Case 0 - Possible to commute the first and second operands.
1282 /// Case 1 - Possible to commute the first and third operands.
1283 /// Case 2 - Possible to commute the second and third operands.
1284 static unsigned getThreeSrcCommuteCase(uint64_t TSFlags, unsigned SrcOpIdx1,
1285  unsigned SrcOpIdx2) {
1286  // Put the lowest index to SrcOpIdx1 to simplify the checks below.
1287  if (SrcOpIdx1 > SrcOpIdx2)
1288  std::swap(SrcOpIdx1, SrcOpIdx2);
1289 
1290  unsigned Op1 = 1, Op2 = 2, Op3 = 3;
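// In the masked forms the mask register is operand 2, so the second and third
// source operands sit one slot further to the right.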
1291  if (X86II::isKMasked(TSFlags)) {
1292  Op2++;
1293  Op3++;
1294  }
1295 
1296  if (SrcOpIdx1 == Op1 && SrcOpIdx2 == Op2)
1297  return 0;
1298  if (SrcOpIdx1 == Op1 && SrcOpIdx2 == Op3)
1299  return 1;
1300  if (SrcOpIdx1 == Op2 && SrcOpIdx2 == Op3)
1301  return 2;
1302  llvm_unreachable("Unknown three src commute case.");
1303 }
1304 
1305 unsigned X86InstrInfo::getFMA3OpcodeToCommuteOperands(
1306  const MachineInstr &MI, unsigned SrcOpIdx1, unsigned SrcOpIdx2,
1307  const X86InstrFMA3Group &FMA3Group) const {
1308 
1309  unsigned Opc = MI.getOpcode();
1310 
1311  // TODO: Commuting the 1st operand of FMA*_Int requires some additional
1312  // analysis. The commute optimization is legal only if all users of FMA*_Int
1313  // use only the lowest element of the FMA*_Int instruction. Such an analysis
1314  // is not implemented yet. So, just return 0 in that case.
1315  // When such an analysis is available, this will be the right place for
1316  // calling it.
1317  assert(!(FMA3Group.isIntrinsic() && (SrcOpIdx1 == 1 || SrcOpIdx2 == 1)) &&
1318  "Intrinsic instructions can't commute operand 1");
1319 
1320  // Determine which case this commute is or if it can't be done.
1321  unsigned Case = getThreeSrcCommuteCase(MI.getDesc().TSFlags, SrcOpIdx1,
1322  SrcOpIdx2);
1323  assert(Case < 3 && "Unexpected case number!");
1324 
1325  // Define the FMA forms mapping array that helps to map input FMA form
1326  // to output FMA form to preserve the operation semantics after
1327  // commuting the operands.
1328  const unsigned Form132Index = 0;
1329  const unsigned Form213Index = 1;
1330  const unsigned Form231Index = 2;
1331  static const unsigned FormMapping[][3] = {
1332  // 0: SrcOpIdx1 == 1 && SrcOpIdx2 == 2;
1333  // FMA132 A, C, b; ==> FMA231 C, A, b;
1334  // FMA213 B, A, c; ==> FMA213 A, B, c;
1335  // FMA231 C, A, b; ==> FMA132 A, C, b;
1336  { Form231Index, Form213Index, Form132Index },
1337  // 1: SrcOpIdx1 == 1 && SrcOpIdx2 == 3;
1338  // FMA132 A, c, B; ==> FMA132 B, c, A;
1339  // FMA213 B, a, C; ==> FMA231 C, a, B;
1340  // FMA231 C, a, B; ==> FMA213 B, a, C;
1341  { Form132Index, Form231Index, Form213Index },
1342  // 2: SrcOpIdx1 == 2 && SrcOpIdx2 == 3;
1343  // FMA132 a, C, B; ==> FMA213 a, B, C;
1344  // FMA213 b, A, C; ==> FMA132 b, C, A;
1345  // FMA231 c, A, B; ==> FMA231 c, B, A;
1346  { Form213Index, Form132Index, Form231Index }
1347  };
1348 
1349  unsigned FMAForms[3];
1350  FMAForms[0] = FMA3Group.get132Opcode();
1351  FMAForms[1] = FMA3Group.get213Opcode();
1352  FMAForms[2] = FMA3Group.get231Opcode();
1353  unsigned FormIndex;
1354  for (FormIndex = 0; FormIndex < 3; FormIndex++)
1355  if (Opc == FMAForms[FormIndex])
1356  break;
1357 
1358  // Everything is ready, just adjust the FMA opcode and return it.
1359  FormIndex = FormMapping[Case][FormIndex];
1360  return FMAForms[FormIndex];
1361 }
1362 
1363 static void commuteVPTERNLOG(MachineInstr &MI, unsigned SrcOpIdx1,
1364  unsigned SrcOpIdx2) {
1365  // Determine which case this commute is or if it can't be done.
1366  unsigned Case = getThreeSrcCommuteCase(MI.getDesc().TSFlags, SrcOpIdx1,
1367  SrcOpIdx2);
1368  assert(Case < 3 && "Unexpected case value!");
1369 
1370  // For each case we need to swap two pairs of bits in the final immediate.
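// The ternary-logic immediate is a truth table indexed by one bit from each
// source (operand 1 supplies the most significant index bit). Swapping two
// sources swaps the corresponding index bits, which exchanges exactly the
// immediate bits whose indices differ in those two positions.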
1371  static const uint8_t SwapMasks[3][4] = {
1372  { 0x04, 0x10, 0x08, 0x20 }, // Swap bits 2/4 and 3/5.
1373  { 0x02, 0x10, 0x08, 0x40 }, // Swap bits 1/4 and 3/6.
1374  { 0x02, 0x04, 0x20, 0x40 }, // Swap bits 1/2 and 5/6.
1375  };
1376 
1377  uint8_t Imm = MI.getOperand(MI.getNumOperands()-1).getImm();
1378  // Clear out the bits we are swapping.
1379  uint8_t NewImm = Imm & ~(SwapMasks[Case][0] | SwapMasks[Case][1] |
1380  SwapMasks[Case][2] | SwapMasks[Case][3]);
1381  // If the immediate had a bit of the pair set, then set the opposite bit.
1382  if (Imm & SwapMasks[Case][0]) NewImm |= SwapMasks[Case][1];
1383  if (Imm & SwapMasks[Case][1]) NewImm |= SwapMasks[Case][0];
1384  if (Imm & SwapMasks[Case][2]) NewImm |= SwapMasks[Case][3];
1385  if (Imm & SwapMasks[Case][3]) NewImm |= SwapMasks[Case][2];
1386  MI.getOperand(MI.getNumOperands()-1).setImm(NewImm);
1387 }
1388 
1389 // Returns true if this is a VPERMI2 or VPERMT2 instruction that can be
1390 // commuted.
1391 static bool isCommutableVPERMV3Instruction(unsigned Opcode) {
1392 #define VPERM_CASES(Suffix) \
1393  case X86::VPERMI2##Suffix##128rr: case X86::VPERMT2##Suffix##128rr: \
1394  case X86::VPERMI2##Suffix##256rr: case X86::VPERMT2##Suffix##256rr: \
1395  case X86::VPERMI2##Suffix##rr: case X86::VPERMT2##Suffix##rr: \
1396  case X86::VPERMI2##Suffix##128rm: case X86::VPERMT2##Suffix##128rm: \
1397  case X86::VPERMI2##Suffix##256rm: case X86::VPERMT2##Suffix##256rm: \
1398  case X86::VPERMI2##Suffix##rm: case X86::VPERMT2##Suffix##rm: \
1399  case X86::VPERMI2##Suffix##128rrkz: case X86::VPERMT2##Suffix##128rrkz: \
1400  case X86::VPERMI2##Suffix##256rrkz: case X86::VPERMT2##Suffix##256rrkz: \
1401  case X86::VPERMI2##Suffix##rrkz: case X86::VPERMT2##Suffix##rrkz: \
1402  case X86::VPERMI2##Suffix##128rmkz: case X86::VPERMT2##Suffix##128rmkz: \
1403  case X86::VPERMI2##Suffix##256rmkz: case X86::VPERMT2##Suffix##256rmkz: \
1404  case X86::VPERMI2##Suffix##rmkz: case X86::VPERMT2##Suffix##rmkz:
1405 
1406 #define VPERM_CASES_BROADCAST(Suffix) \
1407  VPERM_CASES(Suffix) \
1408  case X86::VPERMI2##Suffix##128rmb: case X86::VPERMT2##Suffix##128rmb: \
1409  case X86::VPERMI2##Suffix##256rmb: case X86::VPERMT2##Suffix##256rmb: \
1410  case X86::VPERMI2##Suffix##rmb: case X86::VPERMT2##Suffix##rmb: \
1411  case X86::VPERMI2##Suffix##128rmbkz: case X86::VPERMT2##Suffix##128rmbkz: \
1412  case X86::VPERMI2##Suffix##256rmbkz: case X86::VPERMT2##Suffix##256rmbkz: \
1413  case X86::VPERMI2##Suffix##rmbkz: case X86::VPERMT2##Suffix##rmbkz:
1414 
1415  switch (Opcode) {
1416  default: return false;
1417  VPERM_CASES(B)
1418  VPERM_CASES_BROADCAST(D)
1419  VPERM_CASES_BROADCAST(PD)
1420  VPERM_CASES_BROADCAST(PS)
1421  VPERM_CASES_BROADCAST(Q)
1422  VPERM_CASES(W)
1423  return true;
1424  }
1425 #undef VPERM_CASES_BROADCAST
1426 #undef VPERM_CASES
1427 }
1428 
1429 // Returns commuted opcode for VPERMI2 and VPERMT2 instructions by switching
1430 // from the I opcode to the T opcode and vice versa.
1431 static unsigned getCommutedVPERMV3Opcode(unsigned Opcode) {
1432 #define VPERM_CASES(Orig, New) \
1433  case X86::Orig##128rr: return X86::New##128rr; \
1434  case X86::Orig##128rrkz: return X86::New##128rrkz; \
1435  case X86::Orig##128rm: return X86::New##128rm; \
1436  case X86::Orig##128rmkz: return X86::New##128rmkz; \
1437  case X86::Orig##256rr: return X86::New##256rr; \
1438  case X86::Orig##256rrkz: return X86::New##256rrkz; \
1439  case X86::Orig##256rm: return X86::New##256rm; \
1440  case X86::Orig##256rmkz: return X86::New##256rmkz; \
1441  case X86::Orig##rr: return X86::New##rr; \
1442  case X86::Orig##rrkz: return X86::New##rrkz; \
1443  case X86::Orig##rm: return X86::New##rm; \
1444  case X86::Orig##rmkz: return X86::New##rmkz;
1445 
1446 #define VPERM_CASES_BROADCAST(Orig, New) \
1447  VPERM_CASES(Orig, New) \
1448  case X86::Orig##128rmb: return X86::New##128rmb; \
1449  case X86::Orig##128rmbkz: return X86::New##128rmbkz; \
1450  case X86::Orig##256rmb: return X86::New##256rmb; \
1451  case X86::Orig##256rmbkz: return X86::New##256rmbkz; \
1452  case X86::Orig##rmb: return X86::New##rmb; \
1453  case X86::Orig##rmbkz: return X86::New##rmbkz;
1454 
1455  switch (Opcode) {
1456  VPERM_CASES(VPERMI2B, VPERMT2B)
1457  VPERM_CASES_BROADCAST(VPERMI2D, VPERMT2D)
1458  VPERM_CASES_BROADCAST(VPERMI2PD, VPERMT2PD)
1459  VPERM_CASES_BROADCAST(VPERMI2PS, VPERMT2PS)
1460  VPERM_CASES_BROADCAST(VPERMI2Q, VPERMT2Q)
1461  VPERM_CASES(VPERMI2W, VPERMT2W)
1462  VPERM_CASES(VPERMT2B, VPERMI2B)
1463  VPERM_CASES_BROADCAST(VPERMT2D, VPERMI2D)
1464  VPERM_CASES_BROADCAST(VPERMT2PD, VPERMI2PD)
1465  VPERM_CASES_BROADCAST(VPERMT2PS, VPERMI2PS)
1466  VPERM_CASES_BROADCAST(VPERMT2Q, VPERMI2Q)
1467  VPERM_CASES(VPERMT2W, VPERMI2W)
1468  }
1469 
1470  llvm_unreachable("Unreachable!");
1471 #undef VPERM_CASES_BROADCAST
1472 #undef VPERM_CASES
1473 }
1474 
1475 MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
1476  unsigned OpIdx1,
1477  unsigned OpIdx2) const {
1478  auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & {
1479  if (NewMI)
1480  return *MI.getParent()->getParent()->CloneMachineInstr(&MI);
1481  return MI;
1482  };
1483 
1484  switch (MI.getOpcode()) {
1485  case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I)
1486  case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I)
1487  case X86::SHRD32rri8: // A = SHRD32rri8 B, C, I -> A = SHLD32rri8 C, B, (32-I)
1488  case X86::SHLD32rri8: // A = SHLD32rri8 B, C, I -> A = SHRD32rri8 C, B, (32-I)
1489  case X86::SHRD64rri8: // A = SHRD64rri8 B, C, I -> A = SHLD64rri8 C, B, (64-I)
1490  case X86::SHLD64rri8:{// A = SHLD64rri8 B, C, I -> A = SHRD64rri8 C, B, (64-I)
1491  unsigned Opc;
1492  unsigned Size;
1493  switch (MI.getOpcode()) {
1494  default: llvm_unreachable("Unreachable!");
1495  case X86::SHRD16rri8: Size = 16; Opc = X86::SHLD16rri8; break;
1496  case X86::SHLD16rri8: Size = 16; Opc = X86::SHRD16rri8; break;
1497  case X86::SHRD32rri8: Size = 32; Opc = X86::SHLD32rri8; break;
1498  case X86::SHLD32rri8: Size = 32; Opc = X86::SHRD32rri8; break;
1499  case X86::SHRD64rri8: Size = 64; Opc = X86::SHLD64rri8; break;
1500  case X86::SHLD64rri8: Size = 64; Opc = X86::SHRD64rri8; break;
1501  }
1502  unsigned Amt = MI.getOperand(3).getImm();
1503  auto &WorkingMI = cloneIfNew(MI);
1504  WorkingMI.setDesc(get(Opc));
1505  WorkingMI.getOperand(3).setImm(Size - Amt);
1506  return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
1507  OpIdx1, OpIdx2);
1508  }
1509  case X86::PFSUBrr:
1510  case X86::PFSUBRrr: {
1511  // PFSUB x, y: x = x - y
1512  // PFSUBR x, y: x = y - x
1513  unsigned Opc =
1514  (X86::PFSUBRrr == MI.getOpcode() ? X86::PFSUBrr : X86::PFSUBRrr);
1515  auto &WorkingMI = cloneIfNew(MI);
1516  WorkingMI.setDesc(get(Opc));
1517  return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
1518  OpIdx1, OpIdx2);
1519  }
1520  case X86::BLENDPDrri:
1521  case X86::BLENDPSrri:
1522  case X86::VBLENDPDrri:
1523  case X86::VBLENDPSrri:
1524  // If we're optimizing for size, try to use MOVSD/MOVSS.
1525  if (MI.getParent()->getParent()->getFunction().optForSize()) {
1526  unsigned Mask, Opc;
1527  switch (MI.getOpcode()) {
1528  default: llvm_unreachable("Unreachable!");
1529  case X86::BLENDPDrri: Opc = X86::MOVSDrr; Mask = 0x03; break;
1530  case X86::BLENDPSrri: Opc = X86::MOVSSrr; Mask = 0x0F; break;
1531  case X86::VBLENDPDrri: Opc = X86::VMOVSDrr; Mask = 0x03; break;
1532  case X86::VBLENDPSrri: Opc = X86::VMOVSSrr; Mask = 0x0F; break;
1533  }
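// A blend immediate of (Mask & ~1) takes only the low lane from the first
// source; once the sources are commuted that is exactly a MOVSD/MOVSS, so the
// blend can be replaced by the shorter move.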
1534  if ((MI.getOperand(3).getImm() ^ Mask) == 1) {
1535  auto &WorkingMI = cloneIfNew(MI);
1536  WorkingMI.setDesc(get(Opc));
1537  WorkingMI.RemoveOperand(3);
1538  return TargetInstrInfo::commuteInstructionImpl(WorkingMI,
1539  /*NewMI=*/false,
1540  OpIdx1, OpIdx2);
1541  }
1542  }
1543  LLVM_FALLTHROUGH;
1544  case X86::PBLENDWrri:
1545  case X86::VBLENDPDYrri:
1546  case X86::VBLENDPSYrri:
1547  case X86::VPBLENDDrri:
1548  case X86::VPBLENDWrri:
1549  case X86::VPBLENDDYrri:
1550  case X86::VPBLENDWYrri:{
1551  unsigned Mask;
1552  switch (MI.getOpcode()) {
1553  default: llvm_unreachable("Unreachable!");
1554  case X86::BLENDPDrri: Mask = 0x03; break;
1555  case X86::BLENDPSrri: Mask = 0x0F; break;
1556  case X86::PBLENDWrri: Mask = 0xFF; break;
1557  case X86::VBLENDPDrri: Mask = 0x03; break;
1558  case X86::VBLENDPSrri: Mask = 0x0F; break;
1559  case X86::VBLENDPDYrri: Mask = 0x0F; break;
1560  case X86::VBLENDPSYrri: Mask = 0xFF; break;
1561  case X86::VPBLENDDrri: Mask = 0x0F; break;
1562  case X86::VPBLENDWrri: Mask = 0xFF; break;
1563  case X86::VPBLENDDYrri: Mask = 0xFF; break;
1564  case X86::VPBLENDWYrri: Mask = 0xFF; break;
1565  }
1566  // Only the least significant bits of Imm are used.
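// Each immediate bit selects the source for one lane, so swapping the sources
// is equivalent to flipping every used bit of the immediate.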
1567  unsigned Imm = MI.getOperand(3).getImm() & Mask;
1568  auto &WorkingMI = cloneIfNew(MI);
1569  WorkingMI.getOperand(3).setImm(Mask ^ Imm);
1570  return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
1571  OpIdx1, OpIdx2);
1572  }
1573  case X86::MOVSDrr:
1574  case X86::MOVSSrr:
1575  case X86::VMOVSDrr:
1576  case X86::VMOVSSrr:{
1577  // On SSE41 or later we can commute a MOVSS/MOVSD to a BLENDPS/BLENDPD.
1578  assert(Subtarget.hasSSE41() && "Commuting MOVSD/MOVSS requires SSE41!");
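// MOVSD/MOVSS takes only the low element from its second operand, i.e. it is
// a blend with immediate 1; with the operands swapped every other lane must
// come from the second operand instead, hence the masks 0x02 and 0x0E.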
1579 
1580  unsigned Mask, Opc;
1581  switch (MI.getOpcode()) {
1582  default: llvm_unreachable("Unreachable!");
1583  case X86::MOVSDrr: Opc = X86::BLENDPDrri; Mask = 0x02; break;
1584  case X86::MOVSSrr: Opc = X86::BLENDPSrri; Mask = 0x0E; break;
1585  case X86::VMOVSDrr: Opc = X86::VBLENDPDrri; Mask = 0x02; break;
1586  case X86::VMOVSSrr: Opc = X86::VBLENDPSrri; Mask = 0x0E; break;
1587  }
1588 
1589  auto &WorkingMI = cloneIfNew(MI);
1590  WorkingMI.setDesc(get(Opc));
1591  WorkingMI.addOperand(MachineOperand::CreateImm(Mask));
1592  return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
1593  OpIdx1, OpIdx2);
1594  }
1595  case X86::PCLMULQDQrr:
1596  case X86::VPCLMULQDQrr:
1597  case X86::VPCLMULQDQYrr:
1598  case X86::VPCLMULQDQZrr:
1599  case X86::VPCLMULQDQZ128rr:
1600  case X86::VPCLMULQDQZ256rr: {
1601  // SRC1 64bits = Imm[0] ? SRC1[127:64] : SRC1[63:0]
1602  // SRC2 64bits = Imm[4] ? SRC2[127:64] : SRC2[63:0]
1603  unsigned Imm = MI.getOperand(3).getImm();
1604  unsigned Src1Hi = Imm & 0x01;
1605  unsigned Src2Hi = Imm & 0x10;
1606  auto &WorkingMI = cloneIfNew(MI);
1607  WorkingMI.getOperand(3).setImm((Src1Hi << 4) | (Src2Hi >> 4));
1608  return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
1609  OpIdx1, OpIdx2);
1610  }
1611  case X86::VPCMPBZ128rri: case X86::VPCMPUBZ128rri:
1612  case X86::VPCMPBZ256rri: case X86::VPCMPUBZ256rri:
1613  case X86::VPCMPBZrri: case X86::VPCMPUBZrri:
1614  case X86::VPCMPDZ128rri: case X86::VPCMPUDZ128rri:
1615  case X86::VPCMPDZ256rri: case X86::VPCMPUDZ256rri:
1616  case X86::VPCMPDZrri: case X86::VPCMPUDZrri:
1617  case X86::VPCMPQZ128rri: case X86::VPCMPUQZ128rri:
1618  case X86::VPCMPQZ256rri: case X86::VPCMPUQZ256rri:
1619  case X86::VPCMPQZrri: case X86::VPCMPUQZrri:
1620  case X86::VPCMPWZ128rri: case X86::VPCMPUWZ128rri:
1621  case X86::VPCMPWZ256rri: case X86::VPCMPUWZ256rri:
1622  case X86::VPCMPWZrri: case X86::VPCMPUWZrri:
1623  case X86::VPCMPBZ128rrik: case X86::VPCMPUBZ128rrik:
1624  case X86::VPCMPBZ256rrik: case X86::VPCMPUBZ256rrik:
1625  case X86::VPCMPBZrrik: case X86::VPCMPUBZrrik:
1626  case X86::VPCMPDZ128rrik: case X86::VPCMPUDZ128rrik:
1627  case X86::VPCMPDZ256rrik: case X86::VPCMPUDZ256rrik:
1628  case X86::VPCMPDZrrik: case X86::VPCMPUDZrrik:
1629  case X86::VPCMPQZ128rrik: case X86::VPCMPUQZ128rrik:
1630  case X86::VPCMPQZ256rrik: case X86::VPCMPUQZ256rrik:
1631  case X86::VPCMPQZrrik: case X86::VPCMPUQZrrik:
1632  case X86::VPCMPWZ128rrik: case X86::VPCMPUWZ128rrik:
1633  case X86::VPCMPWZ256rrik: case X86::VPCMPUWZ256rrik:
1634  case X86::VPCMPWZrrik: case X86::VPCMPUWZrrik: {
1635  // Flip comparison mode immediate (if necessary).
1636  unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm() & 0x7;
1637  Imm = X86::getSwappedVPCMPImm(Imm);
1638  auto &WorkingMI = cloneIfNew(MI);
1639  WorkingMI.getOperand(MI.getNumOperands() - 1).setImm(Imm);
1640  return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
1641  OpIdx1, OpIdx2);
1642  }
1643  case X86::VPCOMBri: case X86::VPCOMUBri:
1644  case X86::VPCOMDri: case X86::VPCOMUDri:
1645  case X86::VPCOMQri: case X86::VPCOMUQri:
1646  case X86::VPCOMWri: case X86::VPCOMUWri: {
1647  // Flip comparison mode immediate (if necessary).
1648  unsigned Imm = MI.getOperand(3).getImm() & 0x7;
1649  Imm = X86::getSwappedVPCOMImm(Imm);
1650  auto &WorkingMI = cloneIfNew(MI);
1651  WorkingMI.getOperand(3).setImm(Imm);
1652  return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
1653  OpIdx1, OpIdx2);
1654  }
1655  case X86::VPERM2F128rr:
1656  case X86::VPERM2I128rr: {
1657  // Flip permute source immediate.
1658  // Imm & 0x02: lo = if set, select Op1.lo/hi else Op0.lo/hi.
1659  // Imm & 0x20: hi = if set, select Op1.lo/hi else Op0.lo/hi.
1660  unsigned Imm = MI.getOperand(3).getImm() & 0xFF;
1661  auto &WorkingMI = cloneIfNew(MI);
1662  WorkingMI.getOperand(3).setImm(Imm ^ 0x22);
1663  return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
1664  OpIdx1, OpIdx2);
1665  }
1666  case X86::MOVHLPSrr:
1667  case X86::UNPCKHPDrr:
1668  case X86::VMOVHLPSrr:
1669  case X86::VUNPCKHPDrr:
1670  case X86::VMOVHLPSZrr:
1671  case X86::VUNPCKHPDZ128rr: {
1672  assert(Subtarget.hasSSE2() && "Commuting MOVHLP/UNPCKHPD requires SSE2!");
1673 
1674  unsigned Opc = MI.getOpcode();
1675  switch (Opc) {
1676  default: llvm_unreachable("Unreachable!");
1677  case X86::MOVHLPSrr: Opc = X86::UNPCKHPDrr; break;
1678  case X86::UNPCKHPDrr: Opc = X86::MOVHLPSrr; break;
1679  case X86::VMOVHLPSrr: Opc = X86::VUNPCKHPDrr; break;
1680  case X86::VUNPCKHPDrr: Opc = X86::VMOVHLPSrr; break;
1681  case X86::VMOVHLPSZrr: Opc = X86::VUNPCKHPDZ128rr; break;
1682  case X86::VUNPCKHPDZ128rr: Opc = X86::VMOVHLPSZrr; break;
1683  }
1684  auto &WorkingMI = cloneIfNew(MI);
1685  WorkingMI.setDesc(get(Opc));
1686  return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
1687  OpIdx1, OpIdx2);
1688  }
1689  case X86::CMOVB16rr: case X86::CMOVB32rr: case X86::CMOVB64rr:
1690  case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr:
1691  case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr:
1692  case X86::CMOVNE16rr: case X86::CMOVNE32rr: case X86::CMOVNE64rr:
1693  case X86::CMOVBE16rr: case X86::CMOVBE32rr: case X86::CMOVBE64rr:
1694  case X86::CMOVA16rr: case X86::CMOVA32rr: case X86::CMOVA64rr:
1695  case X86::CMOVL16rr: case X86::CMOVL32rr: case X86::CMOVL64rr:
1696  case X86::CMOVGE16rr: case X86::CMOVGE32rr: case X86::CMOVGE64rr:
1697  case X86::CMOVLE16rr: case X86::CMOVLE32rr: case X86::CMOVLE64rr:
1698  case X86::CMOVG16rr: case X86::CMOVG32rr: case X86::CMOVG64rr:
1699  case X86::CMOVS16rr: case X86::CMOVS32rr: case X86::CMOVS64rr:
1700  case X86::CMOVNS16rr: case X86::CMOVNS32rr: case X86::CMOVNS64rr:
1701  case X86::CMOVP16rr: case X86::CMOVP32rr: case X86::CMOVP64rr:
1702  case X86::CMOVNP16rr: case X86::CMOVNP32rr: case X86::CMOVNP64rr:
1703  case X86::CMOVO16rr: case X86::CMOVO32rr: case X86::CMOVO64rr:
1704  case X86::CMOVNO16rr: case X86::CMOVNO32rr: case X86::CMOVNO64rr: {
1705  unsigned Opc;
1706  switch (MI.getOpcode()) {
1707  default: llvm_unreachable("Unreachable!");
1708  case X86::CMOVB16rr: Opc = X86::CMOVAE16rr; break;
1709  case X86::CMOVB32rr: Opc = X86::CMOVAE32rr; break;
1710  case X86::CMOVB64rr: Opc = X86::CMOVAE64rr; break;
1711  case X86::CMOVAE16rr: Opc = X86::CMOVB16rr; break;
1712  case X86::CMOVAE32rr: Opc = X86::CMOVB32rr; break;
1713  case X86::CMOVAE64rr: Opc = X86::CMOVB64rr; break;
1714  case X86::CMOVE16rr: Opc = X86::CMOVNE16rr; break;
1715  case X86::CMOVE32rr: Opc = X86::CMOVNE32rr; break;
1716  case X86::CMOVE64rr: Opc = X86::CMOVNE64rr; break;
1717  case X86::CMOVNE16rr: Opc = X86::CMOVE16rr; break;
1718  case X86::CMOVNE32rr: Opc = X86::CMOVE32rr; break;
1719  case X86::CMOVNE64rr: Opc = X86::CMOVE64rr; break;
1720  case X86::CMOVBE16rr: Opc = X86::CMOVA16rr; break;
1721  case X86::CMOVBE32rr: Opc = X86::CMOVA32rr; break;
1722  case X86::CMOVBE64rr: Opc = X86::CMOVA64rr; break;
1723  case X86::CMOVA16rr: Opc = X86::CMOVBE16rr; break;
1724  case X86::CMOVA32rr: Opc = X86::CMOVBE32rr; break;
1725  case X86::CMOVA64rr: Opc = X86::CMOVBE64rr; break;
1726  case X86::CMOVL16rr: Opc = X86::CMOVGE16rr; break;
1727  case X86::CMOVL32rr: Opc = X86::CMOVGE32rr; break;
1728  case X86::CMOVL64rr: Opc = X86::CMOVGE64rr; break;
1729  case X86::CMOVGE16rr: Opc = X86::CMOVL16rr; break;
1730  case X86::CMOVGE32rr: Opc = X86::CMOVL32rr; break;
1731  case X86::CMOVGE64rr: Opc = X86::CMOVL64rr; break;
1732  case X86::CMOVLE16rr: Opc = X86::CMOVG16rr; break;
1733  case X86::CMOVLE32rr: Opc = X86::CMOVG32rr; break;
1734  case X86::CMOVLE64rr: Opc = X86::CMOVG64rr; break;
1735  case X86::CMOVG16rr: Opc = X86::CMOVLE16rr; break;
1736  case X86::CMOVG32rr: Opc = X86::CMOVLE32rr; break;
1737  case X86::CMOVG64rr: Opc = X86::CMOVLE64rr; break;
1738  case X86::CMOVS16rr: Opc = X86::CMOVNS16rr; break;
1739  case X86::CMOVS32rr: Opc = X86::CMOVNS32rr; break;
1740  case X86::CMOVS64rr: Opc = X86::CMOVNS64rr; break;
1741  case X86::CMOVNS16rr: Opc = X86::CMOVS16rr; break;
1742  case X86::CMOVNS32rr: Opc = X86::CMOVS32rr; break;
1743  case X86::CMOVNS64rr: Opc = X86::CMOVS64rr; break;
1744  case X86::CMOVP16rr: Opc = X86::CMOVNP16rr; break;
1745  case X86::CMOVP32rr: Opc = X86::CMOVNP32rr; break;
1746  case X86::CMOVP64rr: Opc = X86::CMOVNP64rr; break;
1747  case X86::CMOVNP16rr: Opc = X86::CMOVP16rr; break;
1748  case X86::CMOVNP32rr: Opc = X86::CMOVP32rr; break;
1749  case X86::CMOVNP64rr: Opc = X86::CMOVP64rr; break;
1750  case X86::CMOVO16rr: Opc = X86::CMOVNO16rr; break;
1751  case X86::CMOVO32rr: Opc = X86::CMOVNO32rr; break;
1752  case X86::CMOVO64rr: Opc = X86::CMOVNO64rr; break;
1753  case X86::CMOVNO16rr: Opc = X86::CMOVO16rr; break;
1754  case X86::CMOVNO32rr: Opc = X86::CMOVO32rr; break;
1755  case X86::CMOVNO64rr: Opc = X86::CMOVO64rr; break;
1756  }
1757  auto &WorkingMI = cloneIfNew(MI);
1758  WorkingMI.setDesc(get(Opc));
1759  return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
1760  OpIdx1, OpIdx2);
1761  }
1762  case X86::VPTERNLOGDZrri: case X86::VPTERNLOGDZrmi:
1763  case X86::VPTERNLOGDZ128rri: case X86::VPTERNLOGDZ128rmi:
1764  case X86::VPTERNLOGDZ256rri: case X86::VPTERNLOGDZ256rmi:
1765  case X86::VPTERNLOGQZrri: case X86::VPTERNLOGQZrmi:
1766  case X86::VPTERNLOGQZ128rri: case X86::VPTERNLOGQZ128rmi:
1767  case X86::VPTERNLOGQZ256rri: case X86::VPTERNLOGQZ256rmi:
1768  case X86::VPTERNLOGDZrrik:
1769  case X86::VPTERNLOGDZ128rrik:
1770  case X86::VPTERNLOGDZ256rrik:
1771  case X86::VPTERNLOGQZrrik:
1772  case X86::VPTERNLOGQZ128rrik:
1773  case X86::VPTERNLOGQZ256rrik:
1774  case X86::VPTERNLOGDZrrikz: case X86::VPTERNLOGDZrmikz:
1775  case X86::VPTERNLOGDZ128rrikz: case X86::VPTERNLOGDZ128rmikz:
1776  case X86::VPTERNLOGDZ256rrikz: case X86::VPTERNLOGDZ256rmikz:
1777  case X86::VPTERNLOGQZrrikz: case X86::VPTERNLOGQZrmikz:
1778  case X86::VPTERNLOGQZ128rrikz: case X86::VPTERNLOGQZ128rmikz:
1779  case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz:
1780  case X86::VPTERNLOGDZ128rmbi:
1781  case X86::VPTERNLOGDZ256rmbi:
1782  case X86::VPTERNLOGDZrmbi:
1783  case X86::VPTERNLOGQZ128rmbi:
1784  case X86::VPTERNLOGQZ256rmbi:
1785  case X86::VPTERNLOGQZrmbi:
1786  case X86::VPTERNLOGDZ128rmbikz:
1787  case X86::VPTERNLOGDZ256rmbikz:
1788  case X86::VPTERNLOGDZrmbikz:
1789  case X86::VPTERNLOGQZ128rmbikz:
1790  case X86::VPTERNLOGQZ256rmbikz:
1791  case X86::VPTERNLOGQZrmbikz: {
1792  auto &WorkingMI = cloneIfNew(MI);
1793  commuteVPTERNLOG(WorkingMI, OpIdx1, OpIdx2);
1794  return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
1795  OpIdx1, OpIdx2);
1796  }
1797  default: {
1798  if (isCommutableVPERMV3Instruction(MI.getOpcode())) {
1799  unsigned Opc = getCommutedVPERMV3Opcode(MI.getOpcode());
1800  auto &WorkingMI = cloneIfNew(MI);
1801  WorkingMI.setDesc(get(Opc));
1802  return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
1803  OpIdx1, OpIdx2);
1804  }
1805 
1806  const X86InstrFMA3Group *FMA3Group = getFMA3Group(MI.getOpcode(),
1807  MI.getDesc().TSFlags);
1808  if (FMA3Group) {
1809  unsigned Opc =
1810  getFMA3OpcodeToCommuteOperands(MI, OpIdx1, OpIdx2, *FMA3Group);
1811  auto &WorkingMI = cloneIfNew(MI);
1812  WorkingMI.setDesc(get(Opc));
1813  return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
1814  OpIdx1, OpIdx2);
1815  }
1816 
1817  return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
1818  }
1819  }
1820 }
1821 
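The blend commutations above work by flipping the immediate's selection bits under the instruction's lane mask. A minimal standalone sketch of that arithmetic (plain C++ added for illustration; the helper name is invented and is not part of this file):

#include <cassert>
#include <cstdint>

// Each immediate bit selects a lane from the second source (bit set) or the
// first source (bit clear), so exchanging the sources is equivalent to
// inverting the bits under the lane mask: 0x3 for two f64 lanes, 0xF for
// four f32 lanes.
static uint8_t commuteBlendImm(uint8_t Imm, uint8_t LaneMask) {
  return LaneMask ^ (Imm & LaneMask);
}

int main() {
  assert(commuteBlendImm(0x2, 0x3) == 0x1); // BLENDPD: the two lanes swap roles
  assert(commuteBlendImm(0x5, 0xF) == 0xA); // BLENDPS: 0101 becomes 1010
  return 0;
}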
1822 bool
1823 X86InstrInfo::findThreeSrcCommutedOpIndices(const MachineInstr &MI,
1824  unsigned &SrcOpIdx1,
1825  unsigned &SrcOpIdx2,
1826  bool IsIntrinsic) const {
1827  uint64_t TSFlags = MI.getDesc().TSFlags;
1828 
1829  unsigned FirstCommutableVecOp = 1;
1830  unsigned LastCommutableVecOp = 3;
1831  unsigned KMaskOp = -1U;
1832  if (X86II::isKMasked(TSFlags)) {
1833  // For k-zero-masked operations it is Ok to commute the first vector
1834  // operand.
1835  // For regular k-masked operations a conservative choice is done as the
1836  // elements of the first vector operand, for which the corresponding bit
1837  // in the k-mask operand is set to 0, are copied to the result of the
1838  // instruction.
1839  // TODO/FIXME: The commute still may be legal if it is known that the
1840  // k-mask operand is set to either all ones or all zeroes.
1841  // It is also Ok to commute the 1st operand if all users of MI use only
1842  // the elements enabled by the k-mask operand. For example,
1843  // v4 = VFMADD213PSZrk v1, k, v2, v3; // v1[i] = k[i] ? v2[i]*v1[i]+v3[i]
1844  // : v1[i];
1845  // VMOVAPSZmrk <mem_addr>, k, v4; // this is the ONLY user of v4 ->
1846  // // Ok, to commute v1 in FMADD213PSZrk.
1847 
1848  // The k-mask operand has index = 2 for masked and zero-masked operations.
1849  KMaskOp = 2;
1850 
1851  // The operand with index = 1 is used as a source for those elements for
1852  // which the corresponding bit in the k-mask is set to 0.
1853  if (X86II::isKMergeMasked(TSFlags))
1854  FirstCommutableVecOp = 3;
1855 
1856  LastCommutableVecOp++;
1857  } else if (IsIntrinsic) {
1858  // Commuting the first operand of an intrinsic instruction isn't possible
1859  // unless we can prove that only the lowest element of the result is used.
1860  FirstCommutableVecOp = 2;
1861  }
1862 
1863  if (isMem(MI, LastCommutableVecOp))
1864  LastCommutableVecOp--;
1865 
1866  // Only the first RegOpsNum operands are commutable.
1867  // Also, the value 'CommuteAnyOperandIndex' is valid here as it means
1868  // that the operand is not specified/fixed.
1869  if (SrcOpIdx1 != CommuteAnyOperandIndex &&
1870  (SrcOpIdx1 < FirstCommutableVecOp || SrcOpIdx1 > LastCommutableVecOp ||
1871  SrcOpIdx1 == KMaskOp))
1872  return false;
1873  if (SrcOpIdx2 != CommuteAnyOperandIndex &&
1874  (SrcOpIdx2 < FirstCommutableVecOp || SrcOpIdx2 > LastCommutableVecOp ||
1875  SrcOpIdx2 == KMaskOp))
1876  return false;
1877 
1878  // Look for two different register operands assumed to be commutable
1879  // regardless of the FMA opcode. The FMA opcode is adjusted later.
1880  if (SrcOpIdx1 == CommuteAnyOperandIndex ||
1881  SrcOpIdx2 == CommuteAnyOperandIndex) {
1882  unsigned CommutableOpIdx1 = SrcOpIdx1;
1883  unsigned CommutableOpIdx2 = SrcOpIdx2;
1884 
1885  // At least one of operands to be commuted is not specified and
1886  // this method is free to choose appropriate commutable operands.
1887  if (SrcOpIdx1 == SrcOpIdx2)
1888  // Both of operands are not fixed. By default set one of commutable
1889  // operands to the last register operand of the instruction.
1890  CommutableOpIdx2 = LastCommutableVecOp;
1891  else if (SrcOpIdx2 == CommuteAnyOperandIndex)
1892  // Only one of operands is not fixed.
1893  CommutableOpIdx2 = SrcOpIdx1;
1894 
1895  // CommutableOpIdx2 is well defined now. Let's choose another commutable
1896  // operand and assign its index to CommutableOpIdx1.
1897  unsigned Op2Reg = MI.getOperand(CommutableOpIdx2).getReg();
1898  for (CommutableOpIdx1 = LastCommutableVecOp;
1899  CommutableOpIdx1 >= FirstCommutableVecOp; CommutableOpIdx1--) {
1900  // Just ignore and skip the k-mask operand.
1901  if (CommutableOpIdx1 == KMaskOp)
1902  continue;
1903 
1904  // The commuted operands must have different registers.
1905  // Otherwise, the commute transformation does not change anything and
1906  // is useless then.
1907  if (Op2Reg != MI.getOperand(CommutableOpIdx1).getReg())
1908  break;
1909  }
1910 
1911  // No appropriate commutable operands were found.
1912  if (CommutableOpIdx1 < FirstCommutableVecOp)
1913  return false;
1914 
1915  // Assign the found pair of commutable indices to SrcOpIdx1 and SrcOpidx2
1916  // to return those values.
1917  if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2,
1918  CommutableOpIdx1, CommutableOpIdx2))
1919  return false;
1920  }
1921 
1922  return true;
1923 }
1924 
1925 bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
1926  unsigned &SrcOpIdx2) const {
1927  const MCInstrDesc &Desc = MI.getDesc();
1928  if (!Desc.isCommutable())
1929  return false;
1930 
1931  switch (MI.getOpcode()) {
1932  case X86::CMPSDrr:
1933  case X86::CMPSSrr:
1934  case X86::CMPPDrri:
1935  case X86::CMPPSrri:
1936  case X86::VCMPSDrr:
1937  case X86::VCMPSSrr:
1938  case X86::VCMPPDrri:
1939  case X86::VCMPPSrri:
1940  case X86::VCMPPDYrri:
1941  case X86::VCMPPSYrri:
1942  case X86::VCMPSDZrr:
1943  case X86::VCMPSSZrr:
1944  case X86::VCMPPDZrri:
1945  case X86::VCMPPSZrri:
1946  case X86::VCMPPDZ128rri:
1947  case X86::VCMPPSZ128rri:
1948  case X86::VCMPPDZ256rri:
1949  case X86::VCMPPSZ256rri: {
1950  // Float comparison can be safely commuted for
1951  // Ordered/Unordered/Equal/NotEqual tests
1952  unsigned Imm = MI.getOperand(3).getImm() & 0x7;
1953  switch (Imm) {
1954  case 0x00: // EQUAL
1955  case 0x03: // UNORDERED
1956  case 0x04: // NOT EQUAL
1957  case 0x07: // ORDERED
1958  // The indices of the commutable operands are 1 and 2.
1959  // Assign them to the returned operand indices here.
1960  return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1, 2);
1961  }
1962  return false;
1963  }
1964  case X86::MOVSDrr:
1965  case X86::MOVSSrr:
1966  case X86::VMOVSDrr:
1967  case X86::VMOVSSrr:
1968  if (Subtarget.hasSSE41())
1969  return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
1970  return false;
1971  case X86::MOVHLPSrr:
1972  case X86::UNPCKHPDrr:
1973  case X86::VMOVHLPSrr:
1974  case X86::VUNPCKHPDrr:
1975  case X86::VMOVHLPSZrr:
1976  case X86::VUNPCKHPDZ128rr:
1977  if (Subtarget.hasSSE2())
1978  return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
1979  return false;
1980  case X86::VPTERNLOGDZrri: case X86::VPTERNLOGDZrmi:
1981  case X86::VPTERNLOGDZ128rri: case X86::VPTERNLOGDZ128rmi:
1982  case X86::VPTERNLOGDZ256rri: case X86::VPTERNLOGDZ256rmi:
1983  case X86::VPTERNLOGQZrri: case X86::VPTERNLOGQZrmi:
1984  case X86::VPTERNLOGQZ128rri: case X86::VPTERNLOGQZ128rmi:
1985  case X86::VPTERNLOGQZ256rri: case X86::VPTERNLOGQZ256rmi:
1986  case X86::VPTERNLOGDZrrik:
1987  case X86::VPTERNLOGDZ128rrik:
1988  case X86::VPTERNLOGDZ256rrik:
1989  case X86::VPTERNLOGQZrrik:
1990  case X86::VPTERNLOGQZ128rrik:
1991  case X86::VPTERNLOGQZ256rrik:
1992  case X86::VPTERNLOGDZrrikz: case X86::VPTERNLOGDZrmikz:
1993  case X86::VPTERNLOGDZ128rrikz: case X86::VPTERNLOGDZ128rmikz:
1994  case X86::VPTERNLOGDZ256rrikz: case X86::VPTERNLOGDZ256rmikz:
1995  case X86::VPTERNLOGQZrrikz: case X86::VPTERNLOGQZrmikz:
1996  case X86::VPTERNLOGQZ128rrikz: case X86::VPTERNLOGQZ128rmikz:
1997  case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz:
1998  case X86::VPTERNLOGDZ128rmbi:
1999  case X86::VPTERNLOGDZ256rmbi:
2000  case X86::VPTERNLOGDZrmbi:
2001  case X86::VPTERNLOGQZ128rmbi:
2002  case X86::VPTERNLOGQZ256rmbi:
2003  case X86::VPTERNLOGQZrmbi:
2004  case X86::VPTERNLOGDZ128rmbikz:
2005  case X86::VPTERNLOGDZ256rmbikz:
2006  case X86::VPTERNLOGDZrmbikz:
2007  case X86::VPTERNLOGQZ128rmbikz:
2008  case X86::VPTERNLOGQZ256rmbikz:
2009  case X86::VPTERNLOGQZrmbikz:
2010  return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
2011  case X86::VPMADD52HUQZ128r:
2012  case X86::VPMADD52HUQZ128rk:
2013  case X86::VPMADD52HUQZ128rkz:
2014  case X86::VPMADD52HUQZ256r:
2015  case X86::VPMADD52HUQZ256rk:
2016  case X86::VPMADD52HUQZ256rkz:
2017  case X86::VPMADD52HUQZr:
2018  case X86::VPMADD52HUQZrk:
2019  case X86::VPMADD52HUQZrkz:
2020  case X86::VPMADD52LUQZ128r:
2021  case X86::VPMADD52LUQZ128rk:
2022  case X86::VPMADD52LUQZ128rkz:
2023  case X86::VPMADD52LUQZ256r:
2024  case X86::VPMADD52LUQZ256rk:
2025  case X86::VPMADD52LUQZ256rkz:
2026  case X86::VPMADD52LUQZr:
2027  case X86::VPMADD52LUQZrk:
2028  case X86::VPMADD52LUQZrkz: {
2029  unsigned CommutableOpIdx1 = 2;
2030  unsigned CommutableOpIdx2 = 3;
2031  if (X86II::isKMasked(Desc.TSFlags)) {
2032  // Skip the mask register.
2033  ++CommutableOpIdx1;
2034  ++CommutableOpIdx2;
2035  }
2036  if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2,
2037  CommutableOpIdx1, CommutableOpIdx2))
2038  return false;
2039  if (!MI.getOperand(SrcOpIdx1).isReg() ||
2040  !MI.getOperand(SrcOpIdx2).isReg())
2041  // No idea.
2042  return false;
2043  return true;
2044  }
2045 
2046  default:
2047  const X86InstrFMA3Group *FMA3Group = getFMA3Group(MI.getOpcode(),
2048  MI.getDesc().TSFlags);
2049  if (FMA3Group)
2050  return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2,
2051  FMA3Group->isIntrinsic());
2052 
2053  // Handle masked instructions since we need to skip over the mask input
2054  // and the preserved input.
2055  if (X86II::isKMasked(Desc.TSFlags)) {
2056  // First assume that the first input is the mask operand and skip past it.
2057  unsigned CommutableOpIdx1 = Desc.getNumDefs() + 1;
2058  unsigned CommutableOpIdx2 = Desc.getNumDefs() + 2;
2059  // Check if the first input is tied. If there isn't one then we only
2060  // need to skip the mask operand which we did above.
2061  if ((MI.getDesc().getOperandConstraint(Desc.getNumDefs(),
2062  MCOI::TIED_TO) != -1)) {
2063  // If this is zero masking instruction with a tied operand, we need to
2064  // move the first index back to the first input since this must
2065  // be a 3 input instruction and we want the first two non-mask inputs.
2066  // Otherwise this is a 2 input instruction with a preserved input and
2067  // mask, so we need to move the indices to skip one more input.
2068  if (X86II::isKMergeMasked(Desc.TSFlags)) {
2069  ++CommutableOpIdx1;
2070  ++CommutableOpIdx2;
2071  } else {
2072  --CommutableOpIdx1;
2073  }
2074  }
2075 
2076  if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2,
2077  CommutableOpIdx1, CommutableOpIdx2))
2078  return false;
2079 
2080  if (!MI.getOperand(SrcOpIdx1).isReg() ||
2081  !MI.getOperand(SrcOpIdx2).isReg())
2082  // No idea.
2083  return false;
2084  return true;
2085  }
2086 
2087  return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
2088  }
2089  return false;
2090 }
2091 
2092 X86::CondCode X86::getCondFromBranchOpc(unsigned BrOpc) {
2093  switch (BrOpc) {
2094  default: return X86::COND_INVALID;
2095  case X86::JE_1: return X86::COND_E;
2096  case X86::JNE_1: return X86::COND_NE;
2097  case X86::JL_1: return X86::COND_L;
2098  case X86::JLE_1: return X86::COND_LE;
2099  case X86::JG_1: return X86::COND_G;
2100  case X86::JGE_1: return X86::COND_GE;
2101  case X86::JB_1: return X86::COND_B;
2102  case X86::JBE_1: return X86::COND_BE;
2103  case X86::JA_1: return X86::COND_A;
2104  case X86::JAE_1: return X86::COND_AE;
2105  case X86::JS_1: return X86::COND_S;
2106  case X86::JNS_1: return X86::COND_NS;
2107  case X86::JP_1: return X86::COND_P;
2108  case X86::JNP_1: return X86::COND_NP;
2109  case X86::JO_1: return X86::COND_O;
2110  case X86::JNO_1: return X86::COND_NO;
2111  }
2112 }
2113 
2114 /// Return condition code of a SET opcode.
2115 X86::CondCode X86::getCondFromSETOpc(unsigned Opc) {
2116  switch (Opc) {
2117  default: return X86::COND_INVALID;
2118  case X86::SETAr: case X86::SETAm: return X86::COND_A;
2119  case X86::SETAEr: case X86::SETAEm: return X86::COND_AE;
2120  case X86::SETBr: case X86::SETBm: return X86::COND_B;
2121  case X86::SETBEr: case X86::SETBEm: return X86::COND_BE;
2122  case X86::SETEr: case X86::SETEm: return X86::COND_E;
2123  case X86::SETGr: case X86::SETGm: return X86::COND_G;
2124  case X86::SETGEr: case X86::SETGEm: return X86::COND_GE;
2125  case X86::SETLr: case X86::SETLm: return X86::COND_L;
2126  case X86::SETLEr: case X86::SETLEm: return X86::COND_LE;
2127  case X86::SETNEr: case X86::SETNEm: return X86::COND_NE;
2128  case X86::SETNOr: case X86::SETNOm: return X86::COND_NO;
2129  case X86::SETNPr: case X86::SETNPm: return X86::COND_NP;
2130  case X86::SETNSr: case X86::SETNSm: return X86::COND_NS;
2131  case X86::SETOr: case X86::SETOm: return X86::COND_O;
2132  case X86::SETPr: case X86::SETPm: return X86::COND_P;
2133  case X86::SETSr: case X86::SETSm: return X86::COND_S;
2134  }
2135 }
2136 
2137 /// Return condition code of a CMov opcode.
2138 X86::CondCode X86::getCondFromCMovOpc(unsigned Opc) {
2139  switch (Opc) {
2140  default: return X86::COND_INVALID;
2141  case X86::CMOVA16rm: case X86::CMOVA16rr: case X86::CMOVA32rm:
2142  case X86::CMOVA32rr: case X86::CMOVA64rm: case X86::CMOVA64rr:
2143  return X86::COND_A;
2144  case X86::CMOVAE16rm: case X86::CMOVAE16rr: case X86::CMOVAE32rm:
2145  case X86::CMOVAE32rr: case X86::CMOVAE64rm: case X86::CMOVAE64rr:
2146  return X86::COND_AE;
2147  case X86::CMOVB16rm: case X86::CMOVB16rr: case X86::CMOVB32rm:
2148  case X86::CMOVB32rr: case X86::CMOVB64rm: case X86::CMOVB64rr:
2149  return X86::COND_B;
2150  case X86::CMOVBE16rm: case X86::CMOVBE16rr: case X86::CMOVBE32rm:
2151  case X86::CMOVBE32rr: case X86::CMOVBE64rm: case X86::CMOVBE64rr:
2152  return X86::COND_BE;
2153  case X86::CMOVE16rm: case X86::CMOVE16rr: case X86::CMOVE32rm:
2154  case X86::CMOVE32rr: case X86::CMOVE64rm: case X86::CMOVE64rr:
2155  return X86::COND_E;
2156  case X86::CMOVG16rm: case X86::CMOVG16rr: case X86::CMOVG32rm:
2157  case X86::CMOVG32rr: case X86::CMOVG64rm: case X86::CMOVG64rr:
2158  return X86::COND_G;
2159  case X86::CMOVGE16rm: case X86::CMOVGE16rr: case X86::CMOVGE32rm:
2160  case X86::CMOVGE32rr: case X86::CMOVGE64rm: case X86::CMOVGE64rr:
2161  return X86::COND_GE;
2162  case X86::CMOVL16rm: case X86::CMOVL16rr: case X86::CMOVL32rm:
2163  case X86::CMOVL32rr: case X86::CMOVL64rm: case X86::CMOVL64rr:
2164  return X86::COND_L;
2165  case X86::CMOVLE16rm: case X86::CMOVLE16rr: case X86::CMOVLE32rm:
2166  case X86::CMOVLE32rr: case X86::CMOVLE64rm: case X86::CMOVLE64rr:
2167  return X86::COND_LE;
2168  case X86::CMOVNE16rm: case X86::CMOVNE16rr: case X86::CMOVNE32rm:
2169  case X86::CMOVNE32rr: case X86::CMOVNE64rm: case X86::CMOVNE64rr:
2170  return X86::COND_NE;
2171  case X86::CMOVNO16rm: case X86::CMOVNO16rr: case X86::CMOVNO32rm:
2172  case X86::CMOVNO32rr: case X86::CMOVNO64rm: case X86::CMOVNO64rr:
2173  return X86::COND_NO;
2174  case X86::CMOVNP16rm: case X86::CMOVNP16rr: case X86::CMOVNP32rm:
2175  case X86::CMOVNP32rr: case X86::CMOVNP64rm: case X86::CMOVNP64rr:
2176  return X86::COND_NP;
2177  case X86::CMOVNS16rm: case X86::CMOVNS16rr: case X86::CMOVNS32rm:
2178  case X86::CMOVNS32rr: case X86::CMOVNS64rm: case X86::CMOVNS64rr:
2179  return X86::COND_NS;
2180  case X86::CMOVO16rm: case X86::CMOVO16rr: case X86::CMOVO32rm:
2181  case X86::CMOVO32rr: case X86::CMOVO64rm: case X86::CMOVO64rr:
2182  return X86::COND_O;
2183  case X86::CMOVP16rm: case X86::CMOVP16rr: case X86::CMOVP32rm:
2184  case X86::CMOVP32rr: case X86::CMOVP64rm: case X86::CMOVP64rr:
2185  return X86::COND_P;
2186  case X86::CMOVS16rm: case X86::CMOVS16rr: case X86::CMOVS32rm:
2187  case X86::CMOVS32rr: case X86::CMOVS64rm: case X86::CMOVS64rr:
2188  return X86::COND_S;
2189  }
2190 }
2191 
2192 unsigned X86::GetCondBranchFromCond(X86::CondCode CC) {
2193  switch (CC) {
2194  default: llvm_unreachable("Illegal condition code!");
2195  case X86::COND_E: return X86::JE_1;
2196  case X86::COND_NE: return X86::JNE_1;
2197  case X86::COND_L: return X86::JL_1;
2198  case X86::COND_LE: return X86::JLE_1;
2199  case X86::COND_G: return X86::JG_1;
2200  case X86::COND_GE: return X86::JGE_1;
2201  case X86::COND_B: return X86::JB_1;
2202  case X86::COND_BE: return X86::JBE_1;
2203  case X86::COND_A: return X86::JA_1;
2204  case X86::COND_AE: return X86::JAE_1;
2205  case X86::COND_S: return X86::JS_1;
2206  case X86::COND_NS: return X86::JNS_1;
2207  case X86::COND_P: return X86::JP_1;
2208  case X86::COND_NP: return X86::JNP_1;
2209  case X86::COND_O: return X86::JO_1;
2210  case X86::COND_NO: return X86::JNO_1;
2211  }
2212 }
2213 
2214 /// Return the inverse of the specified condition,
2215 /// e.g. turning COND_E to COND_NE.
2216 X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
2217  switch (CC) {
2218  default: llvm_unreachable("Illegal condition code!");
2219  case X86::COND_E: return X86::COND_NE;
2220  case X86::COND_NE: return X86::COND_E;
2221  case X86::COND_L: return X86::COND_GE;
2222  case X86::COND_LE: return X86::COND_G;
2223  case X86::COND_G: return X86::COND_LE;
2224  case X86::COND_GE: return X86::COND_L;
2225  case X86::COND_B: return X86::COND_AE;
2226  case X86::COND_BE: return X86::COND_A;
2227  case X86::COND_A: return X86::COND_BE;
2228  case X86::COND_AE: return X86::COND_B;
2229  case X86::COND_S: return X86::COND_NS;
2230  case X86::COND_NS: return X86::COND_S;
2231  case X86::COND_P: return X86::COND_NP;
2232  case X86::COND_NP: return X86::COND_P;
2233  case X86::COND_O: return X86::COND_NO;
2234  case X86::COND_NO: return X86::COND_O;
2235  case X86::COND_NE_OR_P: return X86::COND_E_AND_NP;
2236  case X86::COND_E_AND_NP: return X86::COND_NE_OR_P;
2237  }
2238 }
2239 
2240 /// Assuming the flags are set by MI(a,b), return the condition code if we
2241 /// modify the instructions such that flags are set by MI(b,a).
2242 static X86::CondCode getSwappedCondition(X86::CondCode CC) {
2243  switch (CC) {
2244  default: return X86::COND_INVALID;
2245  case X86::COND_E: return X86::COND_E;
2246  case X86::COND_NE: return X86::COND_NE;
2247  case X86::COND_L: return X86::COND_G;
2248  case X86::COND_LE: return X86::COND_GE;
2249  case X86::COND_G: return X86::COND_L;
2250  case X86::COND_GE: return X86::COND_LE;
2251  case X86::COND_B: return X86::COND_A;
2252  case X86::COND_BE: return X86::COND_AE;
2253  case X86::COND_A: return X86::COND_B;
2254  case X86::COND_AE: return X86::COND_BE;
2255  }
2256 }
2257 
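The two helpers above are easy to confuse: GetOppositeBranchCondition negates the predicate for the same operand order, while the swapped form re-expresses the same predicate after the compared operands are exchanged. A standalone illustration with plain integers (added for this listing, not part of the file; it does not use the X86::CondCode enum):

#include <cassert>

static bool condL(int a, int b)  { return a < b;  } // COND_L for "cmp a, b"
static bool condGE(int a, int b) { return a >= b; } // its opposite condition
static bool condG(int a, int b)  { return a > b;  } // its swapped condition

int main() {
  for (int a = -2; a <= 2; ++a)
    for (int b = -2; b <= 2; ++b) {
      assert(condL(a, b) == !condGE(a, b)); // opposite: same operands, negated test
      assert(condL(a, b) == condG(b, a));   // swapped: operands exchanged
    }
  return 0;
}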
2258 std::pair<X86::CondCode, bool>
2259 X86::getX86ConditionCode(CmpInst::Predicate Predicate) {
2260  X86::CondCode CC = X86::COND_INVALID;
2261  bool NeedSwap = false;
2262  switch (Predicate) {
2263  default: break;
2264  // Floating-point Predicates
2265  case CmpInst::FCMP_UEQ: CC = X86::COND_E; break;
2266  case CmpInst::FCMP_OLT: NeedSwap = true; LLVM_FALLTHROUGH;
2267  case CmpInst::FCMP_OGT: CC = X86::COND_A; break;
2268  case CmpInst::FCMP_OLE: NeedSwap = true; LLVM_FALLTHROUGH;
2269  case CmpInst::FCMP_OGE: CC = X86::COND_AE; break;
2270  case CmpInst::FCMP_UGT: NeedSwap = true; LLVM_FALLTHROUGH;
2271  case CmpInst::FCMP_ULT: CC = X86::COND_B; break;
2272  case CmpInst::FCMP_UGE: NeedSwap = true; LLVM_FALLTHROUGH;
2273  case CmpInst::FCMP_ULE: CC = X86::COND_BE; break;
2274  case CmpInst::FCMP_ONE: CC = X86::COND_NE; break;
2275  case CmpInst::FCMP_UNO: CC = X86::COND_P; break;
2276  case CmpInst::FCMP_ORD: CC = X86::COND_NP; break;
2277  case CmpInst::FCMP_OEQ: LLVM_FALLTHROUGH;
2278  case CmpInst::FCMP_UNE: CC = X86::COND_INVALID; break;
2279 
2280  // Integer Predicates
2281  case CmpInst::ICMP_EQ: CC = X86::COND_E; break;
2282  case CmpInst::ICMP_NE: CC = X86::COND_NE; break;
2283  case CmpInst::ICMP_UGT: CC = X86::COND_A; break;
2284  case CmpInst::ICMP_UGE: CC = X86::COND_AE; break;
2285  case CmpInst::ICMP_ULT: CC = X86::COND_B; break;
2286  case CmpInst::ICMP_ULE: CC = X86::COND_BE; break;
2287  case CmpInst::ICMP_SGT: CC = X86::COND_G; break;
2288  case CmpInst::ICMP_SGE: CC = X86::COND_GE; break;
2289  case CmpInst::ICMP_SLT: CC = X86::COND_L; break;
2290  case CmpInst::ICMP_SLE: CC = X86::COND_LE; break;
2291  }
2292 
2293  return std::make_pair(CC, NeedSwap);
2294 }
2295 
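The mapping above reports FCMP_OLT as COND_A with NeedSwap set because the flags written by UCOMISS/UCOMISD only encode "above"-style ordered tests directly; the caller is expected to exchange the comparison operands. A standalone check of the identity this relies on (illustrative only, not part of the file):

#include <cassert>
#include <cmath>

static bool ordered_gt(double a, double b) { return a > b; } // false if either operand is NaN
static bool ordered_lt(double a, double b) { return a < b; } // false if either operand is NaN

int main() {
  const double Vals[] = {-1.0, 0.0, 2.5, std::nan("")};
  for (double a : Vals)
    for (double b : Vals)
      assert(ordered_lt(a, b) == ordered_gt(b, a)); // olt(a,b) == ogt(b,a), NaNs included
  return 0;
}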
2296 /// Return a set opcode for the given condition and
2297 /// whether it has memory operand.
2298 unsigned X86::getSETFromCond(CondCode CC, bool HasMemoryOperand) {
2299  static const uint16_t Opc[16][2] = {
2300  { X86::SETAr, X86::SETAm },
2301  { X86::SETAEr, X86::SETAEm },
2302  { X86::SETBr, X86::SETBm },
2303  { X86::SETBEr, X86::SETBEm },
2304  { X86::SETEr, X86::SETEm },
2305  { X86::SETGr, X86::SETGm },
2306  { X86::SETGEr, X86::SETGEm },
2307  { X86::SETLr, X86::SETLm },
2308  { X86::SETLEr, X86::SETLEm },
2309  { X86::SETNEr, X86::SETNEm },
2310  { X86::SETNOr, X86::SETNOm },
2311  { X86::SETNPr, X86::SETNPm },
2312  { X86::SETNSr, X86::SETNSm },
2313  { X86::SETOr, X86::SETOm },
2314  { X86::SETPr, X86::SETPm },
2315  { X86::SETSr, X86::SETSm }
2316  };
2317 
2318  assert(CC <= LAST_VALID_COND && "Can only handle standard cond codes");
2319  return Opc[CC][HasMemoryOperand ? 1 : 0];
2320 }
2321 
2322 /// Return a cmov opcode for the given condition,
2323 /// register size in bytes, and operand type.
2324 unsigned X86::getCMovFromCond(CondCode CC, unsigned RegBytes,
2325  bool HasMemoryOperand) {
2326  static const uint16_t Opc[32][3] = {
2327  { X86::CMOVA16rr, X86::CMOVA32rr, X86::CMOVA64rr },
2328  { X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr },
2329  { X86::CMOVB16rr, X86::CMOVB32rr, X86::CMOVB64rr },
2330  { X86::CMOVBE16rr, X86::CMOVBE32rr, X86::CMOVBE64rr },
2331  { X86::CMOVE16rr, X86::CMOVE32rr, X86::CMOVE64rr },
2332  { X86::CMOVG16rr, X86::CMOVG32rr, X86::CMOVG64rr },
2333  { X86::CMOVGE16rr, X86::CMOVGE32rr, X86::CMOVGE64rr },
2334  { X86::CMOVL16rr, X86::CMOVL32rr, X86::CMOVL64rr },
2335  { X86::CMOVLE16rr, X86::CMOVLE32rr, X86::CMOVLE64rr },
2336  { X86::CMOVNE16rr, X86::CMOVNE32rr, X86::CMOVNE64rr },
2337  { X86::CMOVNO16rr, X86::CMOVNO32rr, X86::CMOVNO64rr },
2338  { X86::CMOVNP16rr, X86::CMOVNP32rr, X86::CMOVNP64rr },
2339  { X86::CMOVNS16rr, X86::CMOVNS32rr, X86::CMOVNS64rr },
2340  { X86::CMOVO16rr, X86::CMOVO32rr, X86::CMOVO64rr },
2341  { X86::CMOVP16rr, X86::CMOVP32rr, X86::CMOVP64rr },
2342  { X86::CMOVS16rr, X86::CMOVS32rr, X86::CMOVS64rr },
2343  { X86::CMOVA16rm, X86::CMOVA32rm, X86::CMOVA64rm },
2344  { X86::CMOVAE16rm, X86::CMOVAE32rm, X86::CMOVAE64rm },
2345  { X86::CMOVB16rm, X86::CMOVB32rm, X86::CMOVB64rm },
2346  { X86::CMOVBE16rm, X86::CMOVBE32rm, X86::CMOVBE64rm },
2347  { X86::CMOVE16rm, X86::CMOVE32rm, X86::CMOVE64rm },
2348  { X86::CMOVG16rm, X86::CMOVG32rm, X86::CMOVG64rm },
2349  { X86::CMOVGE16rm, X86::CMOVGE32rm, X86::CMOVGE64rm },
2350  { X86::CMOVL16rm, X86::CMOVL32rm, X86::CMOVL64rm },
2351  { X86::CMOVLE16rm, X86::CMOVLE32rm, X86::CMOVLE64rm },
2352  { X86::CMOVNE16rm, X86::CMOVNE32rm, X86::CMOVNE64rm },
2353  { X86::CMOVNO16rm, X86::CMOVNO32rm, X86::CMOVNO64rm },
2354  { X86::CMOVNP16rm, X86::CMOVNP32rm, X86::CMOVNP64rm },
2355  { X86::CMOVNS16rm, X86::CMOVNS32rm, X86::CMOVNS64rm },
2356  { X86::CMOVO16rm, X86::CMOVO32rm, X86::CMOVO64rm },
2357  { X86::CMOVP16rm, X86::CMOVP32rm, X86::CMOVP64rm },
2358  { X86::CMOVS16rm, X86::CMOVS32rm, X86::CMOVS64rm }
2359  };
2360 
2361  assert(CC < 16 && "Can only handle standard cond codes");
2362  unsigned Idx = HasMemoryOperand ? 16+CC : CC;
2363  switch(RegBytes) {
2364  default: llvm_unreachable("Illegal register size!");
2365  case 2: return Opc[Idx][0];
2366  case 4: return Opc[Idx][1];
2367  case 8: return Opc[Idx][2];
2368  }
2369 }
2370 
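A small usage sketch for the table lookup above (a fragment, assuming the X86 headers this file already includes): request the 32-bit register form of CMOV for COND_NE.

// Illustrative only: 4-byte operands, register (not memory) second operand.
unsigned Opc = X86::getCMovFromCond(X86::COND_NE, /*RegBytes=*/4,
                                    /*HasMemoryOperand=*/false);
assert(Opc == X86::CMOVNE32rr);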
2371 /// Get the VPCMP immediate for the given condition.
2372 unsigned X86::getVPCMPImmForCond(ISD::CondCode CC) {
2373  switch (CC) {
2374  default: llvm_unreachable("Unexpected SETCC condition");
2375  case ISD::SETNE: return 4;
2376  case ISD::SETEQ: return 0;
2377  case ISD::SETULT:
2378  case ISD::SETLT: return 1;
2379  case ISD::SETUGT:
2380  case ISD::SETGT: return 6;
2381  case ISD::SETUGE:
2382  case ISD::SETGE: return 5;
2383  case ISD::SETULE:
2384  case ISD::SETLE: return 2;
2385  }
2386 }
2387 
2388 /// Get the VPCMP immediate if the opcodes are swapped.
2389 unsigned X86::getSwappedVPCMPImm(unsigned Imm) {
2390  switch (Imm) {
2391  default: llvm_unreachable("Unreachable!");
2392  case 0x01: Imm = 0x06; break; // LT -> NLE
2393  case 0x02: Imm = 0x05; break; // LE -> NLT
2394  case 0x05: Imm = 0x02; break; // NLT -> LE
2395  case 0x06: Imm = 0x01; break; // NLE -> LT
2396  case 0x00: // EQ
2397  case 0x03: // FALSE
2398  case 0x04: // NE
2399  case 0x07: // TRUE
2400  break;
2401  }
2402 
2403  return Imm;
2404 }
2405 
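An illustrative check of the swap table above (a fragment, assuming this file's headers): the signed less-than predicate (immediate 1) selects lanes where a < b, and after the two vector sources are commuted the same lanes are described by not-less-or-equal (immediate 6); EQ, NE, FALSE and TRUE are symmetric and stay unchanged.

assert(X86::getSwappedVPCMPImm(1) == 6); // LT  -> NLE
assert(X86::getSwappedVPCMPImm(6) == 1); // NLE -> LT
assert(X86::getSwappedVPCMPImm(2) == 5); // LE  -> NLT
assert(X86::getSwappedVPCMPImm(0) == 0); // EQ is unchanged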
2406 /// Get the VPCOM immediate if the opcodes are swapped.
2407 unsigned X86::getSwappedVPCOMImm(unsigned Imm) {
2408  switch (Imm) {
2409  default: llvm_unreachable("Unreachable!");
2410  case 0x00: Imm = 0x02; break; // LT -> GT
2411  case 0x01: Imm = 0x03; break; // LE -> GE
2412  case 0x02: Imm = 0x00; break; // GT -> LT
2413  case 0x03: Imm = 0x01; break; // GE -> LE
2414  case 0x04: // EQ
2415  case 0x05: // NE
2416  case 0x06: // FALSE
2417  case 0x07: // TRUE
2418  break;
2419  }
2420 
2421  return Imm;
2422 }
2423 
2424 bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const {
2425  if (!MI.isTerminator()) return false;
2426 
2427  // Conditional branch is a special case.
2428  if (MI.isBranch() && !MI.isBarrier())
2429  return true;
2430  if (!MI.isPredicable())
2431  return true;
2432  return !isPredicated(MI);
2433 }
2434 
2435 bool X86InstrInfo::isUnconditionalTailCall(const MachineInstr &MI) const {
2436  switch (MI.getOpcode()) {
2437  case X86::TCRETURNdi:
2438  case X86::TCRETURNri:
2439  case X86::TCRETURNmi:
2440  case X86::TCRETURNdi64:
2441  case X86::TCRETURNri64:
2442  case X86::TCRETURNmi64:
2443  return true;
2444  default:
2445  return false;
2446  }
2447 }
2448 
2449 bool X86InstrInfo::canMakeTailCallConditional(
2450  SmallVectorImpl<MachineOperand> &BranchCond,
2451  const MachineInstr &TailCall) const {
2452  if (TailCall.getOpcode() != X86::TCRETURNdi &&
2453  TailCall.getOpcode() != X86::TCRETURNdi64) {
2454  // Only direct calls can be done with a conditional branch.
2455  return false;
2456  }
2457 
2458  const MachineFunction *MF = TailCall.getParent()->getParent();
2459  if (Subtarget.isTargetWin64() && MF->hasWinCFI()) {
2460  // Conditional tail calls confuse the Win64 unwinder.
2461  return false;
2462  }
2463 
2464  assert(BranchCond.size() == 1);
2465  if (BranchCond[0].getImm() > X86::LAST_VALID_COND) {
2466  // Can't make a conditional tail call with this condition.
2467  return false;
2468  }
2469 
2470  const X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
2471  if (X86FI->getTCReturnAddrDelta() != 0 ||
2472  TailCall.getOperand(1).getImm() != 0) {
2473  // A conditional tail call cannot do any stack adjustment.
2474  return false;
2475  }
2476 
2477  return true;
2478 }
2479 
2480 void X86InstrInfo::replaceBranchWithTailCall(
2481  MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &BranchCond,
2482  const MachineInstr &TailCall) const {
2483  assert(canMakeTailCallConditional(BranchCond, TailCall));
2484  MachineBasicBlock::iterator I = MBB.end();
2485 
2486  while (I != MBB.begin()) {
2487  --I;
2488  if (I->isDebugInstr())
2489  continue;
2490  if (!I->isBranch())
2491  assert(0 && "Can't find the branch to replace!");
2492 
2493  X86::CondCode CC = X86::getCondFromBranchOpc(I->getOpcode());
2494  assert(BranchCond.size() == 1);
2495  if (CC != BranchCond[0].getImm())
2496  continue;
2497 
2498  break;
2499  }
2500 
2501  unsigned Opc = TailCall.getOpcode() == X86::TCRETURNdi ? X86::TCRETURNdicc
2502  : X86::TCRETURNdi64cc;
2503 
2504  auto MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opc));
2505  MIB->addOperand(TailCall.getOperand(0)); // Destination.
2506  MIB.addImm(0); // Stack offset (not used).
2507  MIB->addOperand(BranchCond[0]); // Condition.
2508  MIB.copyImplicitOps(TailCall); // Regmask and (imp-used) parameters.
2509 
2510  // Add implicit uses and defs of all live regs potentially clobbered by the
2511  // call. This way they still appear live across the call.
2512  LivePhysRegs LiveRegs(getRegisterInfo());
2513  LiveRegs.addLiveOuts(MBB);
2514  SmallVector<std::pair<MCPhysReg, const MachineOperand *>, 8> Clobbers;
2515  LiveRegs.stepForward(*MIB, Clobbers);
2516  for (const auto &C : Clobbers) {
2517  MIB.addReg(C.first, RegState::Implicit);
2518  MIB.addReg(C.first, RegState::Implicit | RegState::Define);
2519  }
2520 
2521  I->eraseFromParent();
2522 }
2523 
2524 // Given a MBB and its TBB, find the FBB which was a fallthrough MBB (it may
2525 // not be a fallthrough MBB now due to layout changes). Return nullptr if the
2526 // fallthrough MBB cannot be identified.
2527 static MachineBasicBlock *getFallThroughMBB(MachineBasicBlock *MBB,
2528  MachineBasicBlock *TBB) {
2529  // Look for non-EHPad successors other than TBB. If we find exactly one, it
2530  // is the fallthrough MBB. If we find zero, then TBB is both the target MBB
2531  // and fallthrough MBB. If we find more than one, we cannot identify the
2532  // fallthrough MBB and should return nullptr.
2533  MachineBasicBlock *FallthroughBB = nullptr;
2534  for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) {
2535  if ((*SI)->isEHPad() || (*SI == TBB && FallthroughBB))
2536  continue;
2537  // Return a nullptr if we found more than one fallthrough successor.
2538  if (FallthroughBB && FallthroughBB != TBB)
2539  return nullptr;
2540  FallthroughBB = *SI;
2541  }
2542  return FallthroughBB;
2543 }
2544 
2545 bool X86InstrInfo::AnalyzeBranchImpl(
2546  MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
2547  SmallVectorImpl<MachineOperand> &Cond,
2548  SmallVectorImpl<MachineInstr *> &CondBranches, bool AllowModify) const {
2549 
2550  // Start from the bottom of the block and work up, examining the
2551  // terminator instructions.
2552  MachineBasicBlock::iterator I = MBB.end();
2553  MachineBasicBlock::iterator UnCondBrIter = MBB.end();
2554  while (I != MBB.begin()) {
2555  --I;
2556  if (I->isDebugInstr())
2557  continue;
2558 
2559  // Working from the bottom, when we see a non-terminator instruction, we're
2560  // done.
2561  if (!isUnpredicatedTerminator(*I))
2562  break;
2563 
2564  // A terminator that isn't a branch can't easily be handled by this
2565  // analysis.
2566  if (!I->isBranch())
2567  return true;
2568 
2569  // Handle unconditional branches.
2570  if (I->getOpcode() == X86::JMP_1) {
2571  UnCondBrIter = I;
2572 
2573  if (!AllowModify) {
2574  TBB = I->getOperand(0).getMBB();
2575  continue;
2576  }
2577 
2578  // If the block has any instructions after a JMP, delete them.
2579  while (std::next(I) != MBB.end())
2580  std::next(I)->eraseFromParent();
2581 
2582  Cond.clear();
2583  FBB = nullptr;
2584 
2585  // Delete the JMP if it's equivalent to a fall-through.
2586  if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
2587  TBB = nullptr;
2588  I->eraseFromParent();
2589  I = MBB.end();
2590  UnCondBrIter = MBB.end();
2591  continue;
2592  }
2593 
2594  // TBB is used to indicate the unconditional destination.
2595  TBB = I->getOperand(0).getMBB();
2596  continue;
2597  }
2598 
2599  // Handle conditional branches.
2600  X86::CondCode BranchCode = X86::getCondFromBranchOpc(I->getOpcode());
2601  if (BranchCode == X86::COND_INVALID)
2602  return true; // Can't handle indirect branch.
2603 
2604  // In practice we should never have an undef eflags operand, if we do
2605  // abort here as we are not prepared to preserve the flag.
2606  if (I->getOperand(1).isUndef())
2607  return true;
2608 
2609  // Working from the bottom, handle the first conditional branch.
2610  if (Cond.empty()) {
2611  MachineBasicBlock *TargetBB = I->getOperand(0).getMBB();
2612  if (AllowModify && UnCondBrIter != MBB.end() &&
2613  MBB.isLayoutSuccessor(TargetBB)) {
2614  // If we can modify the code and it ends in something like:
2615  //
2616  // jCC L1
2617  // jmp L2
2618  // L1:
2619  // ...
2620  // L2:
2621  //
2622  // Then we can change this to:
2623  //
2624  // jnCC L2
2625  // L1:
2626  // ...
2627  // L2:
2628  //
2629  // Which is a bit more efficient.
2630  // We conditionally jump to the fall-through block.
2631  BranchCode = GetOppositeBranchCondition(BranchCode);
2632  unsigned JNCC = GetCondBranchFromCond(BranchCode);
2633  MachineBasicBlock::iterator OldInst = I;
2634 
2635  BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(JNCC))
2636  .addMBB(UnCondBrIter->getOperand(0).getMBB());
2637  BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_1))
2638  .addMBB(TargetBB);
2639 
2640  OldInst->eraseFromParent();
2641  UnCondBrIter->eraseFromParent();
2642 
2643  // Restart the analysis.
2644  UnCondBrIter = MBB.end();
2645  I = MBB.end();
2646  continue;
2647  }
2648 
2649  FBB = TBB;
2650  TBB = I->getOperand(0).getMBB();
2651  Cond.push_back(MachineOperand::CreateImm(BranchCode));
2652  CondBranches.push_back(&*I);
2653  continue;
2654  }
2655 
2656  // Handle subsequent conditional branches. Only handle the case where all
2657  // conditional branches branch to the same destination and their condition
2658  // opcodes fit one of the special multi-branch idioms.
2659  assert(Cond.size() == 1);
2660  assert(TBB);
2661 
2662  // If the conditions are the same, we can leave them alone.
2663  X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
2664  auto NewTBB = I->getOperand(0).getMBB();
2665  if (OldBranchCode == BranchCode && TBB == NewTBB)
2666  continue;
2667 
2668  // If they differ, see if they fit one of the known patterns. Theoretically,
2669  // we could handle more patterns here, but we shouldn't expect to see them
2670  // if instruction selection has done a reasonable job.
2671  if (TBB == NewTBB &&
2672  ((OldBranchCode == X86::COND_P && BranchCode == X86::COND_NE) ||
2673  (OldBranchCode == X86::COND_NE && BranchCode == X86::COND_P))) {
2674  BranchCode = X86::COND_NE_OR_P;
2675  } else if ((OldBranchCode == X86::COND_NP && BranchCode == X86::COND_NE) ||
2676  (OldBranchCode == X86::COND_E && BranchCode == X86::COND_P)) {
2677  if (NewTBB != (FBB ? FBB : getFallThroughMBB(&MBB, TBB)))
2678  return true;
2679 
2680  // X86::COND_E_AND_NP usually has two different branch destinations.
2681  //
2682  // JP B1
2683  // JE B2
2684  // JMP B1
2685  // B1:
2686  // B2:
2687  //
2688  // Here this condition branches to B2 only if NP && E. It has another
2689  // equivalent form:
2690  //
2691  // JNE B1
2692  // JNP B2
2693  // JMP B1
2694  // B1:
2695  // B2:
2696  //
2697  // Similarly it branches to B2 only if E && NP. That is why this condition
2698  // is named with COND_E_AND_NP.
2699  BranchCode = X86::COND_E_AND_NP;
2700  } else
2701  return true;
2702 
2703  // Update the MachineOperand.
2704  Cond[0].setImm(BranchCode);
2705  CondBranches.push_back(&*I);
2706  }
2707 
2708  return false;
2709 }
2710 
2711 bool X86InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
2712  MachineBasicBlock *&TBB,
2713  MachineBasicBlock *&FBB,
2714  SmallVectorImpl<MachineOperand> &Cond,
2715  bool AllowModify) const {
2716  SmallVector<MachineInstr *, 4> CondBranches;
2717  return AnalyzeBranchImpl(MBB, TBB, FBB, Cond, CondBranches, AllowModify);
2718 }
2719 
2720 bool X86InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
2721  MachineBranchPredicate &MBP,
2722  bool AllowModify) const {
2723  using namespace std::placeholders;
2724 
2725  SmallVector<MachineOperand, 4> Cond;
2726  SmallVector<MachineInstr *, 4> CondBranches;
2727  if (AnalyzeBranchImpl(MBB, MBP.TrueDest, MBP.FalseDest, Cond, CondBranches,
2728  AllowModify))
2729  return true;
2730 
2731  if (Cond.size() != 1)
2732  return true;
2733 
2734  assert(MBP.TrueDest && "expected!");
2735 
2736  if (!MBP.FalseDest)
2737  MBP.FalseDest = MBB.getNextNode();
2738 
2739  const TargetRegisterInfo *TRI = &getRegisterInfo();
2740 
2741  MachineInstr *ConditionDef = nullptr;
2742  bool SingleUseCondition = true;
2743 
2744  for (auto I = std::next(MBB.rbegin()), E = MBB.rend(); I != E; ++I) {
2745  if (I->modifiesRegister(X86::EFLAGS, TRI)) {
2746  ConditionDef = &*I;
2747  break;
2748  }
2749 
2750  if (I->readsRegister(X86::EFLAGS, TRI))
2751  SingleUseCondition = false;
2752  }
2753 
2754  if (!ConditionDef)
2755  return true;
2756 
2757  if (SingleUseCondition) {
2758  for (auto *Succ : MBB.successors())
2759  if (Succ->isLiveIn(X86::EFLAGS))
2760  SingleUseCondition = false;
2761  }
2762 
2763  MBP.ConditionDef = ConditionDef;
2764  MBP.SingleUseCondition = SingleUseCondition;
2765 
2766  // Currently we only recognize the simple pattern:
2767  //
2768  // test %reg, %reg
2769  // je %label
2770  //
2771  const unsigned TestOpcode =
2772  Subtarget.is64Bit() ? X86::TEST64rr : X86::TEST32rr;
2773 
2774  if (ConditionDef->getOpcode() == TestOpcode &&
2775  ConditionDef->getNumOperands() == 3 &&
2776  ConditionDef->getOperand(0).isIdenticalTo(ConditionDef->getOperand(1)) &&
2777  (Cond[0].getImm() == X86::COND_NE || Cond[0].getImm() == X86::COND_E)) {
2778  MBP.LHS = ConditionDef->getOperand(0);
2779  MBP.RHS = MachineOperand::CreateImm(0);
2780  MBP.Predicate = Cond[0].getImm() == X86::COND_NE
2781  ? MachineBranchPredicate::PRED_NE
2782  : MachineBranchPredicate::PRED_EQ;
2783  return false;
2784  }
2785 
2786  return true;
2787 }
2788 
2789 unsigned X86InstrInfo::removeBranch(MachineBasicBlock &MBB,
2790  int *BytesRemoved) const {
2791  assert(!BytesRemoved && "code size not handled");
2792 
2793  MachineBasicBlock::iterator I = MBB.end();
2794  unsigned Count = 0;
2795 
2796  while (I != MBB.begin()) {
2797  --I;
2798  if (I->isDebugInstr())
2799  continue;
2800  if (I->getOpcode() != X86::JMP_1 &&
2801  X86::getCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID)
2802  break;
2803  // Remove the branch.
2804  I->eraseFromParent();
2805  I = MBB.end();
2806  ++Count;
2807  }
2808 
2809  return Count;
2810 }
2811 
2812 unsigned X86InstrInfo::insertBranch(MachineBasicBlock &MBB,
2813  MachineBasicBlock *TBB,
2814  MachineBasicBlock *FBB,
2815  ArrayRef<MachineOperand> Cond,
2816  const DebugLoc &DL,
2817  int *BytesAdded) const {
2818  // Shouldn't be a fall through.
2819  assert(TBB && "insertBranch must not be told to insert a fallthrough");
2820  assert((Cond.size() == 1 || Cond.size() == 0) &&
2821  "X86 branch conditions have one component!");
2822  assert(!BytesAdded && "code size not handled");
2823 
2824  if (Cond.empty()) {
2825  // Unconditional branch?
2826  assert(!FBB && "Unconditional branch with multiple successors!");
2827  BuildMI(&MBB, DL, get(X86::JMP_1)).addMBB(TBB);
2828  return 1;
2829  }
2830 
2831  // If FBB is null, it is implied to be a fall-through block.
2832  bool FallThru = FBB == nullptr;
2833 
2834  // Conditional branch.
2835  unsigned Count = 0;
2836  X86::CondCode CC = (X86::CondCode)Cond[0].getImm();
2837  switch (CC) {
2838  case X86::COND_NE_OR_P:
2839  // Synthesize NE_OR_P with two branches.
2840  BuildMI(&MBB, DL, get(X86::JNE_1)).addMBB(TBB);
2841  ++Count;
2842  BuildMI(&MBB, DL, get(X86::JP_1)).addMBB(TBB);
2843  ++Count;
2844  break;
2845  case X86::COND_E_AND_NP:
2846  // Use the next block of MBB as FBB if it is null.
2847  if (FBB == nullptr) {
2848  FBB = getFallThroughMBB(&MBB, TBB);
2849  assert(FBB && "MBB cannot be the last block in function when the false "
2850  "body is a fall-through.");
2851  }
2852  // Synthesize COND_E_AND_NP with two branches.
2853  BuildMI(&MBB, DL, get(X86::JNE_1)).addMBB(FBB);
2854  ++Count;
2855  BuildMI(&MBB, DL, get(X86::JNP_1)).addMBB(TBB);
2856  ++Count;
2857  break;
2858  default: {
2859  unsigned Opc = GetCondBranchFromCond(CC);
2860  BuildMI(&MBB, DL, get(Opc)).addMBB(TBB);
2861  ++Count;
2862  }
2863  }
2864  if (!FallThru) {
2865  // Two-way Conditional branch. Insert the second branch.
2866  BuildMI(&MBB, DL, get(X86::JMP_1)).addMBB(FBB);
2867  ++Count;
2868  }
2869  return Count;
2870 }
2871 
2872 bool X86InstrInfo::
2873 canInsertSelect(const MachineBasicBlock &MBB,
2874  ArrayRef<MachineOperand> Cond,
2875  unsigned TrueReg, unsigned FalseReg,
2876  int &CondCycles, int &TrueCycles, int &FalseCycles) const {
2877  // Not all subtargets have cmov instructions.
2878  if (!Subtarget.hasCMov())
2879  return false;
2880  if (Cond.size() != 1)
2881  return false;
2882  // We cannot do the composite conditions, at least not in SSA form.
2883  if ((X86::CondCode)Cond[0].getImm() > X86::COND_S)
2884  return false;
2885 
2886  // Check register classes.
2887  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2888  const TargetRegisterClass *RC =
2889  RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
2890  if (!RC)
2891  return false;
2892 
2893  // We have cmov instructions for 16, 32, and 64 bit general purpose registers.
2894  if (X86::GR16RegClass.hasSubClassEq(RC) ||
2895  X86::GR32RegClass.hasSubClassEq(RC) ||
2896  X86::GR64RegClass.hasSubClassEq(RC)) {
2897  // This latency applies to Pentium M, Merom, Wolfdale, Nehalem, and Sandy
2898  // Bridge. Probably Ivy Bridge as well.
2899  CondCycles = 2;
2900  TrueCycles = 2;
2901  FalseCycles = 2;
2902  return true;
2903  }
2904 
2905  // Can't do vectors.
2906  return false;
2907 }
2908 
2909 void X86InstrInfo::insertSelect(MachineBasicBlock &MBB,
2910  MachineBasicBlock::iterator I,
2911  const DebugLoc &DL, unsigned DstReg,
2912  ArrayRef<MachineOperand> Cond, unsigned TrueReg,
2913  unsigned FalseReg) const {
2914  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2915  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
2916  const TargetRegisterClass &RC = *MRI.getRegClass(DstReg);
2917  assert(Cond.size() == 1 && "Invalid Cond array");
2918  unsigned Opc = getCMovFromCond((X86::CondCode)Cond[0].getImm(),
2919  TRI.getRegSizeInBits(RC) / 8,
2920  false /*HasMemoryOperand*/);
2921  BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(FalseReg).addReg(TrueReg);
2922 }
2923 
2924 /// Test if the given register is a physical h register.
2925 static bool isHReg(unsigned Reg) {
2926  return X86::GR8_ABCD_HRegClass.contains(Reg);
2927 }
2928 
2929 // Try and copy between VR128/VR64 and GR64 registers.
2930 static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
2931  const X86Subtarget &Subtarget) {
2932  bool HasAVX = Subtarget.hasAVX();
2933  bool HasAVX512 = Subtarget.hasAVX512();
2934 
2935  // SrcReg(MaskReg) -> DestReg(GR64)
2936  // SrcReg(MaskReg) -> DestReg(GR32)
2937 
2938  // All KMASK RegClasses hold the same k registers, can be tested against anyone.
2939  if (X86::VK16RegClass.contains(SrcReg)) {
2940  if (X86::GR64RegClass.contains(DestReg)) {
2941  assert(Subtarget.hasBWI());
2942  return X86::KMOVQrk;
2943  }
2944  if (X86::GR32RegClass.contains(DestReg))
2945  return Subtarget.hasBWI() ? X86::KMOVDrk : X86::KMOVWrk;
2946  }
2947 
2948  // SrcReg(GR64) -> DestReg(MaskReg)
2949  // SrcReg(GR32) -> DestReg(MaskReg)
2950 
2951  // All KMASK RegClasses hold the same k registers, can be tested against anyone.
2952  if (X86::VK16RegClass.contains(DestReg)) {
2953  if (X86::GR64RegClass.contains(SrcReg)) {
2954  assert(Subtarget.hasBWI());
2955  return X86::KMOVQkr;
2956  }
2957  if (X86::GR32RegClass.contains(SrcReg))
2958  return Subtarget.hasBWI() ? X86::KMOVDkr : X86::KMOVWkr;
2959  }
2960 
2961 
2962  // SrcReg(VR128) -> DestReg(GR64)
2963  // SrcReg(VR64) -> DestReg(GR64)
2964  // SrcReg(GR64) -> DestReg(VR128)
2965  // SrcReg(GR64) -> DestReg(VR64)
2966 
2967  if (X86::GR64RegClass.contains(DestReg)) {
2968  if (X86::VR128XRegClass.contains(SrcReg))
2969  // Copy from a VR128 register to a GR64 register.
2970  return HasAVX512 ? X86::VMOVPQIto64Zrr :
2971  HasAVX ? X86::VMOVPQIto64rr :
2972  X86::MOVPQIto64rr;
2973  if (X86::VR64RegClass.contains(SrcReg))
2974  // Copy from a VR64 register to a GR64 register.
2975  return X86::MMX_MOVD64from64rr;
2976  } else if (X86::GR64RegClass.contains(SrcReg)) {
2977  // Copy from a GR64 register to a VR128 register.
2978  if (X86::VR128XRegClass.contains(DestReg))
2979  return HasAVX512 ? X86::VMOV64toPQIZrr :
2980  HasAVX ? X86::VMOV64toPQIrr :
2981  X86::MOV64toPQIrr;
2982  // Copy from a GR64 register to a VR64 register.
2983  if (X86::VR64RegClass.contains(DestReg))
2984  return X86::MMX_MOVD64to64rr;
2985  }
2986 
2987  // SrcReg(FR32) -> DestReg(GR32)
2988  // SrcReg(GR32) -> DestReg(FR32)
2989 
2990  if (X86::GR32RegClass.contains(DestReg) &&
2991  X86::FR32XRegClass.contains(SrcReg))
2992  // Copy from a FR32 register to a GR32 register.
2993  return HasAVX512 ? X86::VMOVSS2DIZrr :
2994  HasAVX ? X86::VMOVSS2DIrr :
2995  X86::MOVSS2DIrr;
2996 
2997  if (X86::FR32XRegClass.contains(DestReg) &&
2998  X86::GR32RegClass.contains(SrcReg))
2999  // Copy from a GR32 register to a FR32 register.
3000  return HasAVX512 ? X86::VMOVDI2SSZrr :
3001  HasAVX ? X86::VMOVDI2SSrr :
3002  X86::MOVDI2SSrr;
3003  return 0;
3004 }
3005 
3006 void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
3007  MachineBasicBlock::iterator MI,
3008  const DebugLoc &DL, unsigned DestReg,
3009  unsigned SrcReg, bool KillSrc) const {
3010  // First deal with the normal symmetric copies.
3011  bool HasAVX = Subtarget.hasAVX();
3012  bool HasVLX = Subtarget.hasVLX();
3013  unsigned Opc = 0;
3014  if (X86::GR64RegClass.contains(DestReg, SrcReg))
3015  Opc = X86::MOV64rr;
3016  else if (X86::GR32RegClass.contains(DestReg, SrcReg))
3017  Opc = X86::MOV32rr;
3018  else if (X86::GR16RegClass.contains(DestReg, SrcReg))
3019  Opc = X86::MOV16rr;
3020  else if (X86::GR8RegClass.contains(DestReg, SrcReg)) {
3021  // Copying to or from a physical H register on x86-64 requires a NOREX
3022  // move. Otherwise use a normal move.
3023  if ((isHReg(DestReg) || isHReg(SrcReg)) &&
3024  Subtarget.is64Bit()) {
3025  Opc = X86::MOV8rr_NOREX;
3026  // Both operands must be encodable without an REX prefix.
3027  assert(X86::GR8_NOREXRegClass.contains(SrcReg, DestReg) &&
3028  "8-bit H register can not be copied outside GR8_NOREX");
3029  } else
3030  Opc = X86::MOV8rr;
3031  }
3032  else if (X86::VR64RegClass.contains(DestReg, SrcReg))
3033  Opc = X86::MMX_MOVQ64rr;
3034  else if (X86::VR128XRegClass.contains(DestReg, SrcReg)) {
3035  if (HasVLX)
3036  Opc = X86::VMOVAPSZ128rr;
3037  else if (X86::VR128RegClass.contains(DestReg, SrcReg))
3038  Opc = HasAVX ? X86::VMOVAPSrr : X86::MOVAPSrr;
3039  else {
3040  // If this an extended register and we don't have VLX we need to use a
3041  // 512-bit move.
3042  Opc = X86::VMOVAPSZrr;
3043  const TargetRegisterInfo *TRI = &getRegisterInfo();
3044  DestReg = TRI->getMatchingSuperReg(DestReg, X86::sub_xmm,
3045  &X86::VR512RegClass);
3046  SrcReg = TRI->getMatchingSuperReg(SrcReg, X86::sub_xmm,
3047  &X86::VR512RegClass);
3048  }
3049  } else if (X86::VR256XRegClass.contains(DestReg, SrcReg)) {
3050  if (HasVLX)
3051  Opc = X86::VMOVAPSZ256rr;
3052  else if (X86::VR256RegClass.contains(DestReg, SrcReg))
3053  Opc = X86::VMOVAPSYrr;
3054  else {
3055  // If this an extended register and we don't have VLX we need to use a
3056  // 512-bit move.
3057  Opc = X86::VMOVAPSZrr;
3058  const TargetRegisterInfo *TRI = &getRegisterInfo();
3059  DestReg = TRI->getMatchingSuperReg(DestReg, X86::sub_ymm,
3060  &X86::VR512RegClass);
3061  SrcReg = TRI->getMatchingSuperReg(SrcReg, X86::sub_ymm,
3062  &X86::VR512RegClass);
3063  }
3064  } else if (X86::VR512RegClass.contains(DestReg, SrcReg))
3065  Opc = X86::VMOVAPSZrr;
3066  // All KMASK RegClasses hold the same k registers, can be tested against anyone.
3067  else if (X86::VK16RegClass.contains(DestReg, SrcReg))
3068  Opc = Subtarget.hasBWI() ? X86::KMOVQkk : X86::KMOVWkk;
3069  if (!Opc)
3070  Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget);
3071 
3072  if (Opc) {
3073  BuildMI(MBB, MI, DL, get(Opc), DestReg)
3074  .addReg(SrcReg, getKillRegState(KillSrc));
3075  return;
3076  }
3077 
3078  if (SrcReg == X86::EFLAGS || DestReg == X86::EFLAGS) {
3079  // FIXME: We use a fatal error here because historically LLVM has tried
3080  // lower some of these physreg copies and we want to ensure we get
3081  // reasonable bug reports if someone encounters a case no other testing
3082  // found. This path should be removed after the LLVM 7 release.
3083  report_fatal_error("Unable to copy EFLAGS physical register!");
3084  }
3085 
3086  LLVM_DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg) << " to "
3087  << RI.getName(DestReg) << '\n');
3088  report_fatal_error("Cannot emit physreg copy instruction");
3089 }
3090 
3091 bool X86InstrInfo::isCopyInstrImpl(const MachineInstr &MI,
3092  const MachineOperand *&Src,
3093  const MachineOperand *&Dest) const {
3094  if (MI.isMoveReg()) {
3095  Dest = &MI.getOperand(0);
3096  Src = &MI.getOperand(1);
3097  return true;
3098  }
3099  return false;
3100 }
3101 
3102 static unsigned getLoadStoreRegOpcode(unsigned Reg,
3103  const TargetRegisterClass *RC,
3104  bool isStackAligned,
3105  const X86Subtarget &STI,
3106  bool load) {
3107  bool HasAVX = STI.hasAVX();
3108  bool HasAVX512 = STI.hasAVX512();
3109  bool HasVLX = STI.hasVLX();
3110 
3111  switch (STI.getRegisterInfo()->getSpillSize(*RC)) {
3112  default:
3113  llvm_unreachable("Unknown spill size");
3114  case 1:
3115  assert(X86::GR8RegClass.hasSubClassEq(RC) && "Unknown 1-byte regclass");
3116  if (STI.is64Bit())
3117  // Copying to or from a physical H register on x86-64 requires a NOREX
3118  // move. Otherwise use a normal move.
3119  if (isHReg(Reg) || X86::GR8_ABCD_HRegClass.hasSubClassEq(RC))
3120  return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
3121  return load ? X86::MOV8rm : X86::MOV8mr;
3122  case 2:
3123  if (X86::VK16RegClass.hasSubClassEq(RC))
3124  return load ? X86::KMOVWkm : X86::KMOVWmk;
3125  assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass");
3126  return load ? X86::MOV16rm : X86::MOV16mr;
3127  case 4:
3128  if (X86::GR32RegClass.hasSubClassEq(RC))
3129  return load ? X86::MOV32rm : X86::MOV32mr;
3130  if (X86::FR32XRegClass.hasSubClassEq(RC))
3131  return load ?
3132  (HasAVX512 ? X86::VMOVSSZrm : HasAVX ? X86::VMOVSSrm : X86::MOVSSrm) :
3133  (HasAVX512 ? X86::VMOVSSZmr : HasAVX ? X86::VMOVSSmr : X86::MOVSSmr);
3134  if (X86::RFP32RegClass.hasSubClassEq(RC))
3135  return load ? X86::LD_Fp32m : X86::ST_Fp32m;
3136  if (X86::VK32RegClass.hasSubClassEq(RC)) {
3137  assert(STI.hasBWI() && "KMOVD requires BWI");
3138  return load ? X86::KMOVDkm : X86::KMOVDmk;
3139  }
3140  llvm_unreachable("Unknown 4-byte regclass");
3141  case 8:
3142  if (X86::GR64RegClass.hasSubClassEq(RC))
3143  return load ? X86::MOV64rm : X86::MOV64mr;
3144  if (X86::FR64XRegClass.hasSubClassEq(RC))
3145  return load ?
3146  (HasAVX512 ? X86::VMOVSDZrm : HasAVX ? X86::VMOVSDrm : X86::MOVSDrm) :
3147  (HasAVX512 ? X86::VMOVSDZmr : HasAVX ? X86::VMOVSDmr : X86::MOVSDmr);
3148  if (X86::VR64RegClass.hasSubClassEq(RC))
3149  return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr;
3150  if (X86::RFP64RegClass.hasSubClassEq(RC))
3151  return load ? X86::LD_Fp64m : X86::ST_Fp64m;
3152  if (X86::VK64RegClass.hasSubClassEq(RC)) {
3153  assert(STI.hasBWI() && "KMOVQ requires BWI");
3154  return load ? X86::KMOVQkm : X86::KMOVQmk;
3155  }
3156  llvm_unreachable("Unknown 8-byte regclass");
3157  case 10:
3158  assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass");
3159  return load ? X86::LD_Fp80m : X86::ST_FpP80m;
3160  case 16: {
3161  if (X86::VR128XRegClass.hasSubClassEq(RC)) {
3162  // If stack is realigned we can use aligned stores.
3163  if (isStackAligned)
3164  return load ?
3165  (HasVLX ? X86::VMOVAPSZ128rm :
3166  HasAVX512 ? X86::VMOVAPSZ128rm_NOVLX :
3167  HasAVX ? X86::VMOVAPSrm :
3168  X86::MOVAPSrm):
3169  (HasVLX ? X86::VMOVAPSZ128mr :
3170  HasAVX512 ? X86::VMOVAPSZ128mr_NOVLX :
3171  HasAVX ? X86::VMOVAPSmr :
3172  X86::MOVAPSmr);
3173  else
3174  return load ?
3175  (HasVLX ? X86::VMOVUPSZ128rm :
3176  HasAVX512 ? X86::VMOVUPSZ128rm_NOVLX :
3177  HasAVX ? X86::VMOVUPSrm :
3178  X86::MOVUPSrm):
3179  (HasVLX ? X86::VMOVUPSZ128mr :
3180  HasAVX512 ? X86::VMOVUPSZ128mr_NOVLX :
3181  HasAVX ? X86::VMOVUPSmr :
3182  X86::MOVUPSmr);
3183  }
3184  if (X86::BNDRRegClass.hasSubClassEq(RC)) {
3185  if (STI.is64Bit())
3186  return load ? X86::BNDMOV64rm : X86::BNDMOV64mr;
3187  else
3188  return load ? X86::BNDMOV32rm : X86::BNDMOV32mr;
3189  }
3190  llvm_unreachable("Unknown 16-byte regclass");
3191  }
3192  case 32:
3193  assert(X86::VR256XRegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass");
3194  // If stack is realigned we can use aligned stores.
3195  if (isStackAligned)
3196  return load ?
3197  (HasVLX ? X86::VMOVAPSZ256rm :
3198  HasAVX512 ? X86::VMOVAPSZ256rm_NOVLX :
3199  X86::VMOVAPSYrm) :
3200  (HasVLX ? X86::VMOVAPSZ256mr :
3201  HasAVX512 ? X86::VMOVAPSZ256mr_NOVLX :
3202  X86::VMOVAPSYmr);
3203  else
3204  return load ?
3205  (HasVLX ? X86::VMOVUPSZ256rm :
3206  HasAVX512 ? X86::VMOVUPSZ256rm_NOVLX :
3207  X86::VMOVUPSYrm) :
3208  (HasVLX ? X86::VMOVUPSZ256mr :
3209  HasAVX512 ? X86::VMOVUPSZ256mr_NOVLX :
3210  X86::VMOVUPSYmr);
3211  case 64:
3212  assert(X86::VR512RegClass.hasSubClassEq(RC) && "Unknown 64-byte regclass");
3213  assert(STI.hasAVX512() && "Using 512-bit register requires AVX512");
3214  if (isStackAligned)
3215  return load ? X86::VMOVAPSZrm : X86::VMOVAPSZmr;
3216  else
3217  return load ? X86::VMOVUPSZrm : X86::VMOVUPSZmr;
3218  }
3219 }
3220 
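// Only simple [Base + Disp] addresses are decomposed here: the scale must be
// 1, there must be no index register, and the displacement must be an
// immediate. E.g. "movl 16(%rdi), %eax" yields BaseOp = %rdi and Offset = 16,
// while "movl (%rdi,%rcx,4), %eax" is rejected.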
3221 bool X86InstrInfo::getMemOperandWithOffset(
3222  MachineInstr &MemOp, MachineOperand *&BaseOp, int64_t &Offset,
3223  const TargetRegisterInfo *TRI) const {
3224  const MCInstrDesc &Desc = MemOp.getDesc();
3225  int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags);
3226  if (MemRefBegin < 0)
3227  return false;
3228 
3229  MemRefBegin += X86II::getOperandBias(Desc);
3230 
3231  BaseOp = &MemOp.getOperand(MemRefBegin + X86::AddrBaseReg);
3232  if (!BaseOp->isReg()) // Can be an MO_FrameIndex
3233  return false;
3234 
3235  if (MemOp.getOperand(MemRefBegin + X86::AddrScaleAmt).getImm() != 1)
3236  return false;
3237 
3238  if (MemOp.getOperand(MemRefBegin + X86::AddrIndexReg).getReg() !=
3239  X86::NoRegister)
3240  return false;
3241 
3242  const MachineOperand &DispMO = MemOp.getOperand(MemRefBegin + X86::AddrDisp);
3243 
3244  // Displacement can be symbolic
3245  if (!DispMO.isImm())
3246  return false;
3247 
3248  Offset = DispMO.getImm();
3249 
3250  assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
3251  "operands of type register.");
3252  return true;
3253 }
3254 
3255 static unsigned getStoreRegOpcode(unsigned SrcReg,
3256  const TargetRegisterClass *RC,
3257  bool isStackAligned,
3258  const X86Subtarget &STI) {
3259  return getLoadStoreRegOpcode(SrcReg, RC, isStackAligned, STI, false);
3260 }
3261 
3262 
3263 static unsigned getLoadRegOpcode(unsigned DestReg,
3264  const TargetRegisterClass *RC,
3265  bool isStackAligned,
3266  const X86Subtarget &STI) {
3267  return getLoadStoreRegOpcode(DestReg, RC, isStackAligned, STI, true);
3268 }
3269 
3270 void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
3271  MachineBasicBlock::iterator MI,
3272  unsigned SrcReg, bool isKill, int FrameIdx,
3273  const TargetRegisterClass *RC,
3274  const TargetRegisterInfo *TRI) const {
3275  const MachineFunction &MF = *MBB.getParent();
3276  assert(MF.getFrameInfo().getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) &&
3277  "Stack slot too small for store");
3278  unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16);
3279  bool isAligned =
3280  (Subtarget.getFrameLowering()->getStackAlignment() >= Alignment) ||
3281  RI.canRealignStack(MF);
3282  unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget);
3283  addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc)), FrameIdx)
3284  .addReg(SrcReg, getKillRegState(isKill));
3285 }
3286 
3287 void X86InstrInfo::storeRegToAddr(
3288  MachineFunction &MF, unsigned SrcReg, bool isKill,
3289  SmallVectorImpl<MachineOperand> &Addr, const TargetRegisterClass *RC,
3290  ArrayRef<MachineMemOperand *> MMOs,
3291  SmallVectorImpl<MachineInstr *> &NewMIs) const {
3292  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
3293  unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
3294  bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
3295  unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget);
3296  DebugLoc DL;
3297  MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
3298  for (unsigned i = 0, e = Addr.size(); i != e; ++i)
3299  MIB.add(Addr[i]);
3300  MIB.addReg(SrcReg, getKillRegState(isKill));
3301  MIB.setMemRefs(MMOs);
3302  NewMIs.push_back(MIB);
3303 }
3304 
3305 
3306 void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
3307  MachineBasicBlock::iterator MI,
3308  unsigned DestReg, int FrameIdx,
3309  const TargetRegisterClass *RC,
3310  const TargetRegisterInfo *TRI) const {
3311  const MachineFunction &MF = *MBB.getParent();
3312  unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16);
3313  bool isAligned =
3314  (Subtarget.getFrameLowering()->getStackAlignment() >= Alignment) ||
3315  RI.canRealignStack(MF);
3316  unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget);
3317  addFrameReference(BuildMI(MBB, MI, DebugLoc(), get(Opc), DestReg), FrameIdx);
3318 }
3319 
3320 void X86InstrInfo::loadRegFromAddr(
3321  MachineFunction &MF, unsigned DestReg,
3322  SmallVectorImpl<MachineOperand> &Addr, const TargetRegisterClass *RC,
3323  ArrayRef<MachineMemOperand *> MMOs,
3324  SmallVectorImpl<MachineInstr *> &NewMIs) const {
3325  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
3326  unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
3327  bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
3328  unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget);
3329  DebugLoc DL;
3330  MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
3331  for (unsigned i = 0, e = Addr.size(); i != e; ++i)
3332  MIB.add(Addr[i]);
3333  MIB.setMemRefs(MMOs);
3334  NewMIs.push_back(MIB);
3335 }
3336 
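// Decompose a flag-producing compare or subtract, e.g. "cmpl $4, %eax" yields
// SrcReg = %eax, CmpMask = ~0, CmpValue = 4, and "cmpl %ebx, %eax" yields
// SrcReg = %eax, SrcReg2 = %ebx with CmpMask = CmpValue = 0.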
3337 bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
3338  unsigned &SrcReg2, int &CmpMask,
3339  int &CmpValue) const {
3340  switch (MI.getOpcode()) {
3341  default: break;
3342  case X86::CMP64ri32:
3343  case X86::CMP64ri8:
3344  case X86::CMP32ri:
3345  case X86::CMP32ri8:
3346  case X86::CMP16ri:
3347  case X86::CMP16ri8:
3348  case X86::CMP8ri:
3349  SrcReg = MI.getOperand(0).getReg();
3350  SrcReg2 = 0;
3351  if (MI.getOperand(1).isImm()) {
3352  CmpMask = ~0;
3353  CmpValue = MI.getOperand(1).getImm();
3354  } else {
3355  CmpMask = CmpValue = 0;
3356  }
3357  return true;
3358  // A SUB can be used to perform comparison.
3359  case X86::SUB64rm:
3360  case X86::SUB32rm:
3361  case X86::SUB16rm:
3362  case X86::SUB8rm:
3363  SrcReg = MI.getOperand(1).getReg();
3364  SrcReg2 = 0;
3365  CmpMask = 0;
3366  CmpValue = 0;
3367  return true;
3368  case X86::SUB64rr:
3369  case X86::SUB32rr:
3370  case X86::SUB16rr:
3371  case X86::SUB8rr:
3372  SrcReg = MI.getOperand(1).getReg();
3373  SrcReg2 = MI.getOperand(2).getReg();
3374  CmpMask = 0;
3375  CmpValue = 0;
3376  return true;
3377  case X86::SUB64ri32:
3378  case X86::SUB64ri8:
3379  case X86::SUB32ri:
3380  case X86::SUB32ri8:
3381  case X86::SUB16ri:
3382  case X86::SUB16ri8:
3383  case X86::SUB8ri:
3384  SrcReg = MI.getOperand(1).getReg();
3385  SrcReg2 = 0;
3386  if (MI.getOperand(2).isImm()) {
3387  CmpMask = ~0;
3388  CmpValue = MI.getOperand(2).getImm();
3389  } else {
3390  CmpMask = CmpValue = 0;
3391  }
3392  return true;
3393  case X86::CMP64rr:
3394  case X86::CMP32rr:
3395  case X86::CMP16rr:
3396  case X86::CMP8rr:
3397  SrcReg = MI.getOperand(0).getReg();
3398  SrcReg2 = MI.getOperand(1).getReg();
3399  CmpMask = 0;
3400  CmpValue = 0;
3401  return true;
3402  case X86::TEST8rr:
3403  case X86::TEST16rr:
3404  case X86::TEST32rr:
3405  case X86::TEST64rr:
3406  SrcReg = MI.getOperand(0).getReg();
3407  if (MI.getOperand(1).getReg() != SrcReg)
3408  return false;
3409  // Compare against zero.
3410  SrcReg2 = 0;
3411  CmpMask = ~0;
3412  CmpValue = 0;
3413  return true;
3414  }
3415  return false;
3416 }
3417 
3418 /// Check whether the first instruction, whose only
3419 /// purpose is to update flags, can be made redundant.
3420 /// CMPrr can be made redundant by SUBrr if the operands are the same.
3421 /// This function can be extended later on.
3422 /// SrcReg, SrcReg2: register operands for FlagI.
3423 /// ImmValue: immediate for FlagI if it takes an immediate.
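/// For example, a CMP32rr on the same two registers as an earlier SUB32rr is
/// redundant (in either operand order; the caller swaps the condition code for
/// the reversed case), and a CMP32ri is redundant after a SUB32ri with the
/// same register and immediate.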
3424 inline static bool isRedundantFlagInstr(const MachineInstr &FlagI,
3425  unsigned SrcReg, unsigned SrcReg2,
3426  int ImmMask, int ImmValue,
3427  const MachineInstr &OI) {
3428  if (((FlagI.getOpcode() == X86::CMP64rr && OI.getOpcode() == X86::SUB64rr) ||
3429  (FlagI.getOpcode() == X86::CMP32rr && OI.getOpcode() == X86::SUB32rr) ||
3430  (FlagI.getOpcode() == X86::CMP16rr && OI.getOpcode() == X86::SUB16rr) ||
3431  (FlagI.getOpcode() == X86::CMP8rr && OI.getOpcode() == X86::SUB8rr)) &&
3432  ((OI.getOperand(1).getReg() == SrcReg &&
3433  OI.getOperand(2).getReg() == SrcReg2) ||
3434  (OI.getOperand(1).getReg() == SrcReg2 &&
3435  OI.getOperand(2).getReg() == SrcReg)))
3436  return true;
3437 
3438  if (ImmMask != 0 &&
3439  ((FlagI.getOpcode() == X86::CMP64ri32 &&
3440  OI.getOpcode() == X86::SUB64ri32) ||
3441  (FlagI.getOpcode() == X86::CMP64ri8 &&
3442  OI.getOpcode() == X86::SUB64ri8) ||
3443  (FlagI.getOpcode() == X86::CMP32ri && OI.getOpcode() == X86::SUB32ri) ||
3444  (FlagI.getOpcode() == X86::CMP32ri8 &&
3445  OI.getOpcode() == X86::SUB32ri8) ||
3446  (FlagI.getOpcode() == X86::CMP16ri && OI.getOpcode() == X86::SUB16ri) ||
3447  (FlagI.getOpcode() == X86::CMP16ri8 &&
3448  OI.getOpcode() == X86::SUB16ri8) ||
3449  (FlagI.getOpcode() == X86::CMP8ri && OI.getOpcode() == X86::SUB8ri)) &&
3450  OI.getOperand(1).getReg() == SrcReg &&
3451  OI.getOperand(2).getImm() == ImmValue)
3452  return true;
3453  return false;
3454 }
3455 
3456 /// Check whether the definition can be converted
3457 /// to remove a comparison against zero.
3458 inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag) {
3459  NoSignFlag = false;
3460 
3461  switch (MI.getOpcode()) {
3462  default: return false;
3463 
3464  // The shift instructions only modify ZF if their shift count is non-zero.
3465  // N.B.: The processor truncates the shift count depending on the encoding.
3466  case X86::SAR8ri: case X86::SAR16ri: case X86::SAR32ri:case X86::SAR64ri:
3467  case X86::SHR8ri: case X86::SHR16ri: case X86::SHR32ri:case X86::SHR64ri:
3468  return getTruncatedShiftCount(MI, 2) != 0;
3469 
3470  // Some left shift instructions can be turned into LEA instructions but only
3471  // if their flags aren't used. Avoid transforming such instructions.
3472  case X86::SHL8ri: case X86::SHL16ri: case X86::SHL32ri:case X86::SHL64ri:{
3473  unsigned ShAmt = getTruncatedShiftCount(MI, 2);
3474  if (isTruncatedShiftCountForLEA(ShAmt)) return false;
3475  return ShAmt != 0;
3476  }
3477 
3478  case X86::SHRD16rri8:case X86::SHRD32rri8:case X86::SHRD64rri8:
3479  case X86::SHLD16rri8:case X86::SHLD32rri8:case X86::SHLD64rri8:
3480  return getTruncatedShiftCount(MI, 3) != 0;
3481 
3482  case X86::SUB64ri32: case X86::SUB64ri8: case X86::SUB32ri:
3483  case X86::SUB32ri8: case X86::SUB16ri: case X86::SUB16ri8:
3484  case X86::SUB8ri: case X86::SUB64rr: case X86::SUB32rr:
3485  case X86::SUB16rr: case X86::SUB8rr: case X86::SUB64rm:
3486  case X86::SUB32rm: case X86::SUB16rm: case X86::SUB8rm:
3487  case X86::DEC64r: case X86::DEC32r: case X86::DEC16r: case X86::DEC8r:
3488  case X86::ADD64ri32: case X86::ADD64ri8: case X86::ADD32ri:
3489  case X86::ADD32ri8: case X86::ADD16ri: case X86::ADD16ri8:
3490  case X86::ADD8ri: case X86::ADD64rr: case X86::ADD32rr:
3491  case X86::ADD16rr: case X86::ADD8rr: case X86::ADD64rm:
3492  case X86::ADD32rm: case X86::ADD16rm: case X86::ADD8rm:
3493  case X86::INC64r: case X86::INC32r: case X86::INC16r: case X86::INC8r:
3494  case X86::AND64ri32: case X86::AND64ri8: case X86::AND32ri:
3495  case X86::AND32ri8: case X86::AND16ri: case X86::AND16ri8:
3496  case X86::AND8ri: case X86::AND64rr: case X86::AND32rr:
3497  case X86::AND16rr: case X86::AND8rr: case X86::AND64rm:
3498  case X86::AND32rm: case X86::AND16rm: case X86::AND8rm:
3499  case X86::XOR64ri32: case X86::XOR64ri8: case X86::XOR32ri:
3500  case X86::XOR32ri8: case X86::XOR16ri: case X86::XOR16ri8:
3501  case X86::XOR8ri: case X86::XOR64rr: case X86::XOR32rr:
3502  case X86::XOR16rr: case X86::XOR8rr: case X86::XOR64rm:
3503  case X86::XOR32rm: case X86::XOR16rm: case X86::XOR8rm:
3504  case X86::OR64ri32: case X86::OR64ri8: case X86::OR32ri:
3505  case X86::OR32ri8: case X86::OR16ri: case X86::OR16ri8:
3506  case X86::OR8ri: case X86::OR64rr: case X86::OR32rr:
3507  case X86::OR16rr: case X86::OR8rr: case X86::OR64rm:
3508  case X86::OR32rm: case X86::OR16rm: case X86::OR8rm:
3509  case X86::ADC64ri32: case X86::ADC64ri8: case X86::ADC32ri:
3510  case X86::ADC32ri8: case X86::ADC16ri: case X86::ADC16ri8:
3511  case X86::ADC8ri: case X86::ADC64rr: case X86::ADC32rr:
3512  case X86::ADC16rr: case X86::ADC8rr: case X86::ADC64rm:
3513  case X86::ADC32rm: case X86::ADC16rm: case X86::ADC8rm:
3514  case X86::SBB64ri32: case X86::SBB64ri8: case X86::SBB32ri:
3515  case X86::SBB32ri8: case X86::SBB16ri: case X86::SBB16ri8:
3516  case X86::SBB8ri: case X86::SBB64rr: case X86::SBB32rr:
3517  case X86::SBB16rr: case X86::SBB8rr: case X86::SBB64rm:
3518  case X86::SBB32rm: case X86::SBB16rm: case X86::SBB8rm:
3519  case X86::NEG8r: case X86::NEG16r: case X86::NEG32r: case X86::NEG64r:
3520  case X86::SAR8r1: case X86::SAR16r1: case X86::SAR32r1:case X86::SAR64r1:
3521  case X86::SHR8r1: case X86::SHR16r1: case X86::SHR32r1:case X86::SHR64r1:
3522  case X86::SHL8r1: case X86::SHL16r1: case X86::SHL32r1:case X86::SHL64r1:
3523  case X86::ANDN32rr: case X86::ANDN32rm:
3524  case X86::ANDN64rr: case X86::ANDN64rm:
3525  case X86::BLSI32rr: case X86::BLSI32rm:
3526  case X86::BLSI64rr: case X86::BLSI64rm:
3527  case X86::BLSMSK32rr:case X86::BLSMSK32rm:
3528  case X86::BLSMSK64rr:case X86::BLSMSK64rm:
3529  case X86::BLSR32rr: case X86::BLSR32rm:
3530  case X86::BLSR64rr: case X86::BLSR64rm:
3531  case X86::BZHI32rr: case X86::BZHI32rm:
3532  case X86::BZHI64rr: case X86::BZHI64rm:
3533  case X86::LZCNT16rr: case X86::LZCNT16rm:
3534  case X86::LZCNT32rr: case X86::LZCNT32rm:
3535  case X86::LZCNT64rr: case X86::LZCNT64rm:
3536  case X86::POPCNT16rr:case X86::POPCNT16rm:
3537  case X86::POPCNT32rr:case X86::POPCNT32rm:
3538  case X86::POPCNT64rr:case X86::POPCNT64rm:
3539  case X86::TZCNT16rr: case X86::TZCNT16rm:
3540  case X86::TZCNT32rr: case X86::TZCNT32rm:
3541  case X86::TZCNT64rr: case X86::TZCNT64rm:
3542  case X86::BLCFILL32rr: case X86::BLCFILL32rm:
3543  case X86::BLCFILL64rr: case X86::BLCFILL64rm:
3544  case X86::BLCI32rr: case X86::BLCI32rm:
3545  case X86::BLCI64rr: case X86::BLCI64rm:
3546  case X86::BLCIC32rr: case X86::BLCIC32rm:
3547  case X86::BLCIC64rr: case X86::BLCIC64rm:
3548  case X86::BLCMSK32rr: case X86::BLCMSK32rm:
3549  case X86::BLCMSK64rr: case X86::BLCMSK64rm:
3550  case X86::BLCS32rr: case X86::BLCS32rm:
3551  case X86::BLCS64rr: case X86::BLCS64rm:
3552  case X86::BLSFILL32rr: case X86::BLSFILL32rm:
3553  case X86::BLSFILL64rr: case X86::BLSFILL64rm:
3554  case X86::BLSIC32rr: case X86::BLSIC32rm:
3555  case X86::BLSIC64rr: case X86::BLSIC64rm:
3556  case X86::T1MSKC32rr: case X86::T1MSKC32rm:
3557  case X86::T1MSKC64rr: case X86::T1MSKC64rm:
3558  case X86::TZMSK32rr: case X86::TZMSK32rm:
3559  case X86::TZMSK64rr: case X86::TZMSK64rm:
3560  return true;
3561  case X86::BEXTR32rr: case X86::BEXTR64rr:
3562  case X86::BEXTR32rm: case X86::BEXTR64rm:
3563  case X86::BEXTRI32ri: case X86::BEXTRI32mi:
3564  case X86::BEXTRI64ri: case X86::BEXTRI64mi:
3565  // BEXTR doesn't update the sign flag so we can't use it.
3566  NoSignFlag = true;
3567  return true;
3568  }
3569 }
3570 
3571 /// Check whether the use can be converted to remove a comparison against zero.
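/// E.g. POPCNT sets ZF exactly when its source is zero, so a compare of that
/// source against zero can reuse POPCNT's flags via COND_E; LZCNT and TZCNT
/// instead report a zero source in CF (COND_B), and BSF/BSR in ZF (COND_E).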
3572 static X86::CondCode isUseDefConvertible(const MachineInstr &MI) {
3573  switch (MI.getOpcode()) {
3574  default: return X86::COND_INVALID;
3575  case X86::LZCNT16rr: case X86::LZCNT16rm:
3576  case X86::LZCNT32rr: case X86::LZCNT32rm:
3577  case X86::LZCNT64rr: case X86::LZCNT64rm:
3578  return X86::COND_B;
3579  case X86::POPCNT16rr:case X86::POPCNT16rm:
3580  case X86::POPCNT32rr:case X86::POPCNT32rm:
3581  case X86::POPCNT64rr:case X86::POPCNT64rm:
3582  return X86::COND_E;
3583  case X86::TZCNT16rr: case X86::TZCNT16rm:
3584  case X86::TZCNT32rr: case X86::TZCNT32rm:
3585  case X86::TZCNT64rr: case X86::TZCNT64rm:
3586  return X86::COND_B;
3587  case X86::BSF16rr: case X86::BSF16rm:
3588  case X86::BSF32rr: case X86::BSF32rm:
3589  case X86::BSF64rr: case X86::BSF64rm:
3590  case X86::BSR16rr: case X86::BSR16rm:
3591  case X86::BSR32rr: case X86::BSR32rm:
3592  case X86::BSR64rr: case X86::BSR64rm:
3593  return X86::COND_E;
3594  }
3595 }
3596 
3597 /// Check if there exists an earlier instruction that
3598 /// operates on the same source operands and sets flags in the same way as
3599 /// Compare; remove Compare if possible.
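/// For example, in
///   subl %esi, %edi
///   cmpl $0, %edi
///   jne  <target>
/// the compare is redundant because the SUB already set the flags, so it can
/// be erased (a SUB whose result is otherwise unused is first rewritten as the
/// corresponding CMP).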
3600 bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
3601  unsigned SrcReg2, int CmpMask,
3602  int CmpValue,
3603  const MachineRegisterInfo *MRI) const {
3604  // Check whether we can replace SUB with CMP.
3605  unsigned NewOpcode = 0;
3606  switch (CmpInstr.getOpcode()) {
3607  default: break;
3608  case X86::SUB64ri32:
3609  case X86::SUB64ri8:
3610  case X86::SUB32ri:
3611  case X86::SUB32ri8:
3612  case X86::SUB16ri:
3613  case X86::SUB16ri8:
3614  case X86::SUB8ri:
3615  case X86::SUB64rm:
3616  case X86::SUB32rm:
3617  case X86::SUB16rm:
3618  case X86::SUB8rm:
3619  case X86::SUB64rr:
3620  case X86::SUB32rr:
3621  case X86::SUB16rr:
3622  case X86::SUB8rr: {
3623  if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
3624  return false;
3625  // There is no use of the destination register; we can replace SUB with CMP.
3626  switch (CmpInstr.getOpcode()) {
3627  default: llvm_unreachable("Unreachable!");
3628  case X86::SUB64rm: NewOpcode = X86::CMP64rm; break;
3629  case X86::SUB32rm: NewOpcode = X86::CMP32rm; break;
3630  case X86::SUB16rm: NewOpcode = X86::CMP16rm; break;
3631  case X86::SUB8rm: NewOpcode = X86::CMP8rm; break;
3632  case X86::SUB64rr: NewOpcode = X86::CMP64rr; break;
3633  case X86::SUB32rr: NewOpcode = X86::CMP32rr; break;
3634  case X86::SUB16rr: NewOpcode = X86::CMP16rr; break;
3635  case X86::SUB8rr: NewOpcode = X86::CMP8rr; break;
3636  case X86::SUB64ri32: NewOpcode = X86::CMP64ri32; break;
3637  case X86::SUB64ri8: NewOpcode = X86::CMP64ri8; break;
3638  case X86::SUB32ri: NewOpcode = X86::CMP32ri; break;
3639  case X86::SUB32ri8: NewOpcode = X86::CMP32ri8; break;
3640  case X86::SUB16ri: NewOpcode = X86::CMP16ri; break;
3641  case X86::SUB16ri8: NewOpcode = X86::CMP16ri8; break;
3642  case X86::SUB8ri: NewOpcode = X86::CMP8ri; break;
3643  }
3644  CmpInstr.setDesc(get(NewOpcode));
3645  CmpInstr.RemoveOperand(0);
3646  // Fall through to optimize Cmp if Cmp is CMPrr or CMPri.
3647  if (NewOpcode == X86::CMP64rm || NewOpcode == X86::CMP32rm ||
3648  NewOpcode == X86::CMP16rm || NewOpcode == X86::CMP8rm)
3649  return false;
3650  }
3651  }
3652 
3653  // Get the unique definition of SrcReg.
3654  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
3655  if (!MI) return false;
3656 
3657  // CmpInstr is the first instruction of the BB.
3658  MachineBasicBlock::iterator I = CmpInstr, Def = MI;
3659 
3660  // If we are comparing against zero, check whether we can use MI to update
3661  // EFLAGS. If MI is not in the same BB as CmpInstr, do not optimize.
3662  bool IsCmpZero = (CmpMask != 0 && CmpValue == 0);
3663  if (IsCmpZero && MI->getParent() != CmpInstr.getParent())
3664  return false;
3665 
3666  // If we have a use of the source register between the def and our compare
3667  // instruction we can eliminate the compare iff the use sets EFLAGS in the
3668  // right way.
3669  bool ShouldUpdateCC = false;
3670  bool NoSignFlag = false;
3671  X86::CondCode NewCC = X86::COND_INVALID;
3672  if (IsCmpZero && !isDefConvertible(*MI, NoSignFlag)) {
3673  // Scan forward from the def until we hit a flag-setting use of SrcReg that
3674  // we can reuse, or the compare instruction.
3675  for (MachineBasicBlock::iterator J = MI;; ++J) {
3676  // Do we have a convertible instruction?
3677  NewCC = isUseDefConvertible(*J);
3678  if (NewCC != X86::COND_INVALID && J->getOperand(1).isReg() &&
3679  J->getOperand(1).getReg() == SrcReg) {
3680  assert(J->definesRegister(X86::EFLAGS) && "Must be an EFLAGS def!");
3681  ShouldUpdateCC = true; // Update CC later on.
3682  // This is not a def of SrcReg, but still a def of EFLAGS. Keep going
3683  // with the new def.
3684  Def = J;
3685  MI = &*Def;
3686  break;
3687  }
3688 
3689  if (J == I)
3690  return false;
3691  }
3692  }
3693 
3694  // We are searching for an earlier instruction that can make CmpInstr
3695  // redundant and that instruction will be saved in Sub.
3696  MachineInstr *Sub = nullptr;
3697  const TargetRegisterInfo *TRI = &getRegisterInfo();
3698 
3699  // We iterate backward, starting from the instruction before CmpInstr, and
3700  // stop when we reach the definition of a source register or the start of the BB.
3701  // RI points to the instruction before CmpInstr.
3702  // If the definition is in this basic block, RE points to the definition;
3703  // otherwise, RE is the rend of the basic block.
3704  MachineBasicBlock::reverse_iterator
3705  RI = ++I.getReverse(),
3706  RE = CmpInstr.getParent() == MI->getParent()
3707  ? Def.getReverse() /* points to MI */
3708  : CmpInstr.getParent()->rend();
3709  MachineInstr *Movr0Inst = nullptr;
3710  for (; RI != RE; ++RI) {
3711  MachineInstr &Instr = *RI;
3712  // Check whether CmpInstr can be made redundant by the current instruction.
3713  if (!IsCmpZero && isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask,
3714  CmpValue, Instr)) {
3715  Sub = &Instr;
3716  break;
3717  }
3718 
3719  if (Instr.modifiesRegister(X86::EFLAGS, TRI) ||
3720  Instr.readsRegister(X86::EFLAGS, TRI)) {
3721  // This instruction modifies or uses EFLAGS.
3722 
3723  // MOV32r0 etc. are implemented with xor which clobbers condition code.
3724  // They are safe to move up if the definition of EFLAGS is dead and
3725  // earlier instructions do not read or write EFLAGS.
3726  if (!Movr0Inst && Instr.getOpcode() == X86::MOV32r0 &&
3727  Instr.registerDefIsDead(X86::EFLAGS, TRI)) {
3728  Movr0Inst = &Instr;
3729  continue;
3730  }
3731 
3732  // We can't remove CmpInstr.
3733  return false;
3734  }
3735  }
3736 
3737  // Return false if no candidates exist.
3738  if (!IsCmpZero && !Sub)
3739  return false;
3740 
3741  bool IsSwapped = (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
3742  Sub->getOperand(2).getReg() == SrcReg);
3743 
3744  // Scan forward from the instruction after CmpInstr for uses of EFLAGS.
3745  // It is safe to remove CmpInstr if EFLAGS is redefined or killed.
3746  // If we are done with the basic block, we need to check whether EFLAGS is
3747  // live-out.
3748  bool IsSafe = false;
3749  SmallVector<std::pair<MachineInstr*, unsigned /*NewOpc*/>, 4> OpsToUpdate;
3750  MachineBasicBlock::iterator E = CmpInstr.getParent()->end();
3751  for (++I; I != E; ++I) {
3752  const MachineInstr &Instr = *I;
3753  bool ModifyEFLAGS = Instr.modifiesRegister(X86::EFLAGS, TRI);
3754  bool UseEFLAGS = Instr.readsRegister(X86::EFLAGS, TRI);
3755  // We should check the usage if this instruction uses and updates EFLAGS.
3756  if (!UseEFLAGS && ModifyEFLAGS) {
3757  // It is safe to remove CmpInstr if EFLAGS is updated again.
3758  IsSafe = true;
3759  break;
3760  }
3761  if (!UseEFLAGS && !ModifyEFLAGS)
3762  continue;
3763 
3764  // EFLAGS is used by this instruction.
3765  X86::CondCode OldCC = X86::COND_INVALID;
3766  bool OpcIsSET = false;
3767  if (IsCmpZero || IsSwapped) {
3768  // We decode the condition code from the opcode.
3769  if (Instr.isBranch())
3770  OldCC = X86::getCondFromBranchOpc(Instr.getOpcode());
3771  else {
3772  OldCC = X86::getCondFromSETOpc(Instr.getOpcode());
3773  if (OldCC != X86::COND_INVALID)
3774  OpcIsSET = true;
3775  else
3776  OldCC = X86::getCondFromCMovOpc(Instr.getOpcode());
3777  }
3778  if (OldCC == X86::COND_INVALID) return false;
3779  }
3780  X86::CondCode ReplacementCC = X86::COND_INVALID;
3781  if (IsCmpZero) {
3782  switch (OldCC) {
3783  default: break;
3784  case X86::COND_A: case X86::COND_AE:
3785  case X86::COND_B: case X86::COND_BE:
3786  case X86::COND_G: case X86::COND_GE:
3787  case X86::COND_L: case X86::COND_LE:
3788  case X86::COND_O: case X86::COND_NO:
3789  // CF and OF are used, we can't perform this optimization.
3790  return false;
3791  case X86::COND_S: case X86::COND_NS:
3792  // If SF is used, but the instruction doesn't update the SF, then we
3793  // can't do the optimization.
3794  if (NoSignFlag)
3795  return false;
3796  break;
3797  }
3798 
3799  // If we're updating the condition code, check whether we have to reverse
3800  // the condition.
3801  if (ShouldUpdateCC)
3802  switch (OldCC) {
3803  default:
3804  return false;
3805  case X86::COND_E:
3806  ReplacementCC = NewCC;
3807  break;
3808  case X86::COND_NE:
3809  ReplacementCC = GetOppositeBranchCondition(NewCC);
3810  break;
3811  }
3812  } else if (IsSwapped) {
3813  // If we have SUB(r1, r2) and CMP(r2, r1), the condition code needs
3814  // to be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
3815  // We swap the condition code and synthesize the new opcode.
3816  ReplacementCC = getSwappedCondition(OldCC);
3817  if (ReplacementCC == X86::COND_INVALID) return false;
3818  }
3819 
3820  if ((ShouldUpdateCC || IsSwapped) && ReplacementCC != OldCC) {
3821  // Synthesize the new opcode.
3822  bool HasMemoryOperand = Instr.hasOneMemOperand();
3823  unsigned NewOpc;
3824  if (Instr.isBranch())
3825  NewOpc = GetCondBranchFromCond(ReplacementCC);
3826  else if(OpcIsSET)
3827  NewOpc = getSETFromCond(ReplacementCC, HasMemoryOperand);
3828  else {
3829  unsigned DstReg = Instr.getOperand(0).getReg();
3830  const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
3831  NewOpc = getCMovFromCond(ReplacementCC, TRI->getRegSizeInBits(*DstRC)/8,
3832  HasMemoryOperand);
3833  }
3834 
3835  // Push the MachineInstr to OpsToUpdate.
3836  // If it is safe to remove CmpInstr, the condition code of these
3837  // instructions will be modified.
3838  OpsToUpdate.push_back(std::make_pair(&*I, NewOpc));
3839  }
3840  if (ModifyEFLAGS || Instr.killsRegister(X86::EFLAGS, TRI)) {
3841  // It is safe to remove CmpInstr if EFLAGS is updated again or killed.
3842  IsSafe = true;
3843  break;
3844  }
3845  }
3846 
3847  // If EFLAGS is not killed nor re-defined, we should check whether it is
3848  // live-out. If it is live-out, do not optimize.
3849  if ((IsCmpZero || IsSwapped) && !IsSafe) {
3850  MachineBasicBlock *MBB = CmpInstr.getParent();
3851  for (MachineBasicBlock *Successor : MBB->successors())
3852  if (Successor->isLiveIn(X86::EFLAGS))
3853  return false;
3854  }
3855 
3856  // The instruction to be updated is either Sub or MI.
3857  Sub = IsCmpZero ? MI : Sub;
3858  // Move Movr0Inst to the appropriate place before Sub.
3859  if (Movr0Inst) {
3860  // Look backwards until we find a def that doesn't use the current EFLAGS.
3861  Def = Sub;
3862  MachineBasicBlock::reverse_iterator InsertI = Def.getReverse(),
3863  InsertE = Sub->getParent()->rend();
3864  for (; InsertI != InsertE; ++InsertI) {
3865  MachineInstr *Instr = &*InsertI;
3866  if (!Instr->readsRegister(X86::EFLAGS, TRI) &&
3867  Instr->modifiesRegister(X86::EFLAGS, TRI)) {
3868  Sub->getParent()->remove(Movr0Inst);
3869  Instr->getParent()->insert(MachineBasicBlock::iterator(Instr),
3870  Movr0Inst);
3871  break;
3872  }
3873  }
3874  if (InsertI == InsertE)
3875  return false;
3876  }
3877 
3878  // Make sure Sub instruction defines EFLAGS and mark the def live.
3879  unsigned i = 0, e = Sub->getNumOperands();
3880  for (; i != e; ++i) {
3881  MachineOperand &MO = Sub->getOperand(i);
3882  if (MO.isReg() && MO.isDef() && MO.getReg() == X86::EFLAGS) {
3883  MO.setIsDead(false);
3884  break;
3885  }
3886  }
3887  assert(i != e && "Unable to locate a def EFLAGS operand");
3888 
3889  CmpInstr.eraseFromParent();
3890 
3891  // Modify the condition code of instructions in OpsToUpdate.
3892  for (auto &Op : OpsToUpdate)
3893  Op.first->setDesc(get(Op.second));
3894  return true;
3895 }
3896 
3897 /// Try to remove the load by folding it to a register
3898 /// operand at the use. We fold the load instructions if the load defines a virtual
3899 /// register, the virtual register is used once in the same BB, and the
3900 /// instructions in-between do not load or store, and have no side effects.
3901 MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr &MI,
3902  const MachineRegisterInfo *MRI,
3903  unsigned &FoldAsLoadDefReg,
3904  MachineInstr *&DefMI) const {
3905  // Check whether we can move DefMI here.
3906  DefMI = MRI->getVRegDef(FoldAsLoadDefReg);
3907  assert(DefMI);
3908  bool SawStore = false;
3909  if (!DefMI->isSafeToMove(nullptr, SawStore))
3910  return nullptr;
3911 
3912  // Collect information about virtual register operands of MI.
3913  SmallVector<unsigned, 1> SrcOperandIds;
3914  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
3915  MachineOperand &MO = MI.getOperand(i);
3916  if (!MO.isReg())
3917  continue;
3918  unsigned Reg = MO.getReg();
3919  if (Reg != FoldAsLoadDefReg)
3920  continue;
3921  // Do not fold if we have a subreg use or a def.
3922  if (MO.getSubReg() || MO.isDef())
3923  return nullptr;
3924  SrcOperandIds.push_back(i);
3925  }
3926  if (SrcOperandIds.empty())
3927  return nullptr;
3928 
3929  // Check whether we can fold the def into the uses collected in SrcOperandIds.
3930  if (MachineInstr *FoldMI = foldMemoryOperand(MI, SrcOperandIds, *DefMI)) {
3931  FoldAsLoadDefReg = 0;
3932  return FoldMI;
3933  }
3934 
3935  return nullptr;
3936 }
3937 
3938 /// Expand a single-def pseudo instruction to a two-addr
3939 /// instruction with two undef reads of the register being defined.
3940 /// This is used for mapping:
3941 /// %xmm4 = V_SET0
3942 /// to:
3943 /// %xmm4 = PXORrr undef %xmm4, undef %xmm4
3944 ///
3945 static bool Expand2AddrUndef(MachineInstrBuilder &MIB,
3946  const MCInstrDesc &Desc) {
3947  assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
3948  unsigned Reg = MIB->getOperand(0).getReg();
3949  MIB->setDesc(Desc);
3950 
3951  // MachineInstr::addOperand() will insert explicit operands before any
3952  // implicit operands.
3953  MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
3954  // But we don't trust that.
3955  assert(MIB->getOperand(1).getReg() == Reg &&
3956  MIB->getOperand(2).getReg() == Reg && "Misplaced operand");
3957  return true;
3958 }
3959 
3960 /// Expand a single-def pseudo instruction to a two-addr
3961 /// instruction with two %k0 reads.
3962 /// This is used for mapping:
3963 /// %k4 = K_SET1
3964 /// to:
3965 /// %k4 = KXNORrr %k0, %k0
3966 static bool Expand2AddrKreg(MachineInstrBuilder &MIB,
3967  const MCInstrDesc &Desc, unsigned Reg) {
3968  assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
3969  MIB->setDesc(Desc);
3970  MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
3971  return true;
3972 }
3973 
3974 static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII,
3975  bool MinusOne) {
3976  MachineBasicBlock &MBB = *MIB->getParent();
3977  DebugLoc DL = MIB->getDebugLoc();
3978  unsigned Reg = MIB->getOperand(0).getReg();
3979 
3980  // Insert the XOR.
3981  BuildMI(MBB, MIB.getInstr(), DL, TII.get(X86::XOR32rr), Reg)
3982  .addReg(Reg, RegState::Undef)
3983  .addReg(Reg, RegState::Undef);
3984 
3985  // Turn the pseudo into an INC or DEC.
3986  MIB->setDesc(TII.get(MinusOne ? X86::DEC32r : X86::INC32r));
3987  MIB.addReg(Reg);
3988 
3989  return true;
3990 }
3991 
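// Materialize a sign-extended 8-bit immediate through the stack when that is
// typically smaller than a plain MOVri, e.g. MOV64ImmSExti8 of -1 into %rax
// becomes "pushq $-1; popq %rax"; if the function uses the red zone the
// pseudo falls back to an ordinary MOV32ri/MOV64ri instead.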
3992 static bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB,
3993  const TargetInstrInfo &TII,
3994  const X86Subtarget &Subtarget) {
3995  MachineBasicBlock &MBB = *MIB->getParent();
3996  DebugLoc DL = MIB->getDebugLoc();
3997  int64_t Imm = MIB->getOperand(1).getImm();
3998  assert(Imm != 0 && "Using push/pop for 0 is not efficient.");
3999  MachineBasicBlock::iterator I = MIB.getInstr();
4000 
4001  int StackAdjustment;
4002 
4003  if (Subtarget.is64Bit()) {
4004  assert(MIB->getOpcode() == X86::MOV64ImmSExti8 ||
4005  MIB->getOpcode() == X86::MOV32ImmSExti8);
4006 
4007  // Can't use push/pop lowering if the function might write to the red zone.
4008  X86MachineFunctionInfo *X86FI =
4009  MBB.getParent()->getInfo<X86MachineFunctionInfo>();
4010  if (X86FI->getUsesRedZone()) {
4011  MIB->setDesc(TII.get(MIB->getOpcode() ==
4012  X86::MOV32ImmSExti8 ? X86::MOV32ri : X86::MOV64ri));
4013  return true;
4014  }
4015 
4016  // 64-bit mode doesn't have 32-bit push/pop, so use 64-bit operations and
4017  // widen the register if necessary.
4018  StackAdjustment = 8;
4019  BuildMI(MBB, I, DL, TII.get(X86::PUSH64i8)).addImm(Imm);
4020  MIB->setDesc(TII.get(X86::POP64r));
4021  MIB->getOperand(0)
4022  .setReg(getX86SubSuperRegister(MIB->getOperand(0).getReg(), 64));
4023  } else {
4024  assert(MIB->getOpcode() == X86::MOV32ImmSExti8);
4025  StackAdjustment = 4;
4026  BuildMI(MBB, I, DL, TII.get(X86::PUSH32i8)).addImm(Imm);
4027  MIB->setDesc(TII.get(X86::POP32r));
4028  }
4029 
4030  // Build CFI if necessary.
4031  MachineFunction &MF = *MBB.getParent();
4032  const X86FrameLowering *TFL = Subtarget.getFrameLowering();
4033  bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
4034  bool NeedsDwarfCFI =
4035  !IsWin64Prologue &&
4036  (MF.getMMI().hasDebugInfo() || MF.getFunction().needsUnwindTableEntry());
4037  bool EmitCFI = !TFL->hasFP(MF) && NeedsDwarfCFI;
4038  if (EmitCFI) {
4039  TFL->BuildCFI(MBB, I, DL,
4040  MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
4041  TFL->BuildCFI(MBB, std::next(I), DL,
4042  MCCFIInstruction::createAdjustCfaOffset(nullptr, -StackAdjustment));
4043  }
4044 
4045  return true;
4046 }
4047 
4048 // LoadStackGuard has so far only been implemented for 64-bit MachO. A
4049 // different code sequence is needed for other targets.
4050 static void expandLoadStackGuard(MachineInstrBuilder &MIB,
4051  const TargetInstrInfo &TII) {
4052  MachineBasicBlock &MBB = *MIB->getParent();
4053  DebugLoc DL = MIB->getDebugLoc();
4054  unsigned Reg = MIB->getOperand(0).getReg();
4055  const GlobalValue *GV =
4056  cast<GlobalValue>((*MIB->memoperands_begin())->getValue());
4057  auto Flags = MachineMemOperand::MOLoad |
4058  MachineMemOperand::MODereferenceable |
4059  MachineMemOperand::MOInvariant;
4060  MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
4061  MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 8, 8);
4062  MachineBasicBlock::iterator I = MIB.getInstr();
4063 
4064  BuildMI(MBB, I, DL, TII.get(X86::MOV64rm), Reg).addReg(X86::RIP).addImm(1)
4065  .addReg(0).addGlobalAddress(GV, 0, X86II::MO_GOTPCREL).addReg(0)
4066  .addMemOperand(MMO);
4067  MIB->setDebugLoc(DL);
4068  MIB->setDesc(TII.get(X86::MOV64rm));
4069  MIB.addReg(Reg, RegState::Kill).addImm(1).addReg(0).addImm(0).addReg(0);
4070 }
4071 
4072 static bool expandXorFP(MachineInstrBuilder &MIB, const TargetInstrInfo &TII) {
4073  MachineBasicBlock &MBB = *MIB->getParent();
4074  MachineFunction &MF = *MBB.getParent();
4075  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
4076  const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
4077  unsigned XorOp =
4078  MIB->getOpcode() == X86::XOR64_FP ? X86::XOR64rr : X86::XOR32rr;
4079  MIB->setDesc(TII.get(XorOp));
4080  MIB.addReg(TRI->getFrameRegister(MF), RegState::Undef);
4081  return true;
4082 }
4083 
4084 // This is used to handle spills for 128/256-bit registers when we have AVX512,
4085 // but not VLX. If it uses an extended register we need to use an instruction
4086 // that loads the lower 128/256-bit, but is available with only AVX512F.
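// E.g. VMOVAPSZ128rm_NOVLX into %xmm3 simply becomes VMOVAPSrm, while a load
// into %xmm16 (not VEX-encodable) becomes a VBROADCASTF32X4rm into %zmm16,
// whose low 128 bits hold the loaded value.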
4087 static bool expandNOVLXLoad(MachineInstrBuilder &MIB,
4088  const TargetRegisterInfo *TRI,
4089  const MCInstrDesc &LoadDesc,
4090  const MCInstrDesc &BroadcastDesc,
4091  unsigned SubIdx) {
4092  unsigned DestReg = MIB->getOperand(0).getReg();
4093  // Check if DestReg is XMM16-31 or YMM16-31.
4094  if (TRI->getEncodingValue(DestReg) < 16) {
4095  // We can use a normal VEX encoded load.
4096  MIB->setDesc(LoadDesc);
4097  } else {
4098  // Use a 128/256-bit VBROADCAST instruction.
4099  MIB->setDesc(BroadcastDesc);
4100  // Change the destination to a 512-bit register.
4101  DestReg = TRI->getMatchingSuperReg(DestReg, SubIdx, &X86::VR512RegClass);
4102  MIB->getOperand(0).setReg(DestReg);
4103  }
4104  return true;
4105 }
4106 
4107 // This is used to handle spills for 128/256-bit registers when we have AVX512,
4108 // but not VLX. If it uses an extended register we need to use an instruction
4109 // that stores the lower 128/256-bit, but is available with only AVX512F.
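// E.g. VMOVAPSZ128mr_NOVLX from %xmm3 simply becomes VMOVAPSmr, while a store
// from %xmm16 becomes a VEXTRACTF32x4Zmr of the containing %zmm16 with
// immediate 0.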
4110 static bool expandNOVLXStore(MachineInstrBuilder &MIB,
4111  const TargetRegisterInfo *TRI,
4112  const MCInstrDesc &StoreDesc,
4113  const MCInstrDesc &ExtractDesc,
4114  unsigned SubIdx) {
4115  unsigned SrcReg = MIB->getOperand(X86::AddrNumOperands).getReg();
4116  // Check if SrcReg is XMM16-31 or YMM16-31.
4117  if (TRI->getEncodingValue(SrcReg) < 16) {
4118  // We can use a normal VEX encoded store.
4119  MIB->setDesc(StoreDesc);
4120  } else {
4121  // Use a VEXTRACTF instruction.
4122  MIB->setDesc(ExtractDesc);
4123  // Change the source to a 512-bit register.
4124  SrcReg = TRI->getMatchingSuperReg(SrcReg, SubIdx, &X86::VR512RegClass);
4125  MIB->getOperand(X86::AddrNumOperands).setReg(SrcReg);
4126  MIB.addImm(0x0); // Append immediate to extract from the lower bits.
4127  }
4128 
4129  return true;
4130 }
4131 bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
4132  bool HasAVX = Subtarget.hasAVX();
4133  MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
4134  switch (MI.getOpcode()) {
4135  case X86::MOV32r0:
4136  return Expand2AddrUndef(MIB, get(X86::XOR32rr));
4137  case X86::MOV32r1:
4138  return expandMOV32r1(MIB, *this, /*MinusOne=*/ false);
4139  case X86::MOV32r_1:
4140  return expandMOV32r1(MIB, *this, /*MinusOne=*/ true);
4141  case X86::MOV32ImmSExti8:
4142  case X86::MOV64ImmSExti8:
4143  return ExpandMOVImmSExti8(MIB, *this, Subtarget);
4144  case X86::SETB_C8r:
4145  return Expand2AddrUndef(MIB, get(X86::SBB8rr));
4146  case X86::SETB_C16r:
4147  return Expand2AddrUndef(MIB, get(X86::SBB16rr));
4148  case X86::SETB_C32r:
4149  return Expand2AddrUndef(MIB, get(X86::SBB32rr));
4150  case X86::SETB_C64r:
4151  return Expand2AddrUndef(MIB, get(X86::SBB64rr));
4152  case X86::MMX_SET0:
4153  return Expand2AddrUndef(MIB, get(X86::MMX_PXORirr));
4154  case X86::V_SET0:
4155  case X86::FsFLD0SS:
4156  case X86::FsFLD0SD:
4157  return Expand2AddrUndef(MIB, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
4158  case X86::AVX_SET0: {
4159  assert(HasAVX && "AVX not supported");
4160  const TargetRegisterInfo *TRI = &getRegisterInfo();
4161  unsigned SrcReg = MIB->getOperand(0).getReg();
4162  unsigned XReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
4163  MIB->getOperand(0).setReg(XReg);
4164  Expand2AddrUndef(MIB, get(X86::VXORPSrr));
4165  MIB.addReg(SrcReg, RegState::ImplicitDefine);
4166  return true;
4167  }
4168  case X86::AVX512_128_SET0:
4169  case X86::AVX512_FsFLD0SS:
4170  case X86::AVX512_FsFLD0SD: {
4171  bool HasVLX = Subtarget.hasVLX();
4172  unsigned SrcReg = MIB->getOperand(0).getReg();
4173  const TargetRegisterInfo *TRI = &getRegisterInfo();
4174  if (HasVLX || TRI->getEncodingValue(SrcReg) < 16)
4175  return Expand2AddrUndef(MIB,
4176  get(HasVLX ? X86::VPXORDZ128rr : X86::VXORPSrr));
4177  // Extended register without VLX. Use a larger XOR.
4178  SrcReg =
4179  TRI->getMatchingSuperReg(SrcReg, X86::sub_xmm, &X86::VR512RegClass);
4180  MIB->getOperand(0).setReg(SrcReg);
4181  return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
4182  }
4183  case X86::AVX512_256_SET0:
4184  case X86::AVX512_512_SET0: {
4185  bool HasVLX = Subtarget.hasVLX();
4186  unsigned SrcReg = MIB->getOperand(0).getReg();
4187  const TargetRegisterInfo *TRI = &getRegisterInfo();
4188  if (HasVLX || TRI->getEncodingValue(SrcReg) < 16) {
4189  unsigned XReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
4190  MIB->getOperand(0).setReg(XReg);
4191  Expand2AddrUndef(MIB,
4192  get(HasVLX ? X86::VPXORDZ128rr : X86::VXORPSrr));
4193  MIB.addReg(SrcReg, RegState::ImplicitDefine);
4194  return true;
4195  }
4196  return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
4197  }
4198  case X86::V_SETALLONES:
4199  return Expand2AddrUndef(MIB, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));
4200  case X86::AVX2_SETALLONES:
4201  return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr));
4202  case X86::AVX1_SETALLONES: {
4203  unsigned Reg = MIB->getOperand(0).getReg();
4204  // VCMPPSYrri with an immediate 0xf should produce VCMPTRUEPS.
4205  MIB->setDesc(get(X86::VCMPPSYrri));
4206  MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef).addImm(0xf);
4207  return true;
4208  }
4209  case X86::AVX512_512_SETALLONES: {
4210  unsigned Reg = MIB->getOperand(0).getReg();
4211  MIB->setDesc(get(X86::VPTERNLOGDZrri));
4212  // VPTERNLOGD needs 3 register inputs and an immediate.
4213  // 0xff will return 1s for any input.
4214  MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef)
4215  .addReg(Reg, RegState::Undef).addImm(0xff);
4216  return true;
4217  }
4218  case X86::AVX512_512_SEXT_MASK_32:
4219  case X86::AVX512_512_SEXT_MASK_64: {
4220  unsigned Reg = MIB->getOperand(0).getReg();
4221  unsigned MaskReg = MIB->getOperand(1).getReg();
4222  unsigned MaskState = getRegState(MIB->getOperand(1));
4223  unsigned Opc = (MI.getOpcode() == X86::AVX512_512_SEXT_MASK_64) ?
4224  X86::VPTERNLOGQZrrikz : X86::VPTERNLOGDZrrikz;
4225  MI.RemoveOperand(1);
4226  MIB->setDesc(get(Opc));
4227  // VPTERNLOG needs 3 register inputs and an immediate.
4228  // 0xff will return 1s for any input.
4229  MIB.addReg(Reg, RegState::Undef).addReg(MaskReg, MaskState)
4230  .addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef).addImm(0xff);
4231  return true;
4232  }
4233  case X86::VMOVAPSZ128rm_NOVLX:
4234  return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVAPSrm),
4235  get(X86::VBROADCASTF32X4rm), X86::sub_xmm);
4236  case X86::VMOVUPSZ128rm_NOVLX:
4237  return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVUPSrm),
4238  get(X86::VBROADCASTF32X4rm), X86::sub_xmm);
4239  case X86::VMOVAPSZ256rm_NOVLX:
4240  return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVAPSYrm),
4241  get(X86::VBROADCASTF64X4rm), X86::sub_ymm);
4242  case X86::VMOVUPSZ256rm_NOVLX:
4243  return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVUPSYrm),
4244  get(X86::VBROADCASTF64X4rm), X86::sub_ymm);
4245  case X86::VMOVAPSZ128mr_NOVLX:
4246  return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVAPSmr),
4247  get(X86::VEXTRACTF32x4Zmr), X86::sub_xmm);
4248  case X86::VMOVUPSZ128mr_NOVLX:
4249  return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSmr),
4250  get(X86::VEXTRACTF32x4Zmr), X86::sub_xmm);
4251  case X86::VMOVAPSZ256mr_NOVLX:
4252  return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVAPSYmr),
4253  get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm);
4254  case X86::VMOVUPSZ256mr_NOVLX:
4255  return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSYmr),
4256  get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm);
4257  case X86::MOV32ri64: {
4258  unsigned Reg = MIB->getOperand(0).getReg();
4259  unsigned Reg32 = RI.getSubReg(Reg, X86::sub_32bit);
4260  MI.setDesc(get(X86::MOV32ri));
4261  MIB->getOperand(0).setReg(Reg32);
4262  MIB.addReg(Reg, RegState::ImplicitDefine);
4263  return true;
4264  }
4265 
4266  // KNL does not recognize dependency-breaking idioms for mask registers,
4267  // so kxnor %k1, %k1, %k2 has a RAW dependence on %k1.
4268  // Using %k0 as the undef input register is a performance heuristic based
4269  // on the assumption that %k0 is used less frequently than the other mask
4270  // registers, since it is not usable as a write mask.
4271  // FIXME: A more advanced approach would be to choose the best input mask
4272  // register based on context.
4273  case X86::KSET0W: return Expand2AddrKreg(MIB, get(X86::KXORWrr), X86::K0);
4274  case X86::KSET0D: return Expand2AddrKreg(MIB, get(X86::KXORDrr), X86::K0);
4275  case X86::KSET0Q: return Expand2AddrKreg(MIB, get(X86::KXORQrr), X86::K0);
4276  case X86::KSET1W: return Expand2AddrKreg(MIB, get(X86::KXNORWrr), X86::K0);
4277  case X86::KSET1D: return Expand2AddrKreg(MIB, get(X86::KXNORDrr), X86::K0);
4278  case X86::KSET1Q: return Expand2AddrKreg(MIB, get(X86::KXNORQrr), X86::K0);
4279  case TargetOpcode::LOAD_STACK_GUARD:
4280  expandLoadStackGuard(MIB, *this);
4281  return true;
4282  case X86::XOR64_FP:
4283  case X86::XOR32_FP:
4284  return expandXorFP(MIB, *this);
4285  }
4286  return false;
4287 }
4288 
4289 /// Return true for all instructions that only update
4290 /// the first 32 or 64-bits of the destination register and leave the rest
4291 /// unmodified. This can be used to avoid folding loads if the instructions
4292 /// only update part of the destination register, and the non-updated part is
4293 /// not needed. e.g. cvtss2sd, sqrtss. Unfolding the load from these
4294 /// instructions breaks the partial register dependency and it can improve
4295 /// performance. e.g.:
4296 ///
4297 /// movss (%rdi), %xmm0
4298 /// cvtss2sd %xmm0, %xmm0
4299 ///
4300 /// Instead of
4301 /// cvtss2sd (%rdi), %xmm0
4302 ///
4303 /// FIXME: This should be turned into a TSFlags.
4304 ///
4305 static bool hasPartialRegUpdate(unsigned Opcode,
4306  const X86Subtarget &Subtarget) {
4307  switch (Opcode) {
4308  case X86::CVTSI2SSrr:
4309  case X86::CVTSI2SSrm:
4310  case X86::CVTSI642SSrr:
4311  case X86::CVTSI642SSrm:
4312  case X86::CVTSI2SDrr:
4313  case X86::CVTSI2SDrm:
4314  case X86::CVTSI642SDrr:
4315  case X86::CVTSI642SDrm:
4316  case X86::CVTSD2SSrr:
4317  case X86::CVTSD2SSrm:
4318  case X86::CVTSS2SDrr:
4319  case X86::CVTSS2SDrm:
4320  case X86::MOVHPDrm:
4321  case X86::MOVHPSrm:
4322  case X86::MOVLPDrm:
4323  case X86::MOVLPSrm:
4324  case X86::RCPSSr:
4325  case X86::RCPSSm:
4326  case X86::RCPSSr_Int:
4327  case X86::RCPSSm_Int:
4328  case X86::ROUNDSDr:
4329  case X86::ROUNDSDm:
4330  case X86::ROUNDSSr:
4331  case X86::ROUNDSSm:
4332  case X86::RSQRTSSr:
4333  case X86::RSQRTSSm:
4334  case X86::RSQRTSSr_Int:
4335  case X86::RSQRTSSm_Int:
4336  case X86::SQRTSSr:
4337  case X86::SQRTSSm:
4338  case X86::SQRTSSr_Int:
4339  case X86::SQRTSSm_Int:
4340  case X86::SQRTSDr:
4341  case X86::SQRTSDm:
4342  case X86::SQRTSDr_Int:
4343  case X86::SQRTSDm_Int:
4344  return true;
4345  // GPR
4346  case X86::POPCNT32rm:
4347  case X86::POPCNT32rr:
4348  case X86::POPCNT64rm:
4349  case X86::POPCNT64rr:
4350  return Subtarget.hasPOPCNTFalseDeps();
4351  case X86::LZCNT32rm:
4352  case X86::LZCNT32rr:
4353  case X86::LZCNT64rm:
4354  case X86::LZCNT64rr:
4355  case X86::TZCNT32rm:
4356  case X86::TZCNT32rr:
4357  case X86::TZCNT64rm:
4358  case X86::TZCNT64rr:
4359  return Subtarget.hasLZCNTFalseDeps();
4360  }
4361 
4362  return false;
4363 }
4364 
4365 /// Inform the BreakFalseDeps pass how many idle
4366 /// instructions we would like before a partial register update.
4367 unsigned X86InstrInfo::getPartialRegUpdateClearance(
4368  const MachineInstr &MI, unsigned OpNum,
4369  const TargetRegisterInfo *TRI) const {
4370  if (OpNum != 0 || !hasPartialRegUpdate(MI.getOpcode(), Subtarget))
4371  return 0;
4372 
4373  // If MI is marked as reading Reg, the partial register update is wanted.
4374  const MachineOperand &MO = MI.getOperand(0);
4375  unsigned Reg = MO.getReg();
4376  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
4377  if (MO.readsReg() || MI.readsVirtualRegister(Reg))
4378  return 0;
4379  } else {
4380  if (MI.readsRegister(Reg, TRI))
4381  return 0;
4382  }
4383 
4384  // If any instructions in the clearance range are reading Reg, insert a
4385  // dependency breaking instruction, which is inexpensive and is likely to
4386  // be hidden in other instructions' cycles.
4387  return PartialRegUpdateClearance;
4388 }
4389 
4390 // Return true for any instruction that copies the high bits of the first source
4391 // operand into the unused high bits of the destination operand.
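// E.g. VCVTSI2SSrr writes only the low 32 bits of its destination and passes
// the upper bits of its first source through, so an undef first source
// creates a false dependence that BreakFalseDeps may want to break.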
4392 static bool hasUndefRegUpdate(unsigned Opcode) {
4393  switch (Opcode) {
4394  case X86::VCVTSI2SSrr:
4395  case X86::VCVTSI2SSrm:
4396  case X86::VCVTSI2SSrr_Int:
4397  case X86::VCVTSI2SSrm_Int:
4398  case X86::VCVTSI642SSrr:
4399  case X86::VCVTSI642SSrm:
4400  case X86::VCVTSI642SSrr_Int:
4401  case X86::VCVTSI642SSrm_Int:
4402  case X86::VCVTSI2SDrr:
4403  case X86::VCVTSI2SDrm:
4404  case X86::VCVTSI2SDrr_Int:
4405  case X86::VCVTSI2SDrm_Int:
4406  case X86::VCVTSI642SDrr:
4407  case X86::VCVTSI642SDrm:
4408  case X86::VCVTSI642SDrr_Int:
4409  case X86::VCVTSI642SDrm_Int:
4410  case X86::VCVTSD2SSrr:
4411  case X86::VCVTSD2SSrm:
4412  case X86::VCVTSD2SSrr_Int:
4413  case X86::VCVTSD2SSrm_Int:
4414  case X86::VCVTSS2SDrr:
4415  case X86::VCVTSS2SDrm:
4416  case X86::VCVTSS2SDrr_Int:
4417  case X86::VCVTSS2SDrm_Int:
4418  case X86::VRCPSSr:
4419  case X86::VRCPSSr_Int:
4420  case X86::VRCPSSm:
4421  case X86::VRCPSSm_Int:
4422  case X86::VROUNDSDr:
4423  case X86::VROUNDSDm:
4424  case X86::VROUNDSDr_Int:
4425  case X86::VROUNDSDm_Int:
4426  case X86::VROUNDSSr:
4427  case X86::VROUNDSSm:
4428  case X86::VROUNDSSr_Int:
4429  case X86::VROUNDSSm_Int:
4430  case X86::VRSQRTSSr:
4431  case X86::VRSQRTSSr_Int:
4432  case X86::VRSQRTSSm:
4433  case X86::VRSQRTSSm_Int:
4434  case X86::VSQRTSSr:
4435  case X86::VSQRTSSr_Int:
4436  case X86::VSQRTSSm:
4437  case X86::VSQRTSSm_Int:
4438  case X86::VSQRTSDr:
4439  case X86::VSQRTSDr_Int:
4440  case X86::VSQRTSDm:
4441  case X86::VSQRTSDm_Int:
4442  // AVX-512
4443  case X86::VCVTSI2SSZrr:
4444  case X86::VCVTSI2SSZrm:
4445  case X86::VCVTSI2SSZrr_Int:
4446  case X86::VCVTSI2SSZrrb_Int:
4447  case X86::VCVTSI2SSZrm_Int:
4448  case X86::VCVTSI642SSZrr:
4449  case X86::VCVTSI642SSZrm:
4450  case X86::VCVTSI642SSZrr_Int:
4451  case X86::VCVTSI642SSZrrb_Int:
4452  case X86::VCVTSI642SSZrm_Int:
4453  case X86::VCVTSI2SDZrr:
4454  case X86::VCVTSI2SDZrm:
4455  case X86::VCVTSI2SDZrr_Int:
4456  case X86::VCVTSI2SDZrrb_Int:
4457  case X86::VCVTSI2SDZrm_Int:
4458  case X86::VCVTSI642SDZrr:
4459  case X86::VCVTSI642SDZrm:
4460  case X86::VCVTSI642SDZrr_Int:
4461  case X86::VCVTSI642SDZrrb_Int:
4462  case X86::VCVTSI642SDZrm_Int:
4463  case X86::VCVTUSI2SSZrr:
4464  case X86::VCVTUSI2SSZrm:
4465  case X86::VCVTUSI2SSZrr_Int:
4466  case X86::VCVTUSI2SSZrrb_Int:
4467  case X86::VCVTUSI2SSZrm_Int:
4468  case X86::VCVTUSI642SSZrr:
4469  case X86::VCVTUSI642SSZrm:
4470  case X86::VCVTUSI642SSZrr_Int:
4471  case X86::VCVTUSI642SSZrrb_Int:
4472  case X86::VCVTUSI642SSZrm_Int:
4473  case X86::VCVTUSI2SDZrr:
4474  case X86::VCVTUSI2SDZrm:
4475  case X86::VCVTUSI2SDZrr_Int:
4476  case X86::VCVTUSI2SDZrm_Int:
4477  case X86::VCVTUSI642SDZrr:
4478  case X86::VCVTUSI642SDZrm:
4479  case X86::VCVTUSI642SDZrr_Int:
4480  case X86::VCVTUSI642SDZrrb_Int:
4481  case X86::VCVTUSI642SDZrm_Int:
4482  case X86::VCVTSD2SSZrr:
4483  case X86::VCVTSD2SSZrr_Int:
4484  case X86::VCVTSD2SSZrrb_Int:
4485  case X86::VCVTSD2SSZrm:
4486  case X86::VCVTSD2SSZrm_Int:
4487  case X86::VCVTSS2SDZrr:
4488  case X86::VCVTSS2SDZrr_Int:
4489  case X86::VCVTSS2SDZrrb_Int:
4490  case X86::VCVTSS2SDZrm:
4491  case X86::VCVTSS2SDZrm_Int:
4492  case X86::VGETEXPSDZr:
4493  case X86::VGETEXPSDZrb:
4494  case X86::VGETEXPSDZm:
4495  case X86::VGETEXPSSZr:
4496  case X86::VGETEXPSSZrb:
4497  case X86::VGETEXPSSZm:
4498  case X86::VGETMANTSDZrri:
4499  case X86::VGETMANTSDZrrib:
4500  case X86::VGETMANTSDZrmi:
4501  case X86::VGETMANTSSZrri:
4502  case X86::VGETMANTSSZrrib:
4503  case X86::VGETMANTSSZrmi:
4504  case X86::VRNDSCALESDZr:
4505  case X86::VRNDSCALESDZr_Int:
4506  case X86::VRNDSCALESDZrb_Int:
4507  case X86::VRNDSCALESDZm:
4508  case X86::VRNDSCALESDZm_Int:
4509  case X86::VRNDSCALESSZr:
4510  case X86::VRNDSCALESSZr_Int:
4511  case X86::VRNDSCALESSZrb_Int:
4512  case X86::VRNDSCALESSZm:
4513  case X86::VRNDSCALESSZm_Int:
4514  case X86::VRCP14SDZrr:
4515  case X86::VRCP14SDZrm:
4516  case X86::VRCP14SSZrr:
4517  case X86::VRCP14SSZrm:
4518  case X86::VRCP28SDZr:
4519  case X86::VRCP28SDZrb:
4520  case X86::VRCP28SDZm:
4521  case X86::VRCP28SSZr:
4522  case X86::VRCP28SSZrb:
4523  case X86::VRCP28SSZm:
4524  case X86::VREDUCESSZrmi:
4525  case X86::VREDUCESSZrri:
4526  case X86::VREDUCESSZrrib:
4527  case X86::VRSQRT14SDZrr:
4528  case X86::VRSQRT14SDZrm:
4529  case X86::VRSQRT14SSZrr:
4530  case X86::VRSQRT14SSZrm:
4531  case X86::VRSQRT28SDZr:
4532  case X86::VRSQRT28SDZrb:
4533  case X86::VRSQRT28SDZm:
4534  case X86::VRSQRT28SSZr:
4535  case X86::VRSQRT28SSZrb:
4536  case X86::VRSQRT28SSZm:
4537  case X86::VSQRTSSZr:
4538  case X86::VSQRTSSZr_Int:
4539  case X86::VSQRTSSZrb_Int:
4540  case X86::VSQRTSSZm:
4541  case X86::VSQRTSSZm_Int:
4542  case X86::VSQRTSDZr:
4543  case X86::VSQRTSDZr_Int:
4544  case X86::VSQRTSDZrb_Int:
4545  case X86::VSQRTSDZm:
4546  case X86::VSQRTSDZm_Int:
4547  return true;
4548  }
4549 
4550  return false;
4551 }
4552 
4553 /// Inform the BreakFalseDeps pass how many idle instructions we would like
4554 /// before certain undef register reads.
4555 ///
4556 /// This catches the VCVTSI2SD family of instructions:
4557 ///
4558 /// vcvtsi2sdq %rax, undef %xmm0, %xmm14
4559 ///
4560 /// We should be careful *not* to catch VXOR idioms which are presumably
4561 /// handled specially in the pipeline:
4562 ///
4563 /// vxorps undef %xmm1, undef %xmm1, %xmm1
4564 ///
4565 /// Like getPartialRegUpdateClearance, this makes a strong assumption that the
4566 /// high bits that are passed-through are not live.
4567 unsigned
4568 X86InstrInfo::getUndefRegClearance(const MachineInstr &MI, unsigned &OpNum,
4569  const TargetRegisterInfo *TRI) const {
4570  if (!hasUndefRegUpdate(MI.getOpcode()))
4571  return 0;
4572 
4573  // Set the OpNum parameter to the first source operand.
4574  OpNum = 1;
4575 
4576  const MachineOperand &MO = MI.getOperand(OpNum);
4577  if (MO.isUndef() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
4578  return UndefRegClearance;
4579  }
4580  return 0;
4581 }
4582 
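// Break the false dependence by turning the undef read into a zeroing idiom,
// e.g. inserting "xorps %xmm0, %xmm0" in front of a "cvtss2sd %xmm0, %xmm0"
// that only partially overwrites %xmm0; for ymm registers and 64-bit GPRs the
// xor is applied to the xmm / 32-bit sub-register, which zeroes the full
// register.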
4583 void X86InstrInfo::breakPartialRegDependency(
4584  MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
4585  unsigned Reg = MI.getOperand(OpNum).getReg();
4586  // If MI kills this register, the false dependence is already broken.
4587  if (MI.killsRegister(Reg, TRI))
4588  return;
4589 
4590  if (X86::VR128RegClass.contains(Reg)) {
4591  // These instructions are all floating point domain, so xorps is the best
4592  // choice.
4593  unsigned Opc = Subtarget.hasAVX() ? X86::VXORPSrr : X86::XORPSrr;
4594  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(Opc), Reg)
4595  .addReg(Reg, RegState::Undef)
4596  .addReg(Reg, RegState::Undef);
4597  MI.addRegisterKilled(Reg, TRI, true);
4598  } else if (X86::VR256RegClass.contains(Reg)) {
4599  // Use vxorps to clear the full ymm register.
4600  // It wants to read and write the xmm sub-register.
4601  unsigned XReg = TRI->getSubReg(Reg, X86::sub_xmm);
4602  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::VXORPSrr), XReg)
4603  .addReg(XReg, RegState::Undef)
4604  .addReg(XReg, RegState::Undef)
4605  .addReg(Reg, RegState::ImplicitDefine);
4606  MI.addRegisterKilled(Reg, TRI, true);
4607  } else if (X86::GR64RegClass.contains(Reg)) {
4608  // Use XOR32rr because it has a shorter encoding and also zeros the upper
4609  // 32 bits.
4610  unsigned XReg = TRI->getSubReg(Reg, X86::sub_32bit);
4611  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::XOR32rr), XReg)
4612  .addReg(XReg, RegState::Undef)
4613  .addReg(XReg, RegState::Undef)
4614  .addReg(Reg, RegState::ImplicitDefine);
4615  MI.addRegisterKilled(Reg, TRI, true);
4616  } else if (X86::GR32RegClass.contains(Reg)) {
4617  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::XOR32rr), Reg)
4618  .addReg(Reg, RegState::Undef)
4619  .addReg(Reg, RegState::Undef);
4620  MI.addRegisterKilled(Reg, TRI, true);
4621  }
4622 }
4623 
4624 static void addOperands(MachineInstrBuilder &MIB, ArrayRef<MachineOperand> MOs,
4625  int PtrOffset = 0) {
4626  unsigned NumAddrOps = MOs.size();
4627 
4628  if (NumAddrOps < 4) {
4629  // FrameIndex only - add an immediate offset (whether it's zero or not).
4630  for (unsigned i = 0; i != NumAddrOps; ++i)
4631  MIB.add(MOs[i]);
4632  addOffset(MIB, PtrOffset);
4633  } else {
4634  // General Memory Addressing - we need to add any offset to an existing
4635  // offset.
4636  assert(MOs.size() == 5 && "Unexpected memory operand list length");
4637  for (unsigned i = 0; i != NumAddrOps; ++i) {
4638  const MachineOperand &MO = MOs[i];
4639  if (i == 3 && PtrOffset != 0) {
4640  MIB.addDisp(MO, PtrOffset);
4641  } else {
4642  MIB.add(MO);
4643  }
4644  }
4645  }
4646 }
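// Note (added for exposition, not in the original file): a full x86 memory
// reference is X86::AddrNumOperands == 5 operands in the order base register,
// scale immediate, index register, displacement, segment register, i.e.
// [Base + Scale*Index + Disp](Segment). The i == 3 case above is therefore
// the displacement, the only operand that must absorb PtrOffset; the shorter
// frame-index form simply gets the offset appended via addOffset().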
4647 
4648 static void updateOperandRegConstraints(MachineFunction &MF,
4649  MachineInstr &NewMI,
4650  const TargetInstrInfo &TII) {
4651  MachineRegisterInfo &MRI = MF.getRegInfo();
4652  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
4653 
4654  for (int Idx : llvm::seq<int>(0, NewMI.getNumOperands())) {
4655  MachineOperand &MO = NewMI.getOperand(Idx);
4656  // We only need to update constraints on virtual register operands.
4657  if (!MO.isReg())
4658  continue;
4659  unsigned Reg = MO.getReg();
4660  if (!TRI.isVirtualRegister(Reg))
4661  continue;
4662 
4663  auto *NewRC = MRI.constrainRegClass(
4664  Reg, TII.getRegClass(NewMI.getDesc(), Idx, &TRI, MF));
4665  if (!NewRC) {
4666  LLVM_DEBUG(
4667  dbgs() << "WARNING: Unable to update register constraint for operand "
4668  << Idx << " of instruction:\n";
4669  NewMI.dump(); dbgs() << "\n");
4670  }
4671  }
4672 }
4673 
4674 static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
4675  ArrayRef<MachineOperand> MOs,
4676  MachineBasicBlock::iterator InsertPt,
4677  MachineInstr &MI,
4678  const TargetInstrInfo &TII) {
4679  // Create the base instruction with the memory operand as the first part.
4680  // Omit the implicit operands, something BuildMI can't do.
4681  MachineInstr *NewMI =
4682  MF.CreateMachineInstr(TII.get(Opcode), MI.getDebugLoc(), true);
4683  MachineInstrBuilder MIB(MF, NewMI);
4684  addOperands(MIB, MOs);
4685 
4686  // Loop over the rest of the ri operands, converting them over.
4687  unsigned NumOps = MI.getDesc().getNumOperands() - 2;
4688  for (unsigned i = 0; i != NumOps; ++i) {
4689  MachineOperand &MO = MI.getOperand(i + 2);
4690  MIB.add(MO);
4691  }
4692  for (unsigned i = NumOps + 2, e = MI.getNumOperands(); i != e; ++i) {
4693  MachineOperand &MO = MI.getOperand(i);
4694  MIB.add(MO);
4695  }
4696 
4697  updateOperandRegConstraints(MF, *NewMI, TII);
4698 
4699  MachineBasicBlock *MBB = InsertPt->getParent();
4700  MBB->insert(InsertPt, NewMI);
4701 
4702  return MIB;
4703 }
4704 
4705 static MachineInstr *FuseInst(MachineFunction &MF, unsigned Opcode,
4706  unsigned OpNo, ArrayRef<MachineOperand> MOs,
4707  MachineBasicBlock::iterator InsertPt,
4708  MachineInstr &MI, const TargetInstrInfo &TII,
4709  int PtrOffset = 0) {
4710  // Omit the implicit operands, something BuildMI can't do.
4711  MachineInstr *NewMI =
4712  MF.CreateMachineInstr(TII.get(Opcode), MI.getDebugLoc(), true);
4713  MachineInstrBuilder MIB(MF, NewMI);
4714 
4715  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
4716  MachineOperand &MO = MI.getOperand(i);
4717  if (i == OpNo) {
4718  assert(MO.isReg() && "Expected to fold into reg operand!");
4719  addOperands(MIB, MOs, PtrOffset);
4720  } else {
4721  MIB.add(MO);
4722  }
4723  }
4724 
4725  updateOperandRegConstraints(MF, *NewMI, TII);
4726 
4727  MachineBasicBlock *MBB = InsertPt->getParent();
4728  MBB->insert(InsertPt, NewMI);
4729 
4730  return MIB;
4731 }
4732 
4733 static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
4734  ArrayRef<MachineOperand> MOs,
4735  MachineBasicBlock::iterator InsertPt,
4736  MachineInstr &MI) {
4737  MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt,
4738  MI.getDebugLoc(), TII.get(Opcode));
4739  addOperands(MIB, MOs);
4740  return MIB.addImm(0);
4741 }
4742 
4743 MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
4744  MachineFunction &MF, MachineInstr &MI, unsigned OpNum,
4745  ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt,
4746  unsigned Size, unsigned Align) const {
4747  switch (MI.getOpcode()) {
4748  case X86::INSERTPSrr:
4749  case X86::VINSERTPSrr:
4750  case X86::VINSERTPSZrr:
4751  // Attempt to convert a load of the inserted vector into a folded load
4752  // of a single float.
4753  if (OpNum == 2) {
4754  unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm();
4755  unsigned ZMask = Imm & 15;
4756  unsigned DstIdx = (Imm >> 4) & 3;
4757  unsigned SrcIdx = (Imm >> 6) & 3;
4758 
4759  const TargetRegisterInfo &TRI = getRegisterInfo();
4760  const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
4761  unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
4762  if (Size <= RCSize && 4 <= Align) {
4763  int PtrOffset = SrcIdx * 4;
4764  unsigned NewImm = (DstIdx << 4) | ZMask;
4765  unsigned NewOpCode =
4766  (MI.getOpcode() == X86::VINSERTPSZrr) ? X86::VINSERTPSZrm :
4767  (MI.getOpcode() == X86::VINSERTPSrr) ? X86::VINSERTPSrm :
4768  X86::INSERTPSrm;
4769  MachineInstr *NewMI =
4770  FuseInst(MF, NewOpCode, OpNum, MOs, InsertPt, MI, *this, PtrOffset);
4771  NewMI->getOperand(NewMI->getNumOperands() - 1).setImm(NewImm);
4772  return NewMI;
4773  }
4774  }
4775  break;
4776  case X86::MOVHLPSrr:
4777  case X86::VMOVHLPSrr:
4778  case X86::VMOVHLPSZrr:
4779  // Move the upper 64-bits of the second operand to the lower 64-bits.
4780  // To fold the load, adjust the pointer to the upper and use (V)MOVLPS.
4781  // TODO: In most cases AVX doesn't have an 8-byte alignment requirement.
4782  if (OpNum == 2) {
4783  const TargetRegisterInfo &TRI = getRegisterInfo();
4784  const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
4785  unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
4786  if (Size <= RCSize && 8 <= Align) {
4787  unsigned NewOpCode =
4788  (MI.getOpcode() == X86::VMOVHLPSZrr) ? X86::VMOVLPSZ128rm :
4789  (MI.getOpcode() == X86::VMOVHLPSrr) ? X86::VMOVLPSrm :
4790  X86::MOVLPSrm;
4791  MachineInstr *NewMI =
4792  FuseInst(MF, NewOpCode, OpNum, MOs, InsertPt, MI, *this, 8);
4793  return NewMI;
4794  }
4795  }
4796  break;
4797  };
4798 
4799  return nullptr;
4800 }
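// Sketch of the custom folds above (added for exposition; operands are made
// up). With OpNum == 2,
//   movaps (%rdi), %xmm1
//   insertps $0x40, %xmm1, %xmm0   ; SrcIdx = 1, DstIdx = 0, ZMask = 0
// becomes a single 4-byte load of element 1:
//   insertps $0x00, 4(%rdi), %xmm0
// and
//   movaps (%rdi), %xmm1
//   movhlps %xmm1, %xmm0
// becomes
//   movlps 8(%rdi), %xmm0
// provided the size and alignment checks above pass.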
4801 
4802 static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF, MachineInstr &MI) {
4803  if (MF.getFunction().optForSize() || !hasUndefRegUpdate(MI.getOpcode()) ||
4804  !MI.getOperand(1).isReg())
4805  return false;
4806 
4807  // There are two cases we need to handle depending on where in the pipeline
4808  // the folding attempt is being made.
4809  // -Register has the undef flag set.
4810  // -Register is produced by the IMPLICIT_DEF instruction.
4811 
4812  if (MI.getOperand(1).isUndef())
4813  return true;
4814 
4815  MachineRegisterInfo &RegInfo = MF.getRegInfo();
4816  MachineInstr *VRegDef = RegInfo.getUniqueVRegDef(MI.getOperand(1).getReg());
4817  return VRegDef && VRegDef->isImplicitDef();
4818 }
4819 
4820 
4821 MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
4822  MachineFunction &MF, MachineInstr &MI, unsigned OpNum,
4823  ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt,
4824  unsigned Size, unsigned Align, bool AllowCommute) const {
4825  bool isSlowTwoMemOps = Subtarget.slowTwoMemOps();
4826  bool isTwoAddrFold = false;
4827 
4828  // For CPUs that favor the register form of a call or push,
4829  // do not fold loads into calls or pushes, unless optimizing for size
4830  // aggressively.
4831  if (isSlowTwoMemOps && !MF.getFunction().optForMinSize() &&
4832  (MI.getOpcode() == X86::CALL32r || MI.getOpcode() == X86::CALL64r ||
4833  MI.getOpcode() == X86::PUSH16r || MI.getOpcode() == X86::PUSH32r ||
4834  MI.getOpcode() == X86::PUSH64r))
4835  return nullptr;
4836 
4837  // Avoid partial and undef register update stalls unless optimizing for size.
4838  if (!MF.getFunction().optForSize() &&
4839  (hasPartialRegUpdate(MI.getOpcode(), Subtarget) ||
4840  shouldPreventUndefRegUpdateMemFold(MF, MI)))
4841  return nullptr;
4842 
4843  unsigned NumOps = MI.getDesc().getNumOperands();
4844  bool isTwoAddr =
4845  NumOps > 1 && MI.getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1;
4846 
4847  // FIXME: AsmPrinter doesn't know how to handle
4848  // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
4849  if (MI.getOpcode() == X86::ADD32ri &&
4850  MI.getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS)
4851  return nullptr;
4852 
4853  // GOTTPOFF relocation loads can only be folded into add instructions.
4854  // FIXME: Need to exclude other relocations that only support specific
4855  // instructions.
4856  if (MOs.size() == X86::AddrNumOperands &&
4857  MOs[X86::AddrDisp].getTargetFlags() == X86II::MO_GOTTPOFF &&
4858  MI.getOpcode() != X86::ADD64rr)
4859  return nullptr;
4860 
4861  MachineInstr *NewMI = nullptr;
4862 
4863  // Attempt to fold any custom cases we have.
4864  if (MachineInstr *CustomMI =
4865  foldMemoryOperandCustom(MF, MI, OpNum, MOs, InsertPt, Size, Align))
4866  return CustomMI;
4867 
4868  const X86MemoryFoldTableEntry *I = nullptr;
4869 
4870  // Folding a memory location into the two-address part of a two-address
4871  // instruction is different from folding it elsewhere. It requires
4872  // replacing the *two* registers with the memory location.
4873  if (isTwoAddr && NumOps >= 2 && OpNum < 2 && MI.getOperand(0).isReg() &&
4874  MI.getOperand(1).isReg() &&
4875  MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) {
4876  I = lookupTwoAddrFoldTable(MI.getOpcode());
4877  isTwoAddrFold = true;
4878  } else {
4879  if (OpNum == 0) {
4880  if (MI.getOpcode() == X86::MOV32r0) {
4881  NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, InsertPt, MI);
4882  if (NewMI)
4883  return NewMI;
4884  }
4885  }
4886 
4887  I = lookupFoldTable(MI.getOpcode(), OpNum);
4888  }
4889 
4890  if (I != nullptr) {
4891  unsigned Opcode = I->DstOp;
4892  unsigned MinAlign = (I->Flags & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT;
4893  if (Align < MinAlign)
4894  return nullptr;
4895  bool NarrowToMOV32rm = false;
4896  if (Size) {
4897  const TargetRegisterInfo &TRI = getRegisterInfo();
4898  const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum,
4899  &RI, MF);
4900  unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
4901  if (Size < RCSize) {
4902  // Check if it's safe to fold the load. If the size of the object is
4903  // narrower than the load width, then it's not.
4904  if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4)
4905  return nullptr;
4906  // If this is a 64-bit load, but the spill slot is only 32 bits, we can do
4907  // a 32-bit load which is implicitly zero-extended. This likely is
4908  // due to live interval analysis remat'ing a load from stack slot.
4909  if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
4910  return nullptr;
4911  Opcode = X86::MOV32rm;
4912  NarrowToMOV32rm = true;
4913  }
4914  }
4915 
4916  if (isTwoAddrFold)
4917  NewMI = FuseTwoAddrInst(MF, Opcode, MOs, InsertPt, MI, *this);
4918  else
4919  NewMI = FuseInst(MF, Opcode, OpNum, MOs, InsertPt, MI, *this);
4920 
4921  if (NarrowToMOV32rm) {
4922  // This is the special case where we use a MOV32rm to load a 32-bit
4923  // value and zero-extend the top bits. Change the destination register
4924  // to a 32-bit one.
4925  unsigned DstReg = NewMI->getOperand(0).getReg();
4926  if (TargetRegisterInfo::isPhysicalRegister(DstReg))
4927  NewMI->getOperand(0).setReg(RI.getSubReg(DstReg, X86::sub_32bit));
4928  else
4929  NewMI->getOperand(0).setSubReg(X86::sub_32bit);
4930  }
4931  return NewMI;
4932  }
4933 
4934  // If the instruction and target operand are commutable, commute the
4935  // instruction and try again.
4936  if (AllowCommute) {
4937  unsigned CommuteOpIdx1 = OpNum, CommuteOpIdx2 = CommuteAnyOperandIndex;
4938  if (findCommutedOpIndices(MI, CommuteOpIdx1, CommuteOpIdx2)) {
4939  bool HasDef = MI.getDesc().getNumDefs();
4940  unsigned Reg0 = HasDef ? MI.getOperand(0).getReg() : 0;
4941  unsigned Reg1 = MI.getOperand(CommuteOpIdx1).getReg();
4942  unsigned Reg2 = MI.getOperand(CommuteOpIdx2).getReg();
4943  bool Tied1 =
4944  0 == MI.getDesc().getOperandConstraint(CommuteOpIdx1, MCOI::TIED_TO);
4945  bool Tied2 =
4946  0 == MI.getDesc().getOperandConstraint(CommuteOpIdx2, MCOI::TIED_TO);
4947 
4948  // If either of the commutable operands are tied to the destination
4949  // then we can not commute + fold.
4950  if ((HasDef && Reg0 == Reg1 && Tied1) ||
4951  (HasDef && Reg0 == Reg2 && Tied2))
4952  return nullptr;
4953 
4954  MachineInstr *CommutedMI =
4955  commuteInstruction(MI, false, CommuteOpIdx1, CommuteOpIdx2);
4956  if (!CommutedMI) {
4957  // Unable to commute.
4958  return nullptr;
4959  }
4960  if (CommutedMI != &MI) {
4961  // New instruction. We can't fold from this.
4962  CommutedMI->eraseFromParent();
4963  return nullptr;
4964  }
4965 
4966  // Attempt to fold with the commuted version of the instruction.
4967  NewMI = foldMemoryOperandImpl(MF, MI, CommuteOpIdx2, MOs, InsertPt,
4968  Size, Align, /*AllowCommute=*/false);
4969  if (NewMI)
4970  return NewMI;
4971 
4972  // Folding failed again - undo the commute before returning.
4973  MachineInstr *UncommutedMI =
4974  commuteInstruction(MI, false, CommuteOpIdx1, CommuteOpIdx2);
4975  if (!UncommutedMI) {
4976  // Unable to commute.
4977  return nullptr;
4978  }
4979  if (UncommutedMI != &MI) {
4980  // New instruction. It doesn't need to be kept.
4981  UncommutedMI->eraseFromParent();
4982  return nullptr;
4983  }
4984 
4985  // Return here to prevent duplicate fuse failure report.
4986  return nullptr;
4987  }
4988  }
4989 
4990  // No fusion
4991  if (PrintFailedFusing && !MI.isCopy())
4992  dbgs() << "We failed to fuse operand " << OpNum << " in " << MI;
4993  return nullptr;
4994 }
4995 
4996 MachineInstr *
4997 X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
4998  ArrayRef<unsigned> Ops,
4999  MachineBasicBlock::iterator InsertPt,
5000  int FrameIndex, LiveIntervals *LIS) const {
5001  // Check switch flag
5002  if (NoFusing)
5003  return nullptr;
5004 
5005  // Avoid partial and undef register update stalls unless optimizing for size.
5006  if (!MF.getFunction().optForSize() &&
5007  (hasPartialRegUpdate(MI.getOpcode(), Subtarget) ||
5008  shouldPreventUndefRegUpdateMemFold(MF, MI)))
5009  return nullptr;
5010 
5011  // Don't fold subreg spills, or reloads that use a high subreg.
5012  for (auto Op : Ops) {
5013  MachineOperand &MO = MI.getOperand(Op);
5014  auto SubReg = MO.getSubReg();
5015  if (SubReg && (MO.isDef() || SubReg == X86::sub_8bit_hi))
5016  return nullptr;
5017  }
5018 
5019  const MachineFrameInfo &MFI = MF.getFrameInfo();
5020  unsigned Size = MFI.getObjectSize(FrameIndex);
5021  unsigned Alignment = MFI.getObjectAlignment(FrameIndex);
5022  // If the function stack isn't realigned we don't want to fold instructions
5023  // that need increased alignment.
5024  if (!RI.needsStackRealignment(MF))
5025  Alignment =
5026  std::min(Alignment, Subtarget.getFrameLowering()->getStackAlignment());
5027  if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
5028  unsigned NewOpc = 0;
5029  unsigned RCSize = 0;
5030  switch (MI.getOpcode()) {
5031  default: return nullptr;
5032  case X86::TEST8rr: NewOpc = X86::CMP8ri; RCSize = 1; break;
5033  case X86::TEST16rr: NewOpc = X86::CMP16ri8; RCSize = 2; break;
5034  case X86::TEST32rr: NewOpc = X86::CMP32ri8; RCSize = 4; break;
5035  case X86::TEST64rr: NewOpc = X86::CMP64ri8; RCSize = 8; break;
5036  }
5037  // Check if it's safe to fold the load. If the size of the object is
5038  // narrower than the load width, then it's not.
5039  if (Size < RCSize)
5040  return nullptr;
5041  // Change to CMPXXri r, 0 first.
5042  MI.setDesc(get(NewOpc));
5043  MI.getOperand(1).ChangeToImmediate(0);
5044  } else if (Ops.size() != 1)
5045  return nullptr;
5046 
5047  return foldMemoryOperandImpl(MF, MI, Ops[0],
5048  MachineOperand::CreateFI(FrameIndex), InsertPt,
5049  Size, Alignment, /*AllowCommute=*/true);
5050 }
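// Example (added for exposition; the register and slot are hypothetical):
// when both operands of
//   TEST32rr %eax, %eax
// would be reloaded from the same spill slot, the code above first rewrites
// the instruction to CMP32ri8 %eax, 0 and then folds the frame index,
// producing roughly
//   cmpl $0, <fi#N>
// so the reload disappears entirely.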
5051 
5052 /// Check if \p LoadMI is a partial register load that we can't fold into \p MI
5053 /// because the latter uses contents that wouldn't be defined in the folded
5054 /// version. For instance, this transformation isn't legal:
5055 /// movss (%rdi), %xmm0
5056 /// addps %xmm0, %xmm0
5057 /// ->
5058 /// addps (%rdi), %xmm0
5059 ///
5060 /// But this one is:
5061 /// movss (%rdi), %xmm0
5062 /// addss %xmm0, %xmm0
5063 /// ->
5064 /// addss (%rdi), %xmm0
5065 ///
5066 static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
5067  const MachineInstr &UserMI,
5068  const MachineFunction &MF) {
5069  unsigned Opc = LoadMI.getOpcode();
5070  unsigned UserOpc = UserMI.getOpcode();
5071  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
5072  const TargetRegisterClass *RC =
5073  MF.getRegInfo().getRegClass(LoadMI.getOperand(0).getReg());
5074  unsigned RegSize = TRI.getRegSizeInBits(*RC);
5075 
5076  if ((Opc == X86::MOVSSrm || Opc == X86::VMOVSSrm || Opc == X86::VMOVSSZrm) &&
5077  RegSize > 32) {
5078  // These instructions only load 32 bits; we can't fold them if the
5079  // destination register is wider than 32 bits (4 bytes) and the user
5080  // instruction isn't scalar (SS).
5081  switch (UserOpc) {
5082  case X86::ADDSSrr_Int: case X86::VADDSSrr_Int: case X86::VADDSSZrr_Int:
5083  case X86::CMPSSrr_Int: case X86::VCMPSSrr_Int: case X86::VCMPSSZrr_Int:
5084  case X86::DIVSSrr_Int: case X86::VDIVSSrr_Int: case X86::VDIVSSZrr_Int:
5085  case X86::MAXSSrr_Int: case X86::VMAXSSrr_Int: case X86::VMAXSSZrr_Int:
5086  case X86::MINSSrr_Int: case X86::VMINSSrr_Int: case X86::VMINSSZrr_Int:
5087  case X86::MULSSrr_Int: case X86::VMULSSrr_Int: case X86::VMULSSZrr_Int:
5088  case X86::SUBSSrr_Int: case X86::VSUBSSrr_Int: case X86::VSUBSSZrr_Int:
5089  case X86::VADDSSZrr_Intk: case X86::VADDSSZrr_Intkz:
5090  case X86::VDIVSSZrr_Intk: case X86::VDIVSSZrr_Intkz:
5091  case X86::VMAXSSZrr_Intk: case X86::VMAXSSZrr_Intkz:
5092  case X86::VMINSSZrr_Intk: case X86::VMINSSZrr_Intkz:
5093  case X86::VMULSSZrr_Intk: case X86::VMULSSZrr_Intkz:
5094  case X86::VSUBSSZrr_Intk: case X86::VSUBSSZrr_Intkz:
5095  case X86::VFMADDSS4rr_Int: case X86::VFNMADDSS4rr_Int:
5096  case X86::VFMSUBSS4rr_Int: case X86::VFNMSUBSS4rr_Int:
5097  case X86::VFMADD132SSr_Int: case X86::VFNMADD132SSr_Int:
5098  case X86::VFMADD213SSr_Int: case X86::VFNMADD213SSr_Int:
5099  case X86::VFMADD231SSr_Int: case X86::VFNMADD231SSr_Int:
5100  case X86::VFMSUB132SSr_Int: case X86::VFNMSUB132SSr_Int:
5101  case X86::VFMSUB213SSr_Int: case X86::VFNMSUB213SSr_Int:
5102  case X86::VFMSUB231SSr_Int: case X86::VFNMSUB231SSr_Int:
5103  case X86::VFMADD132SSZr_Int: case X86::VFNMADD132SSZr_Int:
5104  case X86::VFMADD213SSZr_Int: case X86::VFNMADD213SSZr_Int:
5105  case X86::VFMADD231SSZr_Int: case X86::VFNMADD231SSZr_Int:
5106  case X86::VFMSUB132SSZr_Int: case X86::VFNMSUB132SSZr_Int:
5107  case X86::VFMSUB213SSZr_Int: case X86::VFNMSUB213SSZr_Int:
5108  case X86::VFMSUB231SSZr_Int: case X86::VFNMSUB231SSZr_Int:
5109  case X86::VFMADD132SSZr_Intk: case X86::VFNMADD132SSZr_Intk:
5110  case X86::VFMADD213SSZr_Intk: case X86::VFNMADD213SSZr_Intk:
5111  case X86::VFMADD231SSZr_Intk: case X86::VFNMADD231SSZr_Intk:
5112  case X86::VFMSUB132SSZr_Intk: case X86::VFNMSUB132SSZr_Intk:
5113  case X86::VFMSUB213SSZr_Intk: case X86::VFNMSUB213SSZr_Intk:
5114  case X86::VFMSUB231SSZr_Intk: case X86::VFNMSUB231SSZr_Intk:
5115  case X86::VFMADD132SSZr_Intkz: case X86::VFNMADD132SSZr_Intkz:
5116  case X86::VFMADD213SSZr_Intkz: case X86::VFNMADD213SSZr_Intkz:
5117  case X86::VFMADD231SSZr_Intkz: case X86::VFNMADD231SSZr_Intkz:
5118  case X86::VFMSUB132SSZr_Intkz: case X86::VFNMSUB132SSZr_Intkz:
5119  case X86::VFMSUB213SSZr_Intkz: case X86::VFNMSUB213SSZr_Intkz:
5120  case X86::VFMSUB231SSZr_Intkz: case X86::VFNMSUB231SSZr_Intkz:
5121  return false;
5122  default:
5123  return true;
5124  }
5125  }
5126 
5127  if ((Opc == X86::MOVSDrm || Opc == X86::VMOVSDrm || Opc == X86::VMOVSDZrm) &&
5128  RegSize > 64) {
5129  // These instructions only load 64 bits; we can't fold them if the
5130  // destination register is wider than 64 bits (8 bytes) and the user
5131  // instruction isn't scalar (SD).
5132  switch (UserOpc) {
5133  case X86::ADDSDrr_Int: case X86::VADDSDrr_Int: case X86::VADDSDZrr_Int:
5134  case X86::CMPSDrr_Int: case X86::VCMPSDrr_Int: case X86::VCMPSDZrr_Int:
5135  case X86::DIVSDrr_Int: case X86::VDIVSDrr_Int: case X86::VDIVSDZrr_Int:
5136  case X86::MAXSDrr_Int: case X86::VMAXSDrr_Int: case X86::VMAXSDZrr_Int:
5137  case X86::MINSDrr_Int: case X86::VMINSDrr_Int: case X86::VMINSDZrr_Int:
5138  case X86::MULSDrr_Int: case X86::VMULSDrr_Int: case X86::VMULSDZrr_Int:
5139  case X86::SUBSDrr_Int: case X86::VSUBSDrr_Int: case X86::VSUBSDZrr_Int:
5140  case X86::VADDSDZrr_Intk: case X86::VADDSDZrr_Intkz:
5141  case X86::VDIVSDZrr_Intk: case X86::VDIVSDZrr_Intkz:
5142  case X86::VMAXSDZrr_Intk: case X86::VMAXSDZrr_Intkz:
5143  case X86::VMINSDZrr_Intk: case X86::VMINSDZrr_Intkz:
5144  case X86::VMULSDZrr_Intk: case X86::VMULSDZrr_Intkz:
5145  case X86::VSUBSDZrr_Intk: case X86::VSUBSDZrr_Intkz:
5146  case X86::VFMADDSD4rr_Int: case X86::VFNMADDSD4rr_Int:
5147  case X86::VFMSUBSD4rr_Int: case X86::VFNMSUBSD4rr_Int:
5148  case X86::VFMADD132SDr_Int: case X86::VFNMADD132SDr_Int:
5149  case X86::VFMADD213SDr_Int: case X86::VFNMADD213SDr_Int:
5150  case X86::VFMADD231SDr_Int: case X86::VFNMADD231SDr_Int:
5151  case X86::VFMSUB132SDr_Int: case X86::VFNMSUB132SDr_Int:
5152  case X86::VFMSUB213SDr_Int: case X86::VFNMSUB213SDr_Int:
5153  case X86::VFMSUB231SDr_Int: case X86::VFNMSUB231SDr_Int:
5154  case X86::VFMADD132SDZr_Int: case X86::VFNMADD132SDZr_Int:
5155  case X86::VFMADD213SDZr_Int: case X86::VFNMADD213SDZr_Int:
5156  case X86::VFMADD231SDZr_Int: case X86::VFNMADD231SDZr_Int:
5157  case X86::VFMSUB132SDZr_Int: case X86::VFNMSUB132SDZr_Int:
5158  case X86::VFMSUB213SDZr_Int: case X86::VFNMSUB213SDZr_Int:
5159  case X86::VFMSUB231SDZr_Int: case X86::VFNMSUB231SDZr_Int:
5160  case X86::VFMADD132SDZr_Intk: case X86::VFNMADD132SDZr_Intk:
5161  case X86::VFMADD213SDZr_Intk: case X86::VFNMADD213SDZr_Intk:
5162  case X86::VFMADD231SDZr_Intk: case X86::VFNMADD231SDZr_Intk:
5163  case X86::VFMSUB132SDZr_Intk: case X86::VFNMSUB132SDZr_Intk:
5164  case X86::VFMSUB213SDZr_Intk: case X86::VFNMSUB213SDZr_Intk:
5165  case X86::VFMSUB231SDZr_Intk: case X86::VFNMSUB231SDZr_Intk:
5166  case X86::VFMADD132SDZr_Intkz: case X86::VFNMADD132SDZr_Intkz:
5167  case X86::VFMADD213SDZr_Intkz: case X86::VFNMADD213SDZr_Intkz:
5168  case X86::VFMADD231SDZr_Intkz: case X86::VFNMADD231SDZr_Intkz:
5169  case X86::VFMSUB132SDZr_Intkz: case X86::VFNMSUB132SDZr_Intkz:
5170  case X86::VFMSUB213SDZr_Intkz: case X86::VFNMSUB213SDZr_Intkz:
5171  case X86::VFMSUB231SDZr_Intkz: case X86::VFNMSUB231SDZr_Intkz:
5172  return false;
5173  default:
5174  return true;
5175  }
5176  }
5177 
5178  return false;
5179 }
5180 
5181 MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
5182  MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
5183  MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI,
5184  LiveIntervals *LIS) const {
5185 
5186  // TODO: Support the case where LoadMI loads a wide register, but MI
5187  // only uses a subreg.
5188  for (auto Op : Ops) {
5189  if (MI.getOperand(Op).getSubReg())
5190  return nullptr;
5191  }
5192 
5193  // If loading from a FrameIndex, fold directly from the FrameIndex.
5194  unsigned NumOps = LoadMI.getDesc().getNumOperands();
5195  int FrameIndex;
5196  if (isLoadFromStackSlot(LoadMI, FrameIndex)) {
5197  if (isNonFoldablePartialRegisterLoad(LoadMI, MI, MF))
5198  return nullptr;
5199  return foldMemoryOperandImpl(MF, MI, Ops, InsertPt, FrameIndex, LIS);
5200  }
5201 
5202  // Check switch flag
5203  if (NoFusing) return nullptr;
5204 
5205  // Avoid partial and undef register update stalls unless optimizing for size.
5206  if (!MF.getFunction().optForSize() &&
5207  (hasPartialRegUpdate(MI.getOpcode(), Subtarget) ||
5208  shouldPreventUndefRegUpdateMemFold(MF, MI)))
5209  return nullptr;
5210 
5211  // Determine the alignment of the load.
5212  unsigned Alignment = 0;
5213  if (LoadMI.hasOneMemOperand())
5214  Alignment = (*LoadMI.memoperands_begin())->getAlignment();
5215  else
5216  switch (LoadMI.getOpcode()) {
5217  case X86::AVX512_512_SET0:
5218  case X86::AVX512_512_SETALLONES:
5219  Alignment = 64;
5220  break;
5221  case X86::AVX2_SETALLONES:
5222  case X86::AVX1_SETALLONES:
5223  case X86::AVX_SET0:
5224  case X86::AVX512_256_SET0:
5225  Alignment = 32;
5226  break;
5227  case X86::V_SET0:
5228  case X86::V_SETALLONES:
5229  case X86::AVX512_128_SET0:
5230  Alignment = 16;
5231  break;
5232  case X86::MMX_SET0:
5233  case X86::FsFLD0SD:
5234  case X86::AVX512_FsFLD0SD:
5235  Alignment = 8;
5236  break;
5237  case X86::FsFLD0SS:
5238  case X86::AVX512_FsFLD0SS:
5239  Alignment = 4;
5240  break;
5241  default:
5242  return nullptr;
5243  }
5244  if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
5245  unsigned NewOpc = 0;
5246  switch (MI.getOpcode()) {
5247  default: return nullptr;
5248  case X86::TEST8rr: NewOpc = X86::CMP8ri; break;
5249  case X86::TEST16rr: NewOpc = X86::CMP16ri8; break;
5250  case X86::TEST32rr: NewOpc = X86::CMP32ri8; break;
5251  case X86::TEST64rr: NewOpc = X86::CMP64ri8; break;
5252  }
5253  // Change to CMPXXri r, 0 first.
5254  MI.setDesc(get(NewOpc));
5255  MI.getOperand(1).ChangeToImmediate(0);
5256  } else if (Ops.size() != 1)
5257  return nullptr;
5258 
5259  // Make sure the subregisters match.
5260  // Otherwise we risk changing the size of the load.
5261  if (LoadMI.getOperand(0).getSubReg() != MI.getOperand(Ops[0]).getSubReg())
5262  return nullptr;
5263 
5264  SmallVector<MachineOperand,X86::AddrNumOperands> MOs;
5265  switch (LoadMI.getOpcode()) {
5266  case X86::MMX_SET0:
5267  case X86::V_SET0:
5268  case X86::V_SETALLONES:
5269  case X86::AVX2_SETALLONES:
5270  case X86::AVX1_SETALLONES:
5271  case X86::AVX_SET0:
5272  case X86::AVX512_128_SET0:
5273  case X86::AVX512_256_SET0:
5274  case X86::AVX512_512_SET0:
5275  case X86::AVX512_512_SETALLONES:
5276  case X86::FsFLD0SD:
5277  case X86::AVX512_FsFLD0SD:
5278  case X86::FsFLD0SS:
5279  case X86::AVX512_FsFLD0SS: {
5280  // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
5281  // Create a constant-pool entry and operands to load from it.
5282 
5283  // Medium and large code models can't fold loads this way.
5284  if (MF.getTarget().getCodeModel() != CodeModel::Small &&
5285  MF.getTarget().getCodeModel() != CodeModel::Kernel)
5286  return nullptr;
5287 
5288  // x86-32 PIC requires a PIC base register for constant pools.
5289  unsigned PICBase = 0;
5290  if (MF.getTarget().isPositionIndependent()) {
5291  if (Subtarget.is64Bit())
5292  PICBase = X86::RIP;
5293  else
5294  // FIXME: PICBase = getGlobalBaseReg(&MF);
5295  // This doesn't work for several reasons.
5296  // 1. GlobalBaseReg may have been spilled.
5297  // 2. It may not be live at MI.
5298  return nullptr;
5299  }
5300 
5301  // Create a constant-pool entry.
5302  MachineConstantPool &MCP = *MF.getConstantPool();
5303  Type *Ty;
5304  unsigned Opc = LoadMI.getOpcode();
5305  if (Opc == X86::FsFLD0SS || Opc == X86::AVX512_FsFLD0SS)
5306  Ty = Type::getFloatTy(MF.getFunction().getContext());
5307  else if (Opc == X86::FsFLD0SD || Opc == X86::AVX512_FsFLD0SD)
5308  Ty = Type::getDoubleTy(MF.getFunction().getContext());
5309  else if (Opc == X86::AVX512_512_SET0 || Opc == X86::AVX512_512_SETALLONES)
5310  Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),16);
5311  else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX_SET0 ||
5312  Opc == X86::AVX512_256_SET0 || Opc == X86::AVX1_SETALLONES)
5313  Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()), 8);
5314  else if (Opc == X86::MMX_SET0)
5315  Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()), 2);
5316  else
5317  Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()), 4);
5318 
5319  bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX2_SETALLONES ||
5320  Opc == X86::AVX512_512_SETALLONES ||
5321  Opc == X86::AVX1_SETALLONES);
5322  const Constant *C = IsAllOnes ? Constant::getAllOnesValue(Ty) :
5323  Constant::getNullValue(Ty);
5324  unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);
5325 
5326  // Create operands to load from the constant pool entry.
5327  MOs.push_back(MachineOperand::CreateReg(PICBase, false));
5328  MOs.push_back(MachineOperand::CreateImm(1));
5329  MOs.push_back(MachineOperand::CreateReg(0, false));
5330  MOs.push_back(MachineOperand::CreateCPI(CPI, 0));
5331  MOs.push_back(MachineOperand::CreateReg(0, false));
5332  break;
5333  }
5334  default: {
5335  if (isNonFoldablePartialRegisterLoad(LoadMI, MI, MF))
5336  return nullptr;
5337 
5338  // Folding a normal load. Just copy the load's address operands.
5339  MOs.append(LoadMI.operands_begin() + NumOps - X86::AddrNumOperands,
5340  LoadMI.operands_begin() + NumOps);
5341  break;
5342  }
5343  }
5344  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, InsertPt,
5345  /*Size=*/0, Alignment, /*AllowCommute=*/true);
5346 }
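// Sketch (added for exposition, not in the original file): folding a V_SET0
// "load". For
//   %xmm1 = V_SET0
//   addps %xmm1, %xmm0
// the code above creates an all-zeros constant-pool entry and folds a load of
// it into the user, roughly
//   addps .LCPI0_0(%rip), %xmm0    ; RIP is the PIC base in 64-bit mode
// trading a live register for a constant-pool load to ease register pressure.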
5347 
5348 static SmallVector<MachineMemOperand *, 2>
5349 extractLoadMMOs(ArrayRef<MachineMemOperand *> MMOs, MachineFunction &MF) {
5350  SmallVector<MachineMemOperand *, 2> LoadMMOs;
5351 
5352  for (MachineMemOperand *MMO : MMOs) {
5353  if (!MMO->isLoad())
5354  continue;
5355 
5356  if (!MMO->isStore()) {
5357  // Reuse the MMO.
5358  LoadMMOs.push_back(MMO);
5359  } else {
5360  // Clone the MMO and unset the store flag.
5361  LoadMMOs.push_back(MF.getMachineMemOperand(
5362  MMO->getPointerInfo(), MMO->getFlags() & ~MachineMemOperand::MOStore,
5363  MMO->getSize(), MMO->getBaseAlignment(), MMO->getAAInfo(), nullptr,
5364  MMO->getSyncScopeID(), MMO->getOrdering(),
5365  MMO->getFailureOrdering()));
5366  }
5367  }
5368 
5369  return LoadMMOs;
5370 }
5371 
5372 static SmallVector<MachineMemOperand *, 2>
5373 extractStoreMMOs(ArrayRef<MachineMemOperand *> MMOs, MachineFunction &MF) {
5374  SmallVector<MachineMemOperand *, 2> StoreMMOs;
5375 
5376  for (MachineMemOperand *MMO : MMOs) {
5377  if (!MMO->isStore())
5378  continue;
5379 
5380  if (!MMO->isLoad()) {
5381  // Reuse the MMO.
5382  StoreMMOs.push_back(MMO);
5383  } else {
5384  // Clone the MMO and unset the load flag.
5385  StoreMMOs.push_back(MF.getMachineMemOperand(
5386  MMO->getPointerInfo(), MMO->getFlags() & ~MachineMemOperand::MOLoad,
5387  MMO->getSize(), MMO->getBaseAlignment(), MMO->getAAInfo(), nullptr,
5388  MMO->getSyncScopeID(), MMO->getOrdering(),
5389  MMO->getFailureOrdering()));
5390  }
5391  }
5392 
5393  return StoreMMOs;
5394 }
5395 
5396 bool X86InstrInfo::unfoldMemoryOperand(
5397  MachineFunction &MF, MachineInstr &MI, unsigned Reg, bool UnfoldLoad,
5398  bool UnfoldStore, SmallVectorImpl<MachineInstr *> &NewMIs) const {
5399  const X86MemoryFoldTableEntry *I = lookupUnfoldTable(MI.getOpcode());
5400  if (I == nullptr)
5401  return false;
5402  unsigned Opc = I->DstOp;
5403  unsigned Index = I->Flags & TB_INDEX_MASK;
5404  bool FoldedLoad = I->Flags & TB_FOLDED_LOAD;
5405  bool FoldedStore = I->Flags & TB_FOLDED_STORE;
5406  if (UnfoldLoad && !FoldedLoad)
5407  return false;
5408  UnfoldLoad &= FoldedLoad;
5409  if (UnfoldStore && !FoldedStore)
5410  return false;
5411  UnfoldStore &= FoldedStore;
5412 
5413  const MCInstrDesc &MCID = get(Opc);
5414  const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF);
5415  // TODO: Check if 32-byte or greater accesses are slow too?
5416  if (!MI.hasOneMemOperand() && RC == &X86::VR128RegClass &&
5417  Subtarget.isUnalignedMem16Slow())
5418  // Without memoperands, loadRegFromAddr and storeRegToStackSlot will
5419  // conservatively assume the address is unaligned. That's bad for
5420  // performance.
5421  return false;
5422  SmallVector<MachineOperand, X86::AddrNumOperands> AddrOps;
5423  SmallVector<MachineOperand,2> BeforeOps;
5424  SmallVector<MachineOperand,2> AfterOps;
5425  SmallVector<MachineOperand,4> ImpOps;
5426  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
5427  MachineOperand &Op = MI.getOperand(i);
5428  if (i >= Index && i < Index + X86::AddrNumOperands)
5429  AddrOps.push_back(Op);
5430  else if (Op.isReg() && Op.isImplicit())
5431  ImpOps.push_back(Op);
5432  else if (i < Index)
5433  BeforeOps.push_back(Op);
5434  else if (i > Index)
5435  AfterOps.push_back(Op);
5436  }
5437 
5438  // Emit the load instruction.
5439  if (UnfoldLoad) {
5440  auto MMOs = extractLoadMMOs(MI.memoperands(), MF);
5441  loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs, NewMIs);
5442  if (UnfoldStore) {
5443  // Address operands cannot be marked isKill.
5444  for (unsigned i = 1; i != 1 + X86::AddrNumOperands; ++i) {
5445  MachineOperand &MO = NewMIs[0]->getOperand(i);
5446  if (MO.isReg())
5447  MO.setIsKill(false);
5448  }
5449  }
5450  }
5451 
5452  // Emit the data processing instruction.
5453  MachineInstr *DataMI = MF.CreateMachineInstr(MCID, MI.getDebugLoc(), true);
5454  MachineInstrBuilder MIB(MF, DataMI);
5455 
5456  if (FoldedStore)
5457  MIB.addReg(Reg, RegState::Define);
5458  for (MachineOperand &BeforeOp : BeforeOps)
5459  MIB.add(BeforeOp);
5460  if (FoldedLoad)
5461  MIB.addReg(Reg);
5462  for (MachineOperand &AfterOp : AfterOps)
5463  MIB.add(AfterOp);
5464  for (MachineOperand &ImpOp : ImpOps) {
5465  MIB.addReg(ImpOp.getReg(),
5466  getDefRegState(ImpOp.isDef()) |
5467  RegState::Implicit |
5468  getKillRegState(ImpOp.isKill()) |
5469  getDeadRegState(ImpOp.isDead()) |
5470  getUndefRegState(ImpOp.isUndef()));
5471  }
5472  // Change CMP32ri r, 0 back to TEST32rr r, r, etc.
5473  switch (DataMI->getOpcode()) {
5474  default: break;
5475  case X86::CMP64ri32:
5476  case X86::CMP64ri8:
5477  case X86::CMP32ri:
5478  case X86::CMP32ri8:
5479  case X86::CMP16ri:
5480  case X86::CMP16ri8:
5481  case X86::CMP8ri: {
5482  MachineOperand &MO0 = DataMI->getOperand(0);
5483  MachineOperand &MO1 = DataMI->getOperand(1);
5484  if (MO1.getImm() == 0) {
5485  unsigned NewOpc;
5486  switch (DataMI->getOpcode()) {
5487  default: llvm_unreachable("Unreachable!");
5488  case X86::CMP64ri8:
5489  case X86::CMP64ri32: NewOpc = X86::TEST64rr; break;
5490  case X86::CMP32ri8:
5491  case X86::CMP32ri: NewOpc = X86::TEST32rr; break;
5492  case X86::CMP16ri8:
5493  case X86::CMP16ri: NewOpc = X86::TEST16rr; break;
5494  case X86::CMP8ri: NewOpc = X86::TEST8rr; break;
5495  }
5496  DataMI->setDesc(get(NewOpc));
5497  MO1.ChangeToRegister(MO0.getReg(), false);
5498  }
5499  }
5500  }
5501  NewMIs.push_back(DataMI);
5502 
5503  // Emit the store instruction.
5504  if (UnfoldStore) {
5505  const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI, MF);
5506  auto MMOs = extractStoreMMOs(MI.memoperands(), MF);
5507  storeRegToAddr(MF, Reg, true, AddrOps, DstRC, MMOs, NewMIs);
5508  }
5509 
5510  return true;
5511 }
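// Usage note (added for exposition): this unfold path is the inverse of the
// folds above. For a folded instruction such as
//   addps (%rdi), %xmm0
// it appends to NewMIs a load of the address into Reg (when UnfoldLoad is
// set), the register-register form of the operation, and, when the fold had
// also absorbed a store, a store of the result back to the same address.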
5512 
5513 bool
5514 X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
5515  SmallVectorImpl<SDNode*> &NewNodes) const {
5516  if (!N->isMachineOpcode())
5517  return false;
5518 
5519  const X86MemoryFoldTableEntry *I = lookupUnfoldTable(N->getMachineOpcode());
5520  if (I == nullptr)
5521  return false;
5522  unsigned Opc = I->DstOp;
5523  unsigned Index = I->Flags & TB_INDEX_MASK;
5524  bool FoldedLoad = I->Flags & TB_FOLDED_LOAD;
5525  bool FoldedStore = I->Flags & TB_FOLDED_STORE;
5526  const MCInstrDesc &MCID = get(Opc);
5527  MachineFunction &MF = DAG.getMachineFunction();
5528  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
5529  const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF);
5530  unsigned NumDefs = MCID.NumDefs;
5531  std::vector<SDValue> AddrOps;
5532  std::vector<SDValue> BeforeOps;
5533  std::vector<SDValue> AfterOps;
5534  SDLoc dl(N);
5535  unsigned NumOps = N->getNumOperands();
5536  for (unsigned i = 0; i != NumOps-1; ++i) {
5537  SDValue Op = N->getOperand(i);
5538  if (i >= Index-NumDefs && i < Index-NumDefs + X86::AddrNumOperands)
5539  AddrOps.push_back(Op);
5540  else if (i < Index-NumDefs)
5541  BeforeOps.push_back(Op);
5542  else if (i > Index-NumDefs)
5543  AfterOps.push_back(Op);
5544  }
5545  SDValue Chain = N->getOperand(NumOps-1);
5546  AddrOps.push_back(Chain);
5547 
5548  // Emit the load instruction.
5549  SDNode *Load = nullptr;
5550  if (FoldedLoad) {
5551  EVT VT = *TRI.legalclasstypes_begin(*RC);
5552  auto MMOs = extractLoadMMOs(cast<MachineSDNode>(N)->memoperands(), MF);
5553  if (MMOs.empty() && RC == &X86::VR128RegClass &&
5554  Subtarget.isUnalignedMem16Slow())
5555  // Do not introduce a slow unaligned load.
5556  return false;
5557  // FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
5558  // memory access is slow above.
5559  unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
5560  bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
5561  Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, Subtarget), dl,
5562  VT, MVT::Other, AddrOps);
5563  NewNodes.push_back(Load);
5564 
5565  // Preserve memory reference information.
5566  DAG.setNodeMemRefs(cast<MachineSDNode>(Load), MMOs);
5567  }
5568 
5569  // Emit the data processing instruction.
5570  std::vector<EVT> VTs;
5571  const TargetRegisterClass *DstRC = nullptr;
5572  if (MCID.getNumDefs() > 0) {
5573  DstRC = getRegClass(MCID, 0, &RI, MF);
5574  VTs.push_back(*TRI.legalclasstypes_begin(*DstRC));
5575  }
5576  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
5577  EVT VT = N->getValueType(i);
5578  if (VT != MVT::Other && i >= (unsigned)MCID.getNumDefs())
5579  VTs.push_back(VT);
5580  }
5581  if (Load)
5582  BeforeOps.push_back(SDValue(Load, 0));
5583  BeforeOps.insert(BeforeOps.end(), AfterOps.begin(), AfterOps.end());
5584  // Change CMP32ri r, 0 back to TEST32rr r, r, etc.
5585  switch (Opc) {
5586  default: break;
5587  case X86::CMP64ri32:
5588  case X86::CMP64ri8:
5589  case X86::CMP32ri:
5590  case X86::CMP32ri8:
5591  case X86::CMP16ri:
5592  case X86::CMP16ri8:
5593  case X86::CMP8ri:
5594  if (isNullConstant(BeforeOps[1])) {
5595  switch (Opc) {
5596  default: llvm_unreachable("Unreachable!");
5597  case X86::CMP64ri8:
5598  case X86::CMP64ri32: Opc = X86::TEST64rr; break;
5599  case X86::CMP32ri8:
5600  case X86::CMP32ri: Opc = X86::TEST32rr; break;
5601  case X86::CMP16ri8:
5602  case X86::CMP16ri: Opc = X86::TEST16rr; break;
5603  case X86::CMP8ri: Opc = X86::TEST8rr; break;
5604  }
5605  BeforeOps[1] = BeforeOps[0];
5606  }
5607  }
5608  SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, BeforeOps);
5609  NewNodes.push_back(NewNode);
5610 
5611  // Emit the store instruction.
5612  if (FoldedStore) {
5613  AddrOps.pop_back();
5614  AddrOps.push_back(SDValue(NewNode, 0));
5615  AddrOps.push_back(Chain);
5616  auto MMOs = extractStoreMMOs(cast<MachineSDNode>(N)->memoperands(), MF);
5617  if (MMOs.empty() && RC == &X86::VR128RegClass &&
5618  Subtarget.isUnalignedMem16Slow())
5619  // Do not introduce a slow unaligned store.
5620  return false;
5621  // FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
5622  // memory access is slow above.
5623  unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
5624  bool isAligned = !MMOs.empty() && MMOs.front()->getAlignment() >= Alignment;
5625  SDNode *Store =
5626  DAG.getMachineNode(getStoreRegOpcode(0, DstRC, isAligned, Subtarget),
5627  dl, MVT::Other, AddrOps);
5628  NewNodes.push_back(Store);
5629 
5630  // Preserve memory reference information.
5631  DAG.setNodeMemRefs(cast<MachineSDNode>(Store), MMOs);
5632  }
5633 
5634  return true;
5635 }
5636 
5637 unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
5638  bool UnfoldLoad, bool UnfoldStore,
5639  unsigned *LoadRegIndex) const {
5640  const X86MemoryFoldTableEntry *I = lookupUnfoldTable(Opc);
5641  if (I == nullptr)
5642  return 0;
5643  bool FoldedLoad = I->Flags & TB_FOLDED_LOAD;
5644  bool FoldedStore = I->Flags & TB_FOLDED_STORE;
5645  if (UnfoldLoad && !FoldedLoad)
5646  return 0;
5647  if (UnfoldStore && !FoldedStore)
5648  return 0;
5649  if (LoadRegIndex)
5650  *LoadRegIndex = I->Flags & TB_INDEX_MASK;
5651  return I->DstOp;
5652 }
5653 
5654 bool
5655 X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
5656  int64_t &Offset1, int64_t &Offset2) const {
5657  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
5658  return false;
5659  unsigned Opc1 = Load1->getMachineOpcode();
5660  unsigned Opc2 = Load2->getMachineOpcode();
5661  switch (Opc1) {
5662  default: return false;
5663  case X86::MOV8rm:
5664  case X86::MOV16rm:
5665  case X86::MOV32rm:
5666  case X86::MOV64rm:
5667  case X86::LD_Fp32m:
5668  case X86::LD_Fp64m:
5669  case X86::LD_Fp80m:
5670  case X86::MOVSSrm:
5671  case X86::MOVSDrm:
5672  case X86::MMX_MOVD64rm:
5673  case X86::MMX_MOVQ64rm:
5674  case X86::MOVAPSrm:
5675  case X86::MOVUPSrm:
5676  case X86::MOVAPDrm:
5677  case X86::MOVUPDrm:
5678  case X86::MOVDQArm:
5679  case X86::MOVDQUrm:
5680  // AVX load instructions
5681  case X86::VMOVSSrm:
5682  case X86::VMOVSDrm:
5683  case X86::VMOVAPSrm:
5684  case X86::VMOVUPSrm:
5685  case X86::VMOVAPDrm:
5686  case X86::VMOVUPDrm:
5687  case X86::VMOVDQArm:
5688  case X86::VMOVDQUrm:
5689  case X86::VMOVAPSYrm:
5690  case X86::VMOVUPSYrm:
5691  case X86::VMOVAPDYrm:
5692  case X86::VMOVUPDYrm:
5693  case X86::VMOVDQAYrm:
5694  case X86::VMOVDQUYrm:
5695  // AVX512 load instructions
5696  case X86::VMOVSSZrm:
5697  case X86::VMOVSDZrm:
5698  case X86::VMOVAPSZ128rm:
5699  case X86::VMOVUPSZ128rm:
5700  case X86::VMOVAPSZ128rm_NOVLX:
5701  case X86::VMOVUPSZ128rm_NOVLX:
5702  case X86::VMOVAPDZ128rm:
5703  case X86::VMOVUPDZ128rm:
5704  case X86::VMOVDQU8Z128rm:
5705  case X86::VMOVDQU16Z128rm:
5706  case X86::VMOVDQA32Z128rm:
5707  case X86::VMOVDQU32Z128rm:
5708  case X86::VMOVDQA64Z128rm:
5709  case X86::VMOVDQU64Z128rm:
5710  case X86::VMOVAPSZ256rm:
5711  case X86::VMOVUPSZ256rm:
5712  case X86::VMOVAPSZ256rm_NOVLX:
5713  case X86::VMOVUPSZ256rm_NOVLX:
5714  case X86::VMOVAPDZ256rm:
5715  case X86::VMOVUPDZ256rm:
5716  case X86::VMOVDQU8Z256rm:
5717  case X86::VMOVDQU16Z256rm:
5718  case X86::VMOVDQA32Z256rm:
5719  case X86::VMOVDQU32Z256rm:
5720  case X86::VMOVDQA64Z256rm:
5721  case X86::VMOVDQU64Z256rm:
5722  case X86::VMOVAPSZrm:
5723  case X86::VMOVUPSZrm:
5724  case X86::VMOVAPDZrm:
5725  case X86::VMOVUPDZrm:
5726  case X86::VMOVDQU8Zrm:
5727  case X86::VMOVDQU16Zrm:
5728  case X86::VMOVDQA32Zrm:
5729  case X86::VMOVDQU32Zrm:
5730  case X86::VMOVDQA64Zrm:
5731  case X86::VMOVDQU64Zrm:
5732  case X86::KMOVBkm:
5733  case X86::KMOVWkm:
5734  case X86::KMOVDkm:
5735  case X86::KMOVQkm:
5736  break;
5737  }
5738  switch (Opc2) {
5739  default: return false;
5740  case X86::MOV8rm:
5741  case X86::MOV16rm:
5742  case X86::MOV32rm:
5743  case X86::MOV64rm:
5744  case X86::LD_Fp32m:
5745  case X86::LD_Fp64m:
5746  case X86::LD_Fp80m:
5747  case X86::MOVSSrm:
5748  case X86::MOVSDrm:
5749  case X86::MMX_MOVD64rm:
5750  case X86::MMX_MOVQ64rm:
5751  case X86::MOVAPSrm:
5752  case X86::MOVUPSrm:
5753  case X86::MOVAPDrm:
5754  case X86::MOVUPDrm:
5755  case X86::MOVDQArm:
5756  case X86::MOVDQUrm:
5757  // AVX load instructions
5758  case X86::VMOVSSrm:
5759  case X86::VMOVSDrm:
5760  case X86::VMOVAPSrm:
5761  case X86::VMOVUPSrm:
5762  case X86::VMOVAPDrm:
5763  case X86::VMOVUPDrm:
5764  case X86::VMOVDQArm:
5765  case X86::VMOVDQUrm:
5766  case X86::VMOVAPSYrm:
5767  case X86::VMOVUPSYrm:
5768  case X86::VMOVAPDYrm:
5769  case X86::VMOVUPDYrm:
5770  case X86::VMOVDQAYrm:
5771  case X86::VMOVDQUYrm:
5772  // AVX512 load instructions
5773  case X86::VMOVSSZrm:
5774  case X86::VMOVSDZrm:
5775  case X86::VMOVAPSZ128rm:
5776  case X86::VMOVUPSZ128rm:
5777  case X86::VMOVAPSZ128rm_NOVLX:
5778  case X86::VMOVUPSZ128rm_NOVLX:
5779  case X86::VMOVAPDZ128rm:
5780  case X86::VMOVUPDZ128rm:
5781  case X86::VMOVDQU8Z128rm:
5782  case X86::VMOVDQU16Z128rm:
5783  case X86::VMOVDQA32Z128rm:
5784  case X86::VMOVDQU32Z128rm:
5785  case X86::VMOVDQA64Z128rm:
5786  case X86::VMOVDQU64Z128rm:
5787  case X86::VMOVAPSZ256rm:
5788  case X86::VMOVUPSZ256rm:
5789  case X86::VMOVAPSZ256rm_NOVLX:
5790  case X86::VMOVUPSZ256rm_NOVLX:
5791  case X86::VMOVAPDZ256rm:
5792  case X86::VMOVUPDZ256rm:
5793  case X86::VMOVDQU8Z256rm:
5794  case X86::VMOVDQU16Z256rm:
5795  case X86::VMOVDQA32Z256rm:
5796  case X86::VMOVDQU32Z256rm:
5797  case X86::VMOVDQA64Z256rm:
5798  case X86::VMOVDQU64Z256rm:
5799  case X86::VMOVAPSZrm:
5800  case X86::VMOVUPSZrm:
5801  case X86::VMOVAPDZrm:
5802  case X86::VMOVUPDZrm:
5803  case X86::VMOVDQU8Zrm:
5804  case X86::VMOVDQU16Zrm:
5805  case X86::VMOVDQA32Zrm:
5806  case X86::VMOVDQU32Zrm:
5807  case X86::VMOVDQA64Zrm:
5808  case X86::VMOVDQU64Zrm:
5809  case X86::KMOVBkm:
5810  case X86::KMOVWkm:
5811  case X86::KMOVDkm:
5812  case X86::KMOVQkm:
5813  break;
5814  }
5815 
5816  // Lambda to check if both the loads have the same value for an operand index.
5817  auto HasSameOp = [&](int I) {
5818  return Load1->getOperand(I) == Load2->getOperand(I);
5819  };
5820 
5821  // All operands except the displacement should match.
5822  if (!HasSameOp(X86::AddrBaseReg) || !HasSameOp(X86::AddrScaleAmt) ||
5823  !HasSameOp(X86::AddrIndexReg) || !HasSameOp(X86::AddrSegmentReg))
5824  return false;
5825 
5826  // Chain Operand must be the same.
5827  if (!HasSameOp(5))
5828  return false;
5829 
5830  // Now let's examine if the displacements are constants.
5831  auto Disp1 = dyn_cast<ConstantSDNode>(Load1->getOperand(X86::AddrDisp));
5832  auto Disp2 = dyn_cast<ConstantSDNode>(Load2->getOperand(X86::AddrDisp));
5833  if (!Disp1 || !Disp2)
5834  return false;
5835 
5836  Offset1 = Disp1->getSExtValue();
5837  Offset2 = Disp2->getSExtValue();
5838  return true;
5839 }
5840 
5841 bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
5842  int64_t Offset1, int64_t Offset2,
5843  unsigned NumLoads) const {
5844  assert(Offset2 > Offset1);
5845  if ((Offset2 - Offset1) / 8 > 64)
5846  return false;
5847 
5848  unsigned Opc1 = Load1->getMachineOpcode();
5849  unsigned Opc2 = Load2->getMachineOpcode();
5850  if (Opc1 != Opc2)
5851  return false; // FIXME: overly conservative?
5852 
5853  switch (Opc1) {
5854  default: break;
5855  case X86::LD_Fp32m:
5856  case X86::LD_Fp64m:
5857  case X86::LD_Fp80m:
5858  case X86::MMX_MOVD64rm:
5859  case X86::MMX_MOVQ64rm:
5860  return false;
5861  }
5862 
5863  EVT VT = Load1->getValueType(0);
5864  switch (VT.getSimpleVT().SimpleTy) {
5865  default:
5866  // XMM registers. In 64-bit mode we can be a bit more aggressive since we
5867  // have 16 of them to play with.
5868  if (Subtarget.is64Bit()) {
5869  if (NumLoads >= 3)
5870  return false;
5871  } else if (NumLoads) {
5872  return false;
5873  }
5874  break;
5875  case MVT::i8:
5876  case MVT::i16:
5877  case MVT::i32:
5878  case MVT::i64:
5879  case MVT::f32:
5880  case MVT::f64:
5881  if (NumLoads)
5882  return false;
5883  break;
5884  }
5885 
5886  return true;
5887 }
5888 
5889 bool X86InstrInfo::
5890 reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
5891  assert(Cond.size() == 1 && "Invalid X86 branch condition!");
5892  X86::CondCode CC = static_cast<X86::CondCode>(Cond[0].getImm());
5893  Cond[0].setImm(GetOppositeBranchCondition(CC));
5894  return false;
5895 }
5896 
5897 bool X86InstrInfo::
5898 isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
5899  // FIXME: Return false for x87 stack register classes for now. We can't
5900  // allow any loads of these registers before FpGet_ST0_80.
5901  return !(RC == &X86::CCRRegClass || RC == &X86::DFCCRRegClass ||
5902  RC == &X86::RFP32RegClass || RC == &X86::RFP64RegClass ||
5903  RC == &X86::RFP80RegClass);
5904 }
5905 
5906 /// Return a virtual register initialized with the global base register
5907 /// value. Output instructions required to initialize the register in the
5908 /// function entry block, if necessary.
5909 ///
5910 /// TODO: Eliminate this and move the code to X86MachineFunctionInfo.
5911 ///
5912 unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
5913  assert((!Subtarget.is64Bit() ||
5914  MF->getTarget().getCodeModel() == CodeModel::Medium ||
5915  MF->getTarget().getCodeModel() == CodeModel::Large) &&
5916  "X86-64 PIC uses RIP relative addressing");
5917 
5918  X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
5919  unsigned GlobalBaseReg = X86FI->getGlobalBaseReg();
5920  if (GlobalBaseReg != 0)
5921  return GlobalBaseReg;
5922 
5923  // Create the register. The code to initialize it is inserted
5924  // later, by the CGBR pass (below).
5925  MachineRegisterInfo &RegInfo = MF->getRegInfo();
5926  GlobalBaseReg = RegInfo.createVirtualRegister(
5927  Subtarget.is64Bit() ? &X86::GR64_NOSPRegClass : &X86::GR32_NOSPRegClass);
5928  X86FI->setGlobalBaseReg(GlobalBaseReg);
5929  return GlobalBaseReg;
5930 }
5931 
5932 // These are the replaceable SSE instructions. Some of these have Int variants
5933 // that we don't include here. We don't want to replace instructions selected
5934 // by intrinsics.
5935 static const uint16_t ReplaceableInstrs[][3] = {
5936  //PackedSingle PackedDouble PackedInt
5937  { X86::MOVAPSmr, X86::MOVAPDmr, X86::MOVDQAmr },
5938  { X86::MOVAPSrm, X86::MOVAPDrm, X86::MOVDQArm },
5939  { X86::MOVAPSrr, X86::MOVAPDrr, X86::MOVDQArr },
5940  { X86::MOVUPSmr, X86::MOVUPDmr, X86::MOVDQUmr },
5941  { X86::MOVUPSrm, X86::MOVUPDrm, X86::MOVDQUrm },
5942  { X86::MOVLPSmr, X86::MOVLPDmr, X86::MOVPQI2QImr },
5943  { X86::MOVSDmr, X86::MOVSDmr, X86::MOVPQI2QImr },
5944  { X86::MOVSSmr, X86::MOVSSmr, X86::MOVPDI2DImr },
5945  { X86::MOVSDrm, X86::MOVSDrm, X86::MOVQI2PQIrm },
5946  { X86::MOVSSrm, X86::MOVSSrm, X86::MOVDI2PDIrm },
5947  { X86::MOVNTPSmr, X86::MOVNTPDmr, X86::MOVNTDQmr },
5948  { X86::ANDNPSrm, X86::ANDNPDrm, X86::PANDNrm },
5949  { X86::ANDNPSrr, X86::ANDNPDrr, X86::PANDNrr },
5950  { X86::ANDPSrm, X86::ANDPDrm, X86::PANDrm },
5951  { X86::ANDPSrr, X86::ANDPDrr, X86::PANDrr },
5952  { X86::ORPSrm, X86::ORPDrm, X86::PORrm },
5953  { X86::ORPSrr, X86::ORPDrr, X86::PORrr },
5954  { X86::XORPSrm, X86::XORPDrm, X86::PXORrm },
5955  { X86::XORPSrr, X86::XORPDrr, X86::PXORrr },
5956  { X86::UNPCKLPDrm, X86::UNPCKLPDrm, X86::PUNPCKLQDQrm },
5957  { X86::MOVLHPSrr, X86::UNPCKLPDrr, X86::PUNPCKLQDQrr },
5958  { X86::UNPCKHPDrm, X86::UNPCKHPDrm, X86::PUNPCKHQDQrm },
5959  { X86::UNPCKHPDrr, X86::UNPCKHPDrr, X86::PUNPCKHQDQrr },
5960  { X86::UNPCKLPSrm, X86::UNPCKLPSrm, X86::PUNPCKLDQrm },
5961  { X86::UNPCKLPSrr, X86::UNPCKLPSrr, X86::PUNPCKLDQrr },
5962  { X86::UNPCKHPSrm, X86::UNPCKHPSrm, X86::PUNPCKHDQrm },
5963  { X86::UNPCKHPSrr, X86::UNPCKHPSrr, X86::PUNPCKHDQrr },
5964  { X86::EXTRACTPSmr, X86::EXTRACTPSmr, X86::PEXTRDmr },
5965  { X86::EXTRACTPSrr, X86::EXTRACTPSrr, X86::PEXTRDrr },
5966  // AVX 128-bit support
5967  { X86::VMOVAPSmr, X86::VMOVAPDmr, X86::VMOVDQAmr },
5968  { X86::VMOVAPSrm, X86::VMOVAPDrm, X86::VMOVDQArm },
5969  { X86::VMOVAPSrr, X86::VMOVAPDrr, X86::VMOVDQArr },
5970  { X86::VMOVUPSmr, X86::VMOVUPDmr, X86::VMOVDQUmr },
5971  { X86::VMOVUPSrm, X86::VMOVUPDrm, X86::VMOVDQUrm },
5972  { X86::VMOVLPSmr, X86::VMOVLPDmr, X86::VMOVPQI2QImr },
5973  { X86::VMOVSDmr, X86::VMOVSDmr, X86::VMOVPQI2QImr },
5974  { X86::VMOVSSmr, X86::VMOVSSmr, X86::VMOVPDI2DImr },
5975  { X86::VMOVSDrm, X86::VMOVSDrm, X86::VMOVQI2PQIrm },
5976  { X86::VMOVSSrm, X86::VMOVSSrm, X86::VMOVDI2PDIrm },
5977  { X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr },
5978  { X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm },
5979  { X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr },
5980  { X86::VANDPSrm, X86::VANDPDrm, X86::VPANDrm },
5981  { X86::VANDPSrr, X86::VANDPDrr, X86::VPANDrr },
5982  { X86::VORPSrm, X86::VORPDrm, X86::VPORrm },
5983  { X86::VORPSrr, X86::VORPDrr, X86::VPORrr },
5984  { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm },
5985  { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr },
5986  { X86::VUNPCKLPDrm, X86::VUNPCKLPDrm, X86::VPUNPCKLQDQrm },
5987  { X86::VMOVLHPSrr, X86::VUNPCKLPDrr, X86::VPUNPCKLQDQrr },
5988  { X86::VUNPCKHPDrm, X86::VUNPCKHPDrm, X86::VPUNPCKHQDQrm },
5989  { X86::VUNPCKHPDrr, X86::VUNPCKHPDrr, X86::VPUNPCKHQDQrr },
5990  { X86::VUNPCKLPSrm, X86::VUNPCKLPSrm, X86::VPUNPCKLDQrm },
5991  { X86::VUNPCKLPSrr, X86::VUNPCKLPSrr, X86::VPUNPCKLDQrr },
5992  { X86::VUNPCKHPSrm, X86::VUNPCKHPSrm, X86::VPUNPCKHDQrm },
5993  { X86::VUNPCKHPSrr, X86::VUNPCKHPSrr, X86::VPUNPCKHDQrr },
5994  { X86::VEXTRACTPSmr, X86::VEXTRACTPSmr, X86::VPEXTRDmr },
5995  { X86::VEXTRACTPSrr, X86::VEXTRACTPSrr, X86::VPEXTRDrr },
5996  // AVX 256-bit support
5997  { X86::VMOVAPSYmr, X86::VMOVAPDYmr, X86::VMOVDQAYmr },
5998  { X86::VMOVAPSYrm, X86::VMOVAPDYrm, X86::VMOVDQAYrm },
5999  { X86::VMOVAPSYrr, X86::VMOVAPDYrr, X86::VMOVDQAYrr },
6000  { X86::VMOVUPSYmr, X86::VMOVUPDYmr, X86::VMOVDQUYmr },
6001  { X86::VMOVUPSYrm, X86::VMOVUPDYrm, X86::VMOVDQUYrm },
6002  { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr },
6003  { X86::VPERMPSYrm, X86::VPERMPSYrm, X86::VPERMDYrm },
6004  { X86::VPERMPSYrr, X86::VPERMPSYrr, X86::VPERMDYrr },
6005  { X86::VPERMPDYmi, X86::VPERMPDYmi, X86::VPERMQYmi },
6006  { X86::VPERMPDYri, X86::VPERMPDYri, X86::VPERMQYri },
6007  // AVX512 support
6008  { X86::VMOVLPSZ128mr, X86::VMOVLPDZ128mr, X86::VMOVPQI2QIZmr },
6009  { X86::VMOVNTPSZ128mr, X86::VMOVNTPDZ128mr, X86::VMOVNTDQZ128mr },
6010  { X86::VMOVNTPSZ256mr, X86::VMOVNTPDZ256mr, X86::VMOVNTDQZ256mr },
6011  { X86::VMOVNTPSZmr, X86::VMOVNTPDZmr, X86::VMOVNTDQZmr },
6012  { X86::VMOVSDZmr, X86::VMOVSDZmr, X86::VMOVPQI2QIZmr },
6013  { X86::VMOVSSZmr, X86::VMOVSSZmr, X86::VMOVPDI2DIZmr },
6014  { X86::VMOVSDZrm, X86::VMOVSDZrm, X86::VMOVQI2PQIZrm },
6015  { X86::VMOVSSZrm, X86::VMOVSSZrm, X86::VMOVDI2PDIZrm },
6016  { X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128r, X86::VPBROADCASTDZ128r },
6017  { X86::VBROADCASTSSZ128m, X86::VBROADCASTSSZ128m, X86::VPBROADCASTDZ128m },
6018  { X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256r, X86::VPBROADCASTDZ256r },
6019  { X86::VBROADCASTSSZ256m, X86::VBROADCASTSSZ256m, X86::VPBROADCASTDZ256m },
6020  { X86::VBROADCASTSSZr, X86::VBROADCASTSSZr, X86::VPBROADCASTDZr },
6021  { X86::VBROADCASTSSZm, X86::VBROADCASTSSZm, X86::VPBROADCASTDZm },
6022  { X86::VBROADCASTSDZ256r, X86::VBROADCASTSDZ256r, X86::VPBROADCASTQZ256r },
6023  { X86::VBROADCASTSDZ256m, X86::VBROADCASTSDZ256m, X86::VPBROADCASTQZ256m },
6024  { X86::VBROADCASTSDZr, X86::VBROADCASTSDZr, X86::VPBROADCASTQZr },
6025  { X86::VBROADCASTSDZm, X86::VBROADCASTSDZm, X86::VPBROADCASTQZm },
6026  { X86::VINSERTF32x4Zrr, X86::VINSERTF32x4Zrr, X86::VINSERTI32x4Zrr },
6027  { X86::VINSERTF32x4Zrm, X86::VINSERTF32x4Zrm, X86::VINSERTI32x4Zrm },
6028  { X86::VINSERTF32x8Zrr, X86::VINSERTF32x8Zrr, X86::VINSERTI32x8Zrr },
6029  { X86::VINSERTF32x8Zrm, X86::VINSERTF32x8Zrm, X86::VINSERTI32x8Zrm },
6030  { X86::VINSERTF64x2Zrr, X86::VINSERTF64x2Zrr, X86::VINSERTI64x2Zrr },
6031  { X86::VINSERTF64x2Zrm, X86::VINSERTF64x2Zrm, X86::VINSERTI64x2Zrm },
6032  { X86::VINSERTF64x4Zrr, X86::VINSERTF64x4Zrr, X86::VINSERTI64x4Zrr },
6033  { X86::VINSERTF64x4Zrm, X86::VINSERTF64x4Zrm, X86::VINSERTI64x4Zrm },
6034  { X86::VINSERTF32x4Z256rr,X86::VINSERTF32x4Z256rr,X86::VINSERTI32x4Z256rr },
6035  { X86::VINSERTF32x4Z256rm,X86::VINSERTF32x4Z256rm,X86::VINSERTI32x4Z256rm },
6036  { X86::VINSERTF64x2Z256rr,X86::VINSERTF64x2Z256rr,X86::VINSERTI64x2Z256rr },
6037  { X86::VINSERTF64x2Z256rm,X86::VINSERTF64x2Z256rm,X86::VINSERTI64x2Z256rm },
6038  { X86::VEXTRACTF32x4Zrr, X86::VEXTRACTF32x4Zrr, X86::VEXTRACTI32x4Zrr },
6039  { X86::VEXTRACTF32x4Zmr, X86::VEXTRACTF32x4Zmr, X86::VEXTRACTI32x4Zmr },
6040  { X86::VEXTRACTF32x8Zrr, X86::VEXTRACTF32x8Zrr, X86::VEXTRACTI32x8Zrr },
6041  { X86::VEXTRACTF32x8Zmr, X86::VEXTRACTF32x8Zmr, X86::VEXTRACTI32x8Zmr },
6042  { X86::VEXTRACTF64x2Zrr, X86::VEXTRACTF64x2Zrr, X86::VEXTRACTI64x2Zrr },
6043  { X86::VEXTRACTF64x2Zmr, X86::VEXTRACTF64x2Zmr, X86::VEXTRACTI64x2Zmr },
6044  { X86::VEXTRACTF64x4Zrr, X86::VEXTRACTF64x4Zrr, X86::VEXTRACTI64x4Zrr },
6045  { X86::VEXTRACTF64x4Zmr, X86::VEXTRACTF64x4Zmr, X86::VEXTRACTI64x4Zmr },
6046  { X86::VEXTRACTF32x4Z256rr,X86::VEXTRACTF32x4Z256rr,X86::VEXTRACTI32x4Z256rr },
6047  { X86::VEXTRACTF32x4Z256mr,X86::VEXTRACTF32x4Z256mr,X86::VEXTRACTI32x4Z256mr },
6048  { X86::VEXTRACTF64x2Z256rr,X86::VEXTRACTF64x2Z256rr,X86::VEXTRACTI64x2Z256rr },
6049  { X86::VEXTRACTF64x2Z256mr,X86::VEXTRACTF64x2Z256mr,X86::VEXTRACTI64x2Z256mr },
6050  { X86::VPERMILPSmi, X86::VPERMILPSmi, X86::VPSHUFDmi },
6051  { X86::VPERMILPSri, X86::VPERMILPSri, X86::VPSHUFDri },
6052  { X86::VPERMILPSZ128mi, X86::VPERMILPSZ128mi, X86::VPSHUFDZ128mi },
6053  { X86::VPERMILPSZ128ri, X86::VPERMILPSZ128ri, X86::VPSHUFDZ128ri },
6054  { X86::VPERMILPSZ256mi, X86::VPERMILPSZ256mi, X86::VPSHUFDZ256mi },
6055  { X86::VPERMILPSZ256ri, X86::VPERMILPSZ256ri, X86::VPSHUFDZ256ri },
6056  { X86::VPERMILPSZmi, X86::VPERMILPSZmi, X86::VPSHUFDZmi },
6057  { X86::VPERMILPSZri, X86::VPERMILPSZri, X86::VPSHUFDZri },
6058  { X86::VPERMPSZ256rm, X86::VPERMPSZ256rm, X86::VPERMDZ256rm },
6059  { X86::VPERMPSZ256rr, X86::VPERMPSZ256rr, X86::VPERMDZ256rr },
6060  { X86::VPERMPDZ256mi, X86::VPERMPDZ256mi, X86::VPERMQZ256mi },
6061  { X86::VPERMPDZ256ri, X86::VPERMPDZ256ri, X86::VPERMQZ256ri },
6062  { X86::VPERMPDZ256rm, X86::VPERMPDZ256rm, X86::VPERMQZ256rm },
6063  { X86::VPERMPDZ256rr, X86::VPERMPDZ256rr, X86::VPERMQZ256rr },
6064  { X86::VPERMPSZrm, X86::VPERMPSZrm, X86::VPERMDZrm },
6065  { X86::VPERMPSZrr, X86::VPERMPSZrr, X86::VPERMDZrr },
6066  { X86::VPERMPDZmi, X86::VPERMPDZmi, X86::VPERMQZmi },
6067  { X86::VPERMPDZri, X86::VPERMPDZri, X86::VPERMQZri },
6068  { X86::VPERMPDZrm, X86::VPERMPDZrm, X86::VPERMQZrm },
6069  { X86::VPERMPDZrr, X86::VPERMPDZrr, X86::VPERMQZrr },
6070  { X86::VUNPCKLPDZ256rm, X86::VUNPCKLPDZ256rm, X86::VPUNPCKLQDQZ256rm },
6071  { X86::VUNPCKLPDZ256rr, X86::VUNPCKLPDZ256rr, X86::VPUNPCKLQDQZ256rr },
6072  { X86::VUNPCKHPDZ256rm, X86::VUNPCKHPDZ256rm, X86::VPUNPCKHQDQZ256rm },
6073  { X86::VUNPCKHPDZ256rr, X86::VUNPCKHPDZ256rr, X86::VPUNPCKHQDQZ256rr },
6074  { X86::VUNPCKLPSZ256rm, X86::VUNPCKLPSZ256rm, X86::VPUNPCKLDQZ256rm },
6075  { X86::VUNPCKLPSZ256rr, X86::VUNPCKLPSZ256rr, X86::VPUNPCKLDQZ256rr },
6076  { X86::VUNPCKHPSZ256rm, X86::VUNPCKHPSZ256rm, X86::VPUNPCKHDQZ256rm },
6077  { X86::VUNPCKHPSZ256rr, X86::VUNPCKHPSZ256rr, X86::VPUNPCKHDQZ256rr },
6078  { X86::VUNPCKLPDZ128rm, X86::VUNPCKLPDZ128rm, X86::VPUNPCKLQDQZ128rm },
6079  { X86::VMOVLHPSZrr, X86::VUNPCKLPDZ128rr, X86::VPUNPCKLQDQZ128rr },
6080  { X86::VUNPCKHPDZ128rm, X86::VUNPCKHPDZ128rm, X86::VPUNPCKHQDQZ128rm },
6081  { X86::VUNPCKHPDZ128rr, X86::VUNPCKHPDZ128rr, X86::VPUNPCKHQDQZ128rr },
6082  { X86::VUNPCKLPSZ128rm, X86::VUNPCKLPSZ128rm, X86::VPUNPCKLDQZ128rm },
6083  { X86::VUNPCKLPSZ128rr, X86::VUNPCKLPSZ128rr, X86::VPUNPCKLDQZ128rr },
6084  { X86::VUNPCKHPSZ128rm, X86::VUNPCKHPSZ128rm, X86::VPUNPCKHDQZ128rm },
6085  { X86::VUNPCKHPSZ128rr, X86::VUNPCKHPSZ128rr, X86::VPUNPCKHDQZ128rr },
6086  { X86::VUNPCKLPDZrm, X86::VUNPCKLPDZrm, X86::VPUNPCKLQDQZrm },
6087  { X86::VUNPCKLPDZrr, X86::VUNPCKLPDZrr, X86::VPUNPCKLQDQZrr },
6088  { X86::VUNPCKHPDZrm, X86::VUNPCKHPDZrm, X86::VPUNPCKHQDQZrm },
6089  { X86::VUNPCKHPDZrr, X86::VUNPCKHPDZrr, X86::VPUNPCKHQDQZrr },
6090  { X86::VUNPCKLPSZrm, X86::VUNPCKLPSZrm, X86::VPUNPCKLDQZrm },
6091  { X86::VUNPCKLPSZrr, X86::VUNPCKLPSZrr, X86::VPUNPCKLDQZrr },
6092  { X86::VUNPCKHPSZrm, X86::VUNPCKHPSZrm, X86::VPUNPCKHDQZrm },
6093  { X86::VUNPCKHPSZrr, X86::VUNPCKHPSZrr, X86::VPUNPCKHDQZrr },
6094  { X86::VEXTRACTPSZmr, X86::VEXTRACTPSZmr, X86::VPEXTRDZmr },
6095  { X86::VEXTRACTPSZrr, X86::VEXTRACTPSZrr, X86::VPEXTRDZrr },
6096 };
6097 
6098 static const uint16_t ReplaceableInstrsAVX2[][3] = {
6099  //PackedSingle PackedDouble PackedInt
6100  { X86::VANDNPSYrm, X86::VANDNPDYrm, X86::VPANDNYrm },
6101  { X86::VANDNPSYrr, X86::VANDNPDYrr, X86::VPANDNYrr },
6102  { X86::VANDPSYrm, X86::VANDPDYrm, X86::VPANDYrm },
6103  { X86::VANDPSYrr, X86::VANDPDYrr, X86::VPANDYrr },
6104  { X86::VORPSYrm, X86::VORPDYrm, X86::VPORYrm },
6105  { X86::VORPSYrr, X86::VORPDYrr, X86::VPORYrr },
6106  { X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORYrm },
6107  { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr },
6108  { X86::VPERM2F128rm, X86::VPERM2F128rm, X86::VPERM2I128rm },
6109  { X86::VPERM2F128rr, X86::VPERM2F128rr, X86::VPERM2I128rr },
6110  { X86::VBROADCASTSSrm, X86::VBROADCASTSSrm, X86::VPBROADCASTDrm},
6111  { X86::VBROADCASTSSrr, X86::VBROADCASTSSrr, X86::VPBROADCASTDrr},
6112  { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrr, X86::VPBROADCASTDYrr},
6113  { X86::VBROADCASTSSYrm, X86::VBROADCASTSSYrm, X86::VPBROADCASTDYrm},
6114  { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrr, X86::VPBROADCASTQYrr},
6115  { X86::VBROADCASTSDYrm, X86::VBROADCASTSDYrm, X86::VPBROADCASTQYrm},
6116  { X86::VBROADCASTF128, X86::VBROADCASTF128, X86::VBROADCASTI128 },
6117  { X86::VBLENDPSYrri, X86::VBLENDPSYrri, X86::VPBLENDDYrri },
6118  { X86::VBLENDPSYrmi, X86::VBLENDPSYrmi, X86::VPBLENDDYrmi },
6119  { X86::VPERMILPSYmi, X86::VPERMILPSYmi, X86::VPSHUFDYmi },
6120  { X86::VPERMILPSYri, X86::VPERMILPSYri, X86::VPSHUFDYri },
6121  { X86::VUNPCKLPDYrm, X86::VUNPCKLPDYrm, X86::VPUNPCKLQDQYrm },
6122  { X86::VUNPCKLPDYrr, X86::VUNPCKLPDYrr, X86::VPUNPCKLQDQYrr },
6123  { X86::VUNPCKHPDYrm, X86::VUNPCKHPDYrm, X86::VPUNPCKHQDQYrm },
6124  { X86::VUNPCKHPDYrr, X86::VUNPCKHPDYrr, X86::VPUNPCKHQDQYrr },
6125  { X86::VUNPCKLPSYrm, X86::VUNPCKLPSYrm, X86::VPUNPCKLDQYrm },
6126  { X86::VUNPCKLPSYrr, X86::VUNPCKLPSYrr, X86::VPUNPCKLDQYrr },
6127  { X86::VUNPCKHPSYrm, X86::VUNPCKHPSYrm, X86::VPUNPCKHDQYrm },
6128  { X86::VUNPCKHPSYrr, X86::VUNPCKHPSYrr, X86::VPUNPCKHDQYrr },
6129 };
6130 
6131 static const uint16_t ReplaceableInstrsAVX2InsertExtract[][3] = {
6132  //PackedSingle PackedDouble PackedInt
6133  { X86::VEXTRACTF128mr, X86::VEXTRACTF128mr, X86::VEXTRACTI128mr },
6134  { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, X86::VEXTRACTI128rr },
6135  { X86::VINSERTF128rm, X86::VINSERTF128rm, X86::VINSERTI128rm },
6136  { X86::VINSERTF128rr, X86::VINSERTF128rr, X86::VINSERTI128rr },
6137 };
6138 
6139 static const uint16_t ReplaceableInstrsAVX512[][4] = {
6140  // Two integer columns for 64-bit and 32-bit elements.
6141  //PackedSingle PackedDouble PackedInt PackedInt
6142  { X86::VMOVAPSZ128mr, X86::VMOVAPDZ128mr, X86::VMOVDQA64Z128mr, X86::VMOVDQA32Z128mr },
6143  { X86::VMOVAPSZ128rm, X86::VMOVAPDZ128rm, X86::VMOVDQA64Z128rm, X86::VMOVDQA32Z128rm },
6144  { X86::VMOVAPSZ128rr, X86::VMOVAPDZ128rr, X86::VMOVDQA64Z128rr, X86::VMOVDQA32Z128rr },
6145  { X86::VMOVUPSZ128mr, X86::VMOVUPDZ128mr, X86::VMOVDQU64Z128mr, X86::VMOVDQU32Z128mr },
6146  { X86::VMOVUPSZ128rm, X86::VMOVUPDZ128rm, X86::VMOVDQU64Z128rm, X86::VMOVDQU32Z128rm },
6147  { X86::VMOVAPSZ256mr, X86::VMOVAPDZ256mr, X86::VMOVDQA64Z256mr, X86::VMOVDQA32Z256mr },
6148  { X86::VMOVAPSZ256rm, X86::VMOVAPDZ256rm, X86::VMOVDQA64Z256rm, X86::VMOVDQA32Z256rm },
6149  { X86::VMOVAPSZ256rr, X86::VMOVAPDZ256rr, X86::VMOVDQA64Z256rr, X86::VMOVDQA32Z256rr },
6150  { X86::VMOVUPSZ256mr, X86::VMOVUPDZ256mr, X86::VMOVDQU64Z256mr, X86::VMOVDQU32Z256mr },
6151  { X86::VMOVUPSZ256rm, X86::VMOVUPDZ256rm, X86::VMOVDQU64Z256rm, X86::VMOVDQU32Z256rm },
6152  { X86::VMOVAPSZmr, X86::VMOVAPDZmr, X86::VMOVDQA64Zmr, X86::VMOVDQA32Zmr },
6153  { X86::VMOVAPSZrm, X86::VMOVAPDZrm, X86::VMOVDQA64Zrm, X86::VMOVDQA32Zrm },
6154  { X86::VMOVAPSZrr, X86::VMOVAPDZrr, X86::VMOVDQA64Zrr, X86::VMOVDQA32Zrr },
6155  { X86::VMOVUPSZmr, X86::VMOVUPDZmr, X86::VMOVDQU64Zmr, X86::VMOVDQU32Zmr },
6156  { X86::VMOVUPSZrm, X86::VMOVUPDZrm, X86::VMOVDQU64Zrm, X86::VMOVDQU32Zrm },
6157 };
6158 
6159 static const uint16_t ReplaceableInstrsAVX512DQ[][4] = {
6160  // Two integer columns for 64-bit and 32-bit elements.
6161  //PackedSingle PackedDouble PackedInt PackedInt
6162  { X86::VANDNPSZ128rm, X86::VANDNPDZ128rm, X86::VPANDNQZ128rm, X86::VPANDNDZ128rm },
6163  { X86::VANDNPSZ128rr, X86::VANDNPDZ128rr, X86::VPANDNQZ128rr, X86::VPANDNDZ128rr },
6164  { X86::VANDPSZ128rm, X86::VANDPDZ128rm, X86::VPANDQZ128rm, X86::VPANDDZ128rm },
6165  { X86::VANDPSZ128rr, X86::VANDPDZ128rr, X86::VPANDQZ128rr, X86::VPANDDZ128rr },
6166  { X86::VORPSZ128rm, X86::VORPDZ128rm, X86::VPORQZ128rm, X86::VPORDZ128rm },
6167  { X86::VORPSZ128rr, X86::VORPDZ128rr, X86::VPORQZ128rr, X86::VPORDZ128rr },
6168  { X86::VXORPSZ128rm, X86::VXORPDZ128rm, X86::VPXORQZ128rm, X86::VPXORDZ128rm },
6169  { X86::VXORPSZ128rr, X86::VXORPDZ128rr, X86::VPXORQZ128rr, X86::VPXORDZ128rr },
6170  { X86::VANDNPSZ256rm, X86::VANDNPDZ256rm, X86::VPANDNQZ256rm, X86::VPANDNDZ256rm },
6171  { X86::VANDNPSZ256rr, X86::VANDNPDZ256rr, X86::VPANDNQZ256rr, X86::VPANDNDZ256rr },
6172  { X86::VANDPSZ256rm, X86::VANDPDZ256rm, X86::VPANDQZ256rm, X86::VPANDDZ256rm },
6173  { X86::VANDPSZ256rr, X86::VANDPDZ256rr, X86::VPANDQZ256rr, X86::VPANDDZ256rr },
6174  { X86::VORPSZ256rm, X86::VORPDZ256rm, X86::VPORQZ256rm, X86::VPORDZ256rm },
6175  { X86::VORPSZ256rr, X86::VORPDZ256rr, X86::VPORQZ256rr, X86::VPORDZ256rr },
6176  { X86::VXORPSZ256rm, X86::VXORPDZ256rm, X86::VPXORQZ256rm, X86::VPXORDZ256rm },
6177  { X86::VXORPSZ256rr, X86::VXORPDZ256rr, X86::VPXORQZ256rr, X86::VPXORDZ256rr },
6178  { X86::VANDNPSZrm, X86::VANDNPDZrm, X86::VPANDNQZrm, X86::VPANDNDZrm },
6179  { X86::VANDNPSZrr, X86::VANDNPDZrr, X86::VPANDNQZrr, X86::VPANDNDZrr },
6180  { X86::VANDPSZrm, X86::VANDPDZrm, X86::VPANDQZrm, X86::VPANDDZrm },
6181  { X86::VANDPSZrr, X86::VANDPDZrr, X86::VPANDQZrr, X86::VPANDDZrr },
6182  { X86::VORPSZrm, X86::VORPDZrm, X86::VPORQZrm, X86::VPORDZrm },
6183  { X86::VORPSZrr, X86::VORPDZrr, X86::VPORQZrr, X86::VPORDZrr },
6184  { X86::VXORPSZrm, X86::VXORPDZrm, X86::VPXORQZrm, X86::VPXORDZrm },
6185  { X86::VXORPSZrr, X86::VXORPDZrr, X86::VPXORQZrr, X86::VPXORDZrr },
6186 };
6187 
6188 static const uint16_t ReplaceableInstrsAVX512DQMasked[][4] = {
6189  // Two integer columns for 64-bit and 32-bit elements.
6190  //PackedSingle PackedDouble
6191  //PackedInt PackedInt
6192  { X86::VANDNPSZ128rmk, X86::VANDNPDZ128rmk,
6193  X86::VPANDNQZ128rmk, X86::VPANDNDZ128rmk },
6194  { X86::VANDNPSZ128rmkz, X86::VANDNPDZ128rmkz,
6195  X86::VPANDNQZ128rmkz, X86::VPANDNDZ128rmkz },
6196  { X86::VANDNPSZ128rrk, X86::VANDNPDZ128rrk,
6197  X86::VPANDNQZ128rrk, X86::VPANDNDZ128rrk },
6198  { X86::VANDNPSZ128rrkz, X86::VANDNPDZ128rrkz,
6199  X86::VPANDNQZ128rrkz, X86::VPANDNDZ128rrkz },
6200  { X86::VANDPSZ128rmk, X86::VANDPDZ128rmk,
6201  X86::VPANDQZ128rmk, X86::VPANDDZ128rmk },
6202  { X86::VANDPSZ128rmkz, X86::VANDPDZ128rmkz,
6203  X86::VPANDQZ128rmkz, X86::VPANDDZ128rmkz },
6204  { X86::VANDPSZ128rrk, X86::VANDPDZ128rrk,
6205  X86::VPANDQZ128rrk, X86::VPANDDZ128rrk },
6206  { X86::VANDPSZ128rrkz, X86::VANDPDZ128rrkz,
6207  X86::VPANDQZ128rrkz, X86::VPANDDZ128rrkz },
6208  { X86::VORPSZ128rmk, X86::VORPDZ128rmk,
6209  X86::VPORQZ128rmk, X86::VPORDZ128rmk },
6210  { X86::VORPSZ128rmkz, X86::VORPDZ128rmkz,
6211  X86::VPORQZ128rmkz, X86::VPORDZ128rmkz },
6212  { X86::VORPSZ128rrk, X86::VORPDZ128rrk,
6213  X86::VPORQZ128rrk, X86::VPORDZ128rrk },
6214  { X86::VORPSZ128rrkz, X86::VORPDZ128rrkz,
6215  X86::VPORQZ128rrkz, X86::VPORDZ128rrkz },
6216  { X86::VXORPSZ128rmk, X86::VXORPDZ128rmk,
6217  X86::VPXORQZ128rmk, X86::VPXORDZ128rmk },
6218  { X86::VXORPSZ128rmkz, X86::VXORPDZ128rmkz,
6219  X86::VPXORQZ128rmkz, X86::VPXORDZ128rmkz },
6220  { X86::VXORPSZ128rrk, X86::VXORPDZ128rrk,
6221  X86::VPXORQZ128rrk, X86::VPXORDZ128rrk },
6222  { X86::VXORPSZ128rrkz, X86::VXORPDZ128rrkz,
6223  X86::VPXORQZ128rrkz, X86::VPXORDZ128rrkz },
6224  { X86::VANDNPSZ256rmk, X86::VANDNPDZ256rmk,
6225  X86::VPANDNQZ256rmk, X86::VPANDNDZ256rmk },
6226  { X86::VANDNPSZ256rmkz, X86::VANDNPDZ256rmkz,
6227  X86::VPANDNQZ256rmkz, X86::VPANDNDZ256rmkz },
6228  { X86::VANDNPSZ256rrk, X86::VANDNPDZ256rrk,
6229  X86::VPANDNQZ256rrk, X86::VPANDNDZ256rrk },
6230  { X86::VANDNPSZ256rrkz, X86::VANDNPDZ256rrkz,
6231  X86::VPANDNQZ256rrkz, X86::VPANDNDZ256rrkz },
6232  { X86::VANDPSZ256rmk, X86::VANDPDZ256rmk,
6233  X86::VPANDQZ256rmk, X86::VPANDDZ256rmk },
6234  { X86::VANDPSZ256rmkz, X86::VANDPDZ256rmkz,
6235  X86::VPANDQZ256rmkz, X86::VPANDDZ256rmkz },
6236  { X86::VANDPSZ256rrk, X86::VANDPDZ256rrk,
6237  X86::VPANDQZ256rrk, X86::VPANDDZ256rrk },
6238  { X86::VANDPSZ256rrkz, X86::VANDPDZ256rrkz,
6239  X86::VPANDQZ256rrkz, X86::VPANDDZ256rrkz },
6240  { X86::VORPSZ256rmk, X86::VORPDZ256rmk,
6241  X86::VPORQZ256rmk, X86::VPORDZ256rmk },
6242  { X86::VORPSZ256rmkz, X86::VORPDZ256rmkz,
6243  X86::VPORQZ256rmkz, X86::VPORDZ256rmkz },
6244  { X86::VORPSZ256rrk, X86::VORPDZ256rrk,
6245  X86::VPORQZ256rrk, X86::VPORDZ256rrk },
6246  { X86::VORPSZ256rrkz, X86::VORPDZ256rrkz,
6247  X86::VPORQZ256rrkz, X86::VPORDZ256rrkz },
6248  { X86::VXORPSZ256rmk, X86::VXORPDZ256rmk,
6249  X86::VPXORQZ256rmk, X86::VPXORDZ256rmk },
6250  { X86::VXORPSZ256rmkz, X86::VXORPDZ256rmkz,
6251  X86::VPXORQZ256rmkz, X86::VPXORDZ256rmkz },
6252  { X86::VXORPSZ256rrk, X86::VXORPDZ256rrk,
6253  X86::VPXORQZ256rrk, X86::VPXORDZ256rrk },
6254  { X86::VXORPSZ256rrkz, X86::VXORPDZ256rrkz,
6255  X86::VPXORQZ256rrkz, X86::VPXORDZ256rrkz },
6256  { X86::VANDNPSZrmk, X86::VANDNPDZrmk,
6257  X86::VPANDNQZrmk, X86::VPANDNDZrmk },
6258  { X86::VANDNPSZrmkz, X86::VANDNPDZrmkz,
6259  X86::VPANDNQZrmkz, X86::VPANDNDZrmkz },
6260  { X86::VANDNPSZrrk, X86::VANDNPDZrrk,
6261  X86::VPANDNQZrrk, X86::VPANDNDZrrk },
6262  { X86::VANDNPSZrrkz, X86::VANDNPDZrrkz,
6263  X86::VPANDNQZrrkz, X86::VPANDNDZrrkz },
6264  { X86::VANDPSZrmk, X86::VANDPDZrmk,
6265  X86::VPANDQZrmk, X86::VPANDDZrmk },
6266  { X86::VANDPSZrmkz, X86::VANDPDZrmkz,
6267  X86::VPANDQZrmkz, X86::VPANDDZrmkz },
6268  { X86::VANDPSZrrk, X86::VANDPDZrrk,
6269  X86::VPANDQZrrk, X86::VPANDDZrrk },
6270  { X86::VANDPSZrrkz, X86::VANDPDZrrkz,
6271  X86::VPANDQZrrkz, X86::VPANDDZrrkz },
6272  { X86::VORPSZrmk, X86::VORPDZrmk,
6273  X86::VPORQZrmk, X86::VPORDZrmk },
6274  { X86::VORPSZrmkz, X86::VORPDZrmkz,
6275  X86::VPORQZrmkz, X86::VPORDZrmkz },
6276  { X86::VORPSZrrk, X86::VORPDZrrk,
6277  X86::VPORQZrrk, X86::VPORDZrrk },
6278  { X86::VORPSZrrkz, X86::VORPDZrrkz,
6279  X86::VPORQZrrkz, X86::VPORDZrrkz },
6280  { X86::VXORPSZrmk, X86::VXORPDZrmk,
6281  X86::VPXORQZrmk, X86::VPXORDZrmk },
6282  { X86::VXORPSZrmkz, X86::VXORPDZrmkz,
6283  X86::VPXORQZrmkz, X86::VPXORDZrmkz },
6284  { X86::VXORPSZrrk, X86::VXORPDZrrk,
6285  X86::VPXORQZrrk, X86::VPXORDZrrk },
6286  { X86::VXORPSZrrkz, X86::VXORPDZrrkz,
6287  X86::VPXORQZrrkz, X86::VPXORDZrrkz },
6288  // Broadcast loads can be handled the same as masked operations to avoid
6289  // changing element size.
6290  { X86::VANDNPSZ128rmb, X86::VANDNPDZ128rmb,
6291  X86::VPANDNQZ128rmb, X86::VPANDNDZ128rmb },
6292  { X86::VANDPSZ128rmb, X86::VANDPDZ128rmb,
6293  X86::VPANDQZ128rmb, X86::VPANDDZ128rmb },
6294  { X86::VORPSZ128rmb, X86::VORPDZ128rmb,
6295  X86::VPORQZ128rmb, X86::VPORDZ128rmb },
6296  { X86::VXORPSZ128rmb, X86::VXORPDZ128rmb,
6297  X86::VPXORQZ128rmb, X86::VPXORDZ128rmb },
6298  { X86::VANDNPSZ256rmb, X86::VANDNPDZ256rmb,
6299  X86::VPANDNQZ256rmb, X86::VPANDNDZ256rmb },
6300  { X86::VANDPSZ256rmb, X86::VANDPDZ256rmb,
6301  X86::VPANDQZ256rmb, X86::VPANDDZ256rmb },
6302  { X86::VORPSZ256rmb, X86::VORPDZ256rmb,
6303  X86::VPORQZ256rmb, X86::VPORDZ256rmb },
6304  { X86::VXORPSZ256rmb, X86::VXORPDZ256rmb,
6305  X86::VPXORQZ256rmb, X86::VPXORDZ256rmb },
6306  { X86::VANDNPSZrmb, X86::VANDNPDZrmb,
6307  X86::VPANDNQZrmb, X86::VPANDNDZrmb },
6308  { X86::VANDPSZrmb, X86::VANDPDZrmb,
6309  X86::VPANDQZrmb, X86::VPANDDZrmb },
6310  { X86::VANDPSZrmb, X86::VANDPDZrmb,
6311  X86::VPANDQZrmb, X86::VPANDDZrmb },
6312  { X86::VORPSZrmb, X86::VORPDZrmb,
6313  X86::VPORQZrmb, X86::VPORDZrmb },
6314  { X86::VXORPSZrmb, X86::VXORPDZrmb,
6315  X86::VPXORQZrmb, X86::VPXORDZrmb },
6316  { X86::VANDNPSZ128rmbk, X86::VANDNPDZ128rmbk,
6317  X86::VPANDNQZ128rmbk, X86::VPANDNDZ128rmbk },
6318  { X86::VANDPSZ128rmbk, X86::VANDPDZ128rmbk,
6319  X86::VPANDQZ128rmbk, X86::VPANDDZ128rmbk },
6320  { X86::VORPSZ128rmbk, X86::VORPDZ128rmbk,
6321  X86::VPORQZ128rmbk, X86::VPORDZ128rmbk },
6322  { X86::VXORPSZ128rmbk, X86::VXORPDZ128rmbk,
6323  X86::VPXORQZ128rmbk, X86::VPXORDZ128rmbk },
6324  { X86::VANDNPSZ256rmbk, X86::VANDNPDZ256rmbk,
6325  X86::VPANDNQZ256rmbk, X86::VPANDNDZ256rmbk },
6326  { X86::VANDPSZ256rmbk, X86::VANDPDZ256rmbk,
6327  X86::VPANDQZ256rmbk, X86::VPANDDZ256rmbk },
6328  { X86::VORPSZ256rmbk, X86::VORPDZ256rmbk,
6329  X86::VPORQZ256rmbk, X86::VPORDZ256rmbk },
6330  { X86::VXORPSZ256rmbk, X86::VXORPDZ256rmbk,
6331  X86::VPXORQZ256rmbk, X86::VPXORDZ256rmbk },
6332  { X86::VANDNPSZrmbk, X86::VANDNPDZrmbk,
6333  X86::VPANDNQZrmbk, X86::VPANDNDZrmbk },
6334  { X86::VANDPSZrmbk, X86::VANDPDZrmbk,
6335  X86::VPANDQZrmbk, X86::VPANDDZrmbk },
6336  { X86::VANDPSZrmbk, X86::VANDPDZrmbk,
6337  X86::VPANDQZrmbk, X86::VPANDDZrmbk },
6338  { X86::VORPSZrmbk, X86::VORPDZrmbk,
6339  X86::VPORQZrmbk, X86::VPORDZrmbk },
6340  { X86::VXORPSZrmbk, X86::VXORPDZrmbk,
6341  X86::VPXORQZrmbk, X86::VPXORDZrmbk },
6342  { X86::VANDNPSZ128rmbkz,X86::VANDNPDZ128rmbkz,
6343  X86::VPANDNQZ128rmbkz,X86::VPANDNDZ128rmbkz},
6344  { X86::VANDPSZ128rmbkz, X86::VANDPDZ128rmbkz,
6345  X86::VPANDQZ128rmbkz, X86::VPANDDZ128rmbkz },
6346  { X86::VORPSZ128rmbkz, X86::VORPDZ128rmbkz,
6347  X86::VPORQZ128rmbkz, X86::VPORDZ128rmbkz },
6348  { X86::VXORPSZ128rmbkz, X86::VXORPDZ128rmbkz,
6349  X86::VPXORQZ128rmbkz, X86::VPXORDZ128rmbkz },
6350  { X86::VANDNPSZ256rmbkz,X86::VANDNPDZ256rmbkz,
6351  X86::VPANDNQZ256rmbkz,X86::VPANDNDZ256rmbkz},
6352  { X86::VANDPSZ256rmbkz, X86::VANDPDZ256rmbkz,
6353  X86::VPANDQZ256rmbkz, X86::VPANDDZ256rmbkz },
6354  { X86::VORPSZ256rmbkz, X86::VORPDZ256rmbkz,
6355  X86::VPORQZ256rmbkz, X86::VPORDZ256rmbkz },
6356  { X86::VXORPSZ256rmbkz, X86::VXORPDZ256rmbkz,
6357  X86::VPXORQZ256rmbkz, X86::VPXORDZ256rmbkz },
6358  { X86::VANDNPSZrmbkz, X86::VANDNPDZrmbkz,
6359  X86::VPANDNQZrmbkz, X86::VPANDNDZrmbkz },
6360  { X86::VANDPSZrmbkz, X86::VANDPDZrmbkz,
6361  X86::VPANDQZrmbkz, X86::VPANDDZrmbkz },
6362  { X86::VANDPSZrmbkz, X86::VANDPDZrmbkz,
6363  X86::VPANDQZrmbkz, X86::VPANDDZrmbkz },
6364  { X86::VORPSZrmbkz, X86::VORPDZrmbkz,
6365  X86::VPORQZrmbkz, X86::VPORDZrmbkz },
6366  { X86::VXORPSZrmbkz, X86::VXORPDZrmbkz,
6367  X86::VPXORQZrmbkz, X86::VPXORDZrmbkz },
6368 };
6369 
6370 // NOTE: These should only be used by the custom domain methods.
6371 static const uint16_t ReplaceableCustomInstrs[][3] = {
6372  //PackedSingle PackedDouble PackedInt
6373  { X86::BLENDPSrmi, X86::BLENDPDrmi, X86::PBLENDWrmi },
6374  { X86::BLENDPSrri, X86::BLENDPDrri, X86::PBLENDWrri },
6375  { X86::VBLENDPSrmi, X86::VBLENDPDrmi, X86::VPBLENDWrmi },
6376  { X86::VBLENDPSrri, X86::VBLENDPDrri, X86::VPBLENDWrri },
6377  { X86::VBLENDPSYrmi, X86::VBLENDPDYrmi, X86::VPBLENDWYrmi },
6378  { X86::VBLENDPSYrri, X86::VBLENDPDYrri, X86::VPBLENDWYrri },
6379 };
6380 static const uint16_t ReplaceableCustomAVX2Instrs[][3] = {
6381  //PackedSingle PackedDouble PackedInt
6382  { X86::VBLENDPSrmi, X86::VBLENDPDrmi, X86::VPBLENDDrmi },
6383  { X86::VBLENDPSrri, X86::VBLENDPDrri, X86::VPBLENDDrri },
6384  { X86::VBLENDPSYrmi, X86::VBLENDPDYrmi, X86::VPBLENDDYrmi },
6385  { X86::VBLENDPSYrri, X86::VBLENDPDYrri, X86::VPBLENDDYrri },
6386 };
6387 
6388 // Special table for changing EVEX logic instructions to VEX.
6389 // TODO: Should we run EVEX->VEX earlier?
6390 static const uint16_t ReplaceableCustomAVX512LogicInstrs[][4] = {
6391  // Two integer columns for 64-bit and 32-bit elements.
6392  //PackedSingle PackedDouble PackedInt PackedInt
6393  { X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNQZ128rm, X86::VPANDNDZ128rm },
6394  { X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNQZ128rr, X86::VPANDNDZ128rr },
6395  { X86::VANDPSrm, X86::VANDPDrm, X86::VPANDQZ128rm, X86::VPANDDZ128rm },
6396  { X86::VANDPSrr, X86::VANDPDrr, X86::VPANDQZ128rr, X86::VPANDDZ128rr },
6397  { X86::VORPSrm, X86::VORPDrm, X86::VPORQZ128rm, X86::VPORDZ128rm },
6398  { X86::VORPSrr, X86::VORPDrr, X86::VPORQZ128rr, X86::VPORDZ128rr },
6399  { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORQZ128rm, X86::VPXORDZ128rm },
6400  { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORQZ128rr, X86::VPXORDZ128rr },
6401  { X86::VANDNPSYrm, X86::VANDNPDYrm, X86::VPANDNQZ256rm, X86::VPANDNDZ256rm },
6402  { X86::VANDNPSYrr, X86::VANDNPDYrr, X86::VPANDNQZ256rr, X86::VPANDNDZ256rr },
6403  { X86::VANDPSYrm, X86::VANDPDYrm, X86::VPANDQZ256rm, X86::VPANDDZ256rm },
6404  { X86::VANDPSYrr, X86::VANDPDYrr, X86::VPANDQZ256rr, X86::VPANDDZ256rr },
6405  { X86::VORPSYrm, X86::VORPDYrm, X86::VPORQZ256rm, X86::VPORDZ256rm },
6406  { X86::VORPSYrr, X86::VORPDYrr, X86::VPORQZ256rr, X86::VPORDZ256rr },
6407  { X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORQZ256rm, X86::VPXORDZ256rm },
6408  { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORQZ256rr, X86::VPXORDZ256rr },
6409 };
6410 
6411 // FIXME: Some shuffle and unpack instructions have equivalents in different
6412 // domains, but they require a bit more work than just switching opcodes.
6413 
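// The lookup helpers below scan a replacement table for the row whose entry in
// the current domain's column matches the opcode. Domains are numbered from 1
// (PackedSingle), 2 (PackedDouble), 3 (PackedInt), so column domain-1 is
// checked; the AVX-512 tables carry a second integer column (index 3) holding
// the 32-bit-element forms, which lookupAVX512 also checks for the integer
// domain.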
6414 static const uint16_t *lookup(unsigned opcode, unsigned domain,
6415  ArrayRef<uint16_t[3]> Table) {
6416  for (const uint16_t (&Row)[3] : Table)
6417  if (Row[domain-1] == opcode)
6418  return Row;
6419  return nullptr;
6420 }
6421 
6422 static const uint16_t *lookupAVX512(unsigned opcode, unsigned domain,
6423  ArrayRef<uint16_t[4]> Table) {
6424  // If this is the integer domain make sure to check both integer columns.
6425  for (const uint16_t (&Row)[4] : Table)
6426  if (Row[domain-1] == opcode || (domain == 3 && Row[3] == opcode))
6427  return Row;
6428  return nullptr;
6429 }
6430 
6431 // Helper to attempt to widen/narrow blend masks.
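// For example, rescaling the 4-element mask 0b0011 to 2 elements yields 0b01
// (the two selected sub-elements form one fully selected wide element), while
// 0b0010 only covers half of a wide element, so the helper returns false.
// Going the other way, the 2-element mask 0b01 becomes the 4-element mask
// 0b0011.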
6432 static bool AdjustBlendMask(unsigned OldMask, unsigned OldWidth,
6433  unsigned NewWidth, unsigned *pNewMask = nullptr) {
6434  assert(((OldWidth % NewWidth) == 0 || (NewWidth % OldWidth) == 0) &&
6435  "Illegal blend mask scale");
6436  unsigned NewMask = 0;
6437 
6438  if ((OldWidth % NewWidth) == 0) {
6439  unsigned Scale = OldWidth / NewWidth;
6440  unsigned SubMask = (1u << Scale) - 1;
6441  for (unsigned i = 0; i != NewWidth; ++i) {
6442  unsigned Sub = (OldMask >> (i * Scale)) & SubMask;
6443  if (Sub == SubMask)
6444  NewMask |= (1u << i);
6445  else if (Sub != 0x0)
6446  return false;
6447  }
6448  } else {
6449  unsigned Scale = NewWidth / OldWidth;
6450  unsigned SubMask = (1u << Scale) - 1;
6451  for (unsigned i = 0; i != OldWidth; ++i) {
6452  if (OldMask & (1 << i)) {
6453  NewMask |= (SubMask << (i * Scale));
6454  }
6455  }
6456  }
6457 
6458  if (pNewMask)
6459  *pNewMask = NewMask;
6460  return true;
6461 }
6462 
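// Returns a bitmask of the execution domains this instruction could be
// rewritten into: bit 1 = PackedSingle (0x2), bit 2 = PackedDouble (0x4),
// bit 3 = PackedInt (0x8), so 0xe means any of the three. Returning 0 makes
// getExecutionDomain fall back to the generic replacement tables.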
6463 uint16_t X86InstrInfo::getExecutionDomainCustom(const MachineInstr &MI) const {
6464  unsigned Opcode = MI.getOpcode();
6465  unsigned NumOperands = MI.getDesc().getNumOperands();
6466 
6467  auto GetBlendDomains = [&](unsigned ImmWidth, bool Is256) {
6468  uint16_t validDomains = 0;
6469  if (MI.getOperand(NumOperands - 1).isImm()) {
6470  unsigned Imm = MI.getOperand(NumOperands - 1).getImm();
6471  if (AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4))
6472  validDomains |= 0x2; // PackedSingle
6473  if (AdjustBlendMask(Imm, ImmWidth, Is256 ? 4 : 2))
6474  validDomains |= 0x4; // PackedDouble
6475  if (!Is256 || Subtarget.hasAVX2())
6476  validDomains |= 0x8; // PackedInt
6477  }
6478  return validDomains;
6479  };
6480 
6481  switch (Opcode) {
6482  case X86::BLENDPDrmi:
6483  case X86::BLENDPDrri:
6484  case X86::VBLENDPDrmi:
6485  case X86::VBLENDPDrri:
6486  return GetBlendDomains(2, false);
6487  case X86::VBLENDPDYrmi:
6488  case X86::VBLENDPDYrri:
6489  return GetBlendDomains(4, true);
6490  case X86::BLENDPSrmi:
6491  case X86::BLENDPSrri:
6492  case X86::VBLENDPSrmi:
6493  case X86::VBLENDPSrri:
6494  case X86::VPBLENDDrmi:
6495  case X86::VPBLENDDrri:
6496  return GetBlendDomains(4, false);
6497  case X86::VBLENDPSYrmi:
6498  case X86::VBLENDPSYrri:
6499  case X86::VPBLENDDYrmi:
6500  case X86::VPBLENDDYrri:
6501  return GetBlendDomains(8, true);
6502  case X86::PBLENDWrmi:
6503  case X86::PBLENDWrri:
6504  case X86::VPBLENDWrmi:
6505  case X86::VPBLENDWrri:
6506  // Treat VPBLENDWY as a 128-bit vector because it repeats the lo/hi masks.
6507  case X86::VPBLENDWYrmi:
6508  case X86::VPBLENDWYrri:
6509  return GetBlendDomains(8, false);
6510  case X86::VPANDDZ128rr: case X86::VPANDDZ128rm:
6511  case X86::VPANDDZ256rr: case X86::VPANDDZ256rm:
6512  case X86::VPANDQZ128rr: case X86::VPANDQZ128rm:
6513  case X86::VPANDQZ256rr: case X86::VPANDQZ256rm:
6514  case X86::VPANDNDZ128rr: case X86::VPANDNDZ128rm:
6515  case X86::VPANDNDZ256rr: case X86::VPANDNDZ256rm:
6516  case X86::VPANDNQZ128rr: case X86::VPANDNQZ128rm:
6517  case X86::VPANDNQZ256rr: case X86::VPANDNQZ256rm:
6518  case X86::VPORDZ128rr: case X86::VPORDZ128rm:
6519  case X86::VPORDZ256rr: case X86::VPORDZ256rm:
6520  case X86::VPORQZ128rr: case X86::VPORQZ128rm:
6521  case X86::VPORQZ256rr: case X86::VPORQZ256rm:
6522  case X86::VPXORDZ128rr: case X86::VPXORDZ128rm:
6523  case X86::VPXORDZ256rr: case X86::VPXORDZ256rm:
6524  case X86::VPXORQZ128rr: case X86::VPXORQZ128rm:
6525  case X86::VPXORQZ256rr: case X86::VPXORQZ256rm:
6526  // If we don't have DQI see if we can still switch from an EVEX integer
6527  // instruction to a VEX floating point instruction.
6528  if (Subtarget.hasDQI())
6529  return 0;
6530 
6531  if (RI.getEncodingValue(MI.getOperand(0).getReg()) >= 16)
6532  return 0;
6533  if (RI.getEncodingValue(MI.getOperand(1).getReg()) >= 16)
6534  return 0;
6535  // Register forms will have 3 operands. Memory form will have more.
6536  if (NumOperands == 3 &&
6537  RI.getEncodingValue(MI.getOperand(2).getReg()) >= 16)
6538  return 0;
6539 
6540  // All domains are valid.
6541  return 0xe;
6542  case X86::MOVHLPSrr:
6543  // We can swap domains when both inputs are the same register.
6544  // FIXME: This doesn't catch all the cases we would like. If the input
6545  // register isn't KILLed by the instruction, the two address instruction
6546  // pass puts a COPY on one input. The other input uses the original
6547  // register. This prevents the same physical register from being used by
6548  // both inputs.
6549  if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg() &&
6550  MI.getOperand(0).getSubReg() == 0 &&
6551  MI.getOperand(1).getSubReg() == 0 &&
6552  MI.getOperand(2).getSubReg() == 0)
6553  return 0x6;
6554  return 0;
6555  }
6556  return 0;
6557 }
6558 
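// Rewrites MI in place to the requested domain for the custom cases above,
// adjusting blend immediates as needed. Returns true if the instruction was
// handled here; false lets setExecutionDomain fall back to the table-driven
// conversion.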
6559 bool X86InstrInfo::setExecutionDomainCustom(MachineInstr &MI,
6560  unsigned Domain) const {
6561  assert(Domain > 0 && Domain < 4 && "Invalid execution domain");
6562  uint16_t dom = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
6563  assert(dom && "Not an SSE instruction");
6564 
6565  unsigned Opcode = MI.getOpcode();
6566  unsigned NumOperands = MI.getDesc().getNumOperands();
6567 
6568  auto SetBlendDomain = [&](unsigned ImmWidth, bool Is256) {
6569  if (MI.getOperand(NumOperands - 1).isImm()) {
6570  unsigned Imm = MI.getOperand(NumOperands - 1).getImm() & 255;
6571  Imm = (ImmWidth == 16 ? ((Imm << 8) | Imm) : Imm);
6572  unsigned NewImm = Imm;
6573 
6574  const uint16_t *table = lookup(Opcode, dom, ReplaceableCustomInstrs);
6575  if (!table)
6576  table = lookup(Opcode, dom, ReplaceableCustomAVX2Instrs);
6577 
6578  if (Domain == 1) { // PackedSingle
6579  AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4, &NewImm);
6580  } else if (Domain == 2) { // PackedDouble
6581  AdjustBlendMask(Imm, ImmWidth, Is256 ? 4 : 2, &NewImm);
6582  } else if (Domain == 3) { // PackedInt
6583  if (Subtarget.hasAVX2()) {
6584  // If we are already VPBLENDW use that, else use VPBLENDD.
6585  if ((ImmWidth / (Is256 ? 2 : 1)) != 8) {
6586  table = lookup(Opcode, dom, ReplaceableCustomAVX2Instrs);
6587  AdjustBlendMask(Imm, ImmWidth, Is256 ? 8 : 4, &NewImm);
6588  }
6589  } else {
6590  assert(!Is256 && "128-bit vector expected");
6591  AdjustBlendMask(Imm, ImmWidth, 8, &NewImm);
6592  }
6593  }
6594 
6595  assert(table && table[Domain - 1] && "Unknown domain op");
6596  MI.setDesc(get(table[Domain - 1]));
6597  MI.getOperand(NumOperands - 1).setImm(NewImm & 255);
6598  }
6599  return true;
6600  };
6601 
6602  switch (Opcode) {
6603  case X86::BLENDPDrmi:
6604  case X86::BLENDPDrri:
6605  case X86::VBLENDPDrmi:
6606  case X86::VBLENDPDrri:
6607  return SetBlendDomain(2, false);
6608  case X86::VBLENDPDYrmi:
6609  case X86::VBLENDPDYrri:
6610  return SetBlendDomain(4, true);
6611  case X86::BLENDPSrmi:
6612  case X86::BLENDPSrri:
6613  case X86::VBLENDPSrmi:
6614  case X86::VBLENDPSrri:
6615  case X86::VPBLENDDrmi:
6616  case X86::VPBLENDDrri:
6617  return SetBlendDomain(4, false);
6618  case X86::VBLENDPSYrmi:
6619  case X86::VBLENDPSYrri:
6620  case X86::VPBLENDDYrmi:
6621  case X86::VPBLENDDYrri:
6622  return SetBlendDomain(8, true);
6623  case X86::PBLENDWrmi:
6624  case X86::PBLENDWrri:
6625  case X86::VPBLENDWrmi:
6626  case X86::VPBLENDWrri:
6627  return SetBlendDomain(8, false);
6628  case X86::VPBLENDWYrmi:
6629  case X86::VPBLENDWYrri:
6630  return SetBlendDomain(16, true);
6631  case X86::VPANDDZ128rr: case X86::VPANDDZ128rm:
6632  case X86::VPANDDZ256rr: case X86::VPANDDZ256rm:
6633  case X86::VPANDQZ128rr: case X86::VPANDQZ128rm:
6634  case X86::VPANDQZ256rr: case X86::VPANDQZ256rm:
6635  case X86::VPANDNDZ128rr: case X86::VPANDNDZ128rm:
6636  case X86::VPANDNDZ256rr: case X86::VPANDNDZ256rm:
6637  case X86::VPANDNQZ128rr: case X86::VPANDNQZ128rm:
6638  case X86::VPANDNQZ256rr: case X86::VPANDNQZ256rm:
6639  case X86::VPORDZ128rr: case X86::VPORDZ128rm:
6640  case X86::VPORDZ256rr: case X86::VPORDZ256rm:
6641  case X86::VPORQZ128rr: case X86::VPORQZ128rm:
6642  case X86::VPORQZ256rr: case X86::VPORQZ256rm:
6643  case X86::VPXORDZ128rr: case X86::VPXORDZ128rm:
6644  case X86::VPXORDZ256rr: case X86::VPXORDZ256rm:
6645  case X86::VPXORQZ128rr: case X86::VPXORQZ128rm:
6646  case X86::VPXORQZ256rr: case X86::VPXORQZ256rm: {
6647  // Without DQI, convert EVEX instructions to VEX instructions.
6648  if (Subtarget.hasDQI())
6649  return false;
6650 
6651  const uint16_t *table = lookupAVX512(MI.getOpcode(), dom,
6652  ReplaceableCustomAVX512LogicInstrs);
6653  assert(table && "Instruction not found in table?");
6654  // Don't change integer Q instructions to D instructions and
6655  // use D instructions if we started with a PS instruction.
6656  if (Domain == 3 && (dom == 1 || table[3] == MI.getOpcode()))
6657  Domain = 4;
6658  MI.setDesc(get(table[Domain - 1]));
6659  return true;
6660  }
6661  case X86::UNPCKHPDrr:
6662  case X86::MOVHLPSrr:
6663  // We just need to commute the instruction which will switch the domains.
6664  if (Domain != dom && Domain != 3 &&
6665  MI.getOperand(1).getReg() == MI.getOperand(2).getReg() &&
6666  MI.getOperand(0).getSubReg() == 0 &&
6667  MI.getOperand(1).getSubReg() == 0 &&
6668  MI.getOperand(2).getSubReg() == 0) {
6669  commuteInstruction(MI, false);
6670  return true;
6671  }
6672  // We must always return true for MOVHLPSrr.
6673  if (Opcode == X86::MOVHLPSrr)
6674  return true;
6675  }
6676  return false;
6677 }
6678 
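// Returns the instruction's current SSE execution domain (1 = PackedSingle,
// 2 = PackedDouble, 3 = PackedInt) paired with a bitmask of the domains it
// could be switched to. Masked AVX-512 logic ops must keep their element
// size, so their 32-bit forms report 0xa (single + int) and their 64-bit
// forms report 0xc (double + int).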
6679 std::pair<uint16_t, uint16_t>
6680 X86InstrInfo::getExecutionDomain(const MachineInstr &MI) const {
6681  uint16_t domain = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
6682  unsigned opcode = MI.getOpcode();
6683  uint16_t validDomains = 0;
6684  if (domain) {
6685  // Attempt to match for custom instructions.
6686  validDomains = getExecutionDomainCustom(MI);
6687  if (validDomains)
6688  return std::make_pair(domain, validDomains);
6689 
6690  if (lookup(opcode, domain, ReplaceableInstrs)) {
6691  validDomains = 0xe;
6692  } else if (lookup(opcode, domain, ReplaceableInstrsAVX2)) {
6693  validDomains = Subtarget.hasAVX2() ? 0xe : 0x6;
6694  } else if (lookup(opcode, domain, ReplaceableInstrsAVX2InsertExtract)) {
6695  // Insert/extract instructions should only affect the domain if AVX2
6696  // is enabled.
6697  if (!Subtarget.hasAVX2())
6698  return std::make_pair(0, 0);
6699  validDomains = 0xe;
6700  } else if (lookupAVX512(opcode, domain, ReplaceableInstrsAVX512)) {
6701  validDomains = 0xe;
6702  } else if (Subtarget.hasDQI() && lookupAVX512(opcode, domain,
6703  ReplaceableInstrsAVX512DQ)) {
6704  validDomains = 0xe;
6705  } else if (Subtarget.hasDQI()) {
6706  if (const uint16_t *table = lookupAVX512(opcode, domain,
6707  ReplaceableInstrsAVX512DQMasked)) {
6708  if (domain == 1 || (domain == 3 && table[3] == opcode))
6709  validDomains = 0xa;
6710  else
6711  validDomains = 0xc;
6712  }
6713  }
6714  }
6715  return std::make_pair(domain, validDomains);
6716 }
6717 
6718 void X86InstrInfo::setExecutionDomain(MachineInstr &MI, unsigned Domain) const {
6719  assert(Domain>0 && Domain<4 && "Invalid execution domain");
6720  uint16_t dom = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
6721  assert(dom && "Not an SSE instruction");
6722 
6723  // Attempt to match for custom instructions.
6724  if (setExecutionDomainCustom(MI, Domain))
6725  return;
6726 
6727  const uint16_t *table = lookup(MI.getOpcode(), dom, ReplaceableInstrs);
6728  if (!table) { // try the other table
6729  assert((Subtarget.hasAVX2() || Domain < 3) &&
6730  "256-bit vector operations only available in AVX2");
6731  table = lookup(MI.getOpcode(), dom, ReplaceableInstrsAVX2);
6732  }
6733  if (!table) { // try the other table
6734  assert(Subtarget.hasAVX2() &&
6735  "256-bit insert/extract only available in AVX2");
6736  table = lookup(MI.getOpcode(), dom, ReplaceableInstrsAVX2InsertExtract);
6737  }
6738  if (!table) { // try the AVX512 table
6739  assert(Subtarget.hasAVX512() && "Requires AVX-512");
6740  table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512);
6741  // Don't change integer Q instructions to D instructions.
6742  if (table && Domain == 3 && table[3] == MI.getOpcode())
6743  Domain = 4;
6744  }
6745  if (!table) { // try the AVX512DQ table
6746  assert((Subtarget.hasDQI() || Domain >= 3) && "Requires AVX-512DQ");
6747  table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512DQ);
6748  // Don't change integer Q instructions to D instructions and
6749  // use D instructions if we started with a PS instruction.
6750  if (table && Domain == 3 && (dom == 1 || table[3] == MI.getOpcode()))
6751  Domain = 4;
6752  }
6753  if (!table) { // try the AVX512DQMasked table
6754  assert((Subtarget.hasDQI() || Domain >= 3) && "Requires AVX-512DQ");
6755  table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512DQMasked);
6756  if (table && Domain == 3 && (dom == 1 || table[3] == MI.getOpcode()))
6757  Domain = 4;
6758  }
6759  assert(table && "Cannot change domain");
6760  MI.setDesc(get(table[Domain - 1]));
6761 }
6762 
6763 /// Return the noop instruction to use for a noop.
6764 void X86InstrInfo::getNoop(MCInst &NopInst) const {
6765  NopInst.setOpcode(X86::NOOP);
6766 }
6767 
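// Divisions, square roots and gathers/scatters (including their AVX and
// AVX-512 forms and the prefetch variants) are the only opcodes the X86
// backend flags as high-latency definitions; hasHighOperandLatency below
// simply defers to this per-opcode check.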
6768 bool X86InstrInfo::isHighLatencyDef(int opc) const {
6769  switch (opc) {
6770  default: return false;
6771  case X86::DIVPDrm:
6772  case X86::DIVPDrr:
6773  case X86::DIVPSrm:
6774  case X86::DIVPSrr:
6775  case X86::DIVSDrm:
6776  case X86::DIVSDrm_Int:
6777  case X86::DIVSDrr:
6778  case X86::DIVSDrr_Int:
6779  case X86::DIVSSrm:
6780  case X86::DIVSSrm_Int:
6781  case X86::DIVSSrr:
6782  case X86::DIVSSrr_Int:
6783  case X86::SQRTPDm:
6784  case X86::SQRTPDr:
6785  case X86::SQRTPSm:
6786  case X86::SQRTPSr:
6787  case X86::SQRTSDm:
6788  case X86::SQRTSDm_Int:
6789  case X86::SQRTSDr:
6790  case X86::SQRTSDr_Int:
6791  case X86::SQRTSSm:
6792  case X86::SQRTSSm_Int:
6793  case X86::SQRTSSr:
6794  case X86::SQRTSSr_Int:
6795  // AVX instructions with high latency
6796  case X86::VDIVPDrm:
6797  case X86::VDIVPDrr:
6798  case X86::VDIVPDYrm:
6799  case X86::VDIVPDYrr:
6800  case X86::VDIVPSrm:
6801  case X86::VDIVPSrr:
6802  case X86::VDIVPSYrm:
6803  case X86::VDIVPSYrr:
6804  case X86::VDIVSDrm:
6805  case X86::VDIVSDrm_Int:
6806  case X86::VDIVSDrr:
6807  case X86::VDIVSDrr_Int:
6808  case X86::VDIVSSrm:
6809  case X86::VDIVSSrm_Int:
6810  case X86::VDIVSSrr:
6811  case X86::VDIVSSrr_Int:
6812  case X86::VSQRTPDm:
6813  case X86::VSQRTPDr:
6814  case X86::VSQRTPDYm:
6815  case X86::VSQRTPDYr:
6816  case X86::VSQRTPSm:
6817  case X86::VSQRTPSr:
6818  case X86::VSQRTPSYm:
6819  case X86::VSQRTPSYr:
6820  case X86::VSQRTSDm:
6821  case X86::VSQRTSDm_Int:
6822  case X86::VSQRTSDr:
6823  case X86::VSQRTSDr_Int:
6824  case X86::VSQRTSSm:
6825  case X86::VSQRTSSm_Int:
6826  case X86::VSQRTSSr:
6827  case X86::VSQRTSSr_Int:
6828  // AVX512 instructions with high latency
6829  case X86::VDIVPDZ128rm:
6830  case X86::VDIVPDZ128rmb:
6831  case X86::VDIVPDZ128rmbk:
6832  case X86::VDIVPDZ128rmbkz:
6833  case X86::VDIVPDZ128rmk:
6834  case X86::VDIVPDZ128rmkz:
6835  case X86::VDIVPDZ128rr:
6836  case X86::VDIVPDZ128rrk:
6837  case X86::VDIVPDZ128rrkz:
6838  case X86::VDIVPDZ256rm:
6839  case X86::VDIVPDZ256rmb:
6840  case X86::VDIVPDZ256rmbk:
6841  case X86::VDIVPDZ256rmbkz:
6842  case X86::VDIVPDZ256rmk:
6843  case X86::VDIVPDZ256rmkz:
6844  case X86::VDIVPDZ256rr:
6845  case X86::VDIVPDZ256rrk:
6846  case X86::VDIVPDZ256rrkz:
6847  case X86::VDIVPDZrrb:
6848  case X86::VDIVPDZrrbk:
6849  case X86::VDIVPDZrrbkz:
6850  case X86::VDIVPDZrm:
6851  case X86::VDIVPDZrmb:
6852  case X86::VDIVPDZrmbk:
6853  case X86::VDIVPDZrmbkz:
6854  case X86::VDIVPDZrmk:
6855  case X86::VDIVPDZrmkz:
6856  case X86::VDIVPDZrr:
6857  case X86::VDIVPDZrrk:
6858  case X86::VDIVPDZrrkz:
6859  case X86::VDIVPSZ128rm:
6860  case X86::VDIVPSZ128rmb:
6861  case X86::VDIVPSZ128rmbk:
6862  case X86::VDIVPSZ128rmbkz:
6863  case X86::VDIVPSZ128rmk:
6864  case X86::VDIVPSZ128rmkz:
6865  case X86::VDIVPSZ128rr:
6866  case X86::VDIVPSZ128rrk:
6867  case X86::VDIVPSZ128rrkz:
6868  case X86::VDIVPSZ256rm:
6869  case X86::VDIVPSZ256rmb:
6870  case X86::VDIVPSZ256rmbk:
6871  case X86::VDIVPSZ256rmbkz:
6872  case X86::VDIVPSZ256rmk:
6873  case X86::VDIVPSZ256rmkz:
6874  case X86::VDIVPSZ256rr:
6875  case X86::VDIVPSZ256rrk:
6876  case X86::VDIVPSZ256rrkz:
6877  case X86::VDIVPSZrrb:
6878  case X86::VDIVPSZrrbk:
6879  case X86::VDIVPSZrrbkz:
6880  case X86::VDIVPSZrm:
6881  case X86::VDIVPSZrmb:
6882  case X86::VDIVPSZrmbk:
6883  case X86::VDIVPSZrmbkz:
6884  case X86::VDIVPSZrmk:
6885  case X86::VDIVPSZrmkz:
6886  case X86::VDIVPSZrr:
6887  case X86::VDIVPSZrrk:
6888  case X86::VDIVPSZrrkz:
6889  case X86::VDIVSDZrm:
6890  case X86::VDIVSDZrr:
6891  case X86::VDIVSDZrm_Int:
6892  case X86::VDIVSDZrm_Intk:
6893  case X86::VDIVSDZrm_Intkz:
6894  case X86::VDIVSDZrr_Int:
6895  case X86::VDIVSDZrr_Intk:
6896  case X86::VDIVSDZrr_Intkz:
6897  case X86::VDIVSDZrrb_Int:
6898  case X86::VDIVSDZrrb_Intk:
6899  case X86::VDIVSDZrrb_Intkz:
6900  case X86::VDIVSSZrm:
6901  case X86::VDIVSSZrr:
6902  case X86::VDIVSSZrm_Int:
6903  case X86::VDIVSSZrm_Intk:
6904  case X86::VDIVSSZrm_Intkz:
6905  case X86::VDIVSSZrr_Int:
6906  case X86::VDIVSSZrr_Intk:
6907  case X86::VDIVSSZrr_Intkz:
6908  case X86::VDIVSSZrrb_Int:
6909  case X86::VDIVSSZrrb_Intk:
6910  case X86::VDIVSSZrrb_Intkz:
6911  case X86::VSQRTPDZ128m:
6912  case X86::VSQRTPDZ128mb:
6913  case X86::VSQRTPDZ128mbk:
6914  case X86::VSQRTPDZ128mbkz:
6915  case X86::VSQRTPDZ128mk:
6916  case X86::VSQRTPDZ128mkz:
6917  case X86::VSQRTPDZ128r:
6918  case X86::VSQRTPDZ128rk:
6919  case X86::VSQRTPDZ128rkz:
6920  case X86::VSQRTPDZ256m:
6921  case X86::VSQRTPDZ256mb:
6922  case X86::VSQRTPDZ256mbk:
6923  case X86::VSQRTPDZ256mbkz:
6924  case X86::VSQRTPDZ256mk:
6925  case X86::VSQRTPDZ256mkz:
6926  case X86::VSQRTPDZ256r:
6927  case X86::VSQRTPDZ256rk:
6928  case X86::VSQRTPDZ256rkz:
6929  case X86::VSQRTPDZm:
6930  case X86::VSQRTPDZmb:
6931  case X86::VSQRTPDZmbk:
6932  case X86::VSQRTPDZmbkz:
6933  case X86::VSQRTPDZmk:
6934  case X86::VSQRTPDZmkz:
6935  case X86::VSQRTPDZr:
6936  case X86::VSQRTPDZrb:
6937  case X86::VSQRTPDZrbk:
6938  case X86::VSQRTPDZrbkz:
6939  case X86::VSQRTPDZrk:
6940  case X86::VSQRTPDZrkz:
6941  case X86::VSQRTPSZ128m:
6942  case X86::VSQRTPSZ128mb:
6943  case X86::VSQRTPSZ128mbk:
6944  case X86::VSQRTPSZ128mbkz:
6945  case X86::VSQRTPSZ128mk:
6946  case X86::VSQRTPSZ128mkz:
6947  case X86::VSQRTPSZ128r:
6948  case X86::VSQRTPSZ128rk:
6949  case X86::VSQRTPSZ128rkz:
6950  case X86::VSQRTPSZ256m:
6951  case X86::VSQRTPSZ256mb:
6952  case X86::VSQRTPSZ256mbk:
6953  case X86::VSQRTPSZ256mbkz:
6954  case X86::VSQRTPSZ256mk:
6955  case X86::VSQRTPSZ256mkz:
6956  case X86::VSQRTPSZ256r:
6957  case X86::VSQRTPSZ256rk:
6958  case X86::VSQRTPSZ256rkz:
6959  case X86::VSQRTPSZm:
6960  case X86::VSQRTPSZmb:
6961  case X86::VSQRTPSZmbk:
6962  case X86::VSQRTPSZmbkz:
6963  case X86::VSQRTPSZmk:
6964  case X86::VSQRTPSZmkz:
6965  case X86::VSQRTPSZr:
6966  case X86::VSQRTPSZrb:
6967  case X86::VSQRTPSZrbk:
6968  case X86::VSQRTPSZrbkz:
6969  case X86::VSQRTPSZrk:
6970  case X86::VSQRTPSZrkz:
6971  case X86::VSQRTSDZm:
6972  case X86::VSQRTSDZm_Int:
6973  case X86::VSQRTSDZm_Intk:
6974  case X86::VSQRTSDZm_Intkz:
6975  case X86::VSQRTSDZr:
6976  case X86::VSQRTSDZr_Int:
6977  case X86::VSQRTSDZr_Intk:
6978  case X86::VSQRTSDZr_Intkz:
6979  case X86::VSQRTSDZrb_Int:
6980  case X86::VSQRTSDZrb_Intk:
6981  case X86::VSQRTSDZrb_Intkz:
6982  case X86::VSQRTSSZm:
6983  case X86::VSQRTSSZm_Int:
6984  case X86::VSQRTSSZm_Intk:
6985  case X86::VSQRTSSZm_Intkz:
6986  case X86::VSQRTSSZr:
6987  case X86::VSQRTSSZr_Int:
6988  case X86::VSQRTSSZr_Intk:
6989  case X86::VSQRTSSZr_Intkz:
6990  case X86::VSQRTSSZrb_Int:
6991  case X86::VSQRTSSZrb_Intk:
6992  case X86::VSQRTSSZrb_Intkz:
6993 
6994  case X86::VGATHERDPDYrm:
6995  case X86::VGATHERDPDZ128rm:
6996  case X86::VGATHERDPDZ256rm:
6997  case X86::VGATHERDPDZrm:
6998  case X86::VGATHERDPDrm:
6999  case X86::VGATHERDPSYrm:
7000  case X86::VGATHERDPSZ128rm:
7001  case X86::VGATHERDPSZ256rm:
7002  case X86::VGATHERDPSZrm:
7003  case X86::VGATHERDPSrm:
7004  case X86::VGATHERPF0DPDm:
7005  case X86::VGATHERPF0DPSm:
7006  case X86::VGATHERPF0QPDm:
7007  case X86::VGATHERPF0QPSm:
7008  case X86::VGATHERPF1DPDm:
7009  case X86::VGATHERPF1DPSm:
7010  case X86::VGATHERPF1QPDm:
7011  case X86::VGATHERPF1QPSm:
7012  case X86::VGATHERQPDYrm:
7013  case X86::VGATHERQPDZ128rm:
7014  case X86::VGATHERQPDZ256rm:
7015  case X86::VGATHERQPDZrm:
7016  case X86::VGATHERQPDrm:
7017  case X86::VGATHERQPSYrm:
7018  case X86::VGATHERQPSZ128rm:
7019  case X86::VGATHERQPSZ256rm:
7020  case X86::VGATHERQPSZrm:
7021  case X86::VGATHERQPSrm:
7022  case X86::VPGATHERDDYrm:
7023  case X86::VPGATHERDDZ128rm:
7024  case X86::VPGATHERDDZ256rm:
7025  case X86::VPGATHERDDZrm:
7026  case X86::VPGATHERDDrm:
7027  case X86::VPGATHERDQYrm:
7028  case X86::VPGATHERDQZ128rm:
7029  case X86::VPGATHERDQZ256rm:
7030  case X86::VPGATHERDQZrm:
7031  case X86::VPGATHERDQrm:
7032  case X86::VPGATHERQDYrm:
7033  case X86::VPGATHERQDZ128rm:
7034  case X86::VPGATHERQDZ256rm:
7035  case X86::VPGATHERQDZrm:
7036  case X86::VPGATHERQDrm:
7037  case X86::VPGATHERQQYrm:
7038  case X86::VPGATHERQQZ128rm:
7039  case X86::VPGATHERQQZ256rm:
7040  case X86::VPGATHERQQZrm:
7041  case X86::VPGATHERQQrm:
7042  case X86::VSCATTERDPDZ128mr:
7043  case X86::VSCATTERDPDZ256mr:
7044  case X86::VSCATTERDPDZmr:
7045  case X86::VSCATTERDPSZ128mr:
7046  case X86::VSCATTERDPSZ256mr:
7047  case X86::VSCATTERDPSZmr:
7048  case X86::VSCATTERPF0DPDm:
7049  case X86::VSCATTERPF0DPSm:
7050  case X86::VSCATTERPF0QPDm:
7051  case X86::VSCATTERPF0QPSm:
7052  case X86::VSCATTERPF1DPDm:
7053  case X86::VSCATTERPF1DPSm:
7054  case X86::VSCATTERPF1QPDm:
7055  case X86::VSCATTERPF1QPSm:
7056  case X86::VSCATTERQPDZ128mr:
7057  case X86::VSCATTERQPDZ256mr:
7058  case X86::VSCATTERQPDZmr:
7059  case X86::VSCATTERQPSZ128mr:
7060  case X86::VSCATTERQPSZ256mr:
7061  case X86::VSCATTERQPSZmr:
7062  case X86::VPSCATTERDDZ128mr:
7063  case X86::VPSCATTERDDZ256mr:
7064  case X86::VPSCATTERDDZmr:
7065  case X86::VPSCATTERDQZ128mr:
7066  case X86::VPSCATTERDQZ256mr:
7067  case X86::VPSCATTERDQZmr:
7068  case X86::VPSCATTERQDZ128mr:
7069  case X86::VPSCATTERQDZ256mr:
7070  case X86::VPSCATTERQDZmr:
7071  case X86::VPSCATTERQQZ128mr:
7072  case X86::VPSCATTERQQZ256mr:
7073  case X86::VPSCATTERQQZmr:
7074  return true;
7075  }
7076 }
7077 
7078 bool X86InstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel,
7079  const MachineRegisterInfo *MRI,
7080  const MachineInstr &DefMI,
7081  unsigned DefIdx,
7082  const MachineInstr &UseMI,
7083  unsigned UseIdx) const {
7084  return isHighLatencyDef(DefMI.getOpcode());
7085 }
7086 
7087 bool X86InstrInfo::hasReassociableOperands(const MachineInstr &Inst,
7088  const MachineBasicBlock *MBB) const {
7089  assert((Inst.getNumOperands() == 3 || Inst.getNumOperands() == 4) &&
7090  "Reassociation needs binary operators");
7091 
7092  // Integer binary math/logic instructions have a third source operand:
7093  // the EFLAGS register. That operand must be both defined here and never
7094  // used; i.e., it must be dead. If the EFLAGS operand is live, then we can
7095  // not change anything because rearranging the operands could affect other
7096  // instructions that depend on the exact status flags (zero, sign, etc.)
7097  // that are set by using these particular operands with this operation.
7098  if (Inst.getNumOperands() == 4) {
7099  assert(Inst.getOperand(3).isReg() &&
7100  Inst.getOperand(3).getReg() == X86::EFLAGS &&
7101  "Unexpected operand in reassociable instruction");
7102  if (!Inst.getOperand(3).isDead())
7103  return false;
7104  }
7105 
7106  return TargetInstrInfo::hasReassociableOperands(Inst, MBB);
7107 }
7108 
7109 // TODO: There are many more machine instruction opcodes to match:
7110 // 1. Other data types (integer, vectors)
7111 // 2. Other math / logic operations (xor, or)
7112 // 3. Other forms of the same operation (intrinsics and other variants)
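// Reassociation (see hasReassociableOperands above) only fires for the
// opcodes listed here. Integer and bitwise vector operations are
// unconditionally safe; the FP add/mul cases at the end are additionally
// gated on UnsafeFPMath because reassociating them can change rounding.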
7113 bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
7114  switch (Inst.getOpcode()) {
7115  case X86::AND8rr:
7116  case X86::AND16rr:
7117  case X86::AND32rr:
7118  case X86::AND64rr:
7119  case X86::OR8rr:
7120  case X86::OR16rr:
7121  case X86::OR32rr:
7122  case X86::OR64rr:
7123  case X86::XOR8rr:
7124  case X86::XOR16rr:
7125  case X86::XOR32rr:
7126  case X86::XOR64rr:
7127  case X86::IMUL16rr:
7128  case X86::IMUL32rr:
7129  case X86::IMUL64rr:
7130  case X86::PANDrr:
7131  case X86::PORrr:
7132  case X86::PXORrr:
7133  case X86::ANDPDrr:
7134  case X86::ANDPSrr:
7135  case X86::ORPDrr:
7136  case X86::ORPSrr:
7137  case X86::XORPDrr:
7138  case X86::XORPSrr:
7139  case X86::PADDBrr:
7140  case X86::PADDWrr:
7141  case X86::PADDDrr:
7142  case X86::PADDQrr:
7143  case X86::VPANDrr:
7144  case X86::VPANDYrr:
7145  case X86::VPANDDZ128rr:
7146  case X86::VPANDDZ256rr:
7147  case X86::VPANDDZrr:
7148  case X86::VPANDQZ128rr:
7149  case X86::VPANDQZ256rr:
7150  case X86::VPANDQZrr:
7151  case X86::VPORrr:
7152  case X86::VPORYrr:
7153  case X86::VPORDZ128rr:
7154  case X86::VPORDZ256rr:
7155  case X86::VPORDZrr:
7156  case X86::VPORQZ128rr:
7157  case X86::VPORQZ256rr:
7158  case X86::VPORQZrr:
7159  case X86::VPXORrr:
7160  case X86::VPXORYrr:
7161  case X86::VPXORDZ128rr:
7162  case X86::VPXORDZ256rr:
7163  case X86::VPXORDZrr:
7164  case X86::VPXORQZ128rr:
7165  case X86::VPXORQZ256rr:
7166  case X86::VPXORQZrr:
7167  case X86::VANDPDrr:
7168  case X86::VANDPSrr:
7169  case X86::VANDPDYrr:
7170  case X86::VANDPSYrr:
7171  case X86::VANDPDZ128rr:
7172  case X86::VANDPSZ128rr:
7173  case X86::VANDPDZ256rr:
7174  case X86::VANDPSZ256rr:
7175  case X86::VANDPDZrr:
7176  case X86::VANDPSZrr:
7177  case X86::VORPDrr:
7178  case X86::VORPSrr:
7179  case X86::VORPDYrr:
7180  case X86::VORPSYrr:
7181  case X86::VORPDZ128rr:
7182  case X86::VORPSZ128rr:
7183  case X86::VORPDZ256rr:
7184  case X86::VORPSZ256rr:
7185  case X86::VORPDZrr:
7186  case X86::VORPSZrr:
7187  case X86::VXORPDrr:
7188  case X86::VXORPSrr:
7189  case X86::VXORPDYrr:
7190  case X86::VXORPSYrr:
7191  case X86::VXORPDZ128rr:
7192  case X86::VXORPSZ128rr:
7193  case X86::VXORPDZ256rr:
7194  case X86::VXORPSZ256rr:
7195  case X86::VXORPDZrr:
7196  case X86::VXORPSZrr:
7197  case X86::KADDBrr:
7198  case X86::KADDWrr:
7199  case X86::KADDDrr:
7200  case X86::KADDQrr:
7201  case X86::KANDBrr:
7202  case X86::KANDWrr:
7203  case X86::KANDDrr:
7204  case X86::KANDQrr:
7205  case X86::KORBrr:
7206  case X86::KORWrr:
7207  case X86::KORDrr:
7208  case X86::KORQrr:
7209  case X86::KXORBrr:
7210  case X86::KXORWrr:
7211  case X86::KXORDrr:
7212  case X86::KXORQrr:
7213  case X86::VPADDBrr:
7214  case X86::VPADDWrr:
7215  case X86::VPADDDrr:
7216  case X86::VPADDQrr:
7217  case X86::VPADDBYrr:
7218  case X86::VPADDWYrr:
7219  case X86::VPADDDYrr:
7220  case X86::VPADDQYrr:
7221  case X86::VPADDBZ128rr:
7222  case X86::VPADDWZ128rr:
7223  case X86::VPADDDZ128rr:
7224  case X86::VPADDQZ128rr:
7225  case X86::VPADDBZ256rr:
7226  case X86::VPADDWZ256rr:
7227  case X86::VPADDDZ256rr:
7228  case X86::VPADDQZ256rr:
7229  case X86::VPADDBZrr:
7230  case X86::VPADDWZrr:
7231  case X86::VPADDDZrr:
7232  case X86::VPADDQZrr:
7233  case X86::VPMULLWrr:
7234  case X86::VPMULLWYrr:
7235  case X86::VPMULLWZ128rr:
7236  case X86::VPMULLWZ256rr:
7237  case X86::VPMULLWZrr:
7238  case X86::VPMULLDrr:
7239  case X86::VPMULLDYrr:
7240  case X86::VPMULLDZ128rr:
7241  case X86::VPMULLDZ256rr:
7242  case X86::VPMULLDZrr:
7243  case X86::VPMULLQZ128rr:
7244  case X86::VPMULLQZ256rr:
7245  case X86::VPMULLQZrr:
7246  // Normal min/max instructions are not commutative because of NaN and signed
7247  // zero semantics, but these are. Thus, there's no need to check for global
7248  // relaxed math; the instructions themselves have the properties we need.
7249  case X86::MAXCPDrr:
7250  case X86::MAXCPSrr:
7251  case X86::MAXCSDrr:
7252  case X86::MAXCSSrr:
7253  case X86::MINCPDrr:
7254  case X86::MINCPSrr:
7255  case X86::MINCSDrr:
7256  case X86::MINCSSrr:
7257  case X86::VMAXCPDrr:
7258  case X86::VMAXCPSrr:
7259  case X86::VMAXCPDYrr:
7260  case X86::VMAXCPSYrr:
7261  case X86::VMAXCPDZ128rr:
7262  case X86::VMAXCPSZ128rr:
7263  case X86::VMAXCPDZ256rr:
7264  case X86::VMAXCPSZ256rr:
7265  case X86::VMAXCPDZrr:
7266  case X86::VMAXCPSZrr:
7267  case X86::VMAXCSDrr:
7268  case X86::VMAXCSSrr:
7269  case X86::VMAXCSDZrr:
7270  case X86::VMAXCSSZrr:
7271  case X86::VMINCPDrr:
7272  case X86::VMINCPSrr:
7273  case X86::VMINCPDYrr:
7274  case X86::VMINCPSYrr:
7275  case X86::VMINCPDZ128rr:
7276  case X86::VMINCPSZ128rr:
7277  case X86::VMINCPDZ256rr:
7278  case X86::VMINCPSZ256rr:
7279  case X86::VMINCPDZrr:
7280  case X86::VMINCPSZrr:
7281  case X86::VMINCSDrr:
7282  case X86::VMINCSSrr:
7283  case X86::VMINCSDZrr:
7284  case X86::VMINCSSZrr:
7285  return true;
7286  case X86::ADDPDrr:
7287  case X86::ADDPSrr:
7288  case X86::ADDSDrr:
7289  case X86::ADDSSrr:
7290  case X86::MULPDrr:
7291  case X86::MULPSrr:
7292  case X86::MULSDrr:
7293  case X86::MULSSrr:
7294  case X86::VADDPDrr:
7295  case X86::VADDPSrr:
7296  case X86::VADDPDYrr:
7297  case X86::VADDPSYrr:
7298  case X86::VADDPDZ128rr:
7299  case X86::VADDPSZ128rr:
7300  case X86::VADDPDZ256rr:
7301  case X86::VADDPSZ256rr:
7302  case X86::VADDPDZrr:
7303  case X86::VADDPSZrr:
7304  case X86::VADDSDrr:
7305  case X86::VADDSSrr:
7306  case X86::VADDSDZrr:
7307  case X86::VADDSSZrr:
7308  case X86::VMULPDrr:
7309  case X86::VMULPSrr:
7310  case X86::VMULPDYrr:
7311  case X86::VMULPSYrr:
7312  case X86::VMULPDZ128rr:
7313  case X86::VMULPSZ128rr:
7314  case X86::VMULPDZ256rr:
7315  case X86::VMULPSZ256rr:
7316  case X86::VMULPDZrr:
7317  case X86::VMULPSZrr:
7318  case X86::VMULSDrr:
7319  case X86::VMULSSrr:
7320  case X86::VMULSDZrr:
7321  case X86::VMULSSZrr:
7322  return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
7323  default:
7324  return false;
7325  }
7326 }
7327 
7328 /// This is an architecture-specific helper function of reassociateOps.
7329 /// Set special operand attributes for new instructions after reassociation.
7330 void X86InstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1,
7331  MachineInstr &OldMI2,
7332  MachineInstr &NewMI1,
7333  MachineInstr &NewMI2) const {
7334  // Integer instructions define an implicit EFLAGS source register operand as
7335  // the third source (fourth total) operand.
7336  if (OldMI1.getNumOperands() != 4 || OldMI2.getNumOperands() != 4)
7337  return;
7338 
7339  assert(NewMI1.getNumOperands() == 4 && NewMI2.getNumOperands() == 4 &&
7340  "Unexpected instruction type for reassociation");
7341 
7342  MachineOperand &OldOp1 = OldMI1.getOperand(3);
7343  MachineOperand &OldOp2 = OldMI2.getOperand(3);
7344  MachineOperand &NewOp1 = NewMI1.getOperand(3);
7345  MachineOperand &NewOp2 = NewMI2.getOperand(3);
7346 
7347  assert(OldOp1.isReg() && OldOp1.getReg() == X86::EFLAGS && OldOp1.isDead() &&
7348  "Must have dead EFLAGS operand in reassociable instruction");
7349  assert(OldOp2.isReg() && OldOp2.getReg() == X86::EFLAGS && OldOp2.isDead() &&
7350  "Must have dead EFLAGS operand in reassociable instruction");
7351 
7352  (void)OldOp1;
7353  (void)OldOp2;
7354 
7355  assert(NewOp1.isReg() && NewOp1.getReg() == X86::EFLAGS &&
7356  "Unexpected operand in reassociable instruction");
7357  assert(NewOp2.isReg() && NewOp2.getReg() == X86::EFLAGS &&
7358  "Unexpected operand in reassociable instruction");
7359 
7360  // Mark the new EFLAGS operands as dead to be helpful to subsequent iterations
7361  // of this pass or other passes. The EFLAGS operands must be dead in these new
7362  // instructions because the EFLAGS operands in the original instructions must
7363  // be dead in order for reassociation to occur.
7364  NewOp1.setIsDead();
7365  NewOp2.setIsDead();
7366 }
7367 
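// X86 operand target flags are plain enumerated values rather than bitmasks,
// so the direct flag is returned unchanged with a zero bitmask half. The
// table below supplies the names used when serializing these flags in MIR.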
7368 std::pair<unsigned, unsigned>
7369 X86InstrInfo::decomposeMachineOperandTargetFlags(unsigned TF) const {
7370  return std::make_pair(TF, 0u);
7371 }
7372 
7373 ArrayRef<std::pair<unsigned, const char *>>
7374 X86InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
7375  using namespace X86II;
7376  static const std::pair<unsigned, const char *> TargetFlags[] = {
7377  {MO_GOT_ABSOLUTE_ADDRESS, "x86-got-absolute-address"},
7378  {MO_PIC_BASE_OFFSET, "x86-pic-base-offset"},
7379  {MO_GOT, "x86-got"},
7380  {MO_GOTOFF, "x86-gotoff"},
7381  {MO_GOTPCREL, "x86-gotpcrel"},
7382  {MO_PLT, "x86-plt"},
7383  {MO_TLSGD, "x86-tlsgd"},
7384  {MO_TLSLD, "x86-tlsld"},
7385  {MO_TLSLDM, "x86-tlsldm"},
7386  {MO_GOTTPOFF, "x86-gottpoff"},
7387  {MO_INDNTPOFF, "x86-indntpoff"},
7388  {MO_TPOFF, "x86-tpoff"},
7389  {MO_DTPOFF, "x86-dtpoff"},
7390  {MO_NTPOFF, "x86-ntpoff"},
7391  {MO_GOTNTPOFF, "x86-gotntpoff"},
7392  {MO_DLLIMPORT, "x86-dllimport"},
7393  {MO_DARWIN_NONLAZY, "x86-darwin-nonlazy"},
7394  {MO_DARWIN_NONLAZY_PIC_BASE, "x86-darwin-nonlazy-pic-base"},
7395  {MO_TLVP, "x86-tlvp"},
7396  {MO_TLVP_PIC_BASE, "x86-tlvp-pic-base"},
7397  {MO_SECREL, "x86-secrel"},
7398  {MO_COFFSTUB, "x86-coffstub"}};
7399  return makeArrayRef(TargetFlags);
7400 }
7401 
7402 namespace {
7403  /// Create Global Base Reg pass. This initializes the PIC
7404  /// global base register for x86-32.
7405  struct CGBR : public MachineFunctionPass {
7406  static char ID;
7407  CGBR() : MachineFunctionPass(ID) {}
7408 
7409  bool runOnMachineFunction(MachineFunction &MF) override {
7410  const X86TargetMachine *TM =
7411  static_cast<const X86TargetMachine *>(&MF.getTarget());
7412  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
7413 
7414  // Don't do anything in the 64-bit small and kernel code models. They use
7415  // RIP-relative addressing for everything.
7416  if (STI.is64Bit() && (TM->getCodeModel() == CodeModel::Small ||
7417  TM->getCodeModel() == CodeModel::Kernel))
7418  return false;
7419 
7420  // Only emit a global base reg in PIC mode.
7421  if (!TM->isPositionIndependent())
7422  return false;
7423 
7424  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
7425  unsigned GlobalBaseReg = X86FI->getGlobalBaseReg();
7426 
7427  // If we didn't need a GlobalBaseReg, don't insert code.
7428  if (GlobalBaseReg == 0)
7429  return false;
7430 
7431  // Insert the set of GlobalBaseReg into the first MBB of the function
7432  MachineBasicBlock &FirstMBB = MF.front();
7433  MachineBasicBlock::iterator MBBI = FirstMBB.begin();
7434  DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
7435  MachineRegisterInfo &RegInfo = MF.getRegInfo();
7436  const X86InstrInfo *TII = STI.getInstrInfo();
7437 
7438  unsigned PC;
7439  if (STI.isPICStyleGOT())
7440  PC = RegInfo.createVirtualRegister(&X86::GR32RegClass);
7441  else
7442  PC = GlobalBaseReg;
7443 
7444  if (STI.is64Bit()) {
7445  if (TM->getCodeModel() == CodeModel::Medium) {
7446  // In the medium code model, use a RIP-relative LEA to materialize the
7447  // GOT.
7448  BuildMI(FirstMBB, MBBI, DL, TII->get(X86::LEA64r), PC)
7449  .addReg(X86::RIP)
7450  .addImm(0)
7451  .addReg(0)
7452  .addExternalSymbol("_GLOBAL_OFFSET_TABLE_")
7453  .addReg(0);
7454  } else if (TM->getCodeModel() == CodeModel::Large) {
7455  // In the large code model, we are aiming for this code, though the
7456  // register allocation may vary:
7457  // leaq .LN$pb(%rip), %rax
7458  // movq $_GLOBAL_OFFSET_TABLE_ - .LN$pb, %rcx
7459  // addq %rcx, %rax
7460  // RAX now holds address of _GLOBAL_OFFSET_TABLE_.
7461  unsigned PBReg = RegInfo.createVirtualRegister(&X86::GR64RegClass);
7462  unsigned GOTReg =
7463  RegInfo.createVirtualRegister(&X86::GR64RegClass);
7464  BuildMI(FirstMBB, MBBI, DL, TII->get(X86::LEA64r), PBReg)
7465  .addReg(X86::RIP)
7466  .addImm(0)
7467  .addReg(0)
7468  .addSym(MF.getPICBaseSymbol())
7469  .addReg(0);
7470  std::prev(MBBI)->setPreInstrSymbol(MF, MF.getPICBaseSymbol());
7471  BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOV64ri), GOTReg)
7472  .addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
7473  X86II::MO_PIC_BASE_OFFSET);
7474  BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD64rr), PC)
7475  .addReg(PBReg, RegState::Kill)
7476  .addReg(GOTReg, RegState::Kill);
7477  } else {
7478  llvm_unreachable("unexpected code model");
7479  }
7480  } else {
7481  // The operand of MovePCtoStack is completely ignored by the asm printer. It's
7482  // only used in JIT code emission as a displacement to the PC.
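 // (MOVPC32r itself expands to a call to the next instruction followed by a
 // pop, leaving the address of the PIC label in PC.)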
7483  BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);
7484 
7485  // If we're using vanilla 'GOT' PIC style, we should use relative
7486  // addressing not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
7487  if (STI.isPICStyleGOT()) {
7488  // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel],
7489  // %some_register
7490  BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
7491  .addReg(PC)
7492  .addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
7493  X86II::MO_GOT_ABSOLUTE_ADDRESS);
7494  }
7495  }
7496 
7497  return true;
7498  }
7499 
7500  StringRef getPassName() const override {
7501  return "X86 PIC Global Base Reg Initialization";
7502  }
7503 
7504  void getAnalysisUsage(AnalysisUsage &AU) const override {
7505  AU.setPreservesCFG();
7506  MachineFunctionPass::getAnalysisUsage(AU);
7507  }
7508  };
7509 }
7510 
7511 char CGBR::ID = 0;
7512 FunctionPass*
7513 llvm::createX86GlobalBaseRegPass() { return new CGBR(); }
7514 
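// Lowering a local-dynamic TLS access materializes the TLS base address with
// a TLS_base_addr32/64 pseudo (a call under the hood). When a function does
// this more than once, the pass below keeps the first result in a virtual
// register and rewrites every dominated TLS_base_addr as a plain COPY from
// that register.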
7515 namespace {
7516  struct LDTLSCleanup : public MachineFunctionPass {
7517  static char ID;
7518  LDTLSCleanup() : MachineFunctionPass(ID) {}
7519 
7520  bool runOnMachineFunction(MachineFunction &MF) override {
7521  if (skipFunction(MF.getFunction()))
7522  return false;
7523 
7524  X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
7525  if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
7526  // No point folding accesses if there aren't at least two.
7527  return false;
7528  }
7529 
7530  MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
7531  return VisitNode(DT->getRootNode(), 0);
7532  }
7533 
7534  // Visit the dominator subtree rooted at Node in pre-order.
7535  // If TLSBaseAddrReg is non-zero, then use that to replace any
7536  // TLS_base_addr instructions. Otherwise, create the register
7537  // when the first such instruction is seen, and then use it
7538  // as we encounter more instructions.
7539  bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
7540  MachineBasicBlock *BB = Node->getBlock();
7541  bool Changed = false;
7542 
7543  // Traverse the current block.
7544  for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
7545  ++I) {
7546  switch (I->getOpcode()) {
7547  case X86::TLS_base_addr32:
7548  case X86::TLS_base_addr64:
7549  if (TLSBaseAddrReg)
7550  I = ReplaceTLSBaseAddrCall(*I, TLSBaseAddrReg);
7551  else
7552  I = SetRegister(*I, &TLSBaseAddrReg);
7553  Changed = true;
7554  break;
7555  default:
7556  break;
7557  }
7558  }
7559 
7560  // Visit the children of this block in the dominator tree.
7561  for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end();
7562  I != E; ++I) {
7563  Changed |= VisitNode(*I, TLSBaseAddrReg);
7564  }
7565 
7566  return Changed;
7567  }
7568 
7569  // Replace the TLS_base_addr instruction I with a copy from
7570  // TLSBaseAddrReg, returning the new instruction.
7571  MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr &I,
7572  unsigned TLSBaseAddrReg) {
7573  MachineFunction *MF = I.getParent()->getParent();
7574  const X86Subtarget &STI = MF->getSubtarget<X86Subtarget>();
7575  const bool is64Bit = STI.is64Bit();
7576  const X86InstrInfo *TII = STI.getInstrInfo();
7577 
7578  // Insert a Copy from TLSBaseAddrReg to RAX/EAX.
7579  MachineInstr *Copy =
7580  BuildMI(*I.getParent(), I, I.getDebugLoc(),
7581  TII->get(TargetOpcode::COPY), is64Bit ? X86::RAX : X86::EAX)
7582  .addReg(TLSBaseAddrReg);
7583 
7584  // Erase the TLS_base_addr instruction.
7585  I.eraseFromParent();
7586 
7587  return Copy;
7588  }
7589 
7590  // Create a virtual register in *TLSBaseAddrReg, and populate it by
7591  // inserting a copy instruction after I. Returns the new instruction.
7592  MachineInstr *SetRegister(MachineInstr &I, unsigned *TLSBaseAddrReg) {
7593  MachineFunction *MF = I.getParent()->getParent();
7594  const X86Subtarget &STI = MF->getSubtarget<X86Subtarget>();
7595  const bool is64Bit = STI.is64Bit();
7596  const X86InstrInfo *TII = STI.getInstrInfo();
7597 
7598  // Create a virtual register for the TLS base address.
7599  MachineRegisterInfo &RegInfo = MF->getRegInfo();
7600  *TLSBaseAddrReg = RegInfo.createVirtualRegister(is64Bit
7601  ? &X86::GR64RegClass
7602  : &X86::GR32RegClass);
7603 
7604  // Insert a copy from RAX/EAX to TLSBaseAddrReg.
7605  MachineInstr *Next = I.getNextNode();
7606  MachineInstr *Copy =
7607  BuildMI(*I.getParent(), Next, I.getDebugLoc(),
7608  TII->get(TargetOpcode::COPY), *TLSBaseAddrReg)
7609  .addReg(is64Bit ? X86::RAX : X86::EAX);
7610 
7611  return Copy;
7612  }
7613 
7614  StringRef getPassName() const override {
7615  return "Local Dynamic TLS Access Clean-up";
7616  }
7617 
7618  void getAnalysisUsage(AnalysisUsage &AU) const override {
7619  AU.setPreservesCFG();
7620  AU.addRequired<MachineDominatorTree>();
7621  MachineFunctionPass::getAnalysisUsage(AU);
7622  }
7623  };
7624 }
7625 
7626 char LDTLSCleanup::ID = 0;
7627 FunctionPass*
7628 llvm::createCleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); }
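For context, the local-dynamic TLS model obtains the module's TLS block address through a call to __tls_get_addr; when a function touches two or more such variables, LDTLSCleanup lets the later accesses copy the base address produced by the first call instead of recomputing it. A minimal sketch of the kind of source that benefits (hypothetical variable names; assumes a position-independent build using the local-dynamic TLS model, e.g. -fPIC -ftls-model=local-dynamic):

// Hypothetical example: two local-dynamic TLS variables read in one function.
// Without the cleanup each read may materialize its own TLS base address via
// __tls_get_addr; with it, the second read reuses the register set up by the
// first TLS_base_addr instruction.
static thread_local int tls_a = 0;
static thread_local int tls_b = 0;

int sum_tls() {
  return tls_a + tls_b; // both accesses share one TLS base address after cleanup
}

The pass object itself is created by llvm::createCleanupLocalDynamicTLSPass() above and scheduled by the X86 target's pass configuration.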
7629 
7630 /// Constants defining how certain sequences should be outlined.
7631 ///
7632 /// \p MachineOutlinerDefault implies that the function is called with a call
7633 /// instruction, and a return must be emitted for the outlined function frame.
7634 ///
7635 /// That is,
7636 ///
7637 /// I1 OUTLINED_FUNCTION:
7638 /// I2 --> call OUTLINED_FUNCTION I1
7639 /// I3 I2
7640 /// I3
7641 /// ret
7642 ///
7643 /// * Call construction overhead: 1 (call instruction)
7644 /// * Frame construction overhead: 1 (return instruction)
7645 ///
7646 /// \p MachineOutlinerTailCall implies that the function is being tail called.
7647 /// A jump is emitted instead of a call, and the return is already present in
7648 /// the outlined sequence. That is,
7649 ///
7650 /// I1 OUTLINED_FUNCTION:
7651 /// I2 --> jmp OUTLINED_FUNCTION I1
7652 /// ret I2
7653 /// ret
7654 ///
7655 /// * Call construction overhead: 1 (jump instruction)
7656 /// * Frame construction overhead: 0 (don't need to return)
7657 ///
7658 enum MachineOutlinerClass {
7659  MachineOutlinerDefault,
7660  MachineOutlinerTailCall
7661 };
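These per-class overheads are combined with the SequenceSize computed below when the generic MachineOutliner decides whether outlining a candidate actually shrinks the code. A rough, self-contained sketch of that accounting under this file's one-byte-per-instruction assumption (simplified; the outliningBenefit helper and its formula are illustrative, not the outliner's exact bookkeeping, which lives in outliner::OutlinedFunction):

#include <cassert>

// Simplified model: outlining pays off when the bytes saved at every call
// site outweigh the cost of one call/jump per site plus the single outlined
// copy of the sequence and its frame.
unsigned outliningBenefit(unsigned NumCandidates, unsigned SequenceSize,
                          unsigned CallOverhead, unsigned FrameOverhead) {
  unsigned NotOutlinedCost = NumCandidates * SequenceSize;
  unsigned OutlinedCost =
      NumCandidates * CallOverhead + SequenceSize + FrameOverhead;
  return NotOutlinedCost > OutlinedCost ? NotOutlinedCost - OutlinedCost : 0;
}

int main() {
  // Three copies of a ten-instruction sequence, MachineOutlinerDefault
  // (call overhead 1, frame overhead 1): 3*10 - (3*1 + 10 + 1) = 16 saved.
  assert(outliningBenefit(3, 10, 1, 1) == 16);
  // Same sequence as MachineOutlinerTailCall (call overhead 1, frame
  // overhead 0): 3*10 - (3*1 + 10 + 0) = 17 saved.
  assert(outliningBenefit(3, 10, 1, 0) == 17);
  return 0;
}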
7662 
7663 outliner::OutlinedFunction X86InstrInfo::getOutliningCandidateInfo(
7664  std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
7665  unsigned SequenceSize =
7666  std::accumulate(RepeatedSequenceLocs[0].front(),
7667  std::next(RepeatedSequenceLocs[0].back()), 0,
7668  [](unsigned Sum, const MachineInstr &MI) {
7669  // FIXME: x86 doesn't implement getInstSizeInBytes, so
7670  // we can't tell the cost. Just assume each instruction
7671  // is one byte.
7672  if (MI.isDebugInstr() || MI.isKill())
7673  return Sum;
7674  return Sum + 1;
7675  });
7676 
7677  // FIXME: Use real size in bytes for call and ret instructions.
7678  if (RepeatedSequenceLocs[0].back()->isTerminator()) {
7679  for (outliner::Candidate &C : RepeatedSequenceLocs)
7680  C.setCallInfo(MachineOutlinerTailCall, 1);
7681 
7682  return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
7683  0, // Number of bytes to emit frame.
7684  MachineOutlinerTailCall // Type of frame.
7685  );
7686  }
7687 
7688  for (outliner::Candidate &C : RepeatedSequenceLocs)
7689  C.setCallInfo(MachineOutlinerDefault, 1);
7690 
7691  return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize, 1,
7692  MachineOutlinerDefault);
7693 }
7694 
7695 bool X86InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF,
7696  bool OutlineFromLinkOnceODRs) const {
7697  const Function &F = MF.getFunction();
7698 
7699  // Does the function use a red zone? If it does, then we can't risk messing
7700  // with the stack.
7701  if (!F.hasFnAttribute(Attribute::NoRedZone)) {
7702  // It could have a red zone. If it does, then we don't want to touch it.
7703  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
7704  if (!X86FI || X86FI->getUsesRedZone())
7705  return false;
7706  }
7707 
7708  // If we *don't* want to outline from things that could potentially be deduped
7709  // then return false.
7710  if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
7711  return false;
7712 
7713  // This function is viable for outlining, so return true.
7714  return true;
7715 }
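The red-zone check above matters because outlining replaces inline instructions with a call, and a call pushes a return address just below RSP; under the SysV x86-64 ABI a leaf function may already keep live data in those 128 bytes without ever adjusting the stack pointer. A small hypothetical leaf function of the kind that can end up red-zone-dependent (illustrative only; whether the red zone is actually used depends on the optimizer):

// Hypothetical leaf function: on SysV x86-64 its spill slots may be placed in
// the 128-byte red zone below RSP, with no stack-pointer adjustment in the
// prologue. Inserting an outlined call here would overwrite that area with a
// return address, which is why functions that use the red zone are rejected.
int red_zone_candidate(int x, int y) {
  int tmp1 = x * 3;
  int tmp2 = y ^ tmp1;
  return tmp1 + tmp2;
}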
7716 
7717 outliner::InstrType
7718 X86InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags) const {
7719  MachineInstr &MI = *MIT;
7720  // Don't allow debug values to impact outlining type.
7721  if (MI.isDebugInstr() || MI.isIndirectDebugValue())
7722  return outliner::InstrType::Invisible;
7723 
7724  // At this point, KILL instructions don't really tell us much so we can go
7725  // ahead and skip over them.
7726  if (MI.isKill())
7727  return outliner::InstrType::Invisible;
7728 
7729  // Is this a tail call? If yes, we can outline as a tail call.
7730  if (isTailCall(MI))
7731  return outliner::InstrType::Legal;
7732 
7733  // Is this the terminator of a basic block?
7734  if (MI.isTerminator() || MI.isReturn()) {
7735 
7736  // Does its parent have any successors in its MachineFunction?
7737  if (MI.getParent()->succ_empty())
7738  return outliner::InstrType::Legal;
7739 
7740  // It does, so we can't tail call it.
7741  return outliner::InstrType::Illegal;
7742  }
7743 
7744  // Don't outline anything that modifies or reads from the stack pointer.
7745  //
7746  // FIXME: There are instructions which are being manually built without
7747  // explicit uses/defs so we also have to check the MCInstrDesc. We should be
7748  // able to remove the extra checks once those are fixed up. For example,
7749  // sometimes we might get something like %rax = POP64r 1. This won't be
7750  // caught by modifiesRegister or readsRegister even though the instruction
7751  // really ought to be formed so that modifiesRegister/readsRegister would
7752  // catch it.
7753  if (MI.modifiesRegister(X86::RSP, &RI) || MI.readsRegister(X86::RSP, &RI) ||
7754  MI.getDesc().hasImplicitUseOfPhysReg(X86::RSP) ||
7755  MI.getDesc().hasImplicitDefOfPhysReg(X86::RSP))
7756  return outliner::InstrType::Illegal;
7757 
7758  // Outlined calls change the instruction pointer, so don't read from it.
7759  if (MI.readsRegister(X86::RIP, &RI) ||
7760  MI.getDesc().hasImplicitUseOfPhysReg(X86::RIP) ||
7761  MI.getDesc().hasImplicitDefOfPhysReg(X86::RIP))
7762  return outliner::InstrType::Illegal;
7763 
7764  // Positions can't safely be outlined.
7765  if (MI.isPosition())
7766  return outliner::InstrType::Illegal;
7767 
7768  // Make sure none of the operands of this instruction do anything tricky.
7769  for (const MachineOperand &MOP : MI.operands())
7770  if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
7771  MOP.isTargetIndex())
7772  return outliner::InstrType::Illegal;
7773 
7774  return outliner::InstrType::Legal;
7775 }
7776 
7777 void X86InstrInfo::buildOutlinedFrame(MachineBasicBlock &MBB,
7778  MachineFunction &MF,
7779  const outliner::OutlinedFunction &OF)
7780  const {
7781  // If we're a tail call, we already have a return, so don't do anything.
7782  if (OF.FrameConstructionID == MachineOutlinerTailCall)
7783  return;
7784 
7785  // We're a normal call, so our sequence doesn't have a return instruction.
7786  // Add it in.
7787  MachineInstr *retq = BuildMI(MF, DebugLoc(), get(X86::RETQ));
7788  MBB.insert(MBB.end(), retq);
7789 }
7790 
7791 MachineBasicBlock::iterator
7792 X86InstrInfo::insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
7793  MachineBasicBlock::iterator &It,
7794  MachineFunction &MF,
7795  const outliner::Candidate &C) const {
7796  // Is it a tail call?
7797  if (C.CallConstructionID == MachineOutlinerTailCall) {
7798  // Yes, just insert a JMP.
7799  It = MBB.insert(It,
7800  BuildMI(MF, DebugLoc(), get(X86::TAILJMPd64))
7801  .addGlobalAddress(M.getNamedValue(MF.getName())));
7802  } else {
7803  // No, insert a call.
7804  It = MBB.insert(It,
7805  BuildMI(MF, DebugLoc(), get(X86::CALL64pcrel32))
7806  .addGlobalAddress(M.getNamedValue(MF.getName())));
7807  }
7808 
7809  return It;
7810 }
7811 
7812 #define GET_INSTRINFO_HELPERS
7813 #include "X86GenInstrInfo.inc"
unsigned getTargetFlags() const
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:152
bool hasAVX() const
Definition: X86Subtarget.h:560
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
unsigned GetCondBranchFromCond(CondCode CC)
uint64_t CallInst * C
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand *> MMOs) const
bool is64Bit() const
Is this x86_64? (disregarding specific ABI / programming model)
Definition: X86Subtarget.h:522
This routine provides some synthesis utilities to produce sequences of values.
const MachineInstrBuilder & add(const MachineOperand &MO) const
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:584
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
static bool hasUndefRegUpdate(unsigned Opcode)
static cl::opt< bool > NoFusing("disable-spill-fusing", cl::desc("Disable fusing of spill code into instructions"), cl::Hidden)
static bool Expand2AddrKreg(MachineInstrBuilder &MIB, const MCInstrDesc &Desc, unsigned Reg)
Expand a single-def pseudo instruction to a two-addr instruction with two k0 reads.
bool canMakeTailCallConditional(SmallVectorImpl< MachineOperand > &Cond, const MachineInstr &TailCall) const override
bool optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask, int CmpValue, const MachineRegisterInfo *MRI) const override
optimizeCompareInstr - Check if there exists an earlier instruction that operates on the same source ...
static Type * getDoubleTy(LLVMContext &C)
Definition: Type.cpp:165
uint16_t getExecutionDomainCustom(const MachineInstr &MI) const
bool modifiesRegister(unsigned Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register...
bool use_nodbg_empty(unsigned RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register...
const X86MemoryFoldTableEntry * lookupUnfoldTable(unsigned MemOp)
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const override
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:633
static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI)
Return true if register is PIC base; i.e.g defined by X86::MOVPC32r.
bool IsDead
bool hasDebugInfo() const
Returns true if valid debug info is present.
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
bool expandPostRAPseudo(MachineInstr &MI) const override
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:289
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
bool hasSSE41() const
Definition: X86Subtarget.h:558
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
This class represents lattice values for constants.
Definition: AllocatorList.h:24
bool setExecutionDomainCustom(MachineInstr &MI, unsigned Domain) const
MO_TLSLDM - Represents the offset into the global offset table at which.
Definition: MipsBaseInfo.h:63
static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, const X86Subtarget &Subtarget)
bool isUnconditionalTailCall(const MachineInstr &MI) const override
void ChangeToRegister(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value...
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
This class is used to group {132, 213, 231} forms of FMA opcodes together.
#define LLVM_FALLTHROUGH
Definition: Compiler.h:86
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
MO_TLSLD - On a symbol operand this indicates that the immediate is the offset of the GOT entry with ...
Definition: X86BaseInfo.h:136
bool isCommutable() const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z, ..."), which produces the same result if Y and Z are exchanged.
Definition: MCInstrDesc.h:437
static bool expandXorFP(MachineInstrBuilder &MIB, const TargetInstrInfo &TII)
MO_TLVP_PIC_BASE - On a symbol operand this indicates that the immediate is some TLS offset from the ...
Definition: X86BaseInfo.h:222
bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, int64_t &Offset1, int64_t &Offset2) const override
areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to determine if two loads are lo...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
const X86InstrInfo * getInstrInfo() const override
Definition: X86Subtarget.h:481
const X86MemoryFoldTableEntry * lookupFoldTable(unsigned RegOp, unsigned OpNum)
CondCode getCondFromCMovOpc(unsigned Opc)
Return condition code of a CMov opcode.
void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, SmallVectorImpl< MachineOperand > &Addr, const TargetRegisterClass *RC, ArrayRef< MachineMemOperand *> MMOs, SmallVectorImpl< MachineInstr *> &NewMIs) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:383
const X86FrameLowering * getFrameLowering() const override
Definition: X86Subtarget.h:483
MO_TLSGD - Represents the offset into the global offset table at which.
Definition: MipsBaseInfo.h:58
static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes)
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:164
unsigned getReg() const
getReg - Returns the register number.
bool isHighLatencyDef(int opc) const override
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
unsigned Reg
unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
CondCode getCondFromSETOpc(unsigned Opc)
Return condition code of a SET opcode.
unsigned getSubReg() const
virtual bool hasReassociableOperands(const MachineInstr &Inst, const MachineBasicBlock *MBB) const
Return true when Inst has reassociable operands in the same MBB.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:253
unsigned getPartialRegUpdateClearance(const MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const override
Inform the BreakFalseDeps pass how many idle instructions we would like before a partial register upd...
bool isPredicable(QueryType Type=AllInBundle) const
Return true if this instruction has a predicate operand that controls execution.
Definition: MachineInstr.h:687
unsigned less or equal
Definition: InstrTypes.h:672
unsigned less than
Definition: InstrTypes.h:671
static cl::opt< bool > ReMatPICStubLoad("remat-pic-stub-load", cl::desc("Re-materialize load from stub in PIC mode"), cl::init(false), cl::Hidden)
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:652
static unsigned getLoadRegOpcode(unsigned DestReg, const TargetRegisterClass *RC, bool isStackAligned, const X86Subtarget &STI)
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
AddrNumOperands - Total number of operands in a memory reference.
Definition: X86BaseInfo.h:42
uint64_t getSize() const
Return the size in bytes of the memory reference.
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:662
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:321
unsigned getUndefRegClearance(const MachineInstr &MI, unsigned &OpNum, const TargetRegisterInfo *TRI) const override
Inform the BreakFalseDeps pass how many idle instructions we would like before certain undef register...
static MachineInstr * FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, ArrayRef< MachineOperand > MOs, MachineBasicBlock::iterator InsertPt, MachineInstr &MI, const TargetInstrInfo &TII)
unsigned get213Opcode() const
Returns the 213 form of FMA opcode.
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:34
void setIsDead(bool Val=true)
F(f)
MachineModuleInfo & getMMI() const
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
Definition: MathExtras.h:685
MachineInstrBundleIterator< const MachineInstr > const_iterator
MO_GOTPCREL - On a symbol operand this indicates that the immediate is offset to the GOT entry for th...
Definition: X86BaseInfo.h:110
MO_DTPOFF - On a symbol operand this indicates that the immediate is the offset of the GOT entry with...
Definition: X86BaseInfo.h:179
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned char TargetFlags=0) const
void breakPartialRegDependency(MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const override
std::pair< uint16_t, uint16_t > getExecutionDomain(const MachineInstr &MI) const override
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:459
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
MachineInstr * CreateMachineInstr(const MCInstrDesc &MCID, const DebugLoc &DL, bool NoImp=false)
CreateMachineInstr - Allocate a new MachineInstr.
static bool isAligned(const Value *Base, const APInt &Offset, unsigned Align, const DataLayout &DL)
Definition: Loads.cpp:28
static MCCFIInstruction createAdjustCfaOffset(MCSymbol *L, int Adjustment)
.cfi_adjust_cfa_offset Same as .cfi_def_cfa_offset, but Offset is a relative value that is added/subt...
Definition: MCDwarf.h:482
unsigned getSpillSize(const TargetRegisterClass &RC) const
Return the size in bytes of the stack slot allocated to hold a spilled copy of a register from class ...
An individual sequence of instructions to be replaced with a call to an outlined function.
unsigned isLoadFromStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override
isLoadFromStackSlotPostFE - Check for post-frame ptr elimination stack locations as well...
static bool expandNOVLXStore(MachineInstrBuilder &MIB, const TargetRegisterInfo *TRI, const MCInstrDesc &StoreDesc, const MCInstrDesc &ExtractDesc, unsigned SubIdx)
return AArch64::GPR64RegClass contains(Reg)
iterator_range< succ_iterator > successors()
static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
static Constant * getNullValue(Type *Ty)
Constructor to create a &#39;0&#39; constant of arbitrary type.
Definition: Constants.cpp:265
GlobalBaseReg - On Darwin, this node represents the result of the mflr at function entry...
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-s...
MachineInstr * convertToThreeAddress(MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const override
convertToThreeAddress - This method must be implemented by targets that set the M_CONVERTIBLE_TO_3_AD...
static MachineInstr * MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode, ArrayRef< MachineOperand > MOs, MachineBasicBlock::iterator InsertPt, MachineInstr &MI)
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:657
AnalysisUsage & addRequired()
static X86::CondCode isUseDefConvertible(const MachineInstr &MI)
Check whether the use can be converted to remove a comparison against zero.
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:656
static uint32_t getAlignment(const MCSectionCOFF &Sec)
MO_DARWIN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this indicates that the reference is actually...
Definition: X86BaseInfo.h:210
A description of a memory reference used in the backend.
static SmallVector< MachineMemOperand *, 2 > extractStoreMMOs(ArrayRef< MachineMemOperand *> MMOs, MachineFunction &MF)
static unsigned getLoadStoreRegOpcode(unsigned Reg, const TargetRegisterClass *RC, bool isStackAligned, const X86Subtarget &STI, bool load)
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:211
Provide an instruction scheduling machine model to CodeGen passes.
const HexagonInstrInfo * TII
static Type * getFloatTy(LLVMContext &C)
Definition: Type.cpp:164
bool getMemOperandWithOffset(MachineInstr &LdSt, MachineOperand *&BaseOp, int64_t &Offset, const TargetRegisterInfo *TRI) const override
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:451
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:412
const TargetRegisterClass * getRegClass(const MCInstrDesc &MCID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const
Given a machine instruction descriptor, returns the register class constraint for OpNum...
MachineInstr * optimizeLoadInstr(MachineInstr &MI, const MachineRegisterInfo *MRI, unsigned &FoldAsLoadDefReg, MachineInstr *&DefMI) const override
optimizeLoadInstr - Try to remove the load by folding it to a register operand at the use...
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
void setExecutionDomain(MachineInstr &MI, unsigned Domain) const override
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
void setImplicit(bool Val=true)
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
unsigned SubReg
MachineOutlinerClass
Constants defining how certain sequences should be outlined.
CondCode getCondFromBranchOpc(unsigned Opc)
bool isTerminator(QueryType Type=AnyInBundle) const
Returns true if this instruction part of the terminator for a basic block.
Definition: MachineInstr.h:649
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:653
SimpleValueType SimpleTy
static int getRegClass(RegisterKind Is, unsigned RegWidth)
static bool hasPartialRegUpdate(unsigned Opcode, const X86Subtarget &Subtarget)
Return true for all instructions that only update the first 32 or 64-bits of the destination register...
void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2, MachineInstr &NewMI1, MachineInstr &NewMI2) const override
This is an architecture-specific helper function of reassociateOps.
unsigned FrameConstructionID
Target-defined identifier for constructing a frame for this function.
The memory access is dereferenceable (i.e., doesn&#39;t trap).
bool isReallyTriviallyReMaterializable(const MachineInstr &MI, AliasAnalysis *AA) const override
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:409
unsigned getGlobalBaseReg(MachineFunction *MF) const
getGlobalBaseReg - Return a virtual register initialized with the the global base register value...
unsigned getSwappedVPCMPImm(unsigned Imm)
Get the VPCMP immediate if the opcodes are swapped.
bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, unsigned &SubIdx) const override
isCoalescableExtInstr - Return true if the instruction is a "coalescable" extension instruction...
bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const override
isSafeToMoveRegClassDefs - Return true if it&#39;s safe to move a machine instruction that defines the sp...
static const uint16_t * lookup(unsigned opcode, unsigned domain, ArrayRef< uint16_t[3]> Table)
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
static bool isMem(const MachineInstr &MI, unsigned Op)
Definition: X86InstrInfo.h:161
static const uint16_t ReplaceableInstrsAVX512DQMasked[][4]
bool hasVLX() const
Definition: X86Subtarget.h:657
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
defusechain_iterator - This class provides iterator support for machine operands in the function that...
bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, unsigned &SrcReg2, int &CmpMask, int &CmpValue) const override
analyzeCompare - For a comparison instruction, return the source registers in SrcReg and SrcReg2 if h...
int64_t getSExtValue() const
uint16_t StackAdjustment(const RuntimeFunction &RF)
StackAdjustment - calculated stack adjustment in words.
Definition: ARMWinEH.h:197
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register...
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register. ...
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:406
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:398
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, int64_t Offset1, int64_t Offset2, unsigned NumLoads) const override
shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to determine (in conjunction w...
static def_instr_iterator def_instr_end()
unsigned getSwappedVPCOMImm(unsigned Imm)
Get the VPCOM immediate if the opcodes are swapped.
MO_GOTTPOFF - On a symbol operand this indicates that the immediate is the offset of the GOT entry wi...
Definition: X86BaseInfo.h:154
static void updateOperandRegConstraints(MachineFunction &MF, MachineInstr &NewMI, const TargetInstrInfo &TII)
static const uint16_t ReplaceableInstrsAVX512DQ[][4]
GlobalValue * getNamedValue(StringRef Name) const
Return the global value in the module with the specified name, of arbitrary type. ...
Definition: Module.cpp:114
Base class for the actual dominator tree node.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
reverse_iterator getReverse() const
Get a reverse iterator to the same node.
unsigned get231Opcode() const
Returns the 231 form of FMA opcode.
static const uint16_t ReplaceableCustomInstrs[][3]
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:959
bool isUnpredicatedTerminator(const MachineInstr &MI) const override
bool isIntrinsic() const
Returns true iff the group of FMA opcodes holds intrinsic opcodes.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
unsigned getUndefRegState(bool B)
reverse_iterator rend()
MO_DARWIN_NONLAZY - On a symbol operand "FOO", this indicates that the reference is actually to the "...
Definition: X86BaseInfo.h:205
void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const MCCFIInstruction &CFIInst) const
Wraps up getting a CFI index and building a MachineInstr for it.
static bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB, const TargetInstrInfo &TII, const X86Subtarget &Subtarget)
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
const X86InstrFMA3Group * getFMA3Group(unsigned Opcode, uint64_t TSFlags)
Returns a reference to a group of FMA3 opcodes to where the given Opcode is included.
bool isDereferenceableInvariantLoad(AliasAnalysis *AA) const
Return true if this load instruction never traps and points to a memory location whose value doesn&#39;t ...
const TargetRegisterClass * constrainRegClass(unsigned Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
reverse_iterator rbegin()
bool isBranch(QueryType Type=AnyInBundle) const
Returns true if this is a conditional, unconditional, or indirect branch.
Definition: MachineInstr.h:657
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:161
BasicBlockListType::iterator iterator
unsigned getKillRegState(bool B)
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
bool classifyLEAReg(MachineInstr &MI, const MachineOperand &Src, unsigned LEAOpcode, bool AllowSP, unsigned &NewSrc, bool &isKill, MachineOperand &ImplicitOp, LiveVariables *LV) const
Given an operand within a MachineInstr, insert preceding code to put it into the right format for a p...
static ARCCC::CondCode GetOppositeBranchCondition(ARCCC::CondCode CC)
Return the inverse of passed condition, i.e. turning COND_E to COND_NE.
void ChangeToImmediate(int64_t ImmVal)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value...
TargetInstrInfo - Interface to description of machine instruction set.
bool isReturn(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:623
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
unsigned getDeadRegState(bool B)
static cl::opt< bool > PrintFailedFusing("print-failed-fuse-candidates", cl::desc("Print instructions that the allocator wants to" " fuse, but the X86 backend currently can't"), cl::Hidden)
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const override
unsigned getDefRegState(bool B)
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:610
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
NodeT * getBlock() const
unsigned getObjectAlignment(int ObjectIdx) const
Return the alignment of the specified stack object.
bool hasLinkOnceODRLinkage() const
Definition: GlobalValue.h:427
MO_SECREL - On a symbol operand this indicates that the immediate is the offset from beginning of sec...
Definition: ARMBaseInfo.h:272
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
static unsigned getCommutedVPERMV3Opcode(unsigned Opcode)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
std::pair< CondCode, bool > getX86ConditionCode(CmpInst::Predicate Predicate)
Return a pair of condition code for the given predicate and whether the instruction operands should b...
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
const TargetRegisterInfo * getTargetRegisterInfo() const
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
int getSPAdjust(const MachineInstr &MI) const override
getSPAdjust - This returns the stack pointer adjustment made by this instruction. ...
unsigned const MachineRegisterInfo * MRI
static bool isTruncatedShiftCountForLEA(unsigned ShAmt)
Check whether the given shift count is appropriate can be represented by a LEA instruction.
MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a relocation of: SYMBOL_LABEL + [...
Definition: X86BaseInfo.h:83
static MachineOperand CreateCPI(unsigned Idx, int Offset, unsigned char TargetFlags=0)
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:516
bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const
isSafeToClobberEFLAGS - Return true if it&#39;s safe insert an instruction tha would clobber the EFLAGS c...
InstrType
Represents how an instruction should be mapped by the outliner.
static bool isRedundantFlagInstr(const MachineInstr &FlagI, unsigned SrcReg, unsigned SrcReg2, int ImmMask, int ImmValue, const MachineInstr &OI)
Check whether the first instruction, whose only purpose is to update flags, can be made redundant...
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes)
static const uint16_t ReplaceableInstrs[][3]
bool readsVirtualRegister(unsigned Reg) const
Return true if the MachineInstr reads the specified virtual register.
FunctionPass * createX86GlobalBaseRegPass()
This pass initializes a global base register for PIC on x86-32.
bool registerDefIsDead(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Returns true if the register is dead in this machine instruction.
unsigned CallConstructionID
Identifier denoting the instructions to emit to call an outlined function from this point...
MO_GOTNTPOFF - On a symbol operand this indicates that the immediate is the offset of the GOT entry w...
Definition: X86BaseInfo.h:195
MO_TLVP - On a symbol operand this indicates that the immediate is some TLS offset.
Definition: X86BaseInfo.h:216
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineInstrBuilder & UseMI
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:42
The information necessary to create an outlined function for some class of candidate.
void addLiveOuts(const MachineBasicBlock &MBB)
Adds all live-out registers of basic block MBB.
static void expandLoadStackGuard(MachineInstrBuilder &MIB, const TargetInstrInfo &TII)
const SDValue & getOperand(unsigned Num) const
static bool is64Bit(const char *name)
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE and DBG_LABEL instructions...
static bool expandNOVLXLoad(MachineInstrBuilder &MIB, const TargetRegisterInfo *TRI, const MCInstrDesc &LoadDesc, const MCInstrDesc &BroadcastDesc, unsigned SubIdx)
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
void getNoop(MCInst &NopInst) const override
Return the noop instruction to use for a noop.
unsigned getSubReg(unsigned Reg, unsigned Idx) const
Returns the physical register number of sub-register "Index" for physical register RegNo...
unsigned getVPCMPImmForCond(ISD::CondCode CC)
Get the VPCMP immediate for the given condition.
Represent the analysis usage information of a pass.
static const MachineInstrBuilder & addRegOffset(const MachineInstrBuilder &MIB, unsigned Reg, bool isKill, int Offset)
addRegOffset - This function is used to add a memory reference of the form [Reg + Offset]...
X86InstrInfo(X86Subtarget &STI)
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:549
bool optForSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:598
const PseudoSourceValue * getPseudoValue() const
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:646
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
void setImm(int64_t immVal)
static void addOperands(MachineInstrBuilder &MIB, ArrayRef< MachineOperand > MOs, int PtrOffset=0)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
0 1 1 1 True if ordered (no nans)
Definition: InstrTypes.h:655
MO_NTPOFF - On a symbol operand this indicates that the immediate is the negative thread-pointer offs...
Definition: X86BaseInfo.h:187
#define VPERM_CASES(Suffix)
unsigned getOperandBias(const MCInstrDesc &Desc)
getOperandBias - compute whether all of the def operands are repeated in the uses and therefore shoul...
Definition: X86BaseInfo.h:658
self_iterator getIterator()
Definition: ilist_node.h:82
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:319
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:193
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned CommuteOpIdx1, unsigned CommuteOpIdx2) const override
Commutes the operands in the given instruction by changing the operands order and/or changing the ins...
static unsigned GetCondBranchFromCond(XCore::CondCode CC)
GetCondBranchFromCond - Return the Branch instruction opcode that matches the cc. ...
unsigned getSETFromCond(CondCode CC, bool HasMemoryOperand=false)
Return a set opcode for the given condition and whether it has a memory operand.
static unsigned getThreeSrcCommuteCase(uint64_t TSFlags, unsigned SrcOpIdx1, unsigned SrcOpIdx2)
This determines which of three possible cases of a three source commute the source indexes correspond...
bool isCopy() const
Extended Value Type.
Definition: ValueTypes.h:34
bool isImplicitDef() const
VarInfo & getVarInfo(unsigned RegIdx)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
const MachineBasicBlock & front() const
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
size_t size() const
Definition: SmallVector.h:53
unsigned char NumDefs
Definition: MCInstrDesc.h:168
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned DstReg, ArrayRef< MachineOperand > Cond, unsigned TrueReg, unsigned FalseReg) const override
bool isTargetWin64() const
Definition: X86Subtarget.h:771
bool isDebugInstr() const
Definition: MachineInstr.h:999
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:661
unsigned getNumOperands() const
Return the number of values used by this operation.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint64_t getAlignment() const
Return the minimum known alignment in bytes of the actual memory reference.
signed greater than
Definition: InstrTypes.h:673
static const uint16_t * lookupAVX512(unsigned opcode, unsigned domain, ArrayRef< uint16_t[4]> Table)
static bool isHReg(unsigned Reg)
Test if the given register is a physical h register.
const X86MemoryFoldTableEntry * lookupTwoAddrFoldTable(unsigned RegOp)
void setIsKill(bool Val=true)
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
static bool AdjustBlendMask(unsigned OldMask, unsigned OldWidth, unsigned NewWidth, unsigned *pNewMask=nullptr)
const X86RegisterInfo * getRegisterInfo() const override
Definition: X86Subtarget.h:491
std::vector< MachineInstr * > Kills
Kills - List of MachineInstruction&#39;s which are the last use of this virtual register (kill it) in the...
Definition: LiveVariables.h:89
MO_TPOFF - On a symbol operand this indicates that the immediate is the thread-pointer offset for the...
Definition: X86BaseInfo.h:171
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:650
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specific constraint if it is set.
Definition: MCInstrDesc.h:188
bool analyzeBranchPredicate(MachineBasicBlock &MBB, TargetInstrInfo::MachineBranchPredicate &MBP, bool AllowModify=false) const override
bool isPICStyleGOT() const
Definition: X86Subtarget.h:775
#define VPERM_CASES_BROADCAST(Suffix)
static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag)
Check whether the definition can be converted to remove a comparison against zero.
void replaceKillInstruction(unsigned Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one...
void setOpcode(unsigned Op)
Definition: MCInst.h:173
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
void substituteRegister(unsigned FromReg, unsigned ToReg, unsigned SubIdx, const TargetRegisterInfo &RegInfo)
Replace all occurrences of FromReg with ToReg:SubIdx, properly composing subreg indices where necessa...
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:534
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:222
unsigned isStoreToStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override
isStoreToStackSlotPostFE - Check for post-frame ptr elimination stack locations as well...
MachineDomTreeNode * getRootNode() const
bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr &MI, unsigned Reg, bool UnfoldLoad, bool UnfoldStore, SmallVectorImpl< MachineInstr *> &NewMIs) const override
unfoldMemoryOperand - Separate a single instruction which folded a load or a store or a load and a st...
bool hasLiveCondCodeDef(MachineInstr &MI) const
True if MI has a condition code def, e.g.
MachineOperand class - Representation of each machine instruction operand.
static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII, bool MinusOne)
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:660
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
CondCode GetOppositeBranchCondition(CondCode CC)
GetOppositeBranchCondition - Return the inverse of the specified cond, e.g.
MachineInstrBuilder MachineInstrBuilder & DefMI
static const MachineInstrBuilder & addRegReg(const MachineInstrBuilder &MIB, unsigned Reg1, bool isKill1, unsigned Reg2, bool isKill2)
addRegReg - This function is used to add a memory reference of the form: [Reg + Reg].
Information about stack frame layout on the target.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
signed less than
Definition: InstrTypes.h:675
unsigned getNumLocalDynamicTLSAccesses() const
static void commuteVPTERNLOG(MachineInstr &MI, unsigned SrcOpIdx1, unsigned SrcOpIdx2)
int64_t getFrameAdjustment(const MachineInstr &I) const
Returns the stack pointer adjustment that happens inside the frame setup..destroy sequence (e...
Definition: X86InstrInfo.h:191
const MachineInstrBuilder & addDisp(const MachineOperand &Disp, int64_t off, unsigned char TargetFlags=0) const
const X86RegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
Definition: X86InstrInfo.h:187
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly. ...
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:286
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:226
Represents one node in the SelectionDAG.
bool isKMergeMasked(uint64_t TSFlags)
isKMergedMasked - Is this a merge masked instruction.
Definition: X86BaseInfo.h:817
MachineInstr * remove(MachineInstr *I)
Remove the unbundled instruction from the instruction list without deleting it.
int64_t getImm() const
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
MachineInstr * getUniqueVRegDef(unsigned Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
const Function & getFunction() const
Return the LLVM function that this machine code represents.
virtual bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const
Returns true iff the routine could find two commutable operands in the given machine instruction...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
bool needsUnwindTableEntry() const
True if this function needs an unwind table.
Definition: Function.h:573
unsigned getX86SubSuperRegister(unsigned, unsigned, bool High=false)
Returns the sub or super register of a specific X86 register.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:941
static bool clobbersPhysReg(const uint32_t *RegMask, unsigned PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
static SmallVector< MachineMemOperand *, 2 > extractLoadMMOs(ArrayRef< MachineMemOperand *> MMOs, MachineFunction &MF)
signed less or equal
Definition: InstrTypes.h:676
CodeModel::Model getCodeModel() const
Returns the code model.
bool hasPOPCNTFalseDeps() const
Definition: X86Subtarget.h:626
void setDebugLoc(DebugLoc dl)
Replace current source information with new such.
static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, const MachineInstr &UserMI, const MachineFunction &MF)
Check if LoadMI is a partial register load that we can&#39;t fold into MI because the latter uses content...
bool readsRegister(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr reads the specified register.
unsigned getCMovFromCond(CondCode CC, unsigned RegBytes, bool HasMemoryOperand=false)
Return a cmov opcode for the given condition, register size in bytes, and operand type...
bool isPredicated(MCInstrInfo const &MCII, MCInst const &MCI)
static MachineBasicBlock * getFallThroughMBB(MachineBasicBlock *MBB, MachineBasicBlock *TBB)
def_instr_iterator def_instr_begin(unsigned RegNo) const
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:394
CATCHRET - Represents a return from a catch block funclet.
Definition: ISDOpcodes.h:686
void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF, const outliner::OutlinedFunction &OF) const override
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:254
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
The memory access reads data.
static const uint16_t ReplaceableInstrsAVX2InsertExtract[][3]
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, unsigned, unsigned, int &, int &, int &) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr) const override
foldMemoryOperand - If this target supports it, fold a load or store of the specified stack slot into...
Representation of each machine instruction.
Definition: MachineInstr.h:64
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MO_GOTOFF - On a symbol operand this indicates that the immediate is the offset to the location of th...
Definition: X86BaseInfo.h:102
bool killsRegister(unsigned Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr kills the specified register.
bool isMoveReg(QueryType Type=IgnoreBundle) const
Return true if this instruction is a register move.
Definition: MachineInstr.h:706
uint16_t getEncodingValue(unsigned RegNo) const
Returns the encoding for RegNo.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:176
A set of physical registers with utility functions to track liveness when walking backward/forward th...
Definition: LivePhysRegs.h:49
static cl::opt< unsigned > UndefRegClearance("undef-reg-clearance", cl::desc("How many idle instructions we would like before " "certain undef register reads"), cl::init(128), cl::Hidden)
void setPreInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just prior to the instruction itself.
static const MachineInstrBuilder & addOffset(const MachineInstrBuilder &MIB, int Offset)
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "...
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
unsigned getMatchingSuperReg(unsigned Reg, unsigned SubIdx, const TargetRegisterClass *RC) const
Return a super-register of the specified register Reg so its sub-register of index SubIdx is Reg...
static unsigned getStoreRegOpcode(unsigned SrcReg, const TargetRegisterClass *RC, bool isStackAligned, const X86Subtarget &STI)
unsigned greater or equal
Definition: InstrTypes.h:670
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
bool isPositionIndependent() const
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:45
TargetOptions Options
Definition: TargetMachine.h:97
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned char TargetFlags=0) const
void setReg(unsigned Reg)
Change the register this operand corresponds to.
static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF, MachineInstr &MI)
static MachineOperand CreateImm(int64_t Val)
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
static MachineInstr * FuseInst(MachineFunction &MF, unsigned Opcode, unsigned OpNo, ArrayRef< MachineOperand > MOs, MachineBasicBlock::iterator InsertPt, MachineInstr &MI, const TargetInstrInfo &TII, int PtrOffset=0)
bool optForMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:595
MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, const outliner::Candidate &C) const override
void setSubReg(unsigned subReg)
virtual const TargetFrameLowering * getFrameLowering() const
The memory access always returns the same value (or traps).
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
bool isKMasked(uint64_t TSFlags)
isKMasked - Is this a masked instruction.
Definition: X86BaseInfo.h:812
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:654
static cl::opt< unsigned > PartialRegUpdateClearance("partial-reg-update-clearance", cl::desc("Clearance between two register writes " "for inserting XOR to avoid partial " "register update"), cl::init(64), cl::Hidden)
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
void replaceBranchWithTailCall(MachineBasicBlock &MBB, SmallVectorImpl< MachineOperand > &Cond, const MachineInstr &TailCall) const override
uint32_t Size
Definition: Profile.cpp:47
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
virtual outliner::OutlinedFunction getOutliningCandidateInfo(std::vector< outliner::Candidate > &RepeatedSequenceLocs) const override
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:658
bool isKill() const
bool isAssociativeAndCommutative(const MachineInstr &Inst) const override
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isCopyInstrImpl(const MachineInstr &MI, const MachineOperand *&Source, const MachineOperand *&Destination) const override
If the specific machine instruction is a instruction that moves/copies value from one register to ano...
static const uint16_t ReplaceableCustomAVX2Instrs[][3]
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static const uint16_t ReplaceableInstrsAVX2[][3]
void loadRegFromAddr(MachineFunction &MF, unsigned DestReg, SmallVectorImpl< MachineOperand > &Addr, const TargetRegisterClass *RC, ArrayRef< MachineMemOperand *> MMOs, SmallVectorImpl< MachineInstr *> &NewMIs) const
bool hasImplicitUseOfPhysReg(unsigned Reg) const
Return true if this instruction implicitly uses the specified physical register.
Definition: MCInstrDesc.h:560
bool hasReassociableOperands(const MachineInstr &Inst, const MachineBasicBlock *MBB) const override
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:649
LLVM Value Representation.
Definition: Value.h:73
mop_iterator operands_begin()
Definition: MachineInstr.h:453
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:659
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
typename std::vector< DomTreeNodeBase * >::iterator iterator
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:606
bool isPosition() const
Definition: MachineInstr.h:995
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E&#39;s largest value.
Definition: BitmaskEnum.h:81
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
bool hasAVX512() const
Definition: X86Subtarget.h:562
MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the immediate should get the value of th...
Definition: X86BaseInfo.h:88
bool addRegisterKilled(unsigned IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound=false)
We have determined MI kills a register.
IRTranslator LLVM IR MI
bool isBarrier(QueryType Type=AnyInBundle) const
Returns true if the specified instruction stops control flow from executing the instruction immediate...
Definition: MachineInstr.h:640
bool hasBWI() const
Definition: X86Subtarget.h:656
unsigned greater than
Definition: InstrTypes.h:669
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
static const uint16_t ReplaceableInstrsAVX512[][4]
void RemoveOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with...
unsigned getRegSizeInBits(const TargetRegisterClass &RC) const
Return the size in bits of a register from class RC.
AddrSegmentReg - The operand # of the segment in the memory operand.
Definition: X86BaseInfo.h:39
unsigned getFMA3OpcodeToCommuteOperands(const MachineInstr &MI, unsigned SrcOpIdx1, unsigned SrcOpIdx2, const X86InstrFMA3Group &FMA3Group) const
Returns an adjusted FMA opcode that must be used in an FMA instruction that performs the same computatio...
void stepForward(const MachineInstr &MI, SmallVectorImpl< std::pair< MCPhysReg, const MachineOperand *>> &Clobbers)
Simulates liveness when stepping forward over an instruction (bundle).
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override
Returns true iff the routine could find two commutable operands in the given machine instruction...
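A sketch of the usual calling pattern, assuming TII points at the target's TargetInstrInfo and MI is the instruction being examined:

// Let the target pick any pair of commutable operands.
unsigned OpIdx1 = llvm::TargetInstrInfo::CommuteAnyOperandIndex;
unsigned OpIdx2 = llvm::TargetInstrInfo::CommuteAnyOperandIndex;
if (TII->findCommutedOpIndices(MI, OpIdx1, OpIdx2)) {
  // OpIdx1/OpIdx2 now name operands that may legally be swapped, e.g. via
  // TII->commuteInstruction(MI, /*NewMI=*/false, OpIdx1, OpIdx2).
}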
static X86::CondCode getSwappedCondition(X86::CondCode CC)
Assuming the flags are set by MI(a,b), return the condition code if we modify the instructions such t...
unsigned get132Opcode() const
Returns the 132 form of FMA opcode.
bool hasImplicitDefOfPhysReg(unsigned Reg, const MCRegisterInfo *MRI=nullptr) const
Return true if this instruction implicitly defines the specified physical register.
Definition: MCInstrDesc.cpp:45
#define LLVM_DEBUG(X)
Definition: Debug.h:123
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override
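A sketch of a reload emitted through this hook, assuming MBB, an insertion iterator InsertPt, a destination register DestReg, a frame index FrameIdx, and the register info TRI are in scope; GR64 is an example register class:

// Insert a reload of a spilled 64-bit GPR in front of InsertPt.
TII->loadRegFromStackSlot(MBB, InsertPt, DestReg, FrameIdx,
                          &llvm::X86::GR64RegClass, TRI);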
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand *> NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:414
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:651
On a symbol operand "FOO", this indicates that the reference is actually to "FOO@plt".
Definition: PPC.h:79
bool isIndirectDebugValue() const
A DBG_VALUE is indirect iff the first operand is a register and the second operand is an immediate...
FunctionPass * createCleanupLocalDynamicTLSPass()
This pass combines multiple accesses to local-dynamic TLS variables so that the TLS base address for ...
static bool Expand2AddrUndef(MachineInstrBuilder &MIB, const MCInstrDesc &Desc)
Expand a single-def pseudo instruction to a two-addr instruction with two undef reads of the register...
unsigned getOpcodeAfterMemoryUnfold(unsigned Opc, bool UnfoldLoad, bool UnfoldStore, unsigned *LoadRegIndex=nullptr) const override
getOpcodeAfterMemoryUnfold - Returns the opcode of the would-be new instruction after load / store ar...
static MachineOperand CreateFI(int Idx)
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
static bool isCommutableVPERMV3Instruction(unsigned Opcode)
The operation is expected to be selectable directly by the target, and no transformation is necessary...
Definition: LegalizerInfo.h:48
outliner::InstrType getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags) const override
bool isSafeToMove(AliasAnalysis *AA, bool &SawStore) const
Return true if it is safe to move this instruction.
unsigned getConstantPoolIndex(const Constant *C, unsigned Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one...
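A sketch of routing a constant through the pool, assuming MF is the current MachineFunction and C is the constant to place; the alignment of 16 is an arbitrary example:

llvm::MachineConstantPool *MCP = MF.getConstantPool();
unsigned CPI = MCP->getConstantPoolIndex(C, /*Alignment=*/16);
// CPI can then appear in a memory operand, e.g. via
// MachineInstrBuilder::addConstantPoolIndex(CPI).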
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
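A one-line sketch, assuming MF is in scope and using GR32 as an example register class:

llvm::MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned NewReg = MRI.createVirtualRegister(&llvm::X86::GR32RegClass);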
bool hasHighOperandLatency(const TargetSchedModel &SchedModel, const MachineRegisterInfo *MRI, const MachineInstr &DefMI, unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const override
bool hasSSE2() const
Definition: X86Subtarget.h:555
static const uint16_t ReplaceableCustomAVX512LogicInstrs[][4]
signed greater or equal
Definition: InstrTypes.h:674
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:144
bool hasLZCNTFalseDeps() const
Definition: X86Subtarget.h:627
bool isImplicit() const
vt_iterator legalclasstypes_begin(const TargetRegisterClass &RC) const
Loop over all of the value types that can be represented by values in the given register class...
unsigned isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool isFunctionSafeToOutlineFrom(MachineFunction &MF, bool OutlineFromLinkOnceODRs) const override
int getMemoryOperandNo(uint64_t TSFlags)
getMemoryOperandNo - The function returns the MCInst operand # for the first field of the memory oper...
Definition: X86BaseInfo.h:699
static unsigned getTruncatedShiftCount(const MachineInstr &MI, unsigned ShiftAmtOperandIdx)
Check whether the shift count for a machine operand is non-zero.
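The truncation reflects that x86 shift instructions only honour the low 5 bits of the count, or 6 bits with a REX.W prefix. A standalone sketch of that masking, with an illustrative helper name:

#include <cstdint>

// Reduce an immediate shift amount to the bits the hardware actually uses.
static unsigned truncateShiftImm(uint64_t Imm, bool Is64Bit) {
  unsigned Mask = Is64Bit ? 63 : 31;
  return static_cast<unsigned>(Imm) & Mask;
}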