LLVM  8.0.1
AutoUpgrade.cpp
Go to the documentation of this file.
1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the auto-upgrade helper functions.
11 // This is where deprecated IR intrinsics and other IR features are updated to
12 // current specifications.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/IR/AutoUpgrade.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/IR/Constants.h"
19 #include "llvm/IR/DIBuilder.h"
20 #include "llvm/IR/DebugInfo.h"
21 #include "llvm/IR/DiagnosticInfo.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/IRBuilder.h"
24 #include "llvm/IR/Instruction.h"
25 #include "llvm/IR/IntrinsicInst.h"
26 #include "llvm/IR/LLVMContext.h"
27 #include "llvm/IR/Module.h"
28 #include "llvm/IR/Verifier.h"
30 #include "llvm/Support/Regex.h"
31 #include <cstring>
32 using namespace llvm;
33 
34 static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
35 
36 // Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
37 // changed their type from v4f32 to v2i64.
//
// Returns false and leaves F alone when F's first parameter is not a
// 4 x float vector. Otherwise F is renamed via rename(), NewFn receives the
// declaration looked up for IID in F's parent module, and true is returned.
// NOTE(review): the first line of the signature is absent from this listing;
// F and IID are presumably the leading parameters -- confirm upstream.
39  Function *&NewFn) {
40  // Check whether this is an old version of the function, which received
41  // v4f32 arguments.
42  Type *Arg0Type = F->getFunctionType()->getParamType(0);
43  if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
44  return false;
45 
46  // Yes, it's old, replace it with new version.
47  rename(F);
48  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
49  return true;
50 }
51 
52 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
53 // arguments have changed their type from i32 to i8.
//
// Returns false without touching F unless F's last parameter is a 32-bit
// integer. Otherwise F is renamed via rename(), NewFn receives the declaration
// looked up for IID in F's parent module, and true is returned.
// NOTE(review): the first line of the signature is absent from this listing.
55  Function *&NewFn) {
56  // Check that the last argument is an i32.
57  Type *LastArgType = F->getFunctionType()->getParamType(
58  F->getFunctionType()->getNumParams() - 1);
59  if (!LastArgType->isIntegerTy(32))
60  return false;
61 
62  // Move this function aside and map down.
63  rename(F);
64  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
65  return true;
66 }
67 
// Predicate over an x86 intrinsic name: returns true when Name equals one of
// the exact names below or begins with one of the listed prefixes, and false
// otherwise. The only test that also looks at F is "xop.vpcom", which matches
// only when F takes exactly two arguments.
// NOTE(review): the signature line is absent from this listing. The visible
// caller passes Name with its "x86." prefix already removed.
69  // All of the intrinsics matches below should be marked with which llvm
70  // version started autoupgrading them. At some point in the future we would
71  // like to use this information to remove upgrade code for some older
72  // intrinsics. It is currently undecided how we will determine that future
73  // point.
74  if (Name == "addcarryx.u32" || // Added in 8.0
75  Name == "addcarryx.u64" || // Added in 8.0
76  Name == "addcarry.u32" || // Added in 8.0
77  Name == "addcarry.u64" || // Added in 8.0
78  Name == "subborrow.u32" || // Added in 8.0
79  Name == "subborrow.u64" || // Added in 8.0
80  Name.startswith("sse2.padds.") || // Added in 8.0
81  Name.startswith("sse2.psubs.") || // Added in 8.0
82  Name.startswith("sse2.paddus.") || // Added in 8.0
83  Name.startswith("sse2.psubus.") || // Added in 8.0
84  Name.startswith("avx2.padds.") || // Added in 8.0
85  Name.startswith("avx2.psubs.") || // Added in 8.0
86  Name.startswith("avx2.paddus.") || // Added in 8.0
87  Name.startswith("avx2.psubus.") || // Added in 8.0
88  Name.startswith("avx512.padds.") || // Added in 8.0
89  Name.startswith("avx512.psubs.") || // Added in 8.0
90  Name.startswith("avx512.mask.padds.") || // Added in 8.0
91  Name.startswith("avx512.mask.psubs.") || // Added in 8.0
92  Name.startswith("avx512.mask.paddus.") || // Added in 8.0
93  Name.startswith("avx512.mask.psubus.") || // Added in 8.0
94  Name=="ssse3.pabs.b.128" || // Added in 6.0
95  Name=="ssse3.pabs.w.128" || // Added in 6.0
96  Name=="ssse3.pabs.d.128" || // Added in 6.0
97  Name.startswith("fma4.vfmadd.s") || // Added in 7.0
98  Name.startswith("fma.vfmadd.") || // Added in 7.0
99  Name.startswith("fma.vfmsub.") || // Added in 7.0
100  Name.startswith("fma.vfmaddsub.") || // Added in 7.0
101  Name.startswith("fma.vfmsubadd.") || // Added in 7.0
102  Name.startswith("fma.vfnmadd.") || // Added in 7.0
103  Name.startswith("fma.vfnmsub.") || // Added in 7.0
104  Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
105  Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
106  Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
107  Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
108  Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
109  Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
110  Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
111  Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
112  Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
113  Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
114  Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
115  Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
116  Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
117  Name.startswith("avx512.kunpck") || // Added in 6.0
118  Name.startswith("avx2.pabs.") || // Added in 6.0
119  Name.startswith("avx512.mask.pabs.") || // Added in 6.0
120  Name.startswith("avx512.broadcastm") || // Added in 6.0
121  Name == "sse.sqrt.ss" || // Added in 7.0
122  Name == "sse2.sqrt.sd" || // Added in 7.0
123  Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
124  Name.startswith("avx.sqrt.p") || // Added in 7.0
125  Name.startswith("sse2.sqrt.p") || // Added in 7.0
126  Name.startswith("sse.sqrt.p") || // Added in 7.0
127  Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
128  Name.startswith("sse2.pcmpeq.") || // Added in 3.1
129  Name.startswith("sse2.pcmpgt.") || // Added in 3.1
130  Name.startswith("avx2.pcmpeq.") || // Added in 3.1
131  Name.startswith("avx2.pcmpgt.") || // Added in 3.1
132  Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
133  Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
134  Name.startswith("avx.vperm2f128.") || // Added in 6.0
135  Name == "avx2.vperm2i128" || // Added in 6.0
136  Name == "sse.add.ss" || // Added in 4.0
137  Name == "sse2.add.sd" || // Added in 4.0
138  Name == "sse.sub.ss" || // Added in 4.0
139  Name == "sse2.sub.sd" || // Added in 4.0
140  Name == "sse.mul.ss" || // Added in 4.0
141  Name == "sse2.mul.sd" || // Added in 4.0
142  Name == "sse.div.ss" || // Added in 4.0
143  Name == "sse2.div.sd" || // Added in 4.0
144  Name == "sse41.pmaxsb" || // Added in 3.9
145  Name == "sse2.pmaxs.w" || // Added in 3.9
146  Name == "sse41.pmaxsd" || // Added in 3.9
147  Name == "sse2.pmaxu.b" || // Added in 3.9
148  Name == "sse41.pmaxuw" || // Added in 3.9
149  Name == "sse41.pmaxud" || // Added in 3.9
150  Name == "sse41.pminsb" || // Added in 3.9
151  Name == "sse2.pmins.w" || // Added in 3.9
152  Name == "sse41.pminsd" || // Added in 3.9
153  Name == "sse2.pminu.b" || // Added in 3.9
154  Name == "sse41.pminuw" || // Added in 3.9
155  Name == "sse41.pminud" || // Added in 3.9
156  Name == "avx512.kand.w" || // Added in 7.0
157  Name == "avx512.kandn.w" || // Added in 7.0
158  Name == "avx512.knot.w" || // Added in 7.0
159  Name == "avx512.kor.w" || // Added in 7.0
160  Name == "avx512.kxor.w" || // Added in 7.0
161  Name == "avx512.kxnor.w" || // Added in 7.0
162  Name == "avx512.kortestc.w" || // Added in 7.0
163  Name == "avx512.kortestz.w" || // Added in 7.0
164  Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
165  Name.startswith("avx2.pmax") || // Added in 3.9
166  Name.startswith("avx2.pmin") || // Added in 3.9
167  Name.startswith("avx512.mask.pmax") || // Added in 4.0
168  Name.startswith("avx512.mask.pmin") || // Added in 4.0
169  Name.startswith("avx2.vbroadcast") || // Added in 3.8
170  Name.startswith("avx2.pbroadcast") || // Added in 3.8
171  Name.startswith("avx.vpermil.") || // Added in 3.1
172  Name.startswith("sse2.pshuf") || // Added in 3.9
173  Name.startswith("avx512.pbroadcast") || // Added in 3.9
174  Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
175  Name.startswith("avx512.mask.movddup") || // Added in 3.9
176  Name.startswith("avx512.mask.movshdup") || // Added in 3.9
177  Name.startswith("avx512.mask.movsldup") || // Added in 3.9
178  Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
179  Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
180  Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
181  Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
182  Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
183  Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
184  Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
185  Name.startswith("avx512.mask.punpckl") || // Added in 3.9
186  Name.startswith("avx512.mask.punpckh") || // Added in 3.9
187  Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
188  Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
189  Name.startswith("avx512.mask.pand.") || // Added in 3.9
190  Name.startswith("avx512.mask.pandn.") || // Added in 3.9
191  Name.startswith("avx512.mask.por.") || // Added in 3.9
192  Name.startswith("avx512.mask.pxor.") || // Added in 3.9
193  Name.startswith("avx512.mask.and.") || // Added in 3.9
194  Name.startswith("avx512.mask.andn.") || // Added in 3.9
195  Name.startswith("avx512.mask.or.") || // Added in 3.9
196  Name.startswith("avx512.mask.xor.") || // Added in 3.9
197  Name.startswith("avx512.mask.padd.") || // Added in 4.0
198  Name.startswith("avx512.mask.psub.") || // Added in 4.0
199  Name.startswith("avx512.mask.pmull.") || // Added in 4.0
200  Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
201  Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
202  Name == "avx512.mask.cvtudq2ps.128" || // Added in 7.0
203  Name == "avx512.mask.cvtudq2ps.256" || // Added in 7.0
204  Name == "avx512.mask.cvtqq2pd.128" || // Added in 7.0
205  Name == "avx512.mask.cvtqq2pd.256" || // Added in 7.0
206  Name == "avx512.mask.cvtuqq2pd.128" || // Added in 7.0
207  Name == "avx512.mask.cvtuqq2pd.256" || // Added in 7.0
208  Name == "avx512.mask.cvtdq2ps.128" || // Added in 7.0
209  Name == "avx512.mask.cvtdq2ps.256" || // Added in 7.0
210  Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
211  Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
212  Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
213  Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
214  Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
215  Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
216  Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
217  Name == "avx512.cvtusi2sd" || // Added in 7.0
218  Name.startswith("avx512.mask.permvar.") || // Added in 7.0
// NOTE(review): the test below repeats the one directly above; it is
// redundant and can never change the result.
219  Name.startswith("avx512.mask.permvar.") || // Added in 7.0
220  Name == "sse2.pmulu.dq" || // Added in 7.0
221  Name == "sse41.pmuldq" || // Added in 7.0
222  Name == "avx2.pmulu.dq" || // Added in 7.0
223  Name == "avx2.pmul.dq" || // Added in 7.0
224  Name == "avx512.pmulu.dq.512" || // Added in 7.0
225  Name == "avx512.pmul.dq.512" || // Added in 7.0
226  Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
227  Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
228  Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
229  Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
230  Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
231  Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
232  Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
233  Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
234  Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
235  Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
236  Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
237  Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
238  Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
239  Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
240  Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
241  Name.startswith("avx512.mask.cmp.p") || // Added in 7.0
242  Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
243  Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
244  Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
245  Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
246  Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
247  Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
248  Name.startswith("avx512.mask.psll.d") || // Added in 4.0
249  Name.startswith("avx512.mask.psll.q") || // Added in 4.0
250  Name.startswith("avx512.mask.psll.w") || // Added in 4.0
251  Name.startswith("avx512.mask.psra.d") || // Added in 4.0
252  Name.startswith("avx512.mask.psra.q") || // Added in 4.0
253  Name.startswith("avx512.mask.psra.w") || // Added in 4.0
254  Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
255  Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
256  Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
257  Name.startswith("avx512.mask.pslli") || // Added in 4.0
258  Name.startswith("avx512.mask.psrai") || // Added in 4.0
259  Name.startswith("avx512.mask.psrli") || // Added in 4.0
260  Name.startswith("avx512.mask.psllv") || // Added in 4.0
261  Name.startswith("avx512.mask.psrav") || // Added in 4.0
262  Name.startswith("avx512.mask.psrlv") || // Added in 4.0
263  Name.startswith("sse41.pmovsx") || // Added in 3.8
264  Name.startswith("sse41.pmovzx") || // Added in 3.9
265  Name.startswith("avx2.pmovsx") || // Added in 3.9
266  Name.startswith("avx2.pmovzx") || // Added in 3.9
267  Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
268  Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
269  Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
270  Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
271  Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
272  Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
273  Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
274  Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
275  Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
276  Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
277  Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
278  Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
279  Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
280  Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
281  Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
282  Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
283  Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
284  Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
285  Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
286  Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
287  Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
288  Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
289  Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
290  Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
291  Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
292  Name.startswith("avx512.vpshld.") || // Added in 8.0
293  Name.startswith("avx512.vpshrd.") || // Added in 8.0
294  Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
295  Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
296  Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
297  Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
298  Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
299  Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
300  Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
301  Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
302  Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
303  Name == "sse.cvtsi2ss" || // Added in 7.0
304  Name == "sse.cvtsi642ss" || // Added in 7.0
305  Name == "sse2.cvtsi2sd" || // Added in 7.0
306  Name == "sse2.cvtsi642sd" || // Added in 7.0
307  Name == "sse2.cvtss2sd" || // Added in 7.0
308  Name == "sse2.cvtdq2pd" || // Added in 3.9
309  Name == "sse2.cvtdq2ps" || // Added in 7.0
310  Name == "sse2.cvtps2pd" || // Added in 3.9
311  Name == "avx.cvtdq2.pd.256" || // Added in 3.9
312  Name == "avx.cvtdq2.ps.256" || // Added in 7.0
313  Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
314  Name.startswith("avx.vinsertf128.") || // Added in 3.7
315  Name == "avx2.vinserti128" || // Added in 3.7
316  Name.startswith("avx512.mask.insert") || // Added in 4.0
317  Name.startswith("avx.vextractf128.") || // Added in 3.7
318  Name == "avx2.vextracti128" || // Added in 3.7
319  Name.startswith("avx512.mask.vextract") || // Added in 4.0
320  Name.startswith("sse4a.movnt.") || // Added in 3.9
321  Name.startswith("avx.movnt.") || // Added in 3.2
322  Name.startswith("avx512.storent.") || // Added in 3.9
323  Name == "sse41.movntdqa" || // Added in 5.0
324  Name == "avx2.movntdqa" || // Added in 5.0
325  Name == "avx512.movntdqa" || // Added in 5.0
326  Name == "sse2.storel.dq" || // Added in 3.9
327  Name.startswith("sse.storeu.") || // Added in 3.9
328  Name.startswith("sse2.storeu.") || // Added in 3.9
329  Name.startswith("avx.storeu.") || // Added in 3.9
330  Name.startswith("avx512.mask.storeu.") || // Added in 3.9
331  Name.startswith("avx512.mask.store.p") || // Added in 3.9
332  Name.startswith("avx512.mask.store.b.") || // Added in 3.9
333  Name.startswith("avx512.mask.store.w.") || // Added in 3.9
334  Name.startswith("avx512.mask.store.d.") || // Added in 3.9
335  Name.startswith("avx512.mask.store.q.") || // Added in 3.9
336  Name == "avx512.mask.store.ss" || // Added in 7.0
337  Name.startswith("avx512.mask.loadu.") || // Added in 3.9
338  Name.startswith("avx512.mask.load.") || // Added in 3.9
339  Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
340  Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
341  Name == "sse42.crc32.64.8" || // Added in 3.4
342  Name.startswith("avx.vbroadcast.s") || // Added in 3.5
343  Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
344  Name.startswith("avx512.mask.palignr.") || // Added in 3.9
345  Name.startswith("avx512.mask.valign.") || // Added in 4.0
346  Name.startswith("sse2.psll.dq") || // Added in 3.7
347  Name.startswith("sse2.psrl.dq") || // Added in 3.7
348  Name.startswith("avx2.psll.dq") || // Added in 3.7
349  Name.startswith("avx2.psrl.dq") || // Added in 3.7
350  Name.startswith("avx512.psll.dq") || // Added in 3.9
351  Name.startswith("avx512.psrl.dq") || // Added in 3.9
352  Name == "sse41.pblendw" || // Added in 3.7
353  Name.startswith("sse41.blendp") || // Added in 3.7
354  Name.startswith("avx.blend.p") || // Added in 3.7
355  Name == "avx2.pblendw" || // Added in 3.7
356  Name.startswith("avx2.pblendd.") || // Added in 3.7
357  Name.startswith("avx.vbroadcastf128") || // Added in 4.0
358  Name == "avx2.vbroadcasti128" || // Added in 3.7
359  Name.startswith("avx512.mask.broadcastf") || // Added in 6.0
360  Name.startswith("avx512.mask.broadcasti") || // Added in 6.0
361  Name == "xop.vpcmov" || // Added in 3.8
362  Name == "xop.vpcmov.256" || // Added in 5.0
363  Name.startswith("avx512.mask.move.s") || // Added in 4.0
364  Name.startswith("avx512.cvtmask2") || // Added in 5.0
365  (Name.startswith("xop.vpcom") && // Added in 3.2
366  F->arg_size() == 2) ||
367  Name.startswith("xop.vprot") || // Added in 8.0
368  Name.startswith("avx512.prol") || // Added in 8.0
369  Name.startswith("avx512.pror") || // Added in 8.0
370  Name.startswith("avx512.mask.prorv.") || // Added in 8.0
371  Name.startswith("avx512.mask.pror.") || // Added in 8.0
372  Name.startswith("avx512.mask.prolv.") || // Added in 8.0
373  Name.startswith("avx512.mask.prol.") || // Added in 8.0
374  Name.startswith("avx512.ptestm") || // Added in 6.0
375  Name.startswith("avx512.ptestnm") || // Added in 6.0
376  Name.startswith("sse2.pavg") || // Added in 6.0
377  Name.startswith("avx2.pavg") || // Added in 6.0
378  Name.startswith("avx512.mask.pavg")) // Added in 6.0
379  return true;
380 
381  return false;
382 }
383 
// Decides whether an x86 intrinsic declaration needs upgrading.
//
// Name must begin with "x86."; that prefix is stripped before any matching.
// Returns true with NewFn == nullptr when ShouldUpgradeX86Intrinsic() accepts
// the name. The remaining cases rename F and return true; the one visible
// assignment (xop.vpermil2) stores the replacement declaration in NewFn.
// Returns false when nothing applies.
// NOTE(review): this listing is missing the first signature line and many
// interior statements (the embedded numbers skip, e.g. 402 -> 405). The
// missing lines presumably hold the NewFn assignments and helper calls --
// confirm against the upstream file before relying on this text.
385  Function *&NewFn) {
386  // Only handle intrinsics that start with "x86.".
387  if (!Name.startswith("x86."))
388  return false;
389  // Remove "x86." prefix.
390  Name = Name.substr(4);
391 
// Names on the fixed list are reported as upgradable with no replacement
// declaration (NewFn is set to nullptr).
392  if (ShouldUpgradeX86Intrinsic(F, Name)) {
393  NewFn = nullptr;
394  return true;
395  }
396 
397  if (Name == "rdtscp") { // Added in 8.0
398  // If this intrinsic has 0 operands, it's the new version.
399  if (F->getFunctionType()->getNumParams() == 0)
400  return false;
401 
402  rename(F);
405  return true;
406  }
407 
408  // SSE4.1 ptest functions may have an old signature.
409  if (Name.startswith("sse41.ptest")) { // Added in 3.2
// substr(11) is whatever follows the 11-character prefix "sse41.ptest".
410  if (Name.substr(11) == "c")
412  if (Name.substr(11) == "z")
414  if (Name.substr(11) == "nzc")
416  }
417  // Several blend and other instructions with masks used the wrong number of
418  // bits.
419  if (Name == "sse41.insertps") // Added in 3.6
421  NewFn);
422  if (Name == "sse41.dppd") // Added in 3.6
424  NewFn);
425  if (Name == "sse41.dpps") // Added in 3.6
427  NewFn);
428  if (Name == "sse41.mpsadbw") // Added in 3.6
430  NewFn);
431  if (Name == "avx.dp.ps.256") // Added in 3.6
433  NewFn);
434  if (Name == "avx2.mpsadbw") // Added in 3.6
436  NewFn);
437 
438  // frcz.ss/sd may need to have an argument dropped. Added in 3.2
439  if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
440  rename(F);
443  return true;
444  }
445  if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
446  rename(F);
449  return true;
450  }
451  // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
452  if (Name.startswith("xop.vpermil2")) { // Added in 3.9
453  auto Idx = F->getFunctionType()->getParamType(2);
454  if (Idx->isFPOrFPVectorTy()) {
455  rename(F);
// The replacement intrinsic is chosen from the index operand's element
// width and total width in bits.
456  unsigned IdxSize = Idx->getPrimitiveSizeInBits();
457  unsigned EltSize = Idx->getScalarSizeInBits();
458  Intrinsic::ID Permil2ID;
459  if (EltSize == 64 && IdxSize == 128)
460  Permil2ID = Intrinsic::x86_xop_vpermil2pd;
461  else if (EltSize == 32 && IdxSize == 128)
462  Permil2ID = Intrinsic::x86_xop_vpermil2ps;
463  else if (EltSize == 64 && IdxSize == 256)
465  else
467  NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
468  return true;
469  }
470  }
471 
472  if (Name == "seh.recoverfp") {
474  return true;
475  }
476 
477  return false;
478 }
479 
// Core of the intrinsic-declaration upgrade: inspects F's name and reports
// whether the declaration is an out-of-date intrinsic.
//
// Returns true when F needs upgrading. NewFn is then either a replacement
// declaration or nullptr; nullptr is set explicitly for the nvvm "Expand"
// names and for "stackprotectorcheck". Returns false when F is not a
// candidate.
// Names of eight characters or fewer, or not starting with "llvm.", are
// rejected immediately; the rest are dispatched on the first character after
// the "llvm." prefix.
// NOTE(review): this listing is missing the signature and many interior
// statements (the embedded numbers skip). Most NewFn assignments are among
// the missing lines -- confirm against the upstream file.
481  assert(F && "Illegal to upgrade a non-existent Function.");
482 
483  // Quickly eliminate it, if it's not a candidate.
484  StringRef Name = F->getName();
485  if (Name.size() <= 8 || !Name.startswith("llvm."))
486  return false;
487  Name = Name.substr(5); // Strip off "llvm."
488 
489  switch (Name[0]) {
490  default: break;
491  case 'a': {
492  if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
494  F->arg_begin()->getType());
495  return true;
496  }
497  if (Name.startswith("arm.neon.vclz")) {
498  Type* args[2] = {
499  F->arg_begin()->getType(),
501  };
502  // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
503  // the end of the name. Change name from llvm.arm.neon.vclz.* to
504  // llvm.ctlz.*
505  FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
// substr(14) drops "arm.neon.vclz." (14 characters) and keeps the type suffix.
506  NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
507  "llvm.ctlz." + Name.substr(14), F->getParent());
508  return true;
509  }
510  if (Name.startswith("arm.neon.vcnt")) {
512  F->arg_begin()->getType());
513  return true;
514  }
515  Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
516  if (vldRegex.match(Name)) {
517  auto fArgs = F->getFunctionType()->params();
518  SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
519  // Can't use Intrinsic::getDeclaration here as the return types might
520  // then only be structurally equal.
521  FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
// The new declaration keeps the old name and appends a ".p0i8" suffix.
522  NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
523  "llvm." + Name + ".p0i8", F->getParent());
524  return true;
525  }
526  Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
527  if (vstRegex.match(Name)) {
528  static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
532 
533  static const Intrinsic::ID StoreLaneInts[] = {
536  };
537 
538  auto fArgs = F->getFunctionType()->params();
539  Type *Tys[] = {fArgs[0], fArgs[1]};
// The replacement is picked from a table indexed by the parameter count:
// size - 3 for the plain stores, size - 5 for the "lane" stores.
540  if (Name.find("lane") == StringRef::npos)
542  StoreInts[fArgs.size() - 3], Tys);
543  else
545  StoreLaneInts[fArgs.size() - 5], Tys);
546  return true;
547  }
548  if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
550  return true;
551  }
552  break;
553  }
554 
555  case 'c': {
// Only the one-argument forms of ctlz/cttz are treated as old.
556  if (Name.startswith("ctlz.") && F->arg_size() == 1) {
557  rename(F);
559  F->arg_begin()->getType());
560  return true;
561  }
562  if (Name.startswith("cttz.") && F->arg_size() == 1) {
563  rename(F);
565  F->arg_begin()->getType());
566  return true;
567  }
568  break;
569  }
570  case 'd': {
// Only the four-argument form of dbg.value is treated as old.
571  if (Name == "dbg.value" && F->arg_size() == 4) {
572  rename(F);
574  return true;
575  }
576  break;
577  }
578  case 'i':
579  case 'l': {
// lifetime.* and invariant.* are upgraded only when F's current name differs
// from the name Intrinsic::getName() produces for the object-pointer type.
580  bool IsLifetimeStart = Name.startswith("lifetime.start");
581  if (IsLifetimeStart || Name.startswith("invariant.start")) {
582  Intrinsic::ID ID = IsLifetimeStart ?
584  auto Args = F->getFunctionType()->params();
585  Type* ObjectPtr[1] = {Args[1]};
586  if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
587  rename(F);
588  NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
589  return true;
590  }
591  }
592 
593  bool IsLifetimeEnd = Name.startswith("lifetime.end");
594  if (IsLifetimeEnd || Name.startswith("invariant.end")) {
595  Intrinsic::ID ID = IsLifetimeEnd ?
597 
598  auto Args = F->getFunctionType()->params();
// The object pointer is parameter 1 for lifetime.end and parameter 2 for
// invariant.end.
599  Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
600  if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
601  rename(F);
602  NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
603  return true;
604  }
605  }
606  if (Name.startswith("invariant.group.barrier")) {
607  // Rename invariant.group.barrier to launder.invariant.group
608  auto Args = F->getFunctionType()->params();
609  Type* ObjectPtr[1] = {Args[0]};
610  rename(F);
613  return true;
614 
615  }
616 
617  break;
618  }
619  case 'm': {
// NOTE(review): each masked.* case below closes one more brace than it opens
// in this listing, so a nested condition is presumably among the missing
// lines -- confirm upstream.
620  if (Name.startswith("masked.load.")) {
621  Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
623  rename(F);
626  Tys);
627  return true;
628  }
629  }
630  if (Name.startswith("masked.store.")) {
631  auto Args = F->getFunctionType()->params();
632  Type *Tys[] = { Args[0], Args[1] };
634  rename(F);
637  Tys);
638  return true;
639  }
640  }
641  // Renaming gather/scatter intrinsics with no address space overloading
642  // to the new overload which includes an address space
643  if (Name.startswith("masked.gather.")) {
644  Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
646  rename(F);
649  return true;
650  }
651  }
652  if (Name.startswith("masked.scatter.")) {
653  auto Args = F->getFunctionType()->params();
654  Type *Tys[] = {Args[0], Args[1]};
656  rename(F);
659  return true;
660  }
661  }
662  // Updating the memory intrinsics (memcpy/memmove/memset) that have an
663  // alignment parameter to embedding the alignment as an attribute of
664  // the pointer args.
665  if (Name.startswith("memcpy.") && F->arg_size() == 5) {
666  rename(F);
667  // Get the types of dest, src, and len
668  ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
670  ParamTypes);
671  return true;
672  }
673  if (Name.startswith("memmove.") && F->arg_size() == 5) {
674  rename(F);
675  // Get the types of dest, src, and len
676  ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
678  ParamTypes);
679  return true;
680  }
681  if (Name.startswith("memset.") && F->arg_size() == 5) {
682  rename(F);
683  // Get the types of dest, and len
684  const auto *FT = F->getFunctionType();
685  Type *ParamTypes[2] = {
686  FT->getParamType(0), // Dest
687  FT->getParamType(2) // len
688  };
690  ParamTypes);
691  return true;
692  }
693  break;
694  }
695  case 'n': {
696  if (Name.startswith("nvvm.")) {
// Drop the "nvvm." prefix so the tables below match on the short name.
697  Name = Name.substr(5);
698 
699  // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
701  .Cases("brev32", "brev64", Intrinsic::bitreverse)
702  .Case("clz.i", Intrinsic::ctlz)
703  .Case("popc.i", Intrinsic::ctpop)
// A direct replacement is used only for one-argument functions; it is
// overloaded on F's return type.
705  if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
706  NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
707  {F->getReturnType()});
708  return true;
709  }
710 
711  // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
712  // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
713  //
714  // TODO: We could add lohi.i2d.
715  bool Expand = StringSwitch<bool>(Name)
716  .Cases("abs.i", "abs.ll", true)
717  .Cases("clz.ll", "popc.ll", "h2f", true)
718  .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
719  .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
720  .Default(false);
721  if (Expand) {
722  NewFn = nullptr;
723  return true;
724  }
725  }
726  break;
727  }
728  case 'o':
729  // We only need to change the name to match the mangling including the
730  // address space.
731  if (Name.startswith("objectsize.")) {
732  Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
733  if (F->arg_size() == 2 ||
735  rename(F);
737  Tys);
738  return true;
739  }
740  }
741  break;
742 
743  case 's':
// stackprotectorcheck is reported as upgradable with no replacement
// declaration.
744  if (Name == "stackprotectorcheck") {
745  NewFn = nullptr;
746  return true;
747  }
748  break;
749 
750  case 'x':
751  if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
752  return true;
753  }
754  // Remangle our intrinsic since we upgrade the mangling
756  if (Result != None) {
757  NewFn = Result.getValue();
758  return true;
759  }
760 
761  // This may not belong here. This function is effectively being overloaded
762  // to both detect an intrinsic which needs upgrading, and to provide the
763  // upgraded form of the intrinsic. We should perhaps have two separate
764  // functions for this.
765  return false;
766 }
767 
// Entry point for upgrading an intrinsic declaration: clears NewFn, delegates
// to UpgradeIntrinsicFunction1(), and returns that function's result. An
// assertion checks that any replacement is not F itself.
// NOTE(review): the signature and the statement guarded by the final `if`
// are absent from this listing -- confirm upstream.
769  NewFn = nullptr;
770  bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
771  assert(F != NewFn && "Intrinsic function upgraded to the same function");
772 
773  // Upgrade intrinsic attributes. This does not change the function.
// When a replacement was produced, the intrinsic ID is read from it and not
// from the original declaration.
774  if (NewFn)
775  F = NewFn;
776  if (Intrinsic::ID id = F->getIntrinsicID())
778  return Upgraded;
779 }
780 
// Placeholder upgrade hook: it does nothing and always returns false.
// NOTE(review): the signature line is absent from this listing; the name
// UpgradeGlobalVariable is presumed -- confirm upstream.
782  // Nothing to do yet.
783  return false;
784 }
785 
786 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
787 // to byte shuffles.
//
// Op is bitcast to a vector of i8, shuffled against an all-zero vector, and
// the result is bitcast back to Op's original type. Shift is the shift
// amount in bytes; a value of 16 or more yields the zero vector.
// NOTE(review): the first signature line is absent from this listing; Builder
// is presumably an IRBuilder parameter -- confirm upstream.
789  Value *Op, unsigned Shift) {
790  Type *ResultTy = Op->getType();
// Byte count of the vector: eight bytes per (64-bit) element.
791  unsigned NumElts = ResultTy->getVectorNumElements() * 8;
792 
793  // Bitcast from a 64-bit element type to a byte element type.
794  Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
795  Op = Builder.CreateBitCast(Op, VecTy, "cast");
796 
797  // We'll be shuffling in zeroes.
798  Value *Res = Constant::getNullValue(VecTy);
799 
800  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
801  // we'll just return the zero vector.
802  if (Shift < 16) {
// Room for 64 byte indices, i.e. the 512-bit case.
803  uint32_t Idxs[64];
804  // 256/512-bit version is split into 2/4 16-byte lanes.
805  for (unsigned l = 0; l != NumElts; l += 16)
806  for (unsigned i = 0; i != 16; ++i) {
// Start from an index into the second shuffle operand (Op). If i < Shift the
// value lands below NumElts and is remapped into the first operand (zeros).
807  unsigned Idx = NumElts + i - Shift;
808  if (Idx < NumElts)
809  Idx -= NumElts - 16; // end of lane, switch operand.
810  Idxs[l + i] = Idx + l;
811  }
812 
813  Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
814  }
815 
816  // Bitcast back to a 64-bit element type.
817  return Builder.CreateBitCast(Res, ResultTy, "cast");
818 }
819 
// The first line of the signature (listing line 822) is absent from this
// listing. Op is reinterpreted as a vector of bytes; within every 16-byte
// lane, result byte i is Op byte (i + Shift) of the same lane, or zero when
// i + Shift >= 16. The result is bitcast back to Op's original type. For
// Shift >= 16 the zero vector is returned (bitcast back).
820 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
821 // to byte shuffles.
823  unsigned Shift) {
824  Type *ResultTy = Op->getType();
// Byte count of the vector, given the 64-bit element type noted below.
825  unsigned NumElts = ResultTy->getVectorNumElements() * 8;
826 
827  // Bitcast from a 64-bit element type to a byte element type.
828  Type *VecTy = VectorType::get(Builder.getInt8Ty(), NumElts);
829  Op = Builder.CreateBitCast(Op, VecTy, "cast");
830 
831  // We'll be shuffling in zeroes.
832  Value *Res = Constant::getNullValue(VecTy);
833 
834  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
835  // we'll just return the zero vector.
836  if (Shift < 16) {
// Sized for the largest case: 64 bytes.
837  uint32_t Idxs[64];
838  // 256/512-bit version is split into 2/4 16-byte lanes.
839  for (unsigned l = 0; l != NumElts; l += 16)
840  for (unsigned i = 0; i != 16; ++i) {
// The shuffle below takes (Op, Res): indices below NumElts pick bytes of Op,
// indices at or above NumElts pick bytes of the zero vector.
841  unsigned Idx = i + Shift;
842  if (Idx >= 16)
843  Idx += NumElts - 16; // end of lane, switch operand.
844  Idxs[l + i] = Idx + l;
845  }
846 
847  Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
848  }
849 
850  // Bitcast back to a 64-bit element type.
851  return Builder.CreateBitCast(Res, ResultTy, "cast");
852 }
853 
// Converts an integer mask into a vector mask with NumElts elements. The first
// line of the signature (listing line 854) and the line that starts the MaskTy
// declaration (listing line 856) are absent from this listing.
855  unsigned NumElts) {
// Tail of the MaskTy declaration: its element count is the bit width of the
// integer mask.
857  cast<IntegerType>(Mask->getType())->getBitWidth());
858  Mask = Builder.CreateBitCast(Mask, MaskTy);
859 
860  // If we have less than 8 elements, then the starting mask was an i8 and
861  // we need to extract down to the right number of elements.
862  if (NumElts < 8) {
// Only the first NumElts entries are filled and used.
// NOTE(review): the array holds 4 entries, so NumElts is assumed to be at
// most 4 on this path - confirm against callers.
863  uint32_t Indices[4];
864  for (unsigned i = 0; i != NumElts; ++i)
865  Indices[i] = i;
866  Mask = Builder.CreateShuffleVector(Mask, Mask,
867  makeArrayRef(Indices, NumElts),
868  "extract");
869  }
870 
871  return Mask;
872 }
873 
// Emits a per-element select between Op0 and Op1 driven by an integer mask.
// The first line of the signature (listing line 874) is absent from this
// listing. Returns Op0 unchanged when Mask is a constant with all bits set;
// otherwise returns a select whose condition is the mask converted by
// getX86MaskVec to one element per element of Op0.
875  Value *Op0, Value *Op1) {
876  // If the mask is all ones just emit the first operation.
877  if (const auto *C = dyn_cast<Constant>(Mask))
878  if (C->isAllOnesValue())
879  return Op0;
880 
881  Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
882  return Builder.CreateSelect(Mask, Op0, Op1);
883 }
884 
// Emits a select between Op0 and Op1 controlled by element 0 of the mask. The
// first line of the signature (listing line 885) and the line that starts the
// MaskTy initializer (listing line 893) are absent from this listing. Returns
// Op0 unchanged when Mask is a constant with all bits set.
886  Value *Op0, Value *Op1) {
887  // If the mask is all ones just emit the first operation.
888  if (const auto *C = dyn_cast<Constant>(Mask))
889  if (C->isAllOnesValue())
890  return Op0;
891 
// Reinterpret the integer mask as a vector (element count = mask bit width)
// and use only its element 0 as the select condition.
892  llvm::VectorType *MaskTy =
894  Mask->getType()->getIntegerBitWidth());
895  Mask = Builder.CreateBitCast(Mask, MaskTy);
896  Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
897  return Builder.CreateSelect(Mask, Op0, Op1);
898 }
899 
// The first line of the signature (listing line 903) is absent from this
// listing. Builds a shuffle of (Op1, Op0) that realises the alignment by the
// constant Shift, then merges it with Passthru under Mask via EmitX86Select.
// Shift must be a ConstantInt (it is cast unconditionally).
900 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
901 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
902 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
904  Value *Op1, Value *Shift,
905  Value *Passthru, Value *Mask,
906  bool IsVALIGN) {
907  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
908 
909  unsigned NumElts = Op0->getType()->getVectorNumElements();
910  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
911  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
912  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
913 
914  // Mask the immediate for VALIGN.
915  if (IsVALIGN)
916  ShiftVal &= (NumElts - 1);
917 
918  // If palignr is shifting the pair of vectors more than the size of two
919  // lanes, emit zero.
920  if (ShiftVal >= 32)
921  return llvm::Constant::getNullValue(Op0->getType());
922 
923  // If palignr is shifting the pair of input vectors more than one lane,
924  // but less than two lanes, convert to shifting in zeroes.
925  if (ShiftVal > 16) {
926  ShiftVal -= 16;
927  Op1 = Op0;
// NOTE(review): listing line 928 is missing here; presumably it replaces Op0
// with a zero vector, as the comment above describes - confirm against the
// full file.
929  }
930 
// The inner loop always writes 16 entries per step of l; only the first
// NumElts entries are passed to the shuffle.
931  uint32_t Indices[64];
932  // 256-bit palignr operates on 128-bit lanes so we need to handle that
933  for (unsigned l = 0; l < NumElts; l += 16) {
934  for (unsigned i = 0; i != 16; ++i) {
935  unsigned Idx = ShiftVal + i;
936  if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
937  Idx += NumElts - 16; // End of lane, switch operand.
938  Indices[l + i] = Idx + l;
939  }
940  }
941 
// Op1 is the first shuffle operand, so indices below NumElts select from Op1
// and indices at or above NumElts select from Op0.
942  Value *Align = Builder.CreateShuffleVector(Op1, Op0,
943  makeArrayRef(Indices, NumElts),
944  "palignr");
945 
946  return EmitX86Select(Builder, Mask, Align, Passthru);
947 }
948 
// Replaces a masked four-operand call CI with a call to an unmasked
// three-operand intrinsic followed by a select under the mask (operand 3).
// The first line of the signature (listing line 949) is absent from this
// listing, and so is the `IID = ...` assignment that followed each condition
// of the chain below; only the conditions are shown. The intrinsic is chosen
// from the result type's total bit width, scalar bit width and, for 32/64-bit
// elements, whether it is floating point.
950  bool ZeroMask, bool IndexForm) {
951  Type *Ty = CI.getType();
952  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
953  unsigned EltWidth = Ty->getScalarSizeInBits();
954  bool IsFloat = Ty->isFPOrFPVectorTy();
955  Intrinsic::ID IID;
956  if (VecWidth == 128 && EltWidth == 32 && IsFloat)
958  else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
960  else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
962  else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
964  else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
966  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
968  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
970  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
972  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
974  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
976  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
978  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
980  else if (VecWidth == 128 && EltWidth == 16)
982  else if (VecWidth == 256 && EltWidth == 16)
984  else if (VecWidth == 512 && EltWidth == 16)
986  else if (VecWidth == 128 && EltWidth == 8)
988  else if (VecWidth == 256 && EltWidth == 8)
990  else if (VecWidth == 512 && EltWidth == 8)
992  else
993  llvm_unreachable("Unexpected intrinsic");
994 
995  Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
996  CI.getArgOperand(2) };
997 
998  // If this isn't index form we need to swap operand 0 and 1.
999  if (!IndexForm)
1000  std::swap(Args[0], Args[1]);
1001 
1002  Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1003  Args);
// Masked-off elements become zero for the zero-masking form; otherwise they
// come from the original operand 1 (before any swap), bitcast to the result
// type.
1004  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1005  : Builder.CreateBitCast(CI.getArgOperand(1),
1006  Ty);
1007  return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
1008 }
1009 
// Replaces call CI with one of the generic saturating arithmetic intrinsics
// (sadd_sat / ssub_sat / uadd_sat / usub_sat), picked by IsSigned and
// IsAddition and overloaded on CI's result type. The first line of the
// signature (listing line 1010) is absent from this listing. Returns the
// replacement value.
1011  bool IsSigned, bool IsAddition) {
1012  Type *Ty = CI.getType();
1013  Value *Op0 = CI.getOperand(0);
1014  Value *Op1 = CI.getOperand(1);
1015 
1016  Intrinsic::ID IID =
1017  IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat)
1018  : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat);
1019  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1020  Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
1021 
// With four arguments, operand 2 is the pass-through vector and operand 3 the
// mask.
1022  if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
1023  Value *VecSrc = CI.getOperand(2);
1024  Value *Mask = CI.getOperand(3);
1025  Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1026  }
1027  return Res;
1028 }
1029 
// Replaces a rotate call CI with a funnel-shift intrinsic (fshr when
// IsRotateRight, otherwise fshl) whose two data inputs are both operand 0.
// The first line of the signature (listing line 1030) is absent from this
// listing. Returns the replacement value.
1031  bool IsRotateRight) {
1032  Type *Ty = CI.getType();
1033  Value *Src = CI.getArgOperand(0);
1034  Value *Amt = CI.getArgOperand(1);
1035 
1036  // Amount may be scalar immediate, in which case create a splat vector.
1037  // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1038  // we only care about the lowest log2 bits anyway.
1039  if (Amt->getType() != Ty) {
1040  unsigned NumElts = Ty->getVectorNumElements();
// The final `false` requests an unsigned (zero-extending) integer cast.
1041  Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1042  Amt = Builder.CreateVectorSplat(NumElts, Amt);
1043  }
1044 
1045  Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1046  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1047  Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
1048 
// With four arguments, operand 2 is the pass-through vector and operand 3 the
// mask.
1049  if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
1050  Value *VecSrc = CI.getOperand(2);
1051  Value *Mask = CI.getOperand(3);
1052  Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1053  }
1054  return Res;
1055 }
1056 
// Replaces a two-input shift call CI with a funnel-shift intrinsic (fshr when
// IsShiftRight, otherwise fshl). The first line of the signature (listing line
// 1057) is absent from this listing. Returns the replacement value, merged
// under the mask when CI has four or more arguments.
1058  bool IsShiftRight, bool ZeroMask) {
1059  Type *Ty = CI.getType();
1060  Value *Op0 = CI.getArgOperand(0);
1061  Value *Op1 = CI.getArgOperand(1);
1062  Value *Amt = CI.getArgOperand(2);
1063 
// The two data operands are passed in reversed order for the right shift.
1064  if (IsShiftRight)
1065  std::swap(Op0, Op1);
1066 
1067  // Amount may be scalar immediate, in which case create a splat vector.
1068  // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1069  // we only care about the lowest log2 bits anyway.
1070  if (Amt->getType() != Ty) {
1071  unsigned NumElts = Ty->getVectorNumElements();
1072  Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1073  Amt = Builder.CreateVectorSplat(NumElts, Amt);
1074  }
1075 
1076  Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1077  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1078  Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
1079 
1080  unsigned NumArgs = CI.getNumArgOperands();
1081  if (NumArgs >= 4) { // For masked intrinsics.
// Pass-through choice: operand 3 for the five-argument form; otherwise zero
// for the zero-masking form, else the original operand 0 (before any swap).
// The mask is always the last argument.
1082  Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
1083  ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
1084  CI.getArgOperand(0);
1085  Value *Mask = CI.getOperand(NumArgs - 1);
1086  Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1087  }
1088  return Res;
1089 }
1090 
// Emits a store of Data through Ptr: a plain aligned store when Mask is a
// constant with all bits set, otherwise a masked store. The first line of the
// signature (listing line 1091) and the second line of the pointer bitcast
// (listing line 1096) are absent from this listing.
1092  Value *Ptr, Value *Data, Value *Mask,
1093  bool Aligned) {
1094  // Cast the pointer to the right type.
1095  Ptr = Builder.CreateBitCast(Ptr,
// Alignment is the vector's size in bytes when Aligned is set, otherwise 1.
1097  unsigned Align =
1098  Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;
1099 
1100  // If the mask is all ones just emit a regular store.
1101  if (const auto *C = dyn_cast<Constant>(Mask))
1102  if (C->isAllOnesValue())
1103  return Builder.CreateAlignedStore(Data, Ptr, Align);
1104 
1105  // Convert the mask from an integer type to a vector of i1.
1106  unsigned NumElts = Data->getType()->getVectorNumElements();
1107  Mask = getX86MaskVec(Builder, Mask, NumElts);
1108  return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
1109 }
1110 
// Emits a load through Ptr of a value with Passthru's type: a plain aligned
// load when Mask is a constant with all bits set, otherwise a masked load
// with Passthru as its pass-through operand. The first line of the
// signature (listing line 1111) is absent from this listing.
1112  Value *Ptr, Value *Passthru, Value *Mask,
1113  bool Aligned) {
1114  // Cast the pointer to the right type.
1115  Ptr = Builder.CreateBitCast(Ptr,
1116  llvm::PointerType::getUnqual(Passthru->getType()));
// Alignment is the vector's size in bytes when Aligned is set, otherwise 1.
1117  unsigned Align =
1118  Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;
1119 
1120  // If the mask is all ones just emit a regular load.
1121  if (const auto *C = dyn_cast<Constant>(Mask))
1122  if (C->isAllOnesValue())
1123  return Builder.CreateAlignedLoad(Ptr, Align);
1124 
1125  // Convert the mask from an integer type to a vector of i1.
1126  unsigned NumElts = Passthru->getType()->getVectorNumElements();
1127  Mask = getX86MaskVec(Builder, Mask, NumElts);
1128  return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
1129 }
1130 
1131 static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
1132  Value *Op0 = CI.getArgOperand(0);
1133  llvm::Type *Ty = Op0->getType();
1134  Value *Zero = llvm::Constant::getNullValue(Ty);
1135  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Op0, Zero);
1136  Value *Neg = Builder.CreateNeg(Op0);
1137  Value *Res = Builder.CreateSelect(Cmp, Op0, Neg);
1138 
1139  if (CI.getNumArgOperands() == 3)
1140  Res = EmitX86Select(Builder,CI.getArgOperand(2), Res, CI.getArgOperand(1));
1141 
1142  return Res;
1143 }
1144 
// Expands an integer min/max call CI into a compare with predicate Pred and a
// select that keeps operand 0 where the compare holds and operand 1
// elsewhere. The first line of the signature (listing line 1145) is absent
// from this listing. Returns the replacement value.
1146  ICmpInst::Predicate Pred) {
1147  Value *Op0 = CI.getArgOperand(0);
1148  Value *Op1 = CI.getArgOperand(1);
1149  Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
1150  Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);
1151 
// Four-argument form: operand 2 is the pass-through vector, operand 3 the mask.
1152  if (CI.getNumArgOperands() == 4)
1153  Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1154 
1155  return Res;
1156 }
1157 
1158 static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
1159  Type *Ty = CI.getType();
1160 
1161  // Arguments have a vXi32 type so cast to vXi64.
1162  Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1163  Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1164 
1165  if (IsSigned) {
1166  // Shift left then arithmetic shift right.
1167  Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1168  LHS = Builder.CreateShl(LHS, ShiftAmt);
1169  LHS = Builder.CreateAShr(LHS, ShiftAmt);
1170  RHS = Builder.CreateShl(RHS, ShiftAmt);
1171  RHS = Builder.CreateAShr(RHS, ShiftAmt);
1172  } else {
1173  // Clear the upper bits.
1174  Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1175  LHS = Builder.CreateAnd(LHS, Mask);
1176  RHS = Builder.CreateAnd(RHS, Mask);
1177  }
1178 
1179  Value *Res = Builder.CreateMul(LHS, RHS);
1180 
1181  if (CI.getNumArgOperands() == 4)
1182  Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1183 
1184  return Res;
1185 }
1186 
// The first line of the signature (listing line 1188) and the second operand
// of the widening shuffle (listing line 1204) are absent from this listing.
// When Mask is non-null and not an all-ones constant, Vec is ANDed with the
// mask converted by getX86MaskVec. The vector is then widened to 8 elements
// if it has fewer, and bitcast to an integer of max(NumElts, 8) bits.
1187 // Applying mask on vector of i1's and make sure result is at least 8 bits wide.
1189  Value *Mask) {
1190  unsigned NumElts = Vec->getType()->getVectorNumElements();
1191  if (Mask) {
1192  const auto *C = dyn_cast<Constant>(Mask);
1193  if (!C || !C->isAllOnesValue())
1194  Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
1195  }
1196 
1197  if (NumElts < 8) {
1198  uint32_t Indices[8];
// The first NumElts indices keep Vec's own elements; the remaining ones are
// all >= NumElts and therefore select from the second shuffle operand.
1199  for (unsigned i = 0; i != NumElts; ++i)
1200  Indices[i] = i;
1201  for (unsigned i = NumElts; i != 8; ++i)
1202  Indices[i] = NumElts + i % NumElts;
// NOTE(review): the second shuffle operand (listing line 1204) is not visible
// here; presumably it is a zero vector of Vec's type so the padding bits are
// zero - confirm against the full file.
1203  Vec = Builder.CreateShuffleVector(Vec,
1205  Indices);
1206  }
1207  return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
1208 }
1209 
// Expands a masked integer vector compare call CI into generic IR and returns
// the result as an integer bitmask via ApplyX86MaskOn1BitsVec. The first line
// of the signature (listing line 1210) is absent from this listing. CC selects
// the comparison; Signed picks the signed or unsigned variant of the ordered
// predicates. The mask is CI's last argument.
1211  unsigned CC, bool Signed) {
1212  Value *Op0 = CI.getArgOperand(0);
1213  unsigned NumElts = Op0->getType()->getVectorNumElements();
1214 
1215  Value *Cmp;
// CC 3 and CC 7 need no compare: they yield constant all-false and all-true
// vectors respectively.
1216  if (CC == 3) {
1217  Cmp = Constant::getNullValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
1218  } else if (CC == 7) {
1219  Cmp = Constant::getAllOnesValue(llvm::VectorType::get(Builder.getInt1Ty(), NumElts));
1220  } else {
1221  ICmpInst::Predicate Pred;
// 0 = EQ, 1 = LT, 2 = LE, 4 = NE, 5 = GE, 6 = GT.
1222  switch (CC) {
1223  default: llvm_unreachable("Unknown condition code");
1224  case 0: Pred = ICmpInst::ICMP_EQ; break;
1225  case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
1226  case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
1227  case 4: Pred = ICmpInst::ICMP_NE; break;
1228  case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
1229  case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
1230  }
1231  Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
1232  }
1233 
1234  Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);
1235 
1236  return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
1237 }
1238 
// The first line of the signature (listing line 1240) is absent from this
// listing. Calls intrinsic IID on CI's operands 0 and 1, then merges the
// result with operand 2 (pass-through) under the mask in operand 3.
1239 // Replace a masked intrinsic with an older unmasked intrinsic.
1241  Intrinsic::ID IID) {
1242  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1243  Value *Rep = Builder.CreateCall(Intrin,
1244  { CI.getArgOperand(0), CI.getArgOperand(1) });
1245  return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
1246 }
1247 
// Body of a masked scalar-move upgrade; the signature (listing line 1248) is
// absent from this listing. Returns operand 0 (A) with element 0 replaced by
// element 0 of operand 1 (B) when bit 0 of the mask is set, or by element 0
// of operand 2 (Src) when it is clear.
1249  Value* A = CI.getArgOperand(0);
1250  Value* B = CI.getArgOperand(1);
1251  Value* Src = CI.getArgOperand(2);
1252  Value* Mask = CI.getArgOperand(3);
1253 
// Only the lowest mask bit matters; the constant is 8 bits wide, so the mask
// is expected to be an i8.
1254  Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
1255  Value* Cmp = Builder.CreateIsNotNull(AndNode);
1256  Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
1257  Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
1258  Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
1259  return Builder.CreateInsertElement(A, Select, (uint64_t)0);
1260 }
1261 
1262 
1263 static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
1264  Value* Op = CI.getArgOperand(0);
1265  Type* ReturnOp = CI.getType();
1266  unsigned NumElts = CI.getType()->getVectorNumElements();
1267  Value *Mask = getX86MaskVec(Builder, Op, NumElts);
1268  return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1269 }
1270 
// The first line of the signature (listing line 1272) is absent from this
// listing, and so is the `IID = ...` assignment that followed each condition
// below; only the conditions are shown. Name is expected to begin with a
// 12-character prefix ("avx512.mask."), which is stripped first. The rest of
// the name, together with the result type's total and scalar bit widths,
// picks the unmasked intrinsic. Returns false, leaving Rep untouched, when
// the name matches none of the handled families. Otherwise Rep receives the
// unmasked call merged with the pass-through operand under the mask, and
// true is returned.
1271 // Replace intrinsic with unmasked version and a select.
1273  CallInst &CI, Value *&Rep) {
1274  Name = Name.substr(12); // Remove avx512.mask.
1275 
1276  unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
1277  unsigned EltWidth = CI.getType()->getScalarSizeInBits();
1278  Intrinsic::ID IID;
1279  if (Name.startswith("max.p")) {
1280  if (VecWidth == 128 && EltWidth == 32)
1282  else if (VecWidth == 128 && EltWidth == 64)
1284  else if (VecWidth == 256 && EltWidth == 32)
1286  else if (VecWidth == 256 && EltWidth == 64)
1288  else
1289  llvm_unreachable("Unexpected intrinsic");
1290  } else if (Name.startswith("min.p")) {
1291  if (VecWidth == 128 && EltWidth == 32)
1293  else if (VecWidth == 128 && EltWidth == 64)
1295  else if (VecWidth == 256 && EltWidth == 32)
1297  else if (VecWidth == 256 && EltWidth == 64)
1299  else
1300  llvm_unreachable("Unexpected intrinsic");
1301  } else if (Name.startswith("pshuf.b.")) {
1302  if (VecWidth == 128)
1304  else if (VecWidth == 256)
1306  else if (VecWidth == 512)
1308  else
1309  llvm_unreachable("Unexpected intrinsic");
1310  } else if (Name.startswith("pmul.hr.sw.")) {
1311  if (VecWidth == 128)
1313  else if (VecWidth == 256)
1315  else if (VecWidth == 512)
1317  else
1318  llvm_unreachable("Unexpected intrinsic");
1319  } else if (Name.startswith("pmulh.w.")) {
1320  if (VecWidth == 128)
1322  else if (VecWidth == 256)
1324  else if (VecWidth == 512)
1326  else
1327  llvm_unreachable("Unexpected intrinsic");
1328  } else if (Name.startswith("pmulhu.w.")) {
1329  if (VecWidth == 128)
1331  else if (VecWidth == 256)
1333  else if (VecWidth == 512)
1335  else
1336  llvm_unreachable("Unexpected intrinsic");
1337  } else if (Name.startswith("pmaddw.d.")) {
1338  if (VecWidth == 128)
1340  else if (VecWidth == 256)
1342  else if (VecWidth == 512)
1344  else
1345  llvm_unreachable("Unexpected intrinsic");
1346  } else if (Name.startswith("pmaddubs.w.")) {
1347  if (VecWidth == 128)
1349  else if (VecWidth == 256)
1351  else if (VecWidth == 512)
1353  else
1354  llvm_unreachable("Unexpected intrinsic");
1355  } else if (Name.startswith("packsswb.")) {
1356  if (VecWidth == 128)
1358  else if (VecWidth == 256)
1360  else if (VecWidth == 512)
1362  else
1363  llvm_unreachable("Unexpected intrinsic");
1364  } else if (Name.startswith("packssdw.")) {
1365  if (VecWidth == 128)
1367  else if (VecWidth == 256)
1369  else if (VecWidth == 512)
1371  else
1372  llvm_unreachable("Unexpected intrinsic");
1373  } else if (Name.startswith("packuswb.")) {
1374  if (VecWidth == 128)
1376  else if (VecWidth == 256)
1378  else if (VecWidth == 512)
1380  else
1381  llvm_unreachable("Unexpected intrinsic");
1382  } else if (Name.startswith("packusdw.")) {
1383  if (VecWidth == 128)
1385  else if (VecWidth == 256)
1387  else if (VecWidth == 512)
1389  else
1390  llvm_unreachable("Unexpected intrinsic");
1391  } else if (Name.startswith("vpermilvar.")) {
1392  if (VecWidth == 128 && EltWidth == 32)
1394  else if (VecWidth == 128 && EltWidth == 64)
1396  else if (VecWidth == 256 && EltWidth == 32)
1398  else if (VecWidth == 256 && EltWidth == 64)
1400  else if (VecWidth == 512 && EltWidth == 32)
1402  else if (VecWidth == 512 && EltWidth == 64)
1404  else
1405  llvm_unreachable("Unexpected intrinsic");
// The conversion intrinsics below are matched by exact name rather than by
// prefix and width.
1406  } else if (Name == "cvtpd2dq.256") {
1408  } else if (Name == "cvtpd2ps.256") {
1410  } else if (Name == "cvttpd2dq.256") {
1412  } else if (Name == "cvttps2dq.128") {
1414  } else if (Name == "cvttps2dq.256") {
1416  } else if (Name.startswith("permvar.")) {
// For 32/64-bit elements the choice also depends on whether the result is a
// floating-point vector.
1417  bool IsFloat = CI.getType()->isFPOrFPVectorTy();
1418  if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1420  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1422  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1424  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1426  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1428  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1430  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1432  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1434  else if (VecWidth == 128 && EltWidth == 16)
1436  else if (VecWidth == 256 && EltWidth == 16)
1438  else if (VecWidth == 512 && EltWidth == 16)
1440  else if (VecWidth == 128 && EltWidth == 8)
1442  else if (VecWidth == 256 && EltWidth == 8)
1444  else if (VecWidth == 512 && EltWidth == 8)
1446  else
1447  llvm_unreachable("Unexpected intrinsic");
1448  } else if (Name.startswith("dbpsadbw.")) {
1449  if (VecWidth == 128)
1451  else if (VecWidth == 256)
1453  else if (VecWidth == 512)
1455  else
1456  llvm_unreachable("Unexpected intrinsic");
1457  } else if (Name.startswith("pmultishift.qb.")) {
1458  if (VecWidth == 128)
1460  else if (VecWidth == 256)
1462  else if (VecWidth == 512)
1464  else
1465  llvm_unreachable("Unexpected intrinsic");
1466  } else
1467  return false;
1468 
// NOTE(review): the line that starts the Args declaration (listing line 1469)
// is missing; from the continuation below it is built from CI's argument
// operands. The two pop_back calls drop the trailing pass-through and mask
// operands, so the unmasked intrinsic only receives the data operands.
1470  CI.arg_operands().end());
1471  Args.pop_back();
1472  Args.pop_back();
1473  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1474  Args);
// The last argument is the mask, the one before it the pass-through value.
1475  unsigned NumArgs = CI.getNumArgOperands();
1476  Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
1477  CI.getArgOperand(NumArgs - 2));
1478  return true;
1479 }
1480 
/// Upgrade comment in call to inline asm that represents an objc retain release
/// marker.
///
/// If \p AsmStr starts with "mov\tfp", mentions
/// "objc_retainAutoreleaseReturnValue" and contains "# marker", the '#' of the
/// first "# marker" occurrence is rewritten in place to ';'. Any other string
/// is left untouched.
///
/// \param AsmStr the inline-asm string to rewrite in place; must be non-null.
void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
  // Bounded prefix test: unlike find(...) == 0 this never scans past the first
  // six characters when the string does not start with the expected prefix.
  if (AsmStr->compare(0, 6, "mov\tfp") != 0)
    return;
  if (AsmStr->find("objc_retainAutoreleaseReturnValue") == std::string::npos)
    return;

  const size_t Pos = AsmStr->find("# marker");
  if (Pos != std::string::npos)
    AsmStr->replace(Pos, 1, ";");
}
1492 
1493 /// Upgrade a call to an old intrinsic. All argument and return casting must be
1494 /// provided to seamlessly integrate with existing context.
1496  Function *F = CI->getCalledFunction();
1497  LLVMContext &C = CI->getContext();
1498  IRBuilder<> Builder(C);
1499  Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
1500 
1501  assert(F && "Intrinsic call is not direct?");
1502 
1503  if (!NewFn) {
1504  // Get the Function's name.
1505  StringRef Name = F->getName();
1506 
1507  assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
1508  Name = Name.substr(5);
1509 
1510  bool IsX86 = Name.startswith("x86.");
1511  if (IsX86)
1512  Name = Name.substr(4);
1513  bool IsNVVM = Name.startswith("nvvm.");
1514  if (IsNVVM)
1515  Name = Name.substr(5);
1516 
1517  if (IsX86 && Name.startswith("sse4a.movnt.")) {
1518  Module *M = F->getParent();
1520  Elts.push_back(
1522  MDNode *Node = MDNode::get(C, Elts);
1523 
1524  Value *Arg0 = CI->getArgOperand(0);
1525  Value *Arg1 = CI->getArgOperand(1);
1526 
1527  // Nontemporal (unaligned) store of the 0'th element of the float/double
1528  // vector.
1529  Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
1530  PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
1531  Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
1532  Value *Extract =
1533  Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
1534 
1535  StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, 1);
1536  SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1537 
1538  // Remove intrinsic.
1539  CI->eraseFromParent();
1540  return;
1541  }
1542 
1543  if (IsX86 && (Name.startswith("avx.movnt.") ||
1544  Name.startswith("avx512.storent."))) {
1545  Module *M = F->getParent();
1547  Elts.push_back(
1549  MDNode *Node = MDNode::get(C, Elts);
1550 
1551  Value *Arg0 = CI->getArgOperand(0);
1552  Value *Arg1 = CI->getArgOperand(1);
1553 
1554  // Convert the type of the pointer to a pointer to the stored type.
1555  Value *BC = Builder.CreateBitCast(Arg0,
1557  "cast");
1558  VectorType *VTy = cast<VectorType>(Arg1->getType());
1559  StoreInst *SI = Builder.CreateAlignedStore(Arg1, BC,
1560  VTy->getBitWidth() / 8);
1561  SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1562 
1563  // Remove intrinsic.
1564  CI->eraseFromParent();
1565  return;
1566  }
1567 
1568  if (IsX86 && Name == "sse2.storel.dq") {
1569  Value *Arg0 = CI->getArgOperand(0);
1570  Value *Arg1 = CI->getArgOperand(1);
1571 
1572  Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
1573  Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
1574  Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
1575  Value *BC = Builder.CreateBitCast(Arg0,
1577  "cast");
1578  Builder.CreateAlignedStore(Elt, BC, 1);
1579 
1580  // Remove intrinsic.
1581  CI->eraseFromParent();
1582  return;
1583  }
1584 
1585  if (IsX86 && (Name.startswith("sse.storeu.") ||
1586  Name.startswith("sse2.storeu.") ||
1587  Name.startswith("avx.storeu."))) {
1588  Value *Arg0 = CI->getArgOperand(0);
1589  Value *Arg1 = CI->getArgOperand(1);
1590 
1591  Arg0 = Builder.CreateBitCast(Arg0,
1593  "cast");
1594  Builder.CreateAlignedStore(Arg1, Arg0, 1);
1595 
1596  // Remove intrinsic.
1597  CI->eraseFromParent();
1598  return;
1599  }
1600 
1601  if (IsX86 && Name == "avx512.mask.store.ss") {
1602  Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
1603  UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
1604  Mask, false);
1605 
1606  // Remove intrinsic.
1607  CI->eraseFromParent();
1608  return;
1609  }
1610 
1611  if (IsX86 && (Name.startswith("avx512.mask.store"))) {
1612  // "avx512.mask.storeu." or "avx512.mask.store."
1613  bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
1614  UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
1615  CI->getArgOperand(2), Aligned);
1616 
1617  // Remove intrinsic.
1618  CI->eraseFromParent();
1619  return;
1620  }
1621 
1622  Value *Rep;
1623  // Upgrade packed integer vector compare intrinsics to compare instructions.
1624  if (IsX86 && (Name.startswith("sse2.pcmp") ||
1625  Name.startswith("avx2.pcmp"))) {
1626  // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
1627  bool CmpEq = Name[9] == 'e';
1628  Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
1629  CI->getArgOperand(0), CI->getArgOperand(1));
1630  Rep = Builder.CreateSExt(Rep, CI->getType(), "");
1631  } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
1632  Type *ExtTy = Type::getInt32Ty(C);
1633  if (CI->getOperand(0)->getType()->isIntegerTy(8))
1634  ExtTy = Type::getInt64Ty(C);
1635  unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
1636  ExtTy->getPrimitiveSizeInBits();
1637  Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
1638  Rep = Builder.CreateVectorSplat(NumElts, Rep);
1639  } else if (IsX86 && (Name == "sse.sqrt.ss" ||
1640  Name == "sse2.sqrt.sd")) {
1641  Value *Vec = CI->getArgOperand(0);
1642  Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
1644  Intrinsic::sqrt, Elt0->getType());
1645  Elt0 = Builder.CreateCall(Intr, Elt0);
1646  Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
1647  } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
1648  Name.startswith("sse2.sqrt.p") ||
1649  Name.startswith("sse.sqrt.p"))) {
1650  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1652  CI->getType()),
1653  {CI->getArgOperand(0)});
1654  } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
1655  if (CI->getNumArgOperands() == 4 &&
1656  (!isa<ConstantInt>(CI->getArgOperand(3)) ||
1657  cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
1658  Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
1660 
1661  Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
1662  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
1663  IID), Args);
1664  } else {
1665  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1667  CI->getType()),
1668  {CI->getArgOperand(0)});
1669  }
1670  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1671  CI->getArgOperand(1));
1672  } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
1673  Name.startswith("avx512.ptestnm"))) {
1674  Value *Op0 = CI->getArgOperand(0);
1675  Value *Op1 = CI->getArgOperand(1);
1676  Value *Mask = CI->getArgOperand(2);
1677  Rep = Builder.CreateAnd(Op0, Op1);
1678  llvm::Type *Ty = Op0->getType();
1679  Value *Zero = llvm::Constant::getNullValue(Ty);
1680  ICmpInst::Predicate Pred =
1681  Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1682  Rep = Builder.CreateICmp(Pred, Rep, Zero);
1683  Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
1684  } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
1685  unsigned NumElts =
1687  Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
1688  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1689  CI->getArgOperand(1));
1690  } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
1691  unsigned NumElts = CI->getType()->getScalarSizeInBits();
1692  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
1693  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
1694  uint32_t Indices[64];
1695  for (unsigned i = 0; i != NumElts; ++i)
1696  Indices[i] = i;
1697 
1698  // First extract half of each vector. This gives better codegen than
1699  // doing it in a single shuffle.
1700  LHS = Builder.CreateShuffleVector(LHS, LHS,
1701  makeArrayRef(Indices, NumElts / 2));
1702  RHS = Builder.CreateShuffleVector(RHS, RHS,
1703  makeArrayRef(Indices, NumElts / 2));
1704  // Concat the vectors.
1705  // NOTE: Operands have to be swapped to match intrinsic definition.
1706  Rep = Builder.CreateShuffleVector(RHS, LHS,
1707  makeArrayRef(Indices, NumElts));
1708  Rep = Builder.CreateBitCast(Rep, CI->getType());
1709  } else if (IsX86 && Name == "avx512.kand.w") {
1710  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1711  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1712  Rep = Builder.CreateAnd(LHS, RHS);
1713  Rep = Builder.CreateBitCast(Rep, CI->getType());
1714  } else if (IsX86 && Name == "avx512.kandn.w") {
1715  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1716  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1717  LHS = Builder.CreateNot(LHS);
1718  Rep = Builder.CreateAnd(LHS, RHS);
1719  Rep = Builder.CreateBitCast(Rep, CI->getType());
1720  } else if (IsX86 && Name == "avx512.kor.w") {
1721  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1722  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1723  Rep = Builder.CreateOr(LHS, RHS);
1724  Rep = Builder.CreateBitCast(Rep, CI->getType());
1725  } else if (IsX86 && Name == "avx512.kxor.w") {
1726  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1727  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1728  Rep = Builder.CreateXor(LHS, RHS);
1729  Rep = Builder.CreateBitCast(Rep, CI->getType());
1730  } else if (IsX86 && Name == "avx512.kxnor.w") {
1731  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1732  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1733  LHS = Builder.CreateNot(LHS);
1734  Rep = Builder.CreateXor(LHS, RHS);
1735  Rep = Builder.CreateBitCast(Rep, CI->getType());
1736  } else if (IsX86 && Name == "avx512.knot.w") {
1737  Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1738  Rep = Builder.CreateNot(Rep);
1739  Rep = Builder.CreateBitCast(Rep, CI->getType());
1740  } else if (IsX86 &&
1741  (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
1742  Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
1743  Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
1744  Rep = Builder.CreateOr(LHS, RHS);
1745  Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
1746  Value *C;
1747  if (Name[14] == 'c')
1749  else
1750  C = ConstantInt::getNullValue(Builder.getInt16Ty());
1751  Rep = Builder.CreateICmpEQ(Rep, C);
1752  Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
1753  } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
1754  Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
1755  Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
1756  Name == "sse.div.ss" || Name == "sse2.div.sd")) {
1757  Type *I32Ty = Type::getInt32Ty(C);
1758  Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1759  ConstantInt::get(I32Ty, 0));
1760  Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1761  ConstantInt::get(I32Ty, 0));
1762  Value *EltOp;
1763  if (Name.contains(".add."))
1764  EltOp = Builder.CreateFAdd(Elt0, Elt1);
1765  else if (Name.contains(".sub."))
1766  EltOp = Builder.CreateFSub(Elt0, Elt1);
1767  else if (Name.contains(".mul."))
1768  EltOp = Builder.CreateFMul(Elt0, Elt1);
1769  else
1770  EltOp = Builder.CreateFDiv(Elt0, Elt1);
1771  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
1772  ConstantInt::get(I32Ty, 0));
1773  } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
1774  // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
1775  bool CmpEq = Name[16] == 'e';
1776  Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
1777  } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) {
1778  Type *OpTy = CI->getArgOperand(0)->getType();
1779  unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1780  Intrinsic::ID IID;
1781  switch (VecWidth) {
1782  default: llvm_unreachable("Unexpected intrinsic");
1783  case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
1784  case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
1785  case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
1786  }
1787 
1788  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1789  { CI->getOperand(0), CI->getArgOperand(1) });
1790  Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
1791  } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
1792  Type *OpTy = CI->getArgOperand(0)->getType();
1793  unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1794  unsigned EltWidth = OpTy->getScalarSizeInBits();
1795  Intrinsic::ID IID;
1796  if (VecWidth == 128 && EltWidth == 32)
1798  else if (VecWidth == 256 && EltWidth == 32)
1800  else if (VecWidth == 512 && EltWidth == 32)
1802  else if (VecWidth == 128 && EltWidth == 64)
1804  else if (VecWidth == 256 && EltWidth == 64)
1806  else if (VecWidth == 512 && EltWidth == 64)
1808  else
1809  llvm_unreachable("Unexpected intrinsic");
1810 
1811  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1812  { CI->getOperand(0), CI->getArgOperand(1) });
1813  Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
1814  } else if (IsX86 && Name.startswith("avx512.mask.cmp.p")) {
1815  Type *OpTy = CI->getArgOperand(0)->getType();
1816  unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1817  unsigned EltWidth = OpTy->getScalarSizeInBits();
1818  Intrinsic::ID IID;
1819  if (VecWidth == 128 && EltWidth == 32)
1821  else if (VecWidth == 256 && EltWidth == 32)
1823  else if (VecWidth == 512 && EltWidth == 32)
1825  else if (VecWidth == 128 && EltWidth == 64)
1827  else if (VecWidth == 256 && EltWidth == 64)
1829  else if (VecWidth == 512 && EltWidth == 64)
1831  else
1832  llvm_unreachable("Unexpected intrinsic");
1833 
1835  Args.push_back(CI->getArgOperand(0));
1836  Args.push_back(CI->getArgOperand(1));
1837  Args.push_back(CI->getArgOperand(2));
1838  if (CI->getNumArgOperands() == 5)
1839  Args.push_back(CI->getArgOperand(4));
1840 
1841  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1842  Args);
1843  Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(3));
1844  } else if (IsX86 && Name.startswith("avx512.mask.cmp.") &&
1845  Name[16] != 'p') {
1846  // Integer compare intrinsics.
1847  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1848  Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
1849  } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
1850  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1851  Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
1852  } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
1853  Name.startswith("avx512.cvtw2mask.") ||
1854  Name.startswith("avx512.cvtd2mask.") ||
1855  Name.startswith("avx512.cvtq2mask."))) {
1856  Value *Op = CI->getArgOperand(0);
1858  Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
1859  Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
1860  } else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
1861  Name == "ssse3.pabs.w.128" ||
1862  Name == "ssse3.pabs.d.128" ||
1863  Name.startswith("avx2.pabs") ||
1864  Name.startswith("avx512.mask.pabs"))) {
1865  Rep = upgradeAbs(Builder, *CI);
1866  } else if (IsX86 && (Name == "sse41.pmaxsb" ||
1867  Name == "sse2.pmaxs.w" ||
1868  Name == "sse41.pmaxsd" ||
1869  Name.startswith("avx2.pmaxs") ||
1870  Name.startswith("avx512.mask.pmaxs"))) {
1871  Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
1872  } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
1873  Name == "sse41.pmaxuw" ||
1874  Name == "sse41.pmaxud" ||
1875  Name.startswith("avx2.pmaxu") ||
1876  Name.startswith("avx512.mask.pmaxu"))) {
1877  Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
1878  } else if (IsX86 && (Name == "sse41.pminsb" ||
1879  Name == "sse2.pmins.w" ||
1880  Name == "sse41.pminsd" ||
1881  Name.startswith("avx2.pmins") ||
1882  Name.startswith("avx512.mask.pmins"))) {
1883  Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
1884  } else if (IsX86 && (Name == "sse2.pminu.b" ||
1885  Name == "sse41.pminuw" ||
1886  Name == "sse41.pminud" ||
1887  Name.startswith("avx2.pminu") ||
1888  Name.startswith("avx512.mask.pminu"))) {
1889  Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
1890  } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
1891  Name == "avx2.pmulu.dq" ||
1892  Name == "avx512.pmulu.dq.512" ||
1893  Name.startswith("avx512.mask.pmulu.dq."))) {
1894  Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
1895  } else if (IsX86 && (Name == "sse41.pmuldq" ||
1896  Name == "avx2.pmul.dq" ||
1897  Name == "avx512.pmul.dq.512" ||
1898  Name.startswith("avx512.mask.pmul.dq."))) {
1899  Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
1900  } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
1901  Name == "sse2.cvtsi2sd" ||
1902  Name == "sse.cvtsi642ss" ||
1903  Name == "sse2.cvtsi642sd")) {
1904  Rep = Builder.CreateSIToFP(CI->getArgOperand(1),
1905  CI->getType()->getVectorElementType());
1906  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
1907  } else if (IsX86 && Name == "avx512.cvtusi2sd") {
1908  Rep = Builder.CreateUIToFP(CI->getArgOperand(1),
1909  CI->getType()->getVectorElementType());
1910  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
1911  } else if (IsX86 && Name == "sse2.cvtss2sd") {
1912  Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
1913  Rep = Builder.CreateFPExt(Rep, CI->getType()->getVectorElementType());
1914  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
1915  } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
1916  Name == "sse2.cvtdq2ps" ||
1917  Name == "avx.cvtdq2.pd.256" ||
1918  Name == "avx.cvtdq2.ps.256" ||
1919  Name.startswith("avx512.mask.cvtdq2pd.") ||
1920  Name.startswith("avx512.mask.cvtudq2pd.") ||
1921  Name == "avx512.mask.cvtdq2ps.128" ||
1922  Name == "avx512.mask.cvtdq2ps.256" ||
1923  Name == "avx512.mask.cvtudq2ps.128" ||
1924  Name == "avx512.mask.cvtudq2ps.256" ||
1925  Name == "avx512.mask.cvtqq2pd.128" ||
1926  Name == "avx512.mask.cvtqq2pd.256" ||
1927  Name == "avx512.mask.cvtuqq2pd.128" ||
1928  Name == "avx512.mask.cvtuqq2pd.256" ||
1929  Name == "sse2.cvtps2pd" ||
1930  Name == "avx.cvt.ps2.pd.256" ||
1931  Name == "avx512.mask.cvtps2pd.128" ||
1932  Name == "avx512.mask.cvtps2pd.256")) {
1933  Type *DstTy = CI->getType();
1934  Rep = CI->getArgOperand(0);
1935 
1936  unsigned NumDstElts = DstTy->getVectorNumElements();
1937  if (NumDstElts < Rep->getType()->getVectorNumElements()) {
1938  assert(NumDstElts == 2 && "Unexpected vector size");
1939  uint32_t ShuffleMask[2] = { 0, 1 };
1940  Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask);
1941  }
1942 
1943  bool IsPS2PD = (StringRef::npos != Name.find("ps2"));
1944  bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
1945  if (IsPS2PD)
1946  Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
1947  else if (IsUnsigned)
1948  Rep = Builder.CreateUIToFP(Rep, DstTy, "cvt");
1949  else
1950  Rep = Builder.CreateSIToFP(Rep, DstTy, "cvt");
1951 
1952  if (CI->getNumArgOperands() == 3)
1953  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1954  CI->getArgOperand(1));
1955  } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
1956  Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
1957  CI->getArgOperand(1), CI->getArgOperand(2),
1958  /*Aligned*/false);
1959  } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
1960  Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
1961  CI->getArgOperand(1),CI->getArgOperand(2),
1962  /*Aligned*/true);
1963  } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
1964  Type *ResultTy = CI->getType();
1965  Type *PtrTy = ResultTy->getVectorElementType();
1966 
1967  // Cast the pointer to element type.
1968  Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
1970 
1971  Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
1972  ResultTy->getVectorNumElements());
1973 
1976  ResultTy);
1977  Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
1978  } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
1979  Type *ResultTy = CI->getArgOperand(1)->getType();
1980  Type *PtrTy = ResultTy->getVectorElementType();
1981 
1982  // Cast the pointer to element type.
1983  Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
1985 
1986  Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
1987  ResultTy->getVectorNumElements());
1988 
1991  ResultTy);
1992  Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
1993  } else if (IsX86 && Name.startswith("xop.vpcom")) {
1994  Intrinsic::ID intID;
1995  if (Name.endswith("ub"))
1997  else if (Name.endswith("uw"))
1999  else if (Name.endswith("ud"))
2001  else if (Name.endswith("uq"))
2003  else if (Name.endswith("b"))
2004  intID = Intrinsic::x86_xop_vpcomb;
2005  else if (Name.endswith("w"))
2006  intID = Intrinsic::x86_xop_vpcomw;
2007  else if (Name.endswith("d"))
2008  intID = Intrinsic::x86_xop_vpcomd;
2009  else if (Name.endswith("q"))
2010  intID = Intrinsic::x86_xop_vpcomq;
2011  else
2012  llvm_unreachable("Unknown suffix");
2013 
2014  Name = Name.substr(9); // strip off "xop.vpcom"
2015  unsigned Imm;
2016  if (Name.startswith("lt"))
2017  Imm = 0;
2018  else if (Name.startswith("le"))
2019  Imm = 1;
2020  else if (Name.startswith("gt"))
2021  Imm = 2;
2022  else if (Name.startswith("ge"))
2023  Imm = 3;
2024  else if (Name.startswith("eq"))
2025  Imm = 4;
2026  else if (Name.startswith("ne"))
2027  Imm = 5;
2028  else if (Name.startswith("false"))
2029  Imm = 6;
2030  else if (Name.startswith("true"))
2031  Imm = 7;
2032  else
2033  llvm_unreachable("Unknown condition");
2034 
2036  Rep =
2037  Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
2038  Builder.getInt8(Imm)});
2039  } else if (IsX86 && Name.startswith("xop.vpcmov")) {
2040  Value *Sel = CI->getArgOperand(2);
2041  Value *NotSel = Builder.CreateNot(Sel);
2042  Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2043  Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2044  Rep = Builder.CreateOr(Sel0, Sel1);
2045  } else if (IsX86 && (Name.startswith("xop.vprot") ||
2046  Name.startswith("avx512.prol") ||
2047  Name.startswith("avx512.mask.prol"))) {
2048  Rep = upgradeX86Rotate(Builder, *CI, false);
2049  } else if (IsX86 && (Name.startswith("avx512.pror") ||
2050  Name.startswith("avx512.mask.pror"))) {
2051  Rep = upgradeX86Rotate(Builder, *CI, true);
2052  } else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
2053  Name.startswith("avx512.mask.vpshld") ||
2054  Name.startswith("avx512.maskz.vpshld"))) {
2055  bool ZeroMask = Name[11] == 'z';
2056  Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2057  } else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
2058  Name.startswith("avx512.mask.vpshrd") ||
2059  Name.startswith("avx512.maskz.vpshrd"))) {
2060  bool ZeroMask = Name[11] == 'z';
2061  Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2062  } else if (IsX86 && Name == "sse42.crc32.64.8") {
2065  Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2066  Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
2067  Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2068  } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
2069  Name.startswith("avx512.vbroadcast.s"))) {
2070  // Replace broadcasts with a series of insertelements.
2071  Type *VecTy = CI->getType();
2072  Type *EltTy = VecTy->getVectorElementType();
2073  unsigned EltNum = VecTy->getVectorNumElements();
2074  Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
2075  EltTy->getPointerTo());
2076  Value *Load = Builder.CreateLoad(EltTy, Cast);
2077  Type *I32Ty = Type::getInt32Ty(C);
2078  Rep = UndefValue::get(VecTy);
2079  for (unsigned I = 0; I < EltNum; ++I)
2080  Rep = Builder.CreateInsertElement(Rep, Load,
2081  ConstantInt::get(I32Ty, I));
2082  } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
2083  Name.startswith("sse41.pmovzx") ||
2084  Name.startswith("avx2.pmovsx") ||
2085  Name.startswith("avx2.pmovzx") ||
2086  Name.startswith("avx512.mask.pmovsx") ||
2087  Name.startswith("avx512.mask.pmovzx"))) {
2088  VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
2089  VectorType *DstTy = cast<VectorType>(CI->getType());
2090  unsigned NumDstElts = DstTy->getNumElements();
2091 
2092  // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2093  SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
2094  for (unsigned i = 0; i != NumDstElts; ++i)
2095  ShuffleMask[i] = i;
2096 
2097  Value *SV = Builder.CreateShuffleVector(
2098  CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
2099 
2100  bool DoSext = (StringRef::npos != Name.find("pmovsx"));
2101  Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
2102  : Builder.CreateZExt(SV, DstTy);
2103  // If there are 3 arguments, it's a masked intrinsic so we need a select.
2104  if (CI->getNumArgOperands() == 3)
2105  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2106  CI->getArgOperand(1));
2107  } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
2108  Name == "avx2.vbroadcasti128")) {
2109  // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2110  Type *EltTy = CI->getType()->getVectorElementType();
2111  unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2112  Type *VT = VectorType::get(EltTy, NumSrcElts);
2113  Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
2115  Value *Load = Builder.CreateAlignedLoad(Op, 1);
2116  if (NumSrcElts == 2)
2117  Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
2118  { 0, 1, 0, 1 });
2119  else
2120  Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
2121  { 0, 1, 2, 3, 0, 1, 2, 3 });
2122  } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
2123  Name.startswith("avx512.mask.shuf.f"))) {
2124  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2125  Type *VT = CI->getType();
2126  unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2127  unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2128  unsigned ControlBitsMask = NumLanes - 1;
2129  unsigned NumControlBits = NumLanes / 2;
2130  SmallVector<uint32_t, 8> ShuffleMask(0);
2131 
2132  for (unsigned l = 0; l != NumLanes; ++l) {
2133  unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2134  // We actually need the other source.
2135  if (l >= NumLanes / 2)
2136  LaneMask += NumLanes;
2137  for (unsigned i = 0; i != NumElementsInLane; ++i)
2138  ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
2139  }
2140  Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2141  CI->getArgOperand(1), ShuffleMask);
2142  Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2143  CI->getArgOperand(3));
2144  }else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
2145  Name.startswith("avx512.mask.broadcasti"))) {
2146  unsigned NumSrcElts =
2148  unsigned NumDstElts = CI->getType()->getVectorNumElements();
2149 
2150  SmallVector<uint32_t, 8> ShuffleMask(NumDstElts);
2151  for (unsigned i = 0; i != NumDstElts; ++i)
2152  ShuffleMask[i] = i % NumSrcElts;
2153 
2154  Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2155  CI->getArgOperand(0),
2156  ShuffleMask);
2157  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2158  CI->getArgOperand(1));
2159  } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
2160  Name.startswith("avx2.vbroadcast") ||
2161  Name.startswith("avx512.pbroadcast") ||
2162  Name.startswith("avx512.mask.broadcast.s"))) {
2163  // Replace vp?broadcasts with a vector shuffle.
2164  Value *Op = CI->getArgOperand(0);
2165  unsigned NumElts = CI->getType()->getVectorNumElements();
2166  Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
2167  Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
2168  Constant::getNullValue(MaskTy));
2169 
2170  if (CI->getNumArgOperands() == 3)
2171  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2172  CI->getArgOperand(1));
2173  } else if (IsX86 && (Name.startswith("sse2.padds.") ||
2174  Name.startswith("sse2.psubs.") ||
2175  Name.startswith("avx2.padds.") ||
2176  Name.startswith("avx2.psubs.") ||
2177  Name.startswith("avx512.padds.") ||
2178  Name.startswith("avx512.psubs.") ||
2179  Name.startswith("avx512.mask.padds.") ||
2180  Name.startswith("avx512.mask.psubs."))) {
2181  bool IsAdd = Name.contains(".padds");
2182  Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, true, IsAdd);
2183  } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
2184  Name.startswith("sse2.psubus.") ||
2185  Name.startswith("avx2.paddus.") ||
2186  Name.startswith("avx2.psubus.") ||
2187  Name.startswith("avx512.mask.paddus.") ||
2188  Name.startswith("avx512.mask.psubus."))) {
2189  bool IsAdd = Name.contains(".paddus");
2190  Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, false, IsAdd);
2191  } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
2192  Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2193  CI->getArgOperand(1),
2194  CI->getArgOperand(2),
2195  CI->getArgOperand(3),
2196  CI->getArgOperand(4),
2197  false);
2198  } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
2199  Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2200  CI->getArgOperand(1),
2201  CI->getArgOperand(2),
2202  CI->getArgOperand(3),
2203  CI->getArgOperand(4),
2204  true);
2205  } else if (IsX86 && (Name == "sse2.psll.dq" ||
2206  Name == "avx2.psll.dq")) {
2207  // 128/256-bit shift left specified in bits.
2208  unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2209  Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
2210  Shift / 8); // Shift is in bits.
2211  } else if (IsX86 && (Name == "sse2.psrl.dq" ||
2212  Name == "avx2.psrl.dq")) {
2213  // 128/256-bit shift right specified in bits.
2214  unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2215  Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
2216  Shift / 8); // Shift is in bits.
2217  } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
2218  Name == "avx2.psll.dq.bs" ||
2219  Name == "avx512.psll.dq.512")) {
2220  // 128/256/512-bit shift left specified in bytes.
2221  unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2222  Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2223  } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
2224  Name == "avx2.psrl.dq.bs" ||
2225  Name == "avx512.psrl.dq.512")) {
2226  // 128/256/512-bit shift right specified in bytes.
2227  unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2228  Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2229  } else if (IsX86 && (Name == "sse41.pblendw" ||
2230  Name.startswith("sse41.blendp") ||
2231  Name.startswith("avx.blend.p") ||
2232  Name == "avx2.pblendw" ||
2233  Name.startswith("avx2.pblendd."))) {
2234  Value *Op0 = CI->getArgOperand(0);
2235  Value *Op1 = CI->getArgOperand(1);
2236  unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2237  VectorType *VecTy = cast<VectorType>(CI->getType());
2238  unsigned NumElts = VecTy->getNumElements();
2239 
2240  SmallVector<uint32_t, 16> Idxs(NumElts);
2241  for (unsigned i = 0; i != NumElts; ++i)
2242  Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
2243 
2244  Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2245  } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
2246  Name == "avx2.vinserti128" ||
2247  Name.startswith("avx512.mask.insert"))) {
2248  Value *Op0 = CI->getArgOperand(0);
2249  Value *Op1 = CI->getArgOperand(1);
2250  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2251  unsigned DstNumElts = CI->getType()->getVectorNumElements();
2252  unsigned SrcNumElts = Op1->getType()->getVectorNumElements();
2253  unsigned Scale = DstNumElts / SrcNumElts;
2254 
2255  // Mask off the high bits of the immediate value; hardware ignores those.
2256  Imm = Imm % Scale;
2257 
2258  // Extend the second operand into a vector the size of the destination.
2259  Value *UndefV = UndefValue::get(Op1->getType());
2260  SmallVector<uint32_t, 8> Idxs(DstNumElts);
2261  for (unsigned i = 0; i != SrcNumElts; ++i)
2262  Idxs[i] = i;
2263  for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
2264  Idxs[i] = SrcNumElts;
2265  Rep = Builder.CreateShuffleVector(Op1, UndefV, Idxs);
2266 
2267  // Insert the second operand into the first operand.
2268 
2269  // Note that there is no guarantee that instruction lowering will actually
2270  // produce a vinsertf128 instruction for the created shuffles. In
2271  // particular, the 0 immediate case involves no lane changes, so it can
2272  // be handled as a blend.
2273 
2274  // Example of shuffle mask for 32-bit elements:
2275  // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
2276  // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
2277 
2278  // First fill with identity mask.
2279  for (unsigned i = 0; i != DstNumElts; ++i)
2280  Idxs[i] = i;
2281  // Then replace the elements where we need to insert.
2282  for (unsigned i = 0; i != SrcNumElts; ++i)
2283  Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
2284  Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
2285 
2286  // If the intrinsic has a mask operand, handle that.
2287  if (CI->getNumArgOperands() == 5)
2288  Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2289  CI->getArgOperand(3));
2290  } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
2291  Name == "avx2.vextracti128" ||
2292  Name.startswith("avx512.mask.vextract"))) {
2293  Value *Op0 = CI->getArgOperand(0);
2294  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2295  unsigned DstNumElts = CI->getType()->getVectorNumElements();
2296  unsigned SrcNumElts = Op0->getType()->getVectorNumElements();
2297  unsigned Scale = SrcNumElts / DstNumElts;
2298 
2299  // Mask off the high bits of the immediate value; hardware ignores those.
2300  Imm = Imm % Scale;
2301 
2302  // Get indexes for the subvector of the input vector.
2303  SmallVector<uint32_t, 8> Idxs(DstNumElts);
2304  for (unsigned i = 0; i != DstNumElts; ++i) {
2305  Idxs[i] = i + (Imm * DstNumElts);
2306  }
2307  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2308 
2309  // If the intrinsic has a mask operand, handle that.
2310  if (CI->getNumArgOperands() == 4)
2311  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2312  CI->getArgOperand(2));
2313  } else if (!IsX86 && Name == "stackprotectorcheck") {
2314  Rep = nullptr;
2315  } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
2316  Name.startswith("avx512.mask.perm.di."))) {
2317  Value *Op0 = CI->getArgOperand(0);
2318  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2319  VectorType *VecTy = cast<VectorType>(CI->getType());
2320  unsigned NumElts = VecTy->getNumElements();
2321 
2322  SmallVector<uint32_t, 8> Idxs(NumElts);
2323  for (unsigned i = 0; i != NumElts; ++i)
2324  Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
2325 
2326  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2327 
2328  if (CI->getNumArgOperands() == 4)
2329  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2330  CI->getArgOperand(2));
2331  } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
2332  Name == "avx2.vperm2i128")) {
2333  // The immediate permute control byte looks like this:
2334  // [1:0] - select 128 bits from sources for low half of destination
2335  // [2] - ignore
2336  // [3] - zero low half of destination
2337  // [5:4] - select 128 bits from sources for high half of destination
2338  // [6] - ignore
2339  // [7] - zero high half of destination
2340 
2341  uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2342 
2343  unsigned NumElts = CI->getType()->getVectorNumElements();
2344  unsigned HalfSize = NumElts / 2;
2345  SmallVector<uint32_t, 8> ShuffleMask(NumElts);
2346 
2347  // Determine which operand(s) are actually in use for this instruction.
2348  Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2349  Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
2350 
2351  // If needed, replace operands based on zero mask.
2352  V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
2353  V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
2354 
2355  // Permute low half of result.
2356  unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
2357  for (unsigned i = 0; i < HalfSize; ++i)
2358  ShuffleMask[i] = StartIndex + i;
2359 
2360  // Permute high half of result.
2361  StartIndex = (Imm & 0x10) ? HalfSize : 0;
2362  for (unsigned i = 0; i < HalfSize; ++i)
2363  ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
2364 
2365  Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
2366 
2367  } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
2368  Name == "sse2.pshuf.d" ||
2369  Name.startswith("avx512.mask.vpermil.p") ||
2370  Name.startswith("avx512.mask.pshuf.d."))) {
2371  Value *Op0 = CI->getArgOperand(0);
2372  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2373  VectorType *VecTy = cast<VectorType>(CI->getType());
2374  unsigned NumElts = VecTy->getNumElements();
2375  // Calculate the size of each index in the immediate.
2376  unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
2377  unsigned IdxMask = ((1 << IdxSize) - 1);
2378 
2379  SmallVector<uint32_t, 8> Idxs(NumElts);
2380  // Lookup the bits for this element, wrapping around the immediate every
2381  // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
2382  // to offset by the first index of each group.
2383  for (unsigned i = 0; i != NumElts; ++i)
2384  Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
2385 
2386  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2387 
2388  if (CI->getNumArgOperands() == 4)
2389  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2390  CI->getArgOperand(2));
2391  } else if (IsX86 && (Name == "sse2.pshufl.w" ||
2392  Name.startswith("avx512.mask.pshufl.w."))) {
2393  Value *Op0 = CI->getArgOperand(0);
2394  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2395  unsigned NumElts = CI->getType()->getVectorNumElements();
2396 
2397  SmallVector<uint32_t, 16> Idxs(NumElts);
2398  for (unsigned l = 0; l != NumElts; l += 8) {
2399  for (unsigned i = 0; i != 4; ++i)
2400  Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
2401  for (unsigned i = 4; i != 8; ++i)
2402  Idxs[i + l] = i + l;
2403  }
2404 
2405  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2406 
2407  if (CI->getNumArgOperands() == 4)
2408  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2409  CI->getArgOperand(2));
2410  } else if (IsX86 && (Name == "sse2.pshufh.w" ||
2411  Name.startswith("avx512.mask.pshufh.w."))) {
2412  Value *Op0 = CI->getArgOperand(0);
2413  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2414  unsigned NumElts = CI->getType()->getVectorNumElements();
2415 
2416  SmallVector<uint32_t, 16> Idxs(NumElts);
2417  for (unsigned l = 0; l != NumElts; l += 8) {
2418  for (unsigned i = 0; i != 4; ++i)
2419  Idxs[i + l] = i + l;
2420  for (unsigned i = 0; i != 4; ++i)
2421  Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
2422  }
2423 
2424  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2425 
2426  if (CI->getNumArgOperands() == 4)
2427  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2428  CI->getArgOperand(2));
2429  } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
2430  Value *Op0 = CI->getArgOperand(0);
2431  Value *Op1 = CI->getArgOperand(1);
2432  unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2433  unsigned NumElts = CI->getType()->getVectorNumElements();
2434 
2435  unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2436  unsigned HalfLaneElts = NumLaneElts / 2;
2437 
2438  SmallVector<uint32_t, 16> Idxs(NumElts);
2439  for (unsigned i = 0; i != NumElts; ++i) {
2440  // Base index is the starting element of the lane.
2441  Idxs[i] = i - (i % NumLaneElts);
2442  // If we are half way through the lane switch to the other source.
2443  if ((i % NumLaneElts) >= HalfLaneElts)
2444  Idxs[i] += NumElts;
2445  // Now select the specific element by adding HalfLaneElts bits from the
2446  // immediate, wrapping around the immediate every 8 bits.
2447  Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
2448  }
2449 
2450  Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2451 
2452  Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
2453  CI->getArgOperand(3));
2454  } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
2455  Name.startswith("avx512.mask.movshdup") ||
2456  Name.startswith("avx512.mask.movsldup"))) {
2457  Value *Op0 = CI->getArgOperand(0);
2458  unsigned NumElts = CI->getType()->getVectorNumElements();
2459  unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2460 
2461  unsigned Offset = 0;
2462  if (Name.startswith("avx512.mask.movshdup."))
2463  Offset = 1;
2464 
2465  SmallVector<uint32_t, 16> Idxs(NumElts);
2466  for (unsigned l = 0; l != NumElts; l += NumLaneElts)
2467  for (unsigned i = 0; i != NumLaneElts; i += 2) {
2468  Idxs[i + l + 0] = i + l + Offset;
2469  Idxs[i + l + 1] = i + l + Offset;
2470  }
2471 
2472  Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
2473 
2474  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2475  CI->getArgOperand(1));
2476  } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
2477  Name.startswith("avx512.mask.unpckl."))) {
2478  Value *Op0 = CI->getArgOperand(0);
2479  Value *Op1 = CI->getArgOperand(1);
2480  int NumElts = CI->getType()->getVectorNumElements();
2481  int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2482 
2483  SmallVector<uint32_t, 64> Idxs(NumElts);
2484  for (int l = 0; l != NumElts; l += NumLaneElts)
2485  for (int i = 0; i != NumLaneElts; ++i)
2486  Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
2487 
2488  Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2489 
2490  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2491  CI->getArgOperand(2));
2492  } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
2493  Name.startswith("avx512.mask.unpckh."))) {
2494  Value *Op0 = CI->getArgOperand(0);
2495  Value *Op1 = CI->getArgOperand(1);
2496  int NumElts = CI->getType()->getVectorNumElements();
2497  int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
2498 
2499  SmallVector<uint32_t, 64> Idxs(NumElts);
2500  for (int l = 0; l != NumElts; l += NumLaneElts)
2501  for (int i = 0; i != NumLaneElts; ++i)
2502  Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
2503 
2504  Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2505 
2506  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2507  CI->getArgOperand(2));
2508  } else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
2509  Name.startswith("avx512.mask.pand."))) {
2510  VectorType *FTy = cast<VectorType>(CI->getType());
2511  VectorType *ITy = VectorType::getInteger(FTy);
2512  Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2513  Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2514  Rep = Builder.CreateBitCast(Rep, FTy);
2515  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2516  CI->getArgOperand(2));
2517  } else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
2518  Name.startswith("avx512.mask.pandn."))) {
2519  VectorType *FTy = cast<VectorType>(CI->getType());
2520  VectorType *ITy = VectorType::getInteger(FTy);
2521  Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
2522  Rep = Builder.CreateAnd(Rep,
2523  Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2524  Rep = Builder.CreateBitCast(Rep, FTy);
2525  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2526  CI->getArgOperand(2));
2527  } else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
2528  Name.startswith("avx512.mask.por."))) {
2529  VectorType *FTy = cast<VectorType>(CI->getType());
2530  VectorType *ITy = VectorType::getInteger(FTy);
2531  Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2532  Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2533  Rep = Builder.CreateBitCast(Rep, FTy);
2534  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2535  CI->getArgOperand(2));
2536  } else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
2537  Name.startswith("avx512.mask.pxor."))) {
2538  VectorType *FTy = cast<VectorType>(CI->getType());
2539  VectorType *ITy = VectorType::getInteger(FTy);
2540  Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
2541  Builder.CreateBitCast(CI->getArgOperand(1), ITy));
2542  Rep = Builder.CreateBitCast(Rep, FTy);
2543  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2544  CI->getArgOperand(2));
2545  } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
2546  Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
2547  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2548  CI->getArgOperand(2));
2549  } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
2550  Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
2551  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2552  CI->getArgOperand(2));
2553  } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
2554  Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
2555  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2556  CI->getArgOperand(2));
2557  } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
2558  if (Name.endswith(".512")) {
2559  Intrinsic::ID IID;
2560  if (Name[17] == 's')
2562  else
2564 
2565  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2566  { CI->getArgOperand(0), CI->getArgOperand(1),
2567  CI->getArgOperand(4) });
2568  } else {
2569  Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
2570  }
2571  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2572  CI->getArgOperand(2));
2573  } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
2574  if (Name.endswith(".512")) {
2575  Intrinsic::ID IID;
2576  if (Name[17] == 's')
2578  else
2580 
2581  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2582  { CI->getArgOperand(0), CI->getArgOperand(1),
2583  CI->getArgOperand(4) });
2584  } else {
2585  Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
2586  }
2587  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2588  CI->getArgOperand(2));
2589  } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
2590  if (Name.endswith(".512")) {
2591  Intrinsic::ID IID;
2592  if (Name[17] == 's')
2594  else
2596 
2597  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2598  { CI->getArgOperand(0), CI->getArgOperand(1),
2599  CI->getArgOperand(4) });
2600  } else {
2601  Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
2602  }
2603  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2604  CI->getArgOperand(2));
2605  } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
2606  if (Name.endswith(".512")) {
2607  Intrinsic::ID IID;
2608  if (Name[17] == 's')
2610  else
2612 
2613  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2614  { CI->getArgOperand(0), CI->getArgOperand(1),
2615  CI->getArgOperand(4) });
2616  } else {
2617  Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
2618  }
2619  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2620  CI->getArgOperand(2));
2621  } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
2622  Name.startswith("avx512.mask.min.p")) &&
2623  Name.drop_front(18) == ".512") {
2624  bool IsDouble = Name[17] == 'd';
2625  bool IsMin = Name[13] == 'i';
2626  static const Intrinsic::ID MinMaxTbl[2][2] = {
2629  };
2630  Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
2631 
2632  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2633  { CI->getArgOperand(0), CI->getArgOperand(1),
2634  CI->getArgOperand(4) });
2635  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2636  CI->getArgOperand(2));
2637  } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
2638  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2640  CI->getType()),
2641  { CI->getArgOperand(0), Builder.getInt1(false) });
2642  Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2643  CI->getArgOperand(1));
2644  } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
2645  bool IsImmediate = Name[16] == 'i' ||
2646  (Name.size() > 18 && Name[18] == 'i');
2647  bool IsVariable = Name[16] == 'v';
2648  char Size = Name[16] == '.' ? Name[17] :
2649  Name[17] == '.' ? Name[18] :
2650  Name[18] == '.' ? Name[19] :
2651  Name[20];
2652 
2653  Intrinsic::ID IID;
2654  if (IsVariable && Name[17] != '.') {
2655  if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
2657  else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
2659  else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
2661  else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
2663  else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
2665  else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
2667  else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
2669  else
2670  llvm_unreachable("Unexpected size");
2671  } else if (Name.endswith(".128")) {
2672  if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
2673  IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
2675  else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
2676  IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
2678  else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
2679  IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
2681  else
2682  llvm_unreachable("Unexpected size");
2683  } else if (Name.endswith(".256")) {
2684  if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
2685  IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
2687  else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
2688  IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
2690  else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
2691  IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
2693  else
2694  llvm_unreachable("Unexpected size");
2695  } else {
2696  if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
2697  IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
2698  IsVariable ? Intrinsic::x86_avx512_psllv_d_512 :
2700  else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
2701  IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
2702  IsVariable ? Intrinsic::x86_avx512_psllv_q_512 :
2704  else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
2705  IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
2707  else
2708  llvm_unreachable("Unexpected size");
2709  }
2710 
2711  Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
2712  } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
2713  bool IsImmediate = Name[16] == 'i' ||
2714  (Name.size() > 18 && Name[18] == 'i');
2715  bool IsVariable = Name[16] == 'v';
2716  char Size = Name[16] == '.' ? Name[17] :
2717  Name[17] == '.' ? Name[18] :
2718  Name[18] == '.' ? Name[19] :
2719  Name[20];
2720 
2721  Intrinsic::ID IID;
2722  if (IsVariable && Name[17] != '.') {
2723  if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
2725  else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
2727  else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
2729  else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
2731  else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
2733  else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
2735  else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
2737  else
2738  llvm_unreachable("Unexpected size");
2739  } else if (Name.endswith(".128")) {
2740  if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
2741  IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
2743  else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
2744  IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
2746  else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
2747  IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
2749  else
2750  llvm_unreachable("Unexpected size");
2751  } else if (Name.endswith(".256")) {
2752  if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
2753  IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
2755  else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
2756  IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
2758  else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
2759  IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
2761  else
2762  llvm_unreachable("Unexpected size");
2763  } else {
2764  if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
2765  IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
2766  IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 :
2768  else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
2769  IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
2770  IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 :
2772  else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
2773  IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
2775  else
2776  llvm_unreachable("Unexpected size");
2777  }
2778 
2779  Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
2780  } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
2781  bool IsImmediate = Name[16] == 'i' ||
2782  (Name.size() > 18 && Name[18] == 'i');
2783  bool IsVariable = Name[16] == 'v';
2784  char Size = Name[16] == '.' ? Name[17] :
2785  Name[17] == '.' ? Name[18] :
2786  Name[18] == '.' ? Name[19] :
2787  Name[20];
2788 
2789  Intrinsic::ID IID;
2790  if (IsVariable && Name[17] != '.') {
2791  if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
2793  else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
2795  else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
2797  else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
2799  else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
2801  else
2802  llvm_unreachable("Unexpected size");
2803  } else if (Name.endswith(".128")) {
2804  if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
2805  IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
2807  else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
2808  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
2809  IsVariable ? Intrinsic::x86_avx512_psrav_q_128 :
2811  else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
2812  IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
2814  else
2815  llvm_unreachable("Unexpected size");
2816  } else if (Name.endswith(".256")) {
2817  if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
2818  IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
2820  else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
2821  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
2822  IsVariable ? Intrinsic::x86_avx512_psrav_q_256 :
2824  else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
2825  IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
2827  else
2828  llvm_unreachable("Unexpected size");
2829  } else {
2830  if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
2831  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
2832  IsVariable ? Intrinsic::x86_avx512_psrav_d_512 :
2834  else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
2835  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
2836  IsVariable ? Intrinsic::x86_avx512_psrav_q_512 :
2838  else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
2839  IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
2841  else
2842  llvm_unreachable("Unexpected size");
2843  }
2844 
2845  Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
2846  } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
2847  Rep = upgradeMaskedMove(Builder, *CI);
2848  } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
2849  Rep = UpgradeMaskToInt(Builder, *CI);
2850  } else if (IsX86 && Name.endswith(".movntdqa")) {
2851  Module *M = F->getParent();
2852  MDNode *Node = MDNode::get(
2854 
2855  Value *Ptr = CI->getArgOperand(0);
2856  VectorType *VTy = cast<VectorType>(CI->getType());
2857 
2858  // Convert the type of the pointer to a pointer to the stored type.
2859  Value *BC =
2860  Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast");
2861  LoadInst *LI = Builder.CreateAlignedLoad(BC, VTy->getBitWidth() / 8);
2862  LI->setMetadata(M->getMDKindID("nontemporal"), Node);
2863  Rep = LI;
2864  } else if (IsX86 &&
2865  (Name.startswith("sse2.pavg") || Name.startswith("avx2.pavg") ||
2866  Name.startswith("avx512.mask.pavg"))) {
2867  // llvm.x86.sse2.pavg.b/w, llvm.x86.avx2.pavg.b/w,
2868  // llvm.x86.avx512.mask.pavg.b/w
2869  Value *A = CI->getArgOperand(0);
2870  Value *B = CI->getArgOperand(1);
2872  cast<VectorType>(A->getType()));
2873  Value *ExtendedA = Builder.CreateZExt(A, ZextType);
2874  Value *ExtendedB = Builder.CreateZExt(B, ZextType);
2875  Value *Sum = Builder.CreateAdd(ExtendedA, ExtendedB);
2876  Value *AddOne = Builder.CreateAdd(Sum, ConstantInt::get(ZextType, 1));
2877  Value *ShiftR = Builder.CreateLShr(AddOne, ConstantInt::get(ZextType, 1));
2878  Rep = Builder.CreateTrunc(ShiftR, A->getType());
2879  if (CI->getNumArgOperands() > 2) {
2880  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2881  CI->getArgOperand(2));
2882  }
2883  } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
2884  Name.startswith("fma.vfmsub.") ||
2885  Name.startswith("fma.vfnmadd.") ||
2886  Name.startswith("fma.vfnmsub."))) {
2887  bool NegMul = Name[6] == 'n';
2888  bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
2889  bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
2890 
2891  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
2892  CI->getArgOperand(2) };
2893 
2894  if (IsScalar) {
2895  Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
2896  Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
2897  Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
2898  }
2899 
2900  if (NegMul && !IsScalar)
2901  Ops[0] = Builder.CreateFNeg(Ops[0]);
2902  if (NegMul && IsScalar)
2903  Ops[1] = Builder.CreateFNeg(Ops[1]);
2904  if (NegAcc)
2905  Ops[2] = Builder.CreateFNeg(Ops[2]);
2906 
2907  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
2909  Ops[0]->getType()),
2910  Ops);
2911 
2912  if (IsScalar)
2913  Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
2914  (uint64_t)0);
2915  } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
2916  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
2917  CI->getArgOperand(2) };
2918 
2919  Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
2920  Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
2921  Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
2922 
2923  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
2925  Ops[0]->getType()),
2926  Ops);
2927 
2928  Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
2929  Rep, (uint64_t)0);
2930  } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
2931  Name.startswith("avx512.maskz.vfmadd.s") ||
2932  Name.startswith("avx512.mask3.vfmadd.s") ||
2933  Name.startswith("avx512.mask3.vfmsub.s") ||
2934  Name.startswith("avx512.mask3.vfnmsub.s"))) {
2935  bool IsMask3 = Name[11] == '3';
2936  bool IsMaskZ = Name[11] == 'z';
2937  // Drop the "avx512.mask." to make it easier.
2938  Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
2939  bool NegMul = Name[2] == 'n';
2940  bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
2941 
2942  Value *A = CI->getArgOperand(0);
2943  Value *B = CI->getArgOperand(1);
2944  Value *C = CI->getArgOperand(2);
2945 
2946  if (NegMul && (IsMask3 || IsMaskZ))
2947  A = Builder.CreateFNeg(A);
2948  if (NegMul && !(IsMask3 || IsMaskZ))
2949  B = Builder.CreateFNeg(B);
2950  if (NegAcc)
2951  C = Builder.CreateFNeg(C);
2952 
2953  A = Builder.CreateExtractElement(A, (uint64_t)0);
2954  B = Builder.CreateExtractElement(B, (uint64_t)0);
2955  C = Builder.CreateExtractElement(C, (uint64_t)0);
2956 
2957  if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
2958  cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
2959  Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
2960 
2961  Intrinsic::ID IID;
2962  if (Name.back() == 'd')
2964  else
2967  Rep = Builder.CreateCall(FMA, Ops);
2968  } else {
2971  A->getType());
2972  Rep = Builder.CreateCall(FMA, { A, B, C });
2973  }
2974 
2975  Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
2976  IsMask3 ? C : A;
2977 
2978  // For Mask3 with NegAcc, we need to create a new extractelement that
2979  // avoids the negation above.
2980  if (NegAcc && IsMask3)
2981  PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
2982  (uint64_t)0);
2983 
2984  Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3),
2985  Rep, PassThru);
2986  Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
2987  Rep, (uint64_t)0);
2988  } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
2989  Name.startswith("avx512.mask.vfnmadd.p") ||
2990  Name.startswith("avx512.mask.vfnmsub.p") ||
2991  Name.startswith("avx512.mask3.vfmadd.p") ||
2992  Name.startswith("avx512.mask3.vfmsub.p") ||
2993  Name.startswith("avx512.mask3.vfnmsub.p") ||
2994  Name.startswith("avx512.maskz.vfmadd.p"))) {
2995  bool IsMask3 = Name[11] == '3';
2996  bool IsMaskZ = Name[11] == 'z';
2997  // Drop the "avx512.mask." to make it easier.
2998  Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
2999  bool NegMul = Name[2] == 'n';
3000  bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3001 
3002  Value *A = CI->getArgOperand(0);
3003  Value *B = CI->getArgOperand(1);
3004  Value *C = CI->getArgOperand(2);
3005 
3006  if (NegMul && (IsMask3 || IsMaskZ))
3007  A = Builder.CreateFNeg(A);
3008  if (NegMul && !(IsMask3 || IsMaskZ))
3009  B = Builder.CreateFNeg(B);
3010  if (NegAcc)
3011  C = Builder.CreateFNeg(C);
3012 
3013  if (CI->getNumArgOperands() == 5 &&
3014  (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3015  cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3016  Intrinsic::ID IID;
3017  // Check the character before ".512" in string.
3018  if (Name[Name.size()-5] == 's')
3020  else
3022 
3023  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3024  { A, B, C, CI->getArgOperand(4) });
3025  } else {
3028  A->getType());
3029  Rep = Builder.CreateCall(FMA, { A, B, C });
3030  }
3031 
3032  Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3033  IsMask3 ? CI->getArgOperand(2) :
3034  CI->getArgOperand(0);
3035 
3036  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3037  } else if (IsX86 && (Name.startswith("fma.vfmaddsub.p") ||
3038  Name.startswith("fma.vfmsubadd.p"))) {
3039  bool IsSubAdd = Name[7] == 's';
3040  int NumElts = CI->getType()->getVectorNumElements();
3041 
3042  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3043  CI->getArgOperand(2) };
3044 
3046  Ops[0]->getType());
3047  Value *Odd = Builder.CreateCall(FMA, Ops);
3048  Ops[2] = Builder.CreateFNeg(Ops[2]);
3049  Value *Even = Builder.CreateCall(FMA, Ops);
3050 
3051  if (IsSubAdd)
3052  std::swap(Even, Odd);
3053 
3054  SmallVector<uint32_t, 32> Idxs(NumElts);
3055  for (int i = 0; i != NumElts; ++i)
3056  Idxs[i] = i + (i % 2) * NumElts;
3057 
3058  Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3059  } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
3060  Name.startswith("avx512.mask3.vfmaddsub.p") ||
3061  Name.startswith("avx512.maskz.vfmaddsub.p") ||
3062  Name.startswith("avx512.mask3.vfmsubadd.p"))) {
3063  bool IsMask3 = Name[11] == '3';
3064  bool IsMaskZ = Name[11] == 'z';
3065  // Drop the "avx512.mask." to make it easier.
3066  Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3067  bool IsSubAdd = Name[3] == 's';
3068  if (CI->getNumArgOperands() == 5 &&
3069  (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3070  cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3071  Intrinsic::ID IID;
3072  // Check the character before ".512" in string.
3073  if (Name[Name.size()-5] == 's')
3075  else
3077 
3078  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3079  CI->getArgOperand(2), CI->getArgOperand(4) };
3080  if (IsSubAdd)
3081  Ops[2] = Builder.CreateFNeg(Ops[2]);
3082 
3083  Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3084  {CI->getArgOperand(0), CI->getArgOperand(1),
3085  CI->getArgOperand(2), CI->getArgOperand(4)});
3086  } else {
3087  int NumElts = CI->getType()->getVectorNumElements();
3088 
3089  Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3090  CI->getArgOperand(2) };
3091 
3093  Ops[0]->getType());
3094  Value *Odd = Builder.CreateCall(FMA, Ops);
3095  Ops[2] = Builder.CreateFNeg(Ops[2]);
3096  Value *Even = Builder.CreateCall(FMA, Ops);
3097 
3098  if (IsSubAdd)
3099  std::swap(Even, Odd);
3100 
3101  SmallVector<uint32_t, 32> Idxs(NumElts);
3102  for (int i = 0; i != NumElts; ++i)
3103  Idxs[i] = i + (i % 2) * NumElts;
3104 
3105  Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3106  }
3107 
3108  Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3109  IsMask3 ? CI->getArgOperand(2) :
3110  CI->getArgOperand(0);
3111 
3112  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3113  } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
3114  Name.startswith("avx512.maskz.pternlog."))) {
3115  bool ZeroMask = Name[11] == 'z';
3116  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3117  unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3118  Intrinsic::ID IID;
3119  if (VecWidth == 128 && EltWidth == 32)
3121  else if (VecWidth == 256 && EltWidth == 32)
3123  else if (VecWidth == 512 && EltWidth == 32)
3125  else if (VecWidth == 128 && EltWidth == 64)
3127  else if (VecWidth == 256 && EltWidth == 64)
3129  else if (VecWidth == 512 && EltWidth == 64)
3131  else
3132  llvm_unreachable("Unexpected intrinsic");
3133 
3134  Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3135  CI->getArgOperand(2), CI->getArgOperand(3) };
3136  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3137  Args);
3138  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3139  : CI->getArgOperand(0);
3140  Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3141  } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
3142  Name.startswith("avx512.maskz.vpmadd52"))) {
3143  bool ZeroMask = Name[11] == 'z';
3144  bool High = Name[20] == 'h' || Name[21] == 'h';
3145  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3146  Intrinsic::ID IID;
3147  if (VecWidth == 128 && !High)
3149  else if (VecWidth == 256 && !High)
3151  else if (VecWidth == 512 && !High)
3153  else if (VecWidth == 128 && High)
3155  else if (VecWidth == 256 && High)
3157  else if (VecWidth == 512 && High)
3159  else
3160  llvm_unreachable("Unexpected intrinsic");
3161 
3162  Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3163  CI->getArgOperand(2) };
3164  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3165  Args);
3166  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3167  : CI->getArgOperand(0);
3168  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3169  } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
3170  Name.startswith("avx512.mask.vpermt2var.") ||
3171  Name.startswith("avx512.maskz.vpermt2var."))) {
3172  bool ZeroMask = Name[11] == 'z';
3173  bool IndexForm = Name[17] == 'i';
3174  Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3175  } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
3176  Name.startswith("avx512.maskz.vpdpbusd.") ||
3177  Name.startswith("avx512.mask.vpdpbusds.") ||
3178  Name.startswith("avx512.maskz.vpdpbusds."))) {
3179  bool ZeroMask = Name[11] == 'z';
3180  bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3181  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3182  Intrinsic::ID IID;
3183  if (VecWidth == 128 && !IsSaturating)
3185  else if (VecWidth == 256 && !IsSaturating)
3187  else if (VecWidth == 512 && !IsSaturating)
3189  else if (VecWidth == 128 && IsSaturating)
3191  else if (VecWidth == 256 && IsSaturating)
3193  else if (VecWidth == 512 && IsSaturating)
3195  else
3196  llvm_unreachable("Unexpected intrinsic");
3197 
3198  Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3199  CI->getArgOperand(2) };
3200  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3201  Args);
3202  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3203  : CI->getArgOperand(0);
3204  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3205  } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
3206  Name.startswith("avx512.maskz.vpdpwssd.") ||
3207  Name.startswith("avx512.mask.vpdpwssds.") ||
3208  Name.startswith("avx512.maskz.vpdpwssds."))) {
3209  bool ZeroMask = Name[11] == 'z';
3210  bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3211  unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3212  Intrinsic::ID IID;
3213  if (VecWidth == 128 && !IsSaturating)
3215  else if (VecWidth == 256 && !IsSaturating)
3217  else if (VecWidth == 512 && !IsSaturating)
3219  else if (VecWidth == 128 && IsSaturating)
3221  else if (VecWidth == 256 && IsSaturating)
3223  else if (VecWidth == 512 && IsSaturating)
3225  else
3226  llvm_unreachable("Unexpected intrinsic");
3227 
3228  Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3229  CI->getArgOperand(2) };
3230  Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3231  Args);
3232  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3233  : CI->getArgOperand(0);
3234  Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3235  } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
3236  Name == "addcarry.u32" || Name == "addcarry.u64" ||
3237  Name == "subborrow.u32" || Name == "subborrow.u64")) {
3238  Intrinsic::ID IID;
3239  if (Name[0] == 'a' && Name.back() == '2')
3241  else if (Name[0] == 'a' && Name.back() == '4')
3243  else if (Name[0] == 's' && Name.back() == '2')
3245  else if (Name[0] == 's' && Name.back() == '4')
3247  else
3248  llvm_unreachable("Unexpected intrinsic");
3249 
3250  // Make a call with 3 operands.
3251  Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3252  CI->getArgOperand(2)};
3253  Value *NewCall = Builder.CreateCall(
3255  Args);
3256 
3257  // Extract the second result and store it.
3258  Value *Data = Builder.CreateExtractValue(NewCall, 1);
3259  // Cast the pointer to the right type.
3260  Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
3262  Builder.CreateAlignedStore(Data, Ptr, 1);
3263  // Replace the original call result with the first result of the new call.
3264  Value *CF = Builder.CreateExtractValue(NewCall, 0);
3265 
3266  CI->replaceAllUsesWith(CF);
3267  Rep = nullptr;
3268  } else if (IsX86 && Name.startswith("avx512.mask.") &&
3269  upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
3270  // Rep will be updated by the call in the condition.
3271  } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
3272  Value *Arg = CI->getArgOperand(0);
3273  Value *Neg = Builder.CreateNeg(Arg, "neg");
3274  Value *Cmp = Builder.CreateICmpSGE(
3275  Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
3276  Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
3277  } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
3278  Name == "max.ui" || Name == "max.ull")) {
3279  Value *Arg0 = CI->getArgOperand(0);
3280  Value *Arg1 = CI->getArgOperand(1);
3281  Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3282  ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
3283  : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
3284  Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
3285  } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
3286  Name == "min.ui" || Name == "min.ull")) {
3287  Value *Arg0 = CI->getArgOperand(0);
3288  Value *Arg1 = CI->getArgOperand(1);
3289  Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
3290  ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
3291  : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
3292  Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
3293  } else if (IsNVVM && Name == "clz.ll") {
3294  // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 and returns an i64.
3295  Value *Arg = CI->getArgOperand(0);
3296  Value *Ctlz = Builder.CreateCall(
3298  {Arg->getType()}),
3299  {Arg, Builder.getFalse()}, "ctlz");
3300  Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
3301  } else if (IsNVVM && Name == "popc.ll") {
3302  // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 and returns an
3303  // i64.
3304  Value *Arg = CI->getArgOperand(0);
3305  Value *Popc = Builder.CreateCall(
3307  {Arg->getType()}),
3308  Arg, "ctpop");
3309  Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
3310  } else if (IsNVVM && Name == "h2f") {
3311  Rep = Builder.CreateCall(Intrinsic::getDeclaration(
3313  {Builder.getFloatTy()}),
3314  CI->getArgOperand(0), "h2f");
3315  } else {
3316  llvm_unreachable("Unknown function for CallInst upgrade.");
3317  }
3318 
3319  if (Rep)
3320  CI->replaceAllUsesWith(Rep);
3321  CI->eraseFromParent();
3322  return;
3323  }
3324 
3325  const auto &DefaultCase = [&NewFn, &CI]() -> void {
3326  // Handle generic mangling change, but nothing else
3327  assert(
3328  (CI->getCalledFunction()->getName() != NewFn->getName()) &&
3329  "Unknown function for CallInst upgrade and isn't just a name change");
3330  CI->setCalledFunction(NewFn);
3331  };
3332  CallInst *NewCall = nullptr;
3333  switch (NewFn->getIntrinsicID()) {
3334  default: {
3335  DefaultCase();
3336  return;
3337  }
3338 
3354  CI->arg_operands().end());
3355  NewCall = Builder.CreateCall(NewFn, Args);
3356  break;
3357  }
3358 
3359  case Intrinsic::bitreverse:
3360  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3361  break;
3362 
3363  case Intrinsic::ctlz:
3364  case Intrinsic::cttz:
3365  assert(CI->getNumArgOperands() == 1 &&
3366  "Mismatch between function args and call args");
3367  NewCall =
3368  Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
3369  break;
3370 
3371  case Intrinsic::objectsize: {
3372  Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
3373  ? Builder.getFalse()
3374  : CI->getArgOperand(2);
3375  NewCall = Builder.CreateCall(
3376  NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize});
3377  break;
3378  }
3379 
3380  case Intrinsic::ctpop:
3381  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3382  break;
3383 
3385  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
3386  break;
3387 
3388  case Intrinsic::dbg_value:
3389  // Upgrade from the old version that had an extra offset argument.
3390  assert(CI->getNumArgOperands() == 4);
3391  // Drop nonzero offsets instead of attempting to upgrade them.
3392  if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
3393  if (Offset->isZeroValue()) {
3394  NewCall = Builder.CreateCall(
3395  NewFn,
3396  {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
3397  break;
3398  }
3399  CI->eraseFromParent();
3400  return;
3401 
3404  NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
3405  break;
3406 
3412  CI->arg_operands().end());
3413  VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
3414  VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
3415  Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
3416  NewCall = Builder.CreateCall(NewFn, Args);
3417  break;
3418  }
3419 
3423  // The arguments for these intrinsics used to be v4f32, and changed
3424  // to v2i64. This is purely a nop, since those are bitwise intrinsics.
3425  // So, the only thing required is a bitcast for both arguments.
3426  // First, check the arguments have the old type.
3427  Value *Arg0 = CI->getArgOperand(0);
3428  if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
3429  return;
3430 
3431  // Old intrinsic, add bitcasts
3432  Value *Arg1 = CI->getArgOperand(1);
3433 
3434  Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
3435 
3436  Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
3437  Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
3438 
3439  NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
3440  break;
3441  }
3442 
3443  case Intrinsic::x86_rdtscp: {
3444  // This used to take 1 argument. If we have no arguments, it is already
3445  // upgraded.
3446  if (CI->getNumOperands() == 0)
3447  return;
3448 
3449  NewCall = Builder.CreateCall(NewFn);
3450  // Extract the second result and store it.
3451  Value *Data = Builder.CreateExtractValue(NewCall, 1);
3452  // Cast the pointer to the right type.
3453  Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
3455  Builder.CreateAlignedStore(Data, Ptr, 1);
3456  // Replace the original call result with the first result of the new call.
3457  Value *TSC = Builder.CreateExtractValue(NewCall, 0);
3458 
3459  std::string Name = CI->getName();
3460  if (!Name.empty()) {
3461  CI->setName(Name + ".old");
3462  NewCall->setName(Name);
3463  }
3464  CI->replaceAllUsesWith(TSC);
3465  CI->eraseFromParent();
3466  return;
3467  }
3468 
3475  // Need to truncate the last argument from i32 to i8 -- this argument models
3476  // an inherently 8-bit immediate operand to these x86 instructions.
3478  CI->arg_operands().end());
3479 
3480  // Replace the last argument with a trunc.
3481  Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
3482  NewCall = Builder.CreateCall(NewFn, Args);
3483  break;
3484  }
3485 
3487  NewCall = Builder.CreateCall(NewFn, {});
3488  break;
3489  }
3490 
3498  CI->arg_operands().end());
3499  NewCall = Builder.CreateCall(NewFn, Args);
3500  break;
3501  }
3502 
3503  case Intrinsic::memcpy:
3504  case Intrinsic::memmove:
3505  case Intrinsic::memset: {
3506  // We have to make sure that the call signature is what we're expecting.
3507  // We only want to change the old signatures by removing the alignment arg:
3508  // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
3509  // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
3510  // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
3511  // -> @llvm.memset...(i8*, i8, i[32|64], i1)
3512  // Note: i8*'s in the above can be any pointer type
3513  if (CI->getNumArgOperands() != 5) {
3514  DefaultCase();
3515  return;
3516  }
3517  // Remove alignment argument (3), and add alignment attributes to the
3518  // dest/src pointers.
3519  Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
3520  CI->getArgOperand(2), CI->getArgOperand(4)};
3521  NewCall = Builder.CreateCall(NewFn, Args);
3522  auto *MemCI = cast<MemIntrinsic>(NewCall);
3523  // All mem intrinsics support dest alignment.
3524  const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
3525  MemCI->setDestAlignment(Align->getZExtValue());
3526  // Memcpy/Memmove also support source alignment.
3527  if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
3528  MTI->setSourceAlignment(Align->getZExtValue());
3529  break;
3530  }
3531  }
3532  assert(NewCall && "Should have either set this variable or returned through "
3533  "the default case");
3534  std::string Name = CI->getName();
3535  if (!Name.empty()) {
3536  CI->setName(Name + ".old");
3537  NewCall->setName(Name);
3538  }
3539  CI->replaceAllUsesWith(NewCall);
3540  CI->eraseFromParent();
3541 }
3542 
// Body of the routine that upgrades every call to the intrinsic F.
// NOTE(review): the signature line (listing line 3543) is missing from this
// extract; the tooltip index names it `void UpgradeCallsToIntrinsic(Function
// *F)` -- confirm against the real file.
//
// If UpgradeIntrinsicFunction reports that F needs upgrading, each CallInst
// user of F is passed to UpgradeIntrinsicCall together with NewFn, and F is
// then erased from its parent. Otherwise nothing is changed.
3544  assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
3545 
3546  // Check if this function should be upgraded and get the replacement function
3547  // if there is one.
// NewFn is deliberately uninitialized here; it is passed by reference to
// UpgradeIntrinsicFunction and only read when that call returns true.
3548  Function *NewFn;
3549  if (UpgradeIntrinsicFunction(F, NewFn)) {
3550  // Replace all users of the old function with the new function or new
3551  // instructions. This is not a range loop because the call is deleted.
// The iterator is advanced (`*UI++`) before the call is upgraded, so it no
// longer refers to CI when CI is removed. Users that are not CallInsts are
// skipped by the dyn_cast.
3552  for (auto UI = F->user_begin(), UE = F->user_end(); UI != UE; )
3553  if (CallInst *CI = dyn_cast<CallInst>(*UI++))
3554  UpgradeIntrinsicCall(CI, NewFn);
3555 
3556  // Remove old function, no longer used, from the module.
3557  F->eraseFromParent();
3558  }
3559 }
3560 
// Body of the TBAA tag upgrade: returns either &MD unchanged or a newly
// created MDNode built from MD's operands.
// NOTE(review): the signature line (listing line 3561) and listing lines
// 3572-3573 and 3578-3579 are missing from this extract, so the full contents
// of Elts2 and the definition of the final Elts array are not visible here.
3562  // Check if the tag uses struct-path aware TBAA format.
// NOTE(review): operand 0 is read before the operand count is tested;
// presumably callers never pass a node with zero operands -- confirm.
3563  if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
3564  return &MD;
3565 
3566  auto &Context = MD.getContext();
3567  if (MD.getNumOperands() == 3) {
// Three-operand form: operands 0 and 1 become a new node that is used for
// both of the first two slots, and operand 2 is carried over as the last slot.
3568  Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
3569  MDNode *ScalarType = MDNode::get(Context, Elts);
3570  // Create an MDNode <ScalarType, ScalarType, offset 0, const>
3571  Metadata *Elts2[] = {ScalarType, ScalarType,
3574  MD.getOperand(2)};
3575  return MDNode::get(Context, Elts2);
3576  }
3577  // Create an MDNode <MD, MD, offset 0>
3580  return MDNode::get(Context, Elts);
3581 }
3582 
3583 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
3584  Instruction *&Temp) {
3585  if (Opc != Instruction::BitCast)
3586  return nullptr;
3587 
3588  Temp = nullptr;
3589  Type *SrcTy = V->getType();
3590  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
3591  SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
3592  LLVMContext &Context = V->getContext();
3593 
3594  // We have no information about target data layout, so we assume that
3595  // the maximum pointer size is 64bit.
3596  Type *MidTy = Type::getInt64Ty(Context);
3597  Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
3598 
3599  return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
3600  }
3601 
3602  return nullptr;
3603 }
3604 
// Constant-expression counterpart of the bitcast upgrade: handles a bitcast
// of the constant C to DestTy when both types are pointers (or pointer
// vectors) in different address spaces.
// Returns nullptr when Opc is not Instruction::BitCast or when the address
// spaces match.
// NOTE(review): listing line 3618 (the start of the return expression whose
// last argument is DestTy) is missing from this extract; the tooltip index
// lists ConstantExpr-style getIntToPtr, which suggests an inttoptr of a
// ptrtoint through MidTy -- confirm against the real file.
3605 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
3606  if (Opc != Instruction::BitCast)
3607  return nullptr;
3608 
3609  Type *SrcTy = C->getType();
3610  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
3611  SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
3612  LLVMContext &Context = C->getContext();
3613 
3614  // We have no information about target data layout, so we assume that
3615  // the maximum pointer size is 64bit.
3616  Type *MidTy = Type::getInt64Ty(Context);
3617 
3619  DestTy);
3620  }
3621 
3622  return nullptr;
3623 }
3624 
3625 /// Check the debug info version number; if it is out-dated, drop the debug
3626 /// info. Return true if the module is modified.
// NOTE(review): listing lines 3627-3628 (the signature and the definition of
// `Version`) and line 3638 (the construction of `Diag`) are missing from this
// extract. The tooltip index names the function `bool UpgradeDebugInfo(Module
// &M)` -- confirm against the real file.
3629  if (Version == DEBUG_METADATA_VERSION) {
// The version matches: run the verifier. A true result from verifyModule is
// fatal; otherwise BrokenDebugInfo tells us whether anything must be stripped.
3630  bool BrokenDebugInfo = false;
3631  if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
3632  report_fatal_error("Broken module found, compilation aborted!");
3633  if (!BrokenDebugInfo)
3634  // Everything is ok.
3635  return false;
3636  else {
3637  // Diagnose malformed debug info.
3639  M.getContext().diagnose(Diag);
3640  }
3641  }
// Reached on a version mismatch or when the verifier flagged the debug info:
// strip it and report whether StripDebugInfo changed the module.
3642  bool Modified = StripDebugInfo(M);
3643  if (Modified && Version != DEBUG_METADATA_VERSION) {
3644  // Diagnose a version mismatch.
3645  DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
3646  M.getContext().diagnose(DiagVersion);
3647  }
3648  return Modified;
3649 }
3650 
// Body of the retain/release marker upgrade. Looks up the named metadata
// "clang.arc.retainAutoreleasedReturnValueMarker"; if its first operand's
// first operand is an MDString that splits on "#" into exactly two pieces,
// the string is rewritten with ";" as the separator. Returns true only when
// that rewrite happened.
// NOTE(review): the signature line (listing line 3651) is missing from this
// extract; the tooltip index names it `bool UpgradeRetainReleaseMarker(Module
// &M)` -- confirm against the real file.
3652  bool Changed = false;
3653  NamedMDNode *ModRetainReleaseMarker =
3654  M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker");
3655  if (ModRetainReleaseMarker) {
// NOTE(review): operand 0 is fetched (here and on Op below) without checking
// the operand count; presumably the marker always has one -- confirm.
3656  MDNode *Op = ModRetainReleaseMarker->getOperand(0);
3657  if (Op) {
3658  MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
3659  if (ID) {
3660  SmallVector<StringRef, 4> ValueComp;
3661  ID->getString().split(ValueComp, "#");
// Only a string with exactly one "#" (two components) is upgraded.
3662  if (ValueComp.size() == 2) {
3663  std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
3664  Metadata *Ops[1] = {MDString::get(M.getContext(), NewValue)};
3665  ModRetainReleaseMarker->setOperand(0,
3666  MDNode::get(M.getContext(), Ops));
3667  Changed = true;
3668  }
3669  }
3670  }
3671  }
3672  return Changed;
3673 }
3674 
// Body of the module-flag upgrade. Walks every module flag entry and:
//   * rewrites "PIC Level" / "PIE Level" entries whose behavior is
//     Module::Error,
//   * removes spaces from the "Objective-C Image Info Section" value,
//   * adds an "Objective-C Class Properties" flag of value 0 when the module
//     has "Objective-C Image Info Version" but no class-properties flag.
// Returns true if any of these changed the module, false if there are no
// module flags or nothing needed upgrading.
// NOTE(review): the signature line (listing line 3675) and listing lines 3698
// and 3700 (part of the replacement PIC/PIE entry) are missing from this
// extract; the tooltip index names the function `bool
// UpgradeModuleFlags(Module &M)` -- confirm against the real file.
3676  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
3677  if (!ModFlags)
3678  return false;
3679 
3680  bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
3681  for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
// Only entries with exactly three operands whose operand 1 is an MDString
// are considered; anything else is skipped.
3682  MDNode *Op = ModFlags->getOperand(I);
3683  if (Op->getNumOperands() != 3)
3684  continue;
3685  MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
3686  if (!ID)
3687  continue;
3688  if (ID->getString() == "Objective-C Image Info Version")
3689  HasObjCFlag = true;
3690  if (ID->getString() == "Objective-C Class Properties")
3691  HasClassProperties = true;
3692  // Upgrade PIC/PIE Module Flags. The module flag behavior for these two
3693  // fields was Error and is now Max.
3694  if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
3695  if (auto *Behavior =
3696  mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
3697  if (Behavior->getLimitedValue() == Module::Error) {
// Build a replacement entry that keeps the flag name and operand 2.
3699  Metadata *Ops[3] = {
3701  MDString::get(M.getContext(), ID->getString()),
3702  Op->getOperand(2)};
3703  ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
3704  Changed = true;
3705  }
3706  }
3707  }
3708  // Upgrade Objective-C Image Info Section. Remove the whitespace in the
3709  // section name so that llvm-lto will not complain about mismatching
3710  // module flags that are functionally the same.
3711  if (ID->getString() == "Objective-C Image Info Section") {
3712  if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
3713  SmallVector<StringRef, 4> ValueComp;
3714  Value->getString().split(ValueComp, " ");
// More than one component means the value contained at least one space;
// concatenate the pieces to drop every space.
3715  if (ValueComp.size() != 1) {
3716  std::string NewValue;
3717  for (auto &S : ValueComp)
3718  NewValue += S.str();
3719  Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
3720  MDString::get(M.getContext(), NewValue)};
3721  ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
3722  Changed = true;
3723  }
3724  }
3725  }
3726  }
3727 
3728  // "Objective-C Class Properties" was recently added for Objective-C. We
3729  // upgrade ObjC bitcodes to contain an "Objective-C Class Properties" module
3730  // flag of value 0, so we can correctly downgrade this flag when trying to
3731  // link an ObjC bitcode without this module flag with an ObjC bitcode with
3732  // this module flag.
3733  if (HasObjCFlag && !HasClassProperties) {
3734  M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
3735  (uint32_t)0);
3736  Changed = true;
3737  }
3738 
3739  return Changed;
3740 }
3741 
// Body of the section-attribute upgrade: for every global in M whose section
// string starts with "__DATA, __objc_catlist", the section is replaced by the
// same comma-separated list with each component trimmed.
// NOTE(review): the signature line (listing line 3742) is missing from this
// extract; the tooltip index names it `void UpgradeSectionAttributes(Module
// &M)` -- confirm against the real file.
//
// TrimSpaces splits its argument on ',', trims each component, and joins the
// components back together with ','.
3743  auto TrimSpaces = [](StringRef Section) -> std::string {
3744  SmallVector<StringRef, 5> Components;
3745  Section.split(Components, ',');
3746 
3747  SmallString<32> Buffer;
3748  raw_svector_ostream OS(Buffer);
3749 
// Every component is written with a leading ','; the first one is removed
// again by substr(1) below.
3750  for (auto Component : Components)
3751  OS << ',' << Component.trim();
3752 
3753  return OS.str().substr(1);
3754  };
3755 
3756  for (auto &GV : M.globals()) {
3757  if (!GV.hasSection())
3758  continue;
3759 
3760  StringRef Section = GV.getSection();
3761 
// Only the old spelling, with a space after the first comma, is rewritten.
3762  if (!Section.startswith("__DATA, __objc_catlist"))
3763  continue;
3764 
3765  // Before: __DATA, __objc_catlist, regular, no_dead_strip
3766  // After:  __DATA,__objc_catlist,regular,no_dead_strip
3767  GV.setSection(TrimSpaces(Section));
3768  }
3769 }
3770 
3771 static bool isOldLoopArgument(Metadata *MD) {
3772  auto *T = dyn_cast_or_null<MDTuple>(MD);
3773  if (!T)
3774  return false;
3775  if (T->getNumOperands() < 1)
3776  return false;
3777  auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
3778  if (!S)
3779  return false;
3780  return S->getString().startswith("llvm.vectorizer.");
3781 }
3782 
// Body of the loop-tag renamer: maps an old "llvm.vectorizer.*" tag string to
// the MDString for its replacement name.
//   "llvm.vectorizer.unroll"  -> "llvm.loop.interleave.count"
//   "llvm.vectorizer.<rest>"  -> "llvm.loop.vectorize.<rest>"
// NOTE(review): the signature line (listing line 3783) is missing from this
// extract; from the uses below, C is the context handed to MDString::get and
// OldTag is a StringRef-like tag -- confirm against the real file.
3784  StringRef OldPrefix = "llvm.vectorizer.";
3785  assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
3786 
// The unroll tag is the one case that does not simply swap the prefix.
3787  if (OldTag == "llvm.vectorizer.unroll")
3788  return MDString::get(C, "llvm.loop.interleave.count");
3789 
// Otherwise keep everything after the old prefix and prepend the new prefix.
3790  return MDString::get(
3791  C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
3792  .str());
3793 }
3794 
// Body of the per-operand loop-metadata upgrade. Returns MD unchanged unless
// it is a non-empty MDTuple whose first operand is an MDString starting with
// "llvm.vectorizer."; in that case a new MDTuple is returned with the first
// operand replaced by the upgraded tag and the remaining operands copied.
// NOTE(review): the signature line (listing line 3795) and the declaration of
// `Ops` (listing line 3808) are missing from this extract; the tooltip index
// names the function `static Metadata *upgradeLoopArgument(Metadata *MD)` --
// confirm against the real file.
3796  auto *T = dyn_cast_or_null<MDTuple>(MD);
3797  if (!T)
3798  return MD;
3799  if (T->getNumOperands() < 1)
3800  return MD;
3801  auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
3802  if (!OldTag)
3803  return MD;
3804  if (!OldTag->getString().startswith("llvm.vectorizer."))
3805  return MD;
3806 
3807  // This has an old tag. Upgrade it.
3809  Ops.reserve(T->getNumOperands());
3810  Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
// Operands after the tag are carried over as-is.
3811  for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
3812  Ops.push_back(T->getOperand(I));
3813 
3814  return MDTuple::get(T->getContext(), Ops);
3815 }
3816 
// Body of the loop-attachment upgrade. Returns &N unchanged when N is not an
// MDTuple or when none of its operands is an old-style loop argument;
// otherwise returns a new MDTuple whose operands are the results of
// upgradeLoopArgument applied to each operand of N.
// NOTE(review): the signature line (listing line 3817) and the declaration of
// `Ops` (listing line 3825) are missing from this extract; the tooltip index
// names the function `MDNode *upgradeInstructionLoopAttachment(MDNode &N)` --
// confirm against the real file.
3818  auto *T = dyn_cast<MDTuple>(&N);
3819  if (!T)
3820  return &N;
3821 
// Avoid building a new tuple when there is nothing to upgrade.
3822  if (none_of(T->operands(), isOldLoopArgument))
3823  return &N;
3824 
3826  Ops.reserve(T->getNumOperands());
3827  for (Metadata *MD : T->operands())
3828  Ops.push_back(upgradeLoopArgument(MD));
3829 
3830  return MDTuple::get(T->getContext(), Ops);
3831 }
IntegerType * getInt16Ty()
Fetch the type representing a 16-bit integer.
Definition: IRBuilder.h:342
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
Type * getVectorElementType() const
Definition: Type.h:371
const NoneType None
Definition: None.h:24
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn)
uint64_t CallInst * C
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:68
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1949
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1133
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
static IntegerType * getInt1Ty(LLVMContext &C)
Definition: Type.cpp:173
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
LLVMContext & Context
Value * CreateIsNotNull(Value *Arg, const Twine &Name="")
Return an i1 value testing if Arg is not null.
Definition: IRBuilder.h:2116
void UpgradeSectionAttributes(Module &M)
Takes the max of the two values, which are required to be integers.
Definition: Module.h:145
MDNode * getOperand(unsigned i) const
Definition: Metadata.cpp:1081
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
This class represents lattice values for constants.
Definition: AllocatorList.h:24
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:135
static Value * UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI, bool ZeroMask, bool IndexForm)
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve &#39;CreateLoad(Ty, Ptr, "...")&#39; correctly, instead of converting the string to &#39;bool...
Definition: IRBuilder.h:1357
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1200
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, unsigned Align, const char *Name)
Provided to resolve &#39;CreateAlignedLoad(Ptr, Align, "...")&#39; correctly, instead of converting the strin...
Definition: IRBuilder.h:1393
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:454
bool UpgradeRetainReleaseMarker(Module &M)
This checks for objc retain release marker which should be upgraded.
static Value * upgradeAbs(IRBuilder<> &Builder, CallInst &CI)
static ConstantAggregateZero * get(Type *Ty)
Definition: Constants.cpp:1332
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t size() const
size - Get the string size.
Definition: StringRef.h:138
static Value * ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI, bool IsShiftRight, bool ZeroMask)
This class represents a function call, abstracting a target machine&#39;s calling convention.
unsigned less or equal
Definition: InstrTypes.h:672
unsigned less than
Definition: InstrTypes.h:671
static Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:1760
void UpgradeInlineAsmString(std::string *AsmStr)
Upgrade comment in call to inline asm that represents an objc retain release marker.
unsigned getDebugMetadataVersionFromModule(const Module &M)
Return Debug Info Metadata Version by checking module flags.
Definition: DebugInfo.cpp:682
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:705
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:510
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1663
Metadata node.
Definition: Metadata.h:864
F(f)
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1069
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:503
An instruction for reading from memory.
Definition: Instructions.h:168
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:177
Value * CreateICmpULE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1859
void reserve(size_type N)
Definition: SmallVector.h:376
void UpgradeIntrinsicCall(CallInst *CI, Function *NewFn)
This is the complement to the above, replacing a specific call to an intrinsic function with a call t...
static Value * EmitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
void setOperand(unsigned I, MDNode *New)
Definition: Metadata.cpp:1089
static Value * upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI, ICmpInst::Predicate Pred)
uint64_t High
Diagnostic information for stripping invalid debug metadata.
Tuple of metadata.
Definition: Metadata.h:1106
static Constant * getNullValue(Type *Ty)
Constructor to create a &#39;0&#39; constant of arbitrary type.
Definition: Constants.cpp:265
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1334
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, unsigned Align, bool isVolatile=false)
Definition: IRBuilder.h:1430
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:347
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1135
unsigned getBitWidth() const
Return the number of bits in the Vector type.
Definition: DerivedTypes.h:452
A tuple of MDNodes.
Definition: Metadata.h:1326
amdgpu Simplify well known AMD library false Value Value const Twine & Name
static Value * UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
Value * CreateFPExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1718
MDNode * upgradeInstructionLoopAttachment(MDNode &N)
Upgrade the loop attachment metadata node.
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:626
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
void UpgradeCallsToIntrinsic(Function *F)
This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the func...
static Type * getFloatTy(LLVMContext &C)
Definition: Type.cpp:164
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:451
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:244
static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name)
Definition: AutoUpgrade.cpp:68
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
Definition: Type.cpp:652
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:197
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:743
static Constant * AddOne(Constant *C)
Add one to a Constant.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1200
void setCalledFunction(Value *Fn)
Sets the function called, including updating the function type.
Definition: InstrTypes.h:1210
unsigned Intr
unsigned getNumOperands() const
Definition: Metadata.cpp:1077
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1014
static Value * UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI)
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:285
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE R Default(T Value)
Definition: StringSwitch.h:203
uint64_t getNumElements() const
Definition: DerivedTypes.h:359
static Metadata * upgradeLoopArgument(Metadata *MD)
AttributeList getAttributes(LLVMContext &C, ID id)
Return the attributes for an intrinsic.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:267
llvm::Optional< Function * > remangleIntrinsicFunction(Function *F)
Definition: Function.cpp:1218
Class to represent function types.
Definition: DerivedTypes.h:103
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1732
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
LLVMContext & getContext() const
Definition: Metadata.h:924
bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
Definition: DebugInfo.cpp:351
NamedMDNode * getNamedMetadata(const Twine &Name) const
Return the first NamedMDNode in the module with the specified name.
Definition: Module.cpp:252
static Value * EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Definition: IRBuilder.h:2196
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn)
This is a more granular function that simply checks an intrinsic function for upgrading, and returns true if it requires upgrading.
iterator_range< User::op_iterator > arg_operands()
Definition: InstrTypes.h:1127
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1031
Value * CreateSIToFP(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1709
An instruction for storing to memory.
Definition: Instructions.h:321
LinkageTypes getLinkage() const
Definition: GlobalValue.h:451
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:598
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1659
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:410
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1020
static Value * UpgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block...
Definition: IRBuilder.h:127
Value * getOperand(unsigned i) const
Definition: User.h:170
Class to represent pointers.
Definition: DerivedTypes.h:467
static Value * UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1182
bool UpgradeModuleFlags(Module &M)
This checks for module flags which should be upgraded.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return &#39;this&#39;.
Definition: Type.h:304
StringRef getString() const
Definition: Metadata.cpp:464
static Value * UpgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1166
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:43
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:169
void getModuleFlagsMetadata(SmallVectorImpl< ModuleFlagEntry > &Flags) const
Returns the module flags in the provided vector.
Definition: Module.cpp:292
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:136
Emits an error if two values disagree, otherwise the resulting value is that of the operands...
Definition: Module.h:116
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Value * CreateFMul(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1247
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1144
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:429
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
Definition: IRBuilder.h:282
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:42
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.h:2021
unsigned getNumParams() const
Return the number of fixed parameters this function type requires.
Definition: DerivedTypes.h:139
bool UpgradeDebugInfo(Module &M)
Check the debug info version number, if it is out-dated, drop the debug info.
ArrayRef< Type * > params() const
Definition: DerivedTypes.h:130
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type...
Definition: DerivedTypes.h:406
void addModuleFlag(ModFlagBehavior Behavior, StringRef Key, Metadata *Val)
Add a module-level flag to the module-level flags metadata.
Definition: Module.cpp:339
Value * CreateNeg(Value *V, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1308
static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
Definition: AutoUpgrade.cpp:54
unsigned getAddressSpace() const
Definition: Globals.cpp:111
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:646
Value * CreateICmpSGE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1867
static FunctionType * get(Type *Result, ArrayRef< Type *> Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
Definition: Type.cpp:297
size_t arg_size() const
Definition: Function.h:698
arg_iterator arg_begin()
Definition: Function.h:671
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1839
self_iterator getIterator()
Definition: ilist_node.h:82
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Definition: IRBuilder.h:360
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2041
static Value * UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI, bool IsSigned, bool IsAddition)
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:319
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:193
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1415
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2083
size_t size() const
Definition: SmallVector.h:53
static wasm::ValType getType(const TargetRegisterClass *RC)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1226
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1048
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1655
static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
signed greater than
Definition: InstrTypes.h:673
Value * CreateUIToFP(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1705
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition: StringRef.h:645
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition: Type.h:227
static Value * UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)
This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction i...
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallInst &CI, Value *&Rep)
unsigned getNumOperands() const
Definition: User.h:192
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the generic address space (address sp...
Definition: DerivedTypes.h:482
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
Definition: Type.cpp:130
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:1801
Value * CreateICmpUGE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1851
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
Module.h This file contains the declarations for the Module class.
bool UpgradeGlobalVariable(GlobalVariable *GV)
This checks for global variables which should be upgraded.
Value * CreateInsertElement(Value *Vec, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2054
LLVM_NODISCARD std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:727
signed less than
Definition: InstrTypes.h:675
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, unsigned Align, Value *Mask)
Create a call to Masked Store intrinsic.
Definition: IRBuilder.cpp:492
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2068
StringRef str()
Return a StringRef for the vector contents.
Definition: raw_ostream.h:535
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:622
CallInst * CreateMaskedLoad(Value *Ptr, unsigned Align, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Definition: IRBuilder.cpp:471
static Value * UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI, Intrinsic::ID IID)
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
Definition: Function.h:227
static VectorType * getExtendedElementVectorType(VectorType *VTy)
This static method is like getInteger except that the element types are twice as wide as the elements...
Definition: DerivedTypes.h:415
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:194
Uses the specified value, regardless of the behavior or value of the other module.
Definition: Module.h:134
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:941
unsigned getVectorNumElements() const
Definition: DerivedTypes.h:462
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:164
MDNode * UpgradeTBAANode(MDNode &TBAANode)
If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to ...
signed less or equal
Definition: InstrTypes.h:676
Class to represent vector types.
Definition: DerivedTypes.h:393
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Definition: Instruction.cpp:56
Class for arbitrary precision integers.
Definition: APInt.h:70
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:337
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1103
LLVM_ATTRIBUTE_ALWAYS_INLINE StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:70
Value * CreatePointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1778
bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
Definition: Verifier.cpp:4820
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition: IRBuilder.h:292
amdgpu Simplify well known AMD library false Value Value * Arg
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
Definition: IRBuilder.h:332
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
unsigned getNumArgOperands() const
Definition: InstrTypes.h:1133
static bool isOldLoopArgument(Metadata *MD)
static const size_t npos
Definition: StringRef.h:51
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:176
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
unsigned greater or equal
Definition: InstrTypes.h:670
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:214
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1181
static Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:1747
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
Value * CreateFSub(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1230
Diagnostic information for debug metadata version reporting.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI)
static void rename(GlobalValue *GV)
Definition: AutoUpgrade.cpp:34
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
LLVM_ATTRIBUTE_ALWAYS_INLINE StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
Definition: StringSwitch.h:94
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI, unsigned CC, bool Signed)
uint32_t Size
Definition: Profile.cpp:47
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value *> Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1974
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it...
Definition: Function.cpp:214
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Definition: IRBuilder.h:370
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Value * CreateFAdd(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1213
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1164
Value * CreateFDiv(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1264
static bool UpgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
Definition: AutoUpgrade.cpp:38
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:185
unsigned getMDKindID(StringRef Name) const
Return a unique non-zero ID for the specified metadata kind.
Definition: Module.cpp:120
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
Definition: Value.h:376
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:115
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:566
LLVM Value Representation.
Definition: Value.h:73
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:302
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct a VectorType.
Definition: Type.cpp:606
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:81
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1124
bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr)
matches - Match the regex against a given String.
Definition: Regex.cpp:73
ConstantInt * getInt8(uint8_t C)
Get a constant 8-bit value.
Definition: IRBuilder.h:297
iterator_range< global_iterator > globals()
Definition: Module.h:584
unsigned greater than
Definition: InstrTypes.h:669
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
A single uniqued string.
Definition: Metadata.h:604
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned)
Value * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)
This is an auto-upgrade for bitcast constant expression between pointers with different address space...
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1075
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t find(char C, size_t From=0) const
Search for the first character C in the string.
Definition: StringRef.h:298
Root of the metadata hierarchy.
Definition: Metadata.h:58
Value * CreateICmpSLE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1875
const uint64_t Version
Definition: InstrProf.h:895
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:174
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI, bool IsRotateRight)
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Value * CreateFNeg(Value *V, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1326
signed greater or equal
Definition: InstrTypes.h:674
IntegerType * Int32Ty
const BasicBlock * getParent() const
Definition: Instruction.h:67
user_iterator user_end()
Definition: Value.h:384