Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/IR/AutoUpgrade.cpp
35234 views
1
//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements the auto-upgrade helper functions.
10
// This is where deprecated IR intrinsics and other IR features are updated to
11
// current specifications.
12
//
13
//===----------------------------------------------------------------------===//
14
15
#include "llvm/IR/AutoUpgrade.h"
16
#include "llvm/ADT/StringRef.h"
17
#include "llvm/ADT/StringSwitch.h"
18
#include "llvm/BinaryFormat/Dwarf.h"
19
#include "llvm/IR/AttributeMask.h"
20
#include "llvm/IR/Constants.h"
21
#include "llvm/IR/DebugInfo.h"
22
#include "llvm/IR/DebugInfoMetadata.h"
23
#include "llvm/IR/DiagnosticInfo.h"
24
#include "llvm/IR/Function.h"
25
#include "llvm/IR/IRBuilder.h"
26
#include "llvm/IR/InstVisitor.h"
27
#include "llvm/IR/Instruction.h"
28
#include "llvm/IR/IntrinsicInst.h"
29
#include "llvm/IR/Intrinsics.h"
30
#include "llvm/IR/IntrinsicsAArch64.h"
31
#include "llvm/IR/IntrinsicsARM.h"
32
#include "llvm/IR/IntrinsicsNVPTX.h"
33
#include "llvm/IR/IntrinsicsRISCV.h"
34
#include "llvm/IR/IntrinsicsWebAssembly.h"
35
#include "llvm/IR/IntrinsicsX86.h"
36
#include "llvm/IR/LLVMContext.h"
37
#include "llvm/IR/Metadata.h"
38
#include "llvm/IR/Module.h"
39
#include "llvm/IR/Verifier.h"
40
#include "llvm/Support/CommandLine.h"
41
#include "llvm/Support/ErrorHandling.h"
42
#include "llvm/Support/Regex.h"
43
#include "llvm/TargetParser/Triple.h"
44
#include <cstring>
45
46
using namespace llvm;
47
48
// Boolean command-line option "disable-auto-upgrade-debug-info"; its help text
// describes it as disabling the auto-upgrade of debug info.
static cl::opt<bool> DisableAutoUpgradeDebugInfo(
    "disable-auto-upgrade-debug-info",
    cl::desc("Disable autoupgrade of debug info"));
51
52
// Move GV aside by appending ".old" to its current name.
static void rename(GlobalValue *GV) {
  StringRef CurrentName = GV->getName();
  GV->setName(CurrentName + ".old");
}
53
54
// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
55
// changed their type from v4f32 to v2i64.
56
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
57
Function *&NewFn) {
58
// Check whether this is an old version of the function, which received
59
// v4f32 arguments.
60
Type *Arg0Type = F->getFunctionType()->getParamType(0);
61
if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
62
return false;
63
64
// Yes, it's old, replace it with new version.
65
rename(F);
66
NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
67
return true;
68
}
69
70
// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
71
// arguments have changed their type from i32 to i8.
72
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
73
Function *&NewFn) {
74
// Check that the last argument is an i32.
75
Type *LastArgType = F->getFunctionType()->getParamType(
76
F->getFunctionType()->getNumParams() - 1);
77
if (!LastArgType->isIntegerTy(32))
78
return false;
79
80
// Move this function aside and map down.
81
rename(F);
82
NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
83
return true;
84
}
85
86
// Upgrade the declaration of fp compare intrinsics that change return type
87
// from scalar to vXi1 mask.
88
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
89
Function *&NewFn) {
90
// Check if the return type is a vector.
91
if (F->getReturnType()->isVectorTy())
92
return false;
93
94
rename(F);
95
NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
96
return true;
97
}
98
99
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
100
Function *&NewFn) {
101
if (F->getReturnType()->getScalarType()->isBFloatTy())
102
return false;
103
104
rename(F);
105
NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
106
return true;
107
}
108
109
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
110
Function *&NewFn) {
111
if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
112
return false;
113
114
rename(F);
115
NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
116
return true;
117
}
118
119
// Name-based test for x86 intrinsics that are auto-upgraded.
//
// Name is matched group by group: a leading family prefix ("avx.", "avx2.",
// "avx512.", ...) is stripped and the remainder is looked up in that family's
// table. Names that match no family prefix are checked against a final
// catch-all table. Returns true on a match. F is not consulted.
//
// Every entry is tagged with the LLVM version that started auto-upgrading it.
// At some point in the future this information is meant to be used to remove
// upgrade code for older intrinsics; how that point will be determined is
// currently undecided.
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  if (Name.consume_front("avx."))
    return StringSwitch<bool>(Name)
        .StartsWith("blend.p", true)        // Added in 3.7
        .Case("cvt.ps2.pd.256", true)       // Added in 3.9
        .Case("cvtdq2.pd.256", true)        // Added in 3.9
        .Case("cvtdq2.ps.256", true)        // Added in 7.0
        .StartsWith("movnt.", true)         // Added in 3.2
        .StartsWith("sqrt.p", true)         // Added in 7.0
        .StartsWith("storeu.", true)        // Added in 3.9
        .StartsWith("vbroadcast.s", true)   // Added in 3.5
        .StartsWith("vbroadcastf128", true) // Added in 4.0
        .StartsWith("vextractf128.", true)  // Added in 3.7
        .StartsWith("vinsertf128.", true)   // Added in 3.7
        .StartsWith("vperm2f128.", true)    // Added in 6.0
        .StartsWith("vpermil.", true)       // Added in 3.1
        .Default(false);

  if (Name.consume_front("avx2."))
    return StringSwitch<bool>(Name)
        .Case("movntdqa", true)         // Added in 5.0
        .StartsWith("pabs.", true)      // Added in 6.0
        .StartsWith("padds.", true)     // Added in 8.0
        .StartsWith("paddus.", true)    // Added in 8.0
        .StartsWith("pblendd.", true)   // Added in 3.7
        .Case("pblendw", true)          // Added in 3.7
        .StartsWith("pbroadcast", true) // Added in 3.8
        .StartsWith("pcmpeq.", true)    // Added in 3.1
        .StartsWith("pcmpgt.", true)    // Added in 3.1
        .StartsWith("pmax", true)       // Added in 3.9
        .StartsWith("pmin", true)       // Added in 3.9
        .StartsWith("pmovsx", true)     // Added in 3.9
        .StartsWith("pmovzx", true)     // Added in 3.9
        .Case("pmul.dq", true)          // Added in 7.0
        .Case("pmulu.dq", true)         // Added in 7.0
        .StartsWith("psll.dq", true)    // Added in 3.7
        .StartsWith("psrl.dq", true)    // Added in 3.7
        .StartsWith("psubs.", true)     // Added in 8.0
        .StartsWith("psubus.", true)    // Added in 8.0
        .StartsWith("vbroadcast", true) // Added in 3.8
        .Case("vbroadcasti128", true)   // Added in 3.7
        .Case("vextracti128", true)     // Added in 3.7
        .Case("vinserti128", true)      // Added in 3.7
        .Case("vperm2i128", true)       // Added in 6.0
        .Default(false);

  if (Name.consume_front("avx512.")) {
    // 'avx512.mask.*'
    if (Name.consume_front("mask."))
      return StringSwitch<bool>(Name)
          .StartsWith("add.p", true)           // Added in 7.0. 128/256 in 4.0
          .StartsWith("and.", true)            // Added in 3.9
          .StartsWith("andn.", true)           // Added in 3.9
          .StartsWith("broadcast.s", true)     // Added in 3.9
          .StartsWith("broadcastf32x4.", true) // Added in 6.0
          .StartsWith("broadcastf32x8.", true) // Added in 6.0
          .StartsWith("broadcastf64x2.", true) // Added in 6.0
          .StartsWith("broadcastf64x4.", true) // Added in 6.0
          .StartsWith("broadcasti32x4.", true) // Added in 6.0
          .StartsWith("broadcasti32x8.", true) // Added in 6.0
          .StartsWith("broadcasti64x2.", true) // Added in 6.0
          .StartsWith("broadcasti64x4.", true) // Added in 6.0
          .StartsWith("cmp.b", true)           // Added in 5.0
          .StartsWith("cmp.d", true)           // Added in 5.0
          .StartsWith("cmp.q", true)           // Added in 5.0
          .StartsWith("cmp.w", true)           // Added in 5.0
          .StartsWith("compress.b", true)      // Added in 9.0
          .StartsWith("compress.d", true)      // Added in 9.0
          .StartsWith("compress.p", true)      // Added in 9.0
          .StartsWith("compress.q", true)      // Added in 9.0
          .StartsWith("compress.store.", true) // Added in 7.0
          .StartsWith("compress.w", true)      // Added in 9.0
          .StartsWith("conflict.", true)       // Added in 9.0
          .StartsWith("cvtdq2pd.", true)       // Added in 4.0
          .StartsWith("cvtdq2ps.", true)       // Added in 7.0 updated 9.0
          .Case("cvtpd2dq.256", true)          // Added in 7.0
          .Case("cvtpd2ps.256", true)          // Added in 7.0
          .Case("cvtps2pd.128", true)          // Added in 7.0
          .Case("cvtps2pd.256", true)          // Added in 7.0
          .StartsWith("cvtqq2pd.", true)       // Added in 7.0 updated 9.0
          .Case("cvtqq2ps.256", true)          // Added in 9.0
          .Case("cvtqq2ps.512", true)          // Added in 9.0
          .Case("cvttpd2dq.256", true)         // Added in 7.0
          .Case("cvttps2dq.128", true)         // Added in 7.0
          .Case("cvttps2dq.256", true)         // Added in 7.0
          .StartsWith("cvtudq2pd.", true)      // Added in 4.0
          .StartsWith("cvtudq2ps.", true)      // Added in 7.0 updated 9.0
          .StartsWith("cvtuqq2pd.", true)      // Added in 7.0 updated 9.0
          .Case("cvtuqq2ps.256", true)         // Added in 9.0
          .Case("cvtuqq2ps.512", true)         // Added in 9.0
          .StartsWith("dbpsadbw.", true)       // Added in 7.0
          .StartsWith("div.p", true)           // Added in 7.0. 128/256 in 4.0
          .StartsWith("expand.b", true)        // Added in 9.0
          .StartsWith("expand.d", true)        // Added in 9.0
          .StartsWith("expand.load.", true)    // Added in 7.0
          .StartsWith("expand.p", true)        // Added in 9.0
          .StartsWith("expand.q", true)        // Added in 9.0
          .StartsWith("expand.w", true)        // Added in 9.0
          .StartsWith("fpclass.p", true)       // Added in 7.0
          .StartsWith("insert", true)          // Added in 4.0
          .StartsWith("load.", true)           // Added in 3.9
          .StartsWith("loadu.", true)          // Added in 3.9
          .StartsWith("lzcnt.", true)          // Added in 5.0
          .StartsWith("max.p", true)           // Added in 7.0. 128/256 in 5.0
          .StartsWith("min.p", true)           // Added in 7.0. 128/256 in 5.0
          .StartsWith("movddup", true)         // Added in 3.9
          .StartsWith("move.s", true)          // Added in 4.0
          .StartsWith("movshdup", true)        // Added in 3.9
          .StartsWith("movsldup", true)        // Added in 3.9
          .StartsWith("mul.p", true)           // Added in 7.0. 128/256 in 4.0
          .StartsWith("or.", true)             // Added in 3.9
          .StartsWith("pabs.", true)           // Added in 6.0
          .StartsWith("packssdw.", true)       // Added in 5.0
          .StartsWith("packsswb.", true)       // Added in 5.0
          .StartsWith("packusdw.", true)       // Added in 5.0
          .StartsWith("packuswb.", true)       // Added in 5.0
          .StartsWith("padd.", true)           // Added in 4.0
          .StartsWith("padds.", true)          // Added in 8.0
          .StartsWith("paddus.", true)         // Added in 8.0
          .StartsWith("palignr.", true)        // Added in 3.9
          .StartsWith("pand.", true)           // Added in 3.9
          .StartsWith("pandn.", true)          // Added in 3.9
          .StartsWith("pavg", true)            // Added in 6.0
          .StartsWith("pbroadcast", true)      // Added in 6.0
          .StartsWith("pcmpeq.", true)         // Added in 3.9
          .StartsWith("pcmpgt.", true)         // Added in 3.9
          .StartsWith("perm.df.", true)        // Added in 3.9
          .StartsWith("perm.di.", true)        // Added in 3.9
          .StartsWith("permvar.", true)        // Added in 7.0
          .StartsWith("pmaddubs.w.", true)     // Added in 7.0
          .StartsWith("pmaddw.d.", true)       // Added in 7.0
          .StartsWith("pmax", true)            // Added in 4.0
          .StartsWith("pmin", true)            // Added in 4.0
          .Case("pmov.qd.256", true)           // Added in 9.0
          .Case("pmov.qd.512", true)           // Added in 9.0
          .Case("pmov.wb.256", true)           // Added in 9.0
          .Case("pmov.wb.512", true)           // Added in 9.0
          .StartsWith("pmovsx", true)          // Added in 4.0
          .StartsWith("pmovzx", true)          // Added in 4.0
          .StartsWith("pmul.dq.", true)        // Added in 4.0
          .StartsWith("pmul.hr.sw.", true)     // Added in 7.0
          .StartsWith("pmulh.w.", true)        // Added in 7.0
          .StartsWith("pmulhu.w.", true)       // Added in 7.0
          .StartsWith("pmull.", true)          // Added in 4.0
          .StartsWith("pmultishift.qb.", true) // Added in 8.0
          .StartsWith("pmulu.dq.", true)       // Added in 4.0
          .StartsWith("por.", true)            // Added in 3.9
          .StartsWith("prol.", true)           // Added in 8.0
          .StartsWith("prolv.", true)          // Added in 8.0
          .StartsWith("pror.", true)           // Added in 8.0
          .StartsWith("prorv.", true)          // Added in 8.0
          .StartsWith("pshuf.b.", true)        // Added in 4.0
          .StartsWith("pshuf.d.", true)        // Added in 3.9
          .StartsWith("pshufh.w.", true)       // Added in 3.9
          .StartsWith("pshufl.w.", true)       // Added in 3.9
          .StartsWith("psll.d", true)          // Added in 4.0
          .StartsWith("psll.q", true)          // Added in 4.0
          .StartsWith("psll.w", true)          // Added in 4.0
          .StartsWith("pslli", true)           // Added in 4.0
          .StartsWith("psllv", true)           // Added in 4.0
          .StartsWith("psra.d", true)          // Added in 4.0
          .StartsWith("psra.q", true)          // Added in 4.0
          .StartsWith("psra.w", true)          // Added in 4.0
          .StartsWith("psrai", true)           // Added in 4.0
          .StartsWith("psrav", true)           // Added in 4.0
          .StartsWith("psrl.d", true)          // Added in 4.0
          .StartsWith("psrl.q", true)          // Added in 4.0
          .StartsWith("psrl.w", true)          // Added in 4.0
          .StartsWith("psrli", true)           // Added in 4.0
          .StartsWith("psrlv", true)           // Added in 4.0
          .StartsWith("psub.", true)           // Added in 4.0
          .StartsWith("psubs.", true)          // Added in 8.0
          .StartsWith("psubus.", true)         // Added in 8.0
          .StartsWith("pternlog.", true)       // Added in 7.0
          .StartsWith("punpckh", true)         // Added in 3.9
          .StartsWith("punpckl", true)         // Added in 3.9
          .StartsWith("pxor.", true)           // Added in 3.9
          .StartsWith("shuf.f", true)          // Added in 6.0
          .StartsWith("shuf.i", true)          // Added in 6.0
          .StartsWith("shuf.p", true)          // Added in 4.0
          .StartsWith("sqrt.p", true)          // Added in 7.0
          .StartsWith("store.b.", true)        // Added in 3.9
          .StartsWith("store.d.", true)        // Added in 3.9
          .StartsWith("store.p", true)         // Added in 3.9
          .StartsWith("store.q.", true)        // Added in 3.9
          .StartsWith("store.w.", true)        // Added in 3.9
          .Case("store.ss", true)              // Added in 7.0
          .StartsWith("storeu.", true)         // Added in 3.9
          .StartsWith("sub.p", true)           // Added in 7.0. 128/256 in 4.0
          .StartsWith("ucmp.", true)           // Added in 5.0
          .StartsWith("unpckh.", true)         // Added in 3.9
          .StartsWith("unpckl.", true)         // Added in 3.9
          .StartsWith("valign.", true)         // Added in 4.0
          .Case("vcvtph2ps.128", true)         // Added in 11.0
          .Case("vcvtph2ps.256", true)         // Added in 11.0
          .StartsWith("vextract", true)        // Added in 4.0
          .StartsWith("vfmadd.", true)         // Added in 7.0
          .StartsWith("vfmaddsub.", true)      // Added in 7.0
          .StartsWith("vfnmadd.", true)        // Added in 7.0
          .StartsWith("vfnmsub.", true)        // Added in 7.0
          .StartsWith("vpdpbusd.", true)       // Added in 7.0
          .StartsWith("vpdpbusds.", true)      // Added in 7.0
          .StartsWith("vpdpwssd.", true)       // Added in 7.0
          .StartsWith("vpdpwssds.", true)      // Added in 7.0
          .StartsWith("vpermi2var.", true)     // Added in 7.0
          .StartsWith("vpermil.p", true)       // Added in 3.9
          .StartsWith("vpermilvar.", true)     // Added in 4.0
          .StartsWith("vpermt2var.", true)     // Added in 7.0
          .StartsWith("vpmadd52", true)        // Added in 7.0
          .StartsWith("vpshld.", true)         // Added in 7.0
          .StartsWith("vpshldv.", true)        // Added in 8.0
          .StartsWith("vpshrd.", true)         // Added in 7.0
          .StartsWith("vpshrdv.", true)        // Added in 8.0
          .StartsWith("vpshufbitqmb.", true)   // Added in 8.0
          .StartsWith("xor.", true)            // Added in 3.9
          .Default(false);

    // 'avx512.mask3.*'
    if (Name.consume_front("mask3."))
      return StringSwitch<bool>(Name)
          .StartsWith("vfmadd.", true)    // Added in 7.0
          .StartsWith("vfmaddsub.", true) // Added in 7.0
          .StartsWith("vfmsub.", true)    // Added in 7.0
          .StartsWith("vfmsubadd.", true) // Added in 7.0
          .StartsWith("vfnmsub.", true)   // Added in 7.0
          .Default(false);

    // 'avx512.maskz.*'
    if (Name.consume_front("maskz."))
      return StringSwitch<bool>(Name)
          .StartsWith("pternlog.", true)   // Added in 7.0
          .StartsWith("vfmadd.", true)     // Added in 7.0
          .StartsWith("vfmaddsub.", true)  // Added in 7.0
          .StartsWith("vpdpbusd.", true)   // Added in 7.0
          .StartsWith("vpdpbusds.", true)  // Added in 7.0
          .StartsWith("vpdpwssd.", true)   // Added in 7.0
          .StartsWith("vpdpwssds.", true)  // Added in 7.0
          .StartsWith("vpermt2var.", true) // Added in 7.0
          .StartsWith("vpmadd52", true)    // Added in 7.0
          .StartsWith("vpshldv.", true)    // Added in 8.0
          .StartsWith("vpshrdv.", true)    // Added in 8.0
          .Default(false);

    // 'avx512.*'
    return StringSwitch<bool>(Name)
        .Case("movntdqa", true)           // Added in 5.0
        .Case("pmul.dq.512", true)        // Added in 7.0
        .Case("pmulu.dq.512", true)       // Added in 7.0
        .StartsWith("broadcastm", true)   // Added in 6.0
        .StartsWith("cmp.p", true)        // Added in 12.0
        .StartsWith("cvtb2mask.", true)   // Added in 7.0
        .StartsWith("cvtd2mask.", true)   // Added in 7.0
        .StartsWith("cvtmask2", true)     // Added in 5.0
        .StartsWith("cvtq2mask.", true)   // Added in 7.0
        .Case("cvtusi2sd", true)          // Added in 7.0
        .StartsWith("cvtw2mask.", true)   // Added in 7.0
        .Case("kand.w", true)             // Added in 7.0
        .Case("kandn.w", true)            // Added in 7.0
        .Case("knot.w", true)             // Added in 7.0
        .Case("kor.w", true)              // Added in 7.0
        .Case("kortestc.w", true)         // Added in 7.0
        .Case("kortestz.w", true)         // Added in 7.0
        .StartsWith("kunpck", true)       // Added in 6.0
        .Case("kxnor.w", true)            // Added in 7.0
        .Case("kxor.w", true)             // Added in 7.0
        .StartsWith("padds.", true)       // Added in 8.0
        .StartsWith("pbroadcast", true)   // Added in 3.9
        .StartsWith("prol", true)         // Added in 8.0
        .StartsWith("pror", true)         // Added in 8.0
        .StartsWith("psll.dq", true)      // Added in 3.9
        .StartsWith("psrl.dq", true)      // Added in 3.9
        .StartsWith("psubs.", true)       // Added in 8.0
        .StartsWith("ptestm", true)       // Added in 6.0
        .StartsWith("ptestnm", true)      // Added in 6.0
        .StartsWith("storent.", true)     // Added in 3.9
        .StartsWith("vbroadcast.s", true) // Added in 7.0
        .StartsWith("vpshld.", true)      // Added in 8.0
        .StartsWith("vpshrd.", true)      // Added in 8.0
        .Default(false);
  }

  if (Name.consume_front("fma."))
    return StringSwitch<bool>(Name)
        .StartsWith("vfmadd.", true)    // Added in 7.0
        .StartsWith("vfmsub.", true)    // Added in 7.0
        .StartsWith("vfmsubadd.", true) // Added in 7.0
        .StartsWith("vfnmadd.", true)   // Added in 7.0
        .StartsWith("vfnmsub.", true)   // Added in 7.0
        .Default(false);

  if (Name.consume_front("fma4."))
    return StringSwitch<bool>(Name)
        .StartsWith("vfmadd.s", true) // Added in 7.0
        .Default(false);

  if (Name.consume_front("sse."))
    return StringSwitch<bool>(Name)
        .Case("add.ss", true)        // Added in 4.0
        .Case("cvtsi2ss", true)      // Added in 7.0
        .Case("cvtsi642ss", true)    // Added in 7.0
        .Case("div.ss", true)        // Added in 4.0
        .Case("mul.ss", true)        // Added in 4.0
        .StartsWith("sqrt.p", true)  // Added in 7.0
        .Case("sqrt.ss", true)       // Added in 7.0
        .StartsWith("storeu.", true) // Added in 3.9
        .Case("sub.ss", true)        // Added in 4.0
        .Default(false);

  if (Name.consume_front("sse2."))
    return StringSwitch<bool>(Name)
        .Case("add.sd", true)        // Added in 4.0
        .Case("cvtdq2pd", true)      // Added in 3.9
        .Case("cvtdq2ps", true)      // Added in 7.0
        .Case("cvtps2pd", true)      // Added in 3.9
        .Case("cvtsi2sd", true)      // Added in 7.0
        .Case("cvtsi642sd", true)    // Added in 7.0
        .Case("cvtss2sd", true)      // Added in 7.0
        .Case("div.sd", true)        // Added in 4.0
        .Case("mul.sd", true)        // Added in 4.0
        .StartsWith("padds.", true)  // Added in 8.0
        .StartsWith("paddus.", true) // Added in 8.0
        .StartsWith("pcmpeq.", true) // Added in 3.1
        .StartsWith("pcmpgt.", true) // Added in 3.1
        .Case("pmaxs.w", true)       // Added in 3.9
        .Case("pmaxu.b", true)       // Added in 3.9
        .Case("pmins.w", true)       // Added in 3.9
        .Case("pminu.b", true)       // Added in 3.9
        .Case("pmulu.dq", true)      // Added in 7.0
        .StartsWith("pshuf", true)   // Added in 3.9
        .StartsWith("psll.dq", true) // Added in 3.7
        .StartsWith("psrl.dq", true) // Added in 3.7
        .StartsWith("psubs.", true)  // Added in 8.0
        .StartsWith("psubus.", true) // Added in 8.0
        .StartsWith("sqrt.p", true)  // Added in 7.0
        .Case("sqrt.sd", true)       // Added in 7.0
        .Case("storel.dq", true)     // Added in 3.9
        .StartsWith("storeu.", true) // Added in 3.9
        .Case("sub.sd", true)        // Added in 4.0
        .Default(false);

  if (Name.consume_front("sse41."))
    return StringSwitch<bool>(Name)
        .StartsWith("blendp", true) // Added in 3.7
        .Case("movntdqa", true)     // Added in 5.0
        .Case("pblendw", true)      // Added in 3.7
        .Case("pmaxsb", true)       // Added in 3.9
        .Case("pmaxsd", true)       // Added in 3.9
        .Case("pmaxud", true)       // Added in 3.9
        .Case("pmaxuw", true)       // Added in 3.9
        .Case("pminsb", true)       // Added in 3.9
        .Case("pminsd", true)       // Added in 3.9
        .Case("pminud", true)       // Added in 3.9
        .Case("pminuw", true)       // Added in 3.9
        .StartsWith("pmovsx", true) // Added in 3.8
        .StartsWith("pmovzx", true) // Added in 3.9
        .Case("pmuldq", true)       // Added in 7.0
        .Default(false);

  if (Name.consume_front("sse42."))
    return StringSwitch<bool>(Name)
        .Case("crc32.64.8", true) // Added in 3.4
        .Default(false);

  if (Name.consume_front("sse4a."))
    return StringSwitch<bool>(Name)
        .StartsWith("movnt.", true) // Added in 3.9
        .Default(false);

  if (Name.consume_front("ssse3."))
    return StringSwitch<bool>(Name)
        .Case("pabs.b.128", true) // Added in 6.0
        .Case("pabs.d.128", true) // Added in 6.0
        .Case("pabs.w.128", true) // Added in 6.0
        .Default(false);

  if (Name.consume_front("xop."))
    return StringSwitch<bool>(Name)
        .Case("vpcmov", true)      // Added in 3.8
        .Case("vpcmov.256", true)  // Added in 5.0
        .StartsWith("vpcom", true) // Added in 3.2, Updated in 9.0
        .StartsWith("vprot", true) // Added in 8.0
        .Default(false);

  // Names that belong to none of the families above.
  return StringSwitch<bool>(Name)
      .Case("addcarry.u32", true)     // Added in 8.0
      .Case("addcarry.u64", true)     // Added in 8.0
      .Case("addcarryx.u32", true)    // Added in 8.0
      .Case("addcarryx.u64", true)    // Added in 8.0
      .Case("subborrow.u32", true)    // Added in 8.0
      .Case("subborrow.u64", true)    // Added in 8.0
      .StartsWith("vcvtph2ps.", true) // Added in 11.0
      .Default(false);
}
485
486
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,
487
Function *&NewFn) {
488
// Only handle intrinsics that start with "x86.".
489
if (!Name.consume_front("x86."))
490
return false;
491
492
if (shouldUpgradeX86Intrinsic(F, Name)) {
493
NewFn = nullptr;
494
return true;
495
}
496
497
if (Name == "rdtscp") { // Added in 8.0
498
// If this intrinsic has 0 operands, it's the new version.
499
if (F->getFunctionType()->getNumParams() == 0)
500
return false;
501
502
rename(F);
503
NewFn = Intrinsic::getDeclaration(F->getParent(),
504
Intrinsic::x86_rdtscp);
505
return true;
506
}
507
508
Intrinsic::ID ID;
509
510
// SSE4.1 ptest functions may have an old signature.
511
if (Name.consume_front("sse41.ptest")) { // Added in 3.2
512
ID = StringSwitch<Intrinsic::ID>(Name)
513
.Case("c", Intrinsic::x86_sse41_ptestc)
514
.Case("z", Intrinsic::x86_sse41_ptestz)
515
.Case("nzc", Intrinsic::x86_sse41_ptestnzc)
516
.Default(Intrinsic::not_intrinsic);
517
if (ID != Intrinsic::not_intrinsic)
518
return upgradePTESTIntrinsic(F, ID, NewFn);
519
520
return false;
521
}
522
523
// Several blend and other instructions with masks used the wrong number of
524
// bits.
525
526
// Added in 3.6
527
ID = StringSwitch<Intrinsic::ID>(Name)
528
.Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
529
.Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
530
.Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
531
.Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
532
.Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
533
.Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
534
.Default(Intrinsic::not_intrinsic);
535
if (ID != Intrinsic::not_intrinsic)
536
return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);
537
538
if (Name.consume_front("avx512.mask.cmp.")) {
539
// Added in 7.0
540
ID = StringSwitch<Intrinsic::ID>(Name)
541
.Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
542
.Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
543
.Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
544
.Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
545
.Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
546
.Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
547
.Default(Intrinsic::not_intrinsic);
548
if (ID != Intrinsic::not_intrinsic)
549
return upgradeX86MaskedFPCompare(F, ID, NewFn);
550
return false; // No other 'x86.avx523.mask.cmp.*'.
551
}
552
553
if (Name.consume_front("avx512bf16.")) {
554
// Added in 9.0
555
ID = StringSwitch<Intrinsic::ID>(Name)
556
.Case("cvtne2ps2bf16.128",
557
Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
558
.Case("cvtne2ps2bf16.256",
559
Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
560
.Case("cvtne2ps2bf16.512",
561
Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
562
.Case("mask.cvtneps2bf16.128",
563
Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
564
.Case("cvtneps2bf16.256",
565
Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
566
.Case("cvtneps2bf16.512",
567
Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
568
.Default(Intrinsic::not_intrinsic);
569
if (ID != Intrinsic::not_intrinsic)
570
return upgradeX86BF16Intrinsic(F, ID, NewFn);
571
572
// Added in 9.0
573
ID = StringSwitch<Intrinsic::ID>(Name)
574
.Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
575
.Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
576
.Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
577
.Default(Intrinsic::not_intrinsic);
578
if (ID != Intrinsic::not_intrinsic)
579
return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
580
return false; // No other 'x86.avx512bf16.*'.
581
}
582
583
if (Name.consume_front("xop.")) {
584
Intrinsic::ID ID = Intrinsic::not_intrinsic;
585
if (Name.starts_with("vpermil2")) { // Added in 3.9
586
// Upgrade any XOP PERMIL2 index operand still using a float/double
587
// vector.
588
auto Idx = F->getFunctionType()->getParamType(2);
589
if (Idx->isFPOrFPVectorTy()) {
590
unsigned IdxSize = Idx->getPrimitiveSizeInBits();
591
unsigned EltSize = Idx->getScalarSizeInBits();
592
if (EltSize == 64 && IdxSize == 128)
593
ID = Intrinsic::x86_xop_vpermil2pd;
594
else if (EltSize == 32 && IdxSize == 128)
595
ID = Intrinsic::x86_xop_vpermil2ps;
596
else if (EltSize == 64 && IdxSize == 256)
597
ID = Intrinsic::x86_xop_vpermil2pd_256;
598
else
599
ID = Intrinsic::x86_xop_vpermil2ps_256;
600
}
601
} else if (F->arg_size() == 2)
602
// frcz.ss/sd may need to have an argument dropped. Added in 3.2
603
ID = StringSwitch<Intrinsic::ID>(Name)
604
.Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
605
.Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
606
.Default(Intrinsic::not_intrinsic);
607
608
if (ID != Intrinsic::not_intrinsic) {
609
rename(F);
610
NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
611
return true;
612
}
613
return false; // No other 'x86.xop.*'
614
}
615
616
if (Name == "seh.recoverfp") {
617
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
618
return true;
619
}
620
621
return false;
622
}
623
624
// Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic fns. Return true iff so.
625
// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
626
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
627
StringRef Name,
628
Function *&NewFn) {
629
if (Name.starts_with("rbit")) {
630
// '(arm|aarch64).rbit'.
631
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
632
F->arg_begin()->getType());
633
return true;
634
}
635
636
if (Name == "thread.pointer") {
637
// '(arm|aarch64).thread.pointer'.
638
NewFn =
639
Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
640
return true;
641
}
642
643
bool Neon = Name.consume_front("neon.");
644
if (Neon) {
645
// '(arm|aarch64).neon.*'.
646
// Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and
647
// v16i8 respectively.
648
if (Name.consume_front("bfdot.")) {
649
// (arm|aarch64).neon.bfdot.*'.
650
Intrinsic::ID ID =
651
StringSwitch<Intrinsic::ID>(Name)
652
.Cases("v2f32.v8i8", "v4f32.v16i8",
653
IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
654
: (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
655
.Default(Intrinsic::not_intrinsic);
656
if (ID != Intrinsic::not_intrinsic) {
657
size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
658
assert((OperandWidth == 64 || OperandWidth == 128) &&
659
"Unexpected operand width");
660
LLVMContext &Ctx = F->getParent()->getContext();
661
std::array<Type *, 2> Tys{
662
{F->getReturnType(),
663
FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
664
NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
665
return true;
666
}
667
return false; // No other '(arm|aarch64).neon.bfdot.*'.
668
}
669
670
// Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
671
// anymore and accept v8bf16 instead of v16i8.
672
if (Name.consume_front("bfm")) {
673
// (arm|aarch64).neon.bfm*'.
674
if (Name.consume_back(".v4f32.v16i8")) {
675
// (arm|aarch64).neon.bfm*.v4f32.v16i8'.
676
Intrinsic::ID ID =
677
StringSwitch<Intrinsic::ID>(Name)
678
.Case("mla",
679
IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
680
: (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
681
.Case("lalb",
682
IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
683
: (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
684
.Case("lalt",
685
IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
686
: (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
687
.Default(Intrinsic::not_intrinsic);
688
if (ID != Intrinsic::not_intrinsic) {
689
NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
690
return true;
691
}
692
return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
693
}
694
return false; // No other '(arm|aarch64).neon.bfm*.
695
}
696
// Continue on to Aarch64 Neon or Arm Neon.
697
}
698
// Continue on to Arm or Aarch64.
699
700
if (IsArm) {
701
// 'arm.*'.
702
if (Neon) {
703
// 'arm.neon.*'.
704
Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
705
.StartsWith("vclz.", Intrinsic::ctlz)
706
.StartsWith("vcnt.", Intrinsic::ctpop)
707
.StartsWith("vqadds.", Intrinsic::sadd_sat)
708
.StartsWith("vqaddu.", Intrinsic::uadd_sat)
709
.StartsWith("vqsubs.", Intrinsic::ssub_sat)
710
.StartsWith("vqsubu.", Intrinsic::usub_sat)
711
.Default(Intrinsic::not_intrinsic);
712
if (ID != Intrinsic::not_intrinsic) {
713
NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
714
F->arg_begin()->getType());
715
return true;
716
}
717
718
if (Name.consume_front("vst")) {
719
// 'arm.neon.vst*'.
720
static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
721
SmallVector<StringRef, 2> Groups;
722
if (vstRegex.match(Name, &Groups)) {
723
static const Intrinsic::ID StoreInts[] = {
724
Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
725
Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
726
727
static const Intrinsic::ID StoreLaneInts[] = {
728
Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
729
Intrinsic::arm_neon_vst4lane};
730
731
auto fArgs = F->getFunctionType()->params();
732
Type *Tys[] = {fArgs[0], fArgs[1]};
733
if (Groups[1].size() == 1)
734
NewFn = Intrinsic::getDeclaration(F->getParent(),
735
StoreInts[fArgs.size() - 3], Tys);
736
else
737
NewFn = Intrinsic::getDeclaration(
738
F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
739
return true;
740
}
741
return false; // No other 'arm.neon.vst*'.
742
}
743
744
return false; // No other 'arm.neon.*'.
745
}
746
747
if (Name.consume_front("mve.")) {
748
// 'arm.mve.*'.
749
if (Name == "vctp64") {
750
if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
751
// A vctp64 returning a v4i1 is converted to return a v2i1. Rename
752
// the function and deal with it below in UpgradeIntrinsicCall.
753
rename(F);
754
return true;
755
}
756
return false; // Not 'arm.mve.vctp64'.
757
}
758
759
// These too are changed to accept a v2i1 instead of the old v4i1.
760
if (Name.consume_back(".v4i1")) {
761
// 'arm.mve.*.v4i1'.
762
if (Name.consume_back(".predicated.v2i64.v4i32"))
763
// 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
764
return Name == "mull.int" || Name == "vqdmull";
765
766
if (Name.consume_back(".v2i64")) {
767
// 'arm.mve.*.v2i64.v4i1'
768
bool IsGather = Name.consume_front("vldr.gather.");
769
if (IsGather || Name.consume_front("vstr.scatter.")) {
770
if (Name.consume_front("base.")) {
771
// Optional 'wb.' prefix.
772
Name.consume_front("wb.");
773
// 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
774
// predicated.v2i64.v2i64.v4i1'.
775
return Name == "predicated.v2i64";
776
}
777
778
if (Name.consume_front("offset.predicated."))
779
return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
780
Name == (IsGather ? "v2i64.p0" : "p0.v2i64");
781
782
// No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
783
return false;
784
}
785
786
return false; // No other 'arm.mve.*.v2i64.v4i1'.
787
}
788
return false; // No other 'arm.mve.*.v4i1'.
789
}
790
return false; // No other 'arm.mve.*'.
791
}
792
793
if (Name.consume_front("cde.vcx")) {
794
// 'arm.cde.vcx*'.
795
if (Name.consume_back(".predicated.v2i64.v4i1"))
796
// 'arm.cde.vcx*.predicated.v2i64.v4i1'.
797
return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
798
Name == "3q" || Name == "3qa";
799
800
return false; // No other 'arm.cde.vcx*'.
801
}
802
} else {
803
// 'aarch64.*'.
804
if (Neon) {
805
// 'aarch64.neon.*'.
806
Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
807
.StartsWith("frintn", Intrinsic::roundeven)
808
.StartsWith("rbit", Intrinsic::bitreverse)
809
.Default(Intrinsic::not_intrinsic);
810
if (ID != Intrinsic::not_intrinsic) {
811
NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
812
F->arg_begin()->getType());
813
return true;
814
}
815
816
if (Name.starts_with("addp")) {
817
// 'aarch64.neon.addp*'.
818
if (F->arg_size() != 2)
819
return false; // Invalid IR.
820
VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
821
if (Ty && Ty->getElementType()->isFloatingPointTy()) {
822
NewFn = Intrinsic::getDeclaration(F->getParent(),
823
Intrinsic::aarch64_neon_faddp, Ty);
824
return true;
825
}
826
}
827
return false; // No other 'aarch64.neon.*'.
828
}
829
if (Name.consume_front("sve.")) {
830
// 'aarch64.sve.*'.
831
if (Name.consume_front("bf")) {
832
if (Name.consume_back(".lane")) {
833
// 'aarch64.sve.bf*.lane'.
834
Intrinsic::ID ID =
835
StringSwitch<Intrinsic::ID>(Name)
836
.Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
837
.Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
838
.Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
839
.Default(Intrinsic::not_intrinsic);
840
if (ID != Intrinsic::not_intrinsic) {
841
NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
842
return true;
843
}
844
return false; // No other 'aarch64.sve.bf*.lane'.
845
}
846
return false; // No other 'aarch64.sve.bf*'.
847
}
848
849
if (Name.consume_front("addqv")) {
850
// 'aarch64.sve.addqv'.
851
if (!F->getReturnType()->isFPOrFPVectorTy())
852
return false;
853
854
auto Args = F->getFunctionType()->params();
855
Type *Tys[] = {F->getReturnType(), Args[1]};
856
NewFn = Intrinsic::getDeclaration(F->getParent(),
857
Intrinsic::aarch64_sve_faddqv, Tys);
858
return true;
859
}
860
861
if (Name.consume_front("ld")) {
862
// 'aarch64.sve.ld*'.
863
static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
864
if (LdRegex.match(Name)) {
865
Type *ScalarTy =
866
cast<VectorType>(F->getReturnType())->getElementType();
867
ElementCount EC =
868
cast<VectorType>(F->arg_begin()->getType())->getElementCount();
869
Type *Ty = VectorType::get(ScalarTy, EC);
870
static const Intrinsic::ID LoadIDs[] = {
871
Intrinsic::aarch64_sve_ld2_sret,
872
Intrinsic::aarch64_sve_ld3_sret,
873
Intrinsic::aarch64_sve_ld4_sret,
874
};
875
NewFn = Intrinsic::getDeclaration(F->getParent(),
876
LoadIDs[Name[0] - '2'], Ty);
877
return true;
878
}
879
return false; // No other 'aarch64.sve.ld*'.
880
}
881
882
if (Name.consume_front("tuple.")) {
883
// 'aarch64.sve.tuple.*'.
884
if (Name.starts_with("get")) {
885
// 'aarch64.sve.tuple.get*'.
886
Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
887
NewFn = Intrinsic::getDeclaration(F->getParent(),
888
Intrinsic::vector_extract, Tys);
889
return true;
890
}
891
892
if (Name.starts_with("set")) {
893
// 'aarch64.sve.tuple.set*'.
894
auto Args = F->getFunctionType()->params();
895
Type *Tys[] = {Args[0], Args[2], Args[1]};
896
NewFn = Intrinsic::getDeclaration(F->getParent(),
897
Intrinsic::vector_insert, Tys);
898
return true;
899
}
900
901
static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
902
if (CreateTupleRegex.match(Name)) {
903
// 'aarch64.sve.tuple.create*'.
904
auto Args = F->getFunctionType()->params();
905
Type *Tys[] = {F->getReturnType(), Args[1]};
906
NewFn = Intrinsic::getDeclaration(F->getParent(),
907
Intrinsic::vector_insert, Tys);
908
return true;
909
}
910
return false; // No other 'aarch64.sve.tuple.*'.
911
}
912
return false; // No other 'aarch64.sve.*'.
913
}
914
}
915
return false; // No other 'arm.*', 'aarch64.*'.
916
}
917
918
// Looks up the current bf16 NVVM intrinsic that corresponds to Name.
//
// Name is matched against the "abs.", "fma.rn.", "fmax.", "fmin." and "neg."
// families followed by one of the bf16/bf16x2 variant suffixes listed below.
// The caller in this file passes the intrinsic name with the leading
// "llvm.nvvm." already stripped. Returns Intrinsic::not_intrinsic when Name
// is not one of the listed spellings.
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
  using IDSwitch = StringSwitch<Intrinsic::ID>;
  const Intrinsic::ID NoMatch = Intrinsic::not_intrinsic;

  // 'abs.*'.
  if (Name.consume_front("abs.")) {
    return IDSwitch(Name)
        .Case("bf16", Intrinsic::nvvm_abs_bf16)
        .Case("bf16x2", Intrinsic::nvvm_abs_bf16x2)
        .Default(NoMatch);
  }

  // 'fma.rn.*'.
  if (Name.consume_front("fma.rn.")) {
    return IDSwitch(Name)
        .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
        .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
        .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
        .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
        .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
        .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
        .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
        .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
        .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
        .Default(NoMatch);
  }

  // 'fmax.*'.
  if (Name.consume_front("fmax.")) {
    return IDSwitch(Name)
        .Case("bf16", Intrinsic::nvvm_fmax_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
        .Default(NoMatch);
  }

  // 'fmin.*'.
  if (Name.consume_front("fmin.")) {
    return IDSwitch(Name)
        .Case("bf16", Intrinsic::nvvm_fmin_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
        .Default(NoMatch);
  }

  // 'neg.*'.
  if (Name.consume_front("neg.")) {
    return IDSwitch(Name)
        .Case("bf16", Intrinsic::nvvm_neg_bf16)
        .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
        .Default(NoMatch);
  }

  // Not one of the bf16 families handled here.
  return NoMatch;
}
997
998
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
999
bool CanUpgradeDebugIntrinsicsToRecords) {
1000
assert(F && "Illegal to upgrade a non-existent Function.");
1001
1002
StringRef Name = F->getName();
1003
1004
// Quickly eliminate it, if it's not a candidate.
1005
if (!Name.consume_front("llvm.") || Name.empty())
1006
return false;
1007
1008
switch (Name[0]) {
1009
default: break;
1010
case 'a': {
1011
bool IsArm = Name.consume_front("arm.");
1012
if (IsArm || Name.consume_front("aarch64.")) {
1013
if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
1014
return true;
1015
break;
1016
}
1017
1018
if (Name.consume_front("amdgcn.")) {
1019
if (Name == "alignbit") {
1020
// Target specific intrinsic became redundant
1021
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::fshr,
1022
{F->getReturnType()});
1023
return true;
1024
}
1025
1026
if (Name.consume_front("atomic.")) {
1027
if (Name.starts_with("inc") || Name.starts_with("dec")) {
1028
// These were replaced with atomicrmw uinc_wrap and udec_wrap, so
1029
// there's no new declaration.
1030
NewFn = nullptr;
1031
return true;
1032
}
1033
break; // No other 'amdgcn.atomic.*'
1034
}
1035
1036
if (Name.starts_with("ds.fadd") || Name.starts_with("ds.fmin") ||
1037
Name.starts_with("ds.fmax")) {
1038
// Replaced with atomicrmw fadd/fmin/fmax, so there's no new
1039
// declaration.
1040
NewFn = nullptr;
1041
return true;
1042
}
1043
1044
if (Name.starts_with("ldexp.")) {
1045
// Target specific intrinsic became redundant
1046
NewFn = Intrinsic::getDeclaration(
1047
F->getParent(), Intrinsic::ldexp,
1048
{F->getReturnType(), F->getArg(1)->getType()});
1049
return true;
1050
}
1051
break; // No other 'amdgcn.*'
1052
}
1053
1054
break;
1055
}
1056
case 'c': {
1057
if (F->arg_size() == 1) {
1058
Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
1059
.StartsWith("ctlz.", Intrinsic::ctlz)
1060
.StartsWith("cttz.", Intrinsic::cttz)
1061
.Default(Intrinsic::not_intrinsic);
1062
if (ID != Intrinsic::not_intrinsic) {
1063
rename(F);
1064
NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
1065
F->arg_begin()->getType());
1066
return true;
1067
}
1068
}
1069
1070
if (F->arg_size() == 2 && Name == "coro.end") {
1071
rename(F);
1072
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::coro_end);
1073
return true;
1074
}
1075
1076
break;
1077
}
1078
case 'd':
1079
if (Name.consume_front("dbg.")) {
1080
// Mark debug intrinsics for upgrade to new debug format.
1081
if (CanUpgradeDebugIntrinsicsToRecords &&
1082
F->getParent()->IsNewDbgInfoFormat) {
1083
if (Name == "addr" || Name == "value" || Name == "assign" ||
1084
Name == "declare" || Name == "label") {
1085
// There's no function to replace these with.
1086
NewFn = nullptr;
1087
// But we do want these to get upgraded.
1088
return true;
1089
}
1090
}
1091
// Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
1092
// converted to DbgVariableRecords later.
1093
if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
1094
rename(F);
1095
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
1096
return true;
1097
}
1098
break; // No other 'dbg.*'.
1099
}
1100
break;
1101
case 'e':
1102
if (Name.consume_front("experimental.vector.")) {
1103
Intrinsic::ID ID =
1104
StringSwitch<Intrinsic::ID>(Name)
1105
.StartsWith("extract.", Intrinsic::vector_extract)
1106
.StartsWith("insert.", Intrinsic::vector_insert)
1107
.StartsWith("splice.", Intrinsic::vector_splice)
1108
.StartsWith("reverse.", Intrinsic::vector_reverse)
1109
.StartsWith("interleave2.", Intrinsic::vector_interleave2)
1110
.StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
1111
.Default(Intrinsic::not_intrinsic);
1112
if (ID != Intrinsic::not_intrinsic) {
1113
const auto *FT = F->getFunctionType();
1114
SmallVector<Type *, 2> Tys;
1115
if (ID == Intrinsic::vector_extract ||
1116
ID == Intrinsic::vector_interleave2)
1117
// Extracting overloads the return type.
1118
Tys.push_back(FT->getReturnType());
1119
if (ID != Intrinsic::vector_interleave2)
1120
Tys.push_back(FT->getParamType(0));
1121
if (ID == Intrinsic::vector_insert)
1122
// Inserting overloads the inserted type.
1123
Tys.push_back(FT->getParamType(1));
1124
rename(F);
1125
NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
1126
return true;
1127
}
1128
1129
if (Name.consume_front("reduce.")) {
1130
SmallVector<StringRef, 2> Groups;
1131
static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
1132
if (R.match(Name, &Groups))
1133
ID = StringSwitch<Intrinsic::ID>(Groups[1])
1134
.Case("add", Intrinsic::vector_reduce_add)
1135
.Case("mul", Intrinsic::vector_reduce_mul)
1136
.Case("and", Intrinsic::vector_reduce_and)
1137
.Case("or", Intrinsic::vector_reduce_or)
1138
.Case("xor", Intrinsic::vector_reduce_xor)
1139
.Case("smax", Intrinsic::vector_reduce_smax)
1140
.Case("smin", Intrinsic::vector_reduce_smin)
1141
.Case("umax", Intrinsic::vector_reduce_umax)
1142
.Case("umin", Intrinsic::vector_reduce_umin)
1143
.Case("fmax", Intrinsic::vector_reduce_fmax)
1144
.Case("fmin", Intrinsic::vector_reduce_fmin)
1145
.Default(Intrinsic::not_intrinsic);
1146
1147
bool V2 = false;
1148
if (ID == Intrinsic::not_intrinsic) {
1149
static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
1150
Groups.clear();
1151
V2 = true;
1152
if (R2.match(Name, &Groups))
1153
ID = StringSwitch<Intrinsic::ID>(Groups[1])
1154
.Case("fadd", Intrinsic::vector_reduce_fadd)
1155
.Case("fmul", Intrinsic::vector_reduce_fmul)
1156
.Default(Intrinsic::not_intrinsic);
1157
}
1158
if (ID != Intrinsic::not_intrinsic) {
1159
rename(F);
1160
auto Args = F->getFunctionType()->params();
1161
NewFn =
1162
Intrinsic::getDeclaration(F->getParent(), ID, {Args[V2 ? 1 : 0]});
1163
return true;
1164
}
1165
break; // No other 'expermental.vector.reduce.*'.
1166
}
1167
break; // No other 'experimental.vector.*'.
1168
}
1169
break; // No other 'e*'.
1170
case 'f':
1171
if (Name.starts_with("flt.rounds")) {
1172
rename(F);
1173
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::get_rounding);
1174
return true;
1175
}
1176
break;
1177
case 'i':
1178
if (Name.starts_with("invariant.group.barrier")) {
1179
// Rename invariant.group.barrier to launder.invariant.group
1180
auto Args = F->getFunctionType()->params();
1181
Type* ObjectPtr[1] = {Args[0]};
1182
rename(F);
1183
NewFn = Intrinsic::getDeclaration(F->getParent(),
1184
Intrinsic::launder_invariant_group, ObjectPtr);
1185
return true;
1186
}
1187
break;
1188
case 'm': {
1189
// Updating the memory intrinsics (memcpy/memmove/memset) that have an
1190
// alignment parameter to embedding the alignment as an attribute of
1191
// the pointer args.
1192
if (unsigned ID = StringSwitch<unsigned>(Name)
1193
.StartsWith("memcpy.", Intrinsic::memcpy)
1194
.StartsWith("memmove.", Intrinsic::memmove)
1195
.Default(0)) {
1196
if (F->arg_size() == 5) {
1197
rename(F);
1198
// Get the types of dest, src, and len
1199
ArrayRef<Type *> ParamTypes =
1200
F->getFunctionType()->params().slice(0, 3);
1201
NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ParamTypes);
1202
return true;
1203
}
1204
}
1205
if (Name.starts_with("memset.") && F->arg_size() == 5) {
1206
rename(F);
1207
// Get the types of dest, and len
1208
const auto *FT = F->getFunctionType();
1209
Type *ParamTypes[2] = {
1210
FT->getParamType(0), // Dest
1211
FT->getParamType(2) // len
1212
};
1213
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
1214
ParamTypes);
1215
return true;
1216
}
1217
break;
1218
}
1219
case 'n': {
1220
if (Name.consume_front("nvvm.")) {
1221
// Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
1222
if (F->arg_size() == 1) {
1223
Intrinsic::ID IID =
1224
StringSwitch<Intrinsic::ID>(Name)
1225
.Cases("brev32", "brev64", Intrinsic::bitreverse)
1226
.Case("clz.i", Intrinsic::ctlz)
1227
.Case("popc.i", Intrinsic::ctpop)
1228
.Default(Intrinsic::not_intrinsic);
1229
if (IID != Intrinsic::not_intrinsic) {
1230
NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
1231
{F->getReturnType()});
1232
return true;
1233
}
1234
}
1235
1236
// Check for nvvm intrinsics that need a return type adjustment.
1237
if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
1238
Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
1239
if (IID != Intrinsic::not_intrinsic) {
1240
NewFn = nullptr;
1241
return true;
1242
}
1243
}
1244
1245
// The following nvvm intrinsics correspond exactly to an LLVM idiom, but
1246
// not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
1247
//
1248
// TODO: We could add lohi.i2d.
1249
bool Expand = false;
1250
if (Name.consume_front("abs."))
1251
// nvvm.abs.{i,ii}
1252
Expand = Name == "i" || Name == "ll";
1253
else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f")
1254
Expand = true;
1255
else if (Name.consume_front("max.") || Name.consume_front("min."))
1256
// nvvm.{min,max}.{i,ii,ui,ull}
1257
Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
1258
Name == "ui" || Name == "ull";
1259
else if (Name.consume_front("atomic.load.add."))
1260
// nvvm.atomic.load.add.{f32.p,f64.p}
1261
Expand = Name.starts_with("f32.p") || Name.starts_with("f64.p");
1262
else
1263
Expand = false;
1264
1265
if (Expand) {
1266
NewFn = nullptr;
1267
return true;
1268
}
1269
break; // No other 'nvvm.*'.
1270
}
1271
break;
1272
}
1273
case 'o':
1274
// We only need to change the name to match the mangling including the
1275
// address space.
1276
if (Name.starts_with("objectsize.")) {
1277
Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1278
if (F->arg_size() == 2 || F->arg_size() == 3 ||
1279
F->getName() !=
1280
Intrinsic::getName(Intrinsic::objectsize, Tys, F->getParent())) {
1281
rename(F);
1282
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
1283
Tys);
1284
return true;
1285
}
1286
}
1287
break;
1288
1289
case 'p':
1290
if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
1291
rename(F);
1292
NewFn = Intrinsic::getDeclaration(
1293
F->getParent(), Intrinsic::ptr_annotation,
1294
{F->arg_begin()->getType(), F->getArg(1)->getType()});
1295
return true;
1296
}
1297
break;
1298
1299
case 'r': {
1300
if (Name.consume_front("riscv.")) {
1301
Intrinsic::ID ID;
1302
ID = StringSwitch<Intrinsic::ID>(Name)
1303
.Case("aes32dsi", Intrinsic::riscv_aes32dsi)
1304
.Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
1305
.Case("aes32esi", Intrinsic::riscv_aes32esi)
1306
.Case("aes32esmi", Intrinsic::riscv_aes32esmi)
1307
.Default(Intrinsic::not_intrinsic);
1308
if (ID != Intrinsic::not_intrinsic) {
1309
if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1310
rename(F);
1311
NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
1312
return true;
1313
}
1314
break; // No other applicable upgrades.
1315
}
1316
1317
ID = StringSwitch<Intrinsic::ID>(Name)
1318
.StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
1319
.StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
1320
.Default(Intrinsic::not_intrinsic);
1321
if (ID != Intrinsic::not_intrinsic) {
1322
if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1323
F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1324
rename(F);
1325
NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
1326
return true;
1327
}
1328
break; // No other applicable upgrades.
1329
}
1330
1331
ID = StringSwitch<Intrinsic::ID>(Name)
1332
.StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
1333
.StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
1334
.StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
1335
.StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
1336
.StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
1337
.StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
1338
.Default(Intrinsic::not_intrinsic);
1339
if (ID != Intrinsic::not_intrinsic) {
1340
if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1341
rename(F);
1342
NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
1343
return true;
1344
}
1345
break; // No other applicable upgrades.
1346
}
1347
break; // No other 'riscv.*' intrinsics
1348
}
1349
} break;
1350
1351
case 's':
1352
if (Name == "stackprotectorcheck") {
1353
NewFn = nullptr;
1354
return true;
1355
}
1356
break;
1357
1358
case 'v': {
1359
if (Name == "var.annotation" && F->arg_size() == 4) {
1360
rename(F);
1361
NewFn = Intrinsic::getDeclaration(
1362
F->getParent(), Intrinsic::var_annotation,
1363
{{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1364
return true;
1365
}
1366
break;
1367
}
1368
1369
case 'w':
1370
if (Name.consume_front("wasm.")) {
1371
Intrinsic::ID ID =
1372
StringSwitch<Intrinsic::ID>(Name)
1373
.StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
1374
.StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
1375
.StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
1376
.Default(Intrinsic::not_intrinsic);
1377
if (ID != Intrinsic::not_intrinsic) {
1378
rename(F);
1379
NewFn =
1380
Intrinsic::getDeclaration(F->getParent(), ID, F->getReturnType());
1381
return true;
1382
}
1383
1384
if (Name.consume_front("dot.i8x16.i7x16.")) {
1385
ID = StringSwitch<Intrinsic::ID>(Name)
1386
.Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1387
.Case("add.signed",
1388
Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1389
.Default(Intrinsic::not_intrinsic);
1390
if (ID != Intrinsic::not_intrinsic) {
1391
rename(F);
1392
NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
1393
return true;
1394
}
1395
break; // No other 'wasm.dot.i8x16.i7x16.*'.
1396
}
1397
break; // No other 'wasm.*'.
1398
}
1399
break;
1400
1401
case 'x':
1402
if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1403
return true;
1404
}
1405
1406
auto *ST = dyn_cast<StructType>(F->getReturnType());
1407
if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1408
F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1409
// Replace return type with literal non-packed struct. Only do this for
1410
// intrinsics declared to return a struct, not for intrinsics with
1411
// overloaded return type, in which case the exact struct type will be
1412
// mangled into the name.
1413
SmallVector<Intrinsic::IITDescriptor> Desc;
1414
Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
1415
if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1416
auto *FT = F->getFunctionType();
1417
auto *NewST = StructType::get(ST->getContext(), ST->elements());
1418
auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1419
std::string Name = F->getName().str();
1420
rename(F);
1421
NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1422
Name, F->getParent());
1423
1424
// The new function may also need remangling.
1425
if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1426
NewFn = *Result;
1427
return true;
1428
}
1429
}
1430
1431
// Remangle our intrinsic since we upgrade the mangling
1432
auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
1433
if (Result != std::nullopt) {
1434
NewFn = *Result;
1435
return true;
1436
}
1437
1438
// This may not belong here. This function is effectively being overloaded
1439
// to both detect an intrinsic which needs upgrading, and to provide the
1440
// upgraded form of the intrinsic. We should perhaps have two separate
1441
// functions for this.
1442
return false;
1443
}
1444
1445
bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
1446
bool CanUpgradeDebugIntrinsicsToRecords) {
1447
NewFn = nullptr;
1448
bool Upgraded =
1449
upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1450
assert(F != NewFn && "Intrinsic function upgraded to the same function");
1451
1452
// Upgrade intrinsic attributes. This does not change the function.
1453
if (NewFn)
1454
F = NewFn;
1455
if (Intrinsic::ID id = F->getIntrinsicID())
1456
F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
1457
return Upgraded;
1458
}
1459
1460
// Upgrades a two-field llvm.global_ctors / llvm.global_dtors variable.
//
// Returns nullptr unless GV is named "llvm.global_ctors" or
// "llvm.global_dtors", has an initializer, and is an array of structs with
// exactly two elements. Otherwise returns a newly allocated GlobalVariable
// with the same name and linkage whose entries carry the two original fields
// plus a null pointer as a third field.
GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  // Only named variables with an initializer are candidates.
  if (!GV->hasName() || !GV->hasInitializer())
    return nullptr;
  const StringRef GVName = GV->getName();
  if (GVName != "llvm.global_ctors" && GVName != "llvm.global_dtors")
    return nullptr;

  // The value must be an array of two-element structs.
  auto *OldArrayTy = dyn_cast<ArrayType>(GV->getValueType());
  if (!OldArrayTy)
    return nullptr;
  auto *OldEntryTy = dyn_cast<StructType>(OldArrayTy->getElementType());
  if (!OldEntryTy || OldEntryTy->getNumElements() != 2)
    return nullptr;

  LLVMContext &Ctx = GV->getContext();
  IRBuilder<> IRB(Ctx);
  auto *PtrTy = IRB.getPtrTy();
  auto *NewEntryTy = StructType::get(OldEntryTy->getElementType(0),
                                     OldEntryTy->getElementType(1), PtrTy);

  // Rebuild every entry with a null pointer appended as the third field.
  Constant *OldInit = GV->getInitializer();
  const unsigned NumEntries = OldInit->getNumOperands();
  std::vector<Constant *> NewEntries;
  NewEntries.reserve(NumEntries);
  for (unsigned Idx = 0; Idx != NumEntries; ++Idx) {
    auto *OldEntry = cast<Constant>(OldInit->getOperand(Idx));
    NewEntries.push_back(ConstantStruct::get(
        NewEntryTy, OldEntry->getAggregateElement(0u),
        OldEntry->getAggregateElement(1u), Constant::getNullValue(PtrTy)));
  }
  Constant *NewInit =
      ConstantArray::get(ArrayType::get(NewEntryTy, NumEntries), NewEntries);

  return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
                            NewInit, GV->getName());
}
1490
1491
// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1492
// to byte shuffles.
1493
static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1494
unsigned Shift) {
1495
auto *ResultTy = cast<FixedVectorType>(Op->getType());
1496
unsigned NumElts = ResultTy->getNumElements() * 8;
1497
1498
// Bitcast from a 64-bit element type to a byte element type.
1499
Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1500
Op = Builder.CreateBitCast(Op, VecTy, "cast");
1501
1502
// We'll be shuffling in zeroes.
1503
Value *Res = Constant::getNullValue(VecTy);
1504
1505
// If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1506
// we'll just return the zero vector.
1507
if (Shift < 16) {
1508
int Idxs[64];
1509
// 256/512-bit version is split into 2/4 16-byte lanes.
1510
for (unsigned l = 0; l != NumElts; l += 16)
1511
for (unsigned i = 0; i != 16; ++i) {
1512
unsigned Idx = NumElts + i - Shift;
1513
if (Idx < NumElts)
1514
Idx -= NumElts - 16; // end of lane, switch operand.
1515
Idxs[l + i] = Idx + l;
1516
}
1517
1518
Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1519
}
1520
1521
// Bitcast back to a 64-bit element type.
1522
return Builder.CreateBitCast(Res, ResultTy, "cast");
1523
}
1524
1525
// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1526
// to byte shuffles.
1527
static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1528
unsigned Shift) {
1529
auto *ResultTy = cast<FixedVectorType>(Op->getType());
1530
unsigned NumElts = ResultTy->getNumElements() * 8;
1531
1532
// Bitcast from a 64-bit element type to a byte element type.
1533
Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1534
Op = Builder.CreateBitCast(Op, VecTy, "cast");
1535
1536
// We'll be shuffling in zeroes.
1537
Value *Res = Constant::getNullValue(VecTy);
1538
1539
// If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1540
// we'll just return the zero vector.
1541
if (Shift < 16) {
1542
int Idxs[64];
1543
// 256/512-bit version is split into 2/4 16-byte lanes.
1544
for (unsigned l = 0; l != NumElts; l += 16)
1545
for (unsigned i = 0; i != 16; ++i) {
1546
unsigned Idx = i + Shift;
1547
if (Idx >= 16)
1548
Idx += NumElts - 16; // end of lane, switch operand.
1549
Idxs[l + i] = Idx + l;
1550
}
1551
1552
Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
1553
}
1554
1555
// Bitcast back to a 64-bit element type.
1556
return Builder.CreateBitCast(Res, ResultTy, "cast");
1557
}
1558
1559
// Converts an integer mask into a vector of i1 with NumElts elements.
//
// Mask is bitcast to an i1 vector with one element per bit of its integer
// type. When NumElts is 4 or fewer, the leading NumElts elements are then
// extracted with a shuffle. NumElts must be a power of two.
static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
                            unsigned NumElts) {
  assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");

  const unsigned MaskBits = cast<IntegerType>(Mask->getType())->getBitWidth();
  llvm::VectorType *BitVecTy =
      FixedVectorType::get(Builder.getInt1Ty(), MaskBits);
  Value *BitVec = Builder.CreateBitCast(Mask, BitVecTy);

  if (NumElts > 4)
    return BitVec;

  // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
  // i8 and we need to extract down to the right number of elements.
  const int LeadingLanes[4] = {0, 1, 2, 3};
  return Builder.CreateShuffleVector(
      BitVec, BitVec, ArrayRef(LeadingLanes, NumElts), "extract");
}
1578
1579
// Emits a vector select between Op0 and Op1 controlled by an integer Mask.
//
// Returns Op0 unchanged when Mask is a constant with all bits set. Otherwise
// Mask is converted by getX86MaskVec to one i1 per element of Op0 and a select
// of Op0 (mask bit set) over Op1 is created.
static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
                            Value *Op1) {
  // If the mask is all ones just emit the first operation.
  const auto *ConstMask = dyn_cast<Constant>(Mask);
  if (ConstMask && ConstMask->isAllOnesValue())
    return Op0;

  const unsigned NumElts =
      cast<FixedVectorType>(Op0->getType())->getNumElements();
  Value *MaskVec = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateSelect(MaskVec, Op0, Op1);
}
1590
1591
// Emits a select between Op0 and Op1 controlled by element 0 of Mask.
//
// Returns Op0 unchanged when Mask is a constant with all bits set. Otherwise
// Mask is bitcast to an i1 vector with one element per bit of its integer
// type, and element 0 of that vector is used as the select condition.
static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
                                  Value *Op1) {
  // If the mask is all ones just emit the first operation.
  const auto *ConstMask = dyn_cast<Constant>(Mask);
  if (ConstMask && ConstMask->isAllOnesValue())
    return Op0;

  const unsigned MaskBits = Mask->getType()->getIntegerBitWidth();
  auto *BitVecTy = FixedVectorType::get(Builder.getInt1Ty(), MaskBits);
  Value *BitVec = Builder.CreateBitCast(Mask, BitVecTy);
  Value *Bit0 = Builder.CreateExtractElement(BitVec, static_cast<uint64_t>(0));
  return Builder.CreateSelect(Bit0, Op0, Op1);
}
1604
1605
// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1606
// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
1607
// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
1608
static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
1609
Value *Op1, Value *Shift,
1610
Value *Passthru, Value *Mask,
1611
bool IsVALIGN) {
1612
unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1613
1614
unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1615
assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1616
assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1617
assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1618
1619
// Mask the immediate for VALIGN.
1620
if (IsVALIGN)
1621
ShiftVal &= (NumElts - 1);
1622
1623
// If palignr is shifting the pair of vectors more than the size of two
1624
// lanes, emit zero.
1625
if (ShiftVal >= 32)
1626
return llvm::Constant::getNullValue(Op0->getType());
1627
1628
// If palignr is shifting the pair of input vectors more than one lane,
1629
// but less than two lanes, convert to shifting in zeroes.
1630
if (ShiftVal > 16) {
1631
ShiftVal -= 16;
1632
Op1 = Op0;
1633
Op0 = llvm::Constant::getNullValue(Op0->getType());
1634
}
1635
1636
int Indices[64];
1637
// 256-bit palignr operates on 128-bit lanes so we need to handle that
1638
for (unsigned l = 0; l < NumElts; l += 16) {
1639
for (unsigned i = 0; i != 16; ++i) {
1640
unsigned Idx = ShiftVal + i;
1641
if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
1642
Idx += NumElts - 16; // End of lane, switch operand.
1643
Indices[l + i] = Idx + l;
1644
}
1645
}
1646
1647
Value *Align = Builder.CreateShuffleVector(
1648
Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
1649
1650
return emitX86Select(Builder, Mask, Align, Passthru);
1651
}
1652
1653
// Upgrade a masked VPERMT2-style call to the unmasked vpermi2var intrinsic
// matching the call's result type, followed by a select on argument 3.
//
// ZeroMask chooses an all-zero passthru instead of (a bitcast of) argument 1.
// IndexForm is false when arguments 0 and 1 have to be exchanged before being
// handed to vpermi2var.
static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
                                          bool ZeroMask, bool IndexForm) {
  Type *RetTy = CI.getType();
  const unsigned VecWidth = RetTy->getPrimitiveSizeInBits();
  const unsigned EltWidth = RetTy->getScalarSizeInBits();
  const bool IsFloat = RetTy->isFPOrFPVectorTy();

  // Chooses among the 128/256/512-bit variants of one intrinsic family.
  auto ByWidth = [VecWidth](Intrinsic::ID I128, Intrinsic::ID I256,
                            Intrinsic::ID I512) -> Intrinsic::ID {
    switch (VecWidth) {
    case 128:
      return I128;
    case 256:
      return I256;
    case 512:
      return I512;
    }
    llvm_unreachable("Unexpected intrinsic");
  };

  Intrinsic::ID IID;
  switch (EltWidth) {
  case 32:
    IID = IsFloat ? ByWidth(Intrinsic::x86_avx512_vpermi2var_ps_128,
                            Intrinsic::x86_avx512_vpermi2var_ps_256,
                            Intrinsic::x86_avx512_vpermi2var_ps_512)
                  : ByWidth(Intrinsic::x86_avx512_vpermi2var_d_128,
                            Intrinsic::x86_avx512_vpermi2var_d_256,
                            Intrinsic::x86_avx512_vpermi2var_d_512);
    break;
  case 64:
    IID = IsFloat ? ByWidth(Intrinsic::x86_avx512_vpermi2var_pd_128,
                            Intrinsic::x86_avx512_vpermi2var_pd_256,
                            Intrinsic::x86_avx512_vpermi2var_pd_512)
                  : ByWidth(Intrinsic::x86_avx512_vpermi2var_q_128,
                            Intrinsic::x86_avx512_vpermi2var_q_256,
                            Intrinsic::x86_avx512_vpermi2var_q_512);
    break;
  case 16:
    IID = ByWidth(Intrinsic::x86_avx512_vpermi2var_hi_128,
                  Intrinsic::x86_avx512_vpermi2var_hi_256,
                  Intrinsic::x86_avx512_vpermi2var_hi_512);
    break;
  case 8:
    IID = ByWidth(Intrinsic::x86_avx512_vpermi2var_qi_128,
                  Intrinsic::x86_avx512_vpermi2var_qi_256,
                  Intrinsic::x86_avx512_vpermi2var_qi_512);
    break;
  default:
    llvm_unreachable("Unexpected intrinsic");
  }

  Value *First = CI.getArgOperand(0);
  Value *Second = CI.getArgOperand(1);

  // Outside index form the first two operands arrive in the opposite order.
  if (!IndexForm)
    std::swap(First, Second);

  Value *Permuted =
      Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
                         {First, Second, CI.getArgOperand(2)});

  // The passthru is created after the call so instruction order is unchanged.
  Value *PassThru;
  if (ZeroMask)
    PassThru = ConstantAggregateZero::get(RetTy);
  else
    PassThru = Builder.CreateBitCast(CI.getArgOperand(1), RetTy);

  return emitX86Select(Builder, CI.getArgOperand(3), Permuted, PassThru);
}
1713
1714
// Replace a two-input x86 intrinsic call with a call to intrinsic IID
// overloaded on the call's result type. When the old call carries four
// arguments (the masked form), operand 2 and operand 3 are forwarded to
// emitX86Select as passthru and mask respectively.
static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
                                         Intrinsic::ID IID) {
  Type *RetTy = CI.getType();
  Value *LHS = CI.getOperand(0);
  Value *RHS = CI.getOperand(1);

  Function *Decl = Intrinsic::getDeclaration(CI.getModule(), IID, RetTy);
  Value *Result = Builder.CreateCall(Decl, {LHS, RHS});

  // Anything other than the four-argument masked form is returned as is.
  if (CI.arg_size() != 4)
    return Result;

  Value *PassThru = CI.getOperand(2);
  Value *Mask = CI.getOperand(3);
  return emitX86Select(Builder, Mask, Result, PassThru);
}
1729
1730
// Upgrade an x86 rotate intrinsic to llvm.fshl / llvm.fshr with the source
// passed as both funnel inputs. A four-argument (masked) call additionally
// gets a select between the result and operand 2 under the mask in operand 3.
static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
                               bool IsRotateRight) {
  Type *VecTy = CI.getType();
  Value *Src = CI.getArgOperand(0);
  Value *Amt = CI.getArgOperand(1);

  // The amount can be a scalar immediate; splat it to the vector type then.
  // Funnel shift amounts are taken modulo the (power-of-2) element width, so
  // only the low log2 bits matter and a plain integer cast is enough.
  if (Amt->getType() != VecTy) {
    const unsigned NumElts = cast<FixedVectorType>(VecTy)->getNumElements();
    Value *Scalar =
        Builder.CreateIntCast(Amt, VecTy->getScalarType(), /*isSigned=*/false);
    Amt = Builder.CreateVectorSplat(NumElts, Scalar);
  }

  Intrinsic::ID FunnelID = Intrinsic::fshl;
  if (IsRotateRight)
    FunnelID = Intrinsic::fshr;

  Function *Decl = Intrinsic::getDeclaration(CI.getModule(), FunnelID, VecTy);
  Value *Result = Builder.CreateCall(Decl, {Src, Src, Amt});

  // Only the four-argument masked form needs the trailing select.
  if (CI.arg_size() != 4)
    return Result;

  Value *PassThru = CI.getOperand(2);
  Value *Mask = CI.getOperand(3);
  return emitX86Select(Builder, Mask, Result, PassThru);
}
1756
1757
// Upgrade an XOP vpcom/vpcomu call to an icmp sign-extended to the result
// type. Imm selects the comparison: 0 = LT, 1 = LE, 2 = GT, 3 = GE, 4 = EQ,
// 5 = NE, 6 = always false (all zeroes), 7 = always true (all ones).
// IsSigned picks the signed or unsigned flavour of the ordered predicates.
static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
                              bool IsSigned) {
  static const CmpInst::Predicate SignedPreds[] = {
      ICmpInst::ICMP_SLT, ICmpInst::ICMP_SLE, ICmpInst::ICMP_SGT,
      ICmpInst::ICMP_SGE, ICmpInst::ICMP_EQ,  ICmpInst::ICMP_NE};
  static const CmpInst::Predicate UnsignedPreds[] = {
      ICmpInst::ICMP_ULT, ICmpInst::ICMP_ULE, ICmpInst::ICMP_UGT,
      ICmpInst::ICMP_UGE, ICmpInst::ICMP_EQ,  ICmpInst::ICMP_NE};

  Type *RetTy = CI.getType();
  Value *LHS = CI.getArgOperand(0);
  Value *RHS = CI.getArgOperand(1);

  // The two constant predicates need no comparison at all.
  if (Imm == 0x6)
    return Constant::getNullValue(RetTy); // FALSE
  if (Imm == 0x7)
    return Constant::getAllOnesValue(RetTy); // TRUE
  if (Imm > 0x7)
    llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");

  const CmpInst::Predicate Pred =
      IsSigned ? SignedPreds[Imm] : UnsignedPreds[Imm];

  Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
  return Builder.CreateSExt(Cmp, RetTy);
}
1795
1796
// Upgrade an x86 concat-shift intrinsic to llvm.fshl / llvm.fshr. For right
// shifts the first two arguments are exchanged before the funnel shift.
//
// Calls with four or more arguments are masked: the last argument is the
// mask. The passthru is argument 3 when there are five arguments; otherwise
// it is zero if ZeroMask is set, else argument 0.
static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
                                    bool IsShiftRight, bool ZeroMask) {
  Type *VecTy = CI.getType();
  Value *First = CI.getArgOperand(0);
  Value *Second = CI.getArgOperand(1);
  Value *Amt = CI.getArgOperand(2);

  if (IsShiftRight)
    std::swap(First, Second);

  // The amount can be a scalar immediate; splat it to the vector type then.
  // Funnel shift amounts are taken modulo the (power-of-2) element width, so
  // only the low log2 bits matter and a plain integer cast is enough.
  if (Amt->getType() != VecTy) {
    const unsigned NumElts = cast<FixedVectorType>(VecTy)->getNumElements();
    Value *Scalar =
        Builder.CreateIntCast(Amt, VecTy->getScalarType(), /*isSigned=*/false);
    Amt = Builder.CreateVectorSplat(NumElts, Scalar);
  }

  Intrinsic::ID FunnelID = Intrinsic::fshl;
  if (IsShiftRight)
    FunnelID = Intrinsic::fshr;

  Function *Decl = Intrinsic::getDeclaration(CI.getModule(), FunnelID, VecTy);
  Value *Result = Builder.CreateCall(Decl, {First, Second, Amt});

  const unsigned NumArgs = CI.arg_size();
  if (NumArgs < 4)
    return Result; // Unmasked form.

  Value *PassThru;
  if (NumArgs == 5)
    PassThru = CI.getArgOperand(3);
  else if (ZeroMask)
    PassThru = ConstantAggregateZero::get(CI.getType());
  else
    PassThru = CI.getArgOperand(0);

  Value *Mask = CI.getOperand(NumArgs - 1);
  return emitX86Select(Builder, Mask, Result, PassThru);
}
1829
1830
// Emit the replacement for an x86 masked store of Data to Ptr. With Aligned
// set the store uses the byte size of Data's type as its alignment, otherwise
// alignment 1. A constant all-ones mask yields an ordinary store; any other
// mask yields a masked store.
static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data,
                                 Value *Mask, bool Aligned) {
  Type *DataTy = Data->getType();

  // Make the pointer point at the stored type.
  Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(DataTy));

  Align Alignment(1);
  if (Aligned)
    Alignment = Align(DataTy->getPrimitiveSizeInBits().getFixedValue() / 8);

  // Every lane enabled: a plain store does the job.
  const auto *ConstMask = dyn_cast<Constant>(Mask);
  if (ConstMask && ConstMask->isAllOnesValue())
    return Builder.CreateAlignedStore(Data, Ptr, Alignment);

  // Turn the integer mask into a vector of i1, one bit per element.
  const unsigned NumElts = cast<FixedVectorType>(DataTy)->getNumElements();
  Value *MaskVec = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedStore(Data, Ptr, Alignment, MaskVec);
}
1850
1851
// Emit the replacement for an x86 masked load from Ptr whose result type is
// that of Passthru. With Aligned set the load uses the byte size of that type
// as its alignment, otherwise alignment 1. A constant all-ones mask yields an
// ordinary load; any other mask yields a masked load with Passthru supplying
// the disabled lanes.
static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,
                                Value *Passthru, Value *Mask, bool Aligned) {
  Type *ValTy = Passthru->getType();

  // Make the pointer point at the loaded type.
  Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));

  Align Alignment(1);
  if (Aligned)
    Alignment = Align(
        Passthru->getType()->getPrimitiveSizeInBits().getFixedValue() / 8);

  // Every lane enabled: a plain load does the job.
  const auto *ConstMask = dyn_cast<Constant>(Mask);
  if (ConstMask && ConstMask->isAllOnesValue())
    return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);

  // Turn the integer mask into a vector of i1, one bit per element.
  const unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
  Value *MaskVec = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, MaskVec, Passthru);
}
1873
1874
// Upgrade an x86 absolute-value intrinsic to llvm.abs, passing `false` as the
// intrinsic's second (i1) operand. The three-argument masked form also gets a
// select between the result and argument 1 under the mask in argument 2.
static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
  Type *RetTy = CI.getType();
  Value *Src = CI.getArgOperand(0);

  Function *AbsDecl =
      Intrinsic::getDeclaration(CI.getModule(), Intrinsic::abs, RetTy);
  Value *Result = Builder.CreateCall(AbsDecl, {Src, Builder.getInt1(false)});

  if (CI.arg_size() != 3)
    return Result;

  return emitX86Select(Builder, CI.getArgOperand(2), Result,
                       CI.getArgOperand(1));
}
1883
1884
// Upgrade PMULDQ / PMULUDQ to a plain multiply. Both inputs are bitcast to
// the result type and reduced to their low 32 bits per element (sign-extended
// when IsSigned, zero-extended otherwise) before being multiplied. The
// four-argument masked form gets a select between the product and argument 2
// under the mask in argument 3.
static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
  Type *RetTy = CI.getType();

  // Keeps only the low 32 bits of every element of V.
  auto ExtendLow32 = [&](Value *V) -> Value * {
    if (IsSigned) {
      // Shift left, then arithmetic shift right to replicate the sign bit.
      Constant *ShiftAmt = ConstantInt::get(RetTy, 32);
      V = Builder.CreateShl(V, ShiftAmt);
      return Builder.CreateAShr(V, ShiftAmt);
    }
    // Clear the upper bits.
    Constant *LowMask = ConstantInt::get(RetTy, 0xffffffff);
    return Builder.CreateAnd(V, LowMask);
  };

  // The arguments are vXi32; reinterpret them as the vXi64 result type.
  // Both casts are emitted before either operand is extended.
  Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), RetTy);
  Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), RetTy);

  LHS = ExtendLow32(LHS);
  RHS = ExtendLow32(RHS);

  Value *Product = Builder.CreateMul(LHS, RHS);

  if (CI.arg_size() != 4)
    return Product;

  return emitX86Select(Builder, CI.getArgOperand(3), Product,
                       CI.getArgOperand(2));
}
1912
1913
// Applying mask on vector of i1's and make sure result is at least 8 bits wide.
1914
static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
1915
Value *Mask) {
1916
unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
1917
if (Mask) {
1918
const auto *C = dyn_cast<Constant>(Mask);
1919
if (!C || !C->isAllOnesValue())
1920
Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
1921
}
1922
1923
if (NumElts < 8) {
1924
int Indices[8];
1925
for (unsigned i = 0; i != NumElts; ++i)
1926
Indices[i] = i;
1927
for (unsigned i = NumElts; i != 8; ++i)
1928
Indices[i] = NumElts + i % NumElts;
1929
Vec = Builder.CreateShuffleVector(Vec,
1930
Constant::getNullValue(Vec->getType()),
1931
Indices);
1932
}
1933
return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
1934
}
1935
1936
// Upgrade a masked integer vector compare. CC selects the comparison between
// arguments 0 and 1: 0 = EQ, 1 = LT, 2 = LE, 3 = always false, 4 = NE,
// 5 = GE, 6 = GT, 7 = always true; Signed picks the signed or unsigned
// flavour of the ordered predicates. The i1 vector result is then combined
// with the call's last argument (the mask) by applyX86MaskOn1BitsVec.
static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
                                   unsigned CC, bool Signed) {
  Value *LHS = CI.getArgOperand(0);
  const unsigned NumElts =
      cast<FixedVectorType>(LHS->getType())->getNumElements();

  // Emits the icmp using whichever predicate matches the signedness.
  auto EmitICmp = [&](ICmpInst::Predicate SignedPred,
                      ICmpInst::Predicate UnsignedPred) -> Value * {
    return Builder.CreateICmp(Signed ? SignedPred : UnsignedPred, LHS,
                              CI.getArgOperand(1));
  };

  Value *Cmp;
  switch (CC) {
  case 0:
    Cmp = EmitICmp(ICmpInst::ICMP_EQ, ICmpInst::ICMP_EQ);
    break;
  case 1:
    Cmp = EmitICmp(ICmpInst::ICMP_SLT, ICmpInst::ICMP_ULT);
    break;
  case 2:
    Cmp = EmitICmp(ICmpInst::ICMP_SLE, ICmpInst::ICMP_ULE);
    break;
  case 3: // Constant false.
    Cmp = Constant::getNullValue(
        FixedVectorType::get(Builder.getInt1Ty(), NumElts));
    break;
  case 4:
    Cmp = EmitICmp(ICmpInst::ICMP_NE, ICmpInst::ICMP_NE);
    break;
  case 5:
    Cmp = EmitICmp(ICmpInst::ICMP_SGE, ICmpInst::ICMP_UGE);
    break;
  case 6:
    Cmp = EmitICmp(ICmpInst::ICMP_SGT, ICmpInst::ICMP_UGT);
    break;
  case 7: // Constant true.
    Cmp = Constant::getAllOnesValue(
        FixedVectorType::get(Builder.getInt1Ty(), NumElts));
    break;
  default:
    llvm_unreachable("Unknown condition code");
  }

  Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
  return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
}
1966
1967
// Replace a masked intrinsic with an older unmasked intrinsic.
1968
static Value *upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
1969
Intrinsic::ID IID) {
1970
Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1971
Value *Rep = Builder.CreateCall(Intrin,
1972
{ CI.getArgOperand(0), CI.getArgOperand(1) });
1973
return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
1974
}
1975
1976
// Upgrade a masked scalar move. Bit 0 of the mask (argument 3) chooses
// between element 0 of argument 1 (bit set) and element 0 of argument 2 (bit
// clear); the chosen scalar is inserted into element 0 of argument 0.
static Value *upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
  Value *Dst = CI.getArgOperand(0);
  Value *OnSet = CI.getArgOperand(1);
  Value *OnClear = CI.getArgOperand(2);
  Value *Mask = CI.getArgOperand(3);

  // Only the lowest mask bit takes part in the decision.
  Value *LowBit = Builder.CreateAnd(Mask, APInt(8, 1));
  Value *IsSet = Builder.CreateIsNotNull(LowBit);

  Value *SetElt = Builder.CreateExtractElement(OnSet, (uint64_t)0);
  Value *ClearElt = Builder.CreateExtractElement(OnClear, (uint64_t)0);
  Value *Chosen = Builder.CreateSelect(IsSet, SetElt, ClearElt);

  return Builder.CreateInsertElement(Dst, Chosen, (uint64_t)0);
}
1989
1990
// Upgrade a mask-to-vector intrinsic: argument 0 is turned into a mask vector
// with one entry per element of the result type, which is then sign-extended
// to the result type.
static Value *upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
  Type *RetTy = CI.getType();
  const unsigned NumElts = cast<FixedVectorType>(RetTy)->getNumElements();

  Value *MaskVec = getX86MaskVec(Builder, CI.getArgOperand(0), NumElts);
  return Builder.CreateSExt(MaskVec, RetTy, "vpmovm2");
}
1997
1998
// Replace intrinsic with unmasked version and a select.
1999
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
2000
CallBase &CI, Value *&Rep) {
2001
Name = Name.substr(12); // Remove avx512.mask.
2002
2003
unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2004
unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2005
Intrinsic::ID IID;
2006
if (Name.starts_with("max.p")) {
2007
if (VecWidth == 128 && EltWidth == 32)
2008
IID = Intrinsic::x86_sse_max_ps;
2009
else if (VecWidth == 128 && EltWidth == 64)
2010
IID = Intrinsic::x86_sse2_max_pd;
2011
else if (VecWidth == 256 && EltWidth == 32)
2012
IID = Intrinsic::x86_avx_max_ps_256;
2013
else if (VecWidth == 256 && EltWidth == 64)
2014
IID = Intrinsic::x86_avx_max_pd_256;
2015
else
2016
llvm_unreachable("Unexpected intrinsic");
2017
} else if (Name.starts_with("min.p")) {
2018
if (VecWidth == 128 && EltWidth == 32)
2019
IID = Intrinsic::x86_sse_min_ps;
2020
else if (VecWidth == 128 && EltWidth == 64)
2021
IID = Intrinsic::x86_sse2_min_pd;
2022
else if (VecWidth == 256 && EltWidth == 32)
2023
IID = Intrinsic::x86_avx_min_ps_256;
2024
else if (VecWidth == 256 && EltWidth == 64)
2025
IID = Intrinsic::x86_avx_min_pd_256;
2026
else
2027
llvm_unreachable("Unexpected intrinsic");
2028
} else if (Name.starts_with("pshuf.b.")) {
2029
if (VecWidth == 128)
2030
IID = Intrinsic::x86_ssse3_pshuf_b_128;
2031
else if (VecWidth == 256)
2032
IID = Intrinsic::x86_avx2_pshuf_b;
2033
else if (VecWidth == 512)
2034
IID = Intrinsic::x86_avx512_pshuf_b_512;
2035
else
2036
llvm_unreachable("Unexpected intrinsic");
2037
} else if (Name.starts_with("pmul.hr.sw.")) {
2038
if (VecWidth == 128)
2039
IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2040
else if (VecWidth == 256)
2041
IID = Intrinsic::x86_avx2_pmul_hr_sw;
2042
else if (VecWidth == 512)
2043
IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2044
else
2045
llvm_unreachable("Unexpected intrinsic");
2046
} else if (Name.starts_with("pmulh.w.")) {
2047
if (VecWidth == 128)
2048
IID = Intrinsic::x86_sse2_pmulh_w;
2049
else if (VecWidth == 256)
2050
IID = Intrinsic::x86_avx2_pmulh_w;
2051
else if (VecWidth == 512)
2052
IID = Intrinsic::x86_avx512_pmulh_w_512;
2053
else
2054
llvm_unreachable("Unexpected intrinsic");
2055
} else if (Name.starts_with("pmulhu.w.")) {
2056
if (VecWidth == 128)
2057
IID = Intrinsic::x86_sse2_pmulhu_w;
2058
else if (VecWidth == 256)
2059
IID = Intrinsic::x86_avx2_pmulhu_w;
2060
else if (VecWidth == 512)
2061
IID = Intrinsic::x86_avx512_pmulhu_w_512;
2062
else
2063
llvm_unreachable("Unexpected intrinsic");
2064
} else if (Name.starts_with("pmaddw.d.")) {
2065
if (VecWidth == 128)
2066
IID = Intrinsic::x86_sse2_pmadd_wd;
2067
else if (VecWidth == 256)
2068
IID = Intrinsic::x86_avx2_pmadd_wd;
2069
else if (VecWidth == 512)
2070
IID = Intrinsic::x86_avx512_pmaddw_d_512;
2071
else
2072
llvm_unreachable("Unexpected intrinsic");
2073
} else if (Name.starts_with("pmaddubs.w.")) {
2074
if (VecWidth == 128)
2075
IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2076
else if (VecWidth == 256)
2077
IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2078
else if (VecWidth == 512)
2079
IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2080
else
2081
llvm_unreachable("Unexpected intrinsic");
2082
} else if (Name.starts_with("packsswb.")) {
2083
if (VecWidth == 128)
2084
IID = Intrinsic::x86_sse2_packsswb_128;
2085
else if (VecWidth == 256)
2086
IID = Intrinsic::x86_avx2_packsswb;
2087
else if (VecWidth == 512)
2088
IID = Intrinsic::x86_avx512_packsswb_512;
2089
else
2090
llvm_unreachable("Unexpected intrinsic");
2091
} else if (Name.starts_with("packssdw.")) {
2092
if (VecWidth == 128)
2093
IID = Intrinsic::x86_sse2_packssdw_128;
2094
else if (VecWidth == 256)
2095
IID = Intrinsic::x86_avx2_packssdw;
2096
else if (VecWidth == 512)
2097
IID = Intrinsic::x86_avx512_packssdw_512;
2098
else
2099
llvm_unreachable("Unexpected intrinsic");
2100
} else if (Name.starts_with("packuswb.")) {
2101
if (VecWidth == 128)
2102
IID = Intrinsic::x86_sse2_packuswb_128;
2103
else if (VecWidth == 256)
2104
IID = Intrinsic::x86_avx2_packuswb;
2105
else if (VecWidth == 512)
2106
IID = Intrinsic::x86_avx512_packuswb_512;
2107
else
2108
llvm_unreachable("Unexpected intrinsic");
2109
} else if (Name.starts_with("packusdw.")) {
2110
if (VecWidth == 128)
2111
IID = Intrinsic::x86_sse41_packusdw;
2112
else if (VecWidth == 256)
2113
IID = Intrinsic::x86_avx2_packusdw;
2114
else if (VecWidth == 512)
2115
IID = Intrinsic::x86_avx512_packusdw_512;
2116
else
2117
llvm_unreachable("Unexpected intrinsic");
2118
} else if (Name.starts_with("vpermilvar.")) {
2119
if (VecWidth == 128 && EltWidth == 32)
2120
IID = Intrinsic::x86_avx_vpermilvar_ps;
2121
else if (VecWidth == 128 && EltWidth == 64)
2122
IID = Intrinsic::x86_avx_vpermilvar_pd;
2123
else if (VecWidth == 256 && EltWidth == 32)
2124
IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2125
else if (VecWidth == 256 && EltWidth == 64)
2126
IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2127
else if (VecWidth == 512 && EltWidth == 32)
2128
IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2129
else if (VecWidth == 512 && EltWidth == 64)
2130
IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2131
else
2132
llvm_unreachable("Unexpected intrinsic");
2133
} else if (Name == "cvtpd2dq.256") {
2134
IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2135
} else if (Name == "cvtpd2ps.256") {
2136
IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2137
} else if (Name == "cvttpd2dq.256") {
2138
IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2139
} else if (Name == "cvttps2dq.128") {
2140
IID = Intrinsic::x86_sse2_cvttps2dq;
2141
} else if (Name == "cvttps2dq.256") {
2142
IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2143
} else if (Name.starts_with("permvar.")) {
2144
bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2145
if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2146
IID = Intrinsic::x86_avx2_permps;
2147
else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2148
IID = Intrinsic::x86_avx2_permd;
2149
else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2150
IID = Intrinsic::x86_avx512_permvar_df_256;
2151
else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2152
IID = Intrinsic::x86_avx512_permvar_di_256;
2153
else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2154
IID = Intrinsic::x86_avx512_permvar_sf_512;
2155
else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2156
IID = Intrinsic::x86_avx512_permvar_si_512;
2157
else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2158
IID = Intrinsic::x86_avx512_permvar_df_512;
2159
else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2160
IID = Intrinsic::x86_avx512_permvar_di_512;
2161
else if (VecWidth == 128 && EltWidth == 16)
2162
IID = Intrinsic::x86_avx512_permvar_hi_128;
2163
else if (VecWidth == 256 && EltWidth == 16)
2164
IID = Intrinsic::x86_avx512_permvar_hi_256;
2165
else if (VecWidth == 512 && EltWidth == 16)
2166
IID = Intrinsic::x86_avx512_permvar_hi_512;
2167
else if (VecWidth == 128 && EltWidth == 8)
2168
IID = Intrinsic::x86_avx512_permvar_qi_128;
2169
else if (VecWidth == 256 && EltWidth == 8)
2170
IID = Intrinsic::x86_avx512_permvar_qi_256;
2171
else if (VecWidth == 512 && EltWidth == 8)
2172
IID = Intrinsic::x86_avx512_permvar_qi_512;
2173
else
2174
llvm_unreachable("Unexpected intrinsic");
2175
} else if (Name.starts_with("dbpsadbw.")) {
2176
if (VecWidth == 128)
2177
IID = Intrinsic::x86_avx512_dbpsadbw_128;
2178
else if (VecWidth == 256)
2179
IID = Intrinsic::x86_avx512_dbpsadbw_256;
2180
else if (VecWidth == 512)
2181
IID = Intrinsic::x86_avx512_dbpsadbw_512;
2182
else
2183
llvm_unreachable("Unexpected intrinsic");
2184
} else if (Name.starts_with("pmultishift.qb.")) {
2185
if (VecWidth == 128)
2186
IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2187
else if (VecWidth == 256)
2188
IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2189
else if (VecWidth == 512)
2190
IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2191
else
2192
llvm_unreachable("Unexpected intrinsic");
2193
} else if (Name.starts_with("conflict.")) {
2194
if (Name[9] == 'd' && VecWidth == 128)
2195
IID = Intrinsic::x86_avx512_conflict_d_128;
2196
else if (Name[9] == 'd' && VecWidth == 256)
2197
IID = Intrinsic::x86_avx512_conflict_d_256;
2198
else if (Name[9] == 'd' && VecWidth == 512)
2199
IID = Intrinsic::x86_avx512_conflict_d_512;
2200
else if (Name[9] == 'q' && VecWidth == 128)
2201
IID = Intrinsic::x86_avx512_conflict_q_128;
2202
else if (Name[9] == 'q' && VecWidth == 256)
2203
IID = Intrinsic::x86_avx512_conflict_q_256;
2204
else if (Name[9] == 'q' && VecWidth == 512)
2205
IID = Intrinsic::x86_avx512_conflict_q_512;
2206
else
2207
llvm_unreachable("Unexpected intrinsic");
2208
} else if (Name.starts_with("pavg.")) {
2209
if (Name[5] == 'b' && VecWidth == 128)
2210
IID = Intrinsic::x86_sse2_pavg_b;
2211
else if (Name[5] == 'b' && VecWidth == 256)
2212
IID = Intrinsic::x86_avx2_pavg_b;
2213
else if (Name[5] == 'b' && VecWidth == 512)
2214
IID = Intrinsic::x86_avx512_pavg_b_512;
2215
else if (Name[5] == 'w' && VecWidth == 128)
2216
IID = Intrinsic::x86_sse2_pavg_w;
2217
else if (Name[5] == 'w' && VecWidth == 256)
2218
IID = Intrinsic::x86_avx2_pavg_w;
2219
else if (Name[5] == 'w' && VecWidth == 512)
2220
IID = Intrinsic::x86_avx512_pavg_w_512;
2221
else
2222
llvm_unreachable("Unexpected intrinsic");
2223
} else
2224
return false;
2225
2226
SmallVector<Value *, 4> Args(CI.args());
2227
Args.pop_back();
2228
Args.pop_back();
2229
Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
2230
Args);
2231
unsigned NumArgs = CI.arg_size();
2232
Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2233
CI.getArgOperand(NumArgs - 2));
2234
return true;
2235
}
2236
2237
/// Upgrade comment in call to inline asm that represents an objc retain release
/// marker.
///
/// When \p AsmStr starts with "mov\tfp", mentions
/// "objc_retainAutoreleaseReturnValue" and contains "# marker", the '#' that
/// opens the first such marker is rewritten to ';'. In every other case the
/// string is left untouched.
void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
  // rfind anchored at position 0 inspects the prefix only; find(...) == 0
  // would walk the entire string before reporting a mismatch.
  if (AsmStr->rfind("mov\tfp", 0) != 0)
    return;

  if (AsmStr->find("objc_retainAutoreleaseReturnValue") == std::string::npos)
    return;

  const size_t Pos = AsmStr->find("# marker");
  if (Pos != std::string::npos)
    AsmStr->replace(Pos, 1, ";");
}
2247
2248
static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
2249
IRBuilder<> &Builder) {
2250
LLVMContext &C = F->getContext();
2251
Value *Rep = nullptr;
2252
2253
if (Name.starts_with("sse4a.movnt.")) {
2254
SmallVector<Metadata *, 1> Elts;
2255
Elts.push_back(
2256
ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2257
MDNode *Node = MDNode::get(C, Elts);
2258
2259
Value *Arg0 = CI->getArgOperand(0);
2260
Value *Arg1 = CI->getArgOperand(1);
2261
2262
// Nontemporal (unaligned) store of the 0'th element of the float/double
2263
// vector.
2264
Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
2265
PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
2266
Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
2267
Value *Extract =
2268
Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2269
2270
StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
2271
SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2272
} else if (Name.starts_with("avx.movnt.") ||
2273
Name.starts_with("avx512.storent.")) {
2274
SmallVector<Metadata *, 1> Elts;
2275
Elts.push_back(
2276
ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2277
MDNode *Node = MDNode::get(C, Elts);
2278
2279
Value *Arg0 = CI->getArgOperand(0);
2280
Value *Arg1 = CI->getArgOperand(1);
2281
2282
// Convert the type of the pointer to a pointer to the stored type.
2283
Value *BC = Builder.CreateBitCast(
2284
Arg0, PointerType::getUnqual(Arg1->getType()), "cast");
2285
StoreInst *SI = Builder.CreateAlignedStore(
2286
Arg1, BC,
2287
Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2288
SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2289
} else if (Name == "sse2.storel.dq") {
2290
Value *Arg0 = CI->getArgOperand(0);
2291
Value *Arg1 = CI->getArgOperand(1);
2292
2293
auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2294
Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2295
Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2296
Value *BC = Builder.CreateBitCast(
2297
Arg0, PointerType::getUnqual(Elt->getType()), "cast");
2298
Builder.CreateAlignedStore(Elt, BC, Align(1));
2299
} else if (Name.starts_with("sse.storeu.") ||
2300
Name.starts_with("sse2.storeu.") ||
2301
Name.starts_with("avx.storeu.")) {
2302
Value *Arg0 = CI->getArgOperand(0);
2303
Value *Arg1 = CI->getArgOperand(1);
2304
2305
Arg0 = Builder.CreateBitCast(Arg0, PointerType::getUnqual(Arg1->getType()),
2306
"cast");
2307
Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2308
} else if (Name == "avx512.mask.store.ss") {
2309
Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2310
upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2311
Mask, false);
2312
} else if (Name.starts_with("avx512.mask.store")) {
2313
// "avx512.mask.storeu." or "avx512.mask.store."
2314
bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2315
upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2316
CI->getArgOperand(2), Aligned);
2317
} else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
2318
// Upgrade packed integer vector compare intrinsics to compare instructions.
2319
// "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2320
bool CmpEq = Name[9] == 'e';
2321
Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2322
CI->getArgOperand(0), CI->getArgOperand(1));
2323
Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2324
} else if (Name.starts_with("avx512.broadcastm")) {
2325
Type *ExtTy = Type::getInt32Ty(C);
2326
if (CI->getOperand(0)->getType()->isIntegerTy(8))
2327
ExtTy = Type::getInt64Ty(C);
2328
unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2329
ExtTy->getPrimitiveSizeInBits();
2330
Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2331
Rep = Builder.CreateVectorSplat(NumElts, Rep);
2332
} else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2333
Value *Vec = CI->getArgOperand(0);
2334
Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2335
Function *Intr = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sqrt,
2336
Elt0->getType());
2337
Elt0 = Builder.CreateCall(Intr, Elt0);
2338
Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2339
} else if (Name.starts_with("avx.sqrt.p") ||
2340
Name.starts_with("sse2.sqrt.p") ||
2341
Name.starts_with("sse.sqrt.p")) {
2342
Rep =
2343
Builder.CreateCall(Intrinsic::getDeclaration(
2344
F->getParent(), Intrinsic::sqrt, CI->getType()),
2345
{CI->getArgOperand(0)});
2346
} else if (Name.starts_with("avx512.mask.sqrt.p")) {
2347
if (CI->arg_size() == 4 &&
2348
(!isa<ConstantInt>(CI->getArgOperand(3)) ||
2349
cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2350
Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2351
: Intrinsic::x86_avx512_sqrt_pd_512;
2352
2353
Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
2354
Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
2355
Args);
2356
} else {
2357
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2358
Intrinsic::sqrt,
2359
CI->getType()),
2360
{CI->getArgOperand(0)});
2361
}
2362
Rep =
2363
emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2364
} else if (Name.starts_with("avx512.ptestm") ||
2365
Name.starts_with("avx512.ptestnm")) {
2366
Value *Op0 = CI->getArgOperand(0);
2367
Value *Op1 = CI->getArgOperand(1);
2368
Value *Mask = CI->getArgOperand(2);
2369
Rep = Builder.CreateAnd(Op0, Op1);
2370
llvm::Type *Ty = Op0->getType();
2371
Value *Zero = llvm::Constant::getNullValue(Ty);
2372
ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
2373
? ICmpInst::ICMP_NE
2374
: ICmpInst::ICMP_EQ;
2375
Rep = Builder.CreateICmp(Pred, Rep, Zero);
2376
Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2377
} else if (Name.starts_with("avx512.mask.pbroadcast")) {
2378
unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2379
->getNumElements();
2380
Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2381
Rep =
2382
emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2383
} else if (Name.starts_with("avx512.kunpck")) {
2384
unsigned NumElts = CI->getType()->getScalarSizeInBits();
2385
Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2386
Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2387
int Indices[64];
2388
for (unsigned i = 0; i != NumElts; ++i)
2389
Indices[i] = i;
2390
2391
// First extract half of each vector. This gives better codegen than
2392
// doing it in a single shuffle.
2393
LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2394
RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2395
// Concat the vectors.
2396
// NOTE: Operands have to be swapped to match intrinsic definition.
2397
Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2398
Rep = Builder.CreateBitCast(Rep, CI->getType());
2399
} else if (Name == "avx512.kand.w") {
2400
Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2401
Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2402
Rep = Builder.CreateAnd(LHS, RHS);
2403
Rep = Builder.CreateBitCast(Rep, CI->getType());
2404
} else if (Name == "avx512.kandn.w") {
2405
Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2406
Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2407
LHS = Builder.CreateNot(LHS);
2408
Rep = Builder.CreateAnd(LHS, RHS);
2409
Rep = Builder.CreateBitCast(Rep, CI->getType());
2410
} else if (Name == "avx512.kor.w") {
2411
Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2412
Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2413
Rep = Builder.CreateOr(LHS, RHS);
2414
Rep = Builder.CreateBitCast(Rep, CI->getType());
2415
} else if (Name == "avx512.kxor.w") {
2416
Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2417
Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2418
Rep = Builder.CreateXor(LHS, RHS);
2419
Rep = Builder.CreateBitCast(Rep, CI->getType());
2420
} else if (Name == "avx512.kxnor.w") {
2421
Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2422
Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2423
LHS = Builder.CreateNot(LHS);
2424
Rep = Builder.CreateXor(LHS, RHS);
2425
Rep = Builder.CreateBitCast(Rep, CI->getType());
2426
} else if (Name == "avx512.knot.w") {
2427
Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2428
Rep = Builder.CreateNot(Rep);
2429
Rep = Builder.CreateBitCast(Rep, CI->getType());
2430
} else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
2431
Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2432
Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2433
Rep = Builder.CreateOr(LHS, RHS);
2434
Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2435
Value *C;
2436
if (Name[14] == 'c')
2437
C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2438
else
2439
C = ConstantInt::getNullValue(Builder.getInt16Ty());
2440
Rep = Builder.CreateICmpEQ(Rep, C);
2441
Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2442
} else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2443
Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2444
Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2445
Name == "sse.div.ss" || Name == "sse2.div.sd") {
2446
Type *I32Ty = Type::getInt32Ty(C);
2447
Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2448
ConstantInt::get(I32Ty, 0));
2449
Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2450
ConstantInt::get(I32Ty, 0));
2451
Value *EltOp;
2452
if (Name.contains(".add."))
2453
EltOp = Builder.CreateFAdd(Elt0, Elt1);
2454
else if (Name.contains(".sub."))
2455
EltOp = Builder.CreateFSub(Elt0, Elt1);
2456
else if (Name.contains(".mul."))
2457
EltOp = Builder.CreateFMul(Elt0, Elt1);
2458
else
2459
EltOp = Builder.CreateFDiv(Elt0, Elt1);
2460
Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2461
ConstantInt::get(I32Ty, 0));
2462
} else if (Name.starts_with("avx512.mask.pcmp")) {
2463
// "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2464
bool CmpEq = Name[16] == 'e';
2465
Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2466
} else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
2467
Type *OpTy = CI->getArgOperand(0)->getType();
2468
unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2469
Intrinsic::ID IID;
2470
switch (VecWidth) {
2471
default:
2472
llvm_unreachable("Unexpected intrinsic");
2473
case 128:
2474
IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
2475
break;
2476
case 256:
2477
IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
2478
break;
2479
case 512:
2480
IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
2481
break;
2482
}
2483
2484
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2485
{CI->getOperand(0), CI->getArgOperand(1)});
2486
Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2487
} else if (Name.starts_with("avx512.mask.fpclass.p")) {
2488
Type *OpTy = CI->getArgOperand(0)->getType();
2489
unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2490
unsigned EltWidth = OpTy->getScalarSizeInBits();
2491
Intrinsic::ID IID;
2492
if (VecWidth == 128 && EltWidth == 32)
2493
IID = Intrinsic::x86_avx512_fpclass_ps_128;
2494
else if (VecWidth == 256 && EltWidth == 32)
2495
IID = Intrinsic::x86_avx512_fpclass_ps_256;
2496
else if (VecWidth == 512 && EltWidth == 32)
2497
IID = Intrinsic::x86_avx512_fpclass_ps_512;
2498
else if (VecWidth == 128 && EltWidth == 64)
2499
IID = Intrinsic::x86_avx512_fpclass_pd_128;
2500
else if (VecWidth == 256 && EltWidth == 64)
2501
IID = Intrinsic::x86_avx512_fpclass_pd_256;
2502
else if (VecWidth == 512 && EltWidth == 64)
2503
IID = Intrinsic::x86_avx512_fpclass_pd_512;
2504
else
2505
llvm_unreachable("Unexpected intrinsic");
2506
2507
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2508
{CI->getOperand(0), CI->getArgOperand(1)});
2509
Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2510
} else if (Name.starts_with("avx512.cmp.p")) {
2511
SmallVector<Value *, 4> Args(CI->args());
2512
Type *OpTy = Args[0]->getType();
2513
unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2514
unsigned EltWidth = OpTy->getScalarSizeInBits();
2515
Intrinsic::ID IID;
2516
if (VecWidth == 128 && EltWidth == 32)
2517
IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2518
else if (VecWidth == 256 && EltWidth == 32)
2519
IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2520
else if (VecWidth == 512 && EltWidth == 32)
2521
IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2522
else if (VecWidth == 128 && EltWidth == 64)
2523
IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2524
else if (VecWidth == 256 && EltWidth == 64)
2525
IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2526
else if (VecWidth == 512 && EltWidth == 64)
2527
IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2528
else
2529
llvm_unreachable("Unexpected intrinsic");
2530
2531
Value *Mask = Constant::getAllOnesValue(CI->getType());
2532
if (VecWidth == 512)
2533
std::swap(Mask, Args.back());
2534
Args.push_back(Mask);
2535
2536
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2537
Args);
2538
} else if (Name.starts_with("avx512.mask.cmp.")) {
2539
// Integer compare intrinsics.
2540
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2541
Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2542
} else if (Name.starts_with("avx512.mask.ucmp.")) {
2543
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2544
Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2545
} else if (Name.starts_with("avx512.cvtb2mask.") ||
2546
Name.starts_with("avx512.cvtw2mask.") ||
2547
Name.starts_with("avx512.cvtd2mask.") ||
2548
Name.starts_with("avx512.cvtq2mask.")) {
2549
Value *Op = CI->getArgOperand(0);
2550
Value *Zero = llvm::Constant::getNullValue(Op->getType());
2551
Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2552
Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2553
} else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
2554
Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
2555
Name.starts_with("avx512.mask.pabs")) {
2556
Rep = upgradeAbs(Builder, *CI);
2557
} else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
2558
Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
2559
Name.starts_with("avx512.mask.pmaxs")) {
2560
Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
2561
} else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
2562
Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
2563
Name.starts_with("avx512.mask.pmaxu")) {
2564
Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
2565
} else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
2566
Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
2567
Name.starts_with("avx512.mask.pmins")) {
2568
Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
2569
} else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
2570
Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
2571
Name.starts_with("avx512.mask.pminu")) {
2572
Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
2573
} else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
2574
Name == "avx512.pmulu.dq.512" ||
2575
Name.starts_with("avx512.mask.pmulu.dq.")) {
2576
Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
2577
} else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
2578
Name == "avx512.pmul.dq.512" ||
2579
Name.starts_with("avx512.mask.pmul.dq.")) {
2580
Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
2581
} else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
2582
Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
2583
Rep =
2584
Builder.CreateSIToFP(CI->getArgOperand(1),
2585
cast<VectorType>(CI->getType())->getElementType());
2586
Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2587
} else if (Name == "avx512.cvtusi2sd") {
2588
Rep =
2589
Builder.CreateUIToFP(CI->getArgOperand(1),
2590
cast<VectorType>(CI->getType())->getElementType());
2591
Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2592
} else if (Name == "sse2.cvtss2sd") {
2593
Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2594
Rep = Builder.CreateFPExt(
2595
Rep, cast<VectorType>(CI->getType())->getElementType());
2596
Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2597
} else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
2598
Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
2599
Name.starts_with("avx512.mask.cvtdq2pd.") ||
2600
Name.starts_with("avx512.mask.cvtudq2pd.") ||
2601
Name.starts_with("avx512.mask.cvtdq2ps.") ||
2602
Name.starts_with("avx512.mask.cvtudq2ps.") ||
2603
Name.starts_with("avx512.mask.cvtqq2pd.") ||
2604
Name.starts_with("avx512.mask.cvtuqq2pd.") ||
2605
Name == "avx512.mask.cvtqq2ps.256" ||
2606
Name == "avx512.mask.cvtqq2ps.512" ||
2607
Name == "avx512.mask.cvtuqq2ps.256" ||
2608
Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
2609
Name == "avx.cvt.ps2.pd.256" ||
2610
Name == "avx512.mask.cvtps2pd.128" ||
2611
Name == "avx512.mask.cvtps2pd.256") {
2612
auto *DstTy = cast<FixedVectorType>(CI->getType());
2613
Rep = CI->getArgOperand(0);
2614
auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2615
2616
unsigned NumDstElts = DstTy->getNumElements();
2617
if (NumDstElts < SrcTy->getNumElements()) {
2618
assert(NumDstElts == 2 && "Unexpected vector size");
2619
Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
2620
}
2621
2622
bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2623
bool IsUnsigned = Name.contains("cvtu");
2624
if (IsPS2PD)
2625
Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2626
else if (CI->arg_size() == 4 &&
2627
(!isa<ConstantInt>(CI->getArgOperand(3)) ||
2628
cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2629
Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2630
: Intrinsic::x86_avx512_sitofp_round;
2631
Function *F =
2632
Intrinsic::getDeclaration(CI->getModule(), IID, {DstTy, SrcTy});
2633
Rep = Builder.CreateCall(F, {Rep, CI->getArgOperand(3)});
2634
} else {
2635
Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2636
: Builder.CreateSIToFP(Rep, DstTy, "cvt");
2637
}
2638
2639
if (CI->arg_size() >= 3)
2640
Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2641
CI->getArgOperand(1));
2642
} else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
2643
Name.starts_with("vcvtph2ps.")) {
2644
auto *DstTy = cast<FixedVectorType>(CI->getType());
2645
Rep = CI->getArgOperand(0);
2646
auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2647
unsigned NumDstElts = DstTy->getNumElements();
2648
if (NumDstElts != SrcTy->getNumElements()) {
2649
assert(NumDstElts == 4 && "Unexpected vector size");
2650
Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
2651
}
2652
Rep = Builder.CreateBitCast(
2653
Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
2654
Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
2655
if (CI->arg_size() >= 3)
2656
Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2657
CI->getArgOperand(1));
2658
} else if (Name.starts_with("avx512.mask.load")) {
2659
// "avx512.mask.loadu." or "avx512.mask.load."
2660
bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
2661
Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2662
CI->getArgOperand(2), Aligned);
2663
} else if (Name.starts_with("avx512.mask.expand.load.")) {
2664
auto *ResultTy = cast<FixedVectorType>(CI->getType());
2665
Type *PtrTy = ResultTy->getElementType();
2666
2667
// Cast the pointer to element type.
2668
Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2669
llvm::PointerType::getUnqual(PtrTy));
2670
2671
Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2672
ResultTy->getNumElements());
2673
2674
Function *ELd = Intrinsic::getDeclaration(
2675
F->getParent(), Intrinsic::masked_expandload, ResultTy);
2676
Rep = Builder.CreateCall(ELd, {Ptr, MaskVec, CI->getOperand(1)});
2677
} else if (Name.starts_with("avx512.mask.compress.store.")) {
2678
auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
2679
Type *PtrTy = ResultTy->getElementType();
2680
2681
// Cast the pointer to element type.
2682
Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2683
llvm::PointerType::getUnqual(PtrTy));
2684
2685
Value *MaskVec =
2686
getX86MaskVec(Builder, CI->getArgOperand(2),
2687
cast<FixedVectorType>(ResultTy)->getNumElements());
2688
2689
Function *CSt = Intrinsic::getDeclaration(
2690
F->getParent(), Intrinsic::masked_compressstore, ResultTy);
2691
Rep = Builder.CreateCall(CSt, {CI->getArgOperand(1), Ptr, MaskVec});
2692
} else if (Name.starts_with("avx512.mask.compress.") ||
2693
Name.starts_with("avx512.mask.expand.")) {
2694
auto *ResultTy = cast<FixedVectorType>(CI->getType());
2695
2696
Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2697
ResultTy->getNumElements());
2698
2699
bool IsCompress = Name[12] == 'c';
2700
Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2701
: Intrinsic::x86_avx512_mask_expand;
2702
Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
2703
Rep = Builder.CreateCall(Intr,
2704
{CI->getOperand(0), CI->getOperand(1), MaskVec});
2705
} else if (Name.starts_with("xop.vpcom")) {
2706
bool IsSigned;
2707
if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
2708
Name.ends_with("uq"))
2709
IsSigned = false;
2710
else if (Name.ends_with("b") || Name.ends_with("w") ||
2711
Name.ends_with("d") || Name.ends_with("q"))
2712
IsSigned = true;
2713
else
2714
llvm_unreachable("Unknown suffix");
2715
2716
unsigned Imm;
2717
if (CI->arg_size() == 3) {
2718
Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2719
} else {
2720
Name = Name.substr(9); // strip off "xop.vpcom"
2721
if (Name.starts_with("lt"))
2722
Imm = 0;
2723
else if (Name.starts_with("le"))
2724
Imm = 1;
2725
else if (Name.starts_with("gt"))
2726
Imm = 2;
2727
else if (Name.starts_with("ge"))
2728
Imm = 3;
2729
else if (Name.starts_with("eq"))
2730
Imm = 4;
2731
else if (Name.starts_with("ne"))
2732
Imm = 5;
2733
else if (Name.starts_with("false"))
2734
Imm = 6;
2735
else if (Name.starts_with("true"))
2736
Imm = 7;
2737
else
2738
llvm_unreachable("Unknown condition");
2739
}
2740
2741
Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
2742
} else if (Name.starts_with("xop.vpcmov")) {
2743
Value *Sel = CI->getArgOperand(2);
2744
Value *NotSel = Builder.CreateNot(Sel);
2745
Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2746
Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2747
Rep = Builder.CreateOr(Sel0, Sel1);
2748
} else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
2749
Name.starts_with("avx512.mask.prol")) {
2750
Rep = upgradeX86Rotate(Builder, *CI, false);
2751
} else if (Name.starts_with("avx512.pror") ||
2752
Name.starts_with("avx512.mask.pror")) {
2753
Rep = upgradeX86Rotate(Builder, *CI, true);
2754
} else if (Name.starts_with("avx512.vpshld.") ||
2755
Name.starts_with("avx512.mask.vpshld") ||
2756
Name.starts_with("avx512.maskz.vpshld")) {
2757
bool ZeroMask = Name[11] == 'z';
2758
Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2759
} else if (Name.starts_with("avx512.vpshrd.") ||
2760
Name.starts_with("avx512.mask.vpshrd") ||
2761
Name.starts_with("avx512.maskz.vpshrd")) {
2762
bool ZeroMask = Name[11] == 'z';
2763
Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2764
} else if (Name == "sse42.crc32.64.8") {
2765
Function *CRC32 = Intrinsic::getDeclaration(
2766
F->getParent(), Intrinsic::x86_sse42_crc32_32_8);
2767
Value *Trunc0 =
2768
Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2769
Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
2770
Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2771
} else if (Name.starts_with("avx.vbroadcast.s") ||
2772
Name.starts_with("avx512.vbroadcast.s")) {
2773
// Replace broadcasts with a series of insertelements.
2774
auto *VecTy = cast<FixedVectorType>(CI->getType());
2775
Type *EltTy = VecTy->getElementType();
2776
unsigned EltNum = VecTy->getNumElements();
2777
Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
2778
Type *I32Ty = Type::getInt32Ty(C);
2779
Rep = PoisonValue::get(VecTy);
2780
for (unsigned I = 0; I < EltNum; ++I)
2781
Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
2782
} else if (Name.starts_with("sse41.pmovsx") ||
2783
Name.starts_with("sse41.pmovzx") ||
2784
Name.starts_with("avx2.pmovsx") ||
2785
Name.starts_with("avx2.pmovzx") ||
2786
Name.starts_with("avx512.mask.pmovsx") ||
2787
Name.starts_with("avx512.mask.pmovzx")) {
2788
auto *DstTy = cast<FixedVectorType>(CI->getType());
2789
unsigned NumDstElts = DstTy->getNumElements();
2790
2791
// Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2792
SmallVector<int, 8> ShuffleMask(NumDstElts);
2793
for (unsigned i = 0; i != NumDstElts; ++i)
2794
ShuffleMask[i] = i;
2795
2796
Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
2797
2798
bool DoSext = Name.contains("pmovsx");
2799
Rep =
2800
DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
2801
// If there are 3 arguments, it's a masked intrinsic so we need a select.
2802
if (CI->arg_size() == 3)
2803
Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2804
CI->getArgOperand(1));
2805
} else if (Name == "avx512.mask.pmov.qd.256" ||
2806
Name == "avx512.mask.pmov.qd.512" ||
2807
Name == "avx512.mask.pmov.wb.256" ||
2808
Name == "avx512.mask.pmov.wb.512") {
2809
Type *Ty = CI->getArgOperand(1)->getType();
2810
Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
2811
Rep =
2812
emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2813
} else if (Name.starts_with("avx.vbroadcastf128") ||
2814
Name == "avx2.vbroadcasti128") {
2815
// Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2816
Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
2817
unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2818
auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
2819
Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
2820
PointerType::getUnqual(VT));
2821
Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
2822
if (NumSrcElts == 2)
2823
Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
2824
else
2825
Rep = Builder.CreateShuffleVector(Load,
2826
ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
2827
} else if (Name.starts_with("avx512.mask.shuf.i") ||
2828
Name.starts_with("avx512.mask.shuf.f")) {
2829
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2830
Type *VT = CI->getType();
2831
unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2832
unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2833
unsigned ControlBitsMask = NumLanes - 1;
2834
unsigned NumControlBits = NumLanes / 2;
2835
SmallVector<int, 8> ShuffleMask(0);
2836
2837
for (unsigned l = 0; l != NumLanes; ++l) {
2838
unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2839
// We actually need the other source.
2840
if (l >= NumLanes / 2)
2841
LaneMask += NumLanes;
2842
for (unsigned i = 0; i != NumElementsInLane; ++i)
2843
ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
2844
}
2845
Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2846
CI->getArgOperand(1), ShuffleMask);
2847
Rep =
2848
emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
2849
} else if (Name.starts_with("avx512.mask.broadcastf") ||
2850
Name.starts_with("avx512.mask.broadcasti")) {
2851
unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
2852
->getNumElements();
2853
unsigned NumDstElts =
2854
cast<FixedVectorType>(CI->getType())->getNumElements();
2855
2856
SmallVector<int, 8> ShuffleMask(NumDstElts);
2857
for (unsigned i = 0; i != NumDstElts; ++i)
2858
ShuffleMask[i] = i % NumSrcElts;
2859
2860
Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2861
CI->getArgOperand(0), ShuffleMask);
2862
Rep =
2863
emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2864
} else if (Name.starts_with("avx2.pbroadcast") ||
2865
Name.starts_with("avx2.vbroadcast") ||
2866
Name.starts_with("avx512.pbroadcast") ||
2867
Name.starts_with("avx512.mask.broadcast.s")) {
2868
// Replace vp?broadcasts with a vector shuffle.
2869
Value *Op = CI->getArgOperand(0);
2870
ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
2871
Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
2872
SmallVector<int, 8> M;
2873
ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
2874
Rep = Builder.CreateShuffleVector(Op, M);
2875
2876
if (CI->arg_size() == 3)
2877
Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2878
CI->getArgOperand(1));
2879
} else if (Name.starts_with("sse2.padds.") ||
2880
Name.starts_with("avx2.padds.") ||
2881
Name.starts_with("avx512.padds.") ||
2882
Name.starts_with("avx512.mask.padds.")) {
2883
Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
2884
} else if (Name.starts_with("sse2.psubs.") ||
2885
Name.starts_with("avx2.psubs.") ||
2886
Name.starts_with("avx512.psubs.") ||
2887
Name.starts_with("avx512.mask.psubs.")) {
2888
Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
2889
} else if (Name.starts_with("sse2.paddus.") ||
2890
Name.starts_with("avx2.paddus.") ||
2891
Name.starts_with("avx512.mask.paddus.")) {
2892
Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
2893
} else if (Name.starts_with("sse2.psubus.") ||
2894
Name.starts_with("avx2.psubus.") ||
2895
Name.starts_with("avx512.mask.psubus.")) {
2896
Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
2897
} else if (Name.starts_with("avx512.mask.palignr.")) {
2898
Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2899
CI->getArgOperand(1), CI->getArgOperand(2),
2900
CI->getArgOperand(3), CI->getArgOperand(4),
2901
false);
2902
} else if (Name.starts_with("avx512.mask.valign.")) {
2903
Rep = upgradeX86ALIGNIntrinsics(
2904
Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2905
CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
2906
} else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
2907
// 128/256-bit shift left specified in bits.
2908
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2909
Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
2910
Shift / 8); // Shift is in bits.
2911
} else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
2912
// 128/256-bit shift right specified in bits.
2913
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2914
Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
2915
Shift / 8); // Shift is in bits.
2916
} else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
2917
Name == "avx512.psll.dq.512") {
2918
// 128/256/512-bit shift left specified in bytes.
2919
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2920
Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2921
} else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
2922
Name == "avx512.psrl.dq.512") {
2923
// 128/256/512-bit shift right specified in bytes.
2924
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2925
Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2926
} else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
2927
Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
2928
Name.starts_with("avx2.pblendd.")) {
2929
Value *Op0 = CI->getArgOperand(0);
2930
Value *Op1 = CI->getArgOperand(1);
2931
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2932
auto *VecTy = cast<FixedVectorType>(CI->getType());
2933
unsigned NumElts = VecTy->getNumElements();
2934
2935
SmallVector<int, 16> Idxs(NumElts);
2936
for (unsigned i = 0; i != NumElts; ++i)
2937
Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
2938
2939
Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2940
} else if (Name.starts_with("avx.vinsertf128.") ||
2941
Name == "avx2.vinserti128" ||
2942
Name.starts_with("avx512.mask.insert")) {
2943
Value *Op0 = CI->getArgOperand(0);
2944
Value *Op1 = CI->getArgOperand(1);
2945
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2946
unsigned DstNumElts =
2947
cast<FixedVectorType>(CI->getType())->getNumElements();
2948
unsigned SrcNumElts =
2949
cast<FixedVectorType>(Op1->getType())->getNumElements();
2950
unsigned Scale = DstNumElts / SrcNumElts;
2951
2952
// Mask off the high bits of the immediate value; hardware ignores those.
2953
Imm = Imm % Scale;
2954
2955
// Extend the second operand into a vector the size of the destination.
2956
SmallVector<int, 8> Idxs(DstNumElts);
2957
for (unsigned i = 0; i != SrcNumElts; ++i)
2958
Idxs[i] = i;
2959
for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
2960
Idxs[i] = SrcNumElts;
2961
Rep = Builder.CreateShuffleVector(Op1, Idxs);
2962
2963
// Insert the second operand into the first operand.
2964
2965
// Note that there is no guarantee that instruction lowering will actually
2966
// produce a vinsertf128 instruction for the created shuffles. In
2967
// particular, the 0 immediate case involves no lane changes, so it can
2968
// be handled as a blend.
2969
2970
// Example of shuffle mask for 32-bit elements:
2971
// Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
2972
// Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
2973
2974
// First fill with identity mask.
2975
for (unsigned i = 0; i != DstNumElts; ++i)
2976
Idxs[i] = i;
2977
// Then replace the elements where we need to insert.
2978
for (unsigned i = 0; i != SrcNumElts; ++i)
2979
Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
2980
Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
2981
2982
// If the intrinsic has a mask operand, handle that.
2983
if (CI->arg_size() == 5)
2984
Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
2985
CI->getArgOperand(3));
2986
} else if (Name.starts_with("avx.vextractf128.") ||
2987
Name == "avx2.vextracti128" ||
2988
Name.starts_with("avx512.mask.vextract")) {
2989
Value *Op0 = CI->getArgOperand(0);
2990
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2991
unsigned DstNumElts =
2992
cast<FixedVectorType>(CI->getType())->getNumElements();
2993
unsigned SrcNumElts =
2994
cast<FixedVectorType>(Op0->getType())->getNumElements();
2995
unsigned Scale = SrcNumElts / DstNumElts;
2996
2997
// Mask off the high bits of the immediate value; hardware ignores those.
2998
Imm = Imm % Scale;
2999
3000
// Get indexes for the subvector of the input vector.
3001
SmallVector<int, 8> Idxs(DstNumElts);
3002
for (unsigned i = 0; i != DstNumElts; ++i) {
3003
Idxs[i] = i + (Imm * DstNumElts);
3004
}
3005
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3006
3007
// If the intrinsic has a mask operand, handle that.
3008
if (CI->arg_size() == 4)
3009
Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3010
CI->getArgOperand(2));
3011
} else if (Name.starts_with("avx512.mask.perm.df.") ||
3012
Name.starts_with("avx512.mask.perm.di.")) {
3013
Value *Op0 = CI->getArgOperand(0);
3014
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3015
auto *VecTy = cast<FixedVectorType>(CI->getType());
3016
unsigned NumElts = VecTy->getNumElements();
3017
3018
SmallVector<int, 8> Idxs(NumElts);
3019
for (unsigned i = 0; i != NumElts; ++i)
3020
Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3021
3022
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3023
3024
if (CI->arg_size() == 4)
3025
Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3026
CI->getArgOperand(2));
3027
} else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3028
// The immediate permute control byte looks like this:
3029
// [1:0] - select 128 bits from sources for low half of destination
3030
// [2] - ignore
3031
// [3] - zero low half of destination
3032
// [5:4] - select 128 bits from sources for high half of destination
3033
// [6] - ignore
3034
// [7] - zero high half of destination
3035
3036
uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3037
3038
unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3039
unsigned HalfSize = NumElts / 2;
3040
SmallVector<int, 8> ShuffleMask(NumElts);
3041
3042
// Determine which operand(s) are actually in use for this instruction.
3043
Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3044
Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3045
3046
// If needed, replace operands based on zero mask.
3047
V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3048
V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3049
3050
// Permute low half of result.
3051
unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3052
for (unsigned i = 0; i < HalfSize; ++i)
3053
ShuffleMask[i] = StartIndex + i;
3054
3055
// Permute high half of result.
3056
StartIndex = (Imm & 0x10) ? HalfSize : 0;
3057
for (unsigned i = 0; i < HalfSize; ++i)
3058
ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3059
3060
Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3061
3062
} else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3063
Name.starts_with("avx512.mask.vpermil.p") ||
3064
Name.starts_with("avx512.mask.pshuf.d.")) {
3065
Value *Op0 = CI->getArgOperand(0);
3066
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3067
auto *VecTy = cast<FixedVectorType>(CI->getType());
3068
unsigned NumElts = VecTy->getNumElements();
3069
// Calculate the size of each index in the immediate.
3070
unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3071
unsigned IdxMask = ((1 << IdxSize) - 1);
3072
3073
SmallVector<int, 8> Idxs(NumElts);
3074
// Lookup the bits for this element, wrapping around the immediate every
3075
// 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
3076
// to offset by the first index of each group.
3077
for (unsigned i = 0; i != NumElts; ++i)
3078
Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3079
3080
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3081
3082
if (CI->arg_size() == 4)
3083
Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3084
CI->getArgOperand(2));
3085
} else if (Name == "sse2.pshufl.w" ||
3086
Name.starts_with("avx512.mask.pshufl.w.")) {
3087
Value *Op0 = CI->getArgOperand(0);
3088
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3089
unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3090
3091
SmallVector<int, 16> Idxs(NumElts);
3092
for (unsigned l = 0; l != NumElts; l += 8) {
3093
for (unsigned i = 0; i != 4; ++i)
3094
Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3095
for (unsigned i = 4; i != 8; ++i)
3096
Idxs[i + l] = i + l;
3097
}
3098
3099
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3100
3101
if (CI->arg_size() == 4)
3102
Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3103
CI->getArgOperand(2));
3104
} else if (Name == "sse2.pshufh.w" ||
3105
Name.starts_with("avx512.mask.pshufh.w.")) {
3106
Value *Op0 = CI->getArgOperand(0);
3107
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3108
unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3109
3110
SmallVector<int, 16> Idxs(NumElts);
3111
for (unsigned l = 0; l != NumElts; l += 8) {
3112
for (unsigned i = 0; i != 4; ++i)
3113
Idxs[i + l] = i + l;
3114
for (unsigned i = 0; i != 4; ++i)
3115
Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3116
}
3117
3118
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3119
3120
if (CI->arg_size() == 4)
3121
Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3122
CI->getArgOperand(2));
3123
} else if (Name.starts_with("avx512.mask.shuf.p")) {
3124
Value *Op0 = CI->getArgOperand(0);
3125
Value *Op1 = CI->getArgOperand(1);
3126
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3127
unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3128
3129
unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3130
unsigned HalfLaneElts = NumLaneElts / 2;
3131
3132
SmallVector<int, 16> Idxs(NumElts);
3133
for (unsigned i = 0; i != NumElts; ++i) {
3134
// Base index is the starting element of the lane.
3135
Idxs[i] = i - (i % NumLaneElts);
3136
// If we are half way through the lane switch to the other source.
3137
if ((i % NumLaneElts) >= HalfLaneElts)
3138
Idxs[i] += NumElts;
3139
// Now select the specific element by adding HalfLaneElts bits from
3140
// the immediate, wrapping around the immediate every 8 bits.
3141
Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3142
}
3143
3144
Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3145
3146
Rep =
3147
emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3148
} else if (Name.starts_with("avx512.mask.movddup") ||
3149
Name.starts_with("avx512.mask.movshdup") ||
3150
Name.starts_with("avx512.mask.movsldup")) {
3151
Value *Op0 = CI->getArgOperand(0);
3152
unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3153
unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3154
3155
unsigned Offset = 0;
3156
if (Name.starts_with("avx512.mask.movshdup."))
3157
Offset = 1;
3158
3159
SmallVector<int, 16> Idxs(NumElts);
3160
for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3161
for (unsigned i = 0; i != NumLaneElts; i += 2) {
3162
Idxs[i + l + 0] = i + l + Offset;
3163
Idxs[i + l + 1] = i + l + Offset;
3164
}
3165
3166
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3167
3168
Rep =
3169
emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3170
} else if (Name.starts_with("avx512.mask.punpckl") ||
3171
Name.starts_with("avx512.mask.unpckl.")) {
3172
Value *Op0 = CI->getArgOperand(0);
3173
Value *Op1 = CI->getArgOperand(1);
3174
int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3175
int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3176
3177
SmallVector<int, 64> Idxs(NumElts);
3178
for (int l = 0; l != NumElts; l += NumLaneElts)
3179
for (int i = 0; i != NumLaneElts; ++i)
3180
Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3181
3182
Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3183
3184
Rep =
3185
emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3186
} else if (Name.starts_with("avx512.mask.punpckh") ||
3187
Name.starts_with("avx512.mask.unpckh.")) {
3188
Value *Op0 = CI->getArgOperand(0);
3189
Value *Op1 = CI->getArgOperand(1);
3190
int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3191
int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3192
3193
SmallVector<int, 64> Idxs(NumElts);
3194
for (int l = 0; l != NumElts; l += NumLaneElts)
3195
for (int i = 0; i != NumLaneElts; ++i)
3196
Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3197
3198
Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3199
3200
Rep =
3201
emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3202
} else if (Name.starts_with("avx512.mask.and.") ||
3203
Name.starts_with("avx512.mask.pand.")) {
3204
VectorType *FTy = cast<VectorType>(CI->getType());
3205
VectorType *ITy = VectorType::getInteger(FTy);
3206
Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3207
Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3208
Rep = Builder.CreateBitCast(Rep, FTy);
3209
Rep =
3210
emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3211
} else if (Name.starts_with("avx512.mask.andn.") ||
3212
Name.starts_with("avx512.mask.pandn.")) {
3213
VectorType *FTy = cast<VectorType>(CI->getType());
3214
VectorType *ITy = VectorType::getInteger(FTy);
3215
Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3216
Rep = Builder.CreateAnd(Rep,
3217
Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3218
Rep = Builder.CreateBitCast(Rep, FTy);
3219
Rep =
3220
emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3221
} else if (Name.starts_with("avx512.mask.or.") ||
3222
Name.starts_with("avx512.mask.por.")) {
3223
VectorType *FTy = cast<VectorType>(CI->getType());
3224
VectorType *ITy = VectorType::getInteger(FTy);
3225
Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3226
Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3227
Rep = Builder.CreateBitCast(Rep, FTy);
3228
Rep =
3229
emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3230
} else if (Name.starts_with("avx512.mask.xor.") ||
3231
Name.starts_with("avx512.mask.pxor.")) {
3232
VectorType *FTy = cast<VectorType>(CI->getType());
3233
VectorType *ITy = VectorType::getInteger(FTy);
3234
Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3235
Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3236
Rep = Builder.CreateBitCast(Rep, FTy);
3237
Rep =
3238
emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3239
} else if (Name.starts_with("avx512.mask.padd.")) {
3240
Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3241
Rep =
3242
emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3243
} else if (Name.starts_with("avx512.mask.psub.")) {
3244
Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3245
Rep =
3246
emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3247
} else if (Name.starts_with("avx512.mask.pmull.")) {
3248
Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3249
Rep =
3250
emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3251
} else if (Name.starts_with("avx512.mask.add.p")) {
3252
if (Name.ends_with(".512")) {
3253
Intrinsic::ID IID;
3254
if (Name[17] == 's')
3255
IID = Intrinsic::x86_avx512_add_ps_512;
3256
else
3257
IID = Intrinsic::x86_avx512_add_pd_512;
3258
3259
Rep = Builder.CreateCall(
3260
Intrinsic::getDeclaration(F->getParent(), IID),
3261
{CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3262
} else {
3263
Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3264
}
3265
Rep =
3266
emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3267
} else if (Name.starts_with("avx512.mask.div.p")) {
3268
if (Name.ends_with(".512")) {
3269
Intrinsic::ID IID;
3270
if (Name[17] == 's')
3271
IID = Intrinsic::x86_avx512_div_ps_512;
3272
else
3273
IID = Intrinsic::x86_avx512_div_pd_512;
3274
3275
Rep = Builder.CreateCall(
3276
Intrinsic::getDeclaration(F->getParent(), IID),
3277
{CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3278
} else {
3279
Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3280
}
3281
Rep =
3282
emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3283
} else if (Name.starts_with("avx512.mask.mul.p")) {
3284
if (Name.ends_with(".512")) {
3285
Intrinsic::ID IID;
3286
if (Name[17] == 's')
3287
IID = Intrinsic::x86_avx512_mul_ps_512;
3288
else
3289
IID = Intrinsic::x86_avx512_mul_pd_512;
3290
3291
Rep = Builder.CreateCall(
3292
Intrinsic::getDeclaration(F->getParent(), IID),
3293
{CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3294
} else {
3295
Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3296
}
3297
Rep =
3298
emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3299
} else if (Name.starts_with("avx512.mask.sub.p")) {
3300
if (Name.ends_with(".512")) {
3301
Intrinsic::ID IID;
3302
if (Name[17] == 's')
3303
IID = Intrinsic::x86_avx512_sub_ps_512;
3304
else
3305
IID = Intrinsic::x86_avx512_sub_pd_512;
3306
3307
Rep = Builder.CreateCall(
3308
Intrinsic::getDeclaration(F->getParent(), IID),
3309
{CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3310
} else {
3311
Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3312
}
3313
Rep =
3314
emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3315
} else if ((Name.starts_with("avx512.mask.max.p") ||
3316
Name.starts_with("avx512.mask.min.p")) &&
3317
Name.drop_front(18) == ".512") {
3318
bool IsDouble = Name[17] == 'd';
3319
bool IsMin = Name[13] == 'i';
3320
static const Intrinsic::ID MinMaxTbl[2][2] = {
3321
{Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3322
{Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3323
Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3324
3325
Rep = Builder.CreateCall(
3326
Intrinsic::getDeclaration(F->getParent(), IID),
3327
{CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3328
Rep =
3329
emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3330
} else if (Name.starts_with("avx512.mask.lzcnt.")) {
3331
Rep =
3332
Builder.CreateCall(Intrinsic::getDeclaration(
3333
F->getParent(), Intrinsic::ctlz, CI->getType()),
3334
{CI->getArgOperand(0), Builder.getInt1(false)});
3335
Rep =
3336
emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3337
} else if (Name.starts_with("avx512.mask.psll")) {
3338
bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3339
bool IsVariable = Name[16] == 'v';
3340
char Size = Name[16] == '.' ? Name[17]
3341
: Name[17] == '.' ? Name[18]
3342
: Name[18] == '.' ? Name[19]
3343
: Name[20];
3344
3345
Intrinsic::ID IID;
3346
if (IsVariable && Name[17] != '.') {
3347
if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3348
IID = Intrinsic::x86_avx2_psllv_q;
3349
else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3350
IID = Intrinsic::x86_avx2_psllv_q_256;
3351
else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3352
IID = Intrinsic::x86_avx2_psllv_d;
3353
else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3354
IID = Intrinsic::x86_avx2_psllv_d_256;
3355
else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3356
IID = Intrinsic::x86_avx512_psllv_w_128;
3357
else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3358
IID = Intrinsic::x86_avx512_psllv_w_256;
3359
else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3360
IID = Intrinsic::x86_avx512_psllv_w_512;
3361
else
3362
llvm_unreachable("Unexpected size");
3363
} else if (Name.ends_with(".128")) {
3364
if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3365
IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3366
: Intrinsic::x86_sse2_psll_d;
3367
else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3368
IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3369
: Intrinsic::x86_sse2_psll_q;
3370
else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3371
IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3372
: Intrinsic::x86_sse2_psll_w;
3373
else
3374
llvm_unreachable("Unexpected size");
3375
} else if (Name.ends_with(".256")) {
3376
if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3377
IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3378
: Intrinsic::x86_avx2_psll_d;
3379
else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3380
IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3381
: Intrinsic::x86_avx2_psll_q;
3382
else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3383
IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3384
: Intrinsic::x86_avx2_psll_w;
3385
else
3386
llvm_unreachable("Unexpected size");
3387
} else {
3388
if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3389
IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3390
: IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3391
: Intrinsic::x86_avx512_psll_d_512;
3392
else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3393
IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3394
: IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3395
: Intrinsic::x86_avx512_psll_q_512;
3396
else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3397
IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3398
: Intrinsic::x86_avx512_psll_w_512;
3399
else
3400
llvm_unreachable("Unexpected size");
3401
}
3402
3403
Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3404
} else if (Name.starts_with("avx512.mask.psrl")) {
3405
bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3406
bool IsVariable = Name[16] == 'v';
3407
char Size = Name[16] == '.' ? Name[17]
3408
: Name[17] == '.' ? Name[18]
3409
: Name[18] == '.' ? Name[19]
3410
: Name[20];
3411
3412
Intrinsic::ID IID;
3413
if (IsVariable && Name[17] != '.') {
3414
if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3415
IID = Intrinsic::x86_avx2_psrlv_q;
3416
else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3417
IID = Intrinsic::x86_avx2_psrlv_q_256;
3418
else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3419
IID = Intrinsic::x86_avx2_psrlv_d;
3420
else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3421
IID = Intrinsic::x86_avx2_psrlv_d_256;
3422
else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3423
IID = Intrinsic::x86_avx512_psrlv_w_128;
3424
else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3425
IID = Intrinsic::x86_avx512_psrlv_w_256;
3426
else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3427
IID = Intrinsic::x86_avx512_psrlv_w_512;
3428
else
3429
llvm_unreachable("Unexpected size");
3430
} else if (Name.ends_with(".128")) {
3431
if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3432
IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3433
: Intrinsic::x86_sse2_psrl_d;
3434
else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3435
IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3436
: Intrinsic::x86_sse2_psrl_q;
3437
else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3438
IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3439
: Intrinsic::x86_sse2_psrl_w;
3440
else
3441
llvm_unreachable("Unexpected size");
3442
} else if (Name.ends_with(".256")) {
3443
if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3444
IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3445
: Intrinsic::x86_avx2_psrl_d;
3446
else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3447
IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3448
: Intrinsic::x86_avx2_psrl_q;
3449
else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3450
IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3451
: Intrinsic::x86_avx2_psrl_w;
3452
else
3453
llvm_unreachable("Unexpected size");
3454
} else {
3455
if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3456
IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
3457
: IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
3458
: Intrinsic::x86_avx512_psrl_d_512;
3459
else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3460
IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
3461
: IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
3462
: Intrinsic::x86_avx512_psrl_q_512;
3463
else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
3464
IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3465
: Intrinsic::x86_avx512_psrl_w_512;
3466
else
3467
llvm_unreachable("Unexpected size");
3468
}
3469
3470
Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3471
} else if (Name.starts_with("avx512.mask.psra")) {
3472
bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3473
bool IsVariable = Name[16] == 'v';
3474
char Size = Name[16] == '.' ? Name[17]
3475
: Name[17] == '.' ? Name[18]
3476
: Name[18] == '.' ? Name[19]
3477
: Name[20];
3478
3479
Intrinsic::ID IID;
3480
if (IsVariable && Name[17] != '.') {
3481
if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3482
IID = Intrinsic::x86_avx2_psrav_d;
3483
else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3484
IID = Intrinsic::x86_avx2_psrav_d_256;
3485
else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3486
IID = Intrinsic::x86_avx512_psrav_w_128;
3487
else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3488
IID = Intrinsic::x86_avx512_psrav_w_256;
3489
else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3490
IID = Intrinsic::x86_avx512_psrav_w_512;
3491
else
3492
llvm_unreachable("Unexpected size");
3493
} else if (Name.ends_with(".128")) {
3494
if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3495
IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3496
: Intrinsic::x86_sse2_psra_d;
3497
else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3498
IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
3499
: IsVariable ? Intrinsic::x86_avx512_psrav_q_128
3500
: Intrinsic::x86_avx512_psra_q_128;
3501
else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3502
IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3503
: Intrinsic::x86_sse2_psra_w;
3504
else
3505
llvm_unreachable("Unexpected size");
3506
} else if (Name.ends_with(".256")) {
3507
if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3508
IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3509
: Intrinsic::x86_avx2_psra_d;
3510
else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3511
IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
3512
: IsVariable ? Intrinsic::x86_avx512_psrav_q_256
3513
: Intrinsic::x86_avx512_psra_q_256;
3514
else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3515
IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3516
: Intrinsic::x86_avx2_psra_w;
3517
else
3518
llvm_unreachable("Unexpected size");
3519
} else {
3520
if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3521
IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
3522
: IsVariable ? Intrinsic::x86_avx512_psrav_d_512
3523
: Intrinsic::x86_avx512_psra_d_512;
3524
else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3525
IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
3526
: IsVariable ? Intrinsic::x86_avx512_psrav_q_512
3527
: Intrinsic::x86_avx512_psra_q_512;
3528
else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3529
IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3530
: Intrinsic::x86_avx512_psra_w_512;
3531
else
3532
llvm_unreachable("Unexpected size");
3533
}
3534
3535
Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3536
} else if (Name.starts_with("avx512.mask.move.s")) {
3537
Rep = upgradeMaskedMove(Builder, *CI);
3538
} else if (Name.starts_with("avx512.cvtmask2")) {
3539
Rep = upgradeMaskToInt(Builder, *CI);
3540
} else if (Name.ends_with(".movntdqa")) {
3541
MDNode *Node = MDNode::get(
3542
C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3543
3544
Value *Ptr = CI->getArgOperand(0);
3545
3546
// Convert the type of the pointer to a pointer to the stored type.
3547
Value *BC = Builder.CreateBitCast(
3548
Ptr, PointerType::getUnqual(CI->getType()), "cast");
3549
LoadInst *LI = Builder.CreateAlignedLoad(
3550
CI->getType(), BC,
3551
Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
3552
LI->setMetadata(LLVMContext::MD_nontemporal, Node);
3553
Rep = LI;
3554
} else if (Name.starts_with("fma.vfmadd.") ||
3555
Name.starts_with("fma.vfmsub.") ||
3556
Name.starts_with("fma.vfnmadd.") ||
3557
Name.starts_with("fma.vfnmsub.")) {
3558
bool NegMul = Name[6] == 'n';
3559
bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3560
bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3561
3562
Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3563
CI->getArgOperand(2)};
3564
3565
if (IsScalar) {
3566
Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3567
Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3568
Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3569
}
3570
3571
if (NegMul && !IsScalar)
3572
Ops[0] = Builder.CreateFNeg(Ops[0]);
3573
if (NegMul && IsScalar)
3574
Ops[1] = Builder.CreateFNeg(Ops[1]);
3575
if (NegAcc)
3576
Ops[2] = Builder.CreateFNeg(Ops[2]);
3577
3578
Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3579
Intrinsic::fma,
3580
Ops[0]->getType()),
3581
Ops);
3582
3583
if (IsScalar)
3584
Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3585
} else if (Name.starts_with("fma4.vfmadd.s")) {
3586
Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3587
CI->getArgOperand(2)};
3588
3589
Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3590
Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3591
Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3592
3593
Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3594
Intrinsic::fma,
3595
Ops[0]->getType()),
3596
Ops);
3597
3598
Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3599
Rep, (uint64_t)0);
3600
} else if (Name.starts_with("avx512.mask.vfmadd.s") ||
3601
Name.starts_with("avx512.maskz.vfmadd.s") ||
3602
Name.starts_with("avx512.mask3.vfmadd.s") ||
3603
Name.starts_with("avx512.mask3.vfmsub.s") ||
3604
Name.starts_with("avx512.mask3.vfnmsub.s")) {
3605
bool IsMask3 = Name[11] == '3';
3606
bool IsMaskZ = Name[11] == 'z';
3607
// Drop the "avx512.mask." to make it easier.
3608
Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3609
bool NegMul = Name[2] == 'n';
3610
bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3611
3612
Value *A = CI->getArgOperand(0);
3613
Value *B = CI->getArgOperand(1);
3614
Value *C = CI->getArgOperand(2);
3615
3616
if (NegMul && (IsMask3 || IsMaskZ))
3617
A = Builder.CreateFNeg(A);
3618
if (NegMul && !(IsMask3 || IsMaskZ))
3619
B = Builder.CreateFNeg(B);
3620
if (NegAcc)
3621
C = Builder.CreateFNeg(C);
3622
3623
A = Builder.CreateExtractElement(A, (uint64_t)0);
3624
B = Builder.CreateExtractElement(B, (uint64_t)0);
3625
C = Builder.CreateExtractElement(C, (uint64_t)0);
3626
3627
if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3628
cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3629
Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
3630
3631
Intrinsic::ID IID;
3632
if (Name.back() == 'd')
3633
IID = Intrinsic::x86_avx512_vfmadd_f64;
3634
else
3635
IID = Intrinsic::x86_avx512_vfmadd_f32;
3636
Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
3637
Rep = Builder.CreateCall(FMA, Ops);
3638
} else {
3639
Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3640
A->getType());
3641
Rep = Builder.CreateCall(FMA, {A, B, C});
3642
}
3643
3644
Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
3645
: IsMask3 ? C
3646
: A;
3647
3648
// For Mask3 with NegAcc, we need to create a new extractelement that
3649
// avoids the negation above.
3650
if (NegAcc && IsMask3)
3651
PassThru =
3652
Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
3653
3654
Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
3655
Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
3656
(uint64_t)0);
3657
} else if (Name.starts_with("avx512.mask.vfmadd.p") ||
3658
Name.starts_with("avx512.mask.vfnmadd.p") ||
3659
Name.starts_with("avx512.mask.vfnmsub.p") ||
3660
Name.starts_with("avx512.mask3.vfmadd.p") ||
3661
Name.starts_with("avx512.mask3.vfmsub.p") ||
3662
Name.starts_with("avx512.mask3.vfnmsub.p") ||
3663
Name.starts_with("avx512.maskz.vfmadd.p")) {
3664
bool IsMask3 = Name[11] == '3';
3665
bool IsMaskZ = Name[11] == 'z';
3666
// Drop the "avx512.mask." to make it easier.
3667
Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3668
bool NegMul = Name[2] == 'n';
3669
bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3670
3671
Value *A = CI->getArgOperand(0);
3672
Value *B = CI->getArgOperand(1);
3673
Value *C = CI->getArgOperand(2);
3674
3675
if (NegMul && (IsMask3 || IsMaskZ))
3676
A = Builder.CreateFNeg(A);
3677
if (NegMul && !(IsMask3 || IsMaskZ))
3678
B = Builder.CreateFNeg(B);
3679
if (NegAcc)
3680
C = Builder.CreateFNeg(C);
3681
3682
if (CI->arg_size() == 5 &&
3683
(!isa<ConstantInt>(CI->getArgOperand(4)) ||
3684
cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3685
Intrinsic::ID IID;
3686
// Check the character before ".512" in string.
3687
if (Name[Name.size() - 5] == 's')
3688
IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3689
else
3690
IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3691
3692
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3693
{A, B, C, CI->getArgOperand(4)});
3694
} else {
3695
Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3696
A->getType());
3697
Rep = Builder.CreateCall(FMA, {A, B, C});
3698
}
3699
3700
Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
3701
: IsMask3 ? CI->getArgOperand(2)
3702
: CI->getArgOperand(0);
3703
3704
Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3705
} else if (Name.starts_with("fma.vfmsubadd.p")) {
3706
unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3707
unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3708
Intrinsic::ID IID;
3709
if (VecWidth == 128 && EltWidth == 32)
3710
IID = Intrinsic::x86_fma_vfmaddsub_ps;
3711
else if (VecWidth == 256 && EltWidth == 32)
3712
IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
3713
else if (VecWidth == 128 && EltWidth == 64)
3714
IID = Intrinsic::x86_fma_vfmaddsub_pd;
3715
else if (VecWidth == 256 && EltWidth == 64)
3716
IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
3717
else
3718
llvm_unreachable("Unexpected intrinsic");
3719
3720
Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3721
CI->getArgOperand(2)};
3722
Ops[2] = Builder.CreateFNeg(Ops[2]);
3723
Rep =
3724
Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), Ops);
3725
} else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
3726
Name.starts_with("avx512.mask3.vfmaddsub.p") ||
3727
Name.starts_with("avx512.maskz.vfmaddsub.p") ||
3728
Name.starts_with("avx512.mask3.vfmsubadd.p")) {
3729
bool IsMask3 = Name[11] == '3';
3730
bool IsMaskZ = Name[11] == 'z';
3731
// Drop the "avx512.mask." to make it easier.
3732
Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3733
bool IsSubAdd = Name[3] == 's';
3734
if (CI->arg_size() == 5) {
3735
Intrinsic::ID IID;
3736
// Check the character before ".512" in string.
3737
if (Name[Name.size() - 5] == 's')
3738
IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3739
else
3740
IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3741
3742
Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3743
CI->getArgOperand(2), CI->getArgOperand(4)};
3744
if (IsSubAdd)
3745
Ops[2] = Builder.CreateFNeg(Ops[2]);
3746
3747
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3748
Ops);
3749
} else {
3750
int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3751
3752
Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3753
CI->getArgOperand(2)};
3754
3755
Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3756
Ops[0]->getType());
3757
Value *Odd = Builder.CreateCall(FMA, Ops);
3758
Ops[2] = Builder.CreateFNeg(Ops[2]);
3759
Value *Even = Builder.CreateCall(FMA, Ops);
3760
3761
if (IsSubAdd)
3762
std::swap(Even, Odd);
3763
3764
SmallVector<int, 32> Idxs(NumElts);
3765
for (int i = 0; i != NumElts; ++i)
3766
Idxs[i] = i + (i % 2) * NumElts;
3767
3768
Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3769
}
3770
3771
Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
3772
: IsMask3 ? CI->getArgOperand(2)
3773
: CI->getArgOperand(0);
3774
3775
Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3776
} else if (Name.starts_with("avx512.mask.pternlog.") ||
3777
Name.starts_with("avx512.maskz.pternlog.")) {
3778
bool ZeroMask = Name[11] == 'z';
3779
unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3780
unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3781
Intrinsic::ID IID;
3782
if (VecWidth == 128 && EltWidth == 32)
3783
IID = Intrinsic::x86_avx512_pternlog_d_128;
3784
else if (VecWidth == 256 && EltWidth == 32)
3785
IID = Intrinsic::x86_avx512_pternlog_d_256;
3786
else if (VecWidth == 512 && EltWidth == 32)
3787
IID = Intrinsic::x86_avx512_pternlog_d_512;
3788
else if (VecWidth == 128 && EltWidth == 64)
3789
IID = Intrinsic::x86_avx512_pternlog_q_128;
3790
else if (VecWidth == 256 && EltWidth == 64)
3791
IID = Intrinsic::x86_avx512_pternlog_q_256;
3792
else if (VecWidth == 512 && EltWidth == 64)
3793
IID = Intrinsic::x86_avx512_pternlog_q_512;
3794
else
3795
llvm_unreachable("Unexpected intrinsic");
3796
3797
Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3798
CI->getArgOperand(2), CI->getArgOperand(3)};
3799
Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3800
Args);
3801
Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3802
: CI->getArgOperand(0);
3803
Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3804
} else if (Name.starts_with("avx512.mask.vpmadd52") ||
3805
Name.starts_with("avx512.maskz.vpmadd52")) {
3806
bool ZeroMask = Name[11] == 'z';
3807
bool High = Name[20] == 'h' || Name[21] == 'h';
3808
unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3809
Intrinsic::ID IID;
3810
if (VecWidth == 128 && !High)
3811
IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3812
else if (VecWidth == 256 && !High)
3813
IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3814
else if (VecWidth == 512 && !High)
3815
IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3816
else if (VecWidth == 128 && High)
3817
IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3818
else if (VecWidth == 256 && High)
3819
IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3820
else if (VecWidth == 512 && High)
3821
IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3822
else
3823
llvm_unreachable("Unexpected intrinsic");
3824
3825
Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3826
CI->getArgOperand(2)};
3827
Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3828
Args);
3829
Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3830
: CI->getArgOperand(0);
3831
Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3832
} else if (Name.starts_with("avx512.mask.vpermi2var.") ||
3833
Name.starts_with("avx512.mask.vpermt2var.") ||
3834
Name.starts_with("avx512.maskz.vpermt2var.")) {
3835
bool ZeroMask = Name[11] == 'z';
3836
bool IndexForm = Name[17] == 'i';
3837
Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3838
} else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
3839
Name.starts_with("avx512.maskz.vpdpbusd.") ||
3840
Name.starts_with("avx512.mask.vpdpbusds.") ||
3841
Name.starts_with("avx512.maskz.vpdpbusds.")) {
3842
bool ZeroMask = Name[11] == 'z';
3843
bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3844
unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3845
Intrinsic::ID IID;
3846
if (VecWidth == 128 && !IsSaturating)
3847
IID = Intrinsic::x86_avx512_vpdpbusd_128;
3848
else if (VecWidth == 256 && !IsSaturating)
3849
IID = Intrinsic::x86_avx512_vpdpbusd_256;
3850
else if (VecWidth == 512 && !IsSaturating)
3851
IID = Intrinsic::x86_avx512_vpdpbusd_512;
3852
else if (VecWidth == 128 && IsSaturating)
3853
IID = Intrinsic::x86_avx512_vpdpbusds_128;
3854
else if (VecWidth == 256 && IsSaturating)
3855
IID = Intrinsic::x86_avx512_vpdpbusds_256;
3856
else if (VecWidth == 512 && IsSaturating)
3857
IID = Intrinsic::x86_avx512_vpdpbusds_512;
3858
else
3859
llvm_unreachable("Unexpected intrinsic");
3860
3861
Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3862
CI->getArgOperand(2)};
3863
Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3864
Args);
3865
Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3866
: CI->getArgOperand(0);
3867
Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3868
} else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
3869
Name.starts_with("avx512.maskz.vpdpwssd.") ||
3870
Name.starts_with("avx512.mask.vpdpwssds.") ||
3871
Name.starts_with("avx512.maskz.vpdpwssds.")) {
3872
bool ZeroMask = Name[11] == 'z';
3873
bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3874
unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3875
Intrinsic::ID IID;
3876
if (VecWidth == 128 && !IsSaturating)
3877
IID = Intrinsic::x86_avx512_vpdpwssd_128;
3878
else if (VecWidth == 256 && !IsSaturating)
3879
IID = Intrinsic::x86_avx512_vpdpwssd_256;
3880
else if (VecWidth == 512 && !IsSaturating)
3881
IID = Intrinsic::x86_avx512_vpdpwssd_512;
3882
else if (VecWidth == 128 && IsSaturating)
3883
IID = Intrinsic::x86_avx512_vpdpwssds_128;
3884
else if (VecWidth == 256 && IsSaturating)
3885
IID = Intrinsic::x86_avx512_vpdpwssds_256;
3886
else if (VecWidth == 512 && IsSaturating)
3887
IID = Intrinsic::x86_avx512_vpdpwssds_512;
3888
else
3889
llvm_unreachable("Unexpected intrinsic");
3890
3891
Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3892
CI->getArgOperand(2)};
3893
Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3894
Args);
3895
Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3896
: CI->getArgOperand(0);
3897
Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3898
} else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
3899
Name == "addcarry.u32" || Name == "addcarry.u64" ||
3900
Name == "subborrow.u32" || Name == "subborrow.u64") {
3901
Intrinsic::ID IID;
3902
if (Name[0] == 'a' && Name.back() == '2')
3903
IID = Intrinsic::x86_addcarry_32;
3904
else if (Name[0] == 'a' && Name.back() == '4')
3905
IID = Intrinsic::x86_addcarry_64;
3906
else if (Name[0] == 's' && Name.back() == '2')
3907
IID = Intrinsic::x86_subborrow_32;
3908
else if (Name[0] == 's' && Name.back() == '4')
3909
IID = Intrinsic::x86_subborrow_64;
3910
else
3911
llvm_unreachable("Unexpected intrinsic");
3912
3913
// Make a call with 3 operands.
3914
Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3915
CI->getArgOperand(2)};
3916
Value *NewCall = Builder.CreateCall(
3917
Intrinsic::getDeclaration(CI->getModule(), IID), Args);
3918
3919
// Extract the second result and store it.
3920
Value *Data = Builder.CreateExtractValue(NewCall, 1);
3921
// Cast the pointer to the right type.
3922
Value *Ptr = Builder.CreateBitCast(
3923
CI->getArgOperand(3), llvm::PointerType::getUnqual(Data->getType()));
3924
Builder.CreateAlignedStore(Data, Ptr, Align(1));
3925
// Replace the original call result with the first result of the new call.
3926
Value *CF = Builder.CreateExtractValue(NewCall, 0);
3927
3928
CI->replaceAllUsesWith(CF);
3929
Rep = nullptr;
3930
} else if (Name.starts_with("avx512.mask.") &&
3931
upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
3932
// Rep will be updated by the call in the condition.
3933
}
3934
3935
return Rep;
3936
}
3937
3938
/// Upgrade a call to a legacy ARM MVE/CDE intrinsic.
/// \p Name - Intrinsic name as passed by the caller (it is compared against
///           names such as 'mve.vctp64.old', i.e. without a target prefix).
/// \p CI - The call being upgraded.
/// \p F - The called function; its parent module receives the declarations.
/// \p Builder - Builder used to emit the replacement instructions.
/// \returns the newly created call that stands in for \p CI.
static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
                                      IRBuilder<> &Builder) {
  Module *M = F->getParent();

  if (Name == "mve.vctp64.old") {
    // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
    // correct type.
    Function *VctpFn = Intrinsic::getDeclaration(M, Intrinsic::arm_mve_vctp64);
    Value *NarrowPred =
        Builder.CreateCall(VctpFn, CI->getArgOperand(0), CI->getName());

    // v2i1 predicate -> integer.
    Function *ToIntFn = Intrinsic::getDeclaration(
        M, Intrinsic::arm_mve_pred_v2i,
        {VectorType::get(Builder.getInt1Ty(), 2, false)});
    Value *PredBits = Builder.CreateCall(ToIntFn, NarrowPred);

    // Integer -> v4i1 predicate, the type the old intrinsic returned.
    Function *ToVecFn = Intrinsic::getDeclaration(
        M, Intrinsic::arm_mve_pred_i2v,
        {VectorType::get(Builder.getInt1Ty(), 4, false)});
    return Builder.CreateCall(ToVecFn, PredBits);
  }

  // Everything else handled here is a v2i64 operation that was mangled with
  // a v4i1 predicate type.
  const bool IsV4I1Predicated =
      Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
      Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
      Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
      Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
      Name == "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
      Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
      Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
      Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
      Name == "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
      Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
      Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
      Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
      Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
      Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
      Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
      Name == "cde.vcx3qa.predicated.v2i64.v4i1";
  if (!IsV4I1Predicated)
    llvm_unreachable("Unknown function for ARM CallBase upgrade.");

  Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
  Intrinsic::ID IID = CI->getIntrinsicID();

  // Overload types for the re-declared intrinsic; the predicate slot always
  // becomes v2i1.
  std::vector<Type *> OverloadTys;
  switch (IID) {
  case Intrinsic::arm_mve_mull_int_predicated:
  case Intrinsic::arm_mve_vqdmull_predicated:
  case Intrinsic::arm_mve_vldr_gather_base_predicated:
    OverloadTys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
    break;
  case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
  case Intrinsic::arm_mve_vstr_scatter_base_predicated:
  case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
    OverloadTys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
                   V2I1Ty};
    break;
  case Intrinsic::arm_mve_vldr_gather_offset_predicated:
    OverloadTys = {CI->getType(), CI->getOperand(0)->getType(),
                   CI->getOperand(1)->getType(), V2I1Ty};
    break;
  case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
    OverloadTys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
                   CI->getOperand(2)->getType(), V2I1Ty};
    break;
  case Intrinsic::arm_cde_vcx1q_predicated:
  case Intrinsic::arm_cde_vcx1qa_predicated:
  case Intrinsic::arm_cde_vcx2q_predicated:
  case Intrinsic::arm_cde_vcx2qa_predicated:
  case Intrinsic::arm_cde_vcx3q_predicated:
  case Intrinsic::arm_cde_vcx3qa_predicated:
    OverloadTys = {CI->getOperand(1)->getType(), V2I1Ty};
    break;
  default:
    llvm_unreachable("Unhandled Intrinsic!");
  }

  // Copy the arguments, routing every i1-element operand through
  // pred_v2i (from v4i1) followed by pred_i2v (to v2i1).
  std::vector<Value *> NewArgs;
  for (Value *Arg : CI->args()) {
    if (Arg->getType()->getScalarSizeInBits() == 1) {
      Function *ToIntFn = Intrinsic::getDeclaration(
          M, Intrinsic::arm_mve_pred_v2i,
          {VectorType::get(Builder.getInt1Ty(), 4, false)});
      Value *PredBits = Builder.CreateCall(ToIntFn, Arg);

      Function *ToVecFn =
          Intrinsic::getDeclaration(M, Intrinsic::arm_mve_pred_i2v, {V2I1Ty});
      Arg = Builder.CreateCall(ToVecFn, PredBits);
    }
    NewArgs.push_back(Arg);
  }

  Function *UpgradedFn = Intrinsic::getDeclaration(M, IID, OverloadTys);
  return Builder.CreateCall(UpgradedFn, NewArgs, CI->getName());
}
4031
4032
// These are expected to have the arguments:
4033
// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4034
//
4035
// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
4036
//
4037
static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
4038
Function *F, IRBuilder<> &Builder) {
4039
AtomicRMWInst::BinOp RMWOp =
4040
StringSwitch<AtomicRMWInst::BinOp>(Name)
4041
.StartsWith("ds.fadd", AtomicRMWInst::FAdd)
4042
.StartsWith("ds.fmin", AtomicRMWInst::FMin)
4043
.StartsWith("ds.fmax", AtomicRMWInst::FMax)
4044
.StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
4045
.StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap);
4046
4047
unsigned NumOperands = CI->getNumOperands();
4048
if (NumOperands < 3) // Malformed bitcode.
4049
return nullptr;
4050
4051
Value *Ptr = CI->getArgOperand(0);
4052
PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
4053
if (!PtrTy) // Malformed.
4054
return nullptr;
4055
4056
Value *Val = CI->getArgOperand(1);
4057
if (Val->getType() != CI->getType()) // Malformed.
4058
return nullptr;
4059
4060
ConstantInt *OrderArg = nullptr;
4061
bool IsVolatile = false;
4062
4063
// These should have 5 arguments (plus the callee). A separate version of the
4064
// ds_fadd intrinsic was defined for bf16 which was missing arguments.
4065
if (NumOperands > 3)
4066
OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4067
4068
// Ignore scope argument at 3
4069
4070
if (NumOperands > 5) {
4071
ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
4072
IsVolatile = !VolatileArg || !VolatileArg->isZero();
4073
}
4074
4075
AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
4076
if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
4077
Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4078
if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
4079
Order = AtomicOrdering::SequentiallyConsistent;
4080
4081
LLVMContext &Ctx = F->getContext();
4082
4083
// Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
4084
Type *RetTy = CI->getType();
4085
if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
4086
if (VT->getElementType()->isIntegerTy(16)) {
4087
VectorType *AsBF16 =
4088
VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
4089
Val = Builder.CreateBitCast(Val, AsBF16);
4090
}
4091
}
4092
4093
// The scope argument never really worked correctly. Use agent as the most
4094
// conservative option which should still always produce the instruction.
4095
SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
4096
AtomicRMWInst *RMW =
4097
Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4098
4099
if (PtrTy->getAddressSpace() != 3) {
4100
RMW->setMetadata("amdgpu.no.fine.grained.memory",
4101
MDNode::get(F->getContext(), {}));
4102
}
4103
4104
if (IsVolatile)
4105
RMW->setVolatile(true);
4106
4107
return Builder.CreateBitCast(RMW, RetTy);
4108
}
4109
4110
/// Helper to unwrap intrinsic call MetadataAsValue operands.
4111
template <typename MDType>
4112
static MDType *unwrapMAVOp(CallBase *CI, unsigned Op) {
4113
if (MetadataAsValue *MAV = dyn_cast<MetadataAsValue>(CI->getArgOperand(Op)))
4114
return dyn_cast<MDType>(MAV->getMetadata());
4115
return nullptr;
4116
}
4117
4118
/// Convert debug intrinsic calls to non-instruction debug records.
4119
/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
4120
/// \p CI - The debug intrinsic call.
4121
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) {
4122
DbgRecord *DR = nullptr;
4123
if (Name == "label") {
4124
DR = new DbgLabelRecord(unwrapMAVOp<DILabel>(CI, 0), CI->getDebugLoc());
4125
} else if (Name == "assign") {
4126
DR = new DbgVariableRecord(
4127
unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, 1),
4128
unwrapMAVOp<DIExpression>(CI, 2), unwrapMAVOp<DIAssignID>(CI, 3),
4129
unwrapMAVOp<Metadata>(CI, 4), unwrapMAVOp<DIExpression>(CI, 5),
4130
CI->getDebugLoc());
4131
} else if (Name == "declare") {
4132
DR = new DbgVariableRecord(
4133
unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, 1),
4134
unwrapMAVOp<DIExpression>(CI, 2), CI->getDebugLoc(),
4135
DbgVariableRecord::LocationType::Declare);
4136
} else if (Name == "addr") {
4137
// Upgrade dbg.addr to dbg.value with DW_OP_deref.
4138
DIExpression *Expr = unwrapMAVOp<DIExpression>(CI, 2);
4139
Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
4140
DR = new DbgVariableRecord(unwrapMAVOp<Metadata>(CI, 0),
4141
unwrapMAVOp<DILocalVariable>(CI, 1), Expr,
4142
CI->getDebugLoc());
4143
} else if (Name == "value") {
4144
// An old version of dbg.value had an extra offset argument.
4145
unsigned VarOp = 1;
4146
unsigned ExprOp = 2;
4147
if (CI->arg_size() == 4) {
4148
auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1));
4149
// Nonzero offset dbg.values get dropped without a replacement.
4150
if (!Offset || !Offset->isZeroValue())
4151
return;
4152
VarOp = 2;
4153
ExprOp = 3;
4154
}
4155
DR = new DbgVariableRecord(
4156
unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, VarOp),
4157
unwrapMAVOp<DIExpression>(CI, ExprOp), CI->getDebugLoc());
4158
}
4159
assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
4160
CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
4161
}
4162
4163
/// Upgrade a call to an old intrinsic. All argument and return casting must be
4164
/// provided to seamlessly integrate with existing context.
4165
void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
4166
// Note dyn_cast to Function is not quite the same as getCalledFunction, which
4167
// checks the callee's function type matches. It's likely we need to handle
4168
// type changes here.
4169
Function *F = dyn_cast<Function>(CI->getCalledOperand());
4170
if (!F)
4171
return;
4172
4173
LLVMContext &C = CI->getContext();
4174
IRBuilder<> Builder(C);
4175
Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
4176
4177
if (!NewFn) {
4178
bool FallthroughToDefaultUpgrade = false;
4179
// Get the Function's name.
4180
StringRef Name = F->getName();
4181
4182
assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
4183
Name = Name.substr(5);
4184
4185
bool IsX86 = Name.consume_front("x86.");
4186
bool IsNVVM = Name.consume_front("nvvm.");
4187
bool IsARM = Name.consume_front("arm.");
4188
bool IsAMDGCN = Name.consume_front("amdgcn.");
4189
bool IsDbg = Name.consume_front("dbg.");
4190
Value *Rep = nullptr;
4191
4192
if (!IsX86 && Name == "stackprotectorcheck") {
4193
Rep = nullptr;
4194
} else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
4195
Value *Arg = CI->getArgOperand(0);
4196
Value *Neg = Builder.CreateNeg(Arg, "neg");
4197
Value *Cmp = Builder.CreateICmpSGE(
4198
Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
4199
Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
4200
} else if (IsNVVM && (Name.starts_with("atomic.load.add.f32.p") ||
4201
Name.starts_with("atomic.load.add.f64.p"))) {
4202
Value *Ptr = CI->getArgOperand(0);
4203
Value *Val = CI->getArgOperand(1);
4204
Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
4205
AtomicOrdering::SequentiallyConsistent);
4206
} else if (IsNVVM && Name.consume_front("max.") &&
4207
(Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
4208
Name == "ui" || Name == "ull")) {
4209
Value *Arg0 = CI->getArgOperand(0);
4210
Value *Arg1 = CI->getArgOperand(1);
4211
Value *Cmp = Name.starts_with("u")
4212
? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
4213
: Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
4214
Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
4215
} else if (IsNVVM && Name.consume_front("min.") &&
4216
(Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
4217
Name == "ui" || Name == "ull")) {
4218
Value *Arg0 = CI->getArgOperand(0);
4219
Value *Arg1 = CI->getArgOperand(1);
4220
Value *Cmp = Name.starts_with("u")
4221
? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
4222
: Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
4223
Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
4224
} else if (IsNVVM && Name == "clz.ll") {
4225
// llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
4226
Value *Arg = CI->getArgOperand(0);
4227
Value *Ctlz = Builder.CreateCall(
4228
Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
4229
{Arg->getType()}),
4230
{Arg, Builder.getFalse()}, "ctlz");
4231
Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
4232
} else if (IsNVVM && Name == "popc.ll") {
4233
// llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
4234
// i64.
4235
Value *Arg = CI->getArgOperand(0);
4236
Value *Popc = Builder.CreateCall(
4237
Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
4238
{Arg->getType()}),
4239
Arg, "ctpop");
4240
Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
4241
} else if (IsNVVM) {
4242
if (Name == "h2f") {
4243
Rep =
4244
Builder.CreateCall(Intrinsic::getDeclaration(
4245
F->getParent(), Intrinsic::convert_from_fp16,
4246
{Builder.getFloatTy()}),
4247
CI->getArgOperand(0), "h2f");
4248
} else {
4249
Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
4250
if (IID != Intrinsic::not_intrinsic &&
4251
!F->getReturnType()->getScalarType()->isBFloatTy()) {
4252
rename(F);
4253
NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
4254
SmallVector<Value *, 2> Args;
4255
for (size_t I = 0; I < NewFn->arg_size(); ++I) {
4256
Value *Arg = CI->getArgOperand(I);
4257
Type *OldType = Arg->getType();
4258
Type *NewType = NewFn->getArg(I)->getType();
4259
Args.push_back((OldType->isIntegerTy() &&
4260
NewType->getScalarType()->isBFloatTy())
4261
? Builder.CreateBitCast(Arg, NewType)
4262
: Arg);
4263
}
4264
Rep = Builder.CreateCall(NewFn, Args);
4265
if (F->getReturnType()->isIntegerTy())
4266
Rep = Builder.CreateBitCast(Rep, F->getReturnType());
4267
}
4268
}
4269
} else if (IsX86) {
4270
Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
4271
} else if (IsARM) {
4272
Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
4273
} else if (IsAMDGCN) {
4274
Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
4275
} else if (IsDbg) {
4276
// We might have decided we don't want the new format after all between
4277
// first requesting the upgrade and now; skip the conversion if that is
4278
// the case, and check here to see if the intrinsic needs to be upgraded
4279
// normally.
4280
if (!CI->getModule()->IsNewDbgInfoFormat) {
4281
bool NeedsUpgrade =
4282
upgradeIntrinsicFunction1(CI->getCalledFunction(), NewFn, false);
4283
if (!NeedsUpgrade)
4284
return;
4285
FallthroughToDefaultUpgrade = true;
4286
} else {
4287
upgradeDbgIntrinsicToDbgRecord(Name, CI);
4288
}
4289
} else {
4290
llvm_unreachable("Unknown function for CallBase upgrade.");
4291
}
4292
4293
if (!FallthroughToDefaultUpgrade) {
4294
if (Rep)
4295
CI->replaceAllUsesWith(Rep);
4296
CI->eraseFromParent();
4297
return;
4298
}
4299
}
4300
4301
const auto &DefaultCase = [&]() -> void {
4302
if (CI->getFunctionType() == NewFn->getFunctionType()) {
4303
// Handle generic mangling change.
4304
assert(
4305
(CI->getCalledFunction()->getName() != NewFn->getName()) &&
4306
"Unknown function for CallBase upgrade and isn't just a name change");
4307
CI->setCalledFunction(NewFn);
4308
return;
4309
}
4310
4311
// This must be an upgrade from a named to a literal struct.
4312
if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
4313
assert(OldST != NewFn->getReturnType() &&
4314
"Return type must have changed");
4315
assert(OldST->getNumElements() ==
4316
cast<StructType>(NewFn->getReturnType())->getNumElements() &&
4317
"Must have same number of elements");
4318
4319
SmallVector<Value *> Args(CI->args());
4320
Value *NewCI = Builder.CreateCall(NewFn, Args);
4321
Value *Res = PoisonValue::get(OldST);
4322
for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4323
Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
4324
Res = Builder.CreateInsertValue(Res, Elem, Idx);
4325
}
4326
CI->replaceAllUsesWith(Res);
4327
CI->eraseFromParent();
4328
return;
4329
}
4330
4331
// We're probably about to produce something invalid. Let the verifier catch
4332
// it instead of dying here.
4333
CI->setCalledOperand(
4334
ConstantExpr::getPointerCast(NewFn, CI->getCalledOperand()->getType()));
4335
return;
4336
};
4337
CallInst *NewCall = nullptr;
4338
switch (NewFn->getIntrinsicID()) {
4339
default: {
4340
DefaultCase();
4341
return;
4342
}
4343
case Intrinsic::arm_neon_vst1:
4344
case Intrinsic::arm_neon_vst2:
4345
case Intrinsic::arm_neon_vst3:
4346
case Intrinsic::arm_neon_vst4:
4347
case Intrinsic::arm_neon_vst2lane:
4348
case Intrinsic::arm_neon_vst3lane:
4349
case Intrinsic::arm_neon_vst4lane: {
4350
SmallVector<Value *, 4> Args(CI->args());
4351
NewCall = Builder.CreateCall(NewFn, Args);
4352
break;
4353
}
4354
case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
4355
case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
4356
case Intrinsic::aarch64_sve_bfdot_lane_v2: {
4357
LLVMContext &Ctx = F->getParent()->getContext();
4358
SmallVector<Value *, 4> Args(CI->args());
4359
Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
4360
cast<ConstantInt>(Args[3])->getZExtValue());
4361
NewCall = Builder.CreateCall(NewFn, Args);
4362
break;
4363
}
4364
case Intrinsic::aarch64_sve_ld3_sret:
4365
case Intrinsic::aarch64_sve_ld4_sret:
4366
case Intrinsic::aarch64_sve_ld2_sret: {
4367
StringRef Name = F->getName();
4368
Name = Name.substr(5);
4369
unsigned N = StringSwitch<unsigned>(Name)
4370
.StartsWith("aarch64.sve.ld2", 2)
4371
.StartsWith("aarch64.sve.ld3", 3)
4372
.StartsWith("aarch64.sve.ld4", 4)
4373
.Default(0);
4374
auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4375
unsigned MinElts = RetTy->getMinNumElements() / N;
4376
SmallVector<Value *, 2> Args(CI->args());
4377
Value *NewLdCall = Builder.CreateCall(NewFn, Args);
4378
Value *Ret = llvm::PoisonValue::get(RetTy);
4379
for (unsigned I = 0; I < N; I++) {
4380
Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4381
Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
4382
Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
4383
}
4384
NewCall = dyn_cast<CallInst>(Ret);
4385
break;
4386
}
4387
4388
case Intrinsic::coro_end: {
4389
SmallVector<Value *, 3> Args(CI->args());
4390
Args.push_back(ConstantTokenNone::get(CI->getContext()));
4391
NewCall = Builder.CreateCall(NewFn, Args);
4392
break;
4393
}
4394
4395
case Intrinsic::vector_extract: {
4396
StringRef Name = F->getName();
4397
Name = Name.substr(5); // Strip llvm
4398
if (!Name.starts_with("aarch64.sve.tuple.get")) {
4399
DefaultCase();
4400
return;
4401
}
4402
auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4403
unsigned MinElts = RetTy->getMinNumElements();
4404
unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4405
Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4406
NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
4407
break;
4408
}
4409
4410
case Intrinsic::vector_insert: {
4411
StringRef Name = F->getName();
4412
Name = Name.substr(5);
4413
if (!Name.starts_with("aarch64.sve.tuple")) {
4414
DefaultCase();
4415
return;
4416
}
4417
if (Name.starts_with("aarch64.sve.tuple.set")) {
4418
unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4419
auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
4420
Value *NewIdx =
4421
ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
4422
NewCall = Builder.CreateCall(
4423
NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
4424
break;
4425
}
4426
if (Name.starts_with("aarch64.sve.tuple.create")) {
4427
unsigned N = StringSwitch<unsigned>(Name)
4428
.StartsWith("aarch64.sve.tuple.create2", 2)
4429
.StartsWith("aarch64.sve.tuple.create3", 3)
4430
.StartsWith("aarch64.sve.tuple.create4", 4)
4431
.Default(0);
4432
assert(N > 1 && "Create is expected to be between 2-4");
4433
auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4434
Value *Ret = llvm::PoisonValue::get(RetTy);
4435
unsigned MinElts = RetTy->getMinNumElements() / N;
4436
for (unsigned I = 0; I < N; I++) {
4437
Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4438
Value *V = CI->getArgOperand(I);
4439
Ret = Builder.CreateInsertVector(RetTy, Ret, V, Idx);
4440
}
4441
NewCall = dyn_cast<CallInst>(Ret);
4442
}
4443
break;
4444
}
4445
4446
case Intrinsic::arm_neon_bfdot:
4447
case Intrinsic::arm_neon_bfmmla:
4448
case Intrinsic::arm_neon_bfmlalb:
4449
case Intrinsic::arm_neon_bfmlalt:
4450
case Intrinsic::aarch64_neon_bfdot:
4451
case Intrinsic::aarch64_neon_bfmmla:
4452
case Intrinsic::aarch64_neon_bfmlalb:
4453
case Intrinsic::aarch64_neon_bfmlalt: {
4454
SmallVector<Value *, 3> Args;
4455
assert(CI->arg_size() == 3 &&
4456
"Mismatch between function args and call args");
4457
size_t OperandWidth =
4458
CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
4459
assert((OperandWidth == 64 || OperandWidth == 128) &&
4460
"Unexpected operand width");
4461
Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
4462
auto Iter = CI->args().begin();
4463
Args.push_back(*Iter++);
4464
Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4465
Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4466
NewCall = Builder.CreateCall(NewFn, Args);
4467
break;
4468
}
4469
4470
case Intrinsic::bitreverse:
4471
NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4472
break;
4473
4474
case Intrinsic::ctlz:
4475
case Intrinsic::cttz:
4476
assert(CI->arg_size() == 1 &&
4477
"Mismatch between function args and call args");
4478
NewCall =
4479
Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
4480
break;
4481
4482
case Intrinsic::objectsize: {
4483
Value *NullIsUnknownSize =
4484
CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
4485
Value *Dynamic =
4486
CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
4487
NewCall = Builder.CreateCall(
4488
NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
4489
break;
4490
}
4491
4492
case Intrinsic::ctpop:
4493
NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4494
break;
4495
4496
case Intrinsic::convert_from_fp16:
4497
NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4498
break;
4499
4500
case Intrinsic::dbg_value: {
4501
StringRef Name = F->getName();
4502
Name = Name.substr(5); // Strip llvm.
4503
// Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
4504
if (Name.starts_with("dbg.addr")) {
4505
DIExpression *Expr = cast<DIExpression>(
4506
cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
4507
Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
4508
NewCall =
4509
Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4510
MetadataAsValue::get(C, Expr)});
4511
break;
4512
}
4513
4514
// Upgrade from the old version that had an extra offset argument.
4515
assert(CI->arg_size() == 4);
4516
// Drop nonzero offsets instead of attempting to upgrade them.
4517
if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
4518
if (Offset->isZeroValue()) {
4519
NewCall = Builder.CreateCall(
4520
NewFn,
4521
{CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
4522
break;
4523
}
4524
CI->eraseFromParent();
4525
return;
4526
}
4527
4528
case Intrinsic::ptr_annotation:
4529
// Upgrade from versions that lacked the annotation attribute argument.
4530
if (CI->arg_size() != 4) {
4531
DefaultCase();
4532
return;
4533
}
4534
4535
// Create a new call with an added null annotation attribute argument.
4536
NewCall =
4537
Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4538
CI->getArgOperand(2), CI->getArgOperand(3),
4539
Constant::getNullValue(Builder.getPtrTy())});
4540
NewCall->takeName(CI);
4541
CI->replaceAllUsesWith(NewCall);
4542
CI->eraseFromParent();
4543
return;
4544
4545
case Intrinsic::var_annotation:
4546
// Upgrade from versions that lacked the annotation attribute argument.
4547
if (CI->arg_size() != 4) {
4548
DefaultCase();
4549
return;
4550
}
4551
// Create a new call with an added null annotation attribute argument.
4552
NewCall =
4553
Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4554
CI->getArgOperand(2), CI->getArgOperand(3),
4555
Constant::getNullValue(Builder.getPtrTy())});
4556
NewCall->takeName(CI);
4557
CI->replaceAllUsesWith(NewCall);
4558
CI->eraseFromParent();
4559
return;
4560
4561
case Intrinsic::riscv_aes32dsi:
4562
case Intrinsic::riscv_aes32dsmi:
4563
case Intrinsic::riscv_aes32esi:
4564
case Intrinsic::riscv_aes32esmi:
4565
case Intrinsic::riscv_sm4ks:
4566
case Intrinsic::riscv_sm4ed: {
4567
// The last argument to these intrinsics used to be i8 and changed to i32.
4568
// The type overload for sm4ks and sm4ed was removed.
4569
Value *Arg2 = CI->getArgOperand(2);
4570
if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
4571
return;
4572
4573
Value *Arg0 = CI->getArgOperand(0);
4574
Value *Arg1 = CI->getArgOperand(1);
4575
if (CI->getType()->isIntegerTy(64)) {
4576
Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
4577
Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
4578
}
4579
4580
Arg2 = ConstantInt::get(Type::getInt32Ty(C),
4581
cast<ConstantInt>(Arg2)->getZExtValue());
4582
4583
NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
4584
Value *Res = NewCall;
4585
if (Res->getType() != CI->getType())
4586
Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4587
NewCall->takeName(CI);
4588
CI->replaceAllUsesWith(Res);
4589
CI->eraseFromParent();
4590
return;
4591
}
4592
case Intrinsic::riscv_sha256sig0:
4593
case Intrinsic::riscv_sha256sig1:
4594
case Intrinsic::riscv_sha256sum0:
4595
case Intrinsic::riscv_sha256sum1:
4596
case Intrinsic::riscv_sm3p0:
4597
case Intrinsic::riscv_sm3p1: {
4598
// The last argument to these intrinsics used to be i8 and changed to i32.
4599
// The type overload for sm4ks and sm4ed was removed.
4600
if (!CI->getType()->isIntegerTy(64))
4601
return;
4602
4603
Value *Arg =
4604
Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
4605
4606
NewCall = Builder.CreateCall(NewFn, Arg);
4607
Value *Res =
4608
Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4609
NewCall->takeName(CI);
4610
CI->replaceAllUsesWith(Res);
4611
CI->eraseFromParent();
4612
return;
4613
}
4614
4615
case Intrinsic::x86_xop_vfrcz_ss:
4616
case Intrinsic::x86_xop_vfrcz_sd:
4617
NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
4618
break;
4619
4620
case Intrinsic::x86_xop_vpermil2pd:
4621
case Intrinsic::x86_xop_vpermil2ps:
4622
case Intrinsic::x86_xop_vpermil2pd_256:
4623
case Intrinsic::x86_xop_vpermil2ps_256: {
4624
SmallVector<Value *, 4> Args(CI->args());
4625
VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
4626
VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
4627
Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
4628
NewCall = Builder.CreateCall(NewFn, Args);
4629
break;
4630
}
4631
4632
case Intrinsic::x86_sse41_ptestc:
4633
case Intrinsic::x86_sse41_ptestz:
4634
case Intrinsic::x86_sse41_ptestnzc: {
4635
// The arguments for these intrinsics used to be v4f32, and changed
4636
// to v2i64. This is purely a nop, since those are bitwise intrinsics.
4637
// So, the only thing required is a bitcast for both arguments.
4638
// First, check the arguments have the old type.
4639
Value *Arg0 = CI->getArgOperand(0);
4640
if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
4641
return;
4642
4643
// Old intrinsic, add bitcasts
4644
Value *Arg1 = CI->getArgOperand(1);
4645
4646
auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
4647
4648
Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
4649
Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
4650
4651
NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
4652
break;
4653
}
4654
4655
case Intrinsic::x86_rdtscp: {
4656
// This used to take 1 arguments. If we have no arguments, it is already
4657
// upgraded.
4658
if (CI->getNumOperands() == 0)
4659
return;
4660
4661
NewCall = Builder.CreateCall(NewFn);
4662
// Extract the second result and store it.
4663
Value *Data = Builder.CreateExtractValue(NewCall, 1);
4664
// Cast the pointer to the right type.
4665
Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
4666
llvm::PointerType::getUnqual(Data->getType()));
4667
Builder.CreateAlignedStore(Data, Ptr, Align(1));
4668
// Replace the original call result with the first result of the new call.
4669
Value *TSC = Builder.CreateExtractValue(NewCall, 0);
4670
4671
NewCall->takeName(CI);
4672
CI->replaceAllUsesWith(TSC);
4673
CI->eraseFromParent();
4674
return;
4675
}
4676
4677
case Intrinsic::x86_sse41_insertps:
4678
case Intrinsic::x86_sse41_dppd:
4679
case Intrinsic::x86_sse41_dpps:
4680
case Intrinsic::x86_sse41_mpsadbw:
4681
case Intrinsic::x86_avx_dp_ps_256:
4682
case Intrinsic::x86_avx2_mpsadbw: {
4683
// Need to truncate the last argument from i32 to i8 -- this argument models
4684
// an inherently 8-bit immediate operand to these x86 instructions.
4685
SmallVector<Value *, 4> Args(CI->args());
4686
4687
// Replace the last argument with a trunc.
4688
Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
4689
NewCall = Builder.CreateCall(NewFn, Args);
4690
break;
4691
}
4692
4693
case Intrinsic::x86_avx512_mask_cmp_pd_128:
4694
case Intrinsic::x86_avx512_mask_cmp_pd_256:
4695
case Intrinsic::x86_avx512_mask_cmp_pd_512:
4696
case Intrinsic::x86_avx512_mask_cmp_ps_128:
4697
case Intrinsic::x86_avx512_mask_cmp_ps_256:
4698
case Intrinsic::x86_avx512_mask_cmp_ps_512: {
4699
SmallVector<Value *, 4> Args(CI->args());
4700
unsigned NumElts =
4701
cast<FixedVectorType>(Args[0]->getType())->getNumElements();
4702
Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
4703
4704
NewCall = Builder.CreateCall(NewFn, Args);
4705
Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
4706
4707
NewCall->takeName(CI);
4708
CI->replaceAllUsesWith(Res);
4709
CI->eraseFromParent();
4710
return;
4711
}
4712
4713
case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
4714
case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
4715
case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
4716
case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
4717
case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
4718
case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
4719
SmallVector<Value *, 4> Args(CI->args());
4720
unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4721
if (NewFn->getIntrinsicID() ==
4722
Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
4723
Args[1] = Builder.CreateBitCast(
4724
Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4725
4726
NewCall = Builder.CreateCall(NewFn, Args);
4727
Value *Res = Builder.CreateBitCast(
4728
NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
4729
4730
NewCall->takeName(CI);
4731
CI->replaceAllUsesWith(Res);
4732
CI->eraseFromParent();
4733
return;
4734
}
4735
case Intrinsic::x86_avx512bf16_dpbf16ps_128:
4736
case Intrinsic::x86_avx512bf16_dpbf16ps_256:
4737
case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
4738
SmallVector<Value *, 4> Args(CI->args());
4739
unsigned NumElts =
4740
cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
4741
Args[1] = Builder.CreateBitCast(
4742
Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4743
Args[2] = Builder.CreateBitCast(
4744
Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4745
4746
NewCall = Builder.CreateCall(NewFn, Args);
4747
break;
4748
}
4749
4750
case Intrinsic::thread_pointer: {
4751
NewCall = Builder.CreateCall(NewFn, {});
4752
break;
4753
}
4754
4755
case Intrinsic::memcpy:
4756
case Intrinsic::memmove:
4757
case Intrinsic::memset: {
4758
// We have to make sure that the call signature is what we're expecting.
4759
// We only want to change the old signatures by removing the alignment arg:
4760
// @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
4761
// -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
4762
// @llvm.memset...(i8*, i8, i[32|64], i32, i1)
4763
// -> @llvm.memset...(i8*, i8, i[32|64], i1)
4764
// Note: i8*'s in the above can be any pointer type
4765
if (CI->arg_size() != 5) {
4766
DefaultCase();
4767
return;
4768
}
4769
// Remove alignment argument (3), and add alignment attributes to the
4770
// dest/src pointers.
4771
Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
4772
CI->getArgOperand(2), CI->getArgOperand(4)};
4773
NewCall = Builder.CreateCall(NewFn, Args);
4774
AttributeList OldAttrs = CI->getAttributes();
4775
AttributeList NewAttrs = AttributeList::get(
4776
C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
4777
{OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
4778
OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
4779
NewCall->setAttributes(NewAttrs);
4780
auto *MemCI = cast<MemIntrinsic>(NewCall);
4781
// All mem intrinsics support dest alignment.
4782
const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
4783
MemCI->setDestAlignment(Align->getMaybeAlignValue());
4784
// Memcpy/Memmove also support source alignment.
4785
if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
4786
MTI->setSourceAlignment(Align->getMaybeAlignValue());
4787
break;
4788
}
4789
}
4790
assert(NewCall && "Should have either set this variable or returned through "
4791
"the default case");
4792
NewCall->takeName(CI);
4793
CI->replaceAllUsesWith(NewCall);
4794
CI->eraseFromParent();
4795
}
4796
4797
/// Upgrade every call to the intrinsic declaration \p F and then erase \p F.
///
/// Nothing happens when UpgradeIntrinsicFunction reports that \p F does not
/// need upgrading.
///
/// \param F the (non-null) function to examine.
void llvm::UpgradeCallsToIntrinsic(Function *F) {
  assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");

  // Ask whether F has to be upgraded; on success Replacement receives the
  // replacement function, if there is one.
  Function *Replacement = nullptr;
  if (!UpgradeIntrinsicFunction(F, Replacement))
    return;

  // Rewrite each call site to use the new function or new instructions. The
  // early-increment range is required because upgrading a call deletes it,
  // which would otherwise invalidate the iteration over F's users.
  for (User *Usr : make_early_inc_range(F->users())) {
    auto *Call = dyn_cast<CallBase>(Usr);
    if (Call)
      UpgradeIntrinsicCall(Call, Replacement);
  }

  // The old function is no longer used; remove it from the module.
  F->eraseFromParent();
}
4814
4815
/// Upgrade a scalar-format TBAA tag to the struct-path aware format.
///
/// \param MD the TBAA node to inspect.
/// \returns \p MD itself when it has no operands (left for the verifier to
/// diagnose) or when it already uses the struct-path aware format (first
/// operand is an MDNode and there are at least three operands). Otherwise a
/// new node is returned:
///   - three operands:  <ScalarType, ScalarType, offset 0, const>, where
///     ScalarType is a fresh node built from the first two operands and
///     "const" is the original third operand;
///   - any other count: <MD, MD, offset 0>.
MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
  const unsigned NumOperands = MD.getNumOperands();
  if (NumOperands == 0)
    return &MD; // Invalid, punt to a verifier error.

  // Check if the tag uses struct-path aware TBAA format. Operand 0 may be
  // null in malformed metadata, and isa<> must not be handed a null pointer,
  // so use the null-tolerant form; such a node simply falls through to the
  // upgrade path below.
  if (NumOperands >= 3 && isa_and_nonnull<MDNode>(MD.getOperand(0).get()))
    return &MD;

  LLVMContext &Context = MD.getContext();
  // Both upgraded forms carry a 64-bit zero offset.
  Metadata *ZeroOffset = ConstantAsMetadata::get(
      Constant::getNullValue(Type::getInt64Ty(Context)));

  if (NumOperands == 3) {
    Metadata *ScalarOps[] = {MD.getOperand(0), MD.getOperand(1)};
    MDNode *ScalarType = MDNode::get(Context, ScalarOps);
    // Create a MDNode <ScalarType, ScalarType, offset 0, const>
    Metadata *TagOps[] = {ScalarType, ScalarType, ZeroOffset,
                          MD.getOperand(2)};
    return MDNode::get(Context, TagOps);
  }

  // Create a MDNode <MD, MD, offset 0>
  Metadata *TagOps[] = {&MD, &MD, ZeroOffset};
  return MDNode::get(Context, TagOps);
}
4840
4841
/// Upgrade a bitcast instruction between pointers in different address
/// spaces into a ptrtoint / inttoptr pair.
///
/// \param Opc    opcode of the cast being read; only Instruction::BitCast is
///               handled.
/// \param V      the value being cast.
/// \param DestTy the destination type of the cast.
/// \param Temp   receives the intermediate ptrtoint instruction when an
///               upgrade happens, and nullptr when \p Opc is a bitcast that
///               needs no upgrade. It is left untouched when \p Opc is not a
///               bitcast.
/// \returns the final inttoptr instruction, or nullptr when no upgrade is
/// needed. Neither created instruction is given an insertion position here.
Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
                                      Instruction *&Temp) {
  if (Opc != Instruction::BitCast)
    return nullptr;

  Temp = nullptr;

  Type *FromTy = V->getType();
  const bool PointerToPointer =
      FromTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy();
  if (!PointerToPointer ||
      FromTy->getPointerAddressSpace() == DestTy->getPointerAddressSpace())
    return nullptr;

  // We have no information about target data layout, so we assume that
  // the maximum pointer size is 64bit.
  Type *IntermediateTy = Type::getInt64Ty(V->getContext());
  Temp = CastInst::Create(Instruction::PtrToInt, V, IntermediateTy);
  return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
}
4862
4863
/// Constant-expression counterpart of the bitcast upgrade: a bitcast between
/// pointers in different address spaces becomes inttoptr(ptrtoint(C)).
///
/// \param Opc    opcode of the cast expression; only Instruction::BitCast is
///               handled.
/// \param C      the constant being cast.
/// \param DestTy the destination type of the cast.
/// \returns the replacement constant expression, or nullptr when no upgrade
/// is needed.
Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
  if (Opc != Instruction::BitCast)
    return nullptr;

  Type *FromTy = C->getType();
  const bool PointerToPointer =
      FromTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy();
  if (!PointerToPointer ||
      FromTy->getPointerAddressSpace() == DestTy->getPointerAddressSpace())
    return nullptr;

  // We have no information about target data layout, so we assume that
  // the maximum pointer size is 64bit.
  Type *IntermediateTy = Type::getInt64Ty(C->getContext());
  Constant *AsInteger = ConstantExpr::getPtrToInt(C, IntermediateTy);
  return ConstantExpr::getIntToPtr(AsInteger, DestTy);
}
4882
4883
/// Check the debug info version number, if it is out-dated, drop the debug
4884
/// info. Return true if module is modified.
4885
bool llvm::UpgradeDebugInfo(Module &M) {
4886
if (DisableAutoUpgradeDebugInfo)
4887
return false;
4888
4889
unsigned Version = getDebugMetadataVersionFromModule(M);
4890
if (Version == DEBUG_METADATA_VERSION) {
4891
bool BrokenDebugInfo = false;
4892
if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
4893
report_fatal_error("Broken module found, compilation aborted!");
4894
if (!BrokenDebugInfo)
4895
// Everything is ok.
4896
return false;
4897
else {
4898
// Diagnose malformed debug info.
4899
DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
4900
M.getContext().diagnose(Diag);
4901
}
4902
}
4903
bool Modified = StripDebugInfo(M);
4904
if (Modified && Version != DEBUG_METADATA_VERSION) {
4905
// Diagnose a version mismatch.
4906
DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
4907
M.getContext().diagnose(DiagVersion);
4908
}
4909
return Modified;
4910
}
4911
4912
/// This checks for objc retain release marker which should be upgraded. It
4913
/// returns true if module is modified.
4914
static bool upgradeRetainReleaseMarker(Module &M) {
4915
bool Changed = false;
4916
const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
4917
NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
4918
if (ModRetainReleaseMarker) {
4919
MDNode *Op = ModRetainReleaseMarker->getOperand(0);
4920
if (Op) {
4921
MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
4922
if (ID) {
4923
SmallVector<StringRef, 4> ValueComp;
4924
ID->getString().split(ValueComp, "#");
4925
if (ValueComp.size() == 2) {
4926
std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
4927
ID = MDString::get(M.getContext(), NewValue);
4928
}
4929
M.addModuleFlag(Module::Error, MarkerKey, ID);
4930
M.eraseNamedMetadata(ModRetainReleaseMarker);
4931
Changed = true;
4932
}
4933
}
4934
}
4935
return Changed;
4936
}
4937
4938
/// Convert calls to ObjC ARC runtime functions into calls to the matching
/// intrinsics.
///
/// "clang.arc.use" is always converted. The remaining runtime functions are
/// converted only when the retain/release marker had to be upgraded.
void llvm::UpgradeARCRuntime(Module &M) {
  // Rewrites direct calls to the ordinary function named OldFunc into calls
  // to the intrinsic IntrinsicFunc. Call sites whose return value or fixed
  // arguments cannot be bitcast to the intrinsic's signature are left alone.
  // The old function is erased once nothing uses it any more.
  auto UpgradeToIntrinsic = [&](const char *OldFunc,
                                llvm::Intrinsic::ID IntrinsicFunc) {
    Function *Fn = M.getFunction(OldFunc);
    if (!Fn)
      return;

    Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);
    FunctionType *NewFuncTy = NewFn->getFunctionType();
    Type *NewRetTy = NewFuncTy->getReturnType();
    const unsigned NumFixedParams = NewFuncTy->getNumParams();

    // Calls are erased while iterating, hence the early-increment range.
    for (User *U : make_early_inc_range(Fn->users())) {
      auto *OldCall = dyn_cast<CallInst>(U);
      if (!OldCall || OldCall->getCalledFunction() != Fn)
        continue;

      IRBuilder<> Builder(OldCall->getParent(), OldCall->getIterator());

      // Don't upgrade the intrinsic if it's not valid to bitcast the return
      // value to the return type of the old function.
      if (NewRetTy != OldCall->getType() &&
          !CastInst::castIsValid(Instruction::BitCast, OldCall, NewRetTy))
        continue;

      SmallVector<Value *, 2> NewArgs;
      bool AllArgsCastable = true;
      for (unsigned Idx = 0, NumArgs = OldCall->arg_size(); Idx != NumArgs;
           ++Idx) {
        Value *Arg = OldCall->getArgOperand(Idx);

        // Bitcast the argument to the parameter type of the new function
        // unless it is a variadic argument.
        if (Idx < NumFixedParams) {
          Type *ParamTy = NewFuncTy->getParamType(Idx);
          // Don't upgrade the intrinsic if it's not valid to bitcast the
          // argument to the parameter type of the new function.
          if (!CastInst::castIsValid(Instruction::BitCast, Arg, ParamTy)) {
            AllArgsCastable = false;
            break;
          }
          Arg = Builder.CreateBitCast(Arg, ParamTy);
        }
        NewArgs.push_back(Arg);
      }

      if (!AllArgsCastable)
        continue;

      // Create a call instruction that calls the new function.
      CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, NewArgs);
      NewCall->setTailCallKind(OldCall->getTailCallKind());
      NewCall->takeName(OldCall);

      // Bitcast the return value back to the type of the old call.
      Value *NewRetVal = Builder.CreateBitCast(NewCall, OldCall->getType());

      if (!OldCall->use_empty())
        OldCall->replaceAllUsesWith(NewRetVal);
      OldCall->eraseFromParent();
    }

    if (Fn->use_empty())
      Fn->eraseFromParent();
  };

  // Unconditionally convert a call to "clang.arc.use" to a call to
  // "llvm.objc.clang.arc.use".
  UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);

  // Upgrade the retain release marker. If there is no need to upgrade
  // the marker, that means either the module is already new enough to contain
  // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
  if (!upgradeRetainReleaseMarker(M))
    return;

  // Runtime function name -> intrinsic that replaces it.
  std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
      {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
      {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
      {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
      {"objc_autoreleaseReturnValue",
       llvm::Intrinsic::objc_autoreleaseReturnValue},
      {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
      {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
      {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
      {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
      {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
      {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
      {"objc_release", llvm::Intrinsic::objc_release},
      {"objc_retain", llvm::Intrinsic::objc_retain},
      {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
      {"objc_retainAutoreleaseReturnValue",
       llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
      {"objc_retainAutoreleasedReturnValue",
       llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
      {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
      {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
      {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
      {"objc_unsafeClaimAutoreleasedReturnValue",
       llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
      {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
      {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
      {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
      {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
      {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
      {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
      {"objc_arc_annotation_topdown_bbstart",
       llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
      {"objc_arc_annotation_topdown_bbend",
       llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
      {"objc_arc_annotation_bottomup_bbstart",
       llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
      {"objc_arc_annotation_bottomup_bbend",
       llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};

  for (const auto &[RuntimeName, IntrinsicID] : RuntimeFuncs)
    UpgradeToIntrinsic(RuntimeName, IntrinsicID);
}
5058
5059
/// Upgrade out-dated module flags in place.
///
/// Walks the module-flags metadata and, for every three-operand flag whose
/// key is an MDString, applies the upgrades below. Two further fix-ups run
/// after the walk: a default "Objective-C Class Properties" flag is added for
/// ObjC modules lacking one, and Swift version flags are added when they were
/// packed into the old garbage-collection value.
///
/// \returns true if any module flag was changed or added; false when nothing
/// changed or the module has no module flags.
bool llvm::UpgradeModuleFlags(Module &M) {
  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
  if (!ModFlags)
    return false;

  LLVMContext &Ctx = M.getContext();
  bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
  bool HasSwiftVersionFlag = false;
  // Only meaningful once HasSwiftVersionFlag is set; initialised so they never
  // hold indeterminate values.
  uint8_t SwiftMajorVersion = 0, SwiftMinorVersion = 0;
  uint32_t SwiftABIVersion = 0;
  auto *Int8Ty = Type::getInt8Ty(Ctx);
  auto *Int32Ty = Type::getInt32Ty(Ctx);

  for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
    MDNode *Op = ModFlags->getOperand(I);
    if (Op->getNumOperands() != 3)
      continue;
    MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
    if (!ID)
      continue;
    const StringRef Name = ID->getString();

    // Replaces flag I with a fresh <Behavior, Key, Val> node. Note that Op
    // keeps referring to the node that was read at the top of the iteration.
    auto ReplaceFlag = [&](Metadata *Behavior, Metadata *Key, Metadata *Val) {
      Metadata *Ops[3] = {Behavior, Key, Val};
      ModFlags->setOperand(I, MDNode::get(Ctx, Ops));
      Changed = true;
    };
    // Rewrites only the merge behavior of the current flag.
    auto SetBehavior = [&](Module::ModFlagBehavior B) {
      ReplaceFlag(ConstantAsMetadata::get(ConstantInt::get(Int32Ty, B)), ID,
                  Op->getOperand(2));
    };
    // True when the flag's behavior operand is a constant integer equal to B.
    auto HasBehavior = [&](Module::ModFlagBehavior B) {
      auto *Behavior =
          mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0));
      return Behavior && Behavior->getLimitedValue() == B;
    };

    if (Name == "Objective-C Image Info Version")
      HasObjCFlag = true;
    if (Name == "Objective-C Class Properties")
      HasClassProperties = true;

    // Upgrade PIC from Error/Max to Min.
    if (Name == "PIC Level" &&
        (HasBehavior(Module::Error) || HasBehavior(Module::Max)))
      SetBehavior(Module::Min);

    // Upgrade "PIE Level" from Error to Max.
    if (Name == "PIE Level" && HasBehavior(Module::Error))
      SetBehavior(Module::Max);

    // Upgrade branch protection and return address signing module flags. The
    // module flag behavior for these fields were Error and now they are Min.
    if ((Name == "branch-target-enforcement" ||
         Name.starts_with("sign-return-address")) &&
        HasBehavior(Module::Error))
      SetBehavior(Module::Min);

    // Upgrade Objective-C Image Info Section. Remove the whitespace in the
    // section name so that llvm-lto will not complain about mismatching
    // module flags that are functionally the same.
    if (Name == "Objective-C Image Info Section") {
      if (auto *Section = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
        SmallVector<StringRef, 4> ValueComp;
        Section->getString().split(ValueComp, " ");
        if (ValueComp.size() != 1) {
          std::string NewValue;
          for (auto &S : ValueComp)
            NewValue += S.str();
          ReplaceFlag(Op->getOperand(0), Op->getOperand(1),
                      MDString::get(Ctx, NewValue));
        }
      }
    }

    // IRUpgrader turns a i32 type "Objective-C Garbage Collection" into i8
    // value. If the higher bits are set, it adds new module flag for swift
    // info.
    if (Name == "Objective-C Garbage Collection") {
      // The value operand may be null in malformed input, so use the
      // null-tolerant cast like the other operand checks in this function.
      if (auto *Md = dyn_cast_or_null<ConstantAsMetadata>(Op->getOperand(2))) {
        assert(Md->getValue() && "Expected non-empty metadata");
        // Already an i8: this flag has been upgraded, move on to the next one.
        if (Md->getValue()->getType() == Int8Ty)
          continue;
        unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
        if ((Val & 0xff) != Val) {
          // Bits above the low byte carry the Swift version information.
          HasSwiftVersionFlag = true;
          SwiftABIVersion = (Val & 0xff00) >> 8;
          SwiftMajorVersion = (Val & 0xff000000) >> 24;
          SwiftMinorVersion = (Val & 0xff0000) >> 16;
        }
        ReplaceFlag(
            ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Error)),
            Op->getOperand(1),
            ConstantAsMetadata::get(ConstantInt::get(Int8Ty, Val & 0xff)));
      }
    }

    // Rename "amdgpu_code_object_version" to "amdhsa_code_object_version".
    if (Name == "amdgpu_code_object_version")
      ReplaceFlag(Op->getOperand(0),
                  MDString::get(Ctx, "amdhsa_code_object_version"),
                  Op->getOperand(2));
  }

  // "Objective-C Class Properties" is recently added for Objective-C. We
  // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
  // flag of value 0, so we can correctly downgrade this flag when trying to
  // link an ObjC bitcode without this module flag with an ObjC bitcode with
  // this module flag.
  if (HasObjCFlag && !HasClassProperties) {
    M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
                    (uint32_t)0);
    Changed = true;
  }

  if (HasSwiftVersionFlag) {
    M.addModuleFlag(Module::Error, "Swift ABI Version", SwiftABIVersion);
    M.addModuleFlag(Module::Error, "Swift Major Version",
                    ConstantInt::get(Int8Ty, SwiftMajorVersion));
    M.addModuleFlag(Module::Error, "Swift Minor Version",
                    ConstantInt::get(Int8Ty, SwiftMinorVersion));
    Changed = true;
  }

  return Changed;
}
5201
5202
/// Strip the whitespace around the comma-separated components of old-style
/// "__DATA, __objc_catlist" section names on global variables.
void llvm::UpgradeSectionAttributes(Module &M) {
  // Splits Section at ',' and joins the trimmed components back with ','.
  auto TrimSpaces = [](StringRef Section) -> std::string {
    SmallVector<StringRef, 5> Parts;
    Section.split(Parts, ',');

    SmallString<32> Joined;
    raw_svector_ostream OS(Joined);

    bool IsFirst = true;
    for (StringRef Part : Parts) {
      if (!IsFirst)
        OS << ',';
      IsFirst = false;
      OS << Part.trim();
    }

    return std::string(OS.str());
  };

  for (auto &GV : M.globals()) {
    // Only globals with an explicit section in the old spelling are touched.
    if (GV.hasSection() &&
        GV.getSection().starts_with("__DATA, __objc_catlist")) {
      // __DATA, __objc_catlist, regular, no_dead_strip
      // __DATA,__objc_catlist,regular,no_dead_strip
      GV.setSection(TrimSpaces(GV.getSection()));
    }
  }
}
5230
5231
namespace {
5232
// Prior to LLVM 10.0, the strictfp attribute could be used on individual
5233
// callsites within a function that did not also have the strictfp attribute.
5234
// Since 10.0, if strict FP semantics are needed within a function, the
5235
// function must have the strictfp attribute and all calls within the function
5236
// must also have the strictfp attribute. This latter restriction is
5237
// necessary to prevent unwanted libcall simplification when a function is
5238
// being cloned (such as for inlining).
5239
//
5240
// The "dangling" strictfp attribute usage was only used to prevent constant
5241
// folding and other libcall simplification. The nobuiltin attribute on the
5242
// callsite has the same effect.
5243
struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
5244
StrictFPUpgradeVisitor() = default;
5245
5246
void visitCallBase(CallBase &Call) {
5247
if (!Call.isStrictFP())
5248
return;
5249
if (isa<ConstrainedFPIntrinsic>(&Call))
5250
return;
5251
// If we get here, the caller doesn't have the strictfp attribute
5252
// but this callsite does. Replace the strictfp attribute with nobuiltin.
5253
Call.removeFnAttr(Attribute::StrictFP);
5254
Call.addFnAttr(Attribute::NoBuiltin);
5255
}
5256
};
5257
} // namespace
5258
5259
/// Upgrade attributes on \p F and on the call sites inside it.
///
/// Three independent steps are performed: call-site strictfp clean-up for
/// definitions lacking strictfp, removal of type-incompatible return and
/// parameter attributes, and conversion of the "implicit-section-name" string
/// attribute into an explicit section.
void llvm::UpgradeFunctionAttributes(Function &F) {
  // If a function definition doesn't have the strictfp attribute,
  // convert any callsite strictfp attributes to nobuiltin.
  const bool NeedsStrictFPFixup =
      !F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP);
  if (NeedsStrictFPFixup) {
    StrictFPUpgradeVisitor Visitor;
    Visitor.visit(F);
  }

  // Remove all incompatible attributes from the return value and from every
  // parameter.
  F.removeRetAttrs(AttributeFuncs::typeIncompatible(F.getReturnType()));
  for (auto &Param : F.args())
    Param.removeAttrs(AttributeFuncs::typeIncompatible(Param.getType()));

  // Older versions of LLVM treated an "implicit-section-name" attribute
  // similarly to directly setting the section on a Function.
  Attribute SectionAttr = F.getFnAttribute("implicit-section-name");
  if (SectionAttr.isValid() && SectionAttr.isStringAttribute()) {
    F.setSection(SectionAttr.getValueAsString());
    F.removeFnAttr("implicit-section-name");
  }
}
5280
5281
/// Return true if \p MD is a non-empty tuple whose first operand is a string
/// starting with the old "llvm.vectorizer." prefix. Null metadata, non-tuple
/// metadata and tuples with a non-string first operand yield false.
static bool isOldLoopArgument(Metadata *MD) {
  auto *Tuple = dyn_cast_or_null<MDTuple>(MD);
  if (!Tuple || Tuple->getNumOperands() < 1)
    return false;

  auto *Tag = dyn_cast_or_null<MDString>(Tuple->getOperand(0));
  return Tag && Tag->getString().starts_with("llvm.vectorizer.");
}
5292
5293
/// Map an old "llvm.vectorizer.*" loop tag to its current name.
///
/// "llvm.vectorizer.unroll" becomes "llvm.loop.interleave.count"; any other
/// tag has its "llvm.vectorizer." prefix replaced by "llvm.loop.vectorize.".
///
/// \param C      context that owns the returned string.
/// \param OldTag the old tag; it must start with "llvm.vectorizer.".
static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
  StringRef OldPrefix = "llvm.vectorizer.";
  assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");

  // The unroll tag is the one rename that does not follow the prefix rule.
  if (OldTag == "llvm.vectorizer.unroll")
    return MDString::get(C, "llvm.loop.interleave.count");

  StringRef Suffix = OldTag.drop_front(OldPrefix.size());
  std::string NewTag = (Twine("llvm.loop.vectorize.") + Suffix).str();
  return MDString::get(C, NewTag);
}
5304
5305
/// Upgrade a single loop-metadata argument.
///
/// \returns \p MD unchanged unless it is a non-empty tuple whose first
/// operand is a string starting with "llvm.vectorizer."; in that case a new
/// tuple is returned with the tag upgraded and all remaining operands copied
/// over as they are.
static Metadata *upgradeLoopArgument(Metadata *MD) {
  auto *Tuple = dyn_cast_or_null<MDTuple>(MD);
  if (!Tuple || Tuple->getNumOperands() < 1)
    return MD;

  auto *Tag = dyn_cast_or_null<MDString>(Tuple->getOperand(0));
  if (!Tag || !Tag->getString().starts_with("llvm.vectorizer."))
    return MD;

  // This has an old tag. Upgrade it and keep the rest of the operands.
  const unsigned NumOps = Tuple->getNumOperands();
  SmallVector<Metadata *, 8> NewOps;
  NewOps.reserve(NumOps);
  NewOps.push_back(upgradeLoopTag(Tuple->getContext(), Tag->getString()));
  for (unsigned Idx = 1; Idx < NumOps; ++Idx)
    NewOps.push_back(Tuple->getOperand(Idx));

  return MDTuple::get(Tuple->getContext(), NewOps);
}
5326
5327
/// Upgrade the old "llvm.vectorizer.*" arguments of a loop attachment.
///
/// \returns \p N itself when it is not a tuple or contains no old-style
/// argument; otherwise a new tuple in which every operand has been passed
/// through upgradeLoopArgument.
MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
  auto *Tuple = dyn_cast<MDTuple>(&N);
  if (!Tuple || none_of(Tuple->operands(), isOldLoopArgument))
    return &N;

  SmallVector<Metadata *, 8> Upgraded;
  Upgraded.reserve(Tuple->getNumOperands());
  for (unsigned Idx = 0, End = Tuple->getNumOperands(); Idx != End; ++Idx)
    Upgraded.push_back(upgradeLoopArgument(Tuple->getOperand(Idx)));

  return MDTuple::get(Tuple->getContext(), Upgraded);
}
5342
5343
/// Upgrade the data layout string \p DL of a module targeting triple \p TT.
///
/// The upgrade applied depends on the target:
///   - non-GCN AMDGPU, SPIR and non-logical SPIRV: append "G1" when no 'G'
///     spec is present;
///   - 64-bit LoongArch and RISC-V: turn "-n64-" into "-n32:64-";
///   - AMDGCN: add "G1", the "ni:7:8:9" non-integral declaration and the
///     p7/p8/p9 pointer specs where missing;
///   - AArch64: append "-Fn32" to non-empty layouts that lack it;
///   - X86: add the p270/p271/p272 address spaces and "-i128:128" when the
///     layout matches the expected shape, and raise "-f80:32-" to
///     "-f80:128-" for 32-bit MSVC environments.
/// Any other target gets \p DL back unchanged.
///
/// \param DL the data layout string to upgrade.
/// \param TT the target triple string.
/// \returns the upgraded data layout string.
std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
  Triple T(TT);
  // True when DL carries no 'G' (globals address space) specification.
  const bool LacksGlobalsAS = !DL.contains("-G") && !DL.starts_with("G");

  // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
  // the address space of globals to 1. This does not apply to SPIRV Logical.
  if (((T.isAMDGPU() && !T.isAMDGCN()) ||
       (T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical()))) &&
      LacksGlobalsAS) {
    return DL.empty() ? std::string("G1") : (DL + "-G1").str();
  }

  if (T.isLoongArch64() || T.isRISCV64()) {
    // Make i32 a native type for 64-bit LoongArch and RISC-V.
    auto I = DL.find("-n64-");
    if (I != StringRef::npos)
      return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
    return DL.str();
  }

  std::string Res = DL.str();
  // AMDGCN data layout upgrades.
  if (T.isAMDGCN()) {
    // Update a trailing ni:7 to ni:7:8:9 (or ni:7:8 to ni:7:8:9). This must
    // happen while Res still ends with the ni spec: doing it after "-G1" has
    // been appended would glue the suffix onto the G spec instead
    // ("...-ni:7-G1:8:9").
    if (DL.ends_with("ni:7"))
      Res.append(":8:9");
    else if (DL.ends_with("ni:7:8"))
      Res.append(":9");

    // Define address spaces for constants.
    if (LacksGlobalsAS)
      Res.append(Res.empty() ? "G1" : "-G1");

    // Add missing non-integral declarations.
    // This goes before adding new address spaces to prevent incoherent string
    // values.
    if (!DL.contains("-ni") && !DL.starts_with("ni"))
      Res.append("-ni:7:8:9");

    // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
    // resources) An empty data layout has already been upgraded to G1 by now.
    if (!DL.contains("-p7") && !DL.starts_with("p7"))
      Res.append("-p7:160:256:256:32");
    if (!DL.contains("-p8") && !DL.starts_with("p8"))
      Res.append("-p8:128:128");
    if (!DL.contains("-p9") && !DL.starts_with("p9"))
      Res.append("-p9:192:256:256:32");

    return Res;
  }

  // AArch64 data layout upgrades.
  if (T.isAArch64()) {
    // Add "-Fn32"
    if (!DL.empty() && !DL.contains("-Fn32"))
      Res.append("-Fn32");
    return Res;
  }

  if (!T.isX86())
    return Res;

  // If the datalayout matches the expected format, add pointer size address
  // spaces to the datalayout.
  std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";
  if (StringRef Ref = Res; !Ref.contains(AddrSpaces)) {
    SmallVector<StringRef, 4> Groups;
    Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)");
    // Groups view into Res; the new string is fully built before it is
    // assigned back.
    if (R.match(Res, &Groups))
      Res = (Groups[1] + AddrSpaces + Groups[3]).str();
  }

  // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
  // for i128 operations prior to this being reflected in the data layout, and
  // clang mostly produced LLVM IR that already aligned i128 to 16 byte
  // boundaries, so although this is a breaking change, the upgrade is expected
  // to fix more IR than it breaks.
  // Intel MCU is an exception and uses 4-byte-alignment.
  if (!T.isOSIAMCU()) {
    std::string I128 = "-i128:128";
    if (StringRef Ref = Res; !Ref.contains(I128)) {
      SmallVector<StringRef, 4> Groups;
      Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
      if (R.match(Res, &Groups))
        Res = (Groups[1] + I128 + Groups[3]).str();
    }
  }

  // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
  // Raising the alignment is safe because Clang did not produce f80 values in
  // the MSVC environment before this upgrade was added.
  if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
    StringRef Ref = Res;
    auto I = Ref.find("-f80:32-");
    if (I != StringRef::npos)
      Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
  }

  return Res;
}
5440
5441
/// Upgrade legacy string attributes held in \p B.
///
/// "no-frame-pointer-elim" and "no-frame-pointer-elim-non-leaf" are removed
/// and folded into a single "frame-pointer" attribute ("all", "non-leaf" or
/// "none"). "null-pointer-is-valid" is removed and, when its value was
/// "true", replaced by the NullPointerIsValid enum attribute.
void llvm::UpgradeAttributes(AttrBuilder &B) {
  // Empty until one of the legacy frame-pointer attributes is seen.
  StringRef FramePointer;

  if (Attribute Elim = B.getAttribute("no-frame-pointer-elim");
      Elim.isValid()) {
    // The value can be "true" or "false".
    FramePointer = Elim.getValueAsString() == "true" ? "all" : "none";
    B.removeAttribute("no-frame-pointer-elim");
  }

  if (B.contains("no-frame-pointer-elim-non-leaf")) {
    // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
    if (FramePointer != "all")
      FramePointer = "non-leaf";
    B.removeAttribute("no-frame-pointer-elim-non-leaf");
  }

  if (!FramePointer.empty())
    B.addAttribute("frame-pointer", FramePointer);

  if (Attribute NullValid = B.getAttribute("null-pointer-is-valid");
      NullValid.isValid()) {
    // The value can be "true" or "false". Read it before dropping the string
    // attribute from the builder.
    const bool WasTrue = NullValid.getValueAsString() == "true";
    B.removeAttribute("null-pointer-is-valid");
    if (WasTrue)
      B.addAttribute(Attribute::NullPointerIsValid);
  }
}
5467
5468
void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
5469
// clang.arc.attachedcall bundles are now required to have an operand.
5470
// If they don't, it's okay to drop them entirely: when there is an operand,
5471
// the "attachedcall" is meaningful and required, but without an operand,
5472
// it's just a marker NOP. Dropping it merely prevents an optimization.
5473
erase_if(Bundles, [&](OperandBundleDef &OBD) {
5474
return OBD.getTag() == "clang.arc.attachedcall" &&
5475
OBD.inputs().empty();
5476
});
5477
}
5478
5479