Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp
35234 views
1
//===--- ExpandLargeFpConvert.cpp - Expand large fp convert----------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
10
// This pass expands ‘fptoui .. to’, ‘fptosi .. to’, ‘uitofp .. to’,
11
// ‘sitofp .. to’ instructions with a bitwidth above a threshold into
12
// auto-generated functions. This is useful for targets like x86_64 that cannot
13
// lower fp conversions with more than 128 bits.
14
//
15
//===----------------------------------------------------------------------===//
16
17
#include "llvm/CodeGen/ExpandLargeFpConvert.h"
18
#include "llvm/ADT/SmallVector.h"
19
#include "llvm/ADT/StringExtras.h"
20
#include "llvm/Analysis/GlobalsModRef.h"
21
#include "llvm/CodeGen/Passes.h"
22
#include "llvm/CodeGen/TargetLowering.h"
23
#include "llvm/CodeGen/TargetPassConfig.h"
24
#include "llvm/CodeGen/TargetSubtargetInfo.h"
25
#include "llvm/IR/IRBuilder.h"
26
#include "llvm/IR/InstIterator.h"
27
#include "llvm/IR/PassManager.h"
28
#include "llvm/InitializePasses.h"
29
#include "llvm/Pass.h"
30
#include "llvm/Support/CommandLine.h"
31
#include "llvm/Target/TargetMachine.h"
32
33
using namespace llvm;
34
35
// Hidden command-line override for the expansion threshold. The default,
// IntegerType::MAX_INT_BITS, is the sentinel runImpl() treats as "not set";
// any other value replaces the width reported by the target lowering.
static cl::opt<unsigned> ExpandFpConvertBits(
    "expand-fp-convert-bits",
    cl::desc("fp convert instructions on integers with "
             "more than <N> bits are expanded."),
    cl::init(llvm::IntegerType::MAX_INT_BITS), cl::Hidden);
40
41
/// Generate code to convert a fp number to integer, replacing FPToS(U)I with
42
/// the generated code. This currently generates code similarly to compiler-rt's
43
/// implementations.
44
///
45
/// An example IR generated from compiler-rt/fixsfdi.c looks like below:
46
/// define dso_local i64 @foo(float noundef %a) local_unnamed_addr #0 {
47
/// entry:
48
/// %0 = bitcast float %a to i32
49
/// %conv.i = zext i32 %0 to i64
50
/// %tobool.not = icmp sgt i32 %0, -1
51
/// %conv = select i1 %tobool.not, i64 1, i64 -1
52
/// %and = lshr i64 %conv.i, 23
53
/// %shr = and i64 %and, 255
54
/// %and2 = and i64 %conv.i, 8388607
55
/// %or = or i64 %and2, 8388608
56
/// %cmp = icmp ult i64 %shr, 127
57
/// br i1 %cmp, label %cleanup, label %if.end
58
///
59
/// if.end: ; preds = %entry
60
/// %sub = add nuw nsw i64 %shr, 4294967169
61
/// %conv5 = and i64 %sub, 4294967232
62
/// %cmp6.not = icmp eq i64 %conv5, 0
63
/// br i1 %cmp6.not, label %if.end12, label %if.then8
64
///
65
/// if.then8: ; preds = %if.end
66
/// %cond11 = select i1 %tobool.not, i64 9223372036854775807, i64 -9223372036854775808
67
/// br label %cleanup
68
///
69
/// if.end12: ; preds = %if.end
70
/// %cmp13 = icmp ult i64 %shr, 150
71
/// br i1 %cmp13, label %if.then15, label %if.else
72
///
73
/// if.then15: ; preds = %if.end12
74
/// %sub16 = sub nuw nsw i64 150, %shr
75
/// %shr17 = lshr i64 %or, %sub16
76
/// %mul = mul nsw i64 %shr17, %conv
77
/// br label %cleanup
78
///
79
/// if.else: ; preds = %if.end12
80
/// %sub18 = add nsw i64 %shr, -150
81
/// %shl = shl i64 %or, %sub18
82
/// %mul19 = mul nsw i64 %shl, %conv
83
/// br label %cleanup
84
///
85
/// cleanup: ; preds = %entry, %if.else, %if.then15, %if.then8
86
/// %retval.0 = phi i64 [ %cond11, %if.then8 ], [ %mul, %if.then15 ], [ %mul19, %if.else ], [ 0, %entry ]
87
/// ret i64 %retval.0
88
/// }
89
///
90
/// Replace fp to integer with generated code.
91
static void expandFPToI(Instruction *FPToI) {
  IRBuilder<> Builder(FPToI);
  Value *Src = FPToI->getOperand(0);
  Type *SrcTy = Src->getType();
  IntegerType *DstTy = cast<IntegerType>(FPToI->getType());

  const unsigned DstBits = FPToI->getType()->getIntegerBitWidth();
  // Number of explicitly stored significand bits (the reported mantissa width
  // minus one).
  unsigned MantissaBits = SrcTy->getFPMantissaWidth() - 1;

  // Common epilogue: forward all uses to the replacement and delete the
  // original conversion instruction.
  auto ReplaceWith = [FPToI](Value *Replacement) {
    FPToI->replaceAllUsesWith(Replacement);
    FPToI->dropAllReferences();
    FPToI->eraseFromParent();
  };

  // Shorthands for constants of the destination integer type.
  auto DstConst = [&Builder, DstBits](uint64_t V) {
    return Builder.getIntN(DstBits, V);
  };
  auto DstSigned = [DstTy](int64_t V) {
    return ConstantInt::getSigned(DstTy, V);
  };

  // FIXME: fp16's range is covered by i32. So `fptoi half` can convert
  // to i32 first following a sext/zext to target integer type.
  if (SrcTy->isHalfTy()) {
    Type *I32Ty = Builder.getIntNTy(32);
    Value *Widened = nullptr;
    if (FPToI->getOpcode() == Instruction::FPToUI)
      Widened = Builder.CreateZExt(Builder.CreateFPToUI(Src, I32Ty), DstTy);
    else // FPToSI
      Widened = Builder.CreateSExt(Builder.CreateFPToSI(Src, I32Ty), DstTy);
    ReplaceWith(Widened);
    return;
  }

  // An x86_fp80 source (63 stored significand bits) is fpext'ed to fp128
  // below, so the bit layout used from here on is the fp128 one.
  if (MantissaBits == 63)
    MantissaBits = 112;
  const unsigned FloatWidth = PowerOf2Ceil(SrcTy->getScalarSizeInBits());
  const unsigned ExponentWidth = FloatWidth - MantissaBits - 1;
  const unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1;

  Value *ImplicitBit = Builder.CreateShl(DstConst(1), DstConst(MantissaBits));
  Value *SignificandMask = Builder.CreateSub(ImplicitBit, DstConst(1));
  Value *AllOnes = Builder.CreateSExt(
      ConstantInt::getSigned(Builder.getInt32Ty(), -1), DstTy);
  // Only the top bit set.
  Value *MinValue = Builder.CreateShl(DstSigned(1), DstSigned(DstBits - 1));

  // Split the block at the conversion and create the expansion's blocks in
  // front of the continuation block.
  BasicBlock *EntryBB = Builder.GetInsertBlock();
  Function *Fn = EntryBB->getParent();
  EntryBB->setName(Twine(EntryBB->getName(), "fp-to-i-entry"));
  BasicBlock *CleanupBB =
      EntryBB->splitBasicBlock(Builder.GetInsertPoint(), "fp-to-i-cleanup");
  auto NewBlock = [&Builder, Fn, CleanupBB](const char *Name) {
    return BasicBlock::Create(Builder.getContext(), Name, Fn, CleanupBB);
  };
  BasicBlock *RangeCheckBB = NewBlock("fp-to-i-if-end");
  BasicBlock *SaturateBB = NewBlock("fp-to-i-if-then5");
  BasicBlock *ShiftSelectBB = NewBlock("fp-to-i-if-end9");
  BasicBlock *ShiftRightBB = NewBlock("fp-to-i-if-then12");
  BasicBlock *ShiftLeftBB = NewBlock("fp-to-i-if-else");

  // splitBasicBlock() left an unconditional branch behind; it is replaced by
  // the conditional branch emitted at the end of the entry block.
  EntryBB->getTerminator()->eraseFromParent();

  // entry: pull apart sign, biased exponent and significand.
  Builder.SetInsertPoint(EntryBB);
  Value *WideSrc = Src;
  if (SrcTy->isX86_FP80Ty())
    WideSrc = Builder.CreateFPExt(Src, Type::getFP128Ty(Builder.getContext()));
  Value *RawBits =
      Builder.CreateBitCast(WideSrc, Builder.getIntNTy(FloatWidth));
  Value *Bits = Builder.CreateZExt(RawBits, DstTy);
  Value *IsNonNegative = Builder.CreateICmpSGT(
      RawBits, ConstantInt::getSigned(Builder.getIntNTy(FloatWidth), -1));
  Value *SignFactor =
      Builder.CreateSelect(IsNonNegative, DstSigned(1), DstSigned(-1));
  Value *ShiftedExp = Builder.CreateLShr(Bits, DstConst(MantissaBits));
  Value *BiasedExp =
      Builder.CreateAnd(ShiftedExp, DstConst((1 << ExponentWidth) - 1));
  Value *Fraction = Builder.CreateAnd(Bits, SignificandMask);
  Value *Significand = Builder.CreateOr(Fraction, ImplicitBit);
  // A biased exponent below the bias yields 0 via the PHI in the cleanup
  // block.
  Value *IsBelowBias =
      Builder.CreateICmpULT(BiasedExp, DstConst(ExponentBias));
  Builder.CreateCondBr(IsBelowBias, CleanupBB, RangeCheckBB);

  // if.end: decide whether the value is too large for the destination width.
  Builder.SetInsertPoint(RangeCheckBB);
  Value *ExpMinusLimit = Builder.CreateAdd(
      BiasedExp, DstSigned(-static_cast<int64_t>(ExponentBias + DstBits)));
  Value *IsOutOfRange = Builder.CreateICmpULT(
      ExpMinusLimit, DstSigned(-static_cast<int64_t>(DstBits)));
  Builder.CreateCondBr(IsOutOfRange, SaturateBB, ShiftSelectBB);

  // if.then5: saturate to the largest / smallest signed value by sign.
  Builder.SetInsertPoint(SaturateBB);
  Value *MaxValue = Builder.CreateXor(AllOnes, MinValue);
  Value *Saturated = Builder.CreateSelect(IsNonNegative, MaxValue, MinValue);
  Builder.CreateBr(CleanupBB);

  // if.end9: pick the shift direction for the significand.
  Builder.SetInsertPoint(ShiftSelectBB);
  Value *NeedsRightShift = Builder.CreateICmpULT(
      BiasedExp, DstConst(ExponentBias + MantissaBits));
  Builder.CreateCondBr(NeedsRightShift, ShiftRightBB, ShiftLeftBB);

  // if.then12: shift right, then apply the sign.
  Builder.SetInsertPoint(ShiftRightBB);
  Value *RightAmount =
      Builder.CreateSub(DstConst(ExponentBias + MantissaBits), BiasedExp);
  Value *RightShifted = Builder.CreateLShr(Significand, RightAmount);
  Value *RightResult = Builder.CreateMul(RightShifted, SignFactor);
  Builder.CreateBr(CleanupBB);

  // if.else: shift left, then apply the sign.
  Builder.SetInsertPoint(ShiftLeftBB);
  Value *LeftAmount = Builder.CreateAdd(
      BiasedExp,
      DstSigned(-static_cast<int64_t>(ExponentBias + MantissaBits)));
  Value *LeftShifted = Builder.CreateShl(Significand, LeftAmount);
  Value *LeftResult = Builder.CreateMul(LeftShifted, SignFactor);
  Builder.CreateBr(CleanupBB);

  // cleanup: merge the four possible results.
  Builder.SetInsertPoint(CleanupBB, CleanupBB->begin());
  PHINode *Merged = Builder.CreatePHI(FPToI->getType(), 4);

  Merged->addIncoming(Saturated, SaturateBB);
  Merged->addIncoming(RightResult, ShiftRightBB);
  Merged->addIncoming(LeftResult, ShiftLeftBB);
  Merged->addIncoming(DstConst(0), EntryBB);

  ReplaceWith(Merged);
}
227
228
/// Generate code to convert an integer to a fp number, replacing S(U)IToFP with
229
/// the generated code. This currently generates code similarly to compiler-rt's
230
/// implementations. This implementation has an implicit assumption that integer
231
/// width is larger than fp.
232
///
233
/// An example IR generated from compiler-rt/floatdisf.c looks like below:
234
/// define dso_local float @__floatdisf(i64 noundef %a) local_unnamed_addr #0 {
235
/// entry:
236
/// %cmp = icmp eq i64 %a, 0
237
/// br i1 %cmp, label %return, label %if.end
238
///
239
/// if.end: ; preds = %entry
240
/// %shr = ashr i64 %a, 63
241
/// %xor = xor i64 %shr, %a
242
/// %sub = sub nsw i64 %xor, %shr
243
/// %0 = tail call i64 @llvm.ctlz.i64(i64 %sub, i1 true), !range !5
244
/// %cast = trunc i64 %0 to i32
245
/// %sub1 = sub nuw nsw i32 64, %cast
246
/// %sub2 = xor i32 %cast, 63
247
/// %cmp3 = icmp ult i32 %cast, 40
248
/// br i1 %cmp3, label %if.then4, label %if.else
249
///
250
/// if.then4: ; preds = %if.end
251
/// switch i32 %sub1, label %sw.default [
252
/// i32 25, label %sw.bb
253
/// i32 26, label %sw.epilog
254
/// ]
255
///
256
/// sw.bb: ; preds = %if.then4
257
/// %shl = shl i64 %sub, 1
258
/// br label %sw.epilog
259
///
260
/// sw.default: ; preds = %if.then4
261
/// %sub5 = sub nsw i64 38, %0
262
/// %sh_prom = and i64 %sub5, 4294967295
263
/// %shr6 = lshr i64 %sub, %sh_prom
264
/// %shr9 = lshr i64 274877906943, %0
265
/// %and = and i64 %shr9, %sub
266
/// %cmp10 = icmp ne i64 %and, 0
267
/// %conv11 = zext i1 %cmp10 to i64
268
/// %or = or i64 %shr6, %conv11
269
/// br label %sw.epilog
270
///
271
/// sw.epilog: ; preds = %sw.default, %if.then4, %sw.bb
272
/// %a.addr.0 = phi i64 [ %or, %sw.default ], [ %sub, %if.then4 ], [ %shl, %sw.bb ]
273
/// %1 = lshr i64 %a.addr.0, 2
274
/// %2 = and i64 %1, 1
275
/// %or16 = or i64 %2, %a.addr.0
276
/// %inc = add nsw i64 %or16, 1
277
/// %3 = and i64 %inc, 67108864
278
/// %tobool.not = icmp eq i64 %3, 0
279
/// %spec.select.v = select i1 %tobool.not, i64 2, i64 3
280
/// %spec.select = ashr i64 %inc, %spec.select.v
281
/// %spec.select56 = select i1 %tobool.not, i32 %sub2, i32 %sub1
282
/// br label %if.end26
283
///
284
/// if.else: ; preds = %if.end
285
/// %sub23 = add nuw nsw i64 %0, 4294967256
286
/// %sh_prom24 = and i64 %sub23, 4294967295
287
/// %shl25 = shl i64 %sub, %sh_prom24
288
/// br label %if.end26
289
///
290
/// if.end26: ; preds = %sw.epilog, %if.else
291
/// %a.addr.1 = phi i64 [ %shl25, %if.else ], [ %spec.select, %sw.epilog ]
292
/// %e.0 = phi i32 [ %sub2, %if.else ], [ %spec.select56, %sw.epilog ]
293
/// %conv27 = trunc i64 %shr to i32
294
/// %and28 = and i32 %conv27, -2147483648
295
/// %add = shl nuw nsw i32 %e.0, 23
296
/// %shl29 = add nuw nsw i32 %add, 1065353216
297
/// %conv31 = trunc i64 %a.addr.1 to i32
298
/// %and32 = and i32 %conv31, 8388607
299
/// %or30 = or i32 %and32, %and28
300
/// %or33 = or i32 %or30, %shl29
301
/// %4 = bitcast i32 %or33 to float
302
/// br label %return
303
///
304
/// return: ; preds = %entry, %if.end26
305
/// %retval.0 = phi float [ %4, %if.end26 ], [ 0.000000e+00, %entry ]
306
/// ret float %retval.0
307
/// }
308
///
309
/// Replace integer to fp with generated code.
310
static void expandIToFP(Instruction *IToFP) {
  // Overview of the emitted control flow:
  //   entry        : a zero input branches straight to the return block, where
  //                  the PHI supplies +0.0.
  //   if.end       : computes the magnitude and its leading-zero count, then
  //                  branches on whether the value has more significant bits
  //                  than FPMantissaWidth + 1.
  //   if.then4 / sw.* / if.then20 : path for values with too many significant
  //                  bits (shift right with a sticky bit, then increment).
  //   if.else      : path for values that fit; shifts the value left.
  //   if.end26     : assembles sign, exponent and significand bits and
  //                  bitcasts (or bitcasts + fptrunc) to the result type.
  IRBuilder<> Builder(IToFP);
  auto *IntVal = IToFP->getOperand(0);
  IntegerType *IntTy = cast<IntegerType>(IntVal->getType());

  unsigned BitWidth = IntVal->getType()->getIntegerBitWidth();
  unsigned FPMantissaWidth = IToFP->getType()->getFPMantissaWidth() - 1;
  // fp80 conversion is implemented by conversion to fp128 first following
  // a fptrunc to fp80.
  FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
  // FIXME: As there is no related builtins added in compiler-rt,
  // here currently utilized the fp32 <-> fp16 lib calls to implement.
  FPMantissaWidth = FPMantissaWidth == 10 ? 23 : FPMantissaWidth;
  FPMantissaWidth = FPMantissaWidth == 7 ? 23 : FPMantissaWidth;
  unsigned FloatWidth = PowerOf2Ceil(FPMantissaWidth);
  bool IsSigned = IToFP->getOpcode() == Instruction::SIToFP;

  assert(BitWidth > FloatWidth && "Unexpected conversion. expandIToFP() "
                                  "assumes integer width is larger than fp.");

  // Mask for bit (FPMantissaWidth + 3) of the incremented value; tested in
  // sw.epilog to choose between the 2-bit and 3-bit final shift.
  Value *Temp1 =
      Builder.CreateShl(Builder.getIntN(BitWidth, 1),
                        Builder.getIntN(BitWidth, FPMantissaWidth + 3));

  BasicBlock *Entry = Builder.GetInsertBlock();
  Function *F = Entry->getParent();
  Entry->setName(Twine(Entry->getName(), "itofp-entry"));
  BasicBlock *End =
      Entry->splitBasicBlock(Builder.GetInsertPoint(), "itofp-return");
  BasicBlock *IfEnd =
      BasicBlock::Create(Builder.getContext(), "itofp-if-end", F, End);
  BasicBlock *IfThen4 =
      BasicBlock::Create(Builder.getContext(), "itofp-if-then4", F, End);
  BasicBlock *SwBB =
      BasicBlock::Create(Builder.getContext(), "itofp-sw-bb", F, End);
  BasicBlock *SwDefault =
      BasicBlock::Create(Builder.getContext(), "itofp-sw-default", F, End);
  BasicBlock *SwEpilog =
      BasicBlock::Create(Builder.getContext(), "itofp-sw-epilog", F, End);
  BasicBlock *IfThen20 =
      BasicBlock::Create(Builder.getContext(), "itofp-if-then20", F, End);
  BasicBlock *IfElse =
      BasicBlock::Create(Builder.getContext(), "itofp-if-else", F, End);
  BasicBlock *IfEnd26 =
      BasicBlock::Create(Builder.getContext(), "itofp-if-end26", F, End);

  // Drop the unconditional branch left by splitBasicBlock(); the entry block
  // gets a conditional branch below.
  Entry->getTerminator()->eraseFromParent();

  Function *CTLZ =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, IntTy);
  ConstantInt *True = Builder.getTrue();

  // entry:
  Builder.SetInsertPoint(Entry);
  Value *Cmp = Builder.CreateICmpEQ(IntVal, ConstantInt::getSigned(IntTy, 0));
  Builder.CreateCondBr(Cmp, End, IfEnd);

  // if.end:
  Builder.SetInsertPoint(IfEnd);
  // Shr is the sign mask (all ones for a negative input, zero otherwise);
  // (x ^ mask) - mask is the magnitude used by the signed conversion.
  Value *Shr =
      Builder.CreateAShr(IntVal, Builder.getIntN(BitWidth, BitWidth - 1));
  Value *Xor = Builder.CreateXor(Shr, IntVal);
  Value *Sub = Builder.CreateSub(Xor, Shr);
  Value *Call = Builder.CreateCall(CTLZ, {IsSigned ? Sub : IntVal, True});
  Value *Cast = Builder.CreateTrunc(Call, Builder.getInt32Ty());
  // Exponent arithmetic is done in i32, except for the 128-bit float layout
  // where it stays in the source integer type.
  int BitWidthNew = FloatWidth == 128 ? BitWidth : 32;
  // Sub1 = number of significant bits; Sub2 = Sub1 - 1.
  Value *Sub1 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth),
                                  FloatWidth == 128 ? Call : Cast);
  Value *Sub2 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth - 1),
                                  FloatWidth == 128 ? Call : Cast);
  Value *Cmp3 = Builder.CreateICmpSGT(
      Sub1, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1));
  Builder.CreateCondBr(Cmp3, IfThen4, IfElse);

  // if.then4:
  Builder.SetInsertPoint(IfThen4);
  llvm::SwitchInst *SI = Builder.CreateSwitch(Sub1, SwDefault);
  SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 2), SwBB);
  SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 3), SwEpilog);

  // sw.bb:
  Builder.SetInsertPoint(SwBB);
  Value *Shl =
      Builder.CreateShl(IsSigned ? Sub : IntVal, Builder.getIntN(BitWidth, 1));
  Builder.CreateBr(SwEpilog);

  // sw.default:
  Builder.SetInsertPoint(SwDefault);
  Value *Sub5 = Builder.CreateSub(
      Builder.getIntN(BitWidthNew, BitWidth - FPMantissaWidth - 3),
      FloatWidth == 128 ? Call : Cast);
  Value *ShProm = Builder.CreateZExt(Sub5, IntTy);
  Value *Shr6 = Builder.CreateLShr(IsSigned ? Sub : IntVal,
                                   FloatWidth == 128 ? Sub5 : ShProm);
  Value *Sub8 =
      Builder.CreateAdd(FloatWidth == 128 ? Call : Cast,
                        Builder.getIntN(BitWidthNew, FPMantissaWidth + 3));
  Value *ShProm9 = Builder.CreateZExt(Sub8, IntTy);
  Value *Shr9 = Builder.CreateLShr(ConstantInt::getSigned(IntTy, -1),
                                   FloatWidth == 128 ? Sub8 : ShProm9);
  // OR a "sticky" 1 into the shifted value if any shifted-out bit was set.
  Value *And = Builder.CreateAnd(Shr9, IsSigned ? Sub : IntVal);
  Value *Cmp10 = Builder.CreateICmpNE(And, Builder.getIntN(BitWidth, 0));
  Value *Conv11 = Builder.CreateZExt(Cmp10, IntTy);
  Value *Or = Builder.CreateOr(Shr6, Conv11);
  Builder.CreateBr(SwEpilog);

  // sw.epilog:
  Builder.SetInsertPoint(SwEpilog);
  PHINode *AAddr0 = Builder.CreatePHI(IntTy, 3);
  AAddr0->addIncoming(Or, SwDefault);
  AAddr0->addIncoming(IsSigned ? Sub : IntVal, IfThen4);
  AAddr0->addIncoming(Shl, SwBB);
  Value *A0 = Builder.CreateTrunc(AAddr0, Builder.getInt32Ty());
  Value *A1 = Builder.CreateLShr(A0, Builder.getIntN(32, 2));
  Value *A2 = Builder.CreateAnd(A1, Builder.getIntN(32, 1));
  Value *Conv16 = Builder.CreateZExt(A2, IntTy);
  Value *Or17 = Builder.CreateOr(AAddr0, Conv16);
  Value *Inc = Builder.CreateAdd(Or17, Builder.getIntN(BitWidth, 1));
  Value *Shr18 = nullptr;
  if (IsSigned)
    Shr18 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 2));
  else
    Shr18 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 2));
  Value *A3 = Builder.CreateAnd(Inc, Temp1, "a3");
  Value *PosOrNeg = Builder.CreateICmpEQ(A3, Builder.getIntN(BitWidth, 0));
  Value *ExtractT60 = Builder.CreateTrunc(Shr18, Builder.getIntNTy(FloatWidth));
  Value *Extract63 = Builder.CreateLShr(Shr18, Builder.getIntN(BitWidth, 32));
  Value *ExtractT64 = nullptr;
  if (FloatWidth > 80)
    ExtractT64 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
  else
    ExtractT64 = Builder.CreateTrunc(Extract63, Builder.getInt32Ty());
  Builder.CreateCondBr(PosOrNeg, IfEnd26, IfThen20);

  // if.then20
  Builder.SetInsertPoint(IfThen20);
  Value *Shr21 = nullptr;
  if (IsSigned)
    Shr21 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 3));
  else
    Shr21 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 3));
  Value *ExtractT = Builder.CreateTrunc(Shr21, Builder.getIntNTy(FloatWidth));
  Value *Extract = Builder.CreateLShr(Shr21, Builder.getIntN(BitWidth, 32));
  Value *ExtractT62 = nullptr;
  if (FloatWidth > 80)
    ExtractT62 = Builder.CreateTrunc(Sub1, Builder.getIntNTy(64));
  else
    ExtractT62 = Builder.CreateTrunc(Extract, Builder.getIntNTy(32));
  Builder.CreateBr(IfEnd26);

  // if.else:
  Builder.SetInsertPoint(IfElse);
  // The offset must be negated as a signed 64-bit value. Negating the unsigned
  // expression wraps to 2^32 - k, which getSigned() would then widen as a
  // large positive number whenever BitWidthNew exceeds 32 (the 128-bit float
  // layout), producing an out-of-range shift amount.
  Value *Sub24 = Builder.CreateAdd(
      FloatWidth == 128 ? Call : Cast,
      ConstantInt::getSigned(
          Builder.getIntNTy(BitWidthNew),
          -static_cast<int64_t>(BitWidth - FPMantissaWidth - 1)));
  Value *ShProm25 = Builder.CreateZExt(Sub24, IntTy);
  Value *Shl26 = Builder.CreateShl(IsSigned ? Sub : IntVal,
                                   FloatWidth == 128 ? Sub24 : ShProm25);
  Value *ExtractT61 = Builder.CreateTrunc(Shl26, Builder.getIntNTy(FloatWidth));
  Value *Extract65 = Builder.CreateLShr(Shl26, Builder.getIntN(BitWidth, 32));
  Value *ExtractT66 = nullptr;
  if (FloatWidth > 80)
    ExtractT66 = Builder.CreateTrunc(Sub2, Builder.getIntNTy(64));
  else
    ExtractT66 = Builder.CreateTrunc(Extract65, Builder.getInt32Ty());
  Builder.CreateBr(IfEnd26);

  // if.end26:
  Builder.SetInsertPoint(IfEnd26);
  PHINode *AAddr1Off0 = Builder.CreatePHI(Builder.getIntNTy(FloatWidth), 3);
  AAddr1Off0->addIncoming(ExtractT, IfThen20);
  AAddr1Off0->addIncoming(ExtractT60, SwEpilog);
  AAddr1Off0->addIncoming(ExtractT61, IfElse);
  PHINode *AAddr1Off32 = nullptr;
  if (FloatWidth > 32) {
    AAddr1Off32 =
        Builder.CreatePHI(Builder.getIntNTy(FloatWidth > 80 ? 64 : 32), 3);
    AAddr1Off32->addIncoming(ExtractT62, IfThen20);
    AAddr1Off32->addIncoming(ExtractT64, SwEpilog);
    AAddr1Off32->addIncoming(ExtractT66, IfElse);
  }
  PHINode *E0 = nullptr;
  if (FloatWidth <= 80) {
    E0 = Builder.CreatePHI(Builder.getIntNTy(BitWidthNew), 3);
    E0->addIncoming(Sub1, IfThen20);
    E0->addIncoming(Sub2, SwEpilog);
    E0->addIncoming(Sub2, IfElse);
  }
  // And29 holds the sign bit, taken from the sign mask Shr.
  Value *And29 = nullptr;
  if (FloatWidth > 80) {
    Value *Temp2 = Builder.CreateShl(Builder.getIntN(BitWidth, 1),
                                     Builder.getIntN(BitWidth, 63));
    And29 = Builder.CreateAnd(Shr, Temp2, "and29");
  } else {
    Value *Conv28 = Builder.CreateTrunc(Shr, Builder.getIntNTy(32));
    And29 = Builder.CreateAnd(
        Conv28, ConstantInt::getSigned(Builder.getIntNTy(32), 0x80000000));
  }
  // Position of the exponent field within the 32-bit (or, for the 128-bit
  // layout, 64-bit) word that carries it.
  unsigned TempMod = FPMantissaWidth % 32;
  Value *And34 = nullptr;
  Value *Shl30 = nullptr;
  if (FloatWidth > 80) {
    TempMod += 32;
    Value *Add = Builder.CreateShl(AAddr1Off32, Builder.getIntN(64, TempMod));
    Shl30 = Builder.CreateAdd(
        Add,
        Builder.getIntN(64, ((1ull << (62ull - TempMod)) - 1ull) << TempMod));
    And34 = Builder.CreateZExt(Shl30, Builder.getIntNTy(128));
  } else {
    Value *Add = Builder.CreateShl(E0, Builder.getIntN(32, TempMod));
    Shl30 = Builder.CreateAdd(
        Add, Builder.getIntN(32, ((1 << (30 - TempMod)) - 1) << TempMod));
    And34 = Builder.CreateAnd(FloatWidth > 32 ? AAddr1Off32 : AAddr1Off0,
                              Builder.getIntN(32, (1 << TempMod) - 1));
  }
  Value *Or35 = nullptr;
  if (FloatWidth > 80) {
    Value *And29Trunc = Builder.CreateTrunc(And29, Builder.getIntNTy(128));
    Value *Or31 = Builder.CreateOr(And29Trunc, And34);
    Value *Or34 = Builder.CreateShl(Or31, Builder.getIntN(128, 64));
    Value *Temp3 = Builder.CreateShl(Builder.getIntN(128, 1),
                                     Builder.getIntN(128, FPMantissaWidth));
    Value *Temp4 = Builder.CreateSub(Temp3, Builder.getIntN(128, 1));
    Value *A6 = Builder.CreateAnd(AAddr1Off0, Temp4);
    Or35 = Builder.CreateOr(Or34, A6);
  } else {
    Value *Or31 = Builder.CreateOr(And34, And29);
    Or35 = Builder.CreateOr(IsSigned ? Or31 : And34, Shl30);
  }
  // Turn the assembled bit pattern into the requested floating-point type.
  Value *A4 = nullptr;
  if (IToFP->getType()->isDoubleTy()) {
    Value *ZExt1 = Builder.CreateZExt(Or35, Builder.getIntNTy(FloatWidth));
    Value *Shl1 = Builder.CreateShl(ZExt1, Builder.getIntN(FloatWidth, 32));
    Value *And1 =
        Builder.CreateAnd(AAddr1Off0, Builder.getIntN(FloatWidth, 0xFFFFFFFF));
    Value *Or1 = Builder.CreateOr(Shl1, And1);
    A4 = Builder.CreateBitCast(Or1, IToFP->getType());
  } else if (IToFP->getType()->isX86_FP80Ty()) {
    Value *A40 =
        Builder.CreateBitCast(Or35, Type::getFP128Ty(Builder.getContext()));
    A4 = Builder.CreateFPTrunc(A40, IToFP->getType());
  } else if (IToFP->getType()->isHalfTy() || IToFP->getType()->isBFloatTy()) {
    // Deal with "half" situation. This is a workaround since we don't have
    // floattihf.c currently as referring.
    Value *A40 =
        Builder.CreateBitCast(Or35, Type::getFloatTy(Builder.getContext()));
    A4 = Builder.CreateFPTrunc(A40, IToFP->getType());
  } else // float type
    A4 = Builder.CreateBitCast(Or35, IToFP->getType());
  Builder.CreateBr(End);

  // return:
  Builder.SetInsertPoint(End, End->begin());
  PHINode *Retval0 = Builder.CreatePHI(IToFP->getType(), 2);
  Retval0->addIncoming(A4, IfEnd26);
  Retval0->addIncoming(ConstantFP::getZero(IToFP->getType(), false), Entry);

  IToFP->replaceAllUsesWith(Retval0);
  IToFP->dropAllReferences();
  IToFP->eraseFromParent();
}
572
573
/// Rewrite the fixed-width vector conversion \p I as one scalar conversion per
/// lane, reassembled with insertelement. Every per-lane cast that is still an
/// instruction (i.e. was not folded to a constant by the builder) is appended
/// to \p Replace. \p I is erased.
static void scalarize(Instruction *I, SmallVectorImpl<Instruction *> &Replace) {
  auto *VecTy = cast<FixedVectorType>(I->getType());
  const Instruction::CastOps Opcode = cast<CastInst>(I)->getOpcode();
  Type *LaneTy = I->getType()->getScalarType();
  Value *Source = I->getOperand(0);

  IRBuilder<> Builder(I);

  Value *Rebuilt = PoisonValue::get(VecTy);
  const unsigned LaneCount = VecTy->getNumElements();
  for (unsigned Lane = 0; Lane != LaneCount; ++Lane) {
    Value *Elem = Builder.CreateExtractElement(Source, Lane);
    Value *Converted = Builder.CreateCast(Opcode, Elem, LaneTy);
    Rebuilt = Builder.CreateInsertElement(Rebuilt, Converted, Lane);
    if (auto *ConvertedInst = dyn_cast<Instruction>(Converted))
      Replace.push_back(ConvertedInst);
  }

  I->replaceAllUsesWith(Rebuilt);
  I->dropAllReferences();
  I->eraseFromParent();
}
592
593
/// Expand every fptoui/fptosi/uitofp/sitofp in \p F whose integer side is
/// wider than the legal threshold.
///
/// The threshold comes from \p TLI unless -expand-fp-convert-bits was given a
/// value other than IntegerType::MAX_INT_BITS. Fixed-width vector conversions
/// are scalarized first and the resulting scalar conversions are then
/// expanded; conversions on scalable types are skipped.
///
/// \returns true if the function was changed.
static bool runImpl(Function &F, const TargetLowering &TLI) {
  SmallVector<Instruction *, 4> Replace;
  SmallVector<Instruction *, 4> ReplaceVector;
  bool Modified = false;

  unsigned MaxLegalFpConvertBitWidth =
      TLI.getMaxLargeFPConvertBitWidthSupported();
  if (ExpandFpConvertBits != llvm::IntegerType::MAX_INT_BITS)
    MaxLegalFpConvertBitWidth = ExpandFpConvertBits;

  // No integer type can exceed MAX_INT_BITS, so there is nothing to expand.
  if (MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS)
    return false;

  // Only collect here; the expansions split blocks and erase instructions, so
  // they must not run while iterating over the function.
  for (auto &I : instructions(F)) {
    // The type that carries the integer side of the conversion.
    Type *IntSideTy = nullptr;
    switch (I.getOpcode()) {
    case Instruction::FPToUI:
    case Instruction::FPToSI:
      IntSideTy = I.getType();
      break;
    case Instruction::UIToFP:
    case Instruction::SIToFP:
      IntSideTy = I.getOperand(0)->getType();
      break;
    default:
      continue;
    }

    // TODO: This pass doesn't handle scalable vectors.
    if (I.getOperand(0)->getType()->isScalableTy())
      continue;

    auto *IntTy = cast<IntegerType>(IntSideTy->getScalarType());
    if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
      continue;

    if (I.getOperand(0)->getType()->isVectorTy())
      ReplaceVector.push_back(&I);
    else
      Replace.push_back(&I);
    Modified = true;
  }

  while (!ReplaceVector.empty()) {
    Instruction *I = ReplaceVector.pop_back_val();
    scalarize(I, Replace);
  }

  // Do not bail out with 'false' when Replace is empty here: scalarize() may
  // already have rewritten vector conversions whose per-lane casts all folded
  // to constants, in which case the IR changed although nothing is left to
  // expand. Modified already records that.
  while (!Replace.empty()) {
    Instruction *I = Replace.pop_back_val();
    if (I->getOpcode() == Instruction::FPToUI ||
        I->getOpcode() == Instruction::FPToSI) {
      expandFPToI(I);
    } else {
      expandIToFP(I);
    }
  }

  return Modified;
}
668
669
namespace {
670
class ExpandLargeFpConvertLegacyPass : public FunctionPass {
671
public:
672
static char ID;
673
674
ExpandLargeFpConvertLegacyPass() : FunctionPass(ID) {
675
initializeExpandLargeFpConvertLegacyPassPass(
676
*PassRegistry::getPassRegistry());
677
}
678
679
bool runOnFunction(Function &F) override {
680
auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
681
auto *TLI = TM->getSubtargetImpl(F)->getTargetLowering();
682
return runImpl(F, *TLI);
683
}
684
685
void getAnalysisUsage(AnalysisUsage &AU) const override {
686
AU.addRequired<TargetPassConfig>();
687
AU.addPreserved<AAResultsWrapperPass>();
688
AU.addPreserved<GlobalsAAWrapperPass>();
689
}
690
};
691
} // namespace
692
693
/// New pass manager entry point. Reports that no analyses are preserved when
/// runImpl() changed the function, and that all are preserved otherwise.
PreservedAnalyses ExpandLargeFpConvertPass::run(Function &F,
                                                FunctionAnalysisManager &FAM) {
  const TargetLowering &Lowering =
      *TM->getSubtargetImpl(F)->getTargetLowering();
  if (!runImpl(F, Lowering))
    return PreservedAnalyses::all();
  return PreservedAnalyses::none();
}
699
700
char ExpandLargeFpConvertLegacyPass::ID = 0;
701
INITIALIZE_PASS_BEGIN(ExpandLargeFpConvertLegacyPass, "expand-large-fp-convert",
702
"Expand large fp convert", false, false)
703
INITIALIZE_PASS_END(ExpandLargeFpConvertLegacyPass, "expand-large-fp-convert",
704
"Expand large fp convert", false, false)
705
706
/// Factory for the legacy pass manager; returns a newly allocated pass.
FunctionPass *llvm::createExpandLargeFpConvertPass() {
  FunctionPass *LegacyPass = new ExpandLargeFpConvertLegacyPass();
  return LegacyPass;
}
709
710