CoCalc -- ExpandFp.cpp

GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/CodeGen/ExpandFp.cpp
²¹³⁷⁶⁵ views
1
//===--- ExpandFp.cpp - Expand fp instructions ----------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
// This pass expands certain floating point instructions at the IR level.
9
//
10
// It expands ‘fptoui .. to’, ‘fptosi .. to’, ‘uitofp ..  to’, ‘sitofp
11
// .. to’ instructions with a bitwidth above a threshold.  This is
12
// useful for targets like x86_64 that cannot lower fp conversions
13
// with more than 128 bits.
14
//
15
//===----------------------------------------------------------------------===//
16

17
#include "llvm/CodeGen/ExpandFp.h"
18
#include "llvm/ADT/SmallVector.h"
19
#include "llvm/Analysis/GlobalsModRef.h"
20
#include "llvm/CodeGen/Passes.h"
21
#include "llvm/CodeGen/TargetLowering.h"
22
#include "llvm/CodeGen/TargetPassConfig.h"
23
#include "llvm/CodeGen/TargetSubtargetInfo.h"
24
#include "llvm/IR/IRBuilder.h"
25
#include "llvm/IR/InstIterator.h"
26
#include "llvm/IR/PassManager.h"
27
#include "llvm/InitializePasses.h"
28
#include "llvm/Pass.h"
29
#include "llvm/Support/CommandLine.h"
30
#include "llvm/Target/TargetMachine.h"
31

32
using namespace llvm;
33

34
/// Hidden command-line override for the integer bit-width threshold above
/// which fp<->int conversions are expanded. When left at its default
/// (IntegerType::MAX_INT_BITS) runImpl() keeps the threshold reported by the
/// target lowering instead.
static cl::opt<unsigned> ExpandFpConvertBits(
    "expand-fp-convert-bits", cl::Hidden,
    cl::init(llvm::IntegerType::MAX_INT_BITS),
    cl::desc("fp convert instructions on integers with "
             "more than <N> bits are expanded."));
39

40
// clang-format off: preserve formatting of the following example
41

42
/// Generate code to convert a fp number to integer, replacing FPToS(U)I with
43
/// the generated code. This currently generates code similarly to compiler-rt's
44
/// implementations.
45
///
46
/// An example IR generated from compiler-rt/fixsfdi.c looks like below:
47
/// define dso_local i64 @foo(float noundef %a) local_unnamed_addr #0 {
48
/// entry:
49
///   %0 = bitcast float %a to i32
50
///   %conv.i = zext i32 %0 to i64
51
///   %tobool.not = icmp sgt i32 %0, -1
52
///   %conv = select i1 %tobool.not, i64 1, i64 -1
53
///   %and = lshr i64 %conv.i, 23
54
///   %shr = and i64 %and, 255
55
///   %and2 = and i64 %conv.i, 8388607
56
///   %or = or i64 %and2, 8388608
57
///   %cmp = icmp ult i64 %shr, 127
58
///   br i1 %cmp, label %cleanup, label %if.end
59
///
60
/// if.end:                                           ; preds = %entry
61
///   %sub = add nuw nsw i64 %shr, 4294967169
62
///   %conv5 = and i64 %sub, 4294967232
63
///   %cmp6.not = icmp eq i64 %conv5, 0
64
///   br i1 %cmp6.not, label %if.end12, label %if.then8
65
///
66
/// if.then8:                                         ; preds = %if.end
67
///   %cond11 = select i1 %tobool.not, i64 9223372036854775807, i64 -9223372036854775808
68
///   br label %cleanup
69
///
70
/// if.end12:                                         ; preds = %if.end
71
///   %cmp13 = icmp ult i64 %shr, 150
72
///   br i1 %cmp13, label %if.then15, label %if.else
73
///
74
/// if.then15:                                        ; preds = %if.end12
75
///   %sub16 = sub nuw nsw i64 150, %shr
76
///   %shr17 = lshr i64 %or, %sub16
77
///   %mul = mul nsw i64 %shr17, %conv
78
///   br label %cleanup
79
///
80
/// if.else:                                          ; preds = %if.end12
81
///   %sub18 = add nsw i64 %shr, -150
82
///   %shl = shl i64 %or, %sub18
83
///   %mul19 = mul nsw i64 %shl, %conv
84
///   br label %cleanup
85
///
86
/// cleanup:                                          ; preds = %entry, %if.else, %if.then15, %if.then8
87
///   %retval.0 = phi i64 [ %cond11, %if.then8 ], [ %mul, %if.then15 ], [ %mul19, %if.else ], [ 0, %entry ]
88
///   ret i64 %retval.0
89
/// }
90
///
91
/// Replace fp to integer with generated code.
92
static void expandFPToI(Instruction *FPToI) {
  // clang-format on
  // Expands a scalar fptoui/fptosi in place: the containing block is split at
  // the conversion, a small CFG computing the result from the raw bit pattern
  // of the operand is inserted, and the original instruction is erased.
  IRBuilder<> Builder(FPToI);
  auto *FloatVal = FPToI->getOperand(0);
  IntegerType *IntTy = cast<IntegerType>(FPToI->getType());

  unsigned BitWidth = FPToI->getType()->getIntegerBitWidth();
  // Number of explicitly stored fraction bits (presumably getFPMantissaWidth()
  // also counts the implicit leading bit, hence the -1).
  unsigned FPMantissaWidth = FloatVal->getType()->getFPMantissaWidth() - 1;

  // FIXME: fp16's range is covered by i32. So `fptoi half` can convert
  // to i32 first following a sext/zext to target integer type.
  Value *A1 = nullptr;
  if (FloatVal->getType()->isHalfTy()) {
    if (FPToI->getOpcode() == Instruction::FPToUI) {
      Value *A0 = Builder.CreateFPToUI(FloatVal, Builder.getIntNTy(32));
      A1 = Builder.CreateZExt(A0, IntTy);
    } else { // FPToSI
      Value *A0 = Builder.CreateFPToSI(FloatVal, Builder.getIntNTy(32));
      A1 = Builder.CreateSExt(A0, IntTy);
    }
    FPToI->replaceAllUsesWith(A1);
    FPToI->dropAllReferences();
    FPToI->eraseFromParent();
    return;
  }

  // fp80 conversion is implemented by fpext to fp128 first then do the
  // conversion.
  FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
  unsigned FloatWidth =
      PowerOf2Ceil(FloatVal->getType()->getScalarSizeInBits());

  // The bit pattern of the operand is zero-extended to the result type below,
  // which is only a valid cast when the integer is at least as wide as the
  // (padded) floating point representation.
  assert(BitWidth >= FloatWidth && "Unexpected conversion. expandFPToI() "
                                   "assumes integer width is at least as "
                                   "large as fp.");

  unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;
  unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1;
  // 1 << FPMantissaWidth: the leading significand bit that is not stored.
  Value *ImplicitBit = Builder.CreateShl(
      Builder.getIntN(BitWidth, 1), Builder.getIntN(BitWidth, FPMantissaWidth));
  // Mask selecting the stored fraction bits.
  Value *SignificandMask =
      Builder.CreateSub(ImplicitBit, Builder.getIntN(BitWidth, 1));
  Value *NegOne = Builder.CreateSExt(
      ConstantInt::getSigned(Builder.getInt32Ty(), -1), IntTy);
  // Only the top bit set, i.e. the most negative value of the result type.
  Value *NegInf =
      Builder.CreateShl(ConstantInt::getSigned(IntTy, 1),
                        ConstantInt::getSigned(IntTy, BitWidth - 1));

  // Split the block at the conversion and create the blocks of the expansion
  // between the two halves.
  BasicBlock *Entry = Builder.GetInsertBlock();
  Function *F = Entry->getParent();
  Entry->setName(Twine(Entry->getName(), "fp-to-i-entry"));
  BasicBlock *End =
      Entry->splitBasicBlock(Builder.GetInsertPoint(), "fp-to-i-cleanup");
  BasicBlock *IfEnd =
      BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end", F, End);
  BasicBlock *IfThen5 =
      BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then5", F, End);
  BasicBlock *IfEnd9 =
      BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end9", F, End);
  BasicBlock *IfThen12 =
      BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then12", F, End);
  BasicBlock *IfElse =
      BasicBlock::Create(Builder.getContext(), "fp-to-i-if-else", F, End);

  // splitBasicBlock() left an unconditional branch to End; it is replaced by
  // the conditional branch emitted at the end of the entry code below.
  Entry->getTerminator()->eraseFromParent();

  // entry:
  // Decompose the operand into sign, biased exponent and significand. An
  // exponent below the bias means |value| < 1, so the result is 0.
  Builder.SetInsertPoint(Entry);
  Value *FloatVal0 = FloatVal;
  // fp80 conversion is implemented by fpext to fp128 first then do the
  // conversion.
  if (FloatVal->getType()->isX86_FP80Ty())
    FloatVal0 =
        Builder.CreateFPExt(FloatVal, Type::getFP128Ty(Builder.getContext()));
  Value *ARep0 =
      Builder.CreateBitCast(FloatVal0, Builder.getIntNTy(FloatWidth));
  Value *ARep = Builder.CreateZExt(ARep0, FPToI->getType());
  // True when the sign bit of the bit pattern is clear.
  Value *PosOrNeg = Builder.CreateICmpSGT(
      ARep0, ConstantInt::getSigned(Builder.getIntNTy(FloatWidth), -1));
  Value *Sign = Builder.CreateSelect(PosOrNeg, ConstantInt::getSigned(IntTy, 1),
                                     ConstantInt::getSigned(IntTy, -1));
  Value *And =
      Builder.CreateLShr(ARep, Builder.getIntN(BitWidth, FPMantissaWidth));
  // Biased exponent.
  Value *And2 = Builder.CreateAnd(
      And, Builder.getIntN(BitWidth, (1 << ExponentWidth) - 1));
  Value *Abs = Builder.CreateAnd(ARep, SignificandMask);
  // Significand with the implicit bit made explicit.
  Value *Or = Builder.CreateOr(Abs, ImplicitBit);
  Value *Cmp =
      Builder.CreateICmpULT(And2, Builder.getIntN(BitWidth, ExponentBias));
  Builder.CreateCondBr(Cmp, End, IfEnd);

  // if.end:
  // Unsigned range check: taken to if.then5 when the unbiased exponent is not
  // below BitWidth, i.e. the magnitude cannot be represented.
  Builder.SetInsertPoint(IfEnd);
  Value *Add1 = Builder.CreateAdd(
      And2, ConstantInt::getSigned(
                IntTy, -static_cast<int64_t>(ExponentBias + BitWidth)));
  Value *Cmp3 = Builder.CreateICmpULT(
      Add1, ConstantInt::getSigned(IntTy, -static_cast<int64_t>(BitWidth)));
  Builder.CreateCondBr(Cmp3, IfThen5, IfEnd9);

  // if.then5:
  // Saturate: all bits but the top one for positive inputs, only the top bit
  // for negative inputs.
  Builder.SetInsertPoint(IfThen5);
  Value *PosInf = Builder.CreateXor(NegOne, NegInf);
  Value *Cond8 = Builder.CreateSelect(PosOrNeg, PosInf, NegInf);
  Builder.CreateBr(End);

  // if.end9:
  // Decide whether the significand has to be shifted right or left.
  Builder.SetInsertPoint(IfEnd9);
  Value *Cmp10 = Builder.CreateICmpULT(
      And2, Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth));
  Builder.CreateCondBr(Cmp10, IfThen12, IfElse);

  // if.then12:
  // Exponent smaller than the fraction width: drop the fractional bits.
  Builder.SetInsertPoint(IfThen12);
  Value *Sub13 = Builder.CreateSub(
      Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth), And2);
  Value *Shr14 = Builder.CreateLShr(Or, Sub13);
  Value *Mul = Builder.CreateMul(Shr14, Sign);
  Builder.CreateBr(End);

  // if.else:
  // Otherwise scale the significand up.
  Builder.SetInsertPoint(IfElse);
  Value *Sub15 = Builder.CreateAdd(
      And2, ConstantInt::getSigned(
                IntTy, -static_cast<int64_t>(ExponentBias + FPMantissaWidth)));
  Value *Shl = Builder.CreateShl(Or, Sub15);
  Value *Mul16 = Builder.CreateMul(Shl, Sign);
  Builder.CreateBr(End);

  // cleanup:
  // Merge the four possible results at the top of the continuation block.
  Builder.SetInsertPoint(End, End->begin());
  PHINode *Retval0 = Builder.CreatePHI(FPToI->getType(), 4);

  Retval0->addIncoming(Cond8, IfThen5);
  Retval0->addIncoming(Mul, IfThen12);
  Retval0->addIncoming(Mul16, IfElse);
  Retval0->addIncoming(Builder.getIntN(BitWidth, 0), Entry);

  FPToI->replaceAllUsesWith(Retval0);
  FPToI->dropAllReferences();
  FPToI->eraseFromParent();
}
229

230
// clang-format off: preserve formatting of the following example
231

232
/// Generate code to convert an integer to a fp number, replacing S(U)IToFP with
233
/// the generated code. This currently generates code similarly to compiler-rt's
234
/// implementations. This implementation has an implicit assumption that integer
235
/// width is larger than fp.
236
///
237
/// An example IR generated from compiler-rt/floatdisf.c looks like below:
238
/// define dso_local float @__floatdisf(i64 noundef %a) local_unnamed_addr #0 {
239
/// entry:
240
///   %cmp = icmp eq i64 %a, 0
241
///   br i1 %cmp, label %return, label %if.end
242
///
243
/// if.end:                                           ; preds = %entry
244
///   %shr = ashr i64 %a, 63
245
///   %xor = xor i64 %shr, %a
246
///   %sub = sub nsw i64 %xor, %shr
247
///   %0 = tail call i64 @llvm.ctlz.i64(i64 %sub, i1 true), !range !5
248
///   %cast = trunc i64 %0 to i32
249
///   %sub1 = sub nuw nsw i32 64, %cast
250
///   %sub2 = xor i32 %cast, 63
251
///   %cmp3 = icmp ult i32 %cast, 40
252
///   br i1 %cmp3, label %if.then4, label %if.else
253
///
254
/// if.then4:                                         ; preds = %if.end
255
///   switch i32 %sub1, label %sw.default [
256
///     i32 25, label %sw.bb
257
///     i32 26, label %sw.epilog
258
///   ]
259
///
260
/// sw.bb:                                            ; preds = %if.then4
261
///   %shl = shl i64 %sub, 1
262
///   br label %sw.epilog
263
///
264
/// sw.default:                                       ; preds = %if.then4
265
///   %sub5 = sub nsw i64 38, %0
266
///   %sh_prom = and i64 %sub5, 4294967295
267
///   %shr6 = lshr i64 %sub, %sh_prom
268
///   %shr9 = lshr i64 274877906943, %0
269
///   %and = and i64 %shr9, %sub
270
///   %cmp10 = icmp ne i64 %and, 0
271
///   %conv11 = zext i1 %cmp10 to i64
272
///   %or = or i64 %shr6, %conv11
273
///   br label %sw.epilog
274
///
275
/// sw.epilog:                                        ; preds = %sw.default, %if.then4, %sw.bb
276
///   %a.addr.0 = phi i64 [ %or, %sw.default ], [ %sub, %if.then4 ], [ %shl, %sw.bb ]
277
///   %1 = lshr i64 %a.addr.0, 2
278
///   %2 = and i64 %1, 1
279
///   %or16 = or i64 %2, %a.addr.0
280
///   %inc = add nsw i64 %or16, 1
281
///   %3 = and i64 %inc, 67108864
282
///   %tobool.not = icmp eq i64 %3, 0
283
///   %spec.select.v = select i1 %tobool.not, i64 2, i64 3
284
///   %spec.select = ashr i64 %inc, %spec.select.v
285
///   %spec.select56 = select i1 %tobool.not, i32 %sub2, i32 %sub1
286
///   br label %if.end26
287
///
288
/// if.else:                                          ; preds = %if.end
289
///   %sub23 = add nuw nsw i64 %0, 4294967256
290
///   %sh_prom24 = and i64 %sub23, 4294967295
291
///   %shl25 = shl i64 %sub, %sh_prom24
292
///   br label %if.end26
293
///
294
/// if.end26:                                         ; preds = %sw.epilog, %if.else
295
///   %a.addr.1 = phi i64 [ %shl25, %if.else ], [ %spec.select, %sw.epilog ]
296
///   %e.0 = phi i32 [ %sub2, %if.else ], [ %spec.select56, %sw.epilog ]
297
///   %conv27 = trunc i64 %shr to i32
298
///   %and28 = and i32 %conv27, -2147483648
299
///   %add = shl nuw nsw i32 %e.0, 23
300
///   %shl29 = add nuw nsw i32 %add, 1065353216
301
///   %conv31 = trunc i64 %a.addr.1 to i32
302
///   %and32 = and i32 %conv31, 8388607
303
///   %or30 = or i32 %and32, %and28
304
///   %or33 = or i32 %or30, %shl29
305
///   %4 = bitcast i32 %or33 to float
306
///   br label %return
307
///
308
/// return:                                           ; preds = %entry, %if.end26
309
///   %retval.0 = phi float [ %4, %if.end26 ], [ 0.000000e+00, %entry ]
310
///   ret float %retval.0
311
/// }
312
///
313
/// Replace integer to fp with generated code.
314
static void expandIToFP(Instruction *IToFP) {
  // clang-format on
  // Expands a scalar uitofp/sitofp in place: the containing block is split at
  // the conversion, a small CFG assembling the floating point bit pattern is
  // inserted, and the original instruction is erased.
  IRBuilder<> Builder(IToFP);
  auto *IntVal = IToFP->getOperand(0);
  IntegerType *IntTy = cast<IntegerType>(IntVal->getType());

  unsigned BitWidth = IntVal->getType()->getIntegerBitWidth();
  unsigned FPMantissaWidth = IToFP->getType()->getFPMantissaWidth() - 1;
  // fp80 conversion is implemented by conversion to fp128 first following
  // a fptrunc to fp80.
  FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
  // FIXME: As there is no related builtins added in compiler-rt,
  // here currently utilized the fp32 <-> fp16 lib calls to implement.
  FPMantissaWidth = FPMantissaWidth == 10 ? 23 : FPMantissaWidth;
  FPMantissaWidth = FPMantissaWidth == 7 ? 23 : FPMantissaWidth;
  // Width of the format the bit pattern is assembled in (32, 64 or 128).
  unsigned FloatWidth = PowerOf2Ceil(FPMantissaWidth);
  bool IsSigned = IToFP->getOpcode() == Instruction::SIToFP;

  assert(BitWidth > FloatWidth && "Unexpected conversion. expandIToFP() "
                                  "assumes integer width is larger than fp.");

  // 1 << (FPMantissaWidth + 3): the bit that is tested after rounding to see
  // whether the increment carried out of the significand.
  Value *Temp1 =
      Builder.CreateShl(Builder.getIntN(BitWidth, 1),
                        Builder.getIntN(BitWidth, FPMantissaWidth + 3));

  // Split the block at the conversion and create the blocks of the expansion
  // between the two halves.
  BasicBlock *Entry = Builder.GetInsertBlock();
  Function *F = Entry->getParent();
  Entry->setName(Twine(Entry->getName(), "itofp-entry"));
  BasicBlock *End =
      Entry->splitBasicBlock(Builder.GetInsertPoint(), "itofp-return");
  BasicBlock *IfEnd =
      BasicBlock::Create(Builder.getContext(), "itofp-if-end", F, End);
  BasicBlock *IfThen4 =
      BasicBlock::Create(Builder.getContext(), "itofp-if-then4", F, End);
  BasicBlock *SwBB =
      BasicBlock::Create(Builder.getContext(), "itofp-sw-bb", F, End);
  BasicBlock *SwDefault =
      BasicBlock::Create(Builder.getContext(), "itofp-sw-default", F, End);
  BasicBlock *SwEpilog =
      BasicBlock::Create(Builder.getContext(), "itofp-sw-epilog", F, End);
  BasicBlock *IfThen20 =
      BasicBlock::Create(Builder.getContext(), "itofp-if-then20", F, End);
  BasicBlock *IfElse =
      BasicBlock::Create(Builder.getContext(), "itofp-if-else", F, End);
  BasicBlock *IfEnd26 =
      BasicBlock::Create(Builder.getContext(), "itofp-if-end26", F, End);

  // splitBasicBlock() left an unconditional branch to End; it is replaced by
  // the conditional branch emitted at the end of the entry code below.
  Entry->getTerminator()->eraseFromParent();

  Function *CTLZ =
      Intrinsic::getOrInsertDeclaration(F->getParent(), Intrinsic::ctlz, IntTy);
  ConstantInt *True = Builder.getTrue();

  // entry:
  // Zero converts to +0.0 directly.
  Builder.SetInsertPoint(Entry);
  Value *Cmp = Builder.CreateICmpEQ(IntVal, ConstantInt::getSigned(IntTy, 0));
  Builder.CreateCondBr(Cmp, End, IfEnd);

  // if.end:
  // Shr is the sign mask of the input and Sub its absolute value; the signed
  // conversion works on Sub, the unsigned one on IntVal. Sub1 is the number
  // of significant bits, Sub2 the unbiased exponent.
  Builder.SetInsertPoint(IfEnd);
  Value *Shr =
      Builder.CreateAShr(IntVal, Builder.getIntN(BitWidth, BitWidth - 1));
  Value *Xor = Builder.CreateXor(Shr, IntVal);
  Value *Sub = Builder.CreateSub(Xor, Shr);
  Value *Call = Builder.CreateCall(CTLZ, {IsSigned ? Sub : IntVal, True});
  Value *Cast = Builder.CreateTrunc(Call, Builder.getInt32Ty());
  // Exponent arithmetic is done in i32, except for the 128-bit format where
  // it stays in the width of the integer operand.
  int BitWidthNew = FloatWidth == 128 ? BitWidth : 32;
  Value *Sub1 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth),
                                  FloatWidth == 128 ? Call : Cast);
  Value *Sub2 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth - 1),
                                  FloatWidth == 128 ? Call : Cast);
  Value *Cmp3 = Builder.CreateICmpSGT(
      Sub1, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1));
  Builder.CreateCondBr(Cmp3, IfThen4, IfElse);

  // if.then4:
  // More significant bits than the significand can hold: reduce the value to
  // FPMantissaWidth + 3 bits before rounding.
  Builder.SetInsertPoint(IfThen4);
  llvm::SwitchInst *SI = Builder.CreateSwitch(Sub1, SwDefault);
  SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 2), SwBB);
  SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 3), SwEpilog);

  // sw.bb:
  Builder.SetInsertPoint(SwBB);
  Value *Shl =
      Builder.CreateShl(IsSigned ? Sub : IntVal, Builder.getIntN(BitWidth, 1));
  Builder.CreateBr(SwEpilog);

  // sw.default:
  // Shift right and OR a sticky bit recording whether any shifted-out bit was
  // set.
  Builder.SetInsertPoint(SwDefault);
  Value *Sub5 = Builder.CreateSub(
      Builder.getIntN(BitWidthNew, BitWidth - FPMantissaWidth - 3),
      FloatWidth == 128 ? Call : Cast);
  Value *ShProm = Builder.CreateZExt(Sub5, IntTy);
  Value *Shr6 = Builder.CreateLShr(IsSigned ? Sub : IntVal,
                                   FloatWidth == 128 ? Sub5 : ShProm);
  Value *Sub8 =
      Builder.CreateAdd(FloatWidth == 128 ? Call : Cast,
                        Builder.getIntN(BitWidthNew, FPMantissaWidth + 3));
  Value *ShProm9 = Builder.CreateZExt(Sub8, IntTy);
  Value *Shr9 = Builder.CreateLShr(ConstantInt::getSigned(IntTy, -1),
                                   FloatWidth == 128 ? Sub8 : ShProm9);
  Value *And = Builder.CreateAnd(Shr9, IsSigned ? Sub : IntVal);
  Value *Cmp10 = Builder.CreateICmpNE(And, Builder.getIntN(BitWidth, 0));
  Value *Conv11 = Builder.CreateZExt(Cmp10, IntTy);
  Value *Or = Builder.CreateOr(Shr6, Conv11);
  Builder.CreateBr(SwEpilog);

  // sw.epilog:
  // Round: OR in bit 2, add one, then drop the two extra low bits. A3 tests
  // whether the increment carried into bit FPMantissaWidth + 3.
  Builder.SetInsertPoint(SwEpilog);
  PHINode *AAddr0 = Builder.CreatePHI(IntTy, 3);
  AAddr0->addIncoming(Or, SwDefault);
  AAddr0->addIncoming(IsSigned ? Sub : IntVal, IfThen4);
  AAddr0->addIncoming(Shl, SwBB);
  Value *A0 = Builder.CreateTrunc(AAddr0, Builder.getInt32Ty());
  Value *A1 = Builder.CreateLShr(A0, Builder.getIntN(32, 2));
  Value *A2 = Builder.CreateAnd(A1, Builder.getIntN(32, 1));
  Value *Conv16 = Builder.CreateZExt(A2, IntTy);
  Value *Or17 = Builder.CreateOr(AAddr0, Conv16);
  Value *Inc = Builder.CreateAdd(Or17, Builder.getIntN(BitWidth, 1));
  Value *Shr18 = nullptr;
  if (IsSigned)
    Shr18 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 2));
  else
    Shr18 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 2));
  Value *A3 = Builder.CreateAnd(Inc, Temp1, "a3");
  Value *PosOrNeg = Builder.CreateICmpEQ(A3, Builder.getIntN(BitWidth, 0));
  Value *ExtractT60 = Builder.CreateTrunc(Shr18, Builder.getIntNTy(FloatWidth));
  Value *Extract63 = Builder.CreateLShr(Shr18, Builder.getIntN(BitWidth, 32));
  Value *ExtractT64 = nullptr;
  if (FloatWidth > 80)
    ExtractT64 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
  else
    ExtractT64 = Builder.CreateTrunc(Extract63, Builder.getInt32Ty());
  Builder.CreateCondBr(PosOrNeg, IfEnd26, IfThen20);

  // if.then20
  // The increment carried: shift by one more bit and use the larger exponent
  // (Sub1).
  Builder.SetInsertPoint(IfThen20);
  Value *Shr21 = nullptr;
  if (IsSigned)
    Shr21 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 3));
  else
    Shr21 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 3));
  Value *ExtractT = Builder.CreateTrunc(Shr21, Builder.getIntNTy(FloatWidth));
  Value *Extract = Builder.CreateLShr(Shr21, Builder.getIntN(BitWidth, 32));
  Value *ExtractT62 = nullptr;
  if (FloatWidth > 80)
    ExtractT62 = Builder.CreateTrunc(Sub1, Builder.getIntNTy(64));
  else
    ExtractT62 = Builder.CreateTrunc(Extract, Builder.getIntNTy(32));
  Builder.CreateBr(IfEnd26);

  // if.else:
  // The value fits into the significand: shift it left by
  // ctlz - (BitWidth - FPMantissaWidth - 1). The offset is negated as a
  // signed 64-bit value; negating it as `unsigned` would zero-extend to a
  // large positive constant whenever the constant type is wider than 32 bits.
  Builder.SetInsertPoint(IfElse);
  Value *Sub24 = Builder.CreateAdd(
      FloatWidth == 128 ? Call : Cast,
      ConstantInt::getSigned(
          Builder.getIntNTy(BitWidthNew),
          -static_cast<int64_t>(BitWidth - FPMantissaWidth - 1)));
  Value *ShProm25 = Builder.CreateZExt(Sub24, IntTy);
  Value *Shl26 = Builder.CreateShl(IsSigned ? Sub : IntVal,
                                   FloatWidth == 128 ? Sub24 : ShProm25);
  Value *ExtractT61 = Builder.CreateTrunc(Shl26, Builder.getIntNTy(FloatWidth));
  Value *Extract65 = Builder.CreateLShr(Shl26, Builder.getIntN(BitWidth, 32));
  Value *ExtractT66 = nullptr;
  if (FloatWidth > 80)
    ExtractT66 = Builder.CreateTrunc(Sub2, Builder.getIntNTy(64));
  else
    ExtractT66 = Builder.CreateTrunc(Extract65, Builder.getInt32Ty());
  Builder.CreateBr(IfEnd26);

  // if.end26:
  // Assemble sign, exponent and significand into the final bit pattern.
  Builder.SetInsertPoint(IfEnd26);
  PHINode *AAddr1Off0 = Builder.CreatePHI(Builder.getIntNTy(FloatWidth), 3);
  AAddr1Off0->addIncoming(ExtractT, IfThen20);
  AAddr1Off0->addIncoming(ExtractT60, SwEpilog);
  AAddr1Off0->addIncoming(ExtractT61, IfElse);
  PHINode *AAddr1Off32 = nullptr;
  if (FloatWidth > 32) {
    AAddr1Off32 =
        Builder.CreatePHI(Builder.getIntNTy(FloatWidth > 80 ? 64 : 32), 3);
    AAddr1Off32->addIncoming(ExtractT62, IfThen20);
    AAddr1Off32->addIncoming(ExtractT64, SwEpilog);
    AAddr1Off32->addIncoming(ExtractT66, IfElse);
  }
  PHINode *E0 = nullptr;
  if (FloatWidth <= 80) {
    E0 = Builder.CreatePHI(Builder.getIntNTy(BitWidthNew), 3);
    E0->addIncoming(Sub1, IfThen20);
    E0->addIncoming(Sub2, SwEpilog);
    E0->addIncoming(Sub2, IfElse);
  }
  // Sign bit of the result.
  Value *And29 = nullptr;
  if (FloatWidth > 80) {
    Value *Temp2 = Builder.CreateShl(Builder.getIntN(BitWidth, 1),
                                     Builder.getIntN(BitWidth, 63));
    And29 = Builder.CreateAnd(Shr, Temp2, "and29");
  } else {
    Value *Conv28 = Builder.CreateTrunc(Shr, Builder.getIntNTy(32));
    // 0x80000000 does not fit a signed 32-bit value, so build the mask as an
    // unsigned constant.
    And29 = Builder.CreateAnd(Conv28, Builder.getIntN(32, 0x80000000));
  }
  // Exponent field: shifted into position and biased.
  unsigned TempMod = FPMantissaWidth % 32;
  Value *And34 = nullptr;
  Value *Shl30 = nullptr;
  if (FloatWidth > 80) {
    TempMod += 32;
    Value *Add = Builder.CreateShl(AAddr1Off32, Builder.getIntN(64, TempMod));
    Shl30 = Builder.CreateAdd(
        Add,
        Builder.getIntN(64, ((1ull << (62ull - TempMod)) - 1ull) << TempMod));
    And34 = Builder.CreateZExt(Shl30, Builder.getIntNTy(128));
  } else {
    Value *Add = Builder.CreateShl(E0, Builder.getIntN(32, TempMod));
    Shl30 = Builder.CreateAdd(
        Add, Builder.getIntN(32, ((1 << (30 - TempMod)) - 1) << TempMod));
    And34 = Builder.CreateAnd(FloatWidth > 32 ? AAddr1Off32 : AAddr1Off0,
                              Builder.getIntN(32, (1 << TempMod) - 1));
  }
  Value *Or35 = nullptr;
  if (FloatWidth > 80) {
    Value *And29Trunc = Builder.CreateTrunc(And29, Builder.getIntNTy(128));
    Value *Or31 = Builder.CreateOr(And29Trunc, And34);
    Value *Or34 = Builder.CreateShl(Or31, Builder.getIntN(128, 64));
    Value *Temp3 = Builder.CreateShl(Builder.getIntN(128, 1),
                                     Builder.getIntN(128, FPMantissaWidth));
    Value *Temp4 = Builder.CreateSub(Temp3, Builder.getIntN(128, 1));
    Value *A6 = Builder.CreateAnd(AAddr1Off0, Temp4);
    Or35 = Builder.CreateOr(Or34, A6);
  } else {
    Value *Or31 = Builder.CreateOr(And34, And29);
    Or35 = Builder.CreateOr(IsSigned ? Or31 : And34, Shl30);
  }
  // Reinterpret the bits as floating point, narrowing afterwards for the
  // types that were computed in a wider format.
  Value *A4 = nullptr;
  if (IToFP->getType()->isDoubleTy()) {
    Value *ZExt1 = Builder.CreateZExt(Or35, Builder.getIntNTy(FloatWidth));
    Value *Shl1 = Builder.CreateShl(ZExt1, Builder.getIntN(FloatWidth, 32));
    Value *And1 =
        Builder.CreateAnd(AAddr1Off0, Builder.getIntN(FloatWidth, 0xFFFFFFFF));
    Value *Or1 = Builder.CreateOr(Shl1, And1);
    A4 = Builder.CreateBitCast(Or1, IToFP->getType());
  } else if (IToFP->getType()->isX86_FP80Ty()) {
    Value *A40 =
        Builder.CreateBitCast(Or35, Type::getFP128Ty(Builder.getContext()));
    A4 = Builder.CreateFPTrunc(A40, IToFP->getType());
  } else if (IToFP->getType()->isHalfTy() || IToFP->getType()->isBFloatTy()) {
    // Deal with "half" situation. This is a workaround since we don't have
    // floattihf.c currently as referring.
    Value *A40 =
        Builder.CreateBitCast(Or35, Type::getFloatTy(Builder.getContext()));
    A4 = Builder.CreateFPTrunc(A40, IToFP->getType());
  } else // float type
    A4 = Builder.CreateBitCast(Or35, IToFP->getType());
  Builder.CreateBr(End);

  // return:
  // Merge the computed value with the +0.0 produced for a zero input.
  Builder.SetInsertPoint(End, End->begin());
  PHINode *Retval0 = Builder.CreatePHI(IToFP->getType(), 2);
  Retval0->addIncoming(A4, IfEnd26);
  Retval0->addIncoming(ConstantFP::getZero(IToFP->getType(), false), Entry);

  IToFP->replaceAllUsesWith(Retval0);
  IToFP->dropAllReferences();
  IToFP->eraseFromParent();
}
577

578
/// Rewrite the fixed-width vector cast \p I as one scalar cast per lane and
/// rebuild the vector result from the scalar results. Every scalar cast that
/// is still an instruction (i.e. was not folded by the builder) is appended
/// to \p Replace so that the caller can expand it. \p I is erased.
static void scalarize(Instruction *I, SmallVectorImpl<Instruction *> &Replace) {
  auto *VecTy = cast<FixedVectorType>(I->getType());
  const Instruction::CastOps Opcode = cast<CastInst>(I)->getOpcode();
  Type *ScalarDestTy = I->getType()->getScalarType();
  Value *VecSrc = I->getOperand(0);

  IRBuilder<> Builder(I);

  // Start from poison and fill in one lane at a time.
  Value *Rebuilt = PoisonValue::get(VecTy);
  const unsigned LaneCount = VecTy->getNumElements();
  for (unsigned Lane = 0; Lane != LaneCount; ++Lane) {
    Value *Elt = Builder.CreateExtractElement(VecSrc, Lane);
    Value *EltCast = Builder.CreateCast(Opcode, Elt, ScalarDestTy);
    Rebuilt = Builder.CreateInsertElement(Rebuilt, EltCast, Lane);
    if (auto *CastInstr = dyn_cast<Instruction>(EltCast))
      Replace.push_back(CastInstr);
  }

  I->replaceAllUsesWith(Rebuilt);
  I->dropAllReferences();
  I->eraseFromParent();
}
597

598
/// Expand every fp<->int conversion in \p F whose integer type is wider than
/// the supported threshold.
///
/// The threshold is the value reported by \p TLI unless it is overridden with
/// -expand-fp-convert-bits. Fixed-width vector conversions are scalarized
/// first; scalable vectors are skipped.
///
/// \returns true if \p F was changed.
static bool runImpl(Function &F, const TargetLowering &TLI) {
  SmallVector<Instruction *, 4> Replace;
  SmallVector<Instruction *, 4> ReplaceVector;

  unsigned MaxLegalFpConvertBitWidth =
      TLI.getMaxLargeFPConvertBitWidthSupported();
  if (ExpandFpConvertBits != llvm::IntegerType::MAX_INT_BITS)
    MaxLegalFpConvertBitWidth = ExpandFpConvertBits;

  // No integer type can exceed the threshold, so there is nothing to expand.
  if (MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS)
    return false;

  // Only collect here; the expansions split blocks and erase instructions,
  // which must not happen while iterating over the function.
  for (auto &I : instructions(F)) {
    // The integer side is the result for fp->int and the operand for int->fp.
    Type *IntSideTy = nullptr;
    switch (I.getOpcode()) {
    case Instruction::FPToUI:
    case Instruction::FPToSI:
      IntSideTy = I.getType();
      break;
    case Instruction::UIToFP:
    case Instruction::SIToFP:
      IntSideTy = I.getOperand(0)->getType();
      break;
    default:
      continue;
    }

    Type *SrcTy = I.getOperand(0)->getType();
    // TODO: This pass doesn't handle scalable vectors.
    if (SrcTy->isScalableTy())
      continue;

    auto *IntTy = cast<IntegerType>(IntSideTy->getScalarType());
    if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
      continue;

    if (SrcTy->isVectorTy())
      ReplaceVector.push_back(&I);
    else
      Replace.push_back(&I);
  }

  // Scalarizing a vector conversion already changes the function, even when
  // every resulting scalar cast folds to a constant and nothing is added to
  // Replace. Record the fact before the worklists are drained.
  const bool Modified = !Replace.empty() || !ReplaceVector.empty();

  while (!ReplaceVector.empty()) {
    Instruction *I = ReplaceVector.pop_back_val();
    scalarize(I, Replace);
  }

  while (!Replace.empty()) {
    Instruction *I = Replace.pop_back_val();
    if (I->getOpcode() == Instruction::FPToUI ||
        I->getOpcode() == Instruction::FPToSI) {
      expandFPToI(I);
    } else {
      expandIToFP(I);
    }
  }

  return Modified;
}
673

674
namespace {
675
class ExpandFpLegacyPass : public FunctionPass {
676
public:
677
  static char ID;
678

679
  ExpandFpLegacyPass() : FunctionPass(ID) {
680
    initializeExpandFpLegacyPassPass(*PassRegistry::getPassRegistry());
681
  }
682

683
  bool runOnFunction(Function &F) override {
684
    auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
685
    auto *TLI = TM->getSubtargetImpl(F)->getTargetLowering();
686
    return runImpl(F, *TLI);
687
  }
688

689
  void getAnalysisUsage(AnalysisUsage &AU) const override {
690
    AU.addRequired<TargetPassConfig>();
691
    AU.addPreserved<AAResultsWrapperPass>();
692
    AU.addPreserved<GlobalsAAWrapperPass>();
693
  }
694
};
695
} // namespace
696

697
/// New pass manager entry point: run the expansion with the target lowering
/// of \p F's subtarget. All analyses are reported as preserved when nothing
/// changed and none otherwise.
PreservedAnalyses ExpandFpPass::run(Function &F, FunctionAnalysisManager &FAM) {
  const TargetLowering &Lowering =
      *TM->getSubtargetImpl(F)->getTargetLowering();
  if (!runImpl(F, Lowering))
    return PreservedAnalyses::all();
  return PreservedAnalyses::none();
}
702

703
char ExpandFpLegacyPass::ID = 0;
704
INITIALIZE_PASS_BEGIN(ExpandFpLegacyPass, "expand-fp",
705
                      "Expand certain fp instructions", false, false)
706
INITIALIZE_PASS_END(ExpandFpLegacyPass, "expand-fp", "Expand fp", false, false)
707

708
/// Create a new instance of the legacy pass manager version of the pass.
FunctionPass *llvm::createExpandFpPass() {
  return new ExpandFpLegacyPass();
}
709

710
Product

Resources

Company