CoCalc -- Float2Int.cpp

GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Scalar/Float2Int.cpp
³⁵²⁶⁶ views
1
//===- Float2Int.cpp - Demote floating point ops to work on integers ------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements the Float2Int pass, which aims to demote floating
10
// point operations to work on integers, where that is losslessly possible.
11
//
12
//===----------------------------------------------------------------------===//
13

14
#include "llvm/Transforms/Scalar/Float2Int.h"
15
#include "llvm/ADT/APInt.h"
16
#include "llvm/ADT/APSInt.h"
17
#include "llvm/ADT/SmallVector.h"
18
#include "llvm/Analysis/GlobalsModRef.h"
19
#include "llvm/IR/Constants.h"
20
#include "llvm/IR/Dominators.h"
21
#include "llvm/IR/IRBuilder.h"
22
#include "llvm/IR/Module.h"
23
#include "llvm/Support/CommandLine.h"
24
#include "llvm/Support/Debug.h"
25
#include "llvm/Support/raw_ostream.h"
26
#include <deque>
27

28
#define DEBUG_TYPE "float2int"
29

30
using namespace llvm;
31

32
// The algorithm is simple. Start at instructions that convert from the
33
// float to the int domain: fptoui, fptosi and fcmp. Walk up the def-use
34
// graph, using an equivalence datastructure to unify graphs that interfere.
35
//
36
// Mappable instructions are those with an integer corollary that, given
37
// integer domain inputs, produce an integer output; fadd, for example.
38
//
39
// If a non-mappable instruction is seen, this entire def-use graph is marked
40
// as non-transformable. If we see an instruction that converts from the
41
// integer domain to FP domain (uitofp,sitofp), we terminate our walk.
42

43
/// The largest integer type worth dealing with.
44
static cl::opt<unsigned>
45
MaxIntegerBW("float2int-max-integer-bw", cl::init(64), cl::Hidden,
46
             cl::desc("Max integer bitwidth to consider in float2int"
47
                      "(default=64)"));
48

49
// Given a FCmp predicate, return a matching ICmp predicate if one
50
// exists, otherwise return BAD_ICMP_PREDICATE.
51
static CmpInst::Predicate mapFCmpPred(CmpInst::Predicate P) {
52
  switch (P) {
53
  case CmpInst::FCMP_OEQ:
54
  case CmpInst::FCMP_UEQ:
55
    return CmpInst::ICMP_EQ;
56
  case CmpInst::FCMP_OGT:
57
  case CmpInst::FCMP_UGT:
58
    return CmpInst::ICMP_SGT;
59
  case CmpInst::FCMP_OGE:
60
  case CmpInst::FCMP_UGE:
61
    return CmpInst::ICMP_SGE;
62
  case CmpInst::FCMP_OLT:
63
  case CmpInst::FCMP_ULT:
64
    return CmpInst::ICMP_SLT;
65
  case CmpInst::FCMP_OLE:
66
  case CmpInst::FCMP_ULE:
67
    return CmpInst::ICMP_SLE;
68
  case CmpInst::FCMP_ONE:
69
  case CmpInst::FCMP_UNE:
70
    return CmpInst::ICMP_NE;
71
  default:
72
    return CmpInst::BAD_ICMP_PREDICATE;
73
  }
74
}
75

76
// Given a floating point binary operator, return the matching
77
// integer version.
78
static Instruction::BinaryOps mapBinOpcode(unsigned Opcode) {
79
  switch (Opcode) {
80
  default: llvm_unreachable("Unhandled opcode!");
81
  case Instruction::FAdd: return Instruction::Add;
82
  case Instruction::FSub: return Instruction::Sub;
83
  case Instruction::FMul: return Instruction::Mul;
84
  }
85
}
86

87
// Find the roots - instructions that convert from the FP domain to
88
// integer domain.
89
void Float2IntPass::findRoots(Function &F, const DominatorTree &DT) {
90
  for (BasicBlock &BB : F) {
91
    // Unreachable code can take on strange forms that we are not prepared to
92
    // handle. For example, an instruction may have itself as an operand.
93
    if (!DT.isReachableFromEntry(&BB))
94
      continue;
95

96
    for (Instruction &I : BB) {
97
      if (isa<VectorType>(I.getType()))
98
        continue;
99
      switch (I.getOpcode()) {
100
      default: break;
101
      case Instruction::FPToUI:
102
      case Instruction::FPToSI:
103
        Roots.insert(&I);
104
        break;
105
      case Instruction::FCmp:
106
        if (mapFCmpPred(cast<CmpInst>(&I)->getPredicate()) !=
107
            CmpInst::BAD_ICMP_PREDICATE)
108
          Roots.insert(&I);
109
        break;
110
      }
111
    }
112
  }
113
}
114

115
// Helper - mark I as having been traversed, having range R.
116
void Float2IntPass::seen(Instruction *I, ConstantRange R) {
117
  LLVM_DEBUG(dbgs() << "F2I: " << *I << ":" << R << "\n");
118
  auto IT = SeenInsts.find(I);
119
  if (IT != SeenInsts.end())
120
    IT->second = std::move(R);
121
  else
122
    SeenInsts.insert(std::make_pair(I, std::move(R)));
123
}
124

125
// Helper - get a range representing a poison value.
126
ConstantRange Float2IntPass::badRange() {
127
  return ConstantRange::getFull(MaxIntegerBW + 1);
128
}
129
// Helper - the range used as a placeholder for an instruction whose range has
// not been calculated yet: the empty set at MaxIntegerBW + 1 bits.
ConstantRange Float2IntPass::unknownRange() {
  const unsigned Width = MaxIntegerBW + 1;
  return ConstantRange::getEmpty(Width);
}
132
// Helper - pass R through unchanged when it is no wider than
// MaxIntegerBW + 1 bits; otherwise replace it with badRange().
ConstantRange Float2IntPass::validateRange(ConstantRange R) {
  if (R.getBitWidth() <= MaxIntegerBW + 1)
    return R;
  return badRange();
}
137

138
// The most obvious way to structure the search is a depth-first, eager
139
// search from each root. However, that requires direct recursion and so
140
// can only handle small instruction sequences. Instead, we split the search
141
// up into two phases:
142
//   - walkBackwards:  A breadth-first walk of the use-def graph starting from
143
//                     the roots. Populate "SeenInsts" with interesting
144
//                     instructions and poison values if they're obvious and
145
//                     cheap to compute. Calculate the equivalence set structure
146
//                     while we're here too.
147
//   - walkForwards:  Iterate over SeenInsts in reverse order, so we visit
148
//                     defs before their uses. Calculate the real range info.
149

150
// Breadth-first walk of the use-def graph; determine the set of nodes
151
// we care about and eagerly determine if some of them are poisonous.
152
void Float2IntPass::walkBackwards() {
153
  std::deque<Instruction*> Worklist(Roots.begin(), Roots.end());
154
  while (!Worklist.empty()) {
155
    Instruction *I = Worklist.back();
156
    Worklist.pop_back();
157

158
    if (SeenInsts.contains(I))
159
      // Seen already.
160
      continue;
161

162
    switch (I->getOpcode()) {
163
      // FIXME: Handle select and phi nodes.
164
    default:
165
      // Path terminated uncleanly.
166
      seen(I, badRange());
167
      break;
168

169
    case Instruction::UIToFP:
170
    case Instruction::SIToFP: {
171
      // Path terminated cleanly - use the type of the integer input to seed
172
      // the analysis.
173
      unsigned BW = I->getOperand(0)->getType()->getPrimitiveSizeInBits();
174
      auto Input = ConstantRange::getFull(BW);
175
      auto CastOp = (Instruction::CastOps)I->getOpcode();
176
      seen(I, validateRange(Input.castOp(CastOp, MaxIntegerBW+1)));
177
      continue;
178
    }
179

180
    case Instruction::FNeg:
181
    case Instruction::FAdd:
182
    case Instruction::FSub:
183
    case Instruction::FMul:
184
    case Instruction::FPToUI:
185
    case Instruction::FPToSI:
186
    case Instruction::FCmp:
187
      seen(I, unknownRange());
188
      break;
189
    }
190

191
    for (Value *O : I->operands()) {
192
      if (Instruction *OI = dyn_cast<Instruction>(O)) {
193
        // Unify def-use chains if they interfere.
194
        ECs.unionSets(I, OI);
195
        if (SeenInsts.find(I)->second != badRange())
196
          Worklist.push_back(OI);
197
      } else if (!isa<ConstantFP>(O)) {
198
        // Not an instruction or ConstantFP? we can't do anything.
199
        seen(I, badRange());
200
      }
201
    }
202
  }
203
}
204

205
// Calculate result range from operand ranges.
206
// Return std::nullopt if the range cannot be calculated yet.
207
std::optional<ConstantRange> Float2IntPass::calcRange(Instruction *I) {
208
  SmallVector<ConstantRange, 4> OpRanges;
209
  for (Value *O : I->operands()) {
210
    if (Instruction *OI = dyn_cast<Instruction>(O)) {
211
      auto OpIt = SeenInsts.find(OI);
212
      assert(OpIt != SeenInsts.end() && "def not seen before use!");
213
      if (OpIt->second == unknownRange())
214
        return std::nullopt; // Wait until operand range has been calculated.
215
      OpRanges.push_back(OpIt->second);
216
    } else if (ConstantFP *CF = dyn_cast<ConstantFP>(O)) {
217
      // Work out if the floating point number can be losslessly represented
218
      // as an integer.
219
      // APFloat::convertToInteger(&Exact) purports to do what we want, but
220
      // the exactness can be too precise. For example, negative zero can
221
      // never be exactly converted to an integer.
222
      //
223
      // Instead, we ask APFloat to round itself to an integral value - this
224
      // preserves sign-of-zero - then compare the result with the original.
225
      //
226
      const APFloat &F = CF->getValueAPF();
227

228
      // First, weed out obviously incorrect values. Non-finite numbers
229
      // can't be represented and neither can negative zero, unless
230
      // we're in fast math mode.
231
      if (!F.isFinite() ||
232
          (F.isZero() && F.isNegative() && isa<FPMathOperator>(I) &&
233
           !I->hasNoSignedZeros()))
234
        return badRange();
235

236
      APFloat NewF = F;
237
      auto Res = NewF.roundToIntegral(APFloat::rmNearestTiesToEven);
238
      if (Res != APFloat::opOK || NewF != F)
239
        return badRange();
240

241
      // OK, it's representable. Now get it.
242
      APSInt Int(MaxIntegerBW+1, false);
243
      bool Exact;
244
      CF->getValueAPF().convertToInteger(Int,
245
                                         APFloat::rmNearestTiesToEven,
246
                                         &Exact);
247
      OpRanges.push_back(ConstantRange(Int));
248
    } else {
249
      llvm_unreachable("Should have already marked this as badRange!");
250
    }
251
  }
252

253
  switch (I->getOpcode()) {
254
  // FIXME: Handle select and phi nodes.
255
  default:
256
  case Instruction::UIToFP:
257
  case Instruction::SIToFP:
258
    llvm_unreachable("Should have been handled in walkForwards!");
259

260
  case Instruction::FNeg: {
261
    assert(OpRanges.size() == 1 && "FNeg is a unary operator!");
262
    unsigned Size = OpRanges[0].getBitWidth();
263
    auto Zero = ConstantRange(APInt::getZero(Size));
264
    return Zero.sub(OpRanges[0]);
265
  }
266

267
  case Instruction::FAdd:
268
  case Instruction::FSub:
269
  case Instruction::FMul: {
270
    assert(OpRanges.size() == 2 && "its a binary operator!");
271
    auto BinOp = (Instruction::BinaryOps) I->getOpcode();
272
    return OpRanges[0].binaryOp(BinOp, OpRanges[1]);
273
  }
274

275
  //
276
  // Root-only instructions - we'll only see these if they're the
277
  //                          first node in a walk.
278
  //
279
  case Instruction::FPToUI:
280
  case Instruction::FPToSI: {
281
    assert(OpRanges.size() == 1 && "FPTo[US]I is a unary operator!");
282
    // Note: We're ignoring the casts output size here as that's what the
283
    // caller expects.
284
    auto CastOp = (Instruction::CastOps)I->getOpcode();
285
    return OpRanges[0].castOp(CastOp, MaxIntegerBW+1);
286
  }
287

288
  case Instruction::FCmp:
289
    assert(OpRanges.size() == 2 && "FCmp is a binary operator!");
290
    return OpRanges[0].unionWith(OpRanges[1]);
291
  }
292
}
293

294
// Walk forwards down the list of seen instructions, so we visit defs before
295
// uses.
296
void Float2IntPass::walkForwards() {
297
  std::deque<Instruction *> Worklist;
298
  for (const auto &Pair : SeenInsts)
299
    if (Pair.second == unknownRange())
300
      Worklist.push_back(Pair.first);
301

302
  while (!Worklist.empty()) {
303
    Instruction *I = Worklist.back();
304
    Worklist.pop_back();
305

306
    if (std::optional<ConstantRange> Range = calcRange(I))
307
      seen(I, *Range);
308
    else
309
      Worklist.push_front(I); // Reprocess later.
310
  }
311
}
312

313
// If there is a valid transform to be done, do it.
314
bool Float2IntPass::validateAndTransform(const DataLayout &DL) {
315
  bool MadeChange = false;
316

317
  // Iterate over every disjoint partition of the def-use graph.
318
  for (auto It = ECs.begin(), E = ECs.end(); It != E; ++It) {
319
    ConstantRange R(MaxIntegerBW + 1, false);
320
    bool Fail = false;
321
    Type *ConvertedToTy = nullptr;
322

323
    // For every member of the partition, union all the ranges together.
324
    for (auto MI = ECs.member_begin(It), ME = ECs.member_end();
325
         MI != ME; ++MI) {
326
      Instruction *I = *MI;
327
      auto SeenI = SeenInsts.find(I);
328
      if (SeenI == SeenInsts.end())
329
        continue;
330

331
      R = R.unionWith(SeenI->second);
332
      // We need to ensure I has no users that have not been seen.
333
      // If it does, transformation would be illegal.
334
      //
335
      // Don't count the roots, as they terminate the graphs.
336
      if (!Roots.contains(I)) {
337
        // Set the type of the conversion while we're here.
338
        if (!ConvertedToTy)
339
          ConvertedToTy = I->getType();
340
        for (User *U : I->users()) {
341
          Instruction *UI = dyn_cast<Instruction>(U);
342
          if (!UI || !SeenInsts.contains(UI)) {
343
            LLVM_DEBUG(dbgs() << "F2I: Failing because of " << *U << "\n");
344
            Fail = true;
345
            break;
346
          }
347
        }
348
      }
349
      if (Fail)
350
        break;
351
    }
352

353
    // If the set was empty, or we failed, or the range is poisonous,
354
    // bail out.
355
    if (ECs.member_begin(It) == ECs.member_end() || Fail ||
356
        R.isFullSet() || R.isSignWrappedSet())
357
      continue;
358
    assert(ConvertedToTy && "Must have set the convertedtoty by this point!");
359

360
    // The number of bits required is the maximum of the upper and
361
    // lower limits, plus one so it can be signed.
362
    unsigned MinBW = R.getMinSignedBits() + 1;
363
    LLVM_DEBUG(dbgs() << "F2I: MinBitwidth=" << MinBW << ", R: " << R << "\n");
364

365
    // If we've run off the realms of the exactly representable integers,
366
    // the floating point result will differ from an integer approximation.
367

368
    // Do we need more bits than are in the mantissa of the type we converted
369
    // to? semanticsPrecision returns the number of mantissa bits plus one
370
    // for the sign bit.
371
    unsigned MaxRepresentableBits
372
      = APFloat::semanticsPrecision(ConvertedToTy->getFltSemantics()) - 1;
373
    if (MinBW > MaxRepresentableBits) {
374
      LLVM_DEBUG(dbgs() << "F2I: Value not guaranteed to be representable!\n");
375
      continue;
376
    }
377

378
    // OK, R is known to be representable.
379
    // Pick the smallest legal type that will fit.
380
    Type *Ty = DL.getSmallestLegalIntType(*Ctx, MinBW);
381
    if (!Ty) {
382
      // Every supported target supports 64-bit and 32-bit integers,
383
      // so fallback to a 32 or 64-bit integer if the value fits.
384
      if (MinBW <= 32) {
385
        Ty = Type::getInt32Ty(*Ctx);
386
      } else if (MinBW <= 64) {
387
        Ty = Type::getInt64Ty(*Ctx);
388
      } else {
389
        LLVM_DEBUG(dbgs() << "F2I: Value requires more bits to represent than "
390
                             "the target supports!\n");
391
        continue;
392
      }
393
    }
394

395
    for (auto MI = ECs.member_begin(It), ME = ECs.member_end();
396
         MI != ME; ++MI)
397
      convert(*MI, Ty);
398
    MadeChange = true;
399
  }
400

401
  return MadeChange;
402
}
403

404
// Rewrite I as an integer operation of type ToTy and return the new value.
// Instruction operands are converted first (recursively); floating-point
// constant operands become integer constants of type ToTy. The result is
// cached in ConvertedInsts, and roots additionally have all of their uses
// replaced with the new value.
Value *Float2IntPass::convert(Instruction *I, Type *ToTy) {
  // Already converted this instruction? Reuse the earlier result.
  auto Cached = ConvertedInsts.find(I);
  if (Cached != ConvertedInsts.end())
    return Cached->second;

  const unsigned Opc = I->getOpcode();
  // uitofp/sitofp terminate the path, so their operand is used as-is instead
  // of being converted.
  const bool IsPathEnd =
      Opc == Instruction::UIToFP || Opc == Instruction::SIToFP;

  SmallVector<Value *, 4> IntOps;
  for (Value *Op : I->operands()) {
    if (IsPathEnd) {
      IntOps.push_back(Op);
      continue;
    }
    if (auto *Def = dyn_cast<Instruction>(Op)) {
      IntOps.push_back(convert(Def, ToTy));
      continue;
    }

    auto *FPConst = dyn_cast<ConstantFP>(Op);
    if (!FPConst)
      llvm_unreachable("Unhandled operand type?");

    // Materialize the FP constant as a signed integer of the target width.
    APSInt AsInt(ToTy->getPrimitiveSizeInBits(), /*isUnsigned=*/false);
    bool Exact;
    FPConst->getValueAPF().convertToInteger(
        AsInt, APFloat::rmNearestTiesToEven, &Exact);
    IntOps.push_back(ConstantInt::get(ToTy, AsInt));
  }

  // Now create the replacement, inserted right before I.
  IRBuilder<> Builder(I);
  Value *Replacement = nullptr;
  switch (Opc) {
  default:
    llvm_unreachable("Unhandled instruction!");

  // FP-to-int casts become a resize from ToTy to the original result type.
  case Instruction::FPToUI:
    Replacement = Builder.CreateZExtOrTrunc(IntOps[0], I->getType());
    break;
  case Instruction::FPToSI:
    Replacement = Builder.CreateSExtOrTrunc(IntOps[0], I->getType());
    break;

  case Instruction::FCmp: {
    CmpInst::Predicate IntPred =
        mapFCmpPred(cast<CmpInst>(I)->getPredicate());
    assert(IntPred != CmpInst::BAD_ICMP_PREDICATE && "Unhandled predicate!");
    Replacement =
        Builder.CreateICmp(IntPred, IntOps[0], IntOps[1], I->getName());
    break;
  }

  // Int-to-FP casts become a resize of the integer input to ToTy.
  case Instruction::UIToFP:
    Replacement = Builder.CreateZExtOrTrunc(IntOps[0], ToTy);
    break;
  case Instruction::SIToFP:
    Replacement = Builder.CreateSExtOrTrunc(IntOps[0], ToTy);
    break;

  case Instruction::FNeg:
    Replacement = Builder.CreateNeg(IntOps[0], I->getName());
    break;

  case Instruction::FAdd:
  case Instruction::FSub:
  case Instruction::FMul:
    Replacement = Builder.CreateBinOp(mapBinOpcode(Opc), IntOps[0], IntOps[1],
                                      I->getName());
    break;
  }

  // If we're a root instruction, RAUW.
  if (Roots.contains(I))
    I->replaceAllUsesWith(Replacement);

  ConvertedInsts[I] = Replacement;
  return Replacement;
}
478

479
// Perform dead code elimination on the instructions we just modified.
480
void Float2IntPass::cleanup() {
481
  for (auto &I : reverse(ConvertedInsts))
482
    I.first->eraseFromParent();
483
}
484

485
// Run the pass over F: reset per-function state, find the roots, walk the
// graph in both directions, then transform whatever is legal. Returns true if
// the function was modified.
bool Float2IntPass::runImpl(Function &F, const DominatorTree &DT) {
  LLVM_DEBUG(dbgs() << "F2I: Looking at function " << F.getName() << "\n");

  // Clear out all state left over from a previous function.
  ECs = EquivalenceClasses<Instruction *>();
  SeenInsts.clear();
  ConvertedInsts.clear();
  Roots.clear();

  Ctx = &F.getParent()->getContext();

  // Analysis phases.
  findRoots(F, DT);
  walkBackwards();
  walkForwards();

  // Transformation phase; the replaced instructions are only erased when
  // something was actually converted.
  if (!validateAndTransform(F.getDataLayout()))
    return false;

  cleanup();
  return true;
}
506

507
// New pass manager entry point. Preserves all analyses when nothing changed;
// otherwise only the CFG analyses are preserved.
PreservedAnalyses Float2IntPass::run(Function &F, FunctionAnalysisManager &AM) {
  const DominatorTree &DomTree = AM.getResult<DominatorTreeAnalysis>(F);
  const bool Changed = runImpl(F, DomTree);

  if (Changed) {
    PreservedAnalyses Preserved;
    Preserved.preserveSet<CFGAnalyses>();
    return Preserved;
  }
  return PreservedAnalyses::all();
}
516

517
Product

Resources

Company