Path: blob/main/contrib/llvm-project/llvm/lib/CodeGen/ExpandFp.cpp
213765 views
//===--- ExpandFp.cpp - Expand fp instructions ----------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7// This pass expands certain floating point instructions at the IR level.8//9// It expands ‘fptoui .. to’, ‘fptosi .. to’, ‘uitofp .. to’, ‘sitofp10// .. to’ instructions with a bitwidth above a threshold. This is11// useful for targets like x86_64 that cannot lower fp convertions12// with more than 128 bits.13//14//===----------------------------------------------------------------------===//1516#include "llvm/CodeGen/ExpandFp.h"17#include "llvm/ADT/SmallVector.h"18#include "llvm/Analysis/GlobalsModRef.h"19#include "llvm/CodeGen/Passes.h"20#include "llvm/CodeGen/TargetLowering.h"21#include "llvm/CodeGen/TargetPassConfig.h"22#include "llvm/CodeGen/TargetSubtargetInfo.h"23#include "llvm/IR/IRBuilder.h"24#include "llvm/IR/InstIterator.h"25#include "llvm/IR/PassManager.h"26#include "llvm/InitializePasses.h"27#include "llvm/Pass.h"28#include "llvm/Support/CommandLine.h"29#include "llvm/Target/TargetMachine.h"3031using namespace llvm;3233static cl::opt<unsigned>34ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden,35cl::init(llvm::IntegerType::MAX_INT_BITS),36cl::desc("fp convert instructions on integers with "37"more than <N> bits are expanded."));3839// clang-format off: preserve formatting of the following example4041/// Generate code to convert a fp number to integer, replacing FPToS(U)I with42/// the generated code. This currently generates code similarly to compiler-rt's43/// implementations.44///45/// An example IR generated from compiler-rt/fixsfdi.c looks like below:46/// define dso_local i64 @foo(float noundef %a) local_unnamed_addr #0 {47/// entry:48/// %0 = bitcast float %a to i3249/// %conv.i = zext i32 %0 to i6450/// %tobool.not = icmp sgt i32 %0, -151/// %conv = select i1 %tobool.not, i64 1, i64 -152/// %and = lshr i64 %conv.i, 2353/// %shr = and i64 %and, 25554/// %and2 = and i64 %conv.i, 838860755/// %or = or i64 %and2, 838860856/// %cmp = icmp ult i64 %shr, 12757/// br i1 %cmp, label %cleanup, label %if.end58///59/// if.end: ; preds = %entry60/// %sub = add nuw nsw i64 %shr, 429496716961/// %conv5 = and i64 %sub, 429496723262/// %cmp6.not = icmp eq i64 %conv5, 063/// br i1 %cmp6.not, label %if.end12, label %if.then864///65/// if.then8: ; preds = %if.end66/// %cond11 = select i1 %tobool.not, i64 9223372036854775807, i64 -922337203685477580867/// br label %cleanup68///69/// if.end12: ; preds = %if.end70/// %cmp13 = icmp ult i64 %shr, 15071/// br i1 %cmp13, label %if.then15, label %if.else72///73/// if.then15: ; preds = %if.end1274/// %sub16 = sub nuw nsw i64 150, %shr75/// %shr17 = lshr i64 %or, %sub1676/// %mul = mul nsw i64 %shr17, %conv77/// br label %cleanup78///79/// if.else: ; preds = %if.end1280/// %sub18 = add nsw i64 %shr, -15081/// %shl = shl i64 %or, %sub1882/// %mul19 = mul nsw i64 %shl, %conv83/// br label %cleanup84///85/// cleanup: ; preds = %entry, %if.else, %if.then15, %if.then886/// %retval.0 = phi i64 [ %cond11, %if.then8 ], [ %mul, %if.then15 ], [ %mul19, %if.else ], [ 0, %entry ]87/// ret i64 %retval.088/// }89///90/// Replace fp to integer with generated code.91static void expandFPToI(Instruction *FPToI) {92// clang-format on93IRBuilder<> Builder(FPToI);94auto *FloatVal = FPToI->getOperand(0);95IntegerType *IntTy = cast<IntegerType>(FPToI->getType());9697unsigned BitWidth = FPToI->getType()->getIntegerBitWidth();98unsigned FPMantissaWidth = FloatVal->getType()->getFPMantissaWidth() - 1;99100// FIXME: fp16's range is covered by i32. So `fptoi half` can convert101// to i32 first following a sext/zext to target integer type.102Value *A1 = nullptr;103if (FloatVal->getType()->isHalfTy()) {104if (FPToI->getOpcode() == Instruction::FPToUI) {105Value *A0 = Builder.CreateFPToUI(FloatVal, Builder.getIntNTy(32));106A1 = Builder.CreateZExt(A0, IntTy);107} else { // FPToSI108Value *A0 = Builder.CreateFPToSI(FloatVal, Builder.getIntNTy(32));109A1 = Builder.CreateSExt(A0, IntTy);110}111FPToI->replaceAllUsesWith(A1);112FPToI->dropAllReferences();113FPToI->eraseFromParent();114return;115}116117// fp80 conversion is implemented by fpext to fp128 first then do the118// conversion.119FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;120unsigned FloatWidth =121PowerOf2Ceil(FloatVal->getType()->getScalarSizeInBits());122unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;123unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1;124Value *ImplicitBit = Builder.CreateShl(125Builder.getIntN(BitWidth, 1), Builder.getIntN(BitWidth, FPMantissaWidth));126Value *SignificandMask =127Builder.CreateSub(ImplicitBit, Builder.getIntN(BitWidth, 1));128Value *NegOne = Builder.CreateSExt(129ConstantInt::getSigned(Builder.getInt32Ty(), -1), IntTy);130Value *NegInf =131Builder.CreateShl(ConstantInt::getSigned(IntTy, 1),132ConstantInt::getSigned(IntTy, BitWidth - 1));133134BasicBlock *Entry = Builder.GetInsertBlock();135Function *F = Entry->getParent();136Entry->setName(Twine(Entry->getName(), "fp-to-i-entry"));137BasicBlock *End =138Entry->splitBasicBlock(Builder.GetInsertPoint(), "fp-to-i-cleanup");139BasicBlock *IfEnd =140BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end", F, End);141BasicBlock *IfThen5 =142BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then5", F, End);143BasicBlock *IfEnd9 =144BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end9", F, End);145BasicBlock *IfThen12 =146BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then12", F, End);147BasicBlock *IfElse =148BasicBlock::Create(Builder.getContext(), "fp-to-i-if-else", F, End);149150Entry->getTerminator()->eraseFromParent();151152// entry:153Builder.SetInsertPoint(Entry);154Value *FloatVal0 = FloatVal;155// fp80 conversion is implemented by fpext to fp128 first then do the156// conversion.157if (FloatVal->getType()->isX86_FP80Ty())158FloatVal0 =159Builder.CreateFPExt(FloatVal, Type::getFP128Ty(Builder.getContext()));160Value *ARep0 =161Builder.CreateBitCast(FloatVal0, Builder.getIntNTy(FloatWidth));162Value *ARep = Builder.CreateZExt(ARep0, FPToI->getType());163Value *PosOrNeg = Builder.CreateICmpSGT(164ARep0, ConstantInt::getSigned(Builder.getIntNTy(FloatWidth), -1));165Value *Sign = Builder.CreateSelect(PosOrNeg, ConstantInt::getSigned(IntTy, 1),166ConstantInt::getSigned(IntTy, -1));167Value *And =168Builder.CreateLShr(ARep, Builder.getIntN(BitWidth, FPMantissaWidth));169Value *And2 = Builder.CreateAnd(170And, Builder.getIntN(BitWidth, (1 << ExponentWidth) - 1));171Value *Abs = Builder.CreateAnd(ARep, SignificandMask);172Value *Or = Builder.CreateOr(Abs, ImplicitBit);173Value *Cmp =174Builder.CreateICmpULT(And2, Builder.getIntN(BitWidth, ExponentBias));175Builder.CreateCondBr(Cmp, End, IfEnd);176177// if.end:178Builder.SetInsertPoint(IfEnd);179Value *Add1 = Builder.CreateAdd(180And2, ConstantInt::getSigned(181IntTy, -static_cast<int64_t>(ExponentBias + BitWidth)));182Value *Cmp3 = Builder.CreateICmpULT(183Add1, ConstantInt::getSigned(IntTy, -static_cast<int64_t>(BitWidth)));184Builder.CreateCondBr(Cmp3, IfThen5, IfEnd9);185186// if.then5:187Builder.SetInsertPoint(IfThen5);188Value *PosInf = Builder.CreateXor(NegOne, NegInf);189Value *Cond8 = Builder.CreateSelect(PosOrNeg, PosInf, NegInf);190Builder.CreateBr(End);191192// if.end9:193Builder.SetInsertPoint(IfEnd9);194Value *Cmp10 = Builder.CreateICmpULT(195And2, Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth));196Builder.CreateCondBr(Cmp10, IfThen12, IfElse);197198// if.then12:199Builder.SetInsertPoint(IfThen12);200Value *Sub13 = Builder.CreateSub(201Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth), And2);202Value *Shr14 = Builder.CreateLShr(Or, Sub13);203Value *Mul = Builder.CreateMul(Shr14, Sign);204Builder.CreateBr(End);205206// if.else:207Builder.SetInsertPoint(IfElse);208Value *Sub15 = Builder.CreateAdd(209And2, ConstantInt::getSigned(210IntTy, -static_cast<int64_t>(ExponentBias + FPMantissaWidth)));211Value *Shl = Builder.CreateShl(Or, Sub15);212Value *Mul16 = Builder.CreateMul(Shl, Sign);213Builder.CreateBr(End);214215// cleanup:216Builder.SetInsertPoint(End, End->begin());217PHINode *Retval0 = Builder.CreatePHI(FPToI->getType(), 4);218219Retval0->addIncoming(Cond8, IfThen5);220Retval0->addIncoming(Mul, IfThen12);221Retval0->addIncoming(Mul16, IfElse);222Retval0->addIncoming(Builder.getIntN(BitWidth, 0), Entry);223224FPToI->replaceAllUsesWith(Retval0);225FPToI->dropAllReferences();226FPToI->eraseFromParent();227}228229// clang-format off: preserve formatting of the following example230231/// Generate code to convert a fp number to integer, replacing S(U)IToFP with232/// the generated code. This currently generates code similarly to compiler-rt's233/// implementations. This implementation has an implicit assumption that integer234/// width is larger than fp.235///236/// An example IR generated from compiler-rt/floatdisf.c looks like below:237/// define dso_local float @__floatdisf(i64 noundef %a) local_unnamed_addr #0 {238/// entry:239/// %cmp = icmp eq i64 %a, 0240/// br i1 %cmp, label %return, label %if.end241///242/// if.end: ; preds = %entry243/// %shr = ashr i64 %a, 63244/// %xor = xor i64 %shr, %a245/// %sub = sub nsw i64 %xor, %shr246/// %0 = tail call i64 @llvm.ctlz.i64(i64 %sub, i1 true), !range !5247/// %cast = trunc i64 %0 to i32248/// %sub1 = sub nuw nsw i32 64, %cast249/// %sub2 = xor i32 %cast, 63250/// %cmp3 = icmp ult i32 %cast, 40251/// br i1 %cmp3, label %if.then4, label %if.else252///253/// if.then4: ; preds = %if.end254/// switch i32 %sub1, label %sw.default [255/// i32 25, label %sw.bb256/// i32 26, label %sw.epilog257/// ]258///259/// sw.bb: ; preds = %if.then4260/// %shl = shl i64 %sub, 1261/// br label %sw.epilog262///263/// sw.default: ; preds = %if.then4264/// %sub5 = sub nsw i64 38, %0265/// %sh_prom = and i64 %sub5, 4294967295266/// %shr6 = lshr i64 %sub, %sh_prom267/// %shr9 = lshr i64 274877906943, %0268/// %and = and i64 %shr9, %sub269/// %cmp10 = icmp ne i64 %and, 0270/// %conv11 = zext i1 %cmp10 to i64271/// %or = or i64 %shr6, %conv11272/// br label %sw.epilog273///274/// sw.epilog: ; preds = %sw.default, %if.then4, %sw.bb275/// %a.addr.0 = phi i64 [ %or, %sw.default ], [ %sub, %if.then4 ], [ %shl, %sw.bb ]276/// %1 = lshr i64 %a.addr.0, 2277/// %2 = and i64 %1, 1278/// %or16 = or i64 %2, %a.addr.0279/// %inc = add nsw i64 %or16, 1280/// %3 = and i64 %inc, 67108864281/// %tobool.not = icmp eq i64 %3, 0282/// %spec.select.v = select i1 %tobool.not, i64 2, i64 3283/// %spec.select = ashr i64 %inc, %spec.select.v284/// %spec.select56 = select i1 %tobool.not, i32 %sub2, i32 %sub1285/// br label %if.end26286///287/// if.else: ; preds = %if.end288/// %sub23 = add nuw nsw i64 %0, 4294967256289/// %sh_prom24 = and i64 %sub23, 4294967295290/// %shl25 = shl i64 %sub, %sh_prom24291/// br label %if.end26292///293/// if.end26: ; preds = %sw.epilog, %if.else294/// %a.addr.1 = phi i64 [ %shl25, %if.else ], [ %spec.select, %sw.epilog ]295/// %e.0 = phi i32 [ %sub2, %if.else ], [ %spec.select56, %sw.epilog ]296/// %conv27 = trunc i64 %shr to i32297/// %and28 = and i32 %conv27, -2147483648298/// %add = shl nuw nsw i32 %e.0, 23299/// %shl29 = add nuw nsw i32 %add, 1065353216300/// %conv31 = trunc i64 %a.addr.1 to i32301/// %and32 = and i32 %conv31, 8388607302/// %or30 = or i32 %and32, %and28303/// %or33 = or i32 %or30, %shl29304/// %4 = bitcast i32 %or33 to float305/// br label %return306///307/// return: ; preds = %entry, %if.end26308/// %retval.0 = phi float [ %4, %if.end26 ], [ 0.000000e+00, %entry ]309/// ret float %retval.0310/// }311///312/// Replace integer to fp with generated code.313static void expandIToFP(Instruction *IToFP) {314// clang-format on315IRBuilder<> Builder(IToFP);316auto *IntVal = IToFP->getOperand(0);317IntegerType *IntTy = cast<IntegerType>(IntVal->getType());318319unsigned BitWidth = IntVal->getType()->getIntegerBitWidth();320unsigned FPMantissaWidth = IToFP->getType()->getFPMantissaWidth() - 1;321// fp80 conversion is implemented by conversion tp fp128 first following322// a fptrunc to fp80.323FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;324// FIXME: As there is no related builtins added in compliler-rt,325// here currently utilized the fp32 <-> fp16 lib calls to implement.326FPMantissaWidth = FPMantissaWidth == 10 ? 23 : FPMantissaWidth;327FPMantissaWidth = FPMantissaWidth == 7 ? 23 : FPMantissaWidth;328unsigned FloatWidth = PowerOf2Ceil(FPMantissaWidth);329bool IsSigned = IToFP->getOpcode() == Instruction::SIToFP;330331assert(BitWidth > FloatWidth && "Unexpected conversion. expandIToFP() "332"assumes integer width is larger than fp.");333334Value *Temp1 =335Builder.CreateShl(Builder.getIntN(BitWidth, 1),336Builder.getIntN(BitWidth, FPMantissaWidth + 3));337338BasicBlock *Entry = Builder.GetInsertBlock();339Function *F = Entry->getParent();340Entry->setName(Twine(Entry->getName(), "itofp-entry"));341BasicBlock *End =342Entry->splitBasicBlock(Builder.GetInsertPoint(), "itofp-return");343BasicBlock *IfEnd =344BasicBlock::Create(Builder.getContext(), "itofp-if-end", F, End);345BasicBlock *IfThen4 =346BasicBlock::Create(Builder.getContext(), "itofp-if-then4", F, End);347BasicBlock *SwBB =348BasicBlock::Create(Builder.getContext(), "itofp-sw-bb", F, End);349BasicBlock *SwDefault =350BasicBlock::Create(Builder.getContext(), "itofp-sw-default", F, End);351BasicBlock *SwEpilog =352BasicBlock::Create(Builder.getContext(), "itofp-sw-epilog", F, End);353BasicBlock *IfThen20 =354BasicBlock::Create(Builder.getContext(), "itofp-if-then20", F, End);355BasicBlock *IfElse =356BasicBlock::Create(Builder.getContext(), "itofp-if-else", F, End);357BasicBlock *IfEnd26 =358BasicBlock::Create(Builder.getContext(), "itofp-if-end26", F, End);359360Entry->getTerminator()->eraseFromParent();361362Function *CTLZ =363Intrinsic::getOrInsertDeclaration(F->getParent(), Intrinsic::ctlz, IntTy);364ConstantInt *True = Builder.getTrue();365366// entry:367Builder.SetInsertPoint(Entry);368Value *Cmp = Builder.CreateICmpEQ(IntVal, ConstantInt::getSigned(IntTy, 0));369Builder.CreateCondBr(Cmp, End, IfEnd);370371// if.end:372Builder.SetInsertPoint(IfEnd);373Value *Shr =374Builder.CreateAShr(IntVal, Builder.getIntN(BitWidth, BitWidth - 1));375Value *Xor = Builder.CreateXor(Shr, IntVal);376Value *Sub = Builder.CreateSub(Xor, Shr);377Value *Call = Builder.CreateCall(CTLZ, {IsSigned ? Sub : IntVal, True});378Value *Cast = Builder.CreateTrunc(Call, Builder.getInt32Ty());379int BitWidthNew = FloatWidth == 128 ? BitWidth : 32;380Value *Sub1 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth),381FloatWidth == 128 ? Call : Cast);382Value *Sub2 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth - 1),383FloatWidth == 128 ? Call : Cast);384Value *Cmp3 = Builder.CreateICmpSGT(385Sub1, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1));386Builder.CreateCondBr(Cmp3, IfThen4, IfElse);387388// if.then4:389Builder.SetInsertPoint(IfThen4);390llvm::SwitchInst *SI = Builder.CreateSwitch(Sub1, SwDefault);391SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 2), SwBB);392SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 3), SwEpilog);393394// sw.bb:395Builder.SetInsertPoint(SwBB);396Value *Shl =397Builder.CreateShl(IsSigned ? Sub : IntVal, Builder.getIntN(BitWidth, 1));398Builder.CreateBr(SwEpilog);399400// sw.default:401Builder.SetInsertPoint(SwDefault);402Value *Sub5 = Builder.CreateSub(403Builder.getIntN(BitWidthNew, BitWidth - FPMantissaWidth - 3),404FloatWidth == 128 ? Call : Cast);405Value *ShProm = Builder.CreateZExt(Sub5, IntTy);406Value *Shr6 = Builder.CreateLShr(IsSigned ? Sub : IntVal,407FloatWidth == 128 ? Sub5 : ShProm);408Value *Sub8 =409Builder.CreateAdd(FloatWidth == 128 ? Call : Cast,410Builder.getIntN(BitWidthNew, FPMantissaWidth + 3));411Value *ShProm9 = Builder.CreateZExt(Sub8, IntTy);412Value *Shr9 = Builder.CreateLShr(ConstantInt::getSigned(IntTy, -1),413FloatWidth == 128 ? Sub8 : ShProm9);414Value *And = Builder.CreateAnd(Shr9, IsSigned ? Sub : IntVal);415Value *Cmp10 = Builder.CreateICmpNE(And, Builder.getIntN(BitWidth, 0));416Value *Conv11 = Builder.CreateZExt(Cmp10, IntTy);417Value *Or = Builder.CreateOr(Shr6, Conv11);418Builder.CreateBr(SwEpilog);419420// sw.epilog:421Builder.SetInsertPoint(SwEpilog);422PHINode *AAddr0 = Builder.CreatePHI(IntTy, 3);423AAddr0->addIncoming(Or, SwDefault);424AAddr0->addIncoming(IsSigned ? Sub : IntVal, IfThen4);425AAddr0->addIncoming(Shl, SwBB);426Value *A0 = Builder.CreateTrunc(AAddr0, Builder.getInt32Ty());427Value *A1 = Builder.CreateLShr(A0, Builder.getIntN(32, 2));428Value *A2 = Builder.CreateAnd(A1, Builder.getIntN(32, 1));429Value *Conv16 = Builder.CreateZExt(A2, IntTy);430Value *Or17 = Builder.CreateOr(AAddr0, Conv16);431Value *Inc = Builder.CreateAdd(Or17, Builder.getIntN(BitWidth, 1));432Value *Shr18 = nullptr;433if (IsSigned)434Shr18 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 2));435else436Shr18 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 2));437Value *A3 = Builder.CreateAnd(Inc, Temp1, "a3");438Value *PosOrNeg = Builder.CreateICmpEQ(A3, Builder.getIntN(BitWidth, 0));439Value *ExtractT60 = Builder.CreateTrunc(Shr18, Builder.getIntNTy(FloatWidth));440Value *Extract63 = Builder.CreateLShr(Shr18, Builder.getIntN(BitWidth, 32));441Value *ExtractT64 = nullptr;442if (FloatWidth > 80)443ExtractT64 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());444else445ExtractT64 = Builder.CreateTrunc(Extract63, Builder.getInt32Ty());446Builder.CreateCondBr(PosOrNeg, IfEnd26, IfThen20);447448// if.then20449Builder.SetInsertPoint(IfThen20);450Value *Shr21 = nullptr;451if (IsSigned)452Shr21 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 3));453else454Shr21 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 3));455Value *ExtractT = Builder.CreateTrunc(Shr21, Builder.getIntNTy(FloatWidth));456Value *Extract = Builder.CreateLShr(Shr21, Builder.getIntN(BitWidth, 32));457Value *ExtractT62 = nullptr;458if (FloatWidth > 80)459ExtractT62 = Builder.CreateTrunc(Sub1, Builder.getIntNTy(64));460else461ExtractT62 = Builder.CreateTrunc(Extract, Builder.getIntNTy(32));462Builder.CreateBr(IfEnd26);463464// if.else:465Builder.SetInsertPoint(IfElse);466Value *Sub24 = Builder.CreateAdd(467FloatWidth == 128 ? Call : Cast,468ConstantInt::getSigned(Builder.getIntNTy(BitWidthNew),469-(BitWidth - FPMantissaWidth - 1)));470Value *ShProm25 = Builder.CreateZExt(Sub24, IntTy);471Value *Shl26 = Builder.CreateShl(IsSigned ? Sub : IntVal,472FloatWidth == 128 ? Sub24 : ShProm25);473Value *ExtractT61 = Builder.CreateTrunc(Shl26, Builder.getIntNTy(FloatWidth));474Value *Extract65 = Builder.CreateLShr(Shl26, Builder.getIntN(BitWidth, 32));475Value *ExtractT66 = nullptr;476if (FloatWidth > 80)477ExtractT66 = Builder.CreateTrunc(Sub2, Builder.getIntNTy(64));478else479ExtractT66 = Builder.CreateTrunc(Extract65, Builder.getInt32Ty());480Builder.CreateBr(IfEnd26);481482// if.end26:483Builder.SetInsertPoint(IfEnd26);484PHINode *AAddr1Off0 = Builder.CreatePHI(Builder.getIntNTy(FloatWidth), 3);485AAddr1Off0->addIncoming(ExtractT, IfThen20);486AAddr1Off0->addIncoming(ExtractT60, SwEpilog);487AAddr1Off0->addIncoming(ExtractT61, IfElse);488PHINode *AAddr1Off32 = nullptr;489if (FloatWidth > 32) {490AAddr1Off32 =491Builder.CreatePHI(Builder.getIntNTy(FloatWidth > 80 ? 64 : 32), 3);492AAddr1Off32->addIncoming(ExtractT62, IfThen20);493AAddr1Off32->addIncoming(ExtractT64, SwEpilog);494AAddr1Off32->addIncoming(ExtractT66, IfElse);495}496PHINode *E0 = nullptr;497if (FloatWidth <= 80) {498E0 = Builder.CreatePHI(Builder.getIntNTy(BitWidthNew), 3);499E0->addIncoming(Sub1, IfThen20);500E0->addIncoming(Sub2, SwEpilog);501E0->addIncoming(Sub2, IfElse);502}503Value *And29 = nullptr;504if (FloatWidth > 80) {505Value *Temp2 = Builder.CreateShl(Builder.getIntN(BitWidth, 1),506Builder.getIntN(BitWidth, 63));507And29 = Builder.CreateAnd(Shr, Temp2, "and29");508} else {509Value *Conv28 = Builder.CreateTrunc(Shr, Builder.getIntNTy(32));510And29 = Builder.CreateAnd(511Conv28, ConstantInt::getSigned(Builder.getIntNTy(32), 0x80000000));512}513unsigned TempMod = FPMantissaWidth % 32;514Value *And34 = nullptr;515Value *Shl30 = nullptr;516if (FloatWidth > 80) {517TempMod += 32;518Value *Add = Builder.CreateShl(AAddr1Off32, Builder.getIntN(64, TempMod));519Shl30 = Builder.CreateAdd(520Add,521Builder.getIntN(64, ((1ull << (62ull - TempMod)) - 1ull) << TempMod));522And34 = Builder.CreateZExt(Shl30, Builder.getIntNTy(128));523} else {524Value *Add = Builder.CreateShl(E0, Builder.getIntN(32, TempMod));525Shl30 = Builder.CreateAdd(526Add, Builder.getIntN(32, ((1 << (30 - TempMod)) - 1) << TempMod));527And34 = Builder.CreateAnd(FloatWidth > 32 ? AAddr1Off32 : AAddr1Off0,528Builder.getIntN(32, (1 << TempMod) - 1));529}530Value *Or35 = nullptr;531if (FloatWidth > 80) {532Value *And29Trunc = Builder.CreateTrunc(And29, Builder.getIntNTy(128));533Value *Or31 = Builder.CreateOr(And29Trunc, And34);534Value *Or34 = Builder.CreateShl(Or31, Builder.getIntN(128, 64));535Value *Temp3 = Builder.CreateShl(Builder.getIntN(128, 1),536Builder.getIntN(128, FPMantissaWidth));537Value *Temp4 = Builder.CreateSub(Temp3, Builder.getIntN(128, 1));538Value *A6 = Builder.CreateAnd(AAddr1Off0, Temp4);539Or35 = Builder.CreateOr(Or34, A6);540} else {541Value *Or31 = Builder.CreateOr(And34, And29);542Or35 = Builder.CreateOr(IsSigned ? Or31 : And34, Shl30);543}544Value *A4 = nullptr;545if (IToFP->getType()->isDoubleTy()) {546Value *ZExt1 = Builder.CreateZExt(Or35, Builder.getIntNTy(FloatWidth));547Value *Shl1 = Builder.CreateShl(ZExt1, Builder.getIntN(FloatWidth, 32));548Value *And1 =549Builder.CreateAnd(AAddr1Off0, Builder.getIntN(FloatWidth, 0xFFFFFFFF));550Value *Or1 = Builder.CreateOr(Shl1, And1);551A4 = Builder.CreateBitCast(Or1, IToFP->getType());552} else if (IToFP->getType()->isX86_FP80Ty()) {553Value *A40 =554Builder.CreateBitCast(Or35, Type::getFP128Ty(Builder.getContext()));555A4 = Builder.CreateFPTrunc(A40, IToFP->getType());556} else if (IToFP->getType()->isHalfTy() || IToFP->getType()->isBFloatTy()) {557// Deal with "half" situation. This is a workaround since we don't have558// floattihf.c currently as referring.559Value *A40 =560Builder.CreateBitCast(Or35, Type::getFloatTy(Builder.getContext()));561A4 = Builder.CreateFPTrunc(A40, IToFP->getType());562} else // float type563A4 = Builder.CreateBitCast(Or35, IToFP->getType());564Builder.CreateBr(End);565566// return:567Builder.SetInsertPoint(End, End->begin());568PHINode *Retval0 = Builder.CreatePHI(IToFP->getType(), 2);569Retval0->addIncoming(A4, IfEnd26);570Retval0->addIncoming(ConstantFP::getZero(IToFP->getType(), false), Entry);571572IToFP->replaceAllUsesWith(Retval0);573IToFP->dropAllReferences();574IToFP->eraseFromParent();575}576577static void scalarize(Instruction *I, SmallVectorImpl<Instruction *> &Replace) {578VectorType *VTy = cast<FixedVectorType>(I->getType());579580IRBuilder<> Builder(I);581582unsigned NumElements = VTy->getElementCount().getFixedValue();583Value *Result = PoisonValue::get(VTy);584for (unsigned Idx = 0; Idx < NumElements; ++Idx) {585Value *Ext = Builder.CreateExtractElement(I->getOperand(0), Idx);586Value *Cast = Builder.CreateCast(cast<CastInst>(I)->getOpcode(), Ext,587I->getType()->getScalarType());588Result = Builder.CreateInsertElement(Result, Cast, Idx);589if (isa<Instruction>(Cast))590Replace.push_back(cast<Instruction>(Cast));591}592I->replaceAllUsesWith(Result);593I->dropAllReferences();594I->eraseFromParent();595}596597static bool runImpl(Function &F, const TargetLowering &TLI) {598SmallVector<Instruction *, 4> Replace;599SmallVector<Instruction *, 4> ReplaceVector;600bool Modified = false;601602unsigned MaxLegalFpConvertBitWidth =603TLI.getMaxLargeFPConvertBitWidthSupported();604if (ExpandFpConvertBits != llvm::IntegerType::MAX_INT_BITS)605MaxLegalFpConvertBitWidth = ExpandFpConvertBits;606607if (MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS)608return false;609610for (auto &I : instructions(F)) {611switch (I.getOpcode()) {612case Instruction::FPToUI:613case Instruction::FPToSI: {614// TODO: This pass doesn't handle scalable vectors.615if (I.getOperand(0)->getType()->isScalableTy())616continue;617618auto *IntTy = cast<IntegerType>(I.getType()->getScalarType());619if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)620continue;621622if (I.getOperand(0)->getType()->isVectorTy())623ReplaceVector.push_back(&I);624else625Replace.push_back(&I);626Modified = true;627break;628}629case Instruction::UIToFP:630case Instruction::SIToFP: {631// TODO: This pass doesn't handle scalable vectors.632if (I.getOperand(0)->getType()->isScalableTy())633continue;634635auto *IntTy =636cast<IntegerType>(I.getOperand(0)->getType()->getScalarType());637if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)638continue;639640if (I.getOperand(0)->getType()->isVectorTy())641ReplaceVector.push_back(&I);642else643Replace.push_back(&I);644Modified = true;645break;646}647default:648break;649}650}651652while (!ReplaceVector.empty()) {653Instruction *I = ReplaceVector.pop_back_val();654scalarize(I, Replace);655}656657if (Replace.empty())658return false;659660while (!Replace.empty()) {661Instruction *I = Replace.pop_back_val();662if (I->getOpcode() == Instruction::FPToUI ||663I->getOpcode() == Instruction::FPToSI) {664expandFPToI(I);665} else {666expandIToFP(I);667}668}669670return Modified;671}672673namespace {674class ExpandFpLegacyPass : public FunctionPass {675public:676static char ID;677678ExpandFpLegacyPass() : FunctionPass(ID) {679initializeExpandFpLegacyPassPass(*PassRegistry::getPassRegistry());680}681682bool runOnFunction(Function &F) override {683auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();684auto *TLI = TM->getSubtargetImpl(F)->getTargetLowering();685return runImpl(F, *TLI);686}687688void getAnalysisUsage(AnalysisUsage &AU) const override {689AU.addRequired<TargetPassConfig>();690AU.addPreserved<AAResultsWrapperPass>();691AU.addPreserved<GlobalsAAWrapperPass>();692}693};694} // namespace695696PreservedAnalyses ExpandFpPass::run(Function &F, FunctionAnalysisManager &FAM) {697const TargetSubtargetInfo *STI = TM->getSubtargetImpl(F);698return runImpl(F, *STI->getTargetLowering()) ? PreservedAnalyses::none()699: PreservedAnalyses::all();700}701702char ExpandFpLegacyPass::ID = 0;703INITIALIZE_PASS_BEGIN(ExpandFpLegacyPass, "expand-fp",704"Expand certain fp instructions", false, false)705INITIALIZE_PASS_END(ExpandFpLegacyPass, "expand-fp", "Expand fp", false, false)706707FunctionPass *llvm::createExpandFpPass() { return new ExpandFpLegacyPass(); }708709710