Path: blob/main/contrib/llvm-project/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp
35234 views
//===--- ExpandLargeFpConvert.cpp - Expand large fp convert----------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//89// This pass expands ‘fptoui .. to’, ‘fptosi .. to’, ‘uitofp .. to’,10// ‘sitofp .. to’ instructions with a bitwidth above a threshold into11// auto-generated functions. This is useful for targets like x86_64 that cannot12// lower fp convertions with more than 128 bits.13//14//===----------------------------------------------------------------------===//1516#include "llvm/CodeGen/ExpandLargeFpConvert.h"17#include "llvm/ADT/SmallVector.h"18#include "llvm/ADT/StringExtras.h"19#include "llvm/Analysis/GlobalsModRef.h"20#include "llvm/CodeGen/Passes.h"21#include "llvm/CodeGen/TargetLowering.h"22#include "llvm/CodeGen/TargetPassConfig.h"23#include "llvm/CodeGen/TargetSubtargetInfo.h"24#include "llvm/IR/IRBuilder.h"25#include "llvm/IR/InstIterator.h"26#include "llvm/IR/PassManager.h"27#include "llvm/InitializePasses.h"28#include "llvm/Pass.h"29#include "llvm/Support/CommandLine.h"30#include "llvm/Target/TargetMachine.h"3132using namespace llvm;3334static cl::opt<unsigned>35ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden,36cl::init(llvm::IntegerType::MAX_INT_BITS),37cl::desc("fp convert instructions on integers with "38"more than <N> bits are expanded."));3940/// Generate code to convert a fp number to integer, replacing FPToS(U)I with41/// the generated code. This currently generates code similarly to compiler-rt's42/// implementations.43///44/// An example IR generated from compiler-rt/fixsfdi.c looks like below:45/// define dso_local i64 @foo(float noundef %a) local_unnamed_addr #0 {46/// entry:47/// %0 = bitcast float %a to i3248/// %conv.i = zext i32 %0 to i6449/// %tobool.not = icmp sgt i32 %0, -150/// %conv = select i1 %tobool.not, i64 1, i64 -151/// %and = lshr i64 %conv.i, 2352/// %shr = and i64 %and, 25553/// %and2 = and i64 %conv.i, 838860754/// %or = or i64 %and2, 838860855/// %cmp = icmp ult i64 %shr, 12756/// br i1 %cmp, label %cleanup, label %if.end57///58/// if.end: ; preds = %entry59/// %sub = add nuw nsw i64 %shr, 429496716960/// %conv5 = and i64 %sub, 429496723261/// %cmp6.not = icmp eq i64 %conv5, 062/// br i1 %cmp6.not, label %if.end12, label %if.then863///64/// if.then8: ; preds = %if.end65/// %cond11 = select i1 %tobool.not, i64 9223372036854775807, i64 -922337203685477580866/// br label %cleanup67///68/// if.end12: ; preds = %if.end69/// %cmp13 = icmp ult i64 %shr, 15070/// br i1 %cmp13, label %if.then15, label %if.else71///72/// if.then15: ; preds = %if.end1273/// %sub16 = sub nuw nsw i64 150, %shr74/// %shr17 = lshr i64 %or, %sub1675/// %mul = mul nsw i64 %shr17, %conv76/// br label %cleanup77///78/// if.else: ; preds = %if.end1279/// %sub18 = add nsw i64 %shr, -15080/// %shl = shl i64 %or, %sub1881/// %mul19 = mul nsw i64 %shl, %conv82/// br label %cleanup83///84/// cleanup: ; preds = %entry, %if.else, %if.then15, %if.then885/// %retval.0 = phi i64 [ %cond11, %if.then8 ], [ %mul, %if.then15 ], [ %mul19, %if.else ], [ 0, %entry ]86/// ret i64 %retval.087/// }88///89/// Replace fp to integer with generated code.90static void expandFPToI(Instruction *FPToI) {91IRBuilder<> Builder(FPToI);92auto *FloatVal = FPToI->getOperand(0);93IntegerType *IntTy = cast<IntegerType>(FPToI->getType());9495unsigned BitWidth = FPToI->getType()->getIntegerBitWidth();96unsigned FPMantissaWidth = FloatVal->getType()->getFPMantissaWidth() - 1;9798// FIXME: fp16's range is covered by i32. So `fptoi half` can convert99// to i32 first following a sext/zext to target integer type.100Value *A1 = nullptr;101if (FloatVal->getType()->isHalfTy()) {102if (FPToI->getOpcode() == Instruction::FPToUI) {103Value *A0 = Builder.CreateFPToUI(FloatVal, Builder.getIntNTy(32));104A1 = Builder.CreateZExt(A0, IntTy);105} else { // FPToSI106Value *A0 = Builder.CreateFPToSI(FloatVal, Builder.getIntNTy(32));107A1 = Builder.CreateSExt(A0, IntTy);108}109FPToI->replaceAllUsesWith(A1);110FPToI->dropAllReferences();111FPToI->eraseFromParent();112return;113}114115// fp80 conversion is implemented by fpext to fp128 first then do the116// conversion.117FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;118unsigned FloatWidth =119PowerOf2Ceil(FloatVal->getType()->getScalarSizeInBits());120unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;121unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1;122Value *ImplicitBit = Builder.CreateShl(123Builder.getIntN(BitWidth, 1), Builder.getIntN(BitWidth, FPMantissaWidth));124Value *SignificandMask =125Builder.CreateSub(ImplicitBit, Builder.getIntN(BitWidth, 1));126Value *NegOne = Builder.CreateSExt(127ConstantInt::getSigned(Builder.getInt32Ty(), -1), IntTy);128Value *NegInf =129Builder.CreateShl(ConstantInt::getSigned(IntTy, 1),130ConstantInt::getSigned(IntTy, BitWidth - 1));131132BasicBlock *Entry = Builder.GetInsertBlock();133Function *F = Entry->getParent();134Entry->setName(Twine(Entry->getName(), "fp-to-i-entry"));135BasicBlock *End =136Entry->splitBasicBlock(Builder.GetInsertPoint(), "fp-to-i-cleanup");137BasicBlock *IfEnd =138BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end", F, End);139BasicBlock *IfThen5 =140BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then5", F, End);141BasicBlock *IfEnd9 =142BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end9", F, End);143BasicBlock *IfThen12 =144BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then12", F, End);145BasicBlock *IfElse =146BasicBlock::Create(Builder.getContext(), "fp-to-i-if-else", F, End);147148Entry->getTerminator()->eraseFromParent();149150// entry:151Builder.SetInsertPoint(Entry);152Value *FloatVal0 = FloatVal;153// fp80 conversion is implemented by fpext to fp128 first then do the154// conversion.155if (FloatVal->getType()->isX86_FP80Ty())156FloatVal0 =157Builder.CreateFPExt(FloatVal, Type::getFP128Ty(Builder.getContext()));158Value *ARep0 =159Builder.CreateBitCast(FloatVal0, Builder.getIntNTy(FloatWidth));160Value *ARep = Builder.CreateZExt(ARep0, FPToI->getType());161Value *PosOrNeg = Builder.CreateICmpSGT(162ARep0, ConstantInt::getSigned(Builder.getIntNTy(FloatWidth), -1));163Value *Sign = Builder.CreateSelect(PosOrNeg, ConstantInt::getSigned(IntTy, 1),164ConstantInt::getSigned(IntTy, -1));165Value *And =166Builder.CreateLShr(ARep, Builder.getIntN(BitWidth, FPMantissaWidth));167Value *And2 = Builder.CreateAnd(168And, Builder.getIntN(BitWidth, (1 << ExponentWidth) - 1));169Value *Abs = Builder.CreateAnd(ARep, SignificandMask);170Value *Or = Builder.CreateOr(Abs, ImplicitBit);171Value *Cmp =172Builder.CreateICmpULT(And2, Builder.getIntN(BitWidth, ExponentBias));173Builder.CreateCondBr(Cmp, End, IfEnd);174175// if.end:176Builder.SetInsertPoint(IfEnd);177Value *Add1 = Builder.CreateAdd(178And2, ConstantInt::getSigned(179IntTy, -static_cast<int64_t>(ExponentBias + BitWidth)));180Value *Cmp3 = Builder.CreateICmpULT(181Add1, ConstantInt::getSigned(IntTy, -static_cast<int64_t>(BitWidth)));182Builder.CreateCondBr(Cmp3, IfThen5, IfEnd9);183184// if.then5:185Builder.SetInsertPoint(IfThen5);186Value *PosInf = Builder.CreateXor(NegOne, NegInf);187Value *Cond8 = Builder.CreateSelect(PosOrNeg, PosInf, NegInf);188Builder.CreateBr(End);189190// if.end9:191Builder.SetInsertPoint(IfEnd9);192Value *Cmp10 = Builder.CreateICmpULT(193And2, Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth));194Builder.CreateCondBr(Cmp10, IfThen12, IfElse);195196// if.then12:197Builder.SetInsertPoint(IfThen12);198Value *Sub13 = Builder.CreateSub(199Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth), And2);200Value *Shr14 = Builder.CreateLShr(Or, Sub13);201Value *Mul = Builder.CreateMul(Shr14, Sign);202Builder.CreateBr(End);203204// if.else:205Builder.SetInsertPoint(IfElse);206Value *Sub15 = Builder.CreateAdd(207And2, ConstantInt::getSigned(208IntTy, -static_cast<int64_t>(ExponentBias + FPMantissaWidth)));209Value *Shl = Builder.CreateShl(Or, Sub15);210Value *Mul16 = Builder.CreateMul(Shl, Sign);211Builder.CreateBr(End);212213// cleanup:214Builder.SetInsertPoint(End, End->begin());215PHINode *Retval0 = Builder.CreatePHI(FPToI->getType(), 4);216217Retval0->addIncoming(Cond8, IfThen5);218Retval0->addIncoming(Mul, IfThen12);219Retval0->addIncoming(Mul16, IfElse);220Retval0->addIncoming(Builder.getIntN(BitWidth, 0), Entry);221222FPToI->replaceAllUsesWith(Retval0);223FPToI->dropAllReferences();224FPToI->eraseFromParent();225}226227/// Generate code to convert a fp number to integer, replacing S(U)IToFP with228/// the generated code. This currently generates code similarly to compiler-rt's229/// implementations. This implementation has an implicit assumption that integer230/// width is larger than fp.231///232/// An example IR generated from compiler-rt/floatdisf.c looks like below:233/// define dso_local float @__floatdisf(i64 noundef %a) local_unnamed_addr #0 {234/// entry:235/// %cmp = icmp eq i64 %a, 0236/// br i1 %cmp, label %return, label %if.end237///238/// if.end: ; preds = %entry239/// %shr = ashr i64 %a, 63240/// %xor = xor i64 %shr, %a241/// %sub = sub nsw i64 %xor, %shr242/// %0 = tail call i64 @llvm.ctlz.i64(i64 %sub, i1 true), !range !5243/// %cast = trunc i64 %0 to i32244/// %sub1 = sub nuw nsw i32 64, %cast245/// %sub2 = xor i32 %cast, 63246/// %cmp3 = icmp ult i32 %cast, 40247/// br i1 %cmp3, label %if.then4, label %if.else248///249/// if.then4: ; preds = %if.end250/// switch i32 %sub1, label %sw.default [251/// i32 25, label %sw.bb252/// i32 26, label %sw.epilog253/// ]254///255/// sw.bb: ; preds = %if.then4256/// %shl = shl i64 %sub, 1257/// br label %sw.epilog258///259/// sw.default: ; preds = %if.then4260/// %sub5 = sub nsw i64 38, %0261/// %sh_prom = and i64 %sub5, 4294967295262/// %shr6 = lshr i64 %sub, %sh_prom263/// %shr9 = lshr i64 274877906943, %0264/// %and = and i64 %shr9, %sub265/// %cmp10 = icmp ne i64 %and, 0266/// %conv11 = zext i1 %cmp10 to i64267/// %or = or i64 %shr6, %conv11268/// br label %sw.epilog269///270/// sw.epilog: ; preds = %sw.default, %if.then4, %sw.bb271/// %a.addr.0 = phi i64 [ %or, %sw.default ], [ %sub, %if.then4 ], [ %shl, %sw.bb ]272/// %1 = lshr i64 %a.addr.0, 2273/// %2 = and i64 %1, 1274/// %or16 = or i64 %2, %a.addr.0275/// %inc = add nsw i64 %or16, 1276/// %3 = and i64 %inc, 67108864277/// %tobool.not = icmp eq i64 %3, 0278/// %spec.select.v = select i1 %tobool.not, i64 2, i64 3279/// %spec.select = ashr i64 %inc, %spec.select.v280/// %spec.select56 = select i1 %tobool.not, i32 %sub2, i32 %sub1281/// br label %if.end26282///283/// if.else: ; preds = %if.end284/// %sub23 = add nuw nsw i64 %0, 4294967256285/// %sh_prom24 = and i64 %sub23, 4294967295286/// %shl25 = shl i64 %sub, %sh_prom24287/// br label %if.end26288///289/// if.end26: ; preds = %sw.epilog, %if.else290/// %a.addr.1 = phi i64 [ %shl25, %if.else ], [ %spec.select, %sw.epilog ]291/// %e.0 = phi i32 [ %sub2, %if.else ], [ %spec.select56, %sw.epilog ]292/// %conv27 = trunc i64 %shr to i32293/// %and28 = and i32 %conv27, -2147483648294/// %add = shl nuw nsw i32 %e.0, 23295/// %shl29 = add nuw nsw i32 %add, 1065353216296/// %conv31 = trunc i64 %a.addr.1 to i32297/// %and32 = and i32 %conv31, 8388607298/// %or30 = or i32 %and32, %and28299/// %or33 = or i32 %or30, %shl29300/// %4 = bitcast i32 %or33 to float301/// br label %return302///303/// return: ; preds = %entry, %if.end26304/// %retval.0 = phi float [ %4, %if.end26 ], [ 0.000000e+00, %entry ]305/// ret float %retval.0306/// }307///308/// Replace integer to fp with generated code.309static void expandIToFP(Instruction *IToFP) {310IRBuilder<> Builder(IToFP);311auto *IntVal = IToFP->getOperand(0);312IntegerType *IntTy = cast<IntegerType>(IntVal->getType());313314unsigned BitWidth = IntVal->getType()->getIntegerBitWidth();315unsigned FPMantissaWidth = IToFP->getType()->getFPMantissaWidth() - 1;316// fp80 conversion is implemented by conversion tp fp128 first following317// a fptrunc to fp80.318FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;319// FIXME: As there is no related builtins added in compliler-rt,320// here currently utilized the fp32 <-> fp16 lib calls to implement.321FPMantissaWidth = FPMantissaWidth == 10 ? 23 : FPMantissaWidth;322FPMantissaWidth = FPMantissaWidth == 7 ? 23 : FPMantissaWidth;323unsigned FloatWidth = PowerOf2Ceil(FPMantissaWidth);324bool IsSigned = IToFP->getOpcode() == Instruction::SIToFP;325326assert(BitWidth > FloatWidth && "Unexpected conversion. expandIToFP() "327"assumes integer width is larger than fp.");328329Value *Temp1 =330Builder.CreateShl(Builder.getIntN(BitWidth, 1),331Builder.getIntN(BitWidth, FPMantissaWidth + 3));332333BasicBlock *Entry = Builder.GetInsertBlock();334Function *F = Entry->getParent();335Entry->setName(Twine(Entry->getName(), "itofp-entry"));336BasicBlock *End =337Entry->splitBasicBlock(Builder.GetInsertPoint(), "itofp-return");338BasicBlock *IfEnd =339BasicBlock::Create(Builder.getContext(), "itofp-if-end", F, End);340BasicBlock *IfThen4 =341BasicBlock::Create(Builder.getContext(), "itofp-if-then4", F, End);342BasicBlock *SwBB =343BasicBlock::Create(Builder.getContext(), "itofp-sw-bb", F, End);344BasicBlock *SwDefault =345BasicBlock::Create(Builder.getContext(), "itofp-sw-default", F, End);346BasicBlock *SwEpilog =347BasicBlock::Create(Builder.getContext(), "itofp-sw-epilog", F, End);348BasicBlock *IfThen20 =349BasicBlock::Create(Builder.getContext(), "itofp-if-then20", F, End);350BasicBlock *IfElse =351BasicBlock::Create(Builder.getContext(), "itofp-if-else", F, End);352BasicBlock *IfEnd26 =353BasicBlock::Create(Builder.getContext(), "itofp-if-end26", F, End);354355Entry->getTerminator()->eraseFromParent();356357Function *CTLZ =358Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, IntTy);359ConstantInt *True = Builder.getTrue();360361// entry:362Builder.SetInsertPoint(Entry);363Value *Cmp = Builder.CreateICmpEQ(IntVal, ConstantInt::getSigned(IntTy, 0));364Builder.CreateCondBr(Cmp, End, IfEnd);365366// if.end:367Builder.SetInsertPoint(IfEnd);368Value *Shr =369Builder.CreateAShr(IntVal, Builder.getIntN(BitWidth, BitWidth - 1));370Value *Xor = Builder.CreateXor(Shr, IntVal);371Value *Sub = Builder.CreateSub(Xor, Shr);372Value *Call = Builder.CreateCall(CTLZ, {IsSigned ? Sub : IntVal, True});373Value *Cast = Builder.CreateTrunc(Call, Builder.getInt32Ty());374int BitWidthNew = FloatWidth == 128 ? BitWidth : 32;375Value *Sub1 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth),376FloatWidth == 128 ? Call : Cast);377Value *Sub2 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth - 1),378FloatWidth == 128 ? Call : Cast);379Value *Cmp3 = Builder.CreateICmpSGT(380Sub1, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1));381Builder.CreateCondBr(Cmp3, IfThen4, IfElse);382383// if.then4:384Builder.SetInsertPoint(IfThen4);385llvm::SwitchInst *SI = Builder.CreateSwitch(Sub1, SwDefault);386SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 2), SwBB);387SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 3), SwEpilog);388389// sw.bb:390Builder.SetInsertPoint(SwBB);391Value *Shl =392Builder.CreateShl(IsSigned ? Sub : IntVal, Builder.getIntN(BitWidth, 1));393Builder.CreateBr(SwEpilog);394395// sw.default:396Builder.SetInsertPoint(SwDefault);397Value *Sub5 = Builder.CreateSub(398Builder.getIntN(BitWidthNew, BitWidth - FPMantissaWidth - 3),399FloatWidth == 128 ? Call : Cast);400Value *ShProm = Builder.CreateZExt(Sub5, IntTy);401Value *Shr6 = Builder.CreateLShr(IsSigned ? Sub : IntVal,402FloatWidth == 128 ? Sub5 : ShProm);403Value *Sub8 =404Builder.CreateAdd(FloatWidth == 128 ? Call : Cast,405Builder.getIntN(BitWidthNew, FPMantissaWidth + 3));406Value *ShProm9 = Builder.CreateZExt(Sub8, IntTy);407Value *Shr9 = Builder.CreateLShr(ConstantInt::getSigned(IntTy, -1),408FloatWidth == 128 ? Sub8 : ShProm9);409Value *And = Builder.CreateAnd(Shr9, IsSigned ? Sub : IntVal);410Value *Cmp10 = Builder.CreateICmpNE(And, Builder.getIntN(BitWidth, 0));411Value *Conv11 = Builder.CreateZExt(Cmp10, IntTy);412Value *Or = Builder.CreateOr(Shr6, Conv11);413Builder.CreateBr(SwEpilog);414415// sw.epilog:416Builder.SetInsertPoint(SwEpilog);417PHINode *AAddr0 = Builder.CreatePHI(IntTy, 3);418AAddr0->addIncoming(Or, SwDefault);419AAddr0->addIncoming(IsSigned ? Sub : IntVal, IfThen4);420AAddr0->addIncoming(Shl, SwBB);421Value *A0 = Builder.CreateTrunc(AAddr0, Builder.getInt32Ty());422Value *A1 = Builder.CreateLShr(A0, Builder.getIntN(32, 2));423Value *A2 = Builder.CreateAnd(A1, Builder.getIntN(32, 1));424Value *Conv16 = Builder.CreateZExt(A2, IntTy);425Value *Or17 = Builder.CreateOr(AAddr0, Conv16);426Value *Inc = Builder.CreateAdd(Or17, Builder.getIntN(BitWidth, 1));427Value *Shr18 = nullptr;428if (IsSigned)429Shr18 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 2));430else431Shr18 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 2));432Value *A3 = Builder.CreateAnd(Inc, Temp1, "a3");433Value *PosOrNeg = Builder.CreateICmpEQ(A3, Builder.getIntN(BitWidth, 0));434Value *ExtractT60 = Builder.CreateTrunc(Shr18, Builder.getIntNTy(FloatWidth));435Value *Extract63 = Builder.CreateLShr(Shr18, Builder.getIntN(BitWidth, 32));436Value *ExtractT64 = nullptr;437if (FloatWidth > 80)438ExtractT64 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());439else440ExtractT64 = Builder.CreateTrunc(Extract63, Builder.getInt32Ty());441Builder.CreateCondBr(PosOrNeg, IfEnd26, IfThen20);442443// if.then20444Builder.SetInsertPoint(IfThen20);445Value *Shr21 = nullptr;446if (IsSigned)447Shr21 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 3));448else449Shr21 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 3));450Value *ExtractT = Builder.CreateTrunc(Shr21, Builder.getIntNTy(FloatWidth));451Value *Extract = Builder.CreateLShr(Shr21, Builder.getIntN(BitWidth, 32));452Value *ExtractT62 = nullptr;453if (FloatWidth > 80)454ExtractT62 = Builder.CreateTrunc(Sub1, Builder.getIntNTy(64));455else456ExtractT62 = Builder.CreateTrunc(Extract, Builder.getIntNTy(32));457Builder.CreateBr(IfEnd26);458459// if.else:460Builder.SetInsertPoint(IfElse);461Value *Sub24 = Builder.CreateAdd(462FloatWidth == 128 ? Call : Cast,463ConstantInt::getSigned(Builder.getIntNTy(BitWidthNew),464-(BitWidth - FPMantissaWidth - 1)));465Value *ShProm25 = Builder.CreateZExt(Sub24, IntTy);466Value *Shl26 = Builder.CreateShl(IsSigned ? Sub : IntVal,467FloatWidth == 128 ? Sub24 : ShProm25);468Value *ExtractT61 = Builder.CreateTrunc(Shl26, Builder.getIntNTy(FloatWidth));469Value *Extract65 = Builder.CreateLShr(Shl26, Builder.getIntN(BitWidth, 32));470Value *ExtractT66 = nullptr;471if (FloatWidth > 80)472ExtractT66 = Builder.CreateTrunc(Sub2, Builder.getIntNTy(64));473else474ExtractT66 = Builder.CreateTrunc(Extract65, Builder.getInt32Ty());475Builder.CreateBr(IfEnd26);476477// if.end26:478Builder.SetInsertPoint(IfEnd26);479PHINode *AAddr1Off0 = Builder.CreatePHI(Builder.getIntNTy(FloatWidth), 3);480AAddr1Off0->addIncoming(ExtractT, IfThen20);481AAddr1Off0->addIncoming(ExtractT60, SwEpilog);482AAddr1Off0->addIncoming(ExtractT61, IfElse);483PHINode *AAddr1Off32 = nullptr;484if (FloatWidth > 32) {485AAddr1Off32 =486Builder.CreatePHI(Builder.getIntNTy(FloatWidth > 80 ? 64 : 32), 3);487AAddr1Off32->addIncoming(ExtractT62, IfThen20);488AAddr1Off32->addIncoming(ExtractT64, SwEpilog);489AAddr1Off32->addIncoming(ExtractT66, IfElse);490}491PHINode *E0 = nullptr;492if (FloatWidth <= 80) {493E0 = Builder.CreatePHI(Builder.getIntNTy(BitWidthNew), 3);494E0->addIncoming(Sub1, IfThen20);495E0->addIncoming(Sub2, SwEpilog);496E0->addIncoming(Sub2, IfElse);497}498Value *And29 = nullptr;499if (FloatWidth > 80) {500Value *Temp2 = Builder.CreateShl(Builder.getIntN(BitWidth, 1),501Builder.getIntN(BitWidth, 63));502And29 = Builder.CreateAnd(Shr, Temp2, "and29");503} else {504Value *Conv28 = Builder.CreateTrunc(Shr, Builder.getIntNTy(32));505And29 = Builder.CreateAnd(506Conv28, ConstantInt::getSigned(Builder.getIntNTy(32), 0x80000000));507}508unsigned TempMod = FPMantissaWidth % 32;509Value *And34 = nullptr;510Value *Shl30 = nullptr;511if (FloatWidth > 80) {512TempMod += 32;513Value *Add = Builder.CreateShl(AAddr1Off32, Builder.getIntN(64, TempMod));514Shl30 = Builder.CreateAdd(515Add,516Builder.getIntN(64, ((1ull << (62ull - TempMod)) - 1ull) << TempMod));517And34 = Builder.CreateZExt(Shl30, Builder.getIntNTy(128));518} else {519Value *Add = Builder.CreateShl(E0, Builder.getIntN(32, TempMod));520Shl30 = Builder.CreateAdd(521Add, Builder.getIntN(32, ((1 << (30 - TempMod)) - 1) << TempMod));522And34 = Builder.CreateAnd(FloatWidth > 32 ? AAddr1Off32 : AAddr1Off0,523Builder.getIntN(32, (1 << TempMod) - 1));524}525Value *Or35 = nullptr;526if (FloatWidth > 80) {527Value *And29Trunc = Builder.CreateTrunc(And29, Builder.getIntNTy(128));528Value *Or31 = Builder.CreateOr(And29Trunc, And34);529Value *Or34 = Builder.CreateShl(Or31, Builder.getIntN(128, 64));530Value *Temp3 = Builder.CreateShl(Builder.getIntN(128, 1),531Builder.getIntN(128, FPMantissaWidth));532Value *Temp4 = Builder.CreateSub(Temp3, Builder.getIntN(128, 1));533Value *A6 = Builder.CreateAnd(AAddr1Off0, Temp4);534Or35 = Builder.CreateOr(Or34, A6);535} else {536Value *Or31 = Builder.CreateOr(And34, And29);537Or35 = Builder.CreateOr(IsSigned ? Or31 : And34, Shl30);538}539Value *A4 = nullptr;540if (IToFP->getType()->isDoubleTy()) {541Value *ZExt1 = Builder.CreateZExt(Or35, Builder.getIntNTy(FloatWidth));542Value *Shl1 = Builder.CreateShl(ZExt1, Builder.getIntN(FloatWidth, 32));543Value *And1 =544Builder.CreateAnd(AAddr1Off0, Builder.getIntN(FloatWidth, 0xFFFFFFFF));545Value *Or1 = Builder.CreateOr(Shl1, And1);546A4 = Builder.CreateBitCast(Or1, IToFP->getType());547} else if (IToFP->getType()->isX86_FP80Ty()) {548Value *A40 =549Builder.CreateBitCast(Or35, Type::getFP128Ty(Builder.getContext()));550A4 = Builder.CreateFPTrunc(A40, IToFP->getType());551} else if (IToFP->getType()->isHalfTy() || IToFP->getType()->isBFloatTy()) {552// Deal with "half" situation. This is a workaround since we don't have553// floattihf.c currently as referring.554Value *A40 =555Builder.CreateBitCast(Or35, Type::getFloatTy(Builder.getContext()));556A4 = Builder.CreateFPTrunc(A40, IToFP->getType());557} else // float type558A4 = Builder.CreateBitCast(Or35, IToFP->getType());559Builder.CreateBr(End);560561// return:562Builder.SetInsertPoint(End, End->begin());563PHINode *Retval0 = Builder.CreatePHI(IToFP->getType(), 2);564Retval0->addIncoming(A4, IfEnd26);565Retval0->addIncoming(ConstantFP::getZero(IToFP->getType(), false), Entry);566567IToFP->replaceAllUsesWith(Retval0);568IToFP->dropAllReferences();569IToFP->eraseFromParent();570}571572static void scalarize(Instruction *I, SmallVectorImpl<Instruction *> &Replace) {573VectorType *VTy = cast<FixedVectorType>(I->getType());574575IRBuilder<> Builder(I);576577unsigned NumElements = VTy->getElementCount().getFixedValue();578Value *Result = PoisonValue::get(VTy);579for (unsigned Idx = 0; Idx < NumElements; ++Idx) {580Value *Ext = Builder.CreateExtractElement(I->getOperand(0), Idx);581Value *Cast = Builder.CreateCast(cast<CastInst>(I)->getOpcode(), Ext,582I->getType()->getScalarType());583Result = Builder.CreateInsertElement(Result, Cast, Idx);584if (isa<Instruction>(Cast))585Replace.push_back(cast<Instruction>(Cast));586}587I->replaceAllUsesWith(Result);588I->dropAllReferences();589I->eraseFromParent();590}591592static bool runImpl(Function &F, const TargetLowering &TLI) {593SmallVector<Instruction *, 4> Replace;594SmallVector<Instruction *, 4> ReplaceVector;595bool Modified = false;596597unsigned MaxLegalFpConvertBitWidth =598TLI.getMaxLargeFPConvertBitWidthSupported();599if (ExpandFpConvertBits != llvm::IntegerType::MAX_INT_BITS)600MaxLegalFpConvertBitWidth = ExpandFpConvertBits;601602if (MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS)603return false;604605for (auto &I : instructions(F)) {606switch (I.getOpcode()) {607case Instruction::FPToUI:608case Instruction::FPToSI: {609// TODO: This pass doesn't handle scalable vectors.610if (I.getOperand(0)->getType()->isScalableTy())611continue;612613auto *IntTy = cast<IntegerType>(I.getType()->getScalarType());614if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)615continue;616617if (I.getOperand(0)->getType()->isVectorTy())618ReplaceVector.push_back(&I);619else620Replace.push_back(&I);621Modified = true;622break;623}624case Instruction::UIToFP:625case Instruction::SIToFP: {626// TODO: This pass doesn't handle scalable vectors.627if (I.getOperand(0)->getType()->isScalableTy())628continue;629630auto *IntTy =631cast<IntegerType>(I.getOperand(0)->getType()->getScalarType());632if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)633continue;634635if (I.getOperand(0)->getType()->isVectorTy())636ReplaceVector.push_back(&I);637else638Replace.push_back(&I);639Modified = true;640break;641}642default:643break;644}645}646647while (!ReplaceVector.empty()) {648Instruction *I = ReplaceVector.pop_back_val();649scalarize(I, Replace);650}651652if (Replace.empty())653return false;654655while (!Replace.empty()) {656Instruction *I = Replace.pop_back_val();657if (I->getOpcode() == Instruction::FPToUI ||658I->getOpcode() == Instruction::FPToSI) {659expandFPToI(I);660} else {661expandIToFP(I);662}663}664665return Modified;666}667668namespace {669class ExpandLargeFpConvertLegacyPass : public FunctionPass {670public:671static char ID;672673ExpandLargeFpConvertLegacyPass() : FunctionPass(ID) {674initializeExpandLargeFpConvertLegacyPassPass(675*PassRegistry::getPassRegistry());676}677678bool runOnFunction(Function &F) override {679auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();680auto *TLI = TM->getSubtargetImpl(F)->getTargetLowering();681return runImpl(F, *TLI);682}683684void getAnalysisUsage(AnalysisUsage &AU) const override {685AU.addRequired<TargetPassConfig>();686AU.addPreserved<AAResultsWrapperPass>();687AU.addPreserved<GlobalsAAWrapperPass>();688}689};690} // namespace691692PreservedAnalyses ExpandLargeFpConvertPass::run(Function &F,693FunctionAnalysisManager &FAM) {694const TargetSubtargetInfo *STI = TM->getSubtargetImpl(F);695return runImpl(F, *STI->getTargetLowering()) ? PreservedAnalyses::none()696: PreservedAnalyses::all();697}698699char ExpandLargeFpConvertLegacyPass::ID = 0;700INITIALIZE_PASS_BEGIN(ExpandLargeFpConvertLegacyPass, "expand-large-fp-convert",701"Expand large fp convert", false, false)702INITIALIZE_PASS_END(ExpandLargeFpConvertLegacyPass, "expand-large-fp-convert",703"Expand large fp convert", false, false)704705FunctionPass *llvm::createExpandLargeFpConvertPass() {706return new ExpandLargeFpConvertLegacyPass();707}708709710