Path: blob/main/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp
//===-- ConstantFolding.cpp - Fold instructions into constants ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines routines for folding instructions into constants.
//
// Also, to supplement the basic IR ConstantExpr simplifications,
// this file defines some additional folding routines that can make use of
// DataLayout information. These functions cannot go in IR due to library
// dependency issues.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/Config/config.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantFold.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cerrno>
#include <cfenv>
#include <cmath>
#include <cstdint>

using namespace llvm;

namespace {

//===----------------------------------------------------------------------===//
// Constant Folding internal helper functions
//===----------------------------------------------------------------------===//

static Constant *foldConstVectorToAPInt(APInt &Result, Type *DestTy,
                                        Constant *C, Type *SrcEltTy,
                                        unsigned NumSrcElts,
                                        const DataLayout &DL) {
  // Now that we know that the input value is a vector of integers, just shift
  // and insert them into our result.
  unsigned BitShift = DL.getTypeSizeInBits(SrcEltTy);
  for (unsigned i = 0; i != NumSrcElts; ++i) {
    Constant *Element;
    if (DL.isLittleEndian())
      Element = C->getAggregateElement(NumSrcElts - i - 1);
    else
      Element = C->getAggregateElement(i);

    if (Element && isa<UndefValue>(Element)) {
      Result <<= BitShift;
      continue;
    }

    auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
    if (!ElementCI)
      return ConstantExpr::getBitCast(C, DestTy);

    Result <<= BitShift;
    Result |= ElementCI->getValue().zext(Result.getBitWidth());
  }

  return nullptr;
}
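
// Example (illustrative, not part of the upstream source): on a little-endian
// target, FoldBitCast below folds
//   bitcast (<2 x i32> <i32 1, i32 2> to i64)
// to i64 0x0000000200000001 (element 0 lands in the low 32 bits), while a
// big-endian target yields i64 0x0000000100000002.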

/// Constant fold bitcast, symbolically evaluating it with DataLayout.
/// This always returns a non-null constant, but it may be a
/// ConstantExpr if unfoldable.
Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
  assert(CastInst::castIsValid(Instruction::BitCast, C, DestTy) &&
         "Invalid constantexpr bitcast!");

  // Catch the obvious splat cases.
  if (Constant *Res = ConstantFoldLoadFromUniformValue(C, DestTy, DL))
    return Res;

  if (auto *VTy = dyn_cast<VectorType>(C->getType())) {
    // Handle a vector->scalar integer/fp cast.
    if (isa<IntegerType>(DestTy) || DestTy->isFloatingPointTy()) {
      unsigned NumSrcElts = cast<FixedVectorType>(VTy)->getNumElements();
      Type *SrcEltTy = VTy->getElementType();

      // If the vector is a vector of floating point, convert it to vector of int
      // to simplify things.
      if (SrcEltTy->isFloatingPointTy()) {
        unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
        auto *SrcIVTy = FixedVectorType::get(
            IntegerType::get(C->getContext(), FPWidth), NumSrcElts);
        // Ask IR to do the conversion now that #elts line up.
        C = ConstantExpr::getBitCast(C, SrcIVTy);
      }

      APInt Result(DL.getTypeSizeInBits(DestTy), 0);
      if (Constant *CE = foldConstVectorToAPInt(Result, DestTy, C,
                                                SrcEltTy, NumSrcElts, DL))
        return CE;

      if (isa<IntegerType>(DestTy))
        return ConstantInt::get(DestTy, Result);

      APFloat FP(DestTy->getFltSemantics(), Result);
      return ConstantFP::get(DestTy->getContext(), FP);
    }
  }

  // The code below only handles casts to vectors currently.
  auto *DestVTy = dyn_cast<VectorType>(DestTy);
  if (!DestVTy)
    return ConstantExpr::getBitCast(C, DestTy);

  // If this is a scalar -> vector cast, convert the input into a <1 x scalar>
  // vector so the code below can handle it uniformly.
  if (isa<ConstantFP>(C) || isa<ConstantInt>(C)) {
    Constant *Ops = C; // don't take the address of C!
    return FoldBitCast(ConstantVector::get(Ops), DestTy, DL);
  }

  // If this is a bitcast from constant vector -> vector, fold it.
  if (!isa<ConstantDataVector>(C) && !isa<ConstantVector>(C))
    return ConstantExpr::getBitCast(C, DestTy);

  // If the element types match, IR can fold it.
  unsigned NumDstElt = cast<FixedVectorType>(DestVTy)->getNumElements();
  unsigned NumSrcElt = cast<FixedVectorType>(C->getType())->getNumElements();
  if (NumDstElt == NumSrcElt)
    return ConstantExpr::getBitCast(C, DestTy);

  Type *SrcEltTy = cast<VectorType>(C->getType())->getElementType();
  Type *DstEltTy = DestVTy->getElementType();

  // Otherwise, we're changing the number of elements in a vector, which
  // requires endianness information to do the right thing. For example,
  //    bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
  // folds to (little endian):
  //    <4 x i32> <i32 0, i32 0, i32 1, i32 0>
  // and to (big endian):
  //    <4 x i32> <i32 0, i32 0, i32 0, i32 1>
  // First things first.  We only want to think about integers here, so if
  // we have something in FP form, recast it as integer.
  if (DstEltTy->isFloatingPointTy()) {
    // Fold to a vector of integers with the same size as our FP type.
    unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits();
    auto *DestIVTy = FixedVectorType::get(
        IntegerType::get(C->getContext(), FPWidth), NumDstElt);
    // Recursively handle this integer conversion, if possible.
    C = FoldBitCast(C, DestIVTy, DL);

    // Finally, IR can handle this now that #elts line up.
    return ConstantExpr::getBitCast(C, DestTy);
  }

  // Okay, we know the destination is integer, if the input is FP, convert
  // it to integer first.
  if (SrcEltTy->isFloatingPointTy()) {
    unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
    auto *SrcIVTy = FixedVectorType::get(
        IntegerType::get(C->getContext(), FPWidth), NumSrcElt);
    // Ask IR to do the conversion now that #elts line up.
    C = ConstantExpr::getBitCast(C, SrcIVTy);
    // If IR wasn't able to fold it, bail out.
    if (!isa<ConstantVector>(C) && // FIXME: Remove ConstantVector.
        !isa<ConstantDataVector>(C))
      return C;
  }

  // Now we know that the input and output vectors are both integer vectors
  // of the same size, and that their #elements is not the same. Do the
  // conversion here, which depends on whether the input or output has
  // more elements.
  bool isLittleEndian = DL.isLittleEndian();

  SmallVector<Constant*, 32> Result;
  if (NumDstElt < NumSrcElt) {
    // Handle: bitcast (<4 x i32> <i32 0, i32 1, i32 2, i32 3> to <2 x i64>)
    Constant *Zero = Constant::getNullValue(DstEltTy);
    unsigned Ratio = NumSrcElt/NumDstElt;
    unsigned SrcBitSize = SrcEltTy->getPrimitiveSizeInBits();
    unsigned SrcElt = 0;
    for (unsigned i = 0; i != NumDstElt; ++i) {
      // Build each element of the result.
      Constant *Elt = Zero;
      unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1);
      for (unsigned j = 0; j != Ratio; ++j) {
        Constant *Src = C->getAggregateElement(SrcElt++);
        if (Src && isa<UndefValue>(Src))
          Src = Constant::getNullValue(
              cast<VectorType>(C->getType())->getElementType());
        else
          Src = dyn_cast_or_null<ConstantInt>(Src);
        if (!Src) // Reject constantexpr elements.
          return ConstantExpr::getBitCast(C, DestTy);

        // Zero extend the element to the right size.
        Src = ConstantFoldCastOperand(Instruction::ZExt, Src, Elt->getType(),
                                      DL);
        assert(Src && "Constant folding cannot fail on plain integers");

        // Shift it to the right place, depending on endianness.
        Src = ConstantFoldBinaryOpOperands(
            Instruction::Shl, Src, ConstantInt::get(Src->getType(), ShiftAmt),
            DL);
        assert(Src && "Constant folding cannot fail on plain integers");
        ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize;

        // Mix it in.
        Elt = ConstantFoldBinaryOpOperands(Instruction::Or, Elt, Src, DL);
        assert(Elt && "Constant folding cannot fail on plain integers");
      }
      Result.push_back(Elt);
    }
    return ConstantVector::get(Result);
  }

  // Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
  unsigned Ratio = NumDstElt/NumSrcElt;
  unsigned DstBitSize = DL.getTypeSizeInBits(DstEltTy);

  // Loop over each source value, expanding into multiple results.
  for (unsigned i = 0; i != NumSrcElt; ++i) {
    auto *Element = C->getAggregateElement(i);

    if (!Element) // Reject constantexpr elements.
      return ConstantExpr::getBitCast(C, DestTy);

    if (isa<UndefValue>(Element)) {
      // Correctly propagate undef values.
      Result.append(Ratio, UndefValue::get(DstEltTy));
      continue;
    }

    auto *Src = dyn_cast<ConstantInt>(Element);
    if (!Src)
      return ConstantExpr::getBitCast(C, DestTy);

    unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
    for (unsigned j = 0; j != Ratio; ++j) {
      // Shift the piece of the value into the right place, depending on
      // endianness.
      APInt Elt = Src->getValue().lshr(ShiftAmt);
      ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;

      // Truncate and remember this piece.
      Result.push_back(ConstantInt::get(DstEltTy, Elt.trunc(DstBitSize)));
    }
  }

  return ConstantVector::get(Result);
}

} // end anonymous namespace

/// If this constant is a constant offset from a global, return the global and
/// the constant. Because of constantexprs, this function is recursive.
bool llvm::IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
                                      APInt &Offset, const DataLayout &DL,
                                      DSOLocalEquivalent **DSOEquiv) {
  if (DSOEquiv)
    *DSOEquiv = nullptr;

  // Trivial case, constant is the global.
  if ((GV = dyn_cast<GlobalValue>(C))) {
    unsigned BitWidth = DL.getIndexTypeSizeInBits(GV->getType());
    Offset = APInt(BitWidth, 0);
    return true;
  }

  if (auto *FoundDSOEquiv = dyn_cast<DSOLocalEquivalent>(C)) {
    if (DSOEquiv)
      *DSOEquiv = FoundDSOEquiv;
    GV = FoundDSOEquiv->getGlobalValue();
    unsigned BitWidth = DL.getIndexTypeSizeInBits(GV->getType());
    Offset = APInt(BitWidth, 0);
    return true;
  }

  // Otherwise, if this isn't a constant expr, bail out.
  auto *CE = dyn_cast<ConstantExpr>(C);
  if (!CE) return false;

  // Look through ptr->int and ptr->ptr casts.
  if (CE->getOpcode() == Instruction::PtrToInt ||
      CE->getOpcode() == Instruction::BitCast)
    return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, DL,
                                      DSOEquiv);

  // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
  auto *GEP = dyn_cast<GEPOperator>(CE);
  if (!GEP)
    return false;

  unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
  APInt TmpOffset(BitWidth, 0);

  // If the base isn't a global+constant, we aren't either.
  if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, TmpOffset, DL,
                                  DSOEquiv))
    return false;

  // Otherwise, add any offset that our operands provide.
  if (!GEP->accumulateConstantOffset(DL, TmpOffset))
    return false;

  Offset = TmpOffset;
  return true;
}
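
// Example (illustrative, not part of the upstream source): with a typical
// DataLayout, ConstantFoldLoadThroughBitcast below can fold a load of type
// float from a constant holding i32 0x3F800000 into float 1.0; for struct or
// vector sources it drills into the leading element to find a castable value.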

Constant *llvm::ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy,
                                               const DataLayout &DL) {
  do {
    Type *SrcTy = C->getType();
    if (SrcTy == DestTy)
      return C;

    TypeSize DestSize = DL.getTypeSizeInBits(DestTy);
    TypeSize SrcSize = DL.getTypeSizeInBits(SrcTy);
    if (!TypeSize::isKnownGE(SrcSize, DestSize))
      return nullptr;

    // Catch the obvious splat cases (since all-zeros can coerce non-integral
    // pointers legally).
    if (Constant *Res = ConstantFoldLoadFromUniformValue(C, DestTy, DL))
      return Res;

    // If the type sizes are the same and a cast is legal, just directly
    // cast the constant.
    // But be careful not to coerce non-integral pointers illegally.
    if (SrcSize == DestSize &&
        DL.isNonIntegralPointerType(SrcTy->getScalarType()) ==
            DL.isNonIntegralPointerType(DestTy->getScalarType())) {
      Instruction::CastOps Cast = Instruction::BitCast;
      // If we are going from a pointer to int or vice versa, we spell the cast
      // differently.
      if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
        Cast = Instruction::IntToPtr;
      else if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
        Cast = Instruction::PtrToInt;

      if (CastInst::castIsValid(Cast, C, DestTy))
        return ConstantFoldCastOperand(Cast, C, DestTy, DL);
    }

    // If this isn't an aggregate type, there is nothing we can do to drill down
    // and find a bitcastable constant.
    if (!SrcTy->isAggregateType() && !SrcTy->isVectorTy())
      return nullptr;

    // We're simulating a load through a pointer that was bitcast to point to
    // a different type, so we can try to walk down through the initial
    // elements of an aggregate to see if some part of the aggregate is
    // castable to implement the "load" semantic model.
    if (SrcTy->isStructTy()) {
      // Struct types might have leading zero-length elements like [0 x i32],
      // which are certainly not what we are looking for, so skip them.
      unsigned Elem = 0;
      Constant *ElemC;
      do {
        ElemC = C->getAggregateElement(Elem++);
      } while (ElemC && DL.getTypeSizeInBits(ElemC->getType()).isZero());
      C = ElemC;
    } else {
      // For non-byte-sized vector elements, the first element is not
      // necessarily located at the vector base address.
      if (auto *VT = dyn_cast<VectorType>(SrcTy))
        if (!DL.typeSizeEqualsStoreSize(VT->getElementType()))
          return nullptr;

      C = C->getAggregateElement(0u);
    }
  } while (C);

  return nullptr;
}

namespace {

/// Recursive helper to read bits out of global. C is the constant being copied
/// out of. ByteOffset is an offset into C. CurPtr is the pointer to copy
/// results into and BytesLeft is the number of bytes left in
/// the CurPtr buffer.
DL is the DataLayout.416bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, unsigned char *CurPtr,417unsigned BytesLeft, const DataLayout &DL) {418assert(ByteOffset <= DL.getTypeAllocSize(C->getType()) &&419"Out of range access");420421// If this element is zero or undefined, we can just return since *CurPtr is422// zero initialized.423if (isa<ConstantAggregateZero>(C) || isa<UndefValue>(C))424return true;425426if (auto *CI = dyn_cast<ConstantInt>(C)) {427if ((CI->getBitWidth() & 7) != 0)428return false;429const APInt &Val = CI->getValue();430unsigned IntBytes = unsigned(CI->getBitWidth()/8);431432for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) {433unsigned n = ByteOffset;434if (!DL.isLittleEndian())435n = IntBytes - n - 1;436CurPtr[i] = Val.extractBits(8, n * 8).getZExtValue();437++ByteOffset;438}439return true;440}441442if (auto *CFP = dyn_cast<ConstantFP>(C)) {443if (CFP->getType()->isDoubleTy()) {444C = FoldBitCast(C, Type::getInt64Ty(C->getContext()), DL);445return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);446}447if (CFP->getType()->isFloatTy()){448C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), DL);449return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);450}451if (CFP->getType()->isHalfTy()){452C = FoldBitCast(C, Type::getInt16Ty(C->getContext()), DL);453return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);454}455return false;456}457458if (auto *CS = dyn_cast<ConstantStruct>(C)) {459const StructLayout *SL = DL.getStructLayout(CS->getType());460unsigned Index = SL->getElementContainingOffset(ByteOffset);461uint64_t CurEltOffset = SL->getElementOffset(Index);462ByteOffset -= CurEltOffset;463464while (true) {465// If the element access is to the element itself and not to tail padding,466// read the bytes from the element.467uint64_t EltSize = DL.getTypeAllocSize(CS->getOperand(Index)->getType());468469if (ByteOffset < EltSize &&470!ReadDataFromGlobal(CS->getOperand(Index), ByteOffset, CurPtr,471BytesLeft, DL))472return false;473474++Index;475476// Check to see if we read from the last struct element, if so we're done.477if (Index == CS->getType()->getNumElements())478return true;479480// If we read all of the bytes we needed from this element we're done.481uint64_t NextEltOffset = SL->getElementOffset(Index);482483if (BytesLeft <= NextEltOffset - CurEltOffset - ByteOffset)484return true;485486// Move to the next element of the struct.487CurPtr += NextEltOffset - CurEltOffset - ByteOffset;488BytesLeft -= NextEltOffset - CurEltOffset - ByteOffset;489ByteOffset = 0;490CurEltOffset = NextEltOffset;491}492// not reached.493}494495if (isa<ConstantArray>(C) || isa<ConstantVector>(C) ||496isa<ConstantDataSequential>(C)) {497uint64_t NumElts, EltSize;498Type *EltTy;499if (auto *AT = dyn_cast<ArrayType>(C->getType())) {500NumElts = AT->getNumElements();501EltTy = AT->getElementType();502EltSize = DL.getTypeAllocSize(EltTy);503} else {504NumElts = cast<FixedVectorType>(C->getType())->getNumElements();505EltTy = cast<FixedVectorType>(C->getType())->getElementType();506// TODO: For non-byte-sized vectors, current implementation assumes there is507// padding to the next byte boundary between elements.508if (!DL.typeSizeEqualsStoreSize(EltTy))509return false;510511EltSize = DL.getTypeStoreSize(EltTy);512}513uint64_t Index = ByteOffset / EltSize;514uint64_t Offset = ByteOffset - Index * EltSize;515516for (; Index != NumElts; ++Index) {517if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr,518BytesLeft, DL))519return 
false;520521uint64_t BytesWritten = EltSize - Offset;522assert(BytesWritten <= EltSize && "Not indexing into this element?");523if (BytesWritten >= BytesLeft)524return true;525526Offset = 0;527BytesLeft -= BytesWritten;528CurPtr += BytesWritten;529}530return true;531}532533if (auto *CE = dyn_cast<ConstantExpr>(C)) {534if (CE->getOpcode() == Instruction::IntToPtr &&535CE->getOperand(0)->getType() == DL.getIntPtrType(CE->getType())) {536return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr,537BytesLeft, DL);538}539}540541// Otherwise, unknown initializer type.542return false;543}544545Constant *FoldReinterpretLoadFromConst(Constant *C, Type *LoadTy,546int64_t Offset, const DataLayout &DL) {547// Bail out early. Not expect to load from scalable global variable.548if (isa<ScalableVectorType>(LoadTy))549return nullptr;550551auto *IntType = dyn_cast<IntegerType>(LoadTy);552553// If this isn't an integer load we can't fold it directly.554if (!IntType) {555// If this is a non-integer load, we can try folding it as an int load and556// then bitcast the result. This can be useful for union cases. Note557// that address spaces don't matter here since we're not going to result in558// an actual new load.559if (!LoadTy->isFloatingPointTy() && !LoadTy->isPointerTy() &&560!LoadTy->isVectorTy())561return nullptr;562563Type *MapTy = Type::getIntNTy(C->getContext(),564DL.getTypeSizeInBits(LoadTy).getFixedValue());565if (Constant *Res = FoldReinterpretLoadFromConst(C, MapTy, Offset, DL)) {566if (Res->isNullValue() && !LoadTy->isX86_MMXTy() &&567!LoadTy->isX86_AMXTy())568// Materializing a zero can be done trivially without a bitcast569return Constant::getNullValue(LoadTy);570Type *CastTy = LoadTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(LoadTy) : LoadTy;571Res = FoldBitCast(Res, CastTy, DL);572if (LoadTy->isPtrOrPtrVectorTy()) {573// For vector of pointer, we needed to first convert to a vector of integer, then do vector inttoptr574if (Res->isNullValue() && !LoadTy->isX86_MMXTy() &&575!LoadTy->isX86_AMXTy())576return Constant::getNullValue(LoadTy);577if (DL.isNonIntegralPointerType(LoadTy->getScalarType()))578// Be careful not to replace a load of an addrspace value with an inttoptr here579return nullptr;580Res = ConstantExpr::getIntToPtr(Res, LoadTy);581}582return Res;583}584return nullptr;585}586587unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8;588if (BytesLoaded > 32 || BytesLoaded == 0)589return nullptr;590591// If we're not accessing anything in this constant, the result is undefined.592if (Offset <= -1 * static_cast<int64_t>(BytesLoaded))593return PoisonValue::get(IntType);594595// TODO: We should be able to support scalable types.596TypeSize InitializerSize = DL.getTypeAllocSize(C->getType());597if (InitializerSize.isScalable())598return nullptr;599600// If we're not accessing anything in this constant, the result is undefined.601if (Offset >= (int64_t)InitializerSize.getFixedValue())602return PoisonValue::get(IntType);603604unsigned char RawBytes[32] = {0};605unsigned char *CurPtr = RawBytes;606unsigned BytesLeft = BytesLoaded;607608// If we're loading off the beginning of the global, some bytes may be valid.609if (Offset < 0) {610CurPtr += -Offset;611BytesLeft += Offset;612Offset = 0;613}614615if (!ReadDataFromGlobal(C, Offset, CurPtr, BytesLeft, DL))616return nullptr;617618APInt ResultVal = APInt(IntType->getBitWidth(), 0);619if (DL.isLittleEndian()) {620ResultVal = RawBytes[BytesLoaded - 1];621for (unsigned i = 1; i != BytesLoaded; ++i) {622ResultVal <<= 8;623ResultVal |= 
RawBytes[BytesLoaded - 1 - i];624}625} else {626ResultVal = RawBytes[0];627for (unsigned i = 1; i != BytesLoaded; ++i) {628ResultVal <<= 8;629ResultVal |= RawBytes[i];630}631}632633return ConstantInt::get(IntType->getContext(), ResultVal);634}635636} // anonymous namespace637638// If GV is a constant with an initializer read its representation starting639// at Offset and return it as a constant array of unsigned char. Otherwise640// return null.641Constant *llvm::ReadByteArrayFromGlobal(const GlobalVariable *GV,642uint64_t Offset) {643if (!GV->isConstant() || !GV->hasDefinitiveInitializer())644return nullptr;645646const DataLayout &DL = GV->getDataLayout();647Constant *Init = const_cast<Constant *>(GV->getInitializer());648TypeSize InitSize = DL.getTypeAllocSize(Init->getType());649if (InitSize < Offset)650return nullptr;651652uint64_t NBytes = InitSize - Offset;653if (NBytes > UINT16_MAX)654// Bail for large initializers in excess of 64K to avoid allocating655// too much memory.656// Offset is assumed to be less than or equal than InitSize (this657// is enforced in ReadDataFromGlobal).658return nullptr;659660SmallVector<unsigned char, 256> RawBytes(static_cast<size_t>(NBytes));661unsigned char *CurPtr = RawBytes.data();662663if (!ReadDataFromGlobal(Init, Offset, CurPtr, NBytes, DL))664return nullptr;665666return ConstantDataArray::get(GV->getContext(), RawBytes);667}668669/// If this Offset points exactly to the start of an aggregate element, return670/// that element, otherwise return nullptr.671Constant *getConstantAtOffset(Constant *Base, APInt Offset,672const DataLayout &DL) {673if (Offset.isZero())674return Base;675676if (!isa<ConstantAggregate>(Base) && !isa<ConstantDataSequential>(Base))677return nullptr;678679Type *ElemTy = Base->getType();680SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(ElemTy, Offset);681if (!Offset.isZero() || !Indices[0].isZero())682return nullptr;683684Constant *C = Base;685for (const APInt &Index : drop_begin(Indices)) {686if (Index.isNegative() || Index.getActiveBits() >= 32)687return nullptr;688689C = C->getAggregateElement(Index.getZExtValue());690if (!C)691return nullptr;692}693694return C;695}696697Constant *llvm::ConstantFoldLoadFromConst(Constant *C, Type *Ty,698const APInt &Offset,699const DataLayout &DL) {700if (Constant *AtOffset = getConstantAtOffset(C, Offset, DL))701if (Constant *Result = ConstantFoldLoadThroughBitcast(AtOffset, Ty, DL))702return Result;703704// Explicitly check for out-of-bounds access, so we return poison even if the705// constant is a uniform value.706TypeSize Size = DL.getTypeAllocSize(C->getType());707if (!Size.isScalable() && Offset.sge(Size.getFixedValue()))708return PoisonValue::get(Ty);709710// Try an offset-independent fold of a uniform value.711if (Constant *Result = ConstantFoldLoadFromUniformValue(C, Ty, DL))712return Result;713714// Try hard to fold loads from bitcasted strange and non-type-safe things.715if (Offset.getSignificantBits() <= 64)716if (Constant *Result =717FoldReinterpretLoadFromConst(C, Ty, Offset.getSExtValue(), DL))718return Result;719720return nullptr;721}722723Constant *llvm::ConstantFoldLoadFromConst(Constant *C, Type *Ty,724const DataLayout &DL) {725return ConstantFoldLoadFromConst(C, Ty, APInt(64, 0), DL);726}727728Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,729APInt Offset,730const DataLayout &DL) {731// We can only fold loads from constant globals with a definitive initializer.732// Check this upfront, to skip expensive offset calculations.733auto *GV = 
dyn_cast<GlobalVariable>(getUnderlyingObject(C));734if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())735return nullptr;736737C = cast<Constant>(C->stripAndAccumulateConstantOffsets(738DL, Offset, /* AllowNonInbounds */ true));739740if (C == GV)741if (Constant *Result = ConstantFoldLoadFromConst(GV->getInitializer(), Ty,742Offset, DL))743return Result;744745// If this load comes from anywhere in a uniform constant global, the value746// is always the same, regardless of the loaded offset.747return ConstantFoldLoadFromUniformValue(GV->getInitializer(), Ty, DL);748}749750Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,751const DataLayout &DL) {752APInt Offset(DL.getIndexTypeSizeInBits(C->getType()), 0);753return ConstantFoldLoadFromConstPtr(C, Ty, std::move(Offset), DL);754}755756Constant *llvm::ConstantFoldLoadFromUniformValue(Constant *C, Type *Ty,757const DataLayout &DL) {758if (isa<PoisonValue>(C))759return PoisonValue::get(Ty);760if (isa<UndefValue>(C))761return UndefValue::get(Ty);762// If padding is needed when storing C to memory, then it isn't considered as763// uniform.764if (!DL.typeSizeEqualsStoreSize(C->getType()))765return nullptr;766if (C->isNullValue() && !Ty->isX86_MMXTy() && !Ty->isX86_AMXTy())767return Constant::getNullValue(Ty);768if (C->isAllOnesValue() &&769(Ty->isIntOrIntVectorTy() || Ty->isFPOrFPVectorTy()))770return Constant::getAllOnesValue(Ty);771return nullptr;772}773774namespace {775776/// One of Op0/Op1 is a constant expression.777/// Attempt to symbolically evaluate the result of a binary operator merging778/// these together. If target data info is available, it is provided as DL,779/// otherwise DL is null.780Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, Constant *Op1,781const DataLayout &DL) {782// SROA783784// Fold (and 0xffffffff00000000, (shl x, 32)) -> shl.785// Fold (lshr (or X, Y), 32) -> (lshr [X/Y], 32) if one doesn't contribute786// bits.787788if (Opc == Instruction::And) {789KnownBits Known0 = computeKnownBits(Op0, DL);790KnownBits Known1 = computeKnownBits(Op1, DL);791if ((Known1.One | Known0.Zero).isAllOnes()) {792// All the bits of Op0 that the 'and' could be masking are already zero.793return Op0;794}795if ((Known0.One | Known1.Zero).isAllOnes()) {796// All the bits of Op1 that the 'and' could be masking are already zero.797return Op1;798}799800Known0 &= Known1;801if (Known0.isConstant())802return ConstantInt::get(Op0->getType(), Known0.getConstant());803}804805// If the constant expr is something like &A[123] - &A[4].f, fold this into a806// constant. 
This happens frequently when iterating over a global array.807if (Opc == Instruction::Sub) {808GlobalValue *GV1, *GV2;809APInt Offs1, Offs2;810811if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, DL))812if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, DL) && GV1 == GV2) {813unsigned OpSize = DL.getTypeSizeInBits(Op0->getType());814815// (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow.816// PtrToInt may change the bitwidth so we have convert to the right size817// first.818return ConstantInt::get(Op0->getType(), Offs1.zextOrTrunc(OpSize) -819Offs2.zextOrTrunc(OpSize));820}821}822823return nullptr;824}825826/// If array indices are not pointer-sized integers, explicitly cast them so827/// that they aren't implicitly casted by the getelementptr.828Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops,829Type *ResultTy, GEPNoWrapFlags NW,830std::optional<ConstantRange> InRange,831const DataLayout &DL, const TargetLibraryInfo *TLI) {832Type *IntIdxTy = DL.getIndexType(ResultTy);833Type *IntIdxScalarTy = IntIdxTy->getScalarType();834835bool Any = false;836SmallVector<Constant*, 32> NewIdxs;837for (unsigned i = 1, e = Ops.size(); i != e; ++i) {838if ((i == 1 ||839!isa<StructType>(GetElementPtrInst::getIndexedType(840SrcElemTy, Ops.slice(1, i - 1)))) &&841Ops[i]->getType()->getScalarType() != IntIdxScalarTy) {842Any = true;843Type *NewType =844Ops[i]->getType()->isVectorTy() ? IntIdxTy : IntIdxScalarTy;845Constant *NewIdx = ConstantFoldCastOperand(846CastInst::getCastOpcode(Ops[i], true, NewType, true), Ops[i], NewType,847DL);848if (!NewIdx)849return nullptr;850NewIdxs.push_back(NewIdx);851} else852NewIdxs.push_back(Ops[i]);853}854855if (!Any)856return nullptr;857858Constant *C =859ConstantExpr::getGetElementPtr(SrcElemTy, Ops[0], NewIdxs, NW, InRange);860return ConstantFoldConstant(C, DL, TLI);861}862863/// If we can symbolically evaluate the GEP constant expression, do so.864Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,865ArrayRef<Constant *> Ops,866const DataLayout &DL,867const TargetLibraryInfo *TLI) {868Type *SrcElemTy = GEP->getSourceElementType();869Type *ResTy = GEP->getType();870if (!SrcElemTy->isSized() || isa<ScalableVectorType>(SrcElemTy))871return nullptr;872873if (Constant *C = CastGEPIndices(SrcElemTy, Ops, ResTy, GEP->getNoWrapFlags(),874GEP->getInRange(), DL, TLI))875return C;876877Constant *Ptr = Ops[0];878if (!Ptr->getType()->isPointerTy())879return nullptr;880881Type *IntIdxTy = DL.getIndexType(Ptr->getType());882883for (unsigned i = 1, e = Ops.size(); i != e; ++i)884if (!isa<ConstantInt>(Ops[i]))885return nullptr;886887unsigned BitWidth = DL.getTypeSizeInBits(IntIdxTy);888APInt Offset = APInt(889BitWidth,890DL.getIndexedOffsetInType(891SrcElemTy, ArrayRef((Value *const *)Ops.data() + 1, Ops.size() - 1)));892893std::optional<ConstantRange> InRange = GEP->getInRange();894if (InRange)895InRange = InRange->sextOrTrunc(BitWidth);896897// If this is a GEP of a GEP, fold it all into a single GEP.898GEPNoWrapFlags NW = GEP->getNoWrapFlags();899bool Overflow = false;900while (auto *GEP = dyn_cast<GEPOperator>(Ptr)) {901NW &= GEP->getNoWrapFlags();902903SmallVector<Value *, 4> NestedOps(llvm::drop_begin(GEP->operands()));904905// Do not try the incorporate the sub-GEP if some index is not a number.906bool AllConstantInt = true;907for (Value *NestedOp : NestedOps)908if (!isa<ConstantInt>(NestedOp)) {909AllConstantInt = false;910break;911}912if (!AllConstantInt)913break;914915// TODO: Try to intersect two inrange attributes?916if (!InRange) 
{917InRange = GEP->getInRange();918if (InRange)919// Adjust inrange by offset until now.920InRange = InRange->sextOrTrunc(BitWidth).subtract(Offset);921}922923Ptr = cast<Constant>(GEP->getOperand(0));924SrcElemTy = GEP->getSourceElementType();925Offset = Offset.sadd_ov(926APInt(BitWidth, DL.getIndexedOffsetInType(SrcElemTy, NestedOps)),927Overflow);928}929930// Preserving nusw (without inbounds) also requires that the offset931// additions did not overflow.932if (NW.hasNoUnsignedSignedWrap() && !NW.isInBounds() && Overflow)933NW = NW.withoutNoUnsignedSignedWrap();934935// If the base value for this address is a literal integer value, fold the936// getelementptr to the resulting integer value casted to the pointer type.937APInt BasePtr(BitWidth, 0);938if (auto *CE = dyn_cast<ConstantExpr>(Ptr)) {939if (CE->getOpcode() == Instruction::IntToPtr) {940if (auto *Base = dyn_cast<ConstantInt>(CE->getOperand(0)))941BasePtr = Base->getValue().zextOrTrunc(BitWidth);942}943}944945auto *PTy = cast<PointerType>(Ptr->getType());946if ((Ptr->isNullValue() || BasePtr != 0) &&947!DL.isNonIntegralPointerType(PTy)) {948Constant *C = ConstantInt::get(Ptr->getContext(), Offset + BasePtr);949return ConstantExpr::getIntToPtr(C, ResTy);950}951952// Try to infer inbounds for GEPs of globals.953// TODO(gep_nowrap): Also infer nuw flag.954if (!NW.isInBounds() && Offset.isNonNegative()) {955bool CanBeNull, CanBeFreed;956uint64_t DerefBytes =957Ptr->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);958if (DerefBytes != 0 && !CanBeNull && Offset.sle(DerefBytes))959NW |= GEPNoWrapFlags::inBounds();960}961962// Otherwise canonicalize this to a single ptradd.963LLVMContext &Ctx = Ptr->getContext();964return ConstantExpr::getGetElementPtr(Type::getInt8Ty(Ctx), Ptr,965ConstantInt::get(Ctx, Offset), NW,966InRange);967}968969/// Attempt to constant fold an instruction with the970/// specified opcode and operands. If successful, the constant result is971/// returned, if not, null is returned. 
Note that this function can fail when972/// attempting to fold instructions like loads and stores, which have no973/// constant expression form.974Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,975ArrayRef<Constant *> Ops,976const DataLayout &DL,977const TargetLibraryInfo *TLI,978bool AllowNonDeterministic) {979Type *DestTy = InstOrCE->getType();980981if (Instruction::isUnaryOp(Opcode))982return ConstantFoldUnaryOpOperand(Opcode, Ops[0], DL);983984if (Instruction::isBinaryOp(Opcode)) {985switch (Opcode) {986default:987break;988case Instruction::FAdd:989case Instruction::FSub:990case Instruction::FMul:991case Instruction::FDiv:992case Instruction::FRem:993// Handle floating point instructions separately to account for denormals994// TODO: If a constant expression is being folded rather than an995// instruction, denormals will not be flushed/treated as zero996if (const auto *I = dyn_cast<Instruction>(InstOrCE)) {997return ConstantFoldFPInstOperands(Opcode, Ops[0], Ops[1], DL, I,998AllowNonDeterministic);999}1000}1001return ConstantFoldBinaryOpOperands(Opcode, Ops[0], Ops[1], DL);1002}10031004if (Instruction::isCast(Opcode))1005return ConstantFoldCastOperand(Opcode, Ops[0], DestTy, DL);10061007if (auto *GEP = dyn_cast<GEPOperator>(InstOrCE)) {1008Type *SrcElemTy = GEP->getSourceElementType();1009if (!ConstantExpr::isSupportedGetElementPtr(SrcElemTy))1010return nullptr;10111012if (Constant *C = SymbolicallyEvaluateGEP(GEP, Ops, DL, TLI))1013return C;10141015return ConstantExpr::getGetElementPtr(SrcElemTy, Ops[0], Ops.slice(1),1016GEP->getNoWrapFlags(),1017GEP->getInRange());1018}10191020if (auto *CE = dyn_cast<ConstantExpr>(InstOrCE))1021return CE->getWithOperands(Ops);10221023switch (Opcode) {1024default: return nullptr;1025case Instruction::ICmp:1026case Instruction::FCmp: {1027auto *C = cast<CmpInst>(InstOrCE);1028return ConstantFoldCompareInstOperands(C->getPredicate(), Ops[0], Ops[1],1029DL, TLI, C);1030}1031case Instruction::Freeze:1032return isGuaranteedNotToBeUndefOrPoison(Ops[0]) ? 
Ops[0] : nullptr;1033case Instruction::Call:1034if (auto *F = dyn_cast<Function>(Ops.back())) {1035const auto *Call = cast<CallBase>(InstOrCE);1036if (canConstantFoldCallTo(Call, F))1037return ConstantFoldCall(Call, F, Ops.slice(0, Ops.size() - 1), TLI,1038AllowNonDeterministic);1039}1040return nullptr;1041case Instruction::Select:1042return ConstantFoldSelectInstruction(Ops[0], Ops[1], Ops[2]);1043case Instruction::ExtractElement:1044return ConstantExpr::getExtractElement(Ops[0], Ops[1]);1045case Instruction::ExtractValue:1046return ConstantFoldExtractValueInstruction(1047Ops[0], cast<ExtractValueInst>(InstOrCE)->getIndices());1048case Instruction::InsertElement:1049return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);1050case Instruction::InsertValue:1051return ConstantFoldInsertValueInstruction(1052Ops[0], Ops[1], cast<InsertValueInst>(InstOrCE)->getIndices());1053case Instruction::ShuffleVector:1054return ConstantExpr::getShuffleVector(1055Ops[0], Ops[1], cast<ShuffleVectorInst>(InstOrCE)->getShuffleMask());1056case Instruction::Load: {1057const auto *LI = dyn_cast<LoadInst>(InstOrCE);1058if (LI->isVolatile())1059return nullptr;1060return ConstantFoldLoadFromConstPtr(Ops[0], LI->getType(), DL);1061}1062}1063}10641065} // end anonymous namespace10661067//===----------------------------------------------------------------------===//1068// Constant Folding public APIs1069//===----------------------------------------------------------------------===//10701071namespace {10721073Constant *1074ConstantFoldConstantImpl(const Constant *C, const DataLayout &DL,1075const TargetLibraryInfo *TLI,1076SmallDenseMap<Constant *, Constant *> &FoldedOps) {1077if (!isa<ConstantVector>(C) && !isa<ConstantExpr>(C))1078return const_cast<Constant *>(C);10791080SmallVector<Constant *, 8> Ops;1081for (const Use &OldU : C->operands()) {1082Constant *OldC = cast<Constant>(&OldU);1083Constant *NewC = OldC;1084// Recursively fold the ConstantExpr's operands. If we have already folded1085// a ConstantExpr, we don't have to process it again.1086if (isa<ConstantVector>(OldC) || isa<ConstantExpr>(OldC)) {1087auto It = FoldedOps.find(OldC);1088if (It == FoldedOps.end()) {1089NewC = ConstantFoldConstantImpl(OldC, DL, TLI, FoldedOps);1090FoldedOps.insert({OldC, NewC});1091} else {1092NewC = It->second;1093}1094}1095Ops.push_back(NewC);1096}10971098if (auto *CE = dyn_cast<ConstantExpr>(C)) {1099if (Constant *Res = ConstantFoldInstOperandsImpl(1100CE, CE->getOpcode(), Ops, DL, TLI, /*AllowNonDeterministic=*/true))1101return Res;1102return const_cast<Constant *>(C);1103}11041105assert(isa<ConstantVector>(C));1106return ConstantVector::get(Ops);1107}11081109} // end anonymous namespace11101111Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL,1112const TargetLibraryInfo *TLI) {1113// Handle PHI nodes quickly here...1114if (auto *PN = dyn_cast<PHINode>(I)) {1115Constant *CommonValue = nullptr;11161117SmallDenseMap<Constant *, Constant *> FoldedOps;1118for (Value *Incoming : PN->incoming_values()) {1119// If the incoming value is undef then skip it. 
Note that while we could1120// skip the value if it is equal to the phi node itself we choose not to1121// because that would break the rule that constant folding only applies if1122// all operands are constants.1123if (isa<UndefValue>(Incoming))1124continue;1125// If the incoming value is not a constant, then give up.1126auto *C = dyn_cast<Constant>(Incoming);1127if (!C)1128return nullptr;1129// Fold the PHI's operands.1130C = ConstantFoldConstantImpl(C, DL, TLI, FoldedOps);1131// If the incoming value is a different constant to1132// the one we saw previously, then give up.1133if (CommonValue && C != CommonValue)1134return nullptr;1135CommonValue = C;1136}11371138// If we reach here, all incoming values are the same constant or undef.1139return CommonValue ? CommonValue : UndefValue::get(PN->getType());1140}11411142// Scan the operand list, checking to see if they are all constants, if so,1143// hand off to ConstantFoldInstOperandsImpl.1144if (!all_of(I->operands(), [](Use &U) { return isa<Constant>(U); }))1145return nullptr;11461147SmallDenseMap<Constant *, Constant *> FoldedOps;1148SmallVector<Constant *, 8> Ops;1149for (const Use &OpU : I->operands()) {1150auto *Op = cast<Constant>(&OpU);1151// Fold the Instruction's operands.1152Op = ConstantFoldConstantImpl(Op, DL, TLI, FoldedOps);1153Ops.push_back(Op);1154}11551156return ConstantFoldInstOperands(I, Ops, DL, TLI);1157}11581159Constant *llvm::ConstantFoldConstant(const Constant *C, const DataLayout &DL,1160const TargetLibraryInfo *TLI) {1161SmallDenseMap<Constant *, Constant *> FoldedOps;1162return ConstantFoldConstantImpl(C, DL, TLI, FoldedOps);1163}11641165Constant *llvm::ConstantFoldInstOperands(Instruction *I,1166ArrayRef<Constant *> Ops,1167const DataLayout &DL,1168const TargetLibraryInfo *TLI,1169bool AllowNonDeterministic) {1170return ConstantFoldInstOperandsImpl(I, I->getOpcode(), Ops, DL, TLI,1171AllowNonDeterministic);1172}11731174Constant *llvm::ConstantFoldCompareInstOperands(1175unsigned IntPredicate, Constant *Ops0, Constant *Ops1, const DataLayout &DL,1176const TargetLibraryInfo *TLI, const Instruction *I) {1177CmpInst::Predicate Predicate = (CmpInst::Predicate)IntPredicate;1178// fold: icmp (inttoptr x), null -> icmp x, 01179// fold: icmp null, (inttoptr x) -> icmp 0, x1180// fold: icmp (ptrtoint x), 0 -> icmp x, null1181// fold: icmp 0, (ptrtoint x) -> icmp null, x1182// fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y1183// fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y1184//1185// FIXME: The following comment is out of data and the DataLayout is here now.1186// ConstantExpr::getCompare cannot do this, because it doesn't have DL1187// around to know if bit truncation is happening.1188if (auto *CE0 = dyn_cast<ConstantExpr>(Ops0)) {1189if (Ops1->isNullValue()) {1190if (CE0->getOpcode() == Instruction::IntToPtr) {1191Type *IntPtrTy = DL.getIntPtrType(CE0->getType());1192// Convert the integer value to the right size to ensure we get the1193// proper extension or truncation.1194if (Constant *C = ConstantFoldIntegerCast(CE0->getOperand(0), IntPtrTy,1195/*IsSigned*/ false, DL)) {1196Constant *Null = Constant::getNullValue(C->getType());1197return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI);1198}1199}12001201// Only do this transformation if the int is intptrty in size, otherwise1202// there is a truncation or extension that we aren't modeling.1203if (CE0->getOpcode() == Instruction::PtrToInt) {1204Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType());1205if 
(CE0->getType() == IntPtrTy) {1206Constant *C = CE0->getOperand(0);1207Constant *Null = Constant::getNullValue(C->getType());1208return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI);1209}1210}1211}12121213if (auto *CE1 = dyn_cast<ConstantExpr>(Ops1)) {1214if (CE0->getOpcode() == CE1->getOpcode()) {1215if (CE0->getOpcode() == Instruction::IntToPtr) {1216Type *IntPtrTy = DL.getIntPtrType(CE0->getType());12171218// Convert the integer value to the right size to ensure we get the1219// proper extension or truncation.1220Constant *C0 = ConstantFoldIntegerCast(CE0->getOperand(0), IntPtrTy,1221/*IsSigned*/ false, DL);1222Constant *C1 = ConstantFoldIntegerCast(CE1->getOperand(0), IntPtrTy,1223/*IsSigned*/ false, DL);1224if (C0 && C1)1225return ConstantFoldCompareInstOperands(Predicate, C0, C1, DL, TLI);1226}12271228// Only do this transformation if the int is intptrty in size, otherwise1229// there is a truncation or extension that we aren't modeling.1230if (CE0->getOpcode() == Instruction::PtrToInt) {1231Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType());1232if (CE0->getType() == IntPtrTy &&1233CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()) {1234return ConstantFoldCompareInstOperands(1235Predicate, CE0->getOperand(0), CE1->getOperand(0), DL, TLI);1236}1237}1238}1239}12401241// Convert pointer comparison (base+offset1) pred (base+offset2) into1242// offset1 pred offset2, for the case where the offset is inbounds. This1243// only works for equality and unsigned comparison, as inbounds permits1244// crossing the sign boundary. However, the offset comparison itself is1245// signed.1246if (Ops0->getType()->isPointerTy() && !ICmpInst::isSigned(Predicate)) {1247unsigned IndexWidth = DL.getIndexTypeSizeInBits(Ops0->getType());1248APInt Offset0(IndexWidth, 0);1249Value *Stripped0 =1250Ops0->stripAndAccumulateInBoundsConstantOffsets(DL, Offset0);1251APInt Offset1(IndexWidth, 0);1252Value *Stripped1 =1253Ops1->stripAndAccumulateInBoundsConstantOffsets(DL, Offset1);1254if (Stripped0 == Stripped1)1255return ConstantInt::getBool(1256Ops0->getContext(),1257ICmpInst::compare(Offset0, Offset1,1258ICmpInst::getSignedPredicate(Predicate)));1259}1260} else if (isa<ConstantExpr>(Ops1)) {1261// If RHS is a constant expression, but the left side isn't, swap the1262// operands and try again.1263Predicate = ICmpInst::getSwappedPredicate(Predicate);1264return ConstantFoldCompareInstOperands(Predicate, Ops1, Ops0, DL, TLI);1265}12661267// Flush any denormal constant float input according to denormal handling1268// mode.1269Ops0 = FlushFPConstant(Ops0, I, /* IsOutput */ false);1270if (!Ops0)1271return nullptr;1272Ops1 = FlushFPConstant(Ops1, I, /* IsOutput */ false);1273if (!Ops1)1274return nullptr;12751276return ConstantFoldCompareInstruction(Predicate, Ops0, Ops1);1277}12781279Constant *llvm::ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op,1280const DataLayout &DL) {1281assert(Instruction::isUnaryOp(Opcode));12821283return ConstantFoldUnaryInstruction(Opcode, Op);1284}12851286Constant *llvm::ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS,1287Constant *RHS,1288const DataLayout &DL) {1289assert(Instruction::isBinaryOp(Opcode));1290if (isa<ConstantExpr>(LHS) || isa<ConstantExpr>(RHS))1291if (Constant *C = SymbolicallyEvaluateBinop(Opcode, LHS, RHS, DL))1292return C;12931294if (ConstantExpr::isDesirableBinOp(Opcode))1295return ConstantExpr::get(Opcode, LHS, RHS);1296return ConstantFoldBinaryInstruction(Opcode, LHS, RHS);1297}12981299Constant 
*llvm::FlushFPConstant(Constant *Operand, const Instruction *I,1300bool IsOutput) {1301if (!I || !I->getParent() || !I->getFunction())1302return Operand;13031304ConstantFP *CFP = dyn_cast<ConstantFP>(Operand);1305if (!CFP)1306return Operand;13071308const APFloat &APF = CFP->getValueAPF();1309// TODO: Should this canonicalize nans?1310if (!APF.isDenormal())1311return Operand;13121313Type *Ty = CFP->getType();1314DenormalMode DenormMode =1315I->getFunction()->getDenormalMode(Ty->getFltSemantics());1316DenormalMode::DenormalModeKind Mode =1317IsOutput ? DenormMode.Output : DenormMode.Input;1318switch (Mode) {1319default:1320llvm_unreachable("unknown denormal mode");1321case DenormalMode::Dynamic:1322return nullptr;1323case DenormalMode::IEEE:1324return Operand;1325case DenormalMode::PreserveSign:1326if (APF.isDenormal()) {1327return ConstantFP::get(1328Ty->getContext(),1329APFloat::getZero(Ty->getFltSemantics(), APF.isNegative()));1330}1331return Operand;1332case DenormalMode::PositiveZero:1333if (APF.isDenormal()) {1334return ConstantFP::get(Ty->getContext(),1335APFloat::getZero(Ty->getFltSemantics(), false));1336}1337return Operand;1338}1339return Operand;1340}13411342Constant *llvm::ConstantFoldFPInstOperands(unsigned Opcode, Constant *LHS,1343Constant *RHS, const DataLayout &DL,1344const Instruction *I,1345bool AllowNonDeterministic) {1346if (Instruction::isBinaryOp(Opcode)) {1347// Flush denormal inputs if needed.1348Constant *Op0 = FlushFPConstant(LHS, I, /* IsOutput */ false);1349if (!Op0)1350return nullptr;1351Constant *Op1 = FlushFPConstant(RHS, I, /* IsOutput */ false);1352if (!Op1)1353return nullptr;13541355// If nsz or an algebraic FMF flag is set, the result of the FP operation1356// may change due to future optimization. Don't constant fold them if1357// non-deterministic results are not allowed.1358if (!AllowNonDeterministic)1359if (auto *FP = dyn_cast_or_null<FPMathOperator>(I))1360if (FP->hasNoSignedZeros() || FP->hasAllowReassoc() ||1361FP->hasAllowContract() || FP->hasAllowReciprocal())1362return nullptr;13631364// Calculate constant result.1365Constant *C = ConstantFoldBinaryOpOperands(Opcode, Op0, Op1, DL);1366if (!C)1367return nullptr;13681369// Flush denormal output if needed.1370C = FlushFPConstant(C, I, /* IsOutput */ true);1371if (!C)1372return nullptr;13731374// The precise NaN value is non-deterministic.1375if (!AllowNonDeterministic && C->isNaN())1376return nullptr;13771378return C;1379}1380// If instruction lacks a parent/function and the denormal mode cannot be1381// determined, use the default (IEEE).1382return ConstantFoldBinaryOpOperands(Opcode, LHS, RHS, DL);1383}13841385Constant *llvm::ConstantFoldCastOperand(unsigned Opcode, Constant *C,1386Type *DestTy, const DataLayout &DL) {1387assert(Instruction::isCast(Opcode));1388switch (Opcode) {1389default:1390llvm_unreachable("Missing case");1391case Instruction::PtrToInt:1392if (auto *CE = dyn_cast<ConstantExpr>(C)) {1393Constant *FoldedValue = nullptr;1394// If the input is a inttoptr, eliminate the pair. 
This requires knowing1395// the width of a pointer, so it can't be done in ConstantExpr::getCast.1396if (CE->getOpcode() == Instruction::IntToPtr) {1397// zext/trunc the inttoptr to pointer size.1398FoldedValue = ConstantFoldIntegerCast(CE->getOperand(0),1399DL.getIntPtrType(CE->getType()),1400/*IsSigned=*/false, DL);1401} else if (auto *GEP = dyn_cast<GEPOperator>(CE)) {1402// If we have GEP, we can perform the following folds:1403// (ptrtoint (gep null, x)) -> x1404// (ptrtoint (gep (gep null, x), y) -> x + y, etc.1405unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());1406APInt BaseOffset(BitWidth, 0);1407auto *Base = cast<Constant>(GEP->stripAndAccumulateConstantOffsets(1408DL, BaseOffset, /*AllowNonInbounds=*/true));1409if (Base->isNullValue()) {1410FoldedValue = ConstantInt::get(CE->getContext(), BaseOffset);1411} else {1412// ptrtoint (gep i8, Ptr, (sub 0, V)) -> sub (ptrtoint Ptr), V1413if (GEP->getNumIndices() == 1 &&1414GEP->getSourceElementType()->isIntegerTy(8)) {1415auto *Ptr = cast<Constant>(GEP->getPointerOperand());1416auto *Sub = dyn_cast<ConstantExpr>(GEP->getOperand(1));1417Type *IntIdxTy = DL.getIndexType(Ptr->getType());1418if (Sub && Sub->getType() == IntIdxTy &&1419Sub->getOpcode() == Instruction::Sub &&1420Sub->getOperand(0)->isNullValue())1421FoldedValue = ConstantExpr::getSub(1422ConstantExpr::getPtrToInt(Ptr, IntIdxTy), Sub->getOperand(1));1423}1424}1425}1426if (FoldedValue) {1427// Do a zext or trunc to get to the ptrtoint dest size.1428return ConstantFoldIntegerCast(FoldedValue, DestTy, /*IsSigned=*/false,1429DL);1430}1431}1432break;1433case Instruction::IntToPtr:1434// If the input is a ptrtoint, turn the pair into a ptr to ptr bitcast if1435// the int size is >= the ptr size and the address spaces are the same.1436// This requires knowing the width of a pointer, so it can't be done in1437// ConstantExpr::getCast.1438if (auto *CE = dyn_cast<ConstantExpr>(C)) {1439if (CE->getOpcode() == Instruction::PtrToInt) {1440Constant *SrcPtr = CE->getOperand(0);1441unsigned SrcPtrSize = DL.getPointerTypeSizeInBits(SrcPtr->getType());1442unsigned MidIntSize = CE->getType()->getScalarSizeInBits();14431444if (MidIntSize >= SrcPtrSize) {1445unsigned SrcAS = SrcPtr->getType()->getPointerAddressSpace();1446if (SrcAS == DestTy->getPointerAddressSpace())1447return FoldBitCast(CE->getOperand(0), DestTy, DL);1448}1449}1450}1451break;1452case Instruction::Trunc:1453case Instruction::ZExt:1454case Instruction::SExt:1455case Instruction::FPTrunc:1456case Instruction::FPExt:1457case Instruction::UIToFP:1458case Instruction::SIToFP:1459case Instruction::FPToUI:1460case Instruction::FPToSI:1461case Instruction::AddrSpaceCast:1462break;1463case Instruction::BitCast:1464return FoldBitCast(C, DestTy, DL);1465}14661467if (ConstantExpr::isDesirableCastOp(Opcode))1468return ConstantExpr::getCast(Opcode, C, DestTy);1469return ConstantFoldCastInstruction(Opcode, C, DestTy);1470}14711472Constant *llvm::ConstantFoldIntegerCast(Constant *C, Type *DestTy,1473bool IsSigned, const DataLayout &DL) {1474Type *SrcTy = C->getType();1475if (SrcTy == DestTy)1476return C;1477if (SrcTy->getScalarSizeInBits() > DestTy->getScalarSizeInBits())1478return ConstantFoldCastOperand(Instruction::Trunc, C, DestTy, DL);1479if (IsSigned)1480return ConstantFoldCastOperand(Instruction::SExt, C, DestTy, DL);1481return ConstantFoldCastOperand(Instruction::ZExt, C, DestTy, DL);1482}14831484//===----------------------------------------------------------------------===//1485// Constant Folding for 
Calls1486//14871488bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {1489if (Call->isNoBuiltin())1490return false;1491if (Call->getFunctionType() != F->getFunctionType())1492return false;1493switch (F->getIntrinsicID()) {1494// Operations that do not operate floating-point numbers and do not depend on1495// FP environment can be folded even in strictfp functions.1496case Intrinsic::bswap:1497case Intrinsic::ctpop:1498case Intrinsic::ctlz:1499case Intrinsic::cttz:1500case Intrinsic::fshl:1501case Intrinsic::fshr:1502case Intrinsic::launder_invariant_group:1503case Intrinsic::strip_invariant_group:1504case Intrinsic::masked_load:1505case Intrinsic::get_active_lane_mask:1506case Intrinsic::abs:1507case Intrinsic::smax:1508case Intrinsic::smin:1509case Intrinsic::umax:1510case Intrinsic::umin:1511case Intrinsic::scmp:1512case Intrinsic::ucmp:1513case Intrinsic::sadd_with_overflow:1514case Intrinsic::uadd_with_overflow:1515case Intrinsic::ssub_with_overflow:1516case Intrinsic::usub_with_overflow:1517case Intrinsic::smul_with_overflow:1518case Intrinsic::umul_with_overflow:1519case Intrinsic::sadd_sat:1520case Intrinsic::uadd_sat:1521case Intrinsic::ssub_sat:1522case Intrinsic::usub_sat:1523case Intrinsic::smul_fix:1524case Intrinsic::smul_fix_sat:1525case Intrinsic::bitreverse:1526case Intrinsic::is_constant:1527case Intrinsic::vector_reduce_add:1528case Intrinsic::vector_reduce_mul:1529case Intrinsic::vector_reduce_and:1530case Intrinsic::vector_reduce_or:1531case Intrinsic::vector_reduce_xor:1532case Intrinsic::vector_reduce_smin:1533case Intrinsic::vector_reduce_smax:1534case Intrinsic::vector_reduce_umin:1535case Intrinsic::vector_reduce_umax:1536// Target intrinsics1537case Intrinsic::amdgcn_perm:1538case Intrinsic::amdgcn_wave_reduce_umin:1539case Intrinsic::amdgcn_wave_reduce_umax:1540case Intrinsic::amdgcn_s_wqm:1541case Intrinsic::amdgcn_s_quadmask:1542case Intrinsic::amdgcn_s_bitreplicate:1543case Intrinsic::arm_mve_vctp8:1544case Intrinsic::arm_mve_vctp16:1545case Intrinsic::arm_mve_vctp32:1546case Intrinsic::arm_mve_vctp64:1547case Intrinsic::aarch64_sve_convert_from_svbool:1548// WebAssembly float semantics are always known1549case Intrinsic::wasm_trunc_signed:1550case Intrinsic::wasm_trunc_unsigned:1551return true;15521553// Floating point operations cannot be folded in strictfp functions in1554// general case. 
  // They can be folded if the FP environment is known to the compiler.
  case Intrinsic::minnum:
  case Intrinsic::maxnum:
  case Intrinsic::minimum:
  case Intrinsic::maximum:
  case Intrinsic::log:
  case Intrinsic::log2:
  case Intrinsic::log10:
  case Intrinsic::exp:
  case Intrinsic::exp2:
  case Intrinsic::exp10:
  case Intrinsic::sqrt:
  case Intrinsic::sin:
  case Intrinsic::cos:
  case Intrinsic::pow:
  case Intrinsic::powi:
  case Intrinsic::ldexp:
  case Intrinsic::fma:
  case Intrinsic::fmuladd:
  case Intrinsic::frexp:
  case Intrinsic::fptoui_sat:
  case Intrinsic::fptosi_sat:
  case Intrinsic::convert_from_fp16:
  case Intrinsic::convert_to_fp16:
  case Intrinsic::amdgcn_cos:
  case Intrinsic::amdgcn_cubeid:
  case Intrinsic::amdgcn_cubema:
  case Intrinsic::amdgcn_cubesc:
  case Intrinsic::amdgcn_cubetc:
  case Intrinsic::amdgcn_fmul_legacy:
  case Intrinsic::amdgcn_fma_legacy:
  case Intrinsic::amdgcn_fract:
  case Intrinsic::amdgcn_sin:
  // The intrinsics below depend on the rounding mode in MXCSR.
  case Intrinsic::x86_sse_cvtss2si:
  case Intrinsic::x86_sse_cvtss2si64:
  case Intrinsic::x86_sse_cvttss2si:
  case Intrinsic::x86_sse_cvttss2si64:
  case Intrinsic::x86_sse2_cvtsd2si:
  case Intrinsic::x86_sse2_cvtsd2si64:
  case Intrinsic::x86_sse2_cvttsd2si:
  case Intrinsic::x86_sse2_cvttsd2si64:
  case Intrinsic::x86_avx512_vcvtss2si32:
  case Intrinsic::x86_avx512_vcvtss2si64:
  case Intrinsic::x86_avx512_cvttss2si:
  case Intrinsic::x86_avx512_cvttss2si64:
  case Intrinsic::x86_avx512_vcvtsd2si32:
  case Intrinsic::x86_avx512_vcvtsd2si64:
  case Intrinsic::x86_avx512_cvttsd2si:
  case Intrinsic::x86_avx512_cvttsd2si64:
  case Intrinsic::x86_avx512_vcvtss2usi32:
  case Intrinsic::x86_avx512_vcvtss2usi64:
  case Intrinsic::x86_avx512_cvttss2usi:
  case Intrinsic::x86_avx512_cvttss2usi64:
  case Intrinsic::x86_avx512_vcvtsd2usi32:
  case Intrinsic::x86_avx512_vcvtsd2usi64:
  case Intrinsic::x86_avx512_cvttsd2usi:
  case Intrinsic::x86_avx512_cvttsd2usi64:
    return !Call->isStrictFP();

  // Sign operations are actually bitwise operations; they do not raise
  // exceptions even for SNaNs.
  case Intrinsic::fabs:
  case Intrinsic::copysign:
  case Intrinsic::is_fpclass:
  // Non-constrained variants of the rounding operations imply the default FP
  // environment, so they can be folded in any case.
  case Intrinsic::ceil:
  case Intrinsic::floor:
  case Intrinsic::round:
  case Intrinsic::roundeven:
  case Intrinsic::trunc:
  case Intrinsic::nearbyint:
  case Intrinsic::rint:
  case Intrinsic::canonicalize:
  // Constrained intrinsics can be folded if the FP environment is known to
  // the compiler.
  case Intrinsic::experimental_constrained_fma:
  case Intrinsic::experimental_constrained_fmuladd:
  case Intrinsic::experimental_constrained_fadd:
  case Intrinsic::experimental_constrained_fsub:
  case Intrinsic::experimental_constrained_fmul:
  case Intrinsic::experimental_constrained_fdiv:
  case Intrinsic::experimental_constrained_frem:
  case Intrinsic::experimental_constrained_ceil:
  case Intrinsic::experimental_constrained_floor:
  case Intrinsic::experimental_constrained_round:
  case Intrinsic::experimental_constrained_roundeven:
  case Intrinsic::experimental_constrained_trunc:
  case Intrinsic::experimental_constrained_nearbyint:
  case Intrinsic::experimental_constrained_rint:
  case Intrinsic::experimental_constrained_fcmp:
  case Intrinsic::experimental_constrained_fcmps:
    return true;
  default:
    return false;
  case Intrinsic::not_intrinsic: break;
  }

  if (!F->hasName() || Call->isStrictFP())
    return false;

  // In these cases, checking the length is required. We don't want to return
  // true for a name like "cos\0blah", which strcmp would consider equal to
  // "cos" but which has length 8.
  StringRef Name = F->getName();
  switch (Name[0]) {
  default:
    return false;
  case 'a':
    return Name == "acos" || Name == "acosf" ||
           Name == "asin" || Name == "asinf" ||
           Name == "atan" || Name == "atanf" ||
           Name == "atan2" || Name == "atan2f";
  case 'c':
    return Name == "ceil" || Name == "ceilf" ||
           Name == "cos" || Name == "cosf" ||
           Name == "cosh" || Name == "coshf";
  case 'e':
    return Name == "exp" || Name == "expf" ||
           Name == "exp2" || Name == "exp2f";
  case 'f':
    return Name == "fabs" || Name == "fabsf" ||
           Name == "floor" || Name == "floorf" ||
           Name == "fmod" || Name == "fmodf";
  case 'l':
    return Name == "log" || Name == "logf" || Name == "log2" ||
           Name == "log2f" || Name == "log10" || Name == "log10f" ||
           Name == "logl";
  case 'n':
    return Name == "nearbyint" || Name == "nearbyintf";
  case 'p':
    return Name == "pow" || Name == "powf";
  case 'r':
    return Name == "remainder" || Name == "remainderf" ||
           Name == "rint" || Name == "rintf" ||
           Name == "round" || Name == "roundf";
  case 's':
    return Name == "sin" || Name == "sinf" ||
           Name == "sinh" || Name == "sinhf" ||
           Name == "sqrt" || Name == "sqrtf";
  case 't':
    return Name == "tan" || Name == "tanf" ||
           Name == "tanh" || Name == "tanhf" ||
           Name == "trunc" || Name == "truncf";
  case '_':
    // Check for various function names that get used for the math functions
    // when the header files are preprocessed with the macro
    // __FINITE_MATH_ONLY__ enabled.
    // The '12' here is the length of the shortest name that can match.
    // We need to check the size before looking at Name[1] and Name[2]
    // so we may as well check a limit that will eliminate mismatches.
    if (Name.size() < 12 || Name[1] != '_')
      return false;
    switch (Name[2]) {
    default:
      return false;
    case 'a':
      return Name == "__acos_finite" || Name == "__acosf_finite" ||
             Name == "__asin_finite" || Name == "__asinf_finite" ||
             Name == "__atan2_finite" || Name == "__atan2f_finite";
    case 'c':
      return Name == "__cosh_finite" || Name == "__coshf_finite";
    case 'e':
      return Name == "__exp_finite" || Name == "__expf_finite" ||
             Name == "__exp2_finite" || Name == "__exp2f_finite";
    case 'l':
      return Name == "__log_finite" || Name == "__logf_finite" ||
             Name == "__log10_finite" || Name == "__log10f_finite";
    case 'p':
      return Name == "__pow_finite" || Name == "__powf_finite";
    case 's':
      return Name == "__sinh_finite" || Name == "__sinhf_finite";
    }
  }
}

namespace {

Constant *GetConstantFoldFPValue(double V, Type *Ty) {
  if (Ty->isHalfTy() || Ty->isFloatTy()) {
    APFloat APF(V);
    bool unused;
    APF.convert(Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &unused);
    return ConstantFP::get(Ty->getContext(), APF);
  }
  if (Ty->isDoubleTy())
    return ConstantFP::get(Ty->getContext(), APFloat(V));
  llvm_unreachable("Can only constant fold half/float/double");
}

#if defined(HAS_IEE754_FLOAT128) && defined(HAS_LOGF128)
Constant *GetConstantFoldFPValue128(float128 V, Type *Ty) {
  if (Ty->isFP128Ty())
    return ConstantFP::get(Ty, V);
  llvm_unreachable("Can only constant fold fp128");
}
#endif

/// Clear the floating-point exception state.
inline void llvm_fenv_clearexcept() {
#if defined(HAVE_FENV_H) && HAVE_DECL_FE_ALL_EXCEPT
  feclearexcept(FE_ALL_EXCEPT);
#endif
  errno = 0;
}

/// Test if a floating-point exception was raised.
inline bool llvm_fenv_testexcept() {
  int errno_val = errno;
  if (errno_val == ERANGE || errno_val == EDOM)
    return true;
#if defined(HAVE_FENV_H) && HAVE_DECL_FE_ALL_EXCEPT && HAVE_DECL_FE_INEXACT
  if (fetestexcept(FE_ALL_EXCEPT & ~FE_INEXACT))
    return true;
#endif
  return false;
}

Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V,
                         Type *Ty) {
  llvm_fenv_clearexcept();
  double Result = NativeFP(V.convertToDouble());
  if (llvm_fenv_testexcept()) {
    llvm_fenv_clearexcept();
    return nullptr;
  }

  return GetConstantFoldFPValue(Result, Ty);
}

#if defined(HAS_IEE754_FLOAT128) && defined(HAS_LOGF128)
Constant *ConstantFoldFP128(float128 (*NativeFP)(float128), const APFloat &V,
                            Type *Ty) {
  llvm_fenv_clearexcept();
  float128 Result = NativeFP(V.convertToQuad());
  if (llvm_fenv_testexcept()) {
    llvm_fenv_clearexcept();
    return nullptr;
  }

  return GetConstantFoldFPValue128(Result, Ty);
}
#endif

Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
                               const APFloat &V, const APFloat &W, Type *Ty) {
  llvm_fenv_clearexcept();
  double Result = NativeFP(V.convertToDouble(), W.convertToDouble());
  if (llvm_fenv_testexcept()) {
    llvm_fenv_clearexcept();
    return nullptr;
  }

  return GetConstantFoldFPValue(Result, Ty);
}

Constant *constantFoldVectorReduce(Intrinsic::ID IID, Constant *Op) {
  FixedVectorType *VT = dyn_cast<FixedVectorType>(Op->getType());
  if (!VT)
    return nullptr;

  // This isn't strictly necessary, but handle the special/common case of zero:
  // all integer reductions of a zero input produce zero.
  if (isa<ConstantAggregateZero>(Op))
    return ConstantInt::get(VT->getElementType(), 0);

  // This is the same as the underlying binops - poison propagates.
  if (isa<PoisonValue>(Op) || Op->containsPoisonElement())
    return PoisonValue::get(VT->getElementType());

  // TODO: Handle undef.
  if (!isa<ConstantVector>(Op) && !isa<ConstantDataVector>(Op))
    return nullptr;

  auto *EltC = dyn_cast<ConstantInt>(Op->getAggregateElement(0U));
  if (!EltC)
    return nullptr;

  APInt Acc = EltC->getValue();
  for (unsigned I = 1, E = VT->getNumElements(); I != E; I++) {
    if (!(EltC = dyn_cast<ConstantInt>(Op->getAggregateElement(I))))
      return nullptr;
    const APInt &X = EltC->getValue();
    switch (IID) {
    case Intrinsic::vector_reduce_add:
      Acc = Acc + X;
      break;
    case Intrinsic::vector_reduce_mul:
      Acc = Acc * X;
      break;
    case Intrinsic::vector_reduce_and:
      Acc = Acc & X;
      break;
    case Intrinsic::vector_reduce_or:
      Acc = Acc | X;
      break;
    case Intrinsic::vector_reduce_xor:
      Acc = Acc ^ X;
      break;
    case Intrinsic::vector_reduce_smin:
      Acc = APIntOps::smin(Acc, X);
      break;
    case Intrinsic::vector_reduce_smax:
      Acc = APIntOps::smax(Acc, X);
      break;
    case Intrinsic::vector_reduce_umin:
      Acc = APIntOps::umin(Acc, X);
      break;
    case Intrinsic::vector_reduce_umax:
      Acc = APIntOps::umax(Acc, X);
      break;
    }
  }

  return ConstantInt::get(Op->getContext(), Acc);
}

/// Attempt to fold an SSE floating point to integer conversion of a
constant1873/// floating point. If roundTowardZero is false, the default IEEE rounding is1874/// used (toward nearest, ties to even). This matches the behavior of the1875/// non-truncating SSE instructions in the default rounding mode. The desired1876/// integer type Ty is used to select how many bits are available for the1877/// result. Returns null if the conversion cannot be performed, otherwise1878/// returns the Constant value resulting from the conversion.1879Constant *ConstantFoldSSEConvertToInt(const APFloat &Val, bool roundTowardZero,1880Type *Ty, bool IsSigned) {1881// All of these conversion intrinsics form an integer of at most 64bits.1882unsigned ResultWidth = Ty->getIntegerBitWidth();1883assert(ResultWidth <= 64 &&1884"Can only constant fold conversions to 64 and 32 bit ints");18851886uint64_t UIntVal;1887bool isExact = false;1888APFloat::roundingMode mode = roundTowardZero? APFloat::rmTowardZero1889: APFloat::rmNearestTiesToEven;1890APFloat::opStatus status =1891Val.convertToInteger(MutableArrayRef(UIntVal), ResultWidth,1892IsSigned, mode, &isExact);1893if (status != APFloat::opOK &&1894(!roundTowardZero || status != APFloat::opInexact))1895return nullptr;1896return ConstantInt::get(Ty, UIntVal, IsSigned);1897}18981899double getValueAsDouble(ConstantFP *Op) {1900Type *Ty = Op->getType();19011902if (Ty->isBFloatTy() || Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())1903return Op->getValueAPF().convertToDouble();19041905bool unused;1906APFloat APF = Op->getValueAPF();1907APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &unused);1908return APF.convertToDouble();1909}19101911static bool getConstIntOrUndef(Value *Op, const APInt *&C) {1912if (auto *CI = dyn_cast<ConstantInt>(Op)) {1913C = &CI->getValue();1914return true;1915}1916if (isa<UndefValue>(Op)) {1917C = nullptr;1918return true;1919}1920return false;1921}19221923/// Checks if the given intrinsic call, which evaluates to constant, is allowed1924/// to be folded.1925///1926/// \param CI Constrained intrinsic call.1927/// \param St Exception flags raised during constant evaluation.1928static bool mayFoldConstrained(ConstrainedFPIntrinsic *CI,1929APFloat::opStatus St) {1930std::optional<RoundingMode> ORM = CI->getRoundingMode();1931std::optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();19321933// If the operation does not change exception status flags, it is safe1934// to fold.1935if (St == APFloat::opStatus::opOK)1936return true;19371938// If evaluation raised FP exception, the result can depend on rounding1939// mode. If the latter is unknown, folding is not possible.1940if (ORM && *ORM == RoundingMode::Dynamic)1941return false;19421943// If FP exceptions are ignored, fold the call, even if such exception is1944// raised.1945if (EB && *EB != fp::ExceptionBehavior::ebStrict)1946return true;19471948// Leave the calculation for runtime so that exception flags be correctly set1949// in hardware.1950return false;1951}19521953/// Returns the rounding mode that should be used for constant evaluation.1954static RoundingMode1955getEvaluationRoundingMode(const ConstrainedFPIntrinsic *CI) {1956std::optional<RoundingMode> ORM = CI->getRoundingMode();1957if (!ORM || *ORM == RoundingMode::Dynamic)1958// Even if the rounding mode is unknown, try evaluating the operation.1959// If it does not raise inexact exception, rounding was not applied,1960// so the result is exact and does not depend on rounding mode. 
Whether1961// other FP exceptions are raised, it does not depend on rounding mode.1962return RoundingMode::NearestTiesToEven;1963return *ORM;1964}19651966/// Try to constant fold llvm.canonicalize for the given caller and value.1967static Constant *constantFoldCanonicalize(const Type *Ty, const CallBase *CI,1968const APFloat &Src) {1969// Zero, positive and negative, is always OK to fold.1970if (Src.isZero()) {1971// Get a fresh 0, since ppc_fp128 does have non-canonical zeros.1972return ConstantFP::get(1973CI->getContext(),1974APFloat::getZero(Src.getSemantics(), Src.isNegative()));1975}19761977if (!Ty->isIEEELikeFPTy())1978return nullptr;19791980// Zero is always canonical and the sign must be preserved.1981//1982// Denorms and nans may have special encodings, but it should be OK to fold a1983// totally average number.1984if (Src.isNormal() || Src.isInfinity())1985return ConstantFP::get(CI->getContext(), Src);19861987if (Src.isDenormal() && CI->getParent() && CI->getFunction()) {1988DenormalMode DenormMode =1989CI->getFunction()->getDenormalMode(Src.getSemantics());19901991if (DenormMode == DenormalMode::getIEEE())1992return ConstantFP::get(CI->getContext(), Src);19931994if (DenormMode.Input == DenormalMode::Dynamic)1995return nullptr;19961997// If we know if either input or output is flushed, we can fold.1998if ((DenormMode.Input == DenormalMode::Dynamic &&1999DenormMode.Output == DenormalMode::IEEE) ||2000(DenormMode.Input == DenormalMode::IEEE &&2001DenormMode.Output == DenormalMode::Dynamic))2002return nullptr;20032004bool IsPositive =2005(!Src.isNegative() || DenormMode.Input == DenormalMode::PositiveZero ||2006(DenormMode.Output == DenormalMode::PositiveZero &&2007DenormMode.Input == DenormalMode::IEEE));20082009return ConstantFP::get(CI->getContext(),2010APFloat::getZero(Src.getSemantics(), !IsPositive));2011}20122013return nullptr;2014}20152016static Constant *ConstantFoldScalarCall1(StringRef Name,2017Intrinsic::ID IntrinsicID,2018Type *Ty,2019ArrayRef<Constant *> Operands,2020const TargetLibraryInfo *TLI,2021const CallBase *Call) {2022assert(Operands.size() == 1 && "Wrong number of operands.");20232024if (IntrinsicID == Intrinsic::is_constant) {2025// We know we have a "Constant" argument. But we want to only2026// return true for manifest constants, not those that depend on2027// constants with unknowable values, e.g. GlobalValue or BlockAddress.2028if (Operands[0]->isManifestConstant())2029return ConstantInt::getTrue(Ty->getContext());2030return nullptr;2031}20322033if (isa<PoisonValue>(Operands[0])) {2034// TODO: All of these operations should probably propagate poison.2035if (IntrinsicID == Intrinsic::canonicalize)2036return PoisonValue::get(Ty);2037}20382039if (isa<UndefValue>(Operands[0])) {2040// cosine(arg) is between -1 and 1. 
cosine(invalid arg) is NaN.2041// ctpop() is between 0 and bitwidth, pick 0 for undef.2042// fptoui.sat and fptosi.sat can always fold to zero (for a zero input).2043if (IntrinsicID == Intrinsic::cos ||2044IntrinsicID == Intrinsic::ctpop ||2045IntrinsicID == Intrinsic::fptoui_sat ||2046IntrinsicID == Intrinsic::fptosi_sat ||2047IntrinsicID == Intrinsic::canonicalize)2048return Constant::getNullValue(Ty);2049if (IntrinsicID == Intrinsic::bswap ||2050IntrinsicID == Intrinsic::bitreverse ||2051IntrinsicID == Intrinsic::launder_invariant_group ||2052IntrinsicID == Intrinsic::strip_invariant_group)2053return Operands[0];2054}20552056if (isa<ConstantPointerNull>(Operands[0])) {2057// launder(null) == null == strip(null) iff in addrspace 02058if (IntrinsicID == Intrinsic::launder_invariant_group ||2059IntrinsicID == Intrinsic::strip_invariant_group) {2060// If instruction is not yet put in a basic block (e.g. when cloning2061// a function during inlining), Call's caller may not be available.2062// So check Call's BB first before querying Call->getCaller.2063const Function *Caller =2064Call->getParent() ? Call->getCaller() : nullptr;2065if (Caller &&2066!NullPointerIsDefined(2067Caller, Operands[0]->getType()->getPointerAddressSpace())) {2068return Operands[0];2069}2070return nullptr;2071}2072}20732074if (auto *Op = dyn_cast<ConstantFP>(Operands[0])) {2075if (IntrinsicID == Intrinsic::convert_to_fp16) {2076APFloat Val(Op->getValueAPF());20772078bool lost = false;2079Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &lost);20802081return ConstantInt::get(Ty->getContext(), Val.bitcastToAPInt());2082}20832084APFloat U = Op->getValueAPF();20852086if (IntrinsicID == Intrinsic::wasm_trunc_signed ||2087IntrinsicID == Intrinsic::wasm_trunc_unsigned) {2088bool Signed = IntrinsicID == Intrinsic::wasm_trunc_signed;20892090if (U.isNaN())2091return nullptr;20922093unsigned Width = Ty->getIntegerBitWidth();2094APSInt Int(Width, !Signed);2095bool IsExact = false;2096APFloat::opStatus Status =2097U.convertToInteger(Int, APFloat::rmTowardZero, &IsExact);20982099if (Status == APFloat::opOK || Status == APFloat::opInexact)2100return ConstantInt::get(Ty, Int);21012102return nullptr;2103}21042105if (IntrinsicID == Intrinsic::fptoui_sat ||2106IntrinsicID == Intrinsic::fptosi_sat) {2107// convertToInteger() already has the desired saturation semantics.2108APSInt Int(Ty->getIntegerBitWidth(),2109IntrinsicID == Intrinsic::fptoui_sat);2110bool IsExact;2111U.convertToInteger(Int, APFloat::rmTowardZero, &IsExact);2112return ConstantInt::get(Ty, Int);2113}21142115if (IntrinsicID == Intrinsic::canonicalize)2116return constantFoldCanonicalize(Ty, Call, U);21172118#if defined(HAS_IEE754_FLOAT128) && defined(HAS_LOGF128)2119if (Ty->isFP128Ty()) {2120if (IntrinsicID == Intrinsic::log) {2121float128 Result = logf128(Op->getValueAPF().convertToQuad());2122return GetConstantFoldFPValue128(Result, Ty);2123}21242125LibFunc Fp128Func = NotLibFunc;2126if (TLI->getLibFunc(Name, Fp128Func) && TLI->has(Fp128Func) &&2127Fp128Func == LibFunc_logl)2128return ConstantFoldFP128(logf128, Op->getValueAPF(), Ty);2129}2130#endif21312132if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())2133return nullptr;21342135// Use internal versions of these intrinsics.21362137if (IntrinsicID == Intrinsic::nearbyint || IntrinsicID == Intrinsic::rint) {2138U.roundToIntegral(APFloat::rmNearestTiesToEven);2139return ConstantFP::get(Ty->getContext(), U);2140}21412142if (IntrinsicID == Intrinsic::round) 
{2143U.roundToIntegral(APFloat::rmNearestTiesToAway);2144return ConstantFP::get(Ty->getContext(), U);2145}21462147if (IntrinsicID == Intrinsic::roundeven) {2148U.roundToIntegral(APFloat::rmNearestTiesToEven);2149return ConstantFP::get(Ty->getContext(), U);2150}21512152if (IntrinsicID == Intrinsic::ceil) {2153U.roundToIntegral(APFloat::rmTowardPositive);2154return ConstantFP::get(Ty->getContext(), U);2155}21562157if (IntrinsicID == Intrinsic::floor) {2158U.roundToIntegral(APFloat::rmTowardNegative);2159return ConstantFP::get(Ty->getContext(), U);2160}21612162if (IntrinsicID == Intrinsic::trunc) {2163U.roundToIntegral(APFloat::rmTowardZero);2164return ConstantFP::get(Ty->getContext(), U);2165}21662167if (IntrinsicID == Intrinsic::fabs) {2168U.clearSign();2169return ConstantFP::get(Ty->getContext(), U);2170}21712172if (IntrinsicID == Intrinsic::amdgcn_fract) {2173// The v_fract instruction behaves like the OpenCL spec, which defines2174// fract(x) as fmin(x - floor(x), 0x1.fffffep-1f): "The min() operator is2175// there to prevent fract(-small) from returning 1.0. It returns the2176// largest positive floating-point number less than 1.0."2177APFloat FloorU(U);2178FloorU.roundToIntegral(APFloat::rmTowardNegative);2179APFloat FractU(U - FloorU);2180APFloat AlmostOne(U.getSemantics(), 1);2181AlmostOne.next(/*nextDown*/ true);2182return ConstantFP::get(Ty->getContext(), minimum(FractU, AlmostOne));2183}21842185// Rounding operations (floor, trunc, ceil, round and nearbyint) do not2186// raise FP exceptions, unless the argument is signaling NaN.21872188std::optional<APFloat::roundingMode> RM;2189switch (IntrinsicID) {2190default:2191break;2192case Intrinsic::experimental_constrained_nearbyint:2193case Intrinsic::experimental_constrained_rint: {2194auto CI = cast<ConstrainedFPIntrinsic>(Call);2195RM = CI->getRoundingMode();2196if (!RM || *RM == RoundingMode::Dynamic)2197return nullptr;2198break;2199}2200case Intrinsic::experimental_constrained_round:2201RM = APFloat::rmNearestTiesToAway;2202break;2203case Intrinsic::experimental_constrained_ceil:2204RM = APFloat::rmTowardPositive;2205break;2206case Intrinsic::experimental_constrained_floor:2207RM = APFloat::rmTowardNegative;2208break;2209case Intrinsic::experimental_constrained_trunc:2210RM = APFloat::rmTowardZero;2211break;2212}2213if (RM) {2214auto CI = cast<ConstrainedFPIntrinsic>(Call);2215if (U.isFinite()) {2216APFloat::opStatus St = U.roundToIntegral(*RM);2217if (IntrinsicID == Intrinsic::experimental_constrained_rint &&2218St == APFloat::opInexact) {2219std::optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();2220if (EB && *EB == fp::ebStrict)2221return nullptr;2222}2223} else if (U.isSignaling()) {2224std::optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();2225if (EB && *EB != fp::ebIgnore)2226return nullptr;2227U = APFloat::getQNaN(U.getSemantics());2228}2229return ConstantFP::get(Ty->getContext(), U);2230}22312232/// We only fold functions with finite arguments. Folding NaN and inf is2233/// likely to be aborted with an exception anyway, and some host libms2234/// have known errors raising exceptions.2235if (!U.isFinite())2236return nullptr;22372238/// Currently APFloat versions of these functions do not exist, so we use2239/// the host native double versions. Float versions are not called2240/// directly but for all these it is true (float)(f((double)arg)) ==2241/// f(arg). 
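    /// For instance, folding a float call goes through ConstantFoldFP: the
    /// float argument is widened exactly to double, the host function is
    /// evaluated, and GetConstantFoldFPValue rounds the result back to float;
    /// for the functions handled here that double rounding matches the
    /// single-precision result, per the identity above.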
Long double not supported yet.2242const APFloat &APF = Op->getValueAPF();22432244switch (IntrinsicID) {2245default: break;2246case Intrinsic::log:2247return ConstantFoldFP(log, APF, Ty);2248case Intrinsic::log2:2249// TODO: What about hosts that lack a C99 library?2250return ConstantFoldFP(log2, APF, Ty);2251case Intrinsic::log10:2252// TODO: What about hosts that lack a C99 library?2253return ConstantFoldFP(log10, APF, Ty);2254case Intrinsic::exp:2255return ConstantFoldFP(exp, APF, Ty);2256case Intrinsic::exp2:2257// Fold exp2(x) as pow(2, x), in case the host lacks a C99 library.2258return ConstantFoldBinaryFP(pow, APFloat(2.0), APF, Ty);2259case Intrinsic::exp10:2260// Fold exp10(x) as pow(10, x), in case the host lacks a C99 library.2261return ConstantFoldBinaryFP(pow, APFloat(10.0), APF, Ty);2262case Intrinsic::sin:2263return ConstantFoldFP(sin, APF, Ty);2264case Intrinsic::cos:2265return ConstantFoldFP(cos, APF, Ty);2266case Intrinsic::sqrt:2267return ConstantFoldFP(sqrt, APF, Ty);2268case Intrinsic::amdgcn_cos:2269case Intrinsic::amdgcn_sin: {2270double V = getValueAsDouble(Op);2271if (V < -256.0 || V > 256.0)2272// The gfx8 and gfx9 architectures handle arguments outside the range2273// [-256, 256] differently. This should be a rare case so bail out2274// rather than trying to handle the difference.2275return nullptr;2276bool IsCos = IntrinsicID == Intrinsic::amdgcn_cos;2277double V4 = V * 4.0;2278if (V4 == floor(V4)) {2279// Force exact results for quarter-integer inputs.2280const double SinVals[4] = { 0.0, 1.0, 0.0, -1.0 };2281V = SinVals[((int)V4 + (IsCos ? 1 : 0)) & 3];2282} else {2283if (IsCos)2284V = cos(V * 2.0 * numbers::pi);2285else2286V = sin(V * 2.0 * numbers::pi);2287}2288return GetConstantFoldFPValue(V, Ty);2289}2290}22912292if (!TLI)2293return nullptr;22942295LibFunc Func = NotLibFunc;2296if (!TLI->getLibFunc(Name, Func))2297return nullptr;22982299switch (Func) {2300default:2301break;2302case LibFunc_acos:2303case LibFunc_acosf:2304case LibFunc_acos_finite:2305case LibFunc_acosf_finite:2306if (TLI->has(Func))2307return ConstantFoldFP(acos, APF, Ty);2308break;2309case LibFunc_asin:2310case LibFunc_asinf:2311case LibFunc_asin_finite:2312case LibFunc_asinf_finite:2313if (TLI->has(Func))2314return ConstantFoldFP(asin, APF, Ty);2315break;2316case LibFunc_atan:2317case LibFunc_atanf:2318if (TLI->has(Func))2319return ConstantFoldFP(atan, APF, Ty);2320break;2321case LibFunc_ceil:2322case LibFunc_ceilf:2323if (TLI->has(Func)) {2324U.roundToIntegral(APFloat::rmTowardPositive);2325return ConstantFP::get(Ty->getContext(), U);2326}2327break;2328case LibFunc_cos:2329case LibFunc_cosf:2330if (TLI->has(Func))2331return ConstantFoldFP(cos, APF, Ty);2332break;2333case LibFunc_cosh:2334case LibFunc_coshf:2335case LibFunc_cosh_finite:2336case LibFunc_coshf_finite:2337if (TLI->has(Func))2338return ConstantFoldFP(cosh, APF, Ty);2339break;2340case LibFunc_exp:2341case LibFunc_expf:2342case LibFunc_exp_finite:2343case LibFunc_expf_finite:2344if (TLI->has(Func))2345return ConstantFoldFP(exp, APF, Ty);2346break;2347case LibFunc_exp2:2348case LibFunc_exp2f:2349case LibFunc_exp2_finite:2350case LibFunc_exp2f_finite:2351if (TLI->has(Func))2352// Fold exp2(x) as pow(2, x), in case the host lacks a C99 library.2353return ConstantFoldBinaryFP(pow, APFloat(2.0), APF, Ty);2354break;2355case LibFunc_fabs:2356case LibFunc_fabsf:2357if (TLI->has(Func)) {2358U.clearSign();2359return ConstantFP::get(Ty->getContext(), U);2360}2361break;2362case LibFunc_floor:2363case LibFunc_floorf:2364if (TLI->has(Func)) 
{2365U.roundToIntegral(APFloat::rmTowardNegative);2366return ConstantFP::get(Ty->getContext(), U);2367}2368break;2369case LibFunc_log:2370case LibFunc_logf:2371case LibFunc_log_finite:2372case LibFunc_logf_finite:2373if (!APF.isNegative() && !APF.isZero() && TLI->has(Func))2374return ConstantFoldFP(log, APF, Ty);2375break;2376case LibFunc_log2:2377case LibFunc_log2f:2378case LibFunc_log2_finite:2379case LibFunc_log2f_finite:2380if (!APF.isNegative() && !APF.isZero() && TLI->has(Func))2381// TODO: What about hosts that lack a C99 library?2382return ConstantFoldFP(log2, APF, Ty);2383break;2384case LibFunc_log10:2385case LibFunc_log10f:2386case LibFunc_log10_finite:2387case LibFunc_log10f_finite:2388if (!APF.isNegative() && !APF.isZero() && TLI->has(Func))2389// TODO: What about hosts that lack a C99 library?2390return ConstantFoldFP(log10, APF, Ty);2391break;2392case LibFunc_logl:2393return nullptr;2394case LibFunc_nearbyint:2395case LibFunc_nearbyintf:2396case LibFunc_rint:2397case LibFunc_rintf:2398if (TLI->has(Func)) {2399U.roundToIntegral(APFloat::rmNearestTiesToEven);2400return ConstantFP::get(Ty->getContext(), U);2401}2402break;2403case LibFunc_round:2404case LibFunc_roundf:2405if (TLI->has(Func)) {2406U.roundToIntegral(APFloat::rmNearestTiesToAway);2407return ConstantFP::get(Ty->getContext(), U);2408}2409break;2410case LibFunc_sin:2411case LibFunc_sinf:2412if (TLI->has(Func))2413return ConstantFoldFP(sin, APF, Ty);2414break;2415case LibFunc_sinh:2416case LibFunc_sinhf:2417case LibFunc_sinh_finite:2418case LibFunc_sinhf_finite:2419if (TLI->has(Func))2420return ConstantFoldFP(sinh, APF, Ty);2421break;2422case LibFunc_sqrt:2423case LibFunc_sqrtf:2424if (!APF.isNegative() && TLI->has(Func))2425return ConstantFoldFP(sqrt, APF, Ty);2426break;2427case LibFunc_tan:2428case LibFunc_tanf:2429if (TLI->has(Func))2430return ConstantFoldFP(tan, APF, Ty);2431break;2432case LibFunc_tanh:2433case LibFunc_tanhf:2434if (TLI->has(Func))2435return ConstantFoldFP(tanh, APF, Ty);2436break;2437case LibFunc_trunc:2438case LibFunc_truncf:2439if (TLI->has(Func)) {2440U.roundToIntegral(APFloat::rmTowardZero);2441return ConstantFP::get(Ty->getContext(), U);2442}2443break;2444}2445return nullptr;2446}24472448if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) {2449switch (IntrinsicID) {2450case Intrinsic::bswap:2451return ConstantInt::get(Ty->getContext(), Op->getValue().byteSwap());2452case Intrinsic::ctpop:2453return ConstantInt::get(Ty, Op->getValue().popcount());2454case Intrinsic::bitreverse:2455return ConstantInt::get(Ty->getContext(), Op->getValue().reverseBits());2456case Intrinsic::convert_from_fp16: {2457APFloat Val(APFloat::IEEEhalf(), Op->getValue());24582459bool lost = false;2460APFloat::opStatus status = Val.convert(2461Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &lost);24622463// Conversion is always precise.2464(void)status;2465assert(status != APFloat::opInexact && !lost &&2466"Precision lost during fp16 constfolding");24672468return ConstantFP::get(Ty->getContext(), Val);2469}24702471case Intrinsic::amdgcn_s_wqm: {2472uint64_t Val = Op->getZExtValue();2473Val |= (Val & 0x5555555555555555ULL) << 1 |2474((Val >> 1) & 0x5555555555555555ULL);2475Val |= (Val & 0x3333333333333333ULL) << 2 |2476((Val >> 2) & 0x3333333333333333ULL);2477return ConstantInt::get(Ty, Val);2478}24792480case Intrinsic::amdgcn_s_quadmask: {2481uint64_t Val = Op->getZExtValue();2482uint64_t QuadMask = 0;2483for (unsigned I = 0; I < Op->getBitWidth() / 4; ++I, Val >>= 4) {2484if (!(Val & 0xF))2485continue;24862487QuadMask 
|= (1ULL << I);2488}2489return ConstantInt::get(Ty, QuadMask);2490}24912492case Intrinsic::amdgcn_s_bitreplicate: {2493uint64_t Val = Op->getZExtValue();2494Val = (Val & 0x000000000000FFFFULL) | (Val & 0x00000000FFFF0000ULL) << 16;2495Val = (Val & 0x000000FF000000FFULL) | (Val & 0x0000FF000000FF00ULL) << 8;2496Val = (Val & 0x000F000F000F000FULL) | (Val & 0x00F000F000F000F0ULL) << 4;2497Val = (Val & 0x0303030303030303ULL) | (Val & 0x0C0C0C0C0C0C0C0CULL) << 2;2498Val = (Val & 0x1111111111111111ULL) | (Val & 0x2222222222222222ULL) << 1;2499Val = Val | Val << 1;2500return ConstantInt::get(Ty, Val);2501}25022503default:2504return nullptr;2505}2506}25072508switch (IntrinsicID) {2509default: break;2510case Intrinsic::vector_reduce_add:2511case Intrinsic::vector_reduce_mul:2512case Intrinsic::vector_reduce_and:2513case Intrinsic::vector_reduce_or:2514case Intrinsic::vector_reduce_xor:2515case Intrinsic::vector_reduce_smin:2516case Intrinsic::vector_reduce_smax:2517case Intrinsic::vector_reduce_umin:2518case Intrinsic::vector_reduce_umax:2519if (Constant *C = constantFoldVectorReduce(IntrinsicID, Operands[0]))2520return C;2521break;2522}25232524// Support ConstantVector in case we have an Undef in the top.2525if (isa<ConstantVector>(Operands[0]) ||2526isa<ConstantDataVector>(Operands[0])) {2527auto *Op = cast<Constant>(Operands[0]);2528switch (IntrinsicID) {2529default: break;2530case Intrinsic::x86_sse_cvtss2si:2531case Intrinsic::x86_sse_cvtss2si64:2532case Intrinsic::x86_sse2_cvtsd2si:2533case Intrinsic::x86_sse2_cvtsd2si64:2534if (ConstantFP *FPOp =2535dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))2536return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),2537/*roundTowardZero=*/false, Ty,2538/*IsSigned*/true);2539break;2540case Intrinsic::x86_sse_cvttss2si:2541case Intrinsic::x86_sse_cvttss2si64:2542case Intrinsic::x86_sse2_cvttsd2si:2543case Intrinsic::x86_sse2_cvttsd2si64:2544if (ConstantFP *FPOp =2545dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))2546return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),2547/*roundTowardZero=*/true, Ty,2548/*IsSigned*/true);2549break;2550}2551}25522553return nullptr;2554}25552556static Constant *evaluateCompare(const APFloat &Op1, const APFloat &Op2,2557const ConstrainedFPIntrinsic *Call) {2558APFloat::opStatus St = APFloat::opOK;2559auto *FCmp = cast<ConstrainedFPCmpIntrinsic>(Call);2560FCmpInst::Predicate Cond = FCmp->getPredicate();2561if (FCmp->isSignaling()) {2562if (Op1.isNaN() || Op2.isNaN())2563St = APFloat::opInvalidOp;2564} else {2565if (Op1.isSignaling() || Op2.isSignaling())2566St = APFloat::opInvalidOp;2567}2568bool Result = FCmpInst::compare(Op1, Op2, Cond);2569if (mayFoldConstrained(const_cast<ConstrainedFPCmpIntrinsic *>(FCmp), St))2570return ConstantInt::get(Call->getType()->getScalarType(), Result);2571return nullptr;2572}25732574static Constant *ConstantFoldLibCall2(StringRef Name, Type *Ty,2575ArrayRef<Constant *> Operands,2576const TargetLibraryInfo *TLI) {2577if (!TLI)2578return nullptr;25792580LibFunc Func = NotLibFunc;2581if (!TLI->getLibFunc(Name, Func))2582return nullptr;25832584const auto *Op1 = dyn_cast<ConstantFP>(Operands[0]);2585if (!Op1)2586return nullptr;25872588const auto *Op2 = dyn_cast<ConstantFP>(Operands[1]);2589if (!Op2)2590return nullptr;25912592const APFloat &Op1V = Op1->getValueAPF();2593const APFloat &Op2V = Op2->getValueAPF();25942595switch (Func) {2596default:2597break;2598case LibFunc_pow:2599case LibFunc_powf:2600case LibFunc_pow_finite:2601case LibFunc_powf_finite:2602if 
(TLI->has(Func))2603return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);2604break;2605case LibFunc_fmod:2606case LibFunc_fmodf:2607if (TLI->has(Func)) {2608APFloat V = Op1->getValueAPF();2609if (APFloat::opStatus::opOK == V.mod(Op2->getValueAPF()))2610return ConstantFP::get(Ty->getContext(), V);2611}2612break;2613case LibFunc_remainder:2614case LibFunc_remainderf:2615if (TLI->has(Func)) {2616APFloat V = Op1->getValueAPF();2617if (APFloat::opStatus::opOK == V.remainder(Op2->getValueAPF()))2618return ConstantFP::get(Ty->getContext(), V);2619}2620break;2621case LibFunc_atan2:2622case LibFunc_atan2f:2623// atan2(+/-0.0, +/-0.0) is known to raise an exception on some libm2624// (Solaris), so we do not assume a known result for that.2625if (Op1V.isZero() && Op2V.isZero())2626return nullptr;2627[[fallthrough]];2628case LibFunc_atan2_finite:2629case LibFunc_atan2f_finite:2630if (TLI->has(Func))2631return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);2632break;2633}26342635return nullptr;2636}26372638static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty,2639ArrayRef<Constant *> Operands,2640const CallBase *Call) {2641assert(Operands.size() == 2 && "Wrong number of operands.");26422643if (Ty->isFloatingPointTy()) {2644// TODO: We should have undef handling for all of the FP intrinsics that2645// are attempted to be folded in this function.2646bool IsOp0Undef = isa<UndefValue>(Operands[0]);2647bool IsOp1Undef = isa<UndefValue>(Operands[1]);2648switch (IntrinsicID) {2649case Intrinsic::maxnum:2650case Intrinsic::minnum:2651case Intrinsic::maximum:2652case Intrinsic::minimum:2653// If one argument is undef, return the other argument.2654if (IsOp0Undef)2655return Operands[1];2656if (IsOp1Undef)2657return Operands[0];2658break;2659}2660}26612662if (const auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {2663const APFloat &Op1V = Op1->getValueAPF();26642665if (const auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {2666if (Op2->getType() != Op1->getType())2667return nullptr;2668const APFloat &Op2V = Op2->getValueAPF();26692670if (const auto *ConstrIntr =2671dyn_cast_if_present<ConstrainedFPIntrinsic>(Call)) {2672RoundingMode RM = getEvaluationRoundingMode(ConstrIntr);2673APFloat Res = Op1V;2674APFloat::opStatus St;2675switch (IntrinsicID) {2676default:2677return nullptr;2678case Intrinsic::experimental_constrained_fadd:2679St = Res.add(Op2V, RM);2680break;2681case Intrinsic::experimental_constrained_fsub:2682St = Res.subtract(Op2V, RM);2683break;2684case Intrinsic::experimental_constrained_fmul:2685St = Res.multiply(Op2V, RM);2686break;2687case Intrinsic::experimental_constrained_fdiv:2688St = Res.divide(Op2V, RM);2689break;2690case Intrinsic::experimental_constrained_frem:2691St = Res.mod(Op2V);2692break;2693case Intrinsic::experimental_constrained_fcmp:2694case Intrinsic::experimental_constrained_fcmps:2695return evaluateCompare(Op1V, Op2V, ConstrIntr);2696}2697if (mayFoldConstrained(const_cast<ConstrainedFPIntrinsic *>(ConstrIntr),2698St))2699return ConstantFP::get(Ty->getContext(), Res);2700return nullptr;2701}27022703switch (IntrinsicID) {2704default:2705break;2706case Intrinsic::copysign:2707return ConstantFP::get(Ty->getContext(), APFloat::copySign(Op1V, Op2V));2708case Intrinsic::minnum:2709return ConstantFP::get(Ty->getContext(), minnum(Op1V, Op2V));2710case Intrinsic::maxnum:2711return ConstantFP::get(Ty->getContext(), maxnum(Op1V, Op2V));2712case Intrinsic::minimum:2713return ConstantFP::get(Ty->getContext(), minimum(Op1V, Op2V));2714case Intrinsic::maximum:2715return 
ConstantFP::get(Ty->getContext(), maximum(Op1V, Op2V));2716}27172718if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())2719return nullptr;27202721switch (IntrinsicID) {2722default:2723break;2724case Intrinsic::pow:2725return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);2726case Intrinsic::amdgcn_fmul_legacy:2727// The legacy behaviour is that multiplying +/- 0.0 by anything, even2728// NaN or infinity, gives +0.0.2729if (Op1V.isZero() || Op2V.isZero())2730return ConstantFP::getZero(Ty);2731return ConstantFP::get(Ty->getContext(), Op1V * Op2V);2732}27332734} else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) {2735switch (IntrinsicID) {2736case Intrinsic::ldexp: {2737return ConstantFP::get(2738Ty->getContext(),2739scalbn(Op1V, Op2C->getSExtValue(), APFloat::rmNearestTiesToEven));2740}2741case Intrinsic::is_fpclass: {2742FPClassTest Mask = static_cast<FPClassTest>(Op2C->getZExtValue());2743bool Result =2744((Mask & fcSNan) && Op1V.isNaN() && Op1V.isSignaling()) ||2745((Mask & fcQNan) && Op1V.isNaN() && !Op1V.isSignaling()) ||2746((Mask & fcNegInf) && Op1V.isNegInfinity()) ||2747((Mask & fcNegNormal) && Op1V.isNormal() && Op1V.isNegative()) ||2748((Mask & fcNegSubnormal) && Op1V.isDenormal() && Op1V.isNegative()) ||2749((Mask & fcNegZero) && Op1V.isZero() && Op1V.isNegative()) ||2750((Mask & fcPosZero) && Op1V.isZero() && !Op1V.isNegative()) ||2751((Mask & fcPosSubnormal) && Op1V.isDenormal() && !Op1V.isNegative()) ||2752((Mask & fcPosNormal) && Op1V.isNormal() && !Op1V.isNegative()) ||2753((Mask & fcPosInf) && Op1V.isPosInfinity());2754return ConstantInt::get(Ty, Result);2755}2756case Intrinsic::powi: {2757int Exp = static_cast<int>(Op2C->getSExtValue());2758switch (Ty->getTypeID()) {2759case Type::HalfTyID:2760case Type::FloatTyID: {2761APFloat Res(static_cast<float>(std::pow(Op1V.convertToFloat(), Exp)));2762if (Ty->isHalfTy()) {2763bool Unused;2764Res.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven,2765&Unused);2766}2767return ConstantFP::get(Ty->getContext(), Res);2768}2769case Type::DoubleTyID:2770return ConstantFP::get(Ty, std::pow(Op1V.convertToDouble(), Exp));2771default:2772return nullptr;2773}2774}2775default:2776break;2777}2778}2779return nullptr;2780}27812782if (Operands[0]->getType()->isIntegerTy() &&2783Operands[1]->getType()->isIntegerTy()) {2784const APInt *C0, *C1;2785if (!getConstIntOrUndef(Operands[0], C0) ||2786!getConstIntOrUndef(Operands[1], C1))2787return nullptr;27882789switch (IntrinsicID) {2790default: break;2791case Intrinsic::smax:2792case Intrinsic::smin:2793case Intrinsic::umax:2794case Intrinsic::umin:2795// This is the same as for binary ops - poison propagates.2796// TODO: Poison handling should be consolidated.2797if (isa<PoisonValue>(Operands[0]) || isa<PoisonValue>(Operands[1]))2798return PoisonValue::get(Ty);27992800if (!C0 && !C1)2801return UndefValue::get(Ty);2802if (!C0 || !C1)2803return MinMaxIntrinsic::getSaturationPoint(IntrinsicID, Ty);2804return ConstantInt::get(2805Ty, ICmpInst::compare(*C0, *C1,2806MinMaxIntrinsic::getPredicate(IntrinsicID))2807? *C02808: *C1);28092810case Intrinsic::scmp:2811case Intrinsic::ucmp:2812if (isa<PoisonValue>(Operands[0]) || isa<PoisonValue>(Operands[1]))2813return PoisonValue::get(Ty);28142815if (!C0 || !C1)2816return ConstantInt::get(Ty, 0);28172818int Res;2819if (IntrinsicID == Intrinsic::scmp)2820Res = C0->sgt(*C1) ? 1 : C0->slt(*C1) ? -1 : 0;2821else2822Res = C0->ugt(*C1) ? 1 : C0->ult(*C1) ? 
-1 : 0;2823return ConstantInt::get(Ty, Res, /*IsSigned=*/true);28242825case Intrinsic::usub_with_overflow:2826case Intrinsic::ssub_with_overflow:2827// X - undef -> { 0, false }2828// undef - X -> { 0, false }2829if (!C0 || !C1)2830return Constant::getNullValue(Ty);2831[[fallthrough]];2832case Intrinsic::uadd_with_overflow:2833case Intrinsic::sadd_with_overflow:2834// X + undef -> { -1, false }2835// undef + x -> { -1, false }2836if (!C0 || !C1) {2837return ConstantStruct::get(2838cast<StructType>(Ty),2839{Constant::getAllOnesValue(Ty->getStructElementType(0)),2840Constant::getNullValue(Ty->getStructElementType(1))});2841}2842[[fallthrough]];2843case Intrinsic::smul_with_overflow:2844case Intrinsic::umul_with_overflow: {2845// undef * X -> { 0, false }2846// X * undef -> { 0, false }2847if (!C0 || !C1)2848return Constant::getNullValue(Ty);28492850APInt Res;2851bool Overflow;2852switch (IntrinsicID) {2853default: llvm_unreachable("Invalid case");2854case Intrinsic::sadd_with_overflow:2855Res = C0->sadd_ov(*C1, Overflow);2856break;2857case Intrinsic::uadd_with_overflow:2858Res = C0->uadd_ov(*C1, Overflow);2859break;2860case Intrinsic::ssub_with_overflow:2861Res = C0->ssub_ov(*C1, Overflow);2862break;2863case Intrinsic::usub_with_overflow:2864Res = C0->usub_ov(*C1, Overflow);2865break;2866case Intrinsic::smul_with_overflow:2867Res = C0->smul_ov(*C1, Overflow);2868break;2869case Intrinsic::umul_with_overflow:2870Res = C0->umul_ov(*C1, Overflow);2871break;2872}2873Constant *Ops[] = {2874ConstantInt::get(Ty->getContext(), Res),2875ConstantInt::get(Type::getInt1Ty(Ty->getContext()), Overflow)2876};2877return ConstantStruct::get(cast<StructType>(Ty), Ops);2878}2879case Intrinsic::uadd_sat:2880case Intrinsic::sadd_sat:2881// This is the same as for binary ops - poison propagates.2882// TODO: Poison handling should be consolidated.2883if (isa<PoisonValue>(Operands[0]) || isa<PoisonValue>(Operands[1]))2884return PoisonValue::get(Ty);28852886if (!C0 && !C1)2887return UndefValue::get(Ty);2888if (!C0 || !C1)2889return Constant::getAllOnesValue(Ty);2890if (IntrinsicID == Intrinsic::uadd_sat)2891return ConstantInt::get(Ty, C0->uadd_sat(*C1));2892else2893return ConstantInt::get(Ty, C0->sadd_sat(*C1));2894case Intrinsic::usub_sat:2895case Intrinsic::ssub_sat:2896// This is the same as for binary ops - poison propagates.2897// TODO: Poison handling should be consolidated.2898if (isa<PoisonValue>(Operands[0]) || isa<PoisonValue>(Operands[1]))2899return PoisonValue::get(Ty);29002901if (!C0 && !C1)2902return UndefValue::get(Ty);2903if (!C0 || !C1)2904return Constant::getNullValue(Ty);2905if (IntrinsicID == Intrinsic::usub_sat)2906return ConstantInt::get(Ty, C0->usub_sat(*C1));2907else2908return ConstantInt::get(Ty, C0->ssub_sat(*C1));2909case Intrinsic::cttz:2910case Intrinsic::ctlz:2911assert(C1 && "Must be constant int");29122913// cttz(0, 1) and ctlz(0, 1) are poison.2914if (C1->isOne() && (!C0 || C0->isZero()))2915return PoisonValue::get(Ty);2916if (!C0)2917return Constant::getNullValue(Ty);2918if (IntrinsicID == Intrinsic::cttz)2919return ConstantInt::get(Ty, C0->countr_zero());2920else2921return ConstantInt::get(Ty, C0->countl_zero());29222923case Intrinsic::abs:2924assert(C1 && "Must be constant int");2925assert((C1->isOne() || C1->isZero()) && "Must be 0 or 1");29262927// Undef or minimum val operand with poison min --> undef2928if (C1->isOne() && (!C0 || C0->isMinSignedValue()))2929return UndefValue::get(Ty);29302931// Undef operand with no poison min --> 0 (sign bit must be clear)2932if 
(!C0)2933return Constant::getNullValue(Ty);29342935return ConstantInt::get(Ty, C0->abs());2936case Intrinsic::amdgcn_wave_reduce_umin:2937case Intrinsic::amdgcn_wave_reduce_umax:2938return dyn_cast<Constant>(Operands[0]);2939}29402941return nullptr;2942}29432944// Support ConstantVector in case we have an Undef in the top.2945if ((isa<ConstantVector>(Operands[0]) ||2946isa<ConstantDataVector>(Operands[0])) &&2947// Check for default rounding mode.2948// FIXME: Support other rounding modes?2949isa<ConstantInt>(Operands[1]) &&2950cast<ConstantInt>(Operands[1])->getValue() == 4) {2951auto *Op = cast<Constant>(Operands[0]);2952switch (IntrinsicID) {2953default: break;2954case Intrinsic::x86_avx512_vcvtss2si32:2955case Intrinsic::x86_avx512_vcvtss2si64:2956case Intrinsic::x86_avx512_vcvtsd2si32:2957case Intrinsic::x86_avx512_vcvtsd2si64:2958if (ConstantFP *FPOp =2959dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))2960return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),2961/*roundTowardZero=*/false, Ty,2962/*IsSigned*/true);2963break;2964case Intrinsic::x86_avx512_vcvtss2usi32:2965case Intrinsic::x86_avx512_vcvtss2usi64:2966case Intrinsic::x86_avx512_vcvtsd2usi32:2967case Intrinsic::x86_avx512_vcvtsd2usi64:2968if (ConstantFP *FPOp =2969dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))2970return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),2971/*roundTowardZero=*/false, Ty,2972/*IsSigned*/false);2973break;2974case Intrinsic::x86_avx512_cvttss2si:2975case Intrinsic::x86_avx512_cvttss2si64:2976case Intrinsic::x86_avx512_cvttsd2si:2977case Intrinsic::x86_avx512_cvttsd2si64:2978if (ConstantFP *FPOp =2979dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))2980return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),2981/*roundTowardZero=*/true, Ty,2982/*IsSigned*/true);2983break;2984case Intrinsic::x86_avx512_cvttss2usi:2985case Intrinsic::x86_avx512_cvttss2usi64:2986case Intrinsic::x86_avx512_cvttsd2usi:2987case Intrinsic::x86_avx512_cvttsd2usi64:2988if (ConstantFP *FPOp =2989dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))2990return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),2991/*roundTowardZero=*/true, Ty,2992/*IsSigned*/false);2993break;2994}2995}2996return nullptr;2997}29982999static APFloat ConstantFoldAMDGCNCubeIntrinsic(Intrinsic::ID IntrinsicID,3000const APFloat &S0,3001const APFloat &S1,3002const APFloat &S2) {3003unsigned ID;3004const fltSemantics &Sem = S0.getSemantics();3005APFloat MA(Sem), SC(Sem), TC(Sem);3006if (abs(S2) >= abs(S0) && abs(S2) >= abs(S1)) {3007if (S2.isNegative() && S2.isNonZero() && !S2.isNaN()) {3008// S2 < 03009ID = 5;3010SC = -S0;3011} else {3012ID = 4;3013SC = S0;3014}3015MA = S2;3016TC = -S1;3017} else if (abs(S1) >= abs(S0)) {3018if (S1.isNegative() && S1.isNonZero() && !S1.isNaN()) {3019// S1 < 03020ID = 3;3021TC = -S2;3022} else {3023ID = 2;3024TC = S2;3025}3026MA = S1;3027SC = S0;3028} else {3029if (S0.isNegative() && S0.isNonZero() && !S0.isNaN()) {3030// S0 < 03031ID = 1;3032SC = S2;3033} else {3034ID = 0;3035SC = -S2;3036}3037MA = S0;3038TC = -S1;3039}3040switch (IntrinsicID) {3041default:3042llvm_unreachable("unhandled amdgcn cube intrinsic");3043case Intrinsic::amdgcn_cubeid:3044return APFloat(Sem, ID);3045case Intrinsic::amdgcn_cubema:3046return MA + MA;3047case Intrinsic::amdgcn_cubesc:3048return SC;3049case Intrinsic::amdgcn_cubetc:3050return TC;3051}3052}30533054static Constant *ConstantFoldAMDGCNPermIntrinsic(ArrayRef<Constant *> Operands,3055Type *Ty) {3056const APInt *C0, *C1, *C2;3057if 
(!getConstIntOrUndef(Operands[0], C0) ||3058!getConstIntOrUndef(Operands[1], C1) ||3059!getConstIntOrUndef(Operands[2], C2))3060return nullptr;30613062if (!C2)3063return UndefValue::get(Ty);30643065APInt Val(32, 0);3066unsigned NumUndefBytes = 0;3067for (unsigned I = 0; I < 32; I += 8) {3068unsigned Sel = C2->extractBitsAsZExtValue(8, I);3069unsigned B = 0;30703071if (Sel >= 13)3072B = 0xff;3073else if (Sel == 12)3074B = 0x00;3075else {3076const APInt *Src = ((Sel & 10) == 10 || (Sel & 12) == 4) ? C0 : C1;3077if (!Src)3078++NumUndefBytes;3079else if (Sel < 8)3080B = Src->extractBitsAsZExtValue(8, (Sel & 3) * 8);3081else3082B = Src->extractBitsAsZExtValue(1, (Sel & 1) ? 31 : 15) * 0xff;3083}30843085Val.insertBits(B, I, 8);3086}30873088if (NumUndefBytes == 4)3089return UndefValue::get(Ty);30903091return ConstantInt::get(Ty, Val);3092}30933094static Constant *ConstantFoldScalarCall3(StringRef Name,3095Intrinsic::ID IntrinsicID,3096Type *Ty,3097ArrayRef<Constant *> Operands,3098const TargetLibraryInfo *TLI,3099const CallBase *Call) {3100assert(Operands.size() == 3 && "Wrong number of operands.");31013102if (const auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {3103if (const auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {3104if (const auto *Op3 = dyn_cast<ConstantFP>(Operands[2])) {3105const APFloat &C1 = Op1->getValueAPF();3106const APFloat &C2 = Op2->getValueAPF();3107const APFloat &C3 = Op3->getValueAPF();31083109if (const auto *ConstrIntr = dyn_cast<ConstrainedFPIntrinsic>(Call)) {3110RoundingMode RM = getEvaluationRoundingMode(ConstrIntr);3111APFloat Res = C1;3112APFloat::opStatus St;3113switch (IntrinsicID) {3114default:3115return nullptr;3116case Intrinsic::experimental_constrained_fma:3117case Intrinsic::experimental_constrained_fmuladd:3118St = Res.fusedMultiplyAdd(C2, C3, RM);3119break;3120}3121if (mayFoldConstrained(3122const_cast<ConstrainedFPIntrinsic *>(ConstrIntr), St))3123return ConstantFP::get(Ty->getContext(), Res);3124return nullptr;3125}31263127switch (IntrinsicID) {3128default: break;3129case Intrinsic::amdgcn_fma_legacy: {3130// The legacy behaviour is that multiplying +/- 0.0 by anything, even3131// NaN or infinity, gives +0.0.3132if (C1.isZero() || C2.isZero()) {3133// It's tempting to just return C3 here, but that would give the3134// wrong result if C3 was -0.0.3135return ConstantFP::get(Ty->getContext(), APFloat(0.0f) + C3);3136}3137[[fallthrough]];3138}3139case Intrinsic::fma:3140case Intrinsic::fmuladd: {3141APFloat V = C1;3142V.fusedMultiplyAdd(C2, C3, APFloat::rmNearestTiesToEven);3143return ConstantFP::get(Ty->getContext(), V);3144}3145case Intrinsic::amdgcn_cubeid:3146case Intrinsic::amdgcn_cubema:3147case Intrinsic::amdgcn_cubesc:3148case Intrinsic::amdgcn_cubetc: {3149APFloat V = ConstantFoldAMDGCNCubeIntrinsic(IntrinsicID, C1, C2, C3);3150return ConstantFP::get(Ty->getContext(), V);3151}3152}3153}3154}3155}31563157if (IntrinsicID == Intrinsic::smul_fix ||3158IntrinsicID == Intrinsic::smul_fix_sat) {3159// poison * C -> poison3160// C * poison -> poison3161if (isa<PoisonValue>(Operands[0]) || isa<PoisonValue>(Operands[1]))3162return PoisonValue::get(Ty);31633164const APInt *C0, *C1;3165if (!getConstIntOrUndef(Operands[0], C0) ||3166!getConstIntOrUndef(Operands[1], C1))3167return nullptr;31683169// undef * C -> 03170// C * undef -> 03171if (!C0 || !C1)3172return Constant::getNullValue(Ty);31733174// This code performs rounding towards negative infinity in case the result3175// cannot be represented exactly for the given scale. 
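    // (For example, with a scale of 2, smul.fix(i32 7, i32 6, i32 2) yields
    // (7 * 6) >> 2 = 42 >> 2 = 10, i.e. the exact value 10.5 rounded toward
    // negative infinity.)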
Targets that do care3176// about rounding should use a target hook for specifying how rounding3177// should be done, and provide their own folding to be consistent with3178// rounding. This is the same approach as used by3179// DAGTypeLegalizer::ExpandIntRes_MULFIX.3180unsigned Scale = cast<ConstantInt>(Operands[2])->getZExtValue();3181unsigned Width = C0->getBitWidth();3182assert(Scale < Width && "Illegal scale.");3183unsigned ExtendedWidth = Width * 2;3184APInt Product =3185(C0->sext(ExtendedWidth) * C1->sext(ExtendedWidth)).ashr(Scale);3186if (IntrinsicID == Intrinsic::smul_fix_sat) {3187APInt Max = APInt::getSignedMaxValue(Width).sext(ExtendedWidth);3188APInt Min = APInt::getSignedMinValue(Width).sext(ExtendedWidth);3189Product = APIntOps::smin(Product, Max);3190Product = APIntOps::smax(Product, Min);3191}3192return ConstantInt::get(Ty->getContext(), Product.sextOrTrunc(Width));3193}31943195if (IntrinsicID == Intrinsic::fshl || IntrinsicID == Intrinsic::fshr) {3196const APInt *C0, *C1, *C2;3197if (!getConstIntOrUndef(Operands[0], C0) ||3198!getConstIntOrUndef(Operands[1], C1) ||3199!getConstIntOrUndef(Operands[2], C2))3200return nullptr;32013202bool IsRight = IntrinsicID == Intrinsic::fshr;3203if (!C2)3204return Operands[IsRight ? 1 : 0];3205if (!C0 && !C1)3206return UndefValue::get(Ty);32073208// The shift amount is interpreted as modulo the bitwidth. If the shift3209// amount is effectively 0, avoid UB due to oversized inverse shift below.3210unsigned BitWidth = C2->getBitWidth();3211unsigned ShAmt = C2->urem(BitWidth);3212if (!ShAmt)3213return Operands[IsRight ? 1 : 0];32143215// (C0 << ShlAmt) | (C1 >> LshrAmt)3216unsigned LshrAmt = IsRight ? ShAmt : BitWidth - ShAmt;3217unsigned ShlAmt = !IsRight ? ShAmt : BitWidth - ShAmt;3218if (!C0)3219return ConstantInt::get(Ty, C1->lshr(LshrAmt));3220if (!C1)3221return ConstantInt::get(Ty, C0->shl(ShlAmt));3222return ConstantInt::get(Ty, C0->shl(ShlAmt) | C1->lshr(LshrAmt));3223}32243225if (IntrinsicID == Intrinsic::amdgcn_perm)3226return ConstantFoldAMDGCNPermIntrinsic(Operands, Ty);32273228return nullptr;3229}32303231static Constant *ConstantFoldScalarCall(StringRef Name,3232Intrinsic::ID IntrinsicID,3233Type *Ty,3234ArrayRef<Constant *> Operands,3235const TargetLibraryInfo *TLI,3236const CallBase *Call) {3237if (Operands.size() == 1)3238return ConstantFoldScalarCall1(Name, IntrinsicID, Ty, Operands, TLI, Call);32393240if (Operands.size() == 2) {3241if (Constant *FoldedLibCall =3242ConstantFoldLibCall2(Name, Ty, Operands, TLI)) {3243return FoldedLibCall;3244}3245return ConstantFoldIntrinsicCall2(IntrinsicID, Ty, Operands, Call);3246}32473248if (Operands.size() == 3)3249return ConstantFoldScalarCall3(Name, IntrinsicID, Ty, Operands, TLI, Call);32503251return nullptr;3252}32533254static Constant *ConstantFoldFixedVectorCall(3255StringRef Name, Intrinsic::ID IntrinsicID, FixedVectorType *FVTy,3256ArrayRef<Constant *> Operands, const DataLayout &DL,3257const TargetLibraryInfo *TLI, const CallBase *Call) {3258SmallVector<Constant *, 4> Result(FVTy->getNumElements());3259SmallVector<Constant *, 4> Lane(Operands.size());3260Type *Ty = FVTy->getElementType();32613262switch (IntrinsicID) {3263case Intrinsic::masked_load: {3264auto *SrcPtr = Operands[0];3265auto *Mask = Operands[2];3266auto *Passthru = Operands[3];32673268Constant *VecData = ConstantFoldLoadFromConstPtr(SrcPtr, FVTy, DL);32693270SmallVector<Constant *, 32> NewElements;3271for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) {3272auto *MaskElt = 
Mask->getAggregateElement(I);3273if (!MaskElt)3274break;3275auto *PassthruElt = Passthru->getAggregateElement(I);3276auto *VecElt = VecData ? VecData->getAggregateElement(I) : nullptr;3277if (isa<UndefValue>(MaskElt)) {3278if (PassthruElt)3279NewElements.push_back(PassthruElt);3280else if (VecElt)3281NewElements.push_back(VecElt);3282else3283return nullptr;3284}3285if (MaskElt->isNullValue()) {3286if (!PassthruElt)3287return nullptr;3288NewElements.push_back(PassthruElt);3289} else if (MaskElt->isOneValue()) {3290if (!VecElt)3291return nullptr;3292NewElements.push_back(VecElt);3293} else {3294return nullptr;3295}3296}3297if (NewElements.size() != FVTy->getNumElements())3298return nullptr;3299return ConstantVector::get(NewElements);3300}3301case Intrinsic::arm_mve_vctp8:3302case Intrinsic::arm_mve_vctp16:3303case Intrinsic::arm_mve_vctp32:3304case Intrinsic::arm_mve_vctp64: {3305if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) {3306unsigned Lanes = FVTy->getNumElements();3307uint64_t Limit = Op->getZExtValue();33083309SmallVector<Constant *, 16> NCs;3310for (unsigned i = 0; i < Lanes; i++) {3311if (i < Limit)3312NCs.push_back(ConstantInt::getTrue(Ty));3313else3314NCs.push_back(ConstantInt::getFalse(Ty));3315}3316return ConstantVector::get(NCs);3317}3318return nullptr;3319}3320case Intrinsic::get_active_lane_mask: {3321auto *Op0 = dyn_cast<ConstantInt>(Operands[0]);3322auto *Op1 = dyn_cast<ConstantInt>(Operands[1]);3323if (Op0 && Op1) {3324unsigned Lanes = FVTy->getNumElements();3325uint64_t Base = Op0->getZExtValue();3326uint64_t Limit = Op1->getZExtValue();33273328SmallVector<Constant *, 16> NCs;3329for (unsigned i = 0; i < Lanes; i++) {3330if (Base + i < Limit)3331NCs.push_back(ConstantInt::getTrue(Ty));3332else3333NCs.push_back(ConstantInt::getFalse(Ty));3334}3335return ConstantVector::get(NCs);3336}3337return nullptr;3338}3339default:3340break;3341}33423343for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) {3344// Gather a column of constants.3345for (unsigned J = 0, JE = Operands.size(); J != JE; ++J) {3346// Some intrinsics use a scalar type for certain arguments.3347if (isVectorIntrinsicWithScalarOpAtArg(IntrinsicID, J)) {3348Lane[J] = Operands[J];3349continue;3350}33513352Constant *Agg = Operands[J]->getAggregateElement(I);3353if (!Agg)3354return nullptr;33553356Lane[J] = Agg;3357}33583359// Use the regular scalar folding to simplify this column.3360Constant *Folded =3361ConstantFoldScalarCall(Name, IntrinsicID, Ty, Lane, TLI, Call);3362if (!Folded)3363return nullptr;3364Result[I] = Folded;3365}33663367return ConstantVector::get(Result);3368}33693370static Constant *ConstantFoldScalableVectorCall(3371StringRef Name, Intrinsic::ID IntrinsicID, ScalableVectorType *SVTy,3372ArrayRef<Constant *> Operands, const DataLayout &DL,3373const TargetLibraryInfo *TLI, const CallBase *Call) {3374switch (IntrinsicID) {3375case Intrinsic::aarch64_sve_convert_from_svbool: {3376auto *Src = dyn_cast<Constant>(Operands[0]);3377if (!Src || !Src->isNullValue())3378break;33793380return ConstantInt::getFalse(SVTy);3381}3382default:3383break;3384}3385return nullptr;3386}33873388static std::pair<Constant *, Constant *>3389ConstantFoldScalarFrexpCall(Constant *Op, Type *IntTy) {3390if (isa<PoisonValue>(Op))3391return {Op, PoisonValue::get(IntTy)};33923393auto *ConstFP = dyn_cast<ConstantFP>(Op);3394if (!ConstFP)3395return {};33963397const APFloat &U = ConstFP->getValueAPF();3398int FrexpExp;3399APFloat FrexpMant = frexp(U, FrexpExp, APFloat::rmNearestTiesToEven);3400Constant *Result0 = 
ConstantFP::get(ConstFP->getType(), FrexpMant);34013402// The exponent is an "unspecified value" for inf/nan. We use zero to avoid3403// using undef.3404Constant *Result1 = FrexpMant.isFinite() ? ConstantInt::get(IntTy, FrexpExp)3405: ConstantInt::getNullValue(IntTy);3406return {Result0, Result1};3407}34083409/// Handle intrinsics that return tuples, which may be tuples of vectors.3410static Constant *3411ConstantFoldStructCall(StringRef Name, Intrinsic::ID IntrinsicID,3412StructType *StTy, ArrayRef<Constant *> Operands,3413const DataLayout &DL, const TargetLibraryInfo *TLI,3414const CallBase *Call) {34153416switch (IntrinsicID) {3417case Intrinsic::frexp: {3418Type *Ty0 = StTy->getContainedType(0);3419Type *Ty1 = StTy->getContainedType(1)->getScalarType();34203421if (auto *FVTy0 = dyn_cast<FixedVectorType>(Ty0)) {3422SmallVector<Constant *, 4> Results0(FVTy0->getNumElements());3423SmallVector<Constant *, 4> Results1(FVTy0->getNumElements());34243425for (unsigned I = 0, E = FVTy0->getNumElements(); I != E; ++I) {3426Constant *Lane = Operands[0]->getAggregateElement(I);3427std::tie(Results0[I], Results1[I]) =3428ConstantFoldScalarFrexpCall(Lane, Ty1);3429if (!Results0[I])3430return nullptr;3431}34323433return ConstantStruct::get(StTy, ConstantVector::get(Results0),3434ConstantVector::get(Results1));3435}34363437auto [Result0, Result1] = ConstantFoldScalarFrexpCall(Operands[0], Ty1);3438if (!Result0)3439return nullptr;3440return ConstantStruct::get(StTy, Result0, Result1);3441}3442default:3443// TODO: Constant folding of vector intrinsics that fall through here does3444// not work (e.g. overflow intrinsics)3445return ConstantFoldScalarCall(Name, IntrinsicID, StTy, Operands, TLI, Call);3446}34473448return nullptr;3449}34503451} // end anonymous namespace34523453Constant *llvm::ConstantFoldBinaryIntrinsic(Intrinsic::ID ID, Constant *LHS,3454Constant *RHS, Type *Ty,3455Instruction *FMFSource) {3456return ConstantFoldIntrinsicCall2(ID, Ty, {LHS, RHS},3457dyn_cast_if_present<CallBase>(FMFSource));3458}34593460Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F,3461ArrayRef<Constant *> Operands,3462const TargetLibraryInfo *TLI,3463bool AllowNonDeterministic) {3464if (Call->isNoBuiltin())3465return nullptr;3466if (!F->hasName())3467return nullptr;34683469// If this is not an intrinsic and not recognized as a library call, bail out.3470Intrinsic::ID IID = F->getIntrinsicID();3471if (IID == Intrinsic::not_intrinsic) {3472if (!TLI)3473return nullptr;3474LibFunc LibF;3475if (!TLI->getLibFunc(*F, LibF))3476return nullptr;3477}34783479// Conservatively assume that floating-point libcalls may be3480// non-deterministic.3481Type *Ty = F->getReturnType();3482if (!AllowNonDeterministic && Ty->isFPOrFPVectorTy())3483return nullptr;34843485StringRef Name = F->getName();3486if (auto *FVTy = dyn_cast<FixedVectorType>(Ty))3487return ConstantFoldFixedVectorCall(3488Name, IID, FVTy, Operands, F->getDataLayout(), TLI, Call);34893490if (auto *SVTy = dyn_cast<ScalableVectorType>(Ty))3491return ConstantFoldScalableVectorCall(3492Name, IID, SVTy, Operands, F->getDataLayout(), TLI, Call);34933494if (auto *StTy = dyn_cast<StructType>(Ty))3495return ConstantFoldStructCall(Name, IID, StTy, Operands,3496F->getDataLayout(), TLI, Call);34973498// TODO: If this is a library function, we already discovered that above,3499// so we should pass the LibFunc, not the name (and it might be better3500// still to separate intrinsic handling from libcalls).3501return ConstantFoldScalarCall(Name, IID, Ty, Operands, TLI, 

bool llvm::isMathLibCallNoop(const CallBase *Call,
                             const TargetLibraryInfo *TLI) {
  // FIXME: Refactor this code; this duplicates logic in LibCallsShrinkWrap
  // (and to some extent ConstantFoldScalarCall).
  if (Call->isNoBuiltin() || Call->isStrictFP())
    return false;
  Function *F = Call->getCalledFunction();
  if (!F)
    return false;

  LibFunc Func;
  if (!TLI || !TLI->getLibFunc(*F, Func))
    return false;

  if (Call->arg_size() == 1) {
    if (ConstantFP *OpC = dyn_cast<ConstantFP>(Call->getArgOperand(0))) {
      const APFloat &Op = OpC->getValueAPF();
      switch (Func) {
      case LibFunc_logl:
      case LibFunc_log:
      case LibFunc_logf:
      case LibFunc_log2l:
      case LibFunc_log2:
      case LibFunc_log2f:
      case LibFunc_log10l:
      case LibFunc_log10:
      case LibFunc_log10f:
        return Op.isNaN() || (!Op.isZero() && !Op.isNegative());

      case LibFunc_expl:
      case LibFunc_exp:
      case LibFunc_expf:
        // FIXME: These boundaries are slightly conservative.
        if (OpC->getType()->isDoubleTy())
          return !(Op < APFloat(-745.0) || Op > APFloat(709.0));
        if (OpC->getType()->isFloatTy())
          return !(Op < APFloat(-103.0f) || Op > APFloat(88.0f));
        break;

      case LibFunc_exp2l:
      case LibFunc_exp2:
      case LibFunc_exp2f:
        // FIXME: These boundaries are slightly conservative.
        if (OpC->getType()->isDoubleTy())
          return !(Op < APFloat(-1074.0) || Op > APFloat(1023.0));
        if (OpC->getType()->isFloatTy())
          return !(Op < APFloat(-149.0f) || Op > APFloat(127.0f));
        break;

      case LibFunc_sinl:
      case LibFunc_sin:
      case LibFunc_sinf:
      case LibFunc_cosl:
      case LibFunc_cos:
      case LibFunc_cosf:
        return !Op.isInfinity();

      case LibFunc_tanl:
      case LibFunc_tan:
      case LibFunc_tanf: {
        // FIXME: Stop using the host math library.
        // FIXME: The computation isn't done in the right precision.
        Type *Ty = OpC->getType();
        if (Ty->isDoubleTy() || Ty->isFloatTy() || Ty->isHalfTy())
          return ConstantFoldFP(tan, OpC->getValueAPF(), Ty) != nullptr;
        break;
      }

      case LibFunc_atan:
      case LibFunc_atanf:
      case LibFunc_atanl:
        // Per POSIX, this MAY fail if Op is denormal. We choose not to fail.
        return true;

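      // asin/acos only incur a domain error (and may set errno) when the
      // argument lies outside [-1, 1]; NaN compares as unordered below and is
      // therefore treated as safe.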
      case LibFunc_asinl:
      case LibFunc_asin:
      case LibFunc_asinf:
      case LibFunc_acosl:
      case LibFunc_acos:
      case LibFunc_acosf:
        return !(Op < APFloat(Op.getSemantics(), "-1") ||
                 Op > APFloat(Op.getSemantics(), "1"));

      case LibFunc_sinh:
      case LibFunc_cosh:
      case LibFunc_sinhf:
      case LibFunc_coshf:
      case LibFunc_sinhl:
      case LibFunc_coshl:
        // FIXME: These boundaries are slightly conservative.
        if (OpC->getType()->isDoubleTy())
          return !(Op < APFloat(-710.0) || Op > APFloat(710.0));
        if (OpC->getType()->isFloatTy())
          return !(Op < APFloat(-89.0f) || Op > APFloat(89.0f));
        break;

      case LibFunc_sqrtl:
      case LibFunc_sqrt:
      case LibFunc_sqrtf:
        return Op.isNaN() || Op.isZero() || !Op.isNegative();

      // FIXME: Add more functions: sqrt_finite, atanh, expm1, log1p,
      // maybe others?
      default:
        break;
      }
    }
  }

  if (Call->arg_size() == 2) {
    ConstantFP *Op0C = dyn_cast<ConstantFP>(Call->getArgOperand(0));
    ConstantFP *Op1C = dyn_cast<ConstantFP>(Call->getArgOperand(1));
    if (Op0C && Op1C) {
      const APFloat &Op0 = Op0C->getValueAPF();
      const APFloat &Op1 = Op1C->getValueAPF();

      switch (Func) {
      case LibFunc_powl:
      case LibFunc_pow:
      case LibFunc_powf: {
        // FIXME: Stop using the host math library.
        // FIXME: The computation isn't done in the right precision.
        Type *Ty = Op0C->getType();
        if (Ty->isDoubleTy() || Ty->isFloatTy() || Ty->isHalfTy()) {
          if (Ty == Op1C->getType())
            return ConstantFoldBinaryFP(pow, Op0, Op1, Ty) != nullptr;
        }
        break;
      }

      case LibFunc_fmodl:
      case LibFunc_fmod:
      case LibFunc_fmodf:
      case LibFunc_remainderl:
      case LibFunc_remainder:
      case LibFunc_remainderf:
        return Op0.isNaN() || Op1.isNaN() ||
               (!Op0.isInfinity() && !Op1.isZero());

      case LibFunc_atan2:
      case LibFunc_atan2f:
      case LibFunc_atan2l:
        // Although IEEE-754 says atan2(+/-0.0, +/-0.0) are well-defined, and
        // GLIBC and MSVC do not appear to raise an error on those, we
        // cannot rely on that behavior. POSIX and C11 say that a domain error
        // may occur, so allow for that possibility.
        return !Op0.isZero() || !Op1.isZero();

      default:
        break;
      }
    }
  }

  return false;
}

void TargetFolder::anchor() {}