Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
//===- InstCombineCasts.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visit functions for cast operations.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <optional>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "instcombine"

/// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns
/// true for, actually insert the code to evaluate the expression.
Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
                                                 bool isSigned) {
  if (Constant *C = dyn_cast<Constant>(V))
    return ConstantFoldIntegerCast(C, Ty, isSigned, DL);

  // Otherwise, it must be an instruction.
  Instruction *I = cast<Instruction>(V);
  Instruction *Res = nullptr;
  unsigned Opc = I->getOpcode();
  switch (Opc) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::AShr:
  case Instruction::LShr:
  case Instruction::Shl:
  case Instruction::UDiv:
  case Instruction::URem: {
    Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
    Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
    Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
    break;
  }
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
    // If the source type of the cast is the type we're trying for then we can
    // just return the source. There's no need to insert it because it is not
    // new.
    if (I->getOperand(0)->getType() == Ty)
      return I->getOperand(0);

    // Otherwise, must be the same type of cast, so just reinsert a new one.
    // This also handles the case of zext(trunc(x)) -> zext(x).
    Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty,
                                      Opc == Instruction::SExt);
    break;
  case Instruction::Select: {
    Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
    Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned);
    Res = SelectInst::Create(I->getOperand(0), True, False);
    break;
  }
  case Instruction::PHI: {
    PHINode *OPN = cast<PHINode>(I);
    PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues());
    for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
      Value *V =
          EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
      NPN->addIncoming(V, OPN->getIncomingBlock(i));
    }
    Res = NPN;
    break;
  }
  case Instruction::FPToUI:
  case Instruction::FPToSI:
    Res = CastInst::Create(
        static_cast<Instruction::CastOps>(Opc), I->getOperand(0), Ty);
    break;
  case Instruction::Call:
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      switch (II->getIntrinsicID()) {
      default:
        llvm_unreachable("Unsupported call!");
      case Intrinsic::vscale: {
        Function *Fn =
            Intrinsic::getDeclaration(I->getModule(), Intrinsic::vscale, {Ty});
        Res = CallInst::Create(Fn->getFunctionType(), Fn);
        break;
      }
      }
    }
    break;
  case Instruction::ShuffleVector: {
    auto *ScalarTy = cast<VectorType>(Ty)->getElementType();
    auto *VTy = cast<VectorType>(I->getOperand(0)->getType());
    auto *FixedTy = VectorType::get(ScalarTy, VTy->getElementCount());
    Value *Op0 = EvaluateInDifferentType(I->getOperand(0), FixedTy, isSigned);
    Value *Op1 = EvaluateInDifferentType(I->getOperand(1), FixedTy, isSigned);
    Res = new ShuffleVectorInst(Op0, Op1,
                                cast<ShuffleVectorInst>(I)->getShuffleMask());
    break;
  }
  default:
    // TODO: Can handle more cases here.
    llvm_unreachable("Unreachable!");
  }

  Res->takeName(I);
  return InsertNewInstWith(Res, I->getIterator());
}

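// For example (illustrative), a zext followed by a trunc to a type that is
// still wider than the original source folds to a single zext:
//   %m = zext i16 %x to i64
//   %t = trunc i64 %m to i32
// -->
//   %t = zext i16 %x to i32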
Instruction::CastOps
InstCombinerImpl::isEliminableCastPair(const CastInst *CI1,
                                       const CastInst *CI2) {
  Type *SrcTy = CI1->getSrcTy();
  Type *MidTy = CI1->getDestTy();
  Type *DstTy = CI2->getDestTy();

  Instruction::CastOps firstOp = CI1->getOpcode();
  Instruction::CastOps secondOp = CI2->getOpcode();
  Type *SrcIntPtrTy =
      SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr;
  Type *MidIntPtrTy =
      MidTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(MidTy) : nullptr;
  Type *DstIntPtrTy =
      DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr;
  unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
                                                DstTy, SrcIntPtrTy, MidIntPtrTy,
                                                DstIntPtrTy);

  // We don't want to form an inttoptr or ptrtoint that converts to an integer
  // type that differs from the pointer size.
  if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) ||
      (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy))
    Res = 0;

  return Instruction::CastOps(Res);
}

/// Implement the transforms common to all CastInst visitors.
Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) {
  Value *Src = CI.getOperand(0);
  Type *Ty = CI.getType();

  if (auto *SrcC = dyn_cast<Constant>(Src))
    if (Constant *Res = ConstantFoldCastOperand(CI.getOpcode(), SrcC, Ty, DL))
      return replaceInstUsesWith(CI, Res);

  // Try to eliminate a cast of a cast.
  if (auto *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
    if (Instruction::CastOps NewOpc = isEliminableCastPair(CSrc, &CI)) {
      // The first cast (CSrc) is eliminable so we need to fix up or replace
      // the second cast (CI). CSrc will then have a good chance of being dead.
      auto *Res = CastInst::Create(NewOpc, CSrc->getOperand(0), Ty);
      // Point debug users of the dying cast to the new one.
      if (CSrc->hasOneUse())
        replaceAllDbgUsesWith(*CSrc, *Res, CI, DT);
      return Res;
    }
  }

  if (auto *Sel = dyn_cast<SelectInst>(Src)) {
    // We are casting a select. Try to fold the cast into the select if the
    // select does not have a compare instruction with matching operand types
    // or the select is likely better done in a narrow type.
    // Creating a select with operands that are different sizes than its
    // condition may inhibit other folds and lead to worse codegen.
    auto *Cmp = dyn_cast<CmpInst>(Sel->getCondition());
    if (!Cmp || Cmp->getOperand(0)->getType() != Sel->getType() ||
        (CI.getOpcode() == Instruction::Trunc &&
         shouldChangeType(CI.getSrcTy(), CI.getType()))) {

      // If it's a bitcast involving vectors, make sure it has the same number
      // of elements on both sides.
      if (CI.getOpcode() != Instruction::BitCast ||
          match(&CI, m_ElementWiseBitCast(m_Value()))) {
        if (Instruction *NV = FoldOpIntoSelect(CI, Sel)) {
          replaceAllDbgUsesWith(*Sel, *NV, CI, DT);
          return NV;
        }
      }
    }
  }

  // If we are casting a PHI, then fold the cast into the PHI.
  if (auto *PN = dyn_cast<PHINode>(Src)) {
    // Don't do this if it would create a PHI node with an illegal type from a
    // legal type.
    if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() ||
        shouldChangeType(CI.getSrcTy(), CI.getType()))
      if (Instruction *NV = foldOpIntoPhi(CI, PN))
        return NV;
  }

  // Canonicalize a unary shuffle after the cast if neither operation changes
  // the size or element size of the input vector.
  // TODO: We could allow size-changing ops if that doesn't harm codegen.
  // cast (shuffle X, Mask) --> shuffle (cast X), Mask
  Value *X;
  ArrayRef<int> Mask;
  if (match(Src, m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))) {
    // TODO: Allow scalable vectors?
    auto *SrcTy = dyn_cast<FixedVectorType>(X->getType());
    auto *DestTy = dyn_cast<FixedVectorType>(Ty);
    if (SrcTy && DestTy &&
        SrcTy->getNumElements() == DestTy->getNumElements() &&
        SrcTy->getPrimitiveSizeInBits() == DestTy->getPrimitiveSizeInBits()) {
      Value *CastX = Builder.CreateCast(CI.getOpcode(), X, DestTy);
      return new ShuffleVectorInst(CastX, Mask);
    }
  }

  return nullptr;
}

/// Constants and extensions/truncates from the destination type are always
/// free to be evaluated in that type. This is a helper for canEvaluate*.
static bool canAlwaysEvaluateInType(Value *V, Type *Ty) {
  if (isa<Constant>(V))
    return match(V, m_ImmConstant());

  Value *X;
  if ((match(V, m_ZExtOrSExt(m_Value(X))) || match(V, m_Trunc(m_Value(X)))) &&
      X->getType() == Ty)
    return true;

  return false;
}

/// Filter out values that we cannot evaluate in the destination type for free.
/// This is a helper for canEvaluate*.
static bool canNotEvaluateInType(Value *V, Type *Ty) {
  if (!isa<Instruction>(V))
    return true;
  // We don't extend or shrink something that has multiple uses -- doing so
  // would require duplicating the instruction which isn't profitable.
  if (!V->hasOneUse())
    return true;

  return false;
}

/// Return true if we can evaluate the specified expression tree as type Ty
/// instead of its larger type, and arrive with the same value.
/// This is used by code that tries to eliminate truncates.
///
/// Ty will always be a type smaller than V. We should return true if trunc(V)
/// can be computed by computing V in the smaller type. If V is an instruction,
/// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
/// makes sense if x and y can be efficiently truncated.
///
/// This function works on both vectors and scalars.
///
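/// For example (illustrative), trunc (add i32 %x, %y) to i16 can instead be
/// computed as add i16 (trunc %x), (trunc %y), since add produces the same
/// low 16 bits either way.
///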
static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC,
                                 Instruction *CxtI) {
  if (canAlwaysEvaluateInType(V, Ty))
    return true;
  if (canNotEvaluateInType(V, Ty))
    return false;

  auto *I = cast<Instruction>(V);
  Type *OrigTy = V->getType();
  switch (I->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    // These operators can all arbitrarily be extended or truncated.
    return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
           canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);

  case Instruction::UDiv:
  case Instruction::URem: {
    // UDiv and URem can be truncated if all the truncated bits are zero.
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    assert(BitWidth < OrigBitWidth && "Unexpected bitwidths!");
    APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
    // Do not preserve the original context instruction. Simplifying div/rem
    // based on later context may introduce a trap.
    if (IC.MaskedValueIsZero(I->getOperand(0), Mask, 0, I) &&
        IC.MaskedValueIsZero(I->getOperand(1), Mask, 0, I)) {
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, I) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, I);
    }
    break;
  }
  case Instruction::Shl: {
    // If we are truncating the result of this SHL, and if it's a shift of an
    // in-range amount, we can always perform a SHL in a smaller type.
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    if (AmtKnownBits.getMaxValue().ult(BitWidth))
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    break;
  }
  case Instruction::LShr: {
    // If this is a truncate of a logical shr, we can truncate it to a smaller
    // lshr iff we know that the bits we would otherwise be shifting in are
    // already zeros.
    // TODO: It is enough to check that the bits we would be shifting in are
    // zero - use AmtKnownBits.getMaxValue().
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
    if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
        IC.MaskedValueIsZero(I->getOperand(0), ShiftedBits, 0, CxtI)) {
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    }
    break;
  }
  case Instruction::AShr: {
    // If this is a truncate of an arithmetic shr, we can truncate it to a
    // smaller ashr iff we know that all the bits from the sign bit of the
    // original type and the sign bit of the truncate type are similar.
    // TODO: It is enough to check that the bits we would be shifting in are
    // similar to sign bit of the truncate type.
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    unsigned ShiftedBits = OrigBitWidth - BitWidth;
    if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
        ShiftedBits < IC.ComputeNumSignBits(I->getOperand(0), 0, CxtI))
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    break;
  }
  case Instruction::Trunc:
    // trunc(trunc(x)) -> trunc(x)
    return true;
  case Instruction::ZExt:
  case Instruction::SExt:
    // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
    // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
    return true;
  case Instruction::Select: {
    SelectInst *SI = cast<SelectInst>(I);
    return canEvaluateTruncated(SI->getTrueValue(), Ty, IC, CxtI) &&
           canEvaluateTruncated(SI->getFalseValue(), Ty, IC, CxtI);
  }
  case Instruction::PHI: {
    // We can change a phi if we can change all operands. Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (Value *IncValue : PN->incoming_values())
      if (!canEvaluateTruncated(IncValue, Ty, IC, CxtI))
        return false;
    return true;
  }
  case Instruction::FPToUI:
  case Instruction::FPToSI: {
    // If the integer type can hold the max FP value, it is safe to cast
    // directly to that type. Otherwise, we may create poison via overflow
    // that did not exist in the original code.
    Type *InputTy = I->getOperand(0)->getType()->getScalarType();
    const fltSemantics &Semantics = InputTy->getFltSemantics();
    uint32_t MinBitWidth = APFloatBase::semanticsIntSizeInBits(
        Semantics, I->getOpcode() == Instruction::FPToSI);
    return Ty->getScalarSizeInBits() >= MinBitWidth;
  }
  case Instruction::ShuffleVector:
    return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
           canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
  default:
    // TODO: Can handle more cases here.
    break;
  }

  return false;
}

/// Given a vector that is bitcast to an integer, optionally logically
/// right-shifted, and truncated, convert it to an extractelement.
/// Example (big endian):
///   trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32
///   --->
///   extractelement <4 x i32> %X, 2
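/// On a little-endian target the lane index is not mirrored, so the same
/// input would instead become (illustrative):
///   extractelement <4 x i32> %X, 1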
static Instruction *foldVecTruncToExtElt(TruncInst &Trunc,
                                         InstCombinerImpl &IC) {
  Value *TruncOp = Trunc.getOperand(0);
  Type *DestType = Trunc.getType();
  if (!TruncOp->hasOneUse() || !isa<IntegerType>(DestType))
    return nullptr;

  Value *VecInput = nullptr;
  ConstantInt *ShiftVal = nullptr;
  if (!match(TruncOp, m_CombineOr(m_BitCast(m_Value(VecInput)),
                                  m_LShr(m_BitCast(m_Value(VecInput)),
                                         m_ConstantInt(ShiftVal)))) ||
      !isa<VectorType>(VecInput->getType()))
    return nullptr;

  VectorType *VecType = cast<VectorType>(VecInput->getType());
  unsigned VecWidth = VecType->getPrimitiveSizeInBits();
  unsigned DestWidth = DestType->getPrimitiveSizeInBits();
  unsigned ShiftAmount = ShiftVal ? ShiftVal->getZExtValue() : 0;

  if ((VecWidth % DestWidth != 0) || (ShiftAmount % DestWidth != 0))
    return nullptr;

  // If the element type of the vector doesn't match the result type,
  // bitcast it to a vector type that we can extract from.
  unsigned NumVecElts = VecWidth / DestWidth;
  if (VecType->getElementType() != DestType) {
    VecType = FixedVectorType::get(DestType, NumVecElts);
    VecInput = IC.Builder.CreateBitCast(VecInput, VecType, "bc");
  }

  unsigned Elt = ShiftAmount / DestWidth;
  if (IC.getDataLayout().isBigEndian())
    Elt = NumVecElts - 1 - Elt;

  return ExtractElementInst::Create(VecInput, IC.Builder.getInt32(Elt));
}

/// Funnel/Rotate left/right may occur in a wider type than necessary because of
/// type promotion rules. Try to narrow the inputs and convert to funnel shift.
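/// For example (illustrative, an 8-bit rotate performed in 32 bits):
///   %z  = zext i8 %x to i32
///   %or = or (shl i32 %z, %a), (lshr i32 %z, (sub i32 8, %a))
///   %r  = trunc i32 %or to i8
/// can be rewritten as a narrow funnel shift:
///   %r = call i8 @llvm.fshl.i8(i8 trunc(%z), i8 trunc(%z), i8 trunc(%a))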
Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) {
  assert((isa<VectorType>(Trunc.getSrcTy()) ||
          shouldChangeType(Trunc.getSrcTy(), Trunc.getType())) &&
         "Don't narrow to an illegal scalar type");

  // Bail out on strange types. It is possible to handle some of these patterns
  // even with non-power-of-2 sizes, but it is not a likely scenario.
  Type *DestTy = Trunc.getType();
  unsigned NarrowWidth = DestTy->getScalarSizeInBits();
  unsigned WideWidth = Trunc.getSrcTy()->getScalarSizeInBits();
  if (!isPowerOf2_32(NarrowWidth))
    return nullptr;

  // First, find an or'd pair of opposite shifts:
  // trunc (or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1))
  BinaryOperator *Or0, *Or1;
  if (!match(Trunc.getOperand(0), m_OneUse(m_Or(m_BinOp(Or0), m_BinOp(Or1)))))
    return nullptr;

  Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
  if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
      !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
      Or0->getOpcode() == Or1->getOpcode())
    return nullptr;

  // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
  if (Or0->getOpcode() == BinaryOperator::LShr) {
    std::swap(Or0, Or1);
    std::swap(ShVal0, ShVal1);
    std::swap(ShAmt0, ShAmt1);
  }
  assert(Or0->getOpcode() == BinaryOperator::Shl &&
         Or1->getOpcode() == BinaryOperator::LShr &&
         "Illegal or(shift,shift) pair");

  // Match the shift amount operands for a funnel/rotate pattern. This always
  // matches a subtraction on the R operand.
  auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
    // The shift amounts may add up to the narrow bit width:
    // (shl ShVal0, L) | (lshr ShVal1, Width - L)
    // If this is a funnel shift (different operands are shifted), then the
    // shift amount cannot over-shift (create poison) in the narrow type.
    unsigned MaxShiftAmountWidth = Log2_32(NarrowWidth);
    APInt HiBitMask = ~APInt::getLowBitsSet(WideWidth, MaxShiftAmountWidth);
    if (ShVal0 == ShVal1 || MaskedValueIsZero(L, HiBitMask))
      if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L)))))
        return L;

    // The following patterns currently only work for rotation patterns.
    // TODO: Add more general funnel-shift compatible patterns.
    if (ShVal0 != ShVal1)
      return nullptr;

    // The shift amount may be masked with negation:
    // (shl ShVal0, (X & (Width - 1))) | (lshr ShVal1, ((-X) & (Width - 1)))
    Value *X;
    unsigned Mask = Width - 1;
    if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
        match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
      return X;

    // Same as above, but the shift amount may be extended after masking:
    if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
        match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
      return X;

    return nullptr;
  };

  Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, NarrowWidth);
  bool IsFshl = true; // Sub on LSHR.
  if (!ShAmt) {
    ShAmt = matchShiftAmount(ShAmt1, ShAmt0, NarrowWidth);
    IsFshl = false; // Sub on SHL.
  }
  if (!ShAmt)
    return nullptr;

  // The right-shifted value must have high zeros in the wide type (for example
  // from 'zext', 'and' or 'shift'). High bits of the left-shifted value are
  // truncated, so those do not matter.
  APInt HiBitMask = APInt::getHighBitsSet(WideWidth, WideWidth - NarrowWidth);
  if (!MaskedValueIsZero(ShVal1, HiBitMask, 0, &Trunc))
    return nullptr;

  // Adjust the width of ShAmt for narrowed funnel shift operation:
  // - Zero-extend if ShAmt is narrower than the destination type.
  // - Truncate if ShAmt is wider, discarding non-significant high-order bits.
  // This prepares ShAmt for llvm.fshl.i8(trunc(ShVal), trunc(ShVal),
  // zext/trunc(ShAmt)).
  Value *NarrowShAmt = Builder.CreateZExtOrTrunc(ShAmt, DestTy);

  Value *X, *Y;
  X = Y = Builder.CreateTrunc(ShVal0, DestTy);
  if (ShVal0 != ShVal1)
    Y = Builder.CreateTrunc(ShVal1, DestTy);
  Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
  Function *F = Intrinsic::getDeclaration(Trunc.getModule(), IID, DestTy);
  return CallInst::Create(F, {X, Y, NarrowShAmt});
}

/// Try to narrow the width of math or bitwise logic instructions by pulling a
/// truncate ahead of binary operators.
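/// For example (illustrative):
///   %w = add i32 %x, 42
///   %n = trunc i32 %w to i8
/// -->
///   %x.tr = trunc i32 %x to i8
///   %n    = add i8 %x.tr, 42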
Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) {
  Type *SrcTy = Trunc.getSrcTy();
  Type *DestTy = Trunc.getType();
  unsigned SrcWidth = SrcTy->getScalarSizeInBits();
  unsigned DestWidth = DestTy->getScalarSizeInBits();

  if (!isa<VectorType>(SrcTy) && !shouldChangeType(SrcTy, DestTy))
    return nullptr;

  BinaryOperator *BinOp;
  if (!match(Trunc.getOperand(0), m_OneUse(m_BinOp(BinOp))))
    return nullptr;

  Value *BinOp0 = BinOp->getOperand(0);
  Value *BinOp1 = BinOp->getOperand(1);
  switch (BinOp->getOpcode()) {
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul: {
    Constant *C;
    if (match(BinOp0, m_Constant(C))) {
      // trunc (binop C, X) --> binop (trunc C', X)
      Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
      Value *TruncX = Builder.CreateTrunc(BinOp1, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), NarrowC, TruncX);
    }
    if (match(BinOp1, m_Constant(C))) {
      // trunc (binop X, C) --> binop (trunc X, C')
      Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
      Value *TruncX = Builder.CreateTrunc(BinOp0, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), TruncX, NarrowC);
    }
    Value *X;
    if (match(BinOp0, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
      // trunc (binop (ext X), Y) --> binop X, (trunc Y)
      Value *NarrowOp1 = Builder.CreateTrunc(BinOp1, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), X, NarrowOp1);
    }
    if (match(BinOp1, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
      // trunc (binop Y, (ext X)) --> binop (trunc Y), X
      Value *NarrowOp0 = Builder.CreateTrunc(BinOp0, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), NarrowOp0, X);
    }
    break;
  }
  case Instruction::LShr:
  case Instruction::AShr: {
    // trunc (*shr (trunc A), C) --> trunc(*shr A, C)
    Value *A;
    Constant *C;
    if (match(BinOp0, m_Trunc(m_Value(A))) && match(BinOp1, m_Constant(C))) {
      unsigned MaxShiftAmt = SrcWidth - DestWidth;
      // If the shift is small enough, all zero/sign bits created by the shift
      // are removed by the trunc.
      if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
                                      APInt(SrcWidth, MaxShiftAmt)))) {
        auto *OldShift = cast<Instruction>(Trunc.getOperand(0));
        bool IsExact = OldShift->isExact();
        if (Constant *ShAmt = ConstantFoldIntegerCast(C, A->getType(),
                                                      /*IsSigned*/ true, DL)) {
          ShAmt = Constant::mergeUndefsWith(ShAmt, C);
          Value *Shift =
              OldShift->getOpcode() == Instruction::AShr
                  ? Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact)
                  : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact);
          return CastInst::CreateTruncOrBitCast(Shift, DestTy);
        }
      }
    }
    break;
  }
  default: break;
  }

  if (Instruction *NarrowOr = narrowFunnelShift(Trunc))
    return NarrowOr;

  return nullptr;
}

/// Try to narrow the width of a splat shuffle. This could be generalized to any
/// shuffle with a constant operand, but we limit the transform to avoid
/// creating a shuffle type that targets may not be able to lower effectively.
static Instruction *shrinkSplatShuffle(TruncInst &Trunc,
                                       InstCombiner::BuilderTy &Builder) {
  auto *Shuf = dyn_cast<ShuffleVectorInst>(Trunc.getOperand(0));
  if (Shuf && Shuf->hasOneUse() && match(Shuf->getOperand(1), m_Undef()) &&
      all_equal(Shuf->getShuffleMask()) &&
      Shuf->getType() == Shuf->getOperand(0)->getType()) {
    // trunc (shuf X, Undef, SplatMask) --> shuf (trunc X), Poison, SplatMask
    // trunc (shuf X, Poison, SplatMask) --> shuf (trunc X), Poison, SplatMask
    Value *NarrowOp = Builder.CreateTrunc(Shuf->getOperand(0), Trunc.getType());
    return new ShuffleVectorInst(NarrowOp, Shuf->getShuffleMask());
  }

  return nullptr;
}

/// Try to narrow the width of an insert element. This could be generalized for
/// any vector constant, but we limit the transform to insertion into undef to
/// avoid potential backend problems from unsupported insertion widths. This
/// could also be extended to handle the case of inserting a scalar constant
/// into a vector variable.
static Instruction *shrinkInsertElt(CastInst &Trunc,
                                    InstCombiner::BuilderTy &Builder) {
  Instruction::CastOps Opcode = Trunc.getOpcode();
  assert((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
         "Unexpected instruction for shrinking");

  auto *InsElt = dyn_cast<InsertElementInst>(Trunc.getOperand(0));
  if (!InsElt || !InsElt->hasOneUse())
    return nullptr;

  Type *DestTy = Trunc.getType();
  Type *DestScalarTy = DestTy->getScalarType();
  Value *VecOp = InsElt->getOperand(0);
  Value *ScalarOp = InsElt->getOperand(1);
  Value *Index = InsElt->getOperand(2);

  if (match(VecOp, m_Undef())) {
    // trunc (inselt undef, X, Index) --> inselt undef, (trunc X), Index
    // fptrunc (inselt undef, X, Index) --> inselt undef, (fptrunc X), Index
    UndefValue *NarrowUndef = UndefValue::get(DestTy);
    Value *NarrowOp = Builder.CreateCast(Opcode, ScalarOp, DestScalarTy);
    return InsertElementInst::Create(NarrowUndef, NarrowOp, Index);
  }

  return nullptr;
}

Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
  if (Instruction *Result = commonCastTransforms(Trunc))
    return Result;

  Value *Src = Trunc.getOperand(0);
  Type *DestTy = Trunc.getType(), *SrcTy = Src->getType();
  unsigned DestWidth = DestTy->getScalarSizeInBits();
  unsigned SrcWidth = SrcTy->getScalarSizeInBits();

  // Attempt to truncate the entire input expression tree to the destination
  // type. Only do this if the dest type is a simple type, don't convert the
  // expression tree to something weird like i93 unless the source is also
  // strange.
  if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) &&
      canEvaluateTruncated(Src, DestTy, *this, &Trunc)) {

    // If this cast is a truncate, evaluating in a different type always
    // eliminates the cast, so it is always a win.
    LLVM_DEBUG(
        dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                  " to avoid cast: "
               << Trunc << '\n');
    Value *Res = EvaluateInDifferentType(Src, DestTy, false);
    assert(Res->getType() == DestTy);
    return replaceInstUsesWith(Trunc, Res);
  }

  // For integer types, check if we can shorten the entire input expression to
  // DestWidth * 2, which won't allow removing the truncate, but reducing the
  // width may enable further optimizations, e.g. allowing for larger
  // vectorization factors.
  if (auto *DestITy = dyn_cast<IntegerType>(DestTy)) {
    if (DestWidth * 2 < SrcWidth) {
      auto *NewDestTy = DestITy->getExtendedType();
      if (shouldChangeType(SrcTy, NewDestTy) &&
          canEvaluateTruncated(Src, NewDestTy, *this, &Trunc)) {
        LLVM_DEBUG(
            dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                      " to reduce the width of operand of"
                   << Trunc << '\n');
        Value *Res = EvaluateInDifferentType(Src, NewDestTy, false);
        return new TruncInst(Res, DestTy);
      }
    }
  }

  // Test if the trunc is the user of a select which is part of a
  // minimum or maximum operation. If so, don't do any more simplification.
  // Even simplifying demanded bits can break the canonical form of a
  // min/max.
  Value *LHS, *RHS;
  if (SelectInst *Sel = dyn_cast<SelectInst>(Src))
    if (matchSelectPattern(Sel, LHS, RHS).Flavor != SPF_UNKNOWN)
      return nullptr;

  // See if we can simplify any instructions used by the input whose sole
  // purpose is to compute bits we don't care about.
  if (SimplifyDemandedInstructionBits(Trunc))
    return &Trunc;

  if (DestWidth == 1) {
    Value *Zero = Constant::getNullValue(SrcTy);

    Value *X;
    const APInt *C1;
    Constant *C2;
    if (match(Src, m_OneUse(m_Shr(m_Shl(m_Power2(C1), m_Value(X)),
                                  m_ImmConstant(C2))))) {
      // trunc ((C1 << X) >> C2) to i1 --> X == (C2-cttz(C1)), where C1 is pow2
      Constant *Log2C1 = ConstantInt::get(SrcTy, C1->exactLogBase2());
      Constant *CmpC = ConstantExpr::getSub(C2, Log2C1);
      return new ICmpInst(ICmpInst::ICMP_EQ, X, CmpC);
    }

    Constant *C;
    if (match(Src, m_OneUse(m_LShr(m_Value(X), m_ImmConstant(C))))) {
      // trunc (lshr X, C) to i1 --> icmp ne (and X, C'), 0
      Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
      Value *MaskC = Builder.CreateShl(One, C);
      Value *And = Builder.CreateAnd(X, MaskC);
      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
    }
    if (match(Src, m_OneUse(m_c_Or(m_LShr(m_Value(X), m_ImmConstant(C)),
                                   m_Deferred(X))))) {
      // trunc (or (lshr X, C), X) to i1 --> icmp ne (and X, C'), 0
      Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
      Value *MaskC = Builder.CreateShl(One, C);
      Value *And = Builder.CreateAnd(X, Builder.CreateOr(MaskC, One));
      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
    }

    {
      const APInt *C;
      if (match(Src, m_Shl(m_APInt(C), m_Value(X))) && (*C)[0] == 1) {
        // trunc (C << X) to i1 --> X == 0, where C is odd
        return new ICmpInst(ICmpInst::Predicate::ICMP_EQ, X, Zero);
      }
    }

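    // With a nuw/nsw trunc to i1, an xor collapses to an inequality test,
    // e.g. (illustrative): trunc nuw i8 (xor i8 %x, %y) to i1
    //                      --> icmp ne i8 %x, %y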
    if (Trunc.hasNoUnsignedWrap() || Trunc.hasNoSignedWrap()) {
      Value *X, *Y;
      if (match(Src, m_Xor(m_Value(X), m_Value(Y))))
        return new ICmpInst(ICmpInst::ICMP_NE, X, Y);
    }
  }

  Value *A, *B;
  Constant *C;
  if (match(Src, m_LShr(m_SExt(m_Value(A)), m_Constant(C)))) {
    unsigned AWidth = A->getType()->getScalarSizeInBits();
    unsigned MaxShiftAmt = SrcWidth - std::max(DestWidth, AWidth);
    auto *OldSh = cast<Instruction>(Src);
    bool IsExact = OldSh->isExact();

    // If the shift is small enough, all zero bits created by the shift are
    // removed by the trunc.
    if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
                                    APInt(SrcWidth, MaxShiftAmt)))) {
      auto GetNewShAmt = [&](unsigned Width) {
        Constant *MaxAmt = ConstantInt::get(SrcTy, Width - 1, false);
        Constant *Cmp =
            ConstantFoldCompareInstOperands(ICmpInst::ICMP_ULT, C, MaxAmt, DL);
        Constant *ShAmt = ConstantFoldSelectInstruction(Cmp, C, MaxAmt);
        return ConstantFoldCastOperand(Instruction::Trunc, ShAmt, A->getType(),
                                       DL);
      };

      // trunc (lshr (sext A), C) --> ashr A, C
      if (A->getType() == DestTy) {
        Constant *ShAmt = GetNewShAmt(DestWidth);
        ShAmt = Constant::mergeUndefsWith(ShAmt, C);
        return IsExact ? BinaryOperator::CreateExactAShr(A, ShAmt)
                       : BinaryOperator::CreateAShr(A, ShAmt);
      }
      // The types are mismatched, so create a cast after shifting:
      // trunc (lshr (sext A), C) --> sext/trunc (ashr A, C)
      if (Src->hasOneUse()) {
        Constant *ShAmt = GetNewShAmt(AWidth);
        Value *Shift = Builder.CreateAShr(A, ShAmt, "", IsExact);
        return CastInst::CreateIntegerCast(Shift, DestTy, true);
      }
    }
    // TODO: Mask high bits with 'and'.
  }

  if (Instruction *I = narrowBinOp(Trunc))
    return I;

  if (Instruction *I = shrinkSplatShuffle(Trunc, Builder))
    return I;

  if (Instruction *I = shrinkInsertElt(Trunc, Builder))
    return I;

  if (Src->hasOneUse() &&
      (isa<VectorType>(SrcTy) || shouldChangeType(SrcTy, DestTy))) {
    // Transform "trunc (shl X, cst)" -> "shl (trunc X), cst" so long as the
    // dest type is native and cst < dest size.
    if (match(Src, m_Shl(m_Value(A), m_Constant(C))) &&
        !match(A, m_Shr(m_Value(), m_Constant()))) {
      // Skip shifts of shift by constants. It undoes a combine in
      // FoldShiftByConstant and is the extend in reg pattern.
      APInt Threshold = APInt(C->getType()->getScalarSizeInBits(), DestWidth);
      if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold))) {
        Value *NewTrunc = Builder.CreateTrunc(A, DestTy, A->getName() + ".tr");
        return BinaryOperator::Create(Instruction::Shl, NewTrunc,
                                      ConstantExpr::getTrunc(C, DestTy));
      }
    }
  }

  if (Instruction *I = foldVecTruncToExtElt(Trunc, *this))
    return I;

  // Whenever an element is extracted from a vector, and then truncated,
  // canonicalize by converting it to a bitcast followed by an
  // extractelement.
  //
  // Example (little endian):
  //   trunc (extractelement <4 x i64> %X, 0) to i32
  //   --->
  //   extractelement <8 x i32> (bitcast <4 x i64> %X to <8 x i32>), i32 0
  Value *VecOp;
  ConstantInt *Cst;
  if (match(Src, m_OneUse(m_ExtractElt(m_Value(VecOp), m_ConstantInt(Cst))))) {
    auto *VecOpTy = cast<VectorType>(VecOp->getType());
    auto VecElts = VecOpTy->getElementCount();

    // A badly fit destination size would result in an invalid cast.
    if (SrcWidth % DestWidth == 0) {
      uint64_t TruncRatio = SrcWidth / DestWidth;
      uint64_t BitCastNumElts = VecElts.getKnownMinValue() * TruncRatio;
      uint64_t VecOpIdx = Cst->getZExtValue();
      uint64_t NewIdx = DL.isBigEndian() ? (VecOpIdx + 1) * TruncRatio - 1
                                         : VecOpIdx * TruncRatio;
      assert(BitCastNumElts <= std::numeric_limits<uint32_t>::max() &&
             "overflow 32-bits");

      auto *BitCastTo =
          VectorType::get(DestTy, BitCastNumElts, VecElts.isScalable());
      Value *BitCast = Builder.CreateBitCast(VecOp, BitCastTo);
      return ExtractElementInst::Create(BitCast, Builder.getInt32(NewIdx));
    }
  }

  // trunc (ctlz_i32(zext(A), B)) --> add(ctlz_i16(A, B), C)
  if (match(Src, m_OneUse(m_Intrinsic<Intrinsic::ctlz>(m_ZExt(m_Value(A)),
                                                       m_Value(B))))) {
    unsigned AWidth = A->getType()->getScalarSizeInBits();
    if (AWidth == DestWidth && AWidth > Log2_32(SrcWidth)) {
      Value *WidthDiff = ConstantInt::get(A->getType(), SrcWidth - AWidth);
      Value *NarrowCtlz =
          Builder.CreateIntrinsic(Intrinsic::ctlz, {Trunc.getType()}, {A, B});
      return BinaryOperator::CreateAdd(NarrowCtlz, WidthDiff);
    }
  }

  if (match(Src, m_VScale())) {
    if (Trunc.getFunction() &&
        Trunc.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
      Attribute Attr =
          Trunc.getFunction()->getFnAttribute(Attribute::VScaleRange);
      if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
        if (Log2_32(*MaxVScale) < DestWidth) {
          Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
          return replaceInstUsesWith(Trunc, VScale);
        }
      }
    }
  }

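  // Infer no-wrap flags on the trunc itself: nsw when the value's significant
  // bits fit in DestWidth, nuw when the dropped high bits are known zero.
  // For example (illustrative), trunc (and i32 %x, 255) to i8 can be marked
  // nuw because bits 8..31 of the source are zero.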
  bool Changed = false;
  if (!Trunc.hasNoSignedWrap() &&
      ComputeMaxSignificantBits(Src, /*Depth=*/0, &Trunc) <= DestWidth) {
    Trunc.setHasNoSignedWrap(true);
    Changed = true;
  }
  if (!Trunc.hasNoUnsignedWrap() &&
      MaskedValueIsZero(Src, APInt::getBitsSetFrom(SrcWidth, DestWidth),
                        /*Depth=*/0, &Trunc)) {
    Trunc.setHasNoUnsignedWrap(true);
    Changed = true;
  }

  return Changed ? &Trunc : nullptr;
}

Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp,
                                                 ZExtInst &Zext) {
  // If we are just checking for an icmp eq of a single bit and zext'ing it
  // to an integer, then shift the bit to the appropriate place and then
  // cast to integer to avoid the comparison.

  // FIXME: This set of transforms does not check for extra uses and/or creates
  //        an extra instruction (an optional final cast is not included
  //        in the transform comments). We may also want to favor icmp over
  //        shifts in cases of equal instructions because icmp has better
  //        analysis in general (invert the transform).

  const APInt *Op1CV;
  if (match(Cmp->getOperand(1), m_APInt(Op1CV))) {

    // zext (x <s 0) to i32 --> x>>u31 true if signbit set.
    if (Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isZero()) {
      Value *In = Cmp->getOperand(0);
      Value *Sh = ConstantInt::get(In->getType(),
                                   In->getType()->getScalarSizeInBits() - 1);
      In = Builder.CreateLShr(In, Sh, In->getName() + ".lobit");
      if (In->getType() != Zext.getType())
        In = Builder.CreateIntCast(In, Zext.getType(), false /*ZExt*/);

      return replaceInstUsesWith(Zext, In);
    }

    // zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
    // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
    // zext (X != 0) to i32 --> X iff X has only the low bit set.
    // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.

    if (Op1CV->isZero() && Cmp->isEquality()) {
      // Exactly 1 possible 1? But not the high-bit because that is
      // canonicalized to this form.
      KnownBits Known = computeKnownBits(Cmp->getOperand(0), 0, &Zext);
      APInt KnownZeroMask(~Known.Zero);
      uint32_t ShAmt = KnownZeroMask.logBase2();
      bool IsExpectShAmt = KnownZeroMask.isPowerOf2() &&
                           (Zext.getType()->getScalarSizeInBits() != ShAmt + 1);
      if (IsExpectShAmt &&
          (Cmp->getOperand(0)->getType() == Zext.getType() ||
           Cmp->getPredicate() == ICmpInst::ICMP_NE || ShAmt == 0)) {
        Value *In = Cmp->getOperand(0);
        if (ShAmt) {
          // Perform a logical shr by shiftamt.
          // Insert the shift to put the result in the low bit.
          In = Builder.CreateLShr(In, ConstantInt::get(In->getType(), ShAmt),
                                  In->getName() + ".lobit");
        }

        // Toggle the low bit for "X == 0".
        if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
          In = Builder.CreateXor(In, ConstantInt::get(In->getType(), 1));

        if (Zext.getType() == In->getType())
          return replaceInstUsesWith(Zext, In);

        Value *IntCast = Builder.CreateIntCast(In, Zext.getType(), false);
        return replaceInstUsesWith(Zext, IntCast);
      }
    }
  }

  if (Cmp->isEquality() && Zext.getType() == Cmp->getOperand(0)->getType()) {
    // Test if a bit is clear/set using a shifted-one mask:
    // zext (icmp eq (and X, (1 << ShAmt)), 0) --> and (lshr (not X), ShAmt), 1
    // zext (icmp ne (and X, (1 << ShAmt)), 0) --> and (lshr X, ShAmt), 1
    Value *X, *ShAmt;
    if (Cmp->hasOneUse() && match(Cmp->getOperand(1), m_ZeroInt()) &&
        match(Cmp->getOperand(0),
              m_OneUse(m_c_And(m_Shl(m_One(), m_Value(ShAmt)), m_Value(X))))) {
      if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
        X = Builder.CreateNot(X);
      Value *Lshr = Builder.CreateLShr(X, ShAmt);
      Value *And1 = Builder.CreateAnd(Lshr, ConstantInt::get(X->getType(), 1));
      return replaceInstUsesWith(Zext, And1);
    }
  }

  return nullptr;
}

/// Determine if the specified value can be computed in the specified wider type
/// and produce the same low bits. If not, return false.
///
/// If this function returns true, it can also return a non-zero number of bits
/// (in BitsToClear) which indicates that the value it computes is correct for
/// the zero extend, but that the additional BitsToClear bits need to be zero'd
/// out. For example, to promote something like:
///
///   %B = trunc i64 %A to i32
///   %C = lshr i32 %B, 8
///   %E = zext i32 %C to i64
///
/// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
/// set to 8 to indicate that the promoted value needs to have bits 24-31
/// cleared in addition to bits 32-63. Since an 'and' will be generated to
/// clear the top bits anyway, doing this has no extra cost.
///
/// This function works on both vectors and scalars.
static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
                             InstCombinerImpl &IC, Instruction *CxtI) {
  BitsToClear = 0;
  if (canAlwaysEvaluateInType(V, Ty))
    return true;
  if (canNotEvaluateInType(V, Ty))
    return false;

  auto *I = cast<Instruction>(V);
  unsigned Tmp;
  switch (I->getOpcode()) {
  case Instruction::ZExt:  // zext(zext(x)) -> zext(x).
  case Instruction::SExt:  // zext(sext(x)) -> sext(x).
  case Instruction::Trunc: // zext(trunc(x)) -> trunc(x) or zext(x)
    return true;
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
    if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI) ||
        !canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI))
      return false;
    // These can all be promoted if neither operand has 'bits to clear'.
    if (BitsToClear == 0 && Tmp == 0)
      return true;

    // If the operation is an AND/OR/XOR and the bits to clear are zero in the
    // other side, BitsToClear is ok.
    if (Tmp == 0 && I->isBitwiseLogicOp()) {
      // We use MaskedValueIsZero here for generality, but the case we care
      // about the most is constant RHS.
      unsigned VSize = V->getType()->getScalarSizeInBits();
      if (IC.MaskedValueIsZero(I->getOperand(1),
                               APInt::getHighBitsSet(VSize, BitsToClear),
                               0, CxtI)) {
        // If this is an And instruction and all of the BitsToClear are
        // known to be zero we can reset BitsToClear.
        if (I->getOpcode() == Instruction::And)
          BitsToClear = 0;
        return true;
      }
    }

    // Otherwise, we don't know how to analyze this BitsToClear case yet.
    return false;

  case Instruction::Shl: {
    // We can promote shl(x, cst) if we can promote x. Since shl overwrites the
    // upper bits we can reduce BitsToClear by the shift amount.
    const APInt *Amt;
    if (match(I->getOperand(1), m_APInt(Amt))) {
      if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
        return false;
      uint64_t ShiftAmt = Amt->getZExtValue();
      BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0;
      return true;
    }
    return false;
  }
  case Instruction::LShr: {
    // We can promote lshr(x, cst) if we can promote x. This requires the
    // ultimate 'and' to clear out the high zero bits we're clearing out though.
    const APInt *Amt;
    if (match(I->getOperand(1), m_APInt(Amt))) {
      if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
        return false;
      BitsToClear += Amt->getZExtValue();
      if (BitsToClear > V->getType()->getScalarSizeInBits())
        BitsToClear = V->getType()->getScalarSizeInBits();
      return true;
    }
    // Cannot promote variable LSHR.
    return false;
  }
  case Instruction::Select:
    if (!canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI) ||
        !canEvaluateZExtd(I->getOperand(2), Ty, BitsToClear, IC, CxtI) ||
        // TODO: If important, we could handle the case when the BitsToClear
        // are known zero in the disagreeing side.
        Tmp != BitsToClear)
      return false;
    return true;

  case Instruction::PHI: {
    // We can change a phi if we can change all operands. Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    if (!canEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear, IC, CxtI))
      return false;
    for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
      if (!canEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) ||
          // TODO: If important, we could handle the case when the BitsToClear
          // are known zero in the disagreeing input.
          Tmp != BitsToClear)
        return false;
    return true;
  }
  case Instruction::Call:
    // llvm.vscale() can always be executed in larger type, because the
    // value is automatically zero-extended.
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
      if (II->getIntrinsicID() == Intrinsic::vscale)
        return true;
    return false;
  default:
    // TODO: Can handle more cases here.
    return false;
  }
}

Instruction *InstCombinerImpl::visitZExt(ZExtInst &Zext) {
  // If this zero extend is only used by a truncate, let the truncate be
  // eliminated before we try to optimize this zext.
  if (Zext.hasOneUse() && isa<TruncInst>(Zext.user_back()) &&
      !isa<Constant>(Zext.getOperand(0)))
    return nullptr;

  // If one of the common conversions will work, do it.
  if (Instruction *Result = commonCastTransforms(Zext))
    return Result;

  Value *Src = Zext.getOperand(0);
  Type *SrcTy = Src->getType(), *DestTy = Zext.getType();

  // zext nneg i1 x -> 0 (a non-negative i1 must be 0)
  if (SrcTy->isIntOrIntVectorTy(1) && Zext.hasNonNeg())
    return replaceInstUsesWith(Zext, Constant::getNullValue(Zext.getType()));

  // Try to extend the entire expression tree to the wide destination type.
  unsigned BitsToClear;
  if (shouldChangeType(SrcTy, DestTy) &&
      canEvaluateZExtd(Src, DestTy, BitsToClear, *this, &Zext)) {
    assert(BitsToClear <= SrcTy->getScalarSizeInBits() &&
           "Can't clear more bits than in SrcTy");

    // Okay, we can transform this! Insert the new expression now.
    LLVM_DEBUG(
        dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                  " to avoid zero extend: "
               << Zext << '\n');
    Value *Res = EvaluateInDifferentType(Src, DestTy, false);
    assert(Res->getType() == DestTy);

    // Preserve debug values referring to Src if the zext is its last use.
    if (auto *SrcOp = dyn_cast<Instruction>(Src))
      if (SrcOp->hasOneUse())
        replaceAllDbgUsesWith(*SrcOp, *Res, Zext, DT);

    uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits() - BitsToClear;
    uint32_t DestBitSize = DestTy->getScalarSizeInBits();

    // If the high bits are already filled with zeros, just replace this
    // cast with the result.
    if (MaskedValueIsZero(Res,
                          APInt::getHighBitsSet(DestBitSize,
                                                DestBitSize - SrcBitsKept),
                          0, &Zext))
      return replaceInstUsesWith(Zext, Res);

    // We need to emit an AND to clear the high bits.
    Constant *C =
        ConstantInt::get(Res->getType(),
                         APInt::getLowBitsSet(DestBitSize, SrcBitsKept));
    return BinaryOperator::CreateAnd(Res, C);
  }

  // If this is a TRUNC followed by a ZEXT then we are dealing with integral
  // types and if the sizes are just right we can convert this into a logical
  // 'and' which will be much cheaper than the pair of casts.
  if (auto *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
    // TODO: Subsume this into EvaluateInDifferentType.

    // Get the sizes of the types involved. We know that the intermediate type
    // will be smaller than A or C, but don't know the relation between A and C.
    Value *A = CSrc->getOperand(0);
    unsigned SrcSize = A->getType()->getScalarSizeInBits();
    unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
    unsigned DstSize = DestTy->getScalarSizeInBits();
    // If we're actually extending zero bits, then if
    //   SrcSize <  DstSize: zext(a & mask)
    //   SrcSize == DstSize: a & mask
    //   SrcSize  > DstSize: trunc(a) & mask
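    // e.g. (illustrative, the SrcSize > DstSize case):
    //   %m = trunc i64 %A to i8
    //   %z = zext i8 %m to i32
    // -->
    //   %t = trunc i64 %A to i32
    //   %z = and i32 %t, 255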
    if (SrcSize < DstSize) {
      APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
      Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
      Value *And = Builder.CreateAnd(A, AndConst, CSrc->getName() + ".mask");
      return new ZExtInst(And, DestTy);
    }

    if (SrcSize == DstSize) {
      APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
      return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
                                                           AndValue));
    }
    if (SrcSize > DstSize) {
      Value *Trunc = Builder.CreateTrunc(A, DestTy);
      APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
      return BinaryOperator::CreateAnd(Trunc,
                                       ConstantInt::get(Trunc->getType(),
                                                        AndValue));
    }
  }

  if (auto *Cmp = dyn_cast<ICmpInst>(Src))
    return transformZExtICmp(Cmp, Zext);

  // zext(trunc(X) & C) -> (X & zext(C)).
  Constant *C;
  Value *X;
  if (match(Src, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Constant(C)))) &&
      X->getType() == DestTy)
    return BinaryOperator::CreateAnd(X, Builder.CreateZExt(C, DestTy));

  // zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)).
  Value *And;
  if (match(Src, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) &&
      match(And, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Specific(C)))) &&
      X->getType() == DestTy) {
    Value *ZC = Builder.CreateZExt(C, DestTy);
    return BinaryOperator::CreateXor(Builder.CreateAnd(X, ZC), ZC);
  }

  // If we are truncating, masking, and then zexting back to the original type,
  // that's just a mask. This is not handled by canEvaluateZextd if the
  // intermediate values have extra uses. This could be generalized further for
  // a non-constant mask operand.
  // zext (and (trunc X), C) --> and X, (zext C)
  if (match(Src, m_And(m_Trunc(m_Value(X)), m_Constant(C))) &&
      X->getType() == DestTy) {
    Value *ZextC = Builder.CreateZExt(C, DestTy);
    return BinaryOperator::CreateAnd(X, ZextC);
  }

  if (match(Src, m_VScale())) {
    if (Zext.getFunction() &&
        Zext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
      Attribute Attr =
          Zext.getFunction()->getFnAttribute(Attribute::VScaleRange);
      if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
        unsigned TypeWidth = Src->getType()->getScalarSizeInBits();
        if (Log2_32(*MaxVScale) < TypeWidth) {
          Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
          return replaceInstUsesWith(Zext, VScale);
        }
      }
    }
  }

  if (!Zext.hasNonNeg()) {
    // If this zero extend is only used by a shift, add nneg flag.
    if (Zext.hasOneUse() &&
        SrcTy->getScalarSizeInBits() >
            Log2_64_Ceil(DestTy->getScalarSizeInBits()) &&
        match(Zext.user_back(), m_Shift(m_Value(), m_Specific(&Zext)))) {
      Zext.setNonNeg();
      return &Zext;
    }

    if (isKnownNonNegative(Src, SQ.getWithInstruction(&Zext))) {
      Zext.setNonNeg();
      return &Zext;
    }
  }

  return nullptr;
}

/// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp.
Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *Cmp,
                                                 SExtInst &Sext) {
  Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
  ICmpInst::Predicate Pred = Cmp->getPredicate();

  // Don't bother if Op1 isn't of vector or integer type.
  if (!Op1->getType()->isIntOrIntVectorTy())
    return nullptr;

  if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_ZeroInt())) {
    // sext (x <s 0) --> ashr x, 31 (all ones if negative)
    Value *Sh = ConstantInt::get(Op0->getType(),
                                 Op0->getType()->getScalarSizeInBits() - 1);
    Value *In = Builder.CreateAShr(Op0, Sh, Op0->getName() + ".lobit");
    if (In->getType() != Sext.getType())
      In = Builder.CreateIntCast(In, Sext.getType(), true /*SExt*/);

    return replaceInstUsesWith(Sext, In);
  }

  if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
    // If we know that only one bit of the LHS of the icmp can be set and we
    // have an equality comparison with zero or a power of 2, we can transform
    // the icmp and sext into bitwise/integer operations.
    if (Cmp->hasOneUse() &&
        Cmp->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())) {
      KnownBits Known = computeKnownBits(Op0, 0, &Sext);

      APInt KnownZeroMask(~Known.Zero);
      if (KnownZeroMask.isPowerOf2()) {
        Value *In = Cmp->getOperand(0);

        // If the icmp tests for a known zero bit we can constant fold it.
        if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) {
          Value *V = Pred == ICmpInst::ICMP_NE ?
                       ConstantInt::getAllOnesValue(Sext.getType()) :
                       ConstantInt::getNullValue(Sext.getType());
          return replaceInstUsesWith(Sext, V);
        }

        if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
          // sext ((x & 2^n) == 0)   -> (x >> n) - 1
          // sext ((x & 2^n) != 2^n) -> (x >> n) - 1
          unsigned ShiftAmt = KnownZeroMask.countr_zero();
          // Perform a right shift to place the desired bit in the LSB.
          if (ShiftAmt)
            In = Builder.CreateLShr(In,
                                    ConstantInt::get(In->getType(), ShiftAmt));

          // At this point "In" is either 1 or 0. Subtract 1 to turn
          // {1, 0} -> {0, -1}.
          In = Builder.CreateAdd(In,
                                 ConstantInt::getAllOnesValue(In->getType()),
                                 "sext");
        } else {
          // sext ((x & 2^n) != 0)   -> (x << bitwidth-n) a>> bitwidth-1
          // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
          unsigned ShiftAmt = KnownZeroMask.countl_zero();
          // Perform a left shift to place the desired bit in the MSB.
          if (ShiftAmt)
            In = Builder.CreateShl(In,
                                   ConstantInt::get(In->getType(), ShiftAmt));

          // Distribute the bit over the whole bit width.
          In = Builder.CreateAShr(In, ConstantInt::get(In->getType(),
                                  KnownZeroMask.getBitWidth() - 1), "sext");
        }

        if (Sext.getType() == In->getType())
          return replaceInstUsesWith(Sext, In);
        return CastInst::CreateIntegerCast(In, Sext.getType(), true/*SExt*/);
      }
    }
  }

  return nullptr;
}

/// Return true if we can take the specified value and return it as type Ty
/// without inserting any new casts and without changing the value of the common
/// low bits. This is used by code that tries to promote integer operations to
/// a wider type, which will allow us to eliminate the extension.
///
/// This function works on both vectors and scalars.
///
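/// For example (illustrative), the low 16 bits of add i32 (sext %x), (sext %y)
/// equal those of add i16 %x, %y, so the add tree can be evaluated directly
/// in the wider type.
///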
static bool canEvaluateSExtd(Value *V, Type *Ty) {
  assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
         "Can't sign extend type to a smaller type");
  if (canAlwaysEvaluateInType(V, Ty))
    return true;
  if (canNotEvaluateInType(V, Ty))
    return false;

  auto *I = cast<Instruction>(V);
  switch (I->getOpcode()) {
  case Instruction::SExt:  // sext(sext(x)) -> sext(x)
  case Instruction::ZExt:  // sext(zext(x)) -> zext(x)
  case Instruction::Trunc: // sext(trunc(x)) -> trunc(x) or sext(x)
    return true;
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
    // These operators can all arbitrarily be extended if their inputs can.
    return canEvaluateSExtd(I->getOperand(0), Ty) &&
           canEvaluateSExtd(I->getOperand(1), Ty);

  //case Instruction::Shl:   TODO
  //case Instruction::LShr:  TODO

  case Instruction::Select:
    return canEvaluateSExtd(I->getOperand(1), Ty) &&
           canEvaluateSExtd(I->getOperand(2), Ty);

  case Instruction::PHI: {
    // We can change a phi if we can change all operands. Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (Value *IncValue : PN->incoming_values())
      if (!canEvaluateSExtd(IncValue, Ty)) return false;
    return true;
  }
  default:
    // TODO: Can handle more cases here.
    break;
  }

  return false;
}

Instruction *InstCombinerImpl::visitSExt(SExtInst &Sext) {
  // If this sign extend is only used by a truncate, let the truncate be
  // eliminated before we try to optimize this sext.
  if (Sext.hasOneUse() && isa<TruncInst>(Sext.user_back()))
    return nullptr;

  if (Instruction *I = commonCastTransforms(Sext))
    return I;

  Value *Src = Sext.getOperand(0);
  Type *SrcTy = Src->getType(), *DestTy = Sext.getType();
  unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
  unsigned DestBitSize = DestTy->getScalarSizeInBits();

  // If the value being extended is zero or positive, use a zext instead.
  if (isKnownNonNegative(Src, SQ.getWithInstruction(&Sext))) {
    auto CI = CastInst::Create(Instruction::ZExt, Src, DestTy);
    CI->setNonNeg(true);
    return CI;
  }

  // Try to extend the entire expression tree to the wide destination type.
  if (shouldChangeType(SrcTy, DestTy) && canEvaluateSExtd(Src, DestTy)) {
    // Okay, we can transform this! Insert the new expression now.
    LLVM_DEBUG(
        dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                  " to avoid sign extend: "
               << Sext << '\n');
    Value *Res = EvaluateInDifferentType(Src, DestTy, true);
    assert(Res->getType() == DestTy);

    // If the high bits are already filled with sign bit, just replace this
    // cast with the result.
    if (ComputeNumSignBits(Res, 0, &Sext) > DestBitSize - SrcBitSize)
      return replaceInstUsesWith(Sext, Res);

    // We need to emit a shl + ashr to do the sign extend.
    Value *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
    return BinaryOperator::CreateAShr(Builder.CreateShl(Res, ShAmt, "sext"),
                                      ShAmt);
  }

  Value *X;
  if (match(Src, m_Trunc(m_Value(X)))) {
    // If the input has more sign bits than bits truncated, then convert
    // directly to final type.
    unsigned XBitSize = X->getType()->getScalarSizeInBits();
    if (ComputeNumSignBits(X, 0, &Sext) > XBitSize - SrcBitSize)
      return CastInst::CreateIntegerCast(X, DestTy, /* isSigned */ true);

    // If input is a trunc from the destination type, then convert into shifts.
    if (Src->hasOneUse() && X->getType() == DestTy) {
      // sext (trunc X) --> ashr (shl X, C), C
      Constant *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
      return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShAmt), ShAmt);
    }

    // If we are replacing shifted-in high zero bits with sign bits, convert
    // the logic shift to arithmetic shift and eliminate the cast to
    // intermediate type:
    // sext (trunc (lshr Y, C)) --> sext/trunc (ashr Y, C)
    Value *Y;
    if (Src->hasOneUse() &&
        match(X, m_LShr(m_Value(Y),
                        m_SpecificIntAllowPoison(XBitSize - SrcBitSize)))) {
      Value *Ashr = Builder.CreateAShr(Y, XBitSize - SrcBitSize);
      return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
    }
  }

  if (auto *Cmp = dyn_cast<ICmpInst>(Src))
    return transformSExtICmp(Cmp, Sext);

  // If the input is a shl/ashr pair of a same constant, then this is a sign
  // extension from a smaller value. If we could trust arbitrary bitwidth
  // integers, we could turn this into a truncate to the smaller bit and then
  // use a sext for the whole extension. Since we don't, look deeper and check
  // for a truncate. If the source and dest are the same type, eliminate the
  // trunc and extend and just do shifts. For example, turn:
  //   %a = trunc i32 %i to i8
  //   %b = shl i8 %a, C
  //   %c = ashr i8 %b, C
  //   %d = sext i8 %c to i32
  // into:
  //   %a = shl i32 %i, 32-(8-C)
  //   %d = ashr i32 %a, 32-(8-C)
  Value *A = nullptr;
  // TODO: Eventually this could be subsumed by EvaluateInDifferentType.
  Constant *BA = nullptr, *CA = nullptr;
  if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_Constant(BA)),
                        m_ImmConstant(CA))) &&
      BA->isElementWiseEqual(CA) && A->getType() == DestTy) {
    Constant *WideCurrShAmt =
        ConstantFoldCastOperand(Instruction::SExt, CA, DestTy, DL);
    assert(WideCurrShAmt && "Constant folding of ImmConstant cannot fail");
    Constant *NumLowbitsLeft = ConstantExpr::getSub(
        ConstantInt::get(DestTy, SrcTy->getScalarSizeInBits()), WideCurrShAmt);
    Constant *NewShAmt = ConstantExpr::getSub(
        ConstantInt::get(DestTy, DestTy->getScalarSizeInBits()),
        NumLowbitsLeft);
    NewShAmt =
        Constant::mergeUndefsWith(Constant::mergeUndefsWith(NewShAmt, BA), CA);
    A = Builder.CreateShl(A, NewShAmt, Sext.getName());
    return BinaryOperator::CreateAShr(A, NewShAmt);
  }

  // Splatting a bit of constant-index across a value:
  // sext (ashr (trunc iN X to iM), M-1) to iN --> ashr (shl X, N-M), N-1
  // If the dest type is different, use a cast (adjust use check).
  if (match(Src, m_OneUse(m_AShr(m_Trunc(m_Value(X)),
                                 m_SpecificInt(SrcBitSize - 1))))) {
    Type *XTy = X->getType();
    unsigned XBitSize = XTy->getScalarSizeInBits();
    Constant *ShlAmtC = ConstantInt::get(XTy, XBitSize - SrcBitSize);
    Constant *AshrAmtC = ConstantInt::get(XTy, XBitSize - 1);
    if (XTy == DestTy)
      return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShlAmtC),
                                        AshrAmtC);
    if (cast<BinaryOperator>(Src)->getOperand(0)->hasOneUse()) {
      Value *Ashr = Builder.CreateAShr(Builder.CreateShl(X, ShlAmtC), AshrAmtC);
      return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
    }
  }

  if (match(Src, m_VScale())) {
    if (Sext.getFunction() &&
        Sext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
      Attribute Attr =
          Sext.getFunction()->getFnAttribute(Attribute::VScaleRange);
      if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
        if (Log2_32(*MaxVScale) < (SrcBitSize - 1)) {
          Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
          return replaceInstUsesWith(Sext, VScale);
        }
      }
    }
  }

  return nullptr;
}

    Value *Y;
    if (Src->hasOneUse() &&
        match(X, m_LShr(m_Value(Y),
                        m_SpecificIntAllowPoison(XBitSize - SrcBitSize)))) {
      Value *Ashr = Builder.CreateAShr(Y, XBitSize - SrcBitSize);
      return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
    }
  }

  if (auto *Cmp = dyn_cast<ICmpInst>(Src))
    return transformSExtICmp(Cmp, Sext);

  // If the input is an shl/ashr pair of the same constant, then this is a
  // sign extension from a smaller value.  If we could trust arbitrary bitwidth
  // integers, we could turn this into a truncate to the smaller bit and then
  // use a sext for the whole extension.  Since we don't, look deeper and check
  // for a truncate.  If the source and dest are the same type, eliminate the
  // trunc and extend and just do shifts.  For example, turn:
  //   %a = trunc i32 %i to i8
  //   %b = shl i8 %a, C
  //   %c = ashr i8 %b, C
  //   %d = sext i8 %c to i32
  // into:
  //   %a = shl i32 %i, 32-(8-C)
  //   %d = ashr i32 %a, 32-(8-C)
  Value *A = nullptr;
  // TODO: Eventually this could be subsumed by EvaluateInDifferentType.
  Constant *BA = nullptr, *CA = nullptr;
  if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_Constant(BA)),
                        m_ImmConstant(CA))) &&
      BA->isElementWiseEqual(CA) && A->getType() == DestTy) {
    Constant *WideCurrShAmt =
        ConstantFoldCastOperand(Instruction::SExt, CA, DestTy, DL);
    assert(WideCurrShAmt && "Constant folding of ImmConstant cannot fail");
    Constant *NumLowbitsLeft = ConstantExpr::getSub(
        ConstantInt::get(DestTy, SrcTy->getScalarSizeInBits()), WideCurrShAmt);
    Constant *NewShAmt = ConstantExpr::getSub(
        ConstantInt::get(DestTy, DestTy->getScalarSizeInBits()),
        NumLowbitsLeft);
    NewShAmt =
        Constant::mergeUndefsWith(Constant::mergeUndefsWith(NewShAmt, BA), CA);
    A = Builder.CreateShl(A, NewShAmt, Sext.getName());
    return BinaryOperator::CreateAShr(A, NewShAmt);
  }

  // Splatting a bit of constant-index across a value:
  // sext (ashr (trunc iN X to iM), M-1) to iN --> ashr (shl X, N-M), N-1
  // If the dest type is different, use a cast (adjust use check).
  if (match(Src, m_OneUse(m_AShr(m_Trunc(m_Value(X)),
                                 m_SpecificInt(SrcBitSize - 1))))) {
    Type *XTy = X->getType();
    unsigned XBitSize = XTy->getScalarSizeInBits();
    Constant *ShlAmtC = ConstantInt::get(XTy, XBitSize - SrcBitSize);
    Constant *AshrAmtC = ConstantInt::get(XTy, XBitSize - 1);
    if (XTy == DestTy)
      return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShlAmtC),
                                        AshrAmtC);
    if (cast<BinaryOperator>(Src)->getOperand(0)->hasOneUse()) {
      Value *Ashr = Builder.CreateAShr(Builder.CreateShl(X, ShlAmtC), AshrAmtC);
      return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
    }
  }

  if (match(Src, m_VScale())) {
    if (Sext.getFunction() &&
        Sext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
      Attribute Attr =
          Sext.getFunction()->getFnAttribute(Attribute::VScaleRange);
      if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
        if (Log2_32(*MaxVScale) < (SrcBitSize - 1)) {
          Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
          return replaceInstUsesWith(Sext, VScale);
        }
      }
    }
  }

  return nullptr;
}

/// Return true if the specified floating-point constant fits in the specified
/// FP type without changing its value.
static bool fitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
  bool losesInfo;
  APFloat F = CFP->getValueAPF();
  (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
  return !losesInfo;
}
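// For example (illustrative): a double constant 0.5 converts to half and back
// without losing information, so it "fits" in half; 0.1 has no exact binary
// representation, so narrowing its double approximation loses bits and it
// does not fit.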
static Type *shrinkFPConstant(ConstantFP *CFP, bool PreferBFloat) {
  if (CFP->getType() == Type::getPPC_FP128Ty(CFP->getContext()))
    return nullptr; // No constant folding of this.
  // See if the value can be truncated to bfloat and then reextended.
  if (PreferBFloat && fitsInFPType(CFP, APFloat::BFloat()))
    return Type::getBFloatTy(CFP->getContext());
  // See if the value can be truncated to half and then reextended.
  if (!PreferBFloat && fitsInFPType(CFP, APFloat::IEEEhalf()))
    return Type::getHalfTy(CFP->getContext());
  // See if the value can be truncated to float and then reextended.
  if (fitsInFPType(CFP, APFloat::IEEEsingle()))
    return Type::getFloatTy(CFP->getContext());
  if (CFP->getType()->isDoubleTy())
    return nullptr; // Won't shrink.
  if (fitsInFPType(CFP, APFloat::IEEEdouble()))
    return Type::getDoubleTy(CFP->getContext());
  // Don't try to shrink to various long double types.
  return nullptr;
}

// Determine if this is a vector of ConstantFPs and if so, return the minimal
// type we can safely truncate all elements to.
static Type *shrinkFPConstantVector(Value *V, bool PreferBFloat) {
  auto *CV = dyn_cast<Constant>(V);
  auto *CVVTy = dyn_cast<FixedVectorType>(V->getType());
  if (!CV || !CVVTy)
    return nullptr;

  Type *MinType = nullptr;

  unsigned NumElts = CVVTy->getNumElements();

  // For fixed-width vectors we find the minimal type by looking
  // through the constant values of the vector.
  for (unsigned i = 0; i != NumElts; ++i) {
    if (isa<UndefValue>(CV->getAggregateElement(i)))
      continue;

    auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i));
    if (!CFP)
      return nullptr;

    Type *T = shrinkFPConstant(CFP, PreferBFloat);
    if (!T)
      return nullptr;

    // If we haven't found a type yet or this type has a larger mantissa than
    // our previous type, this is our new minimal type.
    if (!MinType || T->getFPMantissaWidth() > MinType->getFPMantissaWidth())
      MinType = T;
  }

  // Make a vector type from the minimal type.
  return MinType ? FixedVectorType::get(MinType, NumElts) : nullptr;
}
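// E.g. (an illustrative case, with !PreferBFloat): <2 x double>
// <double 0.5, double 1.0e10> shrinks to <2 x float>: 0.5 alone would fit in
// half, but 1.0e10 needs float's 24-bit significand, and the widest
// per-element minimum wins.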
/// Find the minimum FP type we can safely truncate to.
static Type *getMinimumFPType(Value *V, bool PreferBFloat) {
  if (auto *FPExt = dyn_cast<FPExtInst>(V))
    return FPExt->getOperand(0)->getType();

  // If this value is a constant, return the constant in the smallest FP type
  // that can accurately represent it.  This allows us to turn
  // (float)((double)X+2.0) into X+2.0f.
  if (auto *CFP = dyn_cast<ConstantFP>(V))
    if (Type *T = shrinkFPConstant(CFP, PreferBFloat))
      return T;

  // We can only correctly find a minimum type for a scalable vector when it is
  // a splat. For splats of constant values the fpext is wrapped up as a
  // ConstantExpr.
  if (auto *FPCExt = dyn_cast<ConstantExpr>(V))
    if (FPCExt->getOpcode() == Instruction::FPExt)
      return FPCExt->getOperand(0)->getType();

  // Try to shrink a vector of FP constants. This returns nullptr on scalable
  // vectors.
  if (Type *T = shrinkFPConstantVector(V, PreferBFloat))
    return T;

  return V->getType();
}

/// Return true if the cast from integer to FP can be proven to be exact for
/// all possible inputs (the conversion does not lose any precision).
static bool isKnownExactCastIntToFP(CastInst &I, InstCombinerImpl &IC) {
  CastInst::CastOps Opcode = I.getOpcode();
  assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&
         "Unexpected cast");
  Value *Src = I.getOperand(0);
  Type *SrcTy = Src->getType();
  Type *FPTy = I.getType();
  bool IsSigned = Opcode == Instruction::SIToFP;
  int SrcSize = (int)SrcTy->getScalarSizeInBits() - IsSigned;

  // Easy case - if the source integer type has fewer bits than the FP
  // mantissa, then the cast must be exact.
  int DestNumSigBits = FPTy->getFPMantissaWidth();
  if (SrcSize <= DestNumSigBits)
    return true;

  // Cast from FP to integer and back to FP is independent of the intermediate
  // integer width because of poison on overflow.
  Value *F;
  if (match(Src, m_FPToSI(m_Value(F))) || match(Src, m_FPToUI(m_Value(F)))) {
    // If this is uitofp (fptosi F), the source needs an extra bit to avoid
    // potential rounding of negative FP input values.
    int SrcNumSigBits = F->getType()->getFPMantissaWidth();
    if (!IsSigned && match(Src, m_FPToSI(m_Value())))
      SrcNumSigBits++;

    // [su]itofp (fpto[su]i F) --> exact if the source type has no more
    // significant bits than the destination (and make sure neither type is
    // weird -- ppc_fp128).
    if (SrcNumSigBits > 0 && DestNumSigBits > 0 &&
        SrcNumSigBits <= DestNumSigBits)
      return true;
  }

  // TODO:
  // Try harder to find if the source integer type has less significant bits.
  // For example, compute number of sign bits.
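  // E.g. (illustrative): if Src is known to be "X & 0xFF0", at most 8 bits
  // between the known-zero ends can vary, so even uitofp to half (11-bit
  // significand) is exact.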
  KnownBits SrcKnown = IC.computeKnownBits(Src, 0, &I);
  int SigBits = (int)SrcTy->getScalarSizeInBits() -
                SrcKnown.countMinLeadingZeros() -
                SrcKnown.countMinTrailingZeros();
  if (SigBits <= DestNumSigBits)
    return true;

  return false;
}

Instruction *InstCombinerImpl::visitFPTrunc(FPTruncInst &FPT) {
  if (Instruction *I = commonCastTransforms(FPT))
    return I;

  // If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to
  // simplify this expression to avoid one or more of the trunc/extend
  // operations if we can do so without changing the numerical results.
  //
  // The exact manner in which the widths of the operands interact to limit
  // what we can and cannot do safely varies from operation to operation, and
  // is explained below in the various case statements.
  Type *Ty = FPT.getType();
  auto *BO = dyn_cast<BinaryOperator>(FPT.getOperand(0));
  if (BO && BO->hasOneUse()) {
    Type *LHSMinType =
        getMinimumFPType(BO->getOperand(0), /*PreferBFloat=*/Ty->isBFloatTy());
    Type *RHSMinType =
        getMinimumFPType(BO->getOperand(1), /*PreferBFloat=*/Ty->isBFloatTy());
    unsigned OpWidth = BO->getType()->getFPMantissaWidth();
    unsigned LHSWidth = LHSMinType->getFPMantissaWidth();
    unsigned RHSWidth = RHSMinType->getFPMantissaWidth();
    unsigned SrcWidth = std::max(LHSWidth, RHSWidth);
    unsigned DstWidth = Ty->getFPMantissaWidth();
    switch (BO->getOpcode()) {
    default: break;
    case Instruction::FAdd:
    case Instruction::FSub:
      // For addition and subtraction, the infinitely precise result can
      // essentially be arbitrarily wide; proving that double rounding
      // will not occur because the result of OpI is exact (as we will for
      // FMul, for example) is hopeless.  However, we *can* nonetheless
      // frequently know that double rounding cannot occur (or that it is
      // innocuous) by taking advantage of the specific structure of
      // infinitely-precise results that admit double rounding.
      //
      // Specifically, if OpWidth >= 2*DstWidth+1 and DstWidth is sufficient
      // to represent both sources, we can guarantee that the double
      // rounding is innocuous (See p50 of Figueroa's 2000 PhD thesis,
      // "A Rigorous Framework for Fully Supporting the IEEE Standard ..."
      // for proof of this fact).
      //
      // Note: Figueroa does not consider the case where DstFormat !=
      // SrcFormat.  It's possible (likely even!) that this analysis
      // could be tightened for those cases, but they are rare (the main
      // case of interest here is (float)((double)float + float)).
      if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) {
        Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
        Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
        Instruction *RI = BinaryOperator::Create(BO->getOpcode(), LHS, RHS);
        RI->copyFastMathFlags(BO);
        return RI;
      }
      break;
    case Instruction::FMul:
      // For multiplication, the infinitely precise result has at most
      // LHSWidth + RHSWidth significant bits; if OpWidth is sufficient
      // that such a value can be exactly represented, then no double
      // rounding can possibly occur; we can safely perform the operation
      // in the destination format if it can represent both sources.
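      // E.g. (an illustrative case): for (float)((double)a * (double)b) with
      // float a and b, OpWidth (53) >= 24 + 24 and DstWidth (24) >=
      // SrcWidth (24), so the multiply can be performed directly in float.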
      if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) {
        Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
        Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
        return BinaryOperator::CreateFMulFMF(LHS, RHS, BO);
      }
      break;
    case Instruction::FDiv:
      // For division, we again use the bound from Figueroa's
      // dissertation.  I am entirely certain that this bound can be
      // tightened in the unbalanced operand case by an analysis based on
      // the diophantine rational approximation bound, but the well-known
      // condition used here is a good conservative first pass.
      // TODO: Tighten bound via rigorous analysis of the unbalanced case.
      if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) {
        Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
        Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
        return BinaryOperator::CreateFDivFMF(LHS, RHS, BO);
      }
      break;
    case Instruction::FRem: {
      // Remainder is straightforward.  Remainder is always exact, so the
      // type of OpI doesn't enter into things at all.  We simply evaluate
      // in whichever source type is larger, then convert to the
      // destination type.
      if (SrcWidth == OpWidth)
        break;
      Value *LHS, *RHS;
      if (LHSWidth == SrcWidth) {
        LHS = Builder.CreateFPTrunc(BO->getOperand(0), LHSMinType);
        RHS = Builder.CreateFPTrunc(BO->getOperand(1), LHSMinType);
      } else {
        LHS = Builder.CreateFPTrunc(BO->getOperand(0), RHSMinType);
        RHS = Builder.CreateFPTrunc(BO->getOperand(1), RHSMinType);
      }

      Value *ExactResult = Builder.CreateFRemFMF(LHS, RHS, BO);
      return CastInst::CreateFPCast(ExactResult, Ty);
    }
    }
  }

  // (fptrunc (fneg x)) -> (fneg (fptrunc x))
  Value *X;
  Instruction *Op = dyn_cast<Instruction>(FPT.getOperand(0));
  if (Op && Op->hasOneUse()) {
    // FIXME: The FMF should propagate from the fptrunc, not the source op.
    IRBuilder<>::FastMathFlagGuard FMFG(Builder);
    if (isa<FPMathOperator>(Op))
      Builder.setFastMathFlags(Op->getFastMathFlags());

    if (match(Op, m_FNeg(m_Value(X)))) {
      Value *InnerTrunc = Builder.CreateFPTrunc(X, Ty);
      return UnaryOperator::CreateFNegFMF(InnerTrunc, Op);
    }

    // If we are truncating a select that has an extended operand, we can
    // narrow the other operand and do the select as a narrow op.
    Value *Cond, *X, *Y;
    if (match(Op, m_Select(m_Value(Cond), m_FPExt(m_Value(X)), m_Value(Y))) &&
        X->getType() == Ty) {
      // fptrunc (select Cond, (fpext X), Y) --> select Cond, X, (fptrunc Y)
      Value *NarrowY = Builder.CreateFPTrunc(Y, Ty);
      Value *Sel = Builder.CreateSelect(Cond, X, NarrowY, "narrow.sel", Op);
      return replaceInstUsesWith(FPT, Sel);
    }
    if (match(Op, m_Select(m_Value(Cond), m_Value(Y), m_FPExt(m_Value(X)))) &&
        X->getType() == Ty) {
      // fptrunc (select Cond, Y, (fpext X)) --> select Cond, (fptrunc Y), X
      Value *NarrowY = Builder.CreateFPTrunc(Y, Ty);
      Value *Sel = Builder.CreateSelect(Cond, NarrowY, X, "narrow.sel", Op);
      return replaceInstUsesWith(FPT, Sel);
    }
  }

  if (auto *II = dyn_cast<IntrinsicInst>(FPT.getOperand(0))) {
    switch (II->getIntrinsicID()) {
    default: break;
    case Intrinsic::ceil:
    case Intrinsic::fabs:
    case Intrinsic::floor:
    case Intrinsic::nearbyint:
    case Intrinsic::rint:
    case Intrinsic::round:
    case Intrinsic::roundeven:
    case Intrinsic::trunc: {
      Value *Src = II->getArgOperand(0);
      if (!Src->hasOneUse())
        break;

      // Except for fabs, this transformation requires the input of the unary
      // FP operation to be itself an fpext from the type to which we're
      // truncating.
      if (II->getIntrinsicID() != Intrinsic::fabs) {
        FPExtInst *FPExtSrc = dyn_cast<FPExtInst>(Src);
        if (!FPExtSrc || FPExtSrc->getSrcTy() != Ty)
          break;
      }

      // Do unary FP operation on smaller type.
      // (fptrunc (fabs x)) -> (fabs (fptrunc x))
      Value *InnerTrunc = Builder.CreateFPTrunc(Src, Ty);
      Function *Overload = Intrinsic::getDeclaration(FPT.getModule(),
                                                     II->getIntrinsicID(), Ty);
      SmallVector<OperandBundleDef, 1> OpBundles;
      II->getOperandBundlesAsDefs(OpBundles);
      CallInst *NewCI =
          CallInst::Create(Overload, {InnerTrunc}, OpBundles, II->getName());
      NewCI->copyFastMathFlags(II);
      return NewCI;
    }
    }
  }

  if (Instruction *I = shrinkInsertElt(FPT, Builder))
    return I;
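  // fptrunc (sitofp X) --> sitofp X when the inner cast is exact for all X.
  // E.g. (illustrative): fptrunc (sitofp i8 %x to double) to float
  // --> sitofp i8 %x to float, since every i8 value fits in float's 24-bit
  // significand.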
  Value *Src = FPT.getOperand(0);
  if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
    auto *FPCast = cast<CastInst>(Src);
    if (isKnownExactCastIntToFP(*FPCast, *this))
      return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
  }

  return nullptr;
}

Instruction *InstCombinerImpl::visitFPExt(CastInst &FPExt) {
  // If the source operand is a cast from integer to FP and known exact, then
  // cast the integer operand directly to the destination type.
  Type *Ty = FPExt.getType();
  Value *Src = FPExt.getOperand(0);
  if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
    auto *FPCast = cast<CastInst>(Src);
    if (isKnownExactCastIntToFP(*FPCast, *this))
      return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
  }

  return commonCastTransforms(FPExt);
}

/// fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)
/// This is safe if the intermediate type has enough bits in its mantissa to
/// accurately represent all values of X.  For example, this won't work with
/// i64 -> float -> i64.
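/// E.g. (an illustrative case): fptosi (sitofp i16 %x to float) to i32 can
/// fold to "sext i16 %x to i32" because every i16 value is exactly
/// representable in float.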
Instruction *InstCombinerImpl::foldItoFPtoI(CastInst &FI) {
  if (!isa<UIToFPInst>(FI.getOperand(0)) && !isa<SIToFPInst>(FI.getOperand(0)))
    return nullptr;

  auto *OpI = cast<CastInst>(FI.getOperand(0));
  Value *X = OpI->getOperand(0);
  Type *XType = X->getType();
  Type *DestType = FI.getType();
  bool IsOutputSigned = isa<FPToSIInst>(FI);

  // Since we can assume the conversion won't overflow, our decision as to
  // whether the input will fit in the float should depend on the minimum
  // of the input range and output range.

  // This means this is also safe for a signed input and unsigned output, since
  // a negative input would lead to undefined behavior.
  if (!isKnownExactCastIntToFP(*OpI, *this)) {
    // The first cast may not round exactly based on the source integer width
    // and FP width, but the overflow UB rules can still allow this to fold.
    // If the destination type is narrow, that means the intermediate FP value
    // must be large enough to hold the source value exactly.
    // For example, (uint8_t)(float)(uint32_t)16777217 is undefined behavior.
    int OutputSize = (int)DestType->getScalarSizeInBits();
    if (OutputSize > OpI->getType()->getFPMantissaWidth())
      return nullptr;
  }

  if (DestType->getScalarSizeInBits() > XType->getScalarSizeInBits()) {
    bool IsInputSigned = isa<SIToFPInst>(OpI);
    if (IsInputSigned && IsOutputSigned)
      return new SExtInst(X, DestType);
    return new ZExtInst(X, DestType);
  }
  if (DestType->getScalarSizeInBits() < XType->getScalarSizeInBits())
    return new TruncInst(X, DestType);

  assert(XType == DestType && "Unexpected types for int to FP to int casts");
  return replaceInstUsesWith(FI, X);
}

static Instruction *foldFPtoI(Instruction &FI, InstCombiner &IC) {
  // fpto{u/s}i non-norm --> 0
  FPClassTest Mask =
      FI.getOpcode() == Instruction::FPToUI ? fcPosNormal : fcNormal;
  KnownFPClass FPClass =
      computeKnownFPClass(FI.getOperand(0), Mask, /*Depth=*/0,
                          IC.getSimplifyQuery().getWithInstruction(&FI));
  if (FPClass.isKnownNever(Mask))
    return IC.replaceInstUsesWith(FI, ConstantInt::getNullValue(FI.getType()));

  return nullptr;
}

Instruction *InstCombinerImpl::visitFPToUI(FPToUIInst &FI) {
  if (Instruction *I = foldItoFPtoI(FI))
    return I;

  if (Instruction *I = foldFPtoI(FI, *this))
    return I;

  return commonCastTransforms(FI);
}

Instruction *InstCombinerImpl::visitFPToSI(FPToSIInst &FI) {
  if (Instruction *I = foldItoFPtoI(FI))
    return I;

  if (Instruction *I = foldFPtoI(FI, *this))
    return I;

  return commonCastTransforms(FI);
}

Instruction *InstCombinerImpl::visitUIToFP(CastInst &CI) {
  if (Instruction *R = commonCastTransforms(CI))
    return R;
  if (!CI.hasNonNeg() && isKnownNonNegative(CI.getOperand(0), SQ)) {
    CI.setNonNeg();
    return &CI;
  }
  return nullptr;
}

Instruction *InstCombinerImpl::visitSIToFP(CastInst &CI) {
  if (Instruction *R = commonCastTransforms(CI))
    return R;
  if (isKnownNonNegative(CI.getOperand(0), SQ)) {
    auto *UI =
        CastInst::Create(Instruction::UIToFP, CI.getOperand(0), CI.getType());
    UI->setNonNeg(true);
    return UI;
  }
  return nullptr;
}

Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) {
  // If the source integer type is not the intptr_t type for this target, do a
  // trunc or zext to the intptr_t type, then inttoptr of it.  This allows the
  // cast to be exposed to other transforms.
  unsigned AS = CI.getAddressSpace();
  if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
      DL.getPointerSizeInBits(AS)) {
    Type *Ty = CI.getOperand(0)->getType()->getWithNewType(
        DL.getIntPtrType(CI.getContext(), AS));
    Value *P = Builder.CreateZExtOrTrunc(CI.getOperand(0), Ty);
    return new IntToPtrInst(P, CI.getType());
  }

  if (Instruction *I = commonCastTransforms(CI))
    return I;

  return nullptr;
}
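// E.g. (illustrative, assuming 64-bit pointers): "inttoptr i32 %x to ptr"
// becomes "%w = zext i32 %x to i64" followed by "inttoptr i64 %w to ptr",
// exposing the zext to the other integer transforms.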
Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
  // If the destination integer type is not the intptr_t type for this target,
  // do a ptrtoint to intptr_t then do a trunc or zext.  This allows the cast
  // to be exposed to other transforms.
  Value *SrcOp = CI.getPointerOperand();
  Type *SrcTy = SrcOp->getType();
  Type *Ty = CI.getType();
  unsigned AS = CI.getPointerAddressSpace();
  unsigned TySize = Ty->getScalarSizeInBits();
  unsigned PtrSize = DL.getPointerSizeInBits(AS);
  if (TySize != PtrSize) {
    Type *IntPtrTy =
        SrcTy->getWithNewType(DL.getIntPtrType(CI.getContext(), AS));
    Value *P = Builder.CreatePtrToInt(SrcOp, IntPtrTy);
    return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
  }

  // (ptrtoint (ptrmask P, M))
  //    -> (and (ptrtoint P), M)
  // This is generally beneficial as `and` is better supported than `ptrmask`.
  Value *Ptr, *Mask;
  if (match(SrcOp, m_OneUse(m_Intrinsic<Intrinsic::ptrmask>(m_Value(Ptr),
                                                            m_Value(Mask)))) &&
      Mask->getType() == Ty)
    return BinaryOperator::CreateAnd(Builder.CreatePtrToInt(Ptr, Ty), Mask);

  if (auto *GEP = dyn_cast<GEPOperator>(SrcOp)) {
    // Fold ptrtoint(gep null, x) to multiply + constant if the GEP has one
    // use.  While this can increase the number of instructions it doesn't
    // actually increase the overall complexity since the arithmetic is just
    // part of the GEP otherwise.
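    // E.g. (an illustrative case, 64-bit data layout): "ptrtoint (gep i32,
    // ptr null, i64 %i) to i64" becomes "mul i64 %i, 4", the GEP's offset
    // computation.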
    if (GEP->hasOneUse() &&
        isa<ConstantPointerNull>(GEP->getPointerOperand())) {
      return replaceInstUsesWith(CI,
                                 Builder.CreateIntCast(EmitGEPOffset(GEP), Ty,
                                                       /*isSigned=*/false));
    }

    // (ptrtoint (gep (inttoptr Base), ...)) -> Base + Offset
    Value *Base;
    if (GEP->hasOneUse() &&
        match(GEP->getPointerOperand(), m_OneUse(m_IntToPtr(m_Value(Base)))) &&
        Base->getType() == Ty) {
      Value *Offset = EmitGEPOffset(GEP);
      auto *NewOp = BinaryOperator::CreateAdd(Base, Offset);
      if (GEP->hasNoUnsignedWrap() ||
          (GEP->hasNoUnsignedSignedWrap() &&
           isKnownNonNegative(Offset, SQ.getWithInstruction(&CI))))
        NewOp->setHasNoUnsignedWrap(true);
      return NewOp;
    }
  }

  Value *Vec, *Scalar, *Index;
  if (match(SrcOp, m_OneUse(m_InsertElt(m_IntToPtr(m_Value(Vec)),
                                        m_Value(Scalar), m_Value(Index)))) &&
      Vec->getType() == Ty) {
    assert(Vec->getType()->getScalarSizeInBits() == PtrSize && "Wrong type");
    // Convert the scalar to int followed by insert to eliminate one cast:
    // p2i (ins (i2p Vec), Scalar, Index) --> ins Vec, (p2i Scalar), Index
    Value *NewCast = Builder.CreatePtrToInt(Scalar, Ty->getScalarType());
    return InsertElementInst::Create(Vec, NewCast, Index);
  }

  return commonCastTransforms(CI);
}

/// This input value (which is known to have vector type) is being zero
/// extended or truncated to the specified vector type.  Since the zext/trunc
/// is done using an integer type, we have a (bitcast(cast(bitcast))) pattern,
/// and endianness will impact which end of the vector is extended or
/// truncated.
///
/// A vector is always stored with index 0 at the lowest address, which
/// corresponds to the most significant bits for a big endian stored integer
/// and the least significant bits for little endian.  A trunc/zext of an
/// integer impacts the big end of the integer.  Thus, we need to add/remove
/// elements at the front of the vector for big endian targets, and the back
/// of the vector for little endian targets.
///
/// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
///
/// The source and destination vector types may have different element types.
static Instruction *
optimizeVectorResizeWithIntegerBitCasts(Value *InVal, VectorType *DestTy,
                                        InstCombinerImpl &IC) {
  // We can only do this optimization if the output is a multiple of the input
  // element size, or the input is a multiple of the output element size.
  // Convert the input type to have the same element type as the output.
  VectorType *SrcTy = cast<VectorType>(InVal->getType());

  if (SrcTy->getElementType() != DestTy->getElementType()) {
    // The input types don't need to be identical, but for now they must be the
    // same size.  There is no specific reason we couldn't handle things like
    // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
    // there yet.
    if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
        DestTy->getElementType()->getPrimitiveSizeInBits())
      return nullptr;

    SrcTy =
        FixedVectorType::get(DestTy->getElementType(),
                             cast<FixedVectorType>(SrcTy)->getNumElements());
    InVal = IC.Builder.CreateBitCast(InVal, SrcTy);
  }

  bool IsBigEndian = IC.getDataLayout().isBigEndian();
  unsigned SrcElts = cast<FixedVectorType>(SrcTy)->getNumElements();
  unsigned DestElts = cast<FixedVectorType>(DestTy)->getNumElements();

  assert(SrcElts != DestElts && "Element counts should be different.");

  // Now that the element types match, get the shuffle mask and RHS of the
  // shuffle to use, which depends on whether we're increasing or decreasing
  // the size of the input.
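  // E.g. (illustrative, little endian): <4 x i32> -> <2 x i32> keeps elements
  // {0, 1}; <2 x i32> -> <4 x i32> uses mask {0, 1, 2, 2} with a zero vector
  // as the second shuffle operand, where mask index 2 selects the first zero
  // element.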
  auto ShuffleMaskStorage = llvm::to_vector<16>(llvm::seq<int>(0, SrcElts));
  ArrayRef<int> ShuffleMask;
  Value *V2;

  if (SrcElts > DestElts) {
    // If we're shrinking the number of elements (rewriting an integer
    // truncate), just shuffle in the elements corresponding to the least
    // significant bits from the input and use poison as the second shuffle
    // input.
    V2 = PoisonValue::get(SrcTy);
    // Make sure the shuffle mask selects the "least significant bits" by
    // keeping elements from the back of the src vector for big endian, and
    // from the front for little endian.
    ShuffleMask = ShuffleMaskStorage;
    if (IsBigEndian)
      ShuffleMask = ShuffleMask.take_back(DestElts);
    else
      ShuffleMask = ShuffleMask.take_front(DestElts);
  } else {
    // If we're increasing the number of elements (rewriting an integer zext),
    // shuffle in all of the elements from InVal.  Fill the rest of the result
    // elements with zeros from a constant zero.
    V2 = Constant::getNullValue(SrcTy);
    // Use first elt from V2 when indicating zero in the shuffle mask.
    uint32_t NullElt = SrcElts;
    // Extend with null values in the "most significant bits" by adding
    // elements in front of the src vector for big endian, and at the back
    // for little endian.
    unsigned DeltaElts = DestElts - SrcElts;
    if (IsBigEndian)
      ShuffleMaskStorage.insert(ShuffleMaskStorage.begin(), DeltaElts, NullElt);
    else
      ShuffleMaskStorage.append(DeltaElts, NullElt);
    ShuffleMask = ShuffleMaskStorage;
  }

  return new ShuffleVectorInst(InVal, V2, ShuffleMask);
}

static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) {
  return Value % Ty->getPrimitiveSizeInBits() == 0;
}

static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
  return Value / Ty->getPrimitiveSizeInBits();
}

/// V is a value which is inserted into a vector of VecEltTy.
/// Look through the value to see if we can decompose it into
/// insertions into the vector.  See the example in the comment for
/// optimizeIntegerToVectorInsertions for the pattern this handles.
/// The type of V is always a non-zero multiple of VecEltTy's size.
/// Shift is the number of bits between the lsb of V and the lsb of
/// the vector.
///
/// This returns false if the pattern can't be matched or true if it can,
/// filling in Elements with the elements found here.
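/// E.g. (an illustrative case, little endian): decomposing the constant
/// i64 0x100000002 with VecEltTy = i32 yields Elements = {i32 2, i32 1},
/// since the low word holds 2 and the high word holds 1.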
static bool collectInsertionElements(Value *V, unsigned Shift,
                                     SmallVectorImpl<Value *> &Elements,
                                     Type *VecEltTy, bool isBigEndian) {
  assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
         "Shift should be a multiple of the element type size");

  // Undef values never contribute useful bits to the result.
  if (isa<UndefValue>(V)) return true;

  // If we got down to a value of the right type, we win; try inserting it into
  // the right element.
  if (V->getType() == VecEltTy) {
    // Inserting null doesn't actually insert any elements.
    if (Constant *C = dyn_cast<Constant>(V))
      if (C->isNullValue())
        return true;

    unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
    if (isBigEndian)
      ElementIndex = Elements.size() - ElementIndex - 1;

    // Fail if multiple elements are inserted into this slot.
    if (Elements[ElementIndex])
      return false;

    Elements[ElementIndex] = V;
    return true;
  }

  if (Constant *C = dyn_cast<Constant>(V)) {
    // Figure out the # elements this provides, and bitcast it or slice it up
    // as required.
    unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(),
                                        VecEltTy);
    // If the constant is the size of a vector element, we just need to bitcast
    // it to the right type so it gets properly inserted.
    if (NumElts == 1)
      return collectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
                                      Shift, Elements, VecEltTy, isBigEndian);

    // Okay, this is a constant that covers multiple elements.  Slice it up
    // into pieces and insert each element-sized piece into the vector.
    if (!isa<IntegerType>(C->getType()))
      C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(),
                                       C->getType()->getPrimitiveSizeInBits()));
    unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
    Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);

    for (unsigned i = 0; i != NumElts; ++i) {
      unsigned ShiftI = i * ElementSize;
      Constant *Piece = ConstantFoldBinaryInstruction(
          Instruction::LShr, C, ConstantInt::get(C->getType(), ShiftI));
      if (!Piece)
        return false;

      Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
      if (!collectInsertionElements(Piece, ShiftI + Shift, Elements, VecEltTy,
                                    isBigEndian))
        return false;
    }
    return true;
  }

  if (!V->hasOneUse()) return false;

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return false;
  switch (I->getOpcode()) {
  default: return false; // Unhandled case.
  case Instruction::BitCast:
    if (I->getOperand(0)->getType()->isVectorTy())
      return false;
    return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
                                    isBigEndian);
  case Instruction::ZExt:
    if (!isMultipleOfTypeSize(
            I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
            VecEltTy))
      return false;
    return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
                                    isBigEndian);
  case Instruction::Or:
    return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
                                    isBigEndian) &&
           collectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy,
                                    isBigEndian);
  case Instruction::Shl: {
    // Must be shifting by a constant that is a multiple of the element size.
    ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
    if (!CI) return false;
    Shift += CI->getZExtValue();
    if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
    return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
                                    isBigEndian);
  }
  }
}

/// If the input is an 'or' instruction, we may be doing shifts and ors to
/// assemble the elements of the vector manually.
/// Try to rip the code out and replace it with insertelements.  This is to
/// optimize code like this:
///
///    %tmp37 = bitcast float %inc to i32
///    %tmp38 = zext i32 %tmp37 to i64
///    %tmp31 = bitcast float %inc5 to i32
///    %tmp32 = zext i32 %tmp31 to i64
///    %tmp33 = shl i64 %tmp32, 32
///    %ins35 = or i64 %tmp33, %tmp38
///    %tmp43 = bitcast i64 %ins35 to <2 x float>
///
/// Into two insertelements that do "buildvector{%inc, %inc5}".
static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI,
                                                InstCombinerImpl &IC) {
  auto *DestVecTy = cast<FixedVectorType>(CI.getType());
  Value *IntInput = CI.getOperand(0);

  SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
  if (!collectInsertionElements(IntInput, 0, Elements,
                                DestVecTy->getElementType(),
                                IC.getDataLayout().isBigEndian()))
    return nullptr;

  // If we succeeded, we know that all of the elements are specified by
  // Elements or are zero if Elements has a null entry.  Recast this as a set
  // of insertions.
  Value *Result = Constant::getNullValue(CI.getType());
  for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
    if (!Elements[i]) continue;  // Unset element.

    Result = IC.Builder.CreateInsertElement(Result, Elements[i],
                                            IC.Builder.getInt32(i));
  }

  return Result;
}

/// Canonicalize scalar bitcasts of extracted elements into a bitcast of the
/// vector followed by extract element. The backend tends to handle bitcasts of
/// vectors better than bitcasts of scalars because vector registers are
/// usually not type-specific like scalar integer or scalar floating-point.
static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast,
                                              InstCombinerImpl &IC) {
  Value *VecOp, *Index;
  if (!match(BitCast.getOperand(0),
             m_OneUse(m_ExtractElt(m_Value(VecOp), m_Value(Index)))))
    return nullptr;

  // The bitcast must be to a vectorizable type, otherwise we can't make a new
  // type to extract from.
  Type *DestType = BitCast.getType();
  VectorType *VecType = cast<VectorType>(VecOp->getType());
  if (VectorType::isValidElementType(DestType)) {
    auto *NewVecType = VectorType::get(DestType, VecType);
    auto *NewBC = IC.Builder.CreateBitCast(VecOp, NewVecType, "bc");
    return ExtractElementInst::Create(NewBC, Index);
  }

  // Only handle the case where DestType is a vector, to avoid the inverse
  // transform in visitBitCast.
  // bitcast (extractelement <1 x elt>, dest) -> bitcast(<1 x elt>, dest)
  auto *FixedVType = dyn_cast<FixedVectorType>(VecType);
  if (DestType->isVectorTy() && FixedVType && FixedVType->getNumElements() == 1)
    return CastInst::Create(Instruction::BitCast, VecOp, DestType);

  return nullptr;
}
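// E.g. (illustrative): "bitcast (extractelement <4 x i32> %v, i64 %i) to
// float" becomes "extractelement (bitcast %v to <4 x float>), i64 %i".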
/// Change the type of a bitwise logic operation if we can eliminate a bitcast.
static Instruction *foldBitCastBitwiseLogic(BitCastInst &BitCast,
                                            InstCombiner::BuilderTy &Builder) {
  Type *DestTy = BitCast.getType();
  BinaryOperator *BO;

  if (!match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) ||
      !BO->isBitwiseLogicOp())
    return nullptr;

  // FIXME: This transform is restricted to vector types to avoid backend
  // problems caused by creating potentially illegal operations.  If a fix-up
  // is added to handle that situation, we can remove this check.
  if (!DestTy->isVectorTy() || !BO->getType()->isVectorTy())
    return nullptr;

  if (DestTy->isFPOrFPVectorTy()) {
    Value *X, *Y;
    // bitcast(logic(bitcast(X), bitcast(Y))) -> bitcast'(logic(bitcast'(X), Y))
    if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
        match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(Y))))) {
      if (X->getType()->isFPOrFPVectorTy() &&
          Y->getType()->isIntOrIntVectorTy()) {
        Value *CastedOp =
            Builder.CreateBitCast(BO->getOperand(0), Y->getType());
        Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, Y);
        return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
      }
      if (X->getType()->isIntOrIntVectorTy() &&
          Y->getType()->isFPOrFPVectorTy()) {
        Value *CastedOp =
            Builder.CreateBitCast(BO->getOperand(1), X->getType());
        Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, X);
        return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
      }
    }
    return nullptr;
  }

  if (!DestTy->isIntOrIntVectorTy())
    return nullptr;

  Value *X;
  if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
      X->getType() == DestTy && !isa<Constant>(X)) {
    // bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
    Value *CastedOp1 = Builder.CreateBitCast(BO->getOperand(1), DestTy);
    return BinaryOperator::Create(BO->getOpcode(), X, CastedOp1);
  }

  if (match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(X)))) &&
      X->getType() == DestTy && !isa<Constant>(X)) {
    // bitcast(logic(Y, bitcast(X))) --> logic'(bitcast(Y), X)
    Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
    return BinaryOperator::Create(BO->getOpcode(), CastedOp0, X);
  }

  // Canonicalize vector bitcasts to come before vector bitwise logic with a
  // constant. This eases recognition of special constants for later ops.
  // Example:
  // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
  Constant *C;
  if (match(BO->getOperand(1), m_Constant(C))) {
    // bitcast (logic X, C) --> logic (bitcast X, C')
    Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
    Value *CastedC = Builder.CreateBitCast(C, DestTy);
    return BinaryOperator::Create(BO->getOpcode(), CastedOp0, CastedC);
  }

  return nullptr;
}
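// E.g. (an illustrative case of the first integer pattern above):
//   %i = bitcast <4 x i32> %x to <2 x i64>
//   %l = and <2 x i64> %i, %y
//   %r = bitcast <2 x i64> %l to <4 x i32>
// becomes "and <4 x i32> %x, (bitcast %y to <4 x i32>)".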
/// Change the type of a select if we can eliminate a bitcast.
static Instruction *foldBitCastSelect(BitCastInst &BitCast,
                                      InstCombiner::BuilderTy &Builder) {
  Value *Cond, *TVal, *FVal;
  if (!match(BitCast.getOperand(0),
             m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
    return nullptr;

  // A vector select must maintain the same number of elements in its operands.
  Type *CondTy = Cond->getType();
  Type *DestTy = BitCast.getType();
  if (auto *CondVTy = dyn_cast<VectorType>(CondTy))
    if (!DestTy->isVectorTy() ||
        CondVTy->getElementCount() !=
            cast<VectorType>(DestTy)->getElementCount())
      return nullptr;

  // FIXME: This transform is restricted from changing the select between
  // scalars and vectors to avoid backend problems caused by creating
  // potentially illegal operations.  If a fix-up is added to handle that
  // situation, we can remove this check.
  if (DestTy->isVectorTy() != TVal->getType()->isVectorTy())
    return nullptr;

  auto *Sel = cast<Instruction>(BitCast.getOperand(0));
  Value *X;
  if (match(TVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
      !isa<Constant>(X)) {
    // bitcast(select(Cond, bitcast(X), Y)) --> select'(Cond, X, bitcast(Y))
    Value *CastedVal = Builder.CreateBitCast(FVal, DestTy);
    return SelectInst::Create(Cond, X, CastedVal, "", nullptr, Sel);
  }

  if (match(FVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
      !isa<Constant>(X)) {
    // bitcast(select(Cond, Y, bitcast(X))) --> select'(Cond, bitcast(Y), X)
    Value *CastedVal = Builder.CreateBitCast(TVal, DestTy);
    return SelectInst::Create(Cond, CastedVal, X, "", nullptr, Sel);
  }

  return nullptr;
}

/// Check if all users of CI are StoreInsts.
static bool hasStoreUsersOnly(CastInst &CI) {
  for (User *U : CI.users()) {
    if (!isa<StoreInst>(U))
      return false;
  }
  return true;
}

/// This function handles the following case:
///
/// A -> B cast
/// PHI
/// B -> A cast
///
/// All the related PHI nodes can be replaced by new PHI nodes with type A.
/// The uses of \p CI can be changed to the new PHI node corresponding to \p PN.
Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI,
                                                      PHINode *PN) {
  // BitCast used by Store can be handled in InstCombineLoadStoreAlloca.cpp.
  if (hasStoreUsersOnly(CI))
    return nullptr;

  Value *Src = CI.getOperand(0);
  Type *SrcTy = Src->getType();  // Type B
  Type *DestTy = CI.getType();   // Type A

  SmallVector<PHINode *, 4> PhiWorklist;
  SmallSetVector<PHINode *, 4> OldPhiNodes;

  // Find all of the A->B casts and PHI nodes.
  // We need to inspect all related PHI nodes, but PHIs can be cyclic, so
  // OldPhiNodes is used to track all known PHI nodes; before adding a new
  // PHI to PhiWorklist, it is checked against and added to OldPhiNodes first.
  PhiWorklist.push_back(PN);
  OldPhiNodes.insert(PN);
  while (!PhiWorklist.empty()) {
    auto *OldPN = PhiWorklist.pop_back_val();
    for (Value *IncValue : OldPN->incoming_values()) {
      if (isa<Constant>(IncValue))
        continue;

      if (auto *LI = dyn_cast<LoadInst>(IncValue)) {
        // If there is a sequence of one or more load instructions, with each
        // loaded value used as the address of a later load, a bitcast is
        // necessary to change the value type; don't optimize that case. For
        // simplicity we give up if the load address comes from another load.
        Value *Addr = LI->getOperand(0);
        if (Addr == &CI || isa<LoadInst>(Addr))
          return nullptr;
        // Don't transform "load <256 x i32>, <256 x i32>*" to
        // "load x86_amx, x86_amx*", because x86_amx* is invalid.
        // TODO: Remove this check when bitcast between vector and x86_amx
        // is replaced with a specific intrinsic.
        if (DestTy->isX86_AMXTy())
          return nullptr;
        if (LI->hasOneUse() && LI->isSimple())
          continue;
        // If a LoadInst has more than one use, changing the type of loaded
        // value may create another bitcast.
        return nullptr;
      }

      if (auto *PNode = dyn_cast<PHINode>(IncValue)) {
        if (OldPhiNodes.insert(PNode))
          PhiWorklist.push_back(PNode);
        continue;
      }

      auto *BCI = dyn_cast<BitCastInst>(IncValue);
      // We can't handle other instructions.
      if (!BCI)
        return nullptr;

      // Verify it's a A->B cast.
      Type *TyA = BCI->getOperand(0)->getType();
      Type *TyB = BCI->getType();
      if (TyA != DestTy || TyB != SrcTy)
        return nullptr;
    }
  }

  // Check that each user of each old PHI node is something that we can
  // rewrite, so that all of the old PHI nodes can be cleaned up afterwards.
  for (auto *OldPN : OldPhiNodes) {
    for (User *V : OldPN->users()) {
      if (auto *SI = dyn_cast<StoreInst>(V)) {
        if (!SI->isSimple() || SI->getOperand(0) != OldPN)
          return nullptr;
      } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
        // Verify it's a B->A cast.
        Type *TyB = BCI->getOperand(0)->getType();
        Type *TyA = BCI->getType();
        if (TyA != DestTy || TyB != SrcTy)
          return nullptr;
      } else if (auto *PHI = dyn_cast<PHINode>(V)) {
        // As long as the user is another old PHI node, then even if we don't
        // rewrite it, the PHI web we're considering won't have any users
        // outside itself, so it'll be dead.
        if (!OldPhiNodes.contains(PHI))
          return nullptr;
      } else {
        return nullptr;
      }
    }
  }

  // For each old PHI node, create a corresponding new PHI node with a type A.
  SmallDenseMap<PHINode *, PHINode *> NewPNodes;
  for (auto *OldPN : OldPhiNodes) {
    Builder.SetInsertPoint(OldPN);
    PHINode *NewPN = Builder.CreatePHI(DestTy, OldPN->getNumOperands());
    NewPNodes[OldPN] = NewPN;
  }

  // Fill in the operands of new PHI nodes.
  for (auto *OldPN : OldPhiNodes) {
    PHINode *NewPN = NewPNodes[OldPN];
    for (unsigned j = 0, e = OldPN->getNumOperands(); j != e; ++j) {
      Value *V = OldPN->getOperand(j);
      Value *NewV = nullptr;
      if (auto *C = dyn_cast<Constant>(V)) {
        NewV = ConstantExpr::getBitCast(C, DestTy);
      } else if (auto *LI = dyn_cast<LoadInst>(V)) {
        // Explicitly perform load combine to make sure no opposing transform
        // can remove the bitcast in the meantime and trigger an infinite loop.
        Builder.SetInsertPoint(LI);
        NewV = combineLoadToNewType(*LI, DestTy);
        // Remove the old load and its use in the old phi, which itself becomes
        // dead once the whole transform finishes.
        replaceInstUsesWith(*LI, PoisonValue::get(LI->getType()));
        eraseInstFromFunction(*LI);
      } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
        NewV = BCI->getOperand(0);
      } else if (auto *PrevPN = dyn_cast<PHINode>(V)) {
        NewV = NewPNodes[PrevPN];
      }
      assert(NewV);
      NewPN->addIncoming(NewV, OldPN->getIncomingBlock(j));
    }
  }

  // Traverse all accumulated PHI nodes and process their users,
  // which are Stores and BitCasts. Without this processing,
  // NewPHI nodes could be replicated and could lead to extra
  // moves generated after DeSSA.
  // If there is a store with type B, change it to type A.

  // Replace users of BitCast B->A with NewPHI. These will help
  // later to get rid of a closure formed by OldPHI nodes.
  Instruction *RetVal = nullptr;
  for (auto *OldPN : OldPhiNodes) {
    PHINode *NewPN = NewPNodes[OldPN];
    for (User *V : make_early_inc_range(OldPN->users())) {
      if (auto *SI = dyn_cast<StoreInst>(V)) {
        assert(SI->isSimple() && SI->getOperand(0) == OldPN);
        Builder.SetInsertPoint(SI);
        auto *NewBC =
            cast<BitCastInst>(Builder.CreateBitCast(NewPN, SrcTy));
        SI->setOperand(0, NewBC);
        Worklist.push(SI);
        assert(hasStoreUsersOnly(*NewBC));
      } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
        Type *TyB = BCI->getOperand(0)->getType();
        Type *TyA = BCI->getType();
        assert(TyA == DestTy && TyB == SrcTy);
        (void) TyA;
        (void) TyB;
        Instruction *I = replaceInstUsesWith(*BCI, NewPN);
        if (BCI == &CI)
          RetVal = I;
      } else if (auto *PHI = dyn_cast<PHINode>(V)) {
        assert(OldPhiNodes.contains(PHI));
        (void) PHI;
      } else {
        llvm_unreachable("all uses should be handled");
      }
    }
  }

  return RetVal;
}

Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
  // If the operands are integer typed then apply the integer transforms,
  // otherwise just apply the common ones.
  Value *Src = CI.getOperand(0);
  Type *SrcTy = Src->getType();
  Type *DestTy = CI.getType();

  // Get rid of casts from one type to the same type. These are useless and can
  // be replaced by the operand.
  if (DestTy == Src->getType())
    return replaceInstUsesWith(CI, Src);

  if (FixedVectorType *DestVTy = dyn_cast<FixedVectorType>(DestTy)) {
    // Beware: messing with this target-specific oddity may cause trouble.
    if (DestVTy->getNumElements() == 1 && SrcTy->isX86_MMXTy()) {
      Value *Elem = Builder.CreateBitCast(Src, DestVTy->getElementType());
      return InsertElementInst::Create(PoisonValue::get(DestTy), Elem,
                     Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
    }

    if (isa<IntegerType>(SrcTy)) {
      // If this is a cast from an integer to vector, check to see if the input
      // is a trunc or zext of a bitcast from vector.  If so, we can replace
      // all the casts with a shuffle and (potentially) a bitcast.
      if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) {
        CastInst *SrcCast = cast<CastInst>(Src);
        if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
          if (isa<VectorType>(BCIn->getOperand(0)->getType()))
            if (Instruction *I = optimizeVectorResizeWithIntegerBitCasts(
                    BCIn->getOperand(0), cast<VectorType>(DestTy), *this))
              return I;
      }

      // If the input is an 'or' instruction, we may be doing shifts and ors
      // to assemble the elements of the vector manually. Try to rip the code
      // out and replace it with insertelements.
      if (Value *V = optimizeIntegerToVectorInsertions(CI, *this))
        return replaceInstUsesWith(CI, V);
    }
  }

  if (FixedVectorType *SrcVTy = dyn_cast<FixedVectorType>(SrcTy)) {
    if (SrcVTy->getNumElements() == 1) {
      // If our destination is not a vector, then make this a straight
      // scalar-scalar cast.
      if (!DestTy->isVectorTy()) {
        Value *Elem =
          Builder.CreateExtractElement(Src,
                     Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
        return CastInst::Create(Instruction::BitCast, Elem, DestTy);
      }

      // Otherwise, see if our source is an insert. If so, then use the scalar
      // component directly:
      // bitcast (inselt <1 x elt> V, X, 0) to <n x m> --> bitcast X to <n x m>
      if (auto *InsElt = dyn_cast<InsertElementInst>(Src))
        return new BitCastInst(InsElt->getOperand(1), DestTy);
    }

    // Convert an artificial vector insert into more analyzable bitwise logic.
    unsigned BitWidth = DestTy->getScalarSizeInBits();
    Value *X, *Y;
    uint64_t IndexC;
    if (match(Src, m_OneUse(m_InsertElt(m_OneUse(m_BitCast(m_Value(X))),
                                        m_Value(Y), m_ConstantInt(IndexC)))) &&
        DestTy->isIntegerTy() && X->getType() == DestTy &&
        Y->getType()->isIntegerTy() && isDesirableIntType(BitWidth)) {
      // Adjust for big endian - the LSBs are at the high index.
      if (DL.isBigEndian())
        IndexC = SrcVTy->getNumElements() - 1 - IndexC;

      // We only handle (endian-normalized) insert to index 0. Any other insert
      // would require a left-shift, so that is an extra instruction.
      if (IndexC == 0) {
        // bitcast (inselt (bitcast X), Y, 0) --> or (and X, MaskC), (zext Y)
        unsigned EltWidth = Y->getType()->getScalarSizeInBits();
        APInt MaskC = APInt::getHighBitsSet(BitWidth, BitWidth - EltWidth);
        Value *AndX = Builder.CreateAnd(X, MaskC);
        Value *ZextY = Builder.CreateZExt(Y, DestTy);
        return BinaryOperator::CreateOr(AndX, ZextY);
      }
    }
  }

  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(Src)) {
    // Okay, we have (bitcast (shuffle ..)).  Check to see if this is
    // a bitcast to a vector with the same # elts.
    Value *ShufOp0 = Shuf->getOperand(0);
    Value *ShufOp1 = Shuf->getOperand(1);
    auto ShufElts = cast<VectorType>(Shuf->getType())->getElementCount();
    auto SrcVecElts = cast<VectorType>(ShufOp0->getType())->getElementCount();
    if (Shuf->hasOneUse() && DestTy->isVectorTy() &&
        cast<VectorType>(DestTy)->getElementCount() == ShufElts &&
        ShufElts == SrcVecElts) {
      BitCastInst *Tmp;
      // If either of the operands is a cast from CI.getType(), then
      // evaluating the shuffle in the casted destination's type will allow
      // us to eliminate at least one cast.
      if (((Tmp = dyn_cast<BitCastInst>(ShufOp0)) &&
           Tmp->getOperand(0)->getType() == DestTy) ||
          ((Tmp = dyn_cast<BitCastInst>(ShufOp1)) &&
           Tmp->getOperand(0)->getType() == DestTy)) {
        Value *LHS = Builder.CreateBitCast(ShufOp0, DestTy);
        Value *RHS = Builder.CreateBitCast(ShufOp1, DestTy);
        // Return a new shuffle vector.  Use the same element ID's, as we
        // know the vector types match #elts.
        return new ShuffleVectorInst(LHS, RHS, Shuf->getShuffleMask());
      }
    }

    // A bitcasted-to-scalar and byte/bit reversing shuffle is better
    // recognized as a byte/bit swap:
    // bitcast <N x i8> (shuf X, undef, <N, N-1,...0>) -> bswap (bitcast X)
    // bitcast <N x i1> (shuf X, undef, <N, N-1,...0>) -> bitreverse (bitcast X)
    if (DestTy->isIntegerTy() && ShufElts.getKnownMinValue() % 2 == 0 &&
        Shuf->hasOneUse() && Shuf->isReverse()) {
      unsigned IntrinsicNum = 0;
      if (DL.isLegalInteger(DestTy->getScalarSizeInBits()) &&
          SrcTy->getScalarSizeInBits() == 8) {
        IntrinsicNum = Intrinsic::bswap;
      } else if (SrcTy->getScalarSizeInBits() == 1) {
        IntrinsicNum = Intrinsic::bitreverse;
      }
      if (IntrinsicNum != 0) {
        assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask");
        assert(match(ShufOp1, m_Undef()) && "Unexpected shuffle op");
        Function *BswapOrBitreverse =
            Intrinsic::getDeclaration(CI.getModule(), IntrinsicNum, DestTy);
        Value *ScalarX = Builder.CreateBitCast(ShufOp0, DestTy);
        return CallInst::Create(BswapOrBitreverse, {ScalarX});
      }
    }
  }

  // Handle the A->B->A cast, and there is an intervening PHI node.
  if (PHINode *PN = dyn_cast<PHINode>(Src))
    if (Instruction *I = optimizeBitCastFromPhi(CI, PN))
      return I;

  if (Instruction *I = canonicalizeBitCastExtElt(CI, *this))
    return I;

  if (Instruction *I = foldBitCastBitwiseLogic(CI, Builder))
    return I;

  if (Instruction *I = foldBitCastSelect(CI, Builder))
    return I;

  return commonCastTransforms(CI);
}

Instruction *InstCombinerImpl::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
  return commonCastTransforms(CI);
}