Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
//===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains logic for simplifying instructions based on information
// about how they are used.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "instcombine"

static cl::opt<bool>
    VerifyKnownBits("instcombine-verify-known-bits",
                    cl::desc("Verify that computeKnownBits() and "
                             "SimplifyDemandedBits() are consistent"),
                    cl::Hidden, cl::init(false));

/// Check to see if the specified operand of the specified instruction is a
/// constant integer. If so, check to see if there are any bits set in the
/// constant that are not demanded. If so, shrink the constant and return true.
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
                                   const APInt &Demanded) {
  assert(I && "No instruction?");
  assert(OpNo < I->getNumOperands() && "Operand index too large");

  // The operand must be a constant integer or splat integer.
  Value *Op = I->getOperand(OpNo);
  const APInt *C;
  if (!match(Op, m_APInt(C)))
    return false;

  // If there are no bits set that aren't demanded, nothing to do.
  if (C->isSubsetOf(Demanded))
    return false;

  // This instruction is producing bits that are not demanded. Shrink the RHS.
  I->setOperand(OpNo, ConstantInt::get(Op->getType(), *C & Demanded));

  return true;
}

/// Returns the bitwidth of the given scalar or pointer type. For vector types,
/// returns the element type's bitwidth.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
  if (unsigned BitWidth = Ty->getScalarSizeInBits())
    return BitWidth;

  return DL.getPointerTypeSizeInBits(Ty);
}

/// Inst is an integer instruction that SimplifyDemandedBits knows about. See
/// if the instruction has any properties that allow us to simplify its
/// operands.
bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst,
                                                       KnownBits &Known) {
  APInt DemandedMask(APInt::getAllOnes(Known.getBitWidth()));
  Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, Known,
                                     0, SQ.getWithInstruction(&Inst));
  if (!V) return false;
  if (V == &Inst) return true;
  replaceInstUsesWith(Inst, V);
  return true;
}

/// Inst is an integer instruction that SimplifyDemandedBits knows about. See
/// if the instruction has any properties that allow us to simplify its
/// operands.
bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst) {
  KnownBits Known(getBitWidth(Inst.getType(), DL));
  return SimplifyDemandedInstructionBits(Inst, Known);
}
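// Illustrative example (hypothetical IR): in
//   %a = and i32 %x, 255
//   %r = and i32 %a, 15
// only the low 4 bits of %a are demanded, so ShrinkDemandedConstant can
// shrink the inner constant from 255 to 15; the now-redundant inner 'and' is
// then folded away on a later visit.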
/// This form of SimplifyDemandedBits simplifies the specified instruction
/// operand if possible, updating it in place. It returns true if it made any
/// change and false otherwise.
bool InstCombinerImpl::SimplifyDemandedBits(Instruction *I, unsigned OpNo,
                                            const APInt &DemandedMask,
                                            KnownBits &Known, unsigned Depth,
                                            const SimplifyQuery &Q) {
  Use &U = I->getOperandUse(OpNo);
  Value *V = U.get();
  if (isa<Constant>(V)) {
    llvm::computeKnownBits(V, Known, Depth, Q);
    return false;
  }

  Known.resetAll();
  if (DemandedMask.isZero()) {
    // Not demanding any bits from V.
    replaceUse(U, UndefValue::get(V->getType()));
    return true;
  }

  if (Depth == MaxAnalysisRecursionDepth)
    return false;

  Instruction *VInst = dyn_cast<Instruction>(V);
  if (!VInst) {
    llvm::computeKnownBits(V, Known, Depth, Q);
    return false;
  }

  Value *NewVal;
  if (VInst->hasOneUse()) {
    // If the instruction has one use, we can directly simplify it.
    NewVal = SimplifyDemandedUseBits(VInst, DemandedMask, Known, Depth, Q);
  } else {
    // If there are multiple uses of this instruction, then we can simplify
    // VInst to some other value, but not modify the instruction.
    NewVal =
        SimplifyMultipleUseDemandedBits(VInst, DemandedMask, Known, Depth, Q);
  }
  if (!NewVal) return false;
  if (Instruction* OpInst = dyn_cast<Instruction>(U))
    salvageDebugInfo(*OpInst);

  replaceUse(U, NewVal);
  return true;
}
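// Illustrative example (hypothetical IR): for %r = and i8 %x, 0 no bits of
// %x are demanded, so SimplifyDemandedBits replaces that use of %x with
// undef, after which the 'and' constant-folds to zero.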
/// This function attempts to replace V with a simpler value based on the
/// demanded bits. When this function is called, it is known that only the bits
/// set in DemandedMask of the result of V are ever used downstream.
/// Consequently, depending on the mask and V, it may be possible to replace V
/// with a constant or one of its operands. In such cases, this function does
/// the replacement and returns true. In all other cases, it returns false
/// after analyzing the expression and setting Known.One to the bits that are
/// known to be one in the expression. Known.Zero contains all the bits that
/// are known to be zero in the expression. These are provided to potentially
/// allow the caller (which might recursively be SimplifyDemandedBits itself)
/// to simplify the expression.
/// Known.One and Known.Zero always follow the invariant that:
///   Known.One & Known.Zero == 0.
/// That is, a bit can't be both 1 and 0. The bits in Known.One and Known.Zero
/// are accurate even for bits not in DemandedMask. Note also that the bitwidth
/// of V, DemandedMask, Known.Zero and Known.One must all be the same.
///
/// This returns null if it did not change anything and it permits no
/// simplification. This returns V itself if it did some simplification of V's
/// operands based on the information about what bits are demanded. This
/// returns some other non-null value if it found out that V is equal to
/// another value in the context where the specified bits are demanded, but
/// not for all users.
Value *InstCombinerImpl::SimplifyDemandedUseBits(Instruction *I,
                                                 const APInt &DemandedMask,
                                                 KnownBits &Known,
                                                 unsigned Depth,
                                                 const SimplifyQuery &Q) {
  assert(I != nullptr && "Null pointer of Value???");
  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
  uint32_t BitWidth = DemandedMask.getBitWidth();
  Type *VTy = I->getType();
  assert(
      (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) &&
      Known.getBitWidth() == BitWidth &&
      "Value *V, DemandedMask and Known must have same BitWidth");

  KnownBits LHSKnown(BitWidth), RHSKnown(BitWidth);

  // Update flags after simplifying an operand based on the fact that some high
  // order bits are not demanded.
  auto disableWrapFlagsBasedOnUnusedHighBits = [](Instruction *I,
                                                  unsigned NLZ) {
    if (NLZ > 0) {
      // Disable the nsw and nuw flags here: We can no longer guarantee that
      // we won't wrap after simplification. Removing the nsw/nuw flags is
      // legal here because the top bit is not demanded.
      I->setHasNoSignedWrap(false);
      I->setHasNoUnsignedWrap(false);
    }
    return I;
  };

  // If the high-bits of an ADD/SUB/MUL are not demanded, then we do not care
  // about the high bits of the operands.
  auto simplifyOperandsBasedOnUnusedHighBits = [&](APInt &DemandedFromOps) {
    unsigned NLZ = DemandedMask.countl_zero();
    // Right fill the mask of bits for the operands to demand the most
    // significant bit and all those below it.
    DemandedFromOps = APInt::getLowBitsSet(BitWidth, BitWidth - NLZ);
    if (ShrinkDemandedConstant(I, 0, DemandedFromOps) ||
        SimplifyDemandedBits(I, 0, DemandedFromOps, LHSKnown, Depth + 1, Q) ||
        ShrinkDemandedConstant(I, 1, DemandedFromOps) ||
        SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Depth + 1, Q)) {
      disableWrapFlagsBasedOnUnusedHighBits(I, NLZ);
      return true;
    }
    return false;
  };

  switch (I->getOpcode()) {
  default:
    llvm::computeKnownBits(I, Known, Depth, Q);
    break;
  case Instruction::And: {
    // If either the LHS or the RHS are Zero, the result is zero.
    if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1, Q) ||
        SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.Zero, LHSKnown,
                             Depth + 1, Q))
      return I;

    Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown,
                                         Depth, Q);

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(VTy, Known.One);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and'.
    if (DemandedMask.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return I->getOperand(1);

    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnown.Zero))
      return I;

    break;
  }
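  // Illustrative example of the 'and' rules above (hypothetical IR): for
  //   %r = and i8 %x, 15
  // a caller demanding only the low two bits sees every demanded bit known
  // one on the RHS, so %r can be replaced by %x.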
  case Instruction::Or: {
    // If either the LHS or the RHS are One, the result is One.
    if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1, Q) ||
        SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.One, LHSKnown,
                             Depth + 1, Q)) {
      // Disjoint flag may no longer hold.
      I->dropPoisonGeneratingFlags();
      return I;
    }

    Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown,
                                         Depth, Q);

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(VTy, Known.One);

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or'.
    if (DemandedMask.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return I->getOperand(1);

    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(I, 1, DemandedMask))
      return I;

    // Infer disjoint flag if no common bits are set.
    if (!cast<PossiblyDisjointInst>(I)->isDisjoint()) {
      WithCache<const Value *> LHSCache(I->getOperand(0), LHSKnown),
          RHSCache(I->getOperand(1), RHSKnown);
      if (haveNoCommonBitsSet(LHSCache, RHSCache, Q)) {
        cast<PossiblyDisjointInst>(I)->setIsDisjoint(true);
        return I;
      }
    }

    break;
  }
  case Instruction::Xor: {
    if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1, Q) ||
        SimplifyDemandedBits(I, 0, DemandedMask, LHSKnown, Depth + 1, Q))
      return I;
    Value *LHS, *RHS;
    if (DemandedMask == 1 &&
        match(I->getOperand(0), m_Intrinsic<Intrinsic::ctpop>(m_Value(LHS))) &&
        match(I->getOperand(1), m_Intrinsic<Intrinsic::ctpop>(m_Value(RHS)))) {
      // (ctpop(X) ^ ctpop(Y)) & 1 --> ctpop(X^Y) & 1
      IRBuilderBase::InsertPointGuard Guard(Builder);
      Builder.SetInsertPoint(I);
      auto *Xor = Builder.CreateXor(LHS, RHS);
      return Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, Xor);
    }

    Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown,
                                         Depth, Q);

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(VTy, Known.One);

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'xor'.
    if (DemandedMask.isSubsetOf(RHSKnown.Zero))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(LHSKnown.Zero))
      return I->getOperand(1);

    // If all of the demanded bits are known to be zero on one side or the
    // other, turn this into an *inclusive* or.
    // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
    if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.Zero)) {
      Instruction *Or =
          BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1));
      if (DemandedMask.isAllOnes())
        cast<PossiblyDisjointInst>(Or)->setIsDisjoint(true);
      Or->takeName(I);
      return InsertNewInstWith(Or, I->getIterator());
    }

    // If all of the demanded bits on one side are known, and all of the set
    // bits on that side are also known to be set on the other side, turn this
    // into an AND, as we know the bits will be cleared.
    // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
    if (DemandedMask.isSubsetOf(RHSKnown.Zero | RHSKnown.One) &&
        RHSKnown.One.isSubsetOf(LHSKnown.One)) {
      Constant *AndC = Constant::getIntegerValue(VTy,
                                                 ~RHSKnown.One & DemandedMask);
      Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
      return InsertNewInstWith(And, I->getIterator());
    }
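    // Illustrative example of the rule above (hypothetical IR): for
    //   %o = or i8 %x, 7
    //   %r = xor i8 %o, 3
    // every set bit of 3 is known set in %o, so the xor only clears bits and
    // %r becomes and i8 %o, -4.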
    // If the RHS is a constant, see if we can change it. Don't alter a -1
    // constant because that's a canonical 'not' op, and that is better for
    // combining, SCEV, and codegen.
    const APInt *C;
    if (match(I->getOperand(1), m_APInt(C)) && !C->isAllOnes()) {
      if ((*C | ~DemandedMask).isAllOnes()) {
        // Force bits to 1 to create a 'not' op.
        I->setOperand(1, ConstantInt::getAllOnesValue(VTy));
        return I;
      }
      // If we can't turn this into a 'not', try to shrink the constant.
      if (ShrinkDemandedConstant(I, 1, DemandedMask))
        return I;
    }

    // If our LHS is an 'and' and if it has one use, and if any of the bits we
    // are flipping are known to be set, then the xor is just resetting those
    // bits to zero. We can just knock out bits from the 'and' and the 'xor',
    // simplifying both of them.
    if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0))) {
      ConstantInt *AndRHS, *XorRHS;
      if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
          match(I->getOperand(1), m_ConstantInt(XorRHS)) &&
          match(LHSInst->getOperand(1), m_ConstantInt(AndRHS)) &&
          (LHSKnown.One & RHSKnown.One & DemandedMask) != 0) {
        APInt NewMask = ~(LHSKnown.One & RHSKnown.One & DemandedMask);

        Constant *AndC = ConstantInt::get(VTy, NewMask & AndRHS->getValue());
        Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
        InsertNewInstWith(NewAnd, I->getIterator());

        Constant *XorC = ConstantInt::get(VTy, NewMask & XorRHS->getValue());
        Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC);
        return InsertNewInstWith(NewXor, I->getIterator());
      }
    }
    break;
  }
  case Instruction::Select: {
    if (SimplifyDemandedBits(I, 2, DemandedMask, RHSKnown, Depth + 1, Q) ||
        SimplifyDemandedBits(I, 1, DemandedMask, LHSKnown, Depth + 1, Q))
      return I;

    // If the operands are constants, see if we can simplify them.
    // This is similar to ShrinkDemandedConstant, but for a select we want to
    // try to keep the selected constants the same as icmp value constants, if
    // we can. This helps not break apart (or helps put back together)
    // canonical patterns like min and max.
    auto CanonicalizeSelectConstant = [](Instruction *I, unsigned OpNo,
                                         const APInt &DemandedMask) {
      const APInt *SelC;
      if (!match(I->getOperand(OpNo), m_APInt(SelC)))
        return false;

      // Get the constant out of the ICmp, if there is one.
      // Only try this when exactly 1 operand is a constant (if both operands
      // are constant, the icmp should eventually simplify). Otherwise, we may
      // invert the transform that reduces set bits and infinite-loop.
      Value *X;
      const APInt *CmpC;
      ICmpInst::Predicate Pred;
      if (!match(I->getOperand(0), m_ICmp(Pred, m_Value(X), m_APInt(CmpC))) ||
          isa<Constant>(X) || CmpC->getBitWidth() != SelC->getBitWidth())
        return ShrinkDemandedConstant(I, OpNo, DemandedMask);

      // If the constant is already the same as the ICmp, leave it as-is.
      if (*CmpC == *SelC)
        return false;
      // If the constants are not already the same, but can be with the demand
      // mask, use the constant value from the ICmp.
      if ((*CmpC & DemandedMask) == (*SelC & DemandedMask)) {
        I->setOperand(OpNo, ConstantInt::get(I->getType(), *CmpC));
        return true;
      }
      return ShrinkDemandedConstant(I, OpNo, DemandedMask);
    };
    if (CanonicalizeSelectConstant(I, 1, DemandedMask) ||
        CanonicalizeSelectConstant(I, 2, DemandedMask))
      return I;

    // Only known if known in both the LHS and RHS.
    adjustKnownBitsForSelectArm(LHSKnown, I->getOperand(0), I->getOperand(1),
                                /*Invert=*/false, Depth, Q);
    adjustKnownBitsForSelectArm(RHSKnown, I->getOperand(0), I->getOperand(2),
                                /*Invert=*/true, Depth, Q);
    Known = LHSKnown.intersectWith(RHSKnown);
    break;
  }
  case Instruction::Trunc: {
    // If we do not demand the high bits of a right-shifted and truncated
    // value, then we may be able to truncate it before the shift.
    Value *X;
    const APInt *C;
    if (match(I->getOperand(0), m_OneUse(m_LShr(m_Value(X), m_APInt(C))))) {
      // The shift amount must be valid (not poison) in the narrow type, and
      // it must not be greater than the high bits demanded of the result.
      if (C->ult(VTy->getScalarSizeInBits()) &&
          C->ule(DemandedMask.countl_zero())) {
        // trunc (lshr X, C) --> lshr (trunc X), C
        IRBuilderBase::InsertPointGuard Guard(Builder);
        Builder.SetInsertPoint(I);
        Value *Trunc = Builder.CreateTrunc(X, VTy);
        return Builder.CreateLShr(Trunc, C->getZExtValue());
      }
    }
  }
    [[fallthrough]];
  case Instruction::ZExt: {
    unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();

    APInt InputDemandedMask = DemandedMask.zextOrTrunc(SrcBitWidth);
    KnownBits InputKnown(SrcBitWidth);
    if (SimplifyDemandedBits(I, 0, InputDemandedMask, InputKnown, Depth + 1,
                             Q)) {
      // For zext nneg, we may have dropped the instruction which made the
      // input non-negative.
      I->dropPoisonGeneratingFlags();
      return I;
    }
    assert(InputKnown.getBitWidth() == SrcBitWidth && "Src width changed?");
    if (I->getOpcode() == Instruction::ZExt && I->hasNonNeg() &&
        !InputKnown.isNegative())
      InputKnown.makeNonNegative();
    Known = InputKnown.zextOrTrunc(BitWidth);

    break;
  }
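  // Illustrative example of the trunc rule above (hypothetical IR): if only
  // the low 6 bits of
  //   %s = lshr i32 %x, 2
  //   %t = trunc i32 %s to i8
  // are demanded, the shift amount fits the narrow type and stays within the
  // undemanded high bits, so this becomes lshr (trunc i32 %x to i8), 2.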
  case Instruction::SExt: {
    // Compute the bits in the result that are not present in the input.
    unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();

    APInt InputDemandedBits = DemandedMask.trunc(SrcBitWidth);

    // If any of the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    if (DemandedMask.getActiveBits() > SrcBitWidth)
      InputDemandedBits.setBit(SrcBitWidth - 1);

    KnownBits InputKnown(SrcBitWidth);
    if (SimplifyDemandedBits(I, 0, InputDemandedBits, InputKnown, Depth + 1, Q))
      return I;

    // If the input sign bit is known zero, or if the NewBits are not demanded,
    // convert this into a zero extension.
    if (InputKnown.isNonNegative() ||
        DemandedMask.getActiveBits() <= SrcBitWidth) {
      // Convert to ZExt cast.
      CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy);
      NewCast->takeName(I);
      return InsertNewInstWith(NewCast, I->getIterator());
    }

    // If the sign bit of the input is known set or clear, then we know the
    // top bits of the result.
    Known = InputKnown.sext(BitWidth);
    break;
  }
  case Instruction::Add: {
    if ((DemandedMask & 1) == 0) {
      // If we do not need the low bit, try to convert bool math to logic:
      // add iN (zext i1 X), (sext i1 Y) --> sext (~X & Y) to iN
      Value *X, *Y;
      if (match(I, m_c_Add(m_OneUse(m_ZExt(m_Value(X))),
                           m_OneUse(m_SExt(m_Value(Y))))) &&
          X->getType()->isIntOrIntVectorTy(1) && X->getType() == Y->getType()) {
        // Truth table for inputs and output signbits:
        //       X:0 | X:1
        //      ----------
        // Y:0  |  0 | 0 |
        // Y:1  | -1 | 0 |
        //      ----------
        IRBuilderBase::InsertPointGuard Guard(Builder);
        Builder.SetInsertPoint(I);
        Value *AndNot = Builder.CreateAnd(Builder.CreateNot(X), Y);
        return Builder.CreateSExt(AndNot, VTy);
      }

      // add iN (sext i1 X), (sext i1 Y) --> sext (X | Y) to iN
      if (match(I, m_Add(m_SExt(m_Value(X)), m_SExt(m_Value(Y)))) &&
          X->getType()->isIntOrIntVectorTy(1) && X->getType() == Y->getType() &&
          (I->getOperand(0)->hasOneUse() || I->getOperand(1)->hasOneUse())) {

        // Truth table for inputs and output signbits:
        //       X:0 | X:1
        //      -----------
        // Y:0  | -1 | -1 |
        // Y:1  | -1 |  0 |
        //      -----------
        IRBuilderBase::InsertPointGuard Guard(Builder);
        Builder.SetInsertPoint(I);
        Value *Or = Builder.CreateOr(X, Y);
        return Builder.CreateSExt(Or, VTy);
      }
    }

    // Right fill the mask of bits for the operands to demand the most
    // significant bit and all those below it.
    unsigned NLZ = DemandedMask.countl_zero();
    APInt DemandedFromOps = APInt::getLowBitsSet(BitWidth, BitWidth - NLZ);
    if (ShrinkDemandedConstant(I, 1, DemandedFromOps) ||
        SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Depth + 1, Q))
      return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ);

    // If low order bits are not demanded and known to be zero in one operand,
    // then we don't need to demand them from the other operand, since they
    // can't cause overflow into any bits that are demanded in the result.
    unsigned NTZ = (~DemandedMask & RHSKnown.Zero).countr_one();
    APInt DemandedFromLHS = DemandedFromOps;
    DemandedFromLHS.clearLowBits(NTZ);
    if (ShrinkDemandedConstant(I, 0, DemandedFromLHS) ||
        SimplifyDemandedBits(I, 0, DemandedFromLHS, LHSKnown, Depth + 1, Q))
      return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ);

    // If we are known to be adding zeros to every bit below
    // the highest demanded bit, we just return the other side.
    if (DemandedFromOps.isSubsetOf(RHSKnown.Zero))
      return I->getOperand(0);
    if (DemandedFromOps.isSubsetOf(LHSKnown.Zero))
      return I->getOperand(1);

    // (add X, C) --> (xor X, C) IFF C is equal to the top bit of the
    // DemandedMask.
    {
      const APInt *C;
      if (match(I->getOperand(1), m_APInt(C)) &&
          C->isOneBitSet(DemandedMask.getActiveBits() - 1)) {
        IRBuilderBase::InsertPointGuard Guard(Builder);
        Builder.SetInsertPoint(I);
        return Builder.CreateXor(I->getOperand(0), ConstantInt::get(VTy, *C));
      }
    }

    // Otherwise just compute the known bits of the result.
    bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
    bool NUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
    Known = KnownBits::computeForAddSub(true, NSW, NUW, LHSKnown, RHSKnown);
    break;
  }
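  // Illustrative example of the add->xor rule above (hypothetical IR): if
  // only the low 4 bits of %r = add i8 %x, 8 are demanded, the carry out of
  // bit 3 is irrelevant, so this becomes %r = xor i8 %x, 8.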
  case Instruction::Sub: {
    // Right fill the mask of bits for the operands to demand the most
    // significant bit and all those below it.
    unsigned NLZ = DemandedMask.countl_zero();
    APInt DemandedFromOps = APInt::getLowBitsSet(BitWidth, BitWidth - NLZ);
    if (ShrinkDemandedConstant(I, 1, DemandedFromOps) ||
        SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Depth + 1, Q))
      return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ);

    // If low order bits are not demanded and are known to be zero in RHS,
    // then we don't need to demand them from LHS, since they can't cause a
    // borrow from any bits that are demanded in the result.
    unsigned NTZ = (~DemandedMask & RHSKnown.Zero).countr_one();
    APInt DemandedFromLHS = DemandedFromOps;
    DemandedFromLHS.clearLowBits(NTZ);
    if (ShrinkDemandedConstant(I, 0, DemandedFromLHS) ||
        SimplifyDemandedBits(I, 0, DemandedFromLHS, LHSKnown, Depth + 1, Q))
      return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ);

    // If we are known to be subtracting zeros from every bit below
    // the highest demanded bit, we just return the other side.
    if (DemandedFromOps.isSubsetOf(RHSKnown.Zero))
      return I->getOperand(0);
    // We can't do this with the LHS for subtraction, unless we are only
    // demanding the LSB.
    if (DemandedFromOps.isOne() && DemandedFromOps.isSubsetOf(LHSKnown.Zero))
      return I->getOperand(1);

    // Otherwise just compute the known bits of the result.
    bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
    bool NUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
    Known = KnownBits::computeForAddSub(false, NSW, NUW, LHSKnown, RHSKnown);
    break;
  }
  case Instruction::Mul: {
    APInt DemandedFromOps;
    if (simplifyOperandsBasedOnUnusedHighBits(DemandedFromOps))
      return I;

    if (DemandedMask.isPowerOf2()) {
      // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
      // If we demand exactly one bit N and we have "X * (C' << N)" where C'
      // is odd (has LSB set), then the left-shifted low bit of X is the
      // answer.
      unsigned CTZ = DemandedMask.countr_zero();
      const APInt *C;
      if (match(I->getOperand(1), m_APInt(C)) && C->countr_zero() == CTZ) {
        Constant *ShiftC = ConstantInt::get(VTy, CTZ);
        Instruction *Shl = BinaryOperator::CreateShl(I->getOperand(0), ShiftC);
        return InsertNewInstWith(Shl, I->getIterator());
      }
    }
    // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
    // X * X is odd iff X is odd.
    // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
    if (I->getOperand(0) == I->getOperand(1) && DemandedMask.ult(4)) {
      Constant *One = ConstantInt::get(VTy, 1);
      Instruction *And1 = BinaryOperator::CreateAnd(I->getOperand(0), One);
      return InsertNewInstWith(And1, I->getIterator());
    }

    llvm::computeKnownBits(I, Known, Depth, Q);
    break;
  }
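  // Illustrative check of the X * X rule above: 3 * 3 == 9 == 0b1001; bit 0
  // equals (3 & 1) and bit 1 of a square is always 0, so when only the low
  // two bits are demanded the multiply reduces to and i8 %x, 1
  // (hypothetical IR).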
  case Instruction::Shl: {
    const APInt *SA;
    if (match(I->getOperand(1), m_APInt(SA))) {
      const APInt *ShrAmt;
      if (match(I->getOperand(0), m_Shr(m_Value(), m_APInt(ShrAmt))))
        if (Instruction *Shr = dyn_cast<Instruction>(I->getOperand(0)))
          if (Value *R = simplifyShrShlDemandedBits(Shr, *ShrAmt, I, *SA,
                                                    DemandedMask, Known))
            return R;

      // Do not simplify if shl is part of funnel-shift pattern
      if (I->hasOneUse()) {
        auto *Inst = dyn_cast<Instruction>(I->user_back());
        if (Inst && Inst->getOpcode() == BinaryOperator::Or) {
          if (auto Opt = convertOrOfShiftsToFunnelShift(*Inst)) {
            auto [IID, FShiftArgs] = *Opt;
            if ((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
                FShiftArgs[0] == FShiftArgs[1]) {
              llvm::computeKnownBits(I, Known, Depth, Q);
              break;
            }
          }
        }
      }

      // If we only want bits that already match the sign bit, then we don't
      // need to shift.
      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth - 1);
      if (DemandedMask.countr_zero() >= ShiftAmt) {
        if (I->hasNoSignedWrap()) {
          unsigned NumHiDemandedBits = BitWidth - DemandedMask.countr_zero();
          unsigned SignBits =
              ComputeNumSignBits(I->getOperand(0), Depth + 1, Q.CxtI);
          if (SignBits > ShiftAmt && SignBits - ShiftAmt >= NumHiDemandedBits)
            return I->getOperand(0);
        }

        // If we can pre-shift a right-shifted constant to the left without
        // losing any high bits and we don't demand the low bits, then
        // eliminate the left-shift:
        // (C >> X) << LeftShiftAmtC --> (C << LeftShiftAmtC) >> X
        Value *X;
        Constant *C;
        if (match(I->getOperand(0), m_LShr(m_ImmConstant(C), m_Value(X)))) {
          Constant *LeftShiftAmtC = ConstantInt::get(VTy, ShiftAmt);
          Constant *NewC = ConstantFoldBinaryOpOperands(Instruction::Shl, C,
                                                        LeftShiftAmtC, DL);
          if (ConstantFoldBinaryOpOperands(Instruction::LShr, NewC,
                                           LeftShiftAmtC, DL) == C) {
            Instruction *Lshr = BinaryOperator::CreateLShr(NewC, X);
            return InsertNewInstWith(Lshr, I->getIterator());
          }
        }
      }

      APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));

      // If the shift is NUW/NSW, then it does demand the high bits.
      ShlOperator *IOp = cast<ShlOperator>(I);
      if (IOp->hasNoSignedWrap())
        DemandedMaskIn.setHighBits(ShiftAmt + 1);
      else if (IOp->hasNoUnsignedWrap())
        DemandedMaskIn.setHighBits(ShiftAmt);

      if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1, Q))
        return I;

      Known = KnownBits::shl(Known,
                             KnownBits::makeConstant(APInt(BitWidth, ShiftAmt)),
                             /* NUW */ IOp->hasNoUnsignedWrap(),
                             /* NSW */ IOp->hasNoSignedWrap());
    } else {
      // This is a variable shift, so we can't shift the demand mask by a known
      // amount. But if we are not demanding high bits, then we are not
      // demanding those bits from the pre-shifted operand either.
      if (unsigned CTLZ = DemandedMask.countl_zero()) {
        APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
        if (SimplifyDemandedBits(I, 0, DemandedFromOp, Known, Depth + 1, Q)) {
          // We can't guarantee that nsw/nuw hold after simplifying the
          // operand.
          I->dropPoisonGeneratingFlags();
          return I;
        }
      }
      llvm::computeKnownBits(I, Known, Depth, Q);
    }
    break;
  }
  case Instruction::LShr: {
    const APInt *SA;
    if (match(I->getOperand(1), m_APInt(SA))) {
      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth - 1);

      // Do not simplify if lshr is part of funnel-shift pattern
      if (I->hasOneUse()) {
        auto *Inst = dyn_cast<Instruction>(I->user_back());
        if (Inst && Inst->getOpcode() == BinaryOperator::Or) {
          if (auto Opt = convertOrOfShiftsToFunnelShift(*Inst)) {
            auto [IID, FShiftArgs] = *Opt;
            if ((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
                FShiftArgs[0] == FShiftArgs[1]) {
              llvm::computeKnownBits(I, Known, Depth, Q);
              break;
            }
          }
        }
      }

      // If we are just demanding the shifted sign bit and below, then this can
      // be treated as an ASHR in disguise.
      if (DemandedMask.countl_zero() >= ShiftAmt) {
        // If we only want bits that already match the signbit then we don't
        // need to shift.
        unsigned NumHiDemandedBits = BitWidth - DemandedMask.countr_zero();
        unsigned SignBits =
            ComputeNumSignBits(I->getOperand(0), Depth + 1, Q.CxtI);
        if (SignBits >= NumHiDemandedBits)
          return I->getOperand(0);

        // If we can pre-shift a left-shifted constant to the right without
        // losing any low bits (we already know we don't demand the high bits),
        // then eliminate the right-shift:
        // (C << X) >> RightShiftAmtC --> (C >> RightShiftAmtC) << X
        Value *X;
        Constant *C;
        if (match(I->getOperand(0), m_Shl(m_ImmConstant(C), m_Value(X)))) {
          Constant *RightShiftAmtC = ConstantInt::get(VTy, ShiftAmt);
          Constant *NewC = ConstantFoldBinaryOpOperands(Instruction::LShr, C,
                                                        RightShiftAmtC, DL);
          if (ConstantFoldBinaryOpOperands(Instruction::Shl, NewC,
                                           RightShiftAmtC, DL) == C) {
            Instruction *Shl = BinaryOperator::CreateShl(NewC, X);
            return InsertNewInstWith(Shl, I->getIterator());
          }
        }
      }

      // Unsigned shift right.
      APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
      if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1, Q)) {
        // exact flag may no longer hold.
        I->dropPoisonGeneratingFlags();
        return I;
      }
      Known.Zero.lshrInPlace(ShiftAmt);
      Known.One.lshrInPlace(ShiftAmt);
      if (ShiftAmt)
        Known.Zero.setHighBits(ShiftAmt); // high bits known zero.
    } else {
      llvm::computeKnownBits(I, Known, Depth, Q);
    }
    break;
  }
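  // Illustrative example of the lshr pre-shift rule above (hypothetical IR):
  // when only the low 4 bits of %r are demanded,
  //   %s = shl i8 -16, %x        ; C == 0xF0
  //   %r = lshr i8 %s, 4
  // can become %r = shl i8 15, %x, since (0xF0 >> 4) << 4 == 0xF0 loses no
  // bits of the constant.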
  case Instruction::AShr: {
    unsigned SignBits = ComputeNumSignBits(I->getOperand(0), Depth + 1, Q.CxtI);

    // If we only want bits that already match the signbit then we don't need
    // to shift.
    unsigned NumHiDemandedBits = BitWidth - DemandedMask.countr_zero();
    if (SignBits >= NumHiDemandedBits)
      return I->getOperand(0);

    // If this is an arithmetic shift right and only the low-bit is set, we can
    // always convert this into a logical shr, even if the shift amount is
    // variable. The low bit of the shift cannot be an input sign bit unless
    // the shift amount is >= the size of the datatype, which is undefined.
    if (DemandedMask.isOne()) {
      // Perform the logical shift right.
      Instruction *NewVal = BinaryOperator::CreateLShr(
          I->getOperand(0), I->getOperand(1), I->getName());
      return InsertNewInstWith(NewVal, I->getIterator());
    }

    const APInt *SA;
    if (match(I->getOperand(1), m_APInt(SA))) {
      uint32_t ShiftAmt = SA->getLimitedValue(BitWidth - 1);

      // Signed shift right.
      APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
      // If any of the bits being shifted in are demanded, then we should set
      // the sign bit as demanded.
      bool ShiftedInBitsDemanded = DemandedMask.countl_zero() < ShiftAmt;
      if (ShiftedInBitsDemanded)
        DemandedMaskIn.setSignBit();
      if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1, Q)) {
        // exact flag may no longer hold.
        I->dropPoisonGeneratingFlags();
        return I;
      }

      // If the input sign bit is known to be zero, or if none of the shifted
      // in bits are demanded, turn this into an unsigned shift right.
      if (Known.Zero[BitWidth - 1] || !ShiftedInBitsDemanded) {
        BinaryOperator *LShr = BinaryOperator::CreateLShr(I->getOperand(0),
                                                          I->getOperand(1));
        LShr->setIsExact(cast<BinaryOperator>(I)->isExact());
        LShr->takeName(I);
        return InsertNewInstWith(LShr, I->getIterator());
      }

      Known = KnownBits::ashr(
          Known, KnownBits::makeConstant(APInt(BitWidth, ShiftAmt)),
          ShiftAmt != 0, I->isExact());
    } else {
      llvm::computeKnownBits(I, Known, Depth, Q);
    }
    break;
  }
  case Instruction::UDiv: {
    // UDiv doesn't demand low bits that are zero in the divisor.
    const APInt *SA;
    if (match(I->getOperand(1), m_APInt(SA))) {
      // TODO: Take the demanded mask of the result into account.
      unsigned RHSTrailingZeros = SA->countr_zero();
      APInt DemandedMaskIn =
          APInt::getHighBitsSet(BitWidth, BitWidth - RHSTrailingZeros);
      if (SimplifyDemandedBits(I, 0, DemandedMaskIn, LHSKnown, Depth + 1, Q)) {
        // We can't guarantee that "exact" is still true after changing the
        // dividend.
        I->dropPoisonGeneratingFlags();
        return I;
      }

      Known = KnownBits::udiv(LHSKnown, KnownBits::makeConstant(*SA),
                              cast<BinaryOperator>(I)->isExact());
    } else {
      llvm::computeKnownBits(I, Known, Depth, Q);
    }
    break;
  }
  case Instruction::SRem: {
    const APInt *Rem;
    if (match(I->getOperand(1), m_APInt(Rem))) {
      // X % -1 demands all the bits because we don't want to introduce
      // INT_MIN % -1 (== undef) by accident.
      if (Rem->isAllOnes())
        break;
      APInt RA = Rem->abs();
      if (RA.isPowerOf2()) {
        if (DemandedMask.ult(RA)) // srem won't affect demanded bits
          return I->getOperand(0);

        APInt LowBits = RA - 1;
        APInt Mask2 = LowBits | APInt::getSignMask(BitWidth);
        if (SimplifyDemandedBits(I, 0, Mask2, LHSKnown, Depth + 1, Q))
          return I;

        // The low bits of LHS are unchanged by the srem.
        Known.Zero = LHSKnown.Zero & LowBits;
        Known.One = LHSKnown.One & LowBits;

        // If LHS is non-negative or has all low bits zero, then the upper
        // bits are all zero.
        if (LHSKnown.isNonNegative() || LowBits.isSubsetOf(LHSKnown.Zero))
          Known.Zero |= ~LowBits;

        // If LHS is negative and not all low bits are zero, then the upper
        // bits are all one.
        if (LHSKnown.isNegative() && LowBits.intersects(LHSKnown.One))
          Known.One |= ~LowBits;

        break;
      }
    }

    llvm::computeKnownBits(I, Known, Depth, Q);
    break;
  }
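  // Illustrative example of the srem rule above (hypothetical IR): for
  //   %r = srem i32 %x, 4
  // a user demanding only the low two bits can use %x directly, because srem
  // by a power of two leaves the low log2(4) bits unchanged (e.g. -6 srem 4
  // == -2, and both values end in 0b10).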
  case Instruction::Call: {
    bool KnownBitsComputed = false;
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      switch (II->getIntrinsicID()) {
      case Intrinsic::abs: {
        if (DemandedMask == 1)
          return II->getArgOperand(0);
        break;
      }
      case Intrinsic::ctpop: {
        // Checking if the number of clear bits is odd (parity)? If the type
        // has an even number of bits, that's the same as checking if the
        // number of set bits is odd, so we can eliminate the 'not' op.
        Value *X;
        if (DemandedMask == 1 && VTy->getScalarSizeInBits() % 2 == 0 &&
            match(II->getArgOperand(0), m_Not(m_Value(X)))) {
          Function *Ctpop = Intrinsic::getDeclaration(
              II->getModule(), Intrinsic::ctpop, VTy);
          return InsertNewInstWith(CallInst::Create(Ctpop, {X}),
                                   I->getIterator());
        }
        break;
      }
      case Intrinsic::bswap: {
        // If the only bits demanded come from one byte of the bswap result,
        // just shift the input byte into position to eliminate the bswap.
        unsigned NLZ = DemandedMask.countl_zero();
        unsigned NTZ = DemandedMask.countr_zero();

        // Round NTZ down to the next byte. If we have 11 trailing zeros, then
        // we need all the bits down to bit 8. Likewise, round NLZ. If we
        // have 14 leading zeros, round to 8.
        NLZ = alignDown(NLZ, 8);
        NTZ = alignDown(NTZ, 8);
        // If we need exactly one byte, we can do this transformation.
        if (BitWidth - NLZ - NTZ == 8) {
          // Replace this with either a left or right shift to get the byte
          // into the right place.
          Instruction *NewVal;
          if (NLZ > NTZ)
            NewVal = BinaryOperator::CreateLShr(
                II->getArgOperand(0), ConstantInt::get(VTy, NLZ - NTZ));
          else
            NewVal = BinaryOperator::CreateShl(
                II->getArgOperand(0), ConstantInt::get(VTy, NTZ - NLZ));
          NewVal->takeName(I);
          return InsertNewInstWith(NewVal, I->getIterator());
        }
        break;
      }
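      // Illustrative example of the bswap rule above (hypothetical IR): if
      // only bits 8..15 of %r = bswap(i32 %x) are demanded (NLZ == 16,
      // NTZ == 8), the demanded byte is bits 16..23 of %x, so the call can be
      // replaced by %r = lshr i32 %x, 8.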
      case Intrinsic::ptrmask: {
        unsigned MaskWidth = I->getOperand(1)->getType()->getScalarSizeInBits();
        RHSKnown = KnownBits(MaskWidth);
        // If either the LHS or the RHS are Zero, the result is zero.
        if (SimplifyDemandedBits(I, 0, DemandedMask, LHSKnown, Depth + 1, Q) ||
            SimplifyDemandedBits(
                I, 1, (DemandedMask & ~LHSKnown.Zero).zextOrTrunc(MaskWidth),
                RHSKnown, Depth + 1, Q))
          return I;

        // TODO: Should be 1-extend
        RHSKnown = RHSKnown.anyextOrTrunc(BitWidth);

        Known = LHSKnown & RHSKnown;
        KnownBitsComputed = true;

        // If the client is only demanding bits we know to be zero, return
        // `llvm.ptrmask(p, 0)`. We can't return `null` here due to pointer
        // provenance, but making the mask zero will be easily optimizable in
        // the backend.
        if (DemandedMask.isSubsetOf(Known.Zero) &&
            !match(I->getOperand(1), m_Zero()))
          return replaceOperand(
              *I, 1, Constant::getNullValue(I->getOperand(1)->getType()));

        // Mask in demanded space does nothing.
        // NOTE: We may have attributes associated with the return value of
        // the llvm.ptrmask intrinsic that will be lost when we just return
        // the operand. We should try to preserve them.
        if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
          return I->getOperand(0);

        // If the RHS is a constant, see if we can simplify it.
        if (ShrinkDemandedConstant(
                I, 1, (DemandedMask & ~LHSKnown.Zero).zextOrTrunc(MaskWidth)))
          return I;

        // Combine:
        // (ptrmask (getelementptr i8, ptr p, imm i), imm mask)
        //   -> (ptrmask (getelementptr i8, ptr p, imm (i & mask)), imm mask)
        // where only the low bits known to be zero in the pointer are changed
        Value *InnerPtr;
        uint64_t GEPIndex;
        uint64_t PtrMaskImmediate;
        if (match(I, m_Intrinsic<Intrinsic::ptrmask>(
                         m_PtrAdd(m_Value(InnerPtr), m_ConstantInt(GEPIndex)),
                         m_ConstantInt(PtrMaskImmediate)))) {

          LHSKnown = computeKnownBits(InnerPtr, Depth + 1, I);
          if (!LHSKnown.isZero()) {
            const unsigned trailingZeros = LHSKnown.countMinTrailingZeros();
            uint64_t PointerAlignBits = (uint64_t(1) << trailingZeros) - 1;

            uint64_t HighBitsGEPIndex = GEPIndex & ~PointerAlignBits;
            uint64_t MaskedLowBitsGEPIndex =
                GEPIndex & PointerAlignBits & PtrMaskImmediate;

            uint64_t MaskedGEPIndex = HighBitsGEPIndex | MaskedLowBitsGEPIndex;

            if (MaskedGEPIndex != GEPIndex) {
              auto *GEP = cast<GEPOperator>(II->getArgOperand(0));
              Builder.SetInsertPoint(I);
              Type *GEPIndexType =
                  DL.getIndexType(GEP->getPointerOperand()->getType());
              Value *MaskedGEP = Builder.CreateGEP(
                  GEP->getSourceElementType(), InnerPtr,
                  ConstantInt::get(GEPIndexType, MaskedGEPIndex),
                  GEP->getName(), GEP->isInBounds());

              replaceOperand(*I, 0, MaskedGEP);
              return I;
            }
          }
        }

        break;
      }

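      // Illustrative example of the ptrmask rules above (hypothetical IR):
      // for %r = llvm.ptrmask(ptr %p, i64 -8) where %p is already known to be
      // 8-byte aligned, every demanded bit is either known one in the mask or
      // known zero in %p, so %r can be replaced by %p.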
      case Intrinsic::fshr:
      case Intrinsic::fshl: {
        const APInt *SA;
        if (!match(I->getOperand(2), m_APInt(SA)))
          break;

        // Normalize to funnel shift left. APInt shifts of BitWidth are well-
        // defined, so no need to special-case zero shifts here.
        uint64_t ShiftAmt = SA->urem(BitWidth);
        if (II->getIntrinsicID() == Intrinsic::fshr)
          ShiftAmt = BitWidth - ShiftAmt;

        APInt DemandedMaskLHS(DemandedMask.lshr(ShiftAmt));
        APInt DemandedMaskRHS(DemandedMask.shl(BitWidth - ShiftAmt));
        if (I->getOperand(0) != I->getOperand(1)) {
          if (SimplifyDemandedBits(I, 0, DemandedMaskLHS, LHSKnown,
                                   Depth + 1, Q) ||
              SimplifyDemandedBits(I, 1, DemandedMaskRHS, RHSKnown, Depth + 1,
                                   Q))
            return I;
        } else { // fshl is a rotate
          // Avoid converting rotate into funnel shift.
          // Only simplify if one operand is constant.
          LHSKnown = computeKnownBits(I->getOperand(0), Depth + 1, I);
          if (DemandedMaskLHS.isSubsetOf(LHSKnown.Zero | LHSKnown.One) &&
              !match(I->getOperand(0), m_SpecificInt(LHSKnown.One))) {
            replaceOperand(*I, 0, Constant::getIntegerValue(VTy, LHSKnown.One));
            return I;
          }

          RHSKnown = computeKnownBits(I->getOperand(1), Depth + 1, I);
          if (DemandedMaskRHS.isSubsetOf(RHSKnown.Zero | RHSKnown.One) &&
              !match(I->getOperand(1), m_SpecificInt(RHSKnown.One))) {
            replaceOperand(*I, 1, Constant::getIntegerValue(VTy, RHSKnown.One));
            return I;
          }
        }

        Known.Zero = LHSKnown.Zero.shl(ShiftAmt) |
                     RHSKnown.Zero.lshr(BitWidth - ShiftAmt);
        Known.One = LHSKnown.One.shl(ShiftAmt) |
                    RHSKnown.One.lshr(BitWidth - ShiftAmt);
        KnownBitsComputed = true;
        break;
      }
      case Intrinsic::umax: {
        // UMax(A, C) == A if ...
        // The lowest non-zero bit of DemandedMask is higher than the highest
        // non-zero bit of C.
        const APInt *C;
        unsigned CTZ = DemandedMask.countr_zero();
        if (match(II->getArgOperand(1), m_APInt(C)) &&
            CTZ >= C->getActiveBits())
          return II->getArgOperand(0);
        break;
      }
      case Intrinsic::umin: {
        // UMin(A, C) == A if ...
        // The lowest non-zero bit of DemandedMask is higher than the highest
        // non-one bit of C.
        // This comes from using DeMorgans on the above umax example.
        const APInt *C;
        unsigned CTZ = DemandedMask.countr_zero();
        if (match(II->getArgOperand(1), m_APInt(C)) &&
            CTZ >= C->getBitWidth() - C->countl_one())
          return II->getArgOperand(0);
        break;
      }
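      // Illustrative instance of the umax rule above (hypothetical IR): for
      //   %r = umax(i8 %a, i8 12)
      // a user demanding only the high nibble can use %a directly: whenever
      // the max returns 12 rather than %a, %a < 12, so both values have a
      // zero high nibble and agree on all demanded bits.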
      default: {
        // Handle target specific intrinsics
        std::optional<Value *> V = targetSimplifyDemandedUseBitsIntrinsic(
            *II, DemandedMask, Known, KnownBitsComputed);
        if (V)
          return *V;
        break;
      }
      }
    }

    if (!KnownBitsComputed)
      llvm::computeKnownBits(I, Known, Depth, Q);
    break;
  }
  }

  if (I->getType()->isPointerTy()) {
    Align Alignment = I->getPointerAlignment(DL);
    Known.Zero.setLowBits(Log2(Alignment));
  }

  // If the client is only demanding bits that we know, return the known
  // constant. We can't directly simplify pointers as a constant because of
  // pointer provenance.
  // TODO: We could return `(inttoptr const)` for pointers.
  if (!I->getType()->isPointerTy() &&
      DemandedMask.isSubsetOf(Known.Zero | Known.One))
    return Constant::getIntegerValue(VTy, Known.One);

  if (VerifyKnownBits) {
    KnownBits ReferenceKnown = llvm::computeKnownBits(I, Depth, Q);
    if (Known != ReferenceKnown) {
      errs() << "Mismatched known bits for " << *I << " in "
             << I->getFunction()->getName() << "\n";
      errs() << "computeKnownBits(): " << ReferenceKnown << "\n";
      errs() << "SimplifyDemandedBits(): " << Known << "\n";
      std::abort();
    }
  }

  return nullptr;
}

/// Helper routine of SimplifyDemandedUseBits. It computes Known
/// bits. It also tries to handle simplifications that can be done based on
/// DemandedMask, but without modifying the Instruction.
Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits(
    Instruction *I, const APInt &DemandedMask, KnownBits &Known, unsigned Depth,
    const SimplifyQuery &Q) {
  unsigned BitWidth = DemandedMask.getBitWidth();
  Type *ITy = I->getType();

  KnownBits LHSKnown(BitWidth);
  KnownBits RHSKnown(BitWidth);

  // Despite the fact that we can't simplify this instruction in every user's
  // context, we can at least compute the known bits, and we can
  // do simplifications that apply to *just* the one user if we know that
  // this instruction has a simpler value in that context.
  switch (I->getOpcode()) {
  case Instruction::And: {
    llvm::computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, Q);
    llvm::computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, Q);
    Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown,
                                         Depth, Q);
    computeKnownBitsFromContext(I, Known, Depth, Q);

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(ITy, Known.One);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and' in this
    // context.
    if (DemandedMask.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return I->getOperand(1);

    break;
  }
  case Instruction::Or: {
    llvm::computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, Q);
    llvm::computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, Q);
    Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown,
                                         Depth, Q);
    computeKnownBitsFromContext(I, Known, Depth, Q);

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(ITy, Known.One);

    // We can simplify (X|Y) -> X or Y in the user's context if we know that
    // only bits from X or Y are demanded.
    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or' in this
    // context.
    if (DemandedMask.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return I->getOperand(1);

    break;
  }
  case Instruction::Xor: {
    llvm::computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, Q);
    llvm::computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, Q);
    Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown,
                                         Depth, Q);
    computeKnownBitsFromContext(I, Known, Depth, Q);

    // If the client is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(ITy, Known.One);

    // We can simplify (X^Y) -> X or Y in the user's context if we know that
    // only bits from X or Y are demanded.
    // If all of the demanded bits are known zero on one side, return the
    // other.
    if (DemandedMask.isSubsetOf(RHSKnown.Zero))
      return I->getOperand(0);
    if (DemandedMask.isSubsetOf(LHSKnown.Zero))
      return I->getOperand(1);

    break;
  }
  case Instruction::Add: {
    unsigned NLZ = DemandedMask.countl_zero();
    APInt DemandedFromOps = APInt::getLowBitsSet(BitWidth, BitWidth - NLZ);

    // If an operand adds zeros to every bit below the highest demanded bit,
    // that operand doesn't change the result. Return the other side.
    llvm::computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, Q);
    if (DemandedFromOps.isSubsetOf(RHSKnown.Zero))
      return I->getOperand(0);

    llvm::computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, Q);
    if (DemandedFromOps.isSubsetOf(LHSKnown.Zero))
      return I->getOperand(1);

    bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
    bool NUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
    Known = KnownBits::computeForAddSub(/*Add=*/true, NSW, NUW, LHSKnown,
                                        RHSKnown);
    computeKnownBitsFromContext(I, Known, Depth, Q);
    break;
  }
  case Instruction::Sub: {
    unsigned NLZ = DemandedMask.countl_zero();
    APInt DemandedFromOps = APInt::getLowBitsSet(BitWidth, BitWidth - NLZ);

    // If an operand subtracts zeros from every bit below the highest demanded
    // bit, that operand doesn't change the result. Return the other side.
    llvm::computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, Q);
    if (DemandedFromOps.isSubsetOf(RHSKnown.Zero))
      return I->getOperand(0);

    bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
    bool NUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
    llvm::computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, Q);
    Known = KnownBits::computeForAddSub(/*Add=*/false, NSW, NUW, LHSKnown,
                                        RHSKnown);
    computeKnownBitsFromContext(I, Known, Depth, Q);
    break;
  }
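  // Illustrative example of the multi-use rules above (hypothetical IR): for
  // %r = or i8 %x, %y where the low nibble of %y is known zero, a user that
  // demands only the low nibble can read %x instead, while %r itself is kept
  // unchanged for its other users.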
  case Instruction::AShr: {
    // Compute the Known bits to simplify things downstream.
    llvm::computeKnownBits(I, Known, Depth, Q);

    // If this user is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(ITy, Known.One);

    // If the right shift operand 0 is a result of a left shift by the same
    // amount, this is probably a zero/sign extension, which may be unnecessary
    // if we do not demand any of the new sign bits. So, return the original
    // operand instead.
    const APInt *ShiftRC;
    const APInt *ShiftLC;
    Value *X;
    unsigned BitWidth = DemandedMask.getBitWidth();
    if (match(I,
              m_AShr(m_Shl(m_Value(X), m_APInt(ShiftLC)), m_APInt(ShiftRC))) &&
        ShiftLC == ShiftRC && ShiftLC->ult(BitWidth) &&
        DemandedMask.isSubsetOf(APInt::getLowBitsSet(
            BitWidth, BitWidth - ShiftRC->getZExtValue()))) {
      return X;
    }

    break;
  }
  default:
    // Compute the Known bits to simplify things downstream.
    llvm::computeKnownBits(I, Known, Depth, Q);

    // If this user is only demanding bits that we know, return the known
    // constant.
    if (DemandedMask.isSubsetOf(Known.Zero | Known.One))
      return Constant::getIntegerValue(ITy, Known.One);

    break;
  }

  return nullptr;
}

/// Helper routine of SimplifyDemandedUseBits. It tries to simplify
/// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into
/// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign
/// of "C2 - C1".
///
/// Suppose E1 and E2 are generally different in bits S={bm, bm+1,
/// ..., bn}, without considering the specific value X is holding.
/// This transformation is legal iff one of following conditions holds:
///  1) All the bits in S are 0, in this case E1 == E2.
///  2) We don't care those bits in S, per the input DemandedMask.
///  3) Combination of 1) and 2). Some bits in S are 0, and we don't care
///     about the rest of the bits.
///
/// Currently we only test condition 2).
///
/// As with SimplifyDemandedUseBits, it returns NULL if the simplification was
/// not successful.
Value *InstCombinerImpl::simplifyShrShlDemandedBits(
    Instruction *Shr, const APInt &ShrOp1, Instruction *Shl,
    const APInt &ShlOp1, const APInt &DemandedMask, KnownBits &Known) {
  if (!ShlOp1 || !ShrOp1)
    return nullptr; // No-op.

  Value *VarX = Shr->getOperand(0);
  Type *Ty = VarX->getType();
  unsigned BitWidth = Ty->getScalarSizeInBits();
  if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth))
    return nullptr; // Undef.

  unsigned ShlAmt = ShlOp1.getZExtValue();
  unsigned ShrAmt = ShrOp1.getZExtValue();

  Known.One.clearAllBits();
  Known.Zero.setLowBits(ShlAmt - 1);
  Known.Zero &= DemandedMask;

  APInt BitMask1(APInt::getAllOnes(BitWidth));
  APInt BitMask2(APInt::getAllOnes(BitWidth));

  bool isLshr = (Shr->getOpcode() == Instruction::LShr);
  BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) :
                      (BitMask1.ashr(ShrAmt) << ShlAmt);

  if (ShrAmt <= ShlAmt) {
    BitMask2 <<= (ShlAmt - ShrAmt);
  } else {
    BitMask2 = isLshr ? BitMask2.lshr(ShrAmt - ShlAmt) :
                        BitMask2.ashr(ShrAmt - ShlAmt);
  }

  // Check if condition-2 (see the comment to this function) is satisfied.
  if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) {
    if (ShrAmt == ShlAmt)
      return VarX;

    if (!Shr->hasOneUse())
      return nullptr;

    BinaryOperator *New;
    if (ShrAmt < ShlAmt) {
      Constant *Amt = ConstantInt::get(VarX->getType(), ShlAmt - ShrAmt);
      New = BinaryOperator::CreateShl(VarX, Amt);
      BinaryOperator *Orig = cast<BinaryOperator>(Shl);
      New->setHasNoSignedWrap(Orig->hasNoSignedWrap());
      New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap());
    } else {
      Constant *Amt = ConstantInt::get(VarX->getType(), ShrAmt - ShlAmt);
      New = isLshr ? BinaryOperator::CreateLShr(VarX, Amt) :
                     BinaryOperator::CreateAShr(VarX, Amt);
      if (cast<BinaryOperator>(Shr)->isExact())
        New->setIsExact(true);
    }

    return InsertNewInstWith(New, Shl->getIterator());
  }

  return nullptr;
}
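// Illustrative example for simplifyShrShlDemandedBits (hypothetical IR):
//   %s = lshr i8 %x, 1
//   %r = shl i8 %s, 3
// differs from shl i8 %x, 2 only in bit 2 of the result, so when bit 2 is
// not demanded the pair simplifies to %r = shl i8 %x, 2.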
/// The specified value produces a vector with any number of elements.
/// This method analyzes which elements of the operand are poison and
/// returns that information in PoisonElts.
///
/// DemandedElts contains the set of elements that are actually used by the
/// caller, and by default (AllowMultipleUsers equals false) the value is
/// simplified only if it has a single caller. If AllowMultipleUsers is set
/// to true, DemandedElts refers to the union of sets of elements that are
/// used by all callers.
///
/// If the information about demanded elements can be used to simplify the
/// operation, the operation is simplified and the resulting value is
/// returned. This returns null if no change was made.
Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
                                                    APInt DemandedElts,
                                                    APInt &PoisonElts,
                                                    unsigned Depth,
                                                    bool AllowMultipleUsers) {
  // Cannot analyze scalable type. The number of vector elements is not a
  // compile-time constant.
  if (isa<ScalableVectorType>(V->getType()))
    return nullptr;

  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
  APInt EltMask(APInt::getAllOnes(VWidth));
  assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");

  if (match(V, m_Poison())) {
    // If the entire vector is poison, just return this info.
    PoisonElts = EltMask;
    return nullptr;
  }

  if (DemandedElts.isZero()) { // If nothing is demanded, provide poison.
    PoisonElts = EltMask;
    return PoisonValue::get(V->getType());
  }

  PoisonElts = 0;

  if (auto *C = dyn_cast<Constant>(V)) {
    // Check if this is identity. If so, return 0 since we are not simplifying
    // anything.
    if (DemandedElts.isAllOnes())
      return nullptr;

    Type *EltTy = cast<VectorType>(V->getType())->getElementType();
    Constant *Poison = PoisonValue::get(EltTy);
    SmallVector<Constant*, 16> Elts;
    for (unsigned i = 0; i != VWidth; ++i) {
      if (!DemandedElts[i]) { // If not demanded, set to poison.
        Elts.push_back(Poison);
        PoisonElts.setBit(i);
        continue;
      }

      Constant *Elt = C->getAggregateElement(i);
      if (!Elt) return nullptr;

      Elts.push_back(Elt);
      if (isa<PoisonValue>(Elt)) // Already poison.
        PoisonElts.setBit(i);
    }

    // If we changed the constant, return it.
    Constant *NewCV = ConstantVector::get(Elts);
    return NewCV != C ? NewCV : nullptr;
  }

  // Limit search depth.
  if (Depth == 10)
    return nullptr;

  if (!AllowMultipleUsers) {
    // If multiple users are using the root value, proceed with
    // simplification conservatively assuming that all elements
    // are needed.
    if (!V->hasOneUse()) {
      // Quit if we find multiple users of a non-root value though.
      // They'll be handled when it's their turn to be visited by
      // the main instcombine process.
      if (Depth != 0)
        // TODO: Just compute the PoisonElts information recursively.
        return nullptr;

      // Conservatively assume that all elements are needed.
      DemandedElts = EltMask;
    }
  }

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return nullptr; // Only analyze instructions.

  bool MadeChange = false;
  auto simplifyAndSetOp = [&](Instruction *Inst, unsigned OpNum,
                              APInt Demanded, APInt &Undef) {
    auto *II = dyn_cast<IntrinsicInst>(Inst);
    Value *Op = II ? II->getArgOperand(OpNum) : Inst->getOperand(OpNum);
    if (Value *V = SimplifyDemandedVectorElts(Op, Demanded, Undef, Depth + 1)) {
      replaceOperand(*Inst, OpNum, V);
      MadeChange = true;
    }
  };

  APInt PoisonElts2(VWidth, 0);
  APInt PoisonElts3(VWidth, 0);
  switch (I->getOpcode()) {
  default: break;

  case Instruction::GetElementPtr: {
    // The LangRef requires that struct geps have all constant indices. As
    // such, we can't convert any operand to partial undef.
    auto mayIndexStructType = [](GetElementPtrInst &GEP) {
      for (auto I = gep_type_begin(GEP), E = gep_type_end(GEP);
           I != E; I++)
        if (I.isStruct())
          return true;
      return false;
    };
    if (mayIndexStructType(cast<GetElementPtrInst>(*I)))
      break;

    // Conservatively track the demanded elements back through any vector
    // operands we may have. We know there must be at least one, or we
    // wouldn't have a vector result to get here. Note that we intentionally
    // merge the undef bits here since gepping with either a poison base or
    // index results in poison.
    for (unsigned i = 0; i < I->getNumOperands(); i++) {
  case Instruction::GetElementPtr: {
    // The LangRef requires that struct geps have all constant indices. As
    // such, we can't convert any operand to partial undef.
    auto mayIndexStructType = [](GetElementPtrInst &GEP) {
      for (auto I = gep_type_begin(GEP), E = gep_type_end(GEP);
           I != E; I++)
        if (I.isStruct())
          return true;
      return false;
    };
    if (mayIndexStructType(cast<GetElementPtrInst>(*I)))
      break;

    // Conservatively track the demanded elements back through any vector
    // operands we may have. We know there must be at least one, or we
    // wouldn't have a vector result to get here. Note that we intentionally
    // merge the undef bits here since gepping with either a poison base or
    // index results in poison.
    for (unsigned i = 0; i < I->getNumOperands(); i++) {
      if (i == 0 ? match(I->getOperand(i), m_Undef())
                 : match(I->getOperand(i), m_Poison())) {
        // If the entire vector is undefined, just return this info.
        PoisonElts = EltMask;
        return nullptr;
      }
      if (I->getOperand(i)->getType()->isVectorTy()) {
        APInt PoisonEltsOp(VWidth, 0);
        simplifyAndSetOp(I, i, DemandedElts, PoisonEltsOp);
        // gep(x, undef) is not undef, so skip considering idx ops here
        // Note that we could propagate poison, but we can't distinguish between
        // undef & poison bits ATM
        if (i == 0)
          PoisonElts |= PoisonEltsOp;
      }
    }

    break;
  }
  case Instruction::InsertElement: {
    // If this is a variable index, we don't know which element it overwrites.
    // Demand exactly the same input as we produce.
    ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
    if (!Idx) {
      // Note that we can't propagate undef elt info, because we don't know
      // which elt is getting updated.
      simplifyAndSetOp(I, 0, DemandedElts, PoisonElts2);
      break;
    }

    // The element inserted overwrites whatever was there, so the input demanded
    // set is simpler than the output set.
    unsigned IdxNo = Idx->getZExtValue();
    APInt PreInsertDemandedElts = DemandedElts;
    if (IdxNo < VWidth)
      PreInsertDemandedElts.clearBit(IdxNo);

    // If we only demand the element that is being inserted and that element
    // was extracted from the same index in another vector with the same type,
    // replace this insert with that other vector.
    // Note: This is attempted before the call to simplifyAndSetOp because that
    // may change PoisonElts to a value that does not match with Vec.
    Value *Vec;
    if (PreInsertDemandedElts == 0 &&
        match(I->getOperand(1),
              m_ExtractElt(m_Value(Vec), m_SpecificInt(IdxNo))) &&
        Vec->getType() == I->getType()) {
      return Vec;
    }

    simplifyAndSetOp(I, 0, PreInsertDemandedElts, PoisonElts);

    // If this is inserting an element that isn't demanded, remove this
    // insertelement.
    if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
      Worklist.push(I);
      return I->getOperand(0);
    }

    // The inserted element is defined.
    PoisonElts.clearBit(IdxNo);
    break;
  }
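  // Illustrative example (hypothetical IR, not from the original comments):
  //   %s = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> zeroinitializer
  // is a splat that only ever reads element 0 of %x, so the splat fast path
  // below demands just that one lane from operand 0.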
  case Instruction::ShuffleVector: {
    auto *Shuffle = cast<ShuffleVectorInst>(I);
    assert(Shuffle->getOperand(0)->getType() ==
               Shuffle->getOperand(1)->getType() &&
           "Expected shuffle operands to have same type");
    unsigned OpWidth = cast<FixedVectorType>(Shuffle->getOperand(0)->getType())
                           ->getNumElements();
    // Handle trivial case of a splat. Only check the first element of LHS
    // operand.
    if (all_of(Shuffle->getShuffleMask(), [](int Elt) { return Elt == 0; }) &&
        DemandedElts.isAllOnes()) {
      if (!isa<PoisonValue>(I->getOperand(1))) {
        I->setOperand(1, PoisonValue::get(I->getOperand(1)->getType()));
        MadeChange = true;
      }
      APInt LeftDemanded(OpWidth, 1);
      APInt LHSPoisonElts(OpWidth, 0);
      simplifyAndSetOp(I, 0, LeftDemanded, LHSPoisonElts);
      if (LHSPoisonElts[0])
        PoisonElts = EltMask;
      else
        PoisonElts.clearAllBits();
      break;
    }

    APInt LeftDemanded(OpWidth, 0), RightDemanded(OpWidth, 0);
    for (unsigned i = 0; i < VWidth; i++) {
      if (DemandedElts[i]) {
        unsigned MaskVal = Shuffle->getMaskValue(i);
        if (MaskVal != -1u) {
          assert(MaskVal < OpWidth * 2 &&
                 "shufflevector mask index out of range!");
          if (MaskVal < OpWidth)
            LeftDemanded.setBit(MaskVal);
          else
            RightDemanded.setBit(MaskVal - OpWidth);
        }
      }
    }

    APInt LHSPoisonElts(OpWidth, 0);
    simplifyAndSetOp(I, 0, LeftDemanded, LHSPoisonElts);

    APInt RHSPoisonElts(OpWidth, 0);
    simplifyAndSetOp(I, 1, RightDemanded, RHSPoisonElts);

    // If this shuffle does not change the vector length and the elements
    // demanded by this shuffle are an identity mask, then this shuffle is
    // unnecessary.
    //
    // We are assuming canonical form for the mask, so the source vector is
    // operand 0 and operand 1 is not used.
    //
    // Note that if an element is demanded and this shuffle mask is undefined
    // for that element, then the shuffle is not considered an identity
    // operation. The shuffle prevents poison from the operand vector from
    // leaking to the result by replacing poison with an undefined value.
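    // Illustrative example (hypothetical IR, not from the original comments):
    // with DemandedElts = {0,2,3},
    //   %s = shufflevector <4 x i32> %x, <4 x i32> %y,
    //                      <4 x i32> <i32 0, i32 5, i32 2, i32 3>
    // is an identity on every demanded lane, so %s can be replaced by %x.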
    if (VWidth == OpWidth) {
      bool IsIdentityShuffle = true;
      for (unsigned i = 0; i < VWidth; i++) {
        unsigned MaskVal = Shuffle->getMaskValue(i);
        if (DemandedElts[i] && i != MaskVal) {
          IsIdentityShuffle = false;
          break;
        }
      }
      if (IsIdentityShuffle)
        return Shuffle->getOperand(0);
    }

    bool NewPoisonElts = false;
    unsigned LHSIdx = -1u, LHSValIdx = -1u;
    unsigned RHSIdx = -1u, RHSValIdx = -1u;
    bool LHSUniform = true;
    bool RHSUniform = true;
    for (unsigned i = 0; i < VWidth; i++) {
      unsigned MaskVal = Shuffle->getMaskValue(i);
      if (MaskVal == -1u) {
        PoisonElts.setBit(i);
      } else if (!DemandedElts[i]) {
        NewPoisonElts = true;
        PoisonElts.setBit(i);
      } else if (MaskVal < OpWidth) {
        if (LHSPoisonElts[MaskVal]) {
          NewPoisonElts = true;
          PoisonElts.setBit(i);
        } else {
          LHSIdx = LHSIdx == -1u ? i : OpWidth;
          LHSValIdx = LHSValIdx == -1u ? MaskVal : OpWidth;
          LHSUniform = LHSUniform && (MaskVal == i);
        }
      } else {
        if (RHSPoisonElts[MaskVal - OpWidth]) {
          NewPoisonElts = true;
          PoisonElts.setBit(i);
        } else {
          RHSIdx = RHSIdx == -1u ? i : OpWidth;
          RHSValIdx = RHSValIdx == -1u ? MaskVal - OpWidth : OpWidth;
          RHSUniform = RHSUniform && (MaskVal - OpWidth == i);
        }
      }
    }

    // Try to transform shuffle with constant vector and single element from
    // this constant vector to single insertelement instruction.
    // shufflevector V, C, <v1, v2, .., ci, .., vm> ->
    // insertelement V, C[ci], ci-n
    if (OpWidth ==
        cast<FixedVectorType>(Shuffle->getType())->getNumElements()) {
      Value *Op = nullptr;
      Constant *Value = nullptr;
      unsigned Idx = -1u;

      // Find constant vector with the single element in shuffle (LHS or RHS).
      if (LHSIdx < OpWidth && RHSUniform) {
        if (auto *CV = dyn_cast<ConstantVector>(Shuffle->getOperand(0))) {
          Op = Shuffle->getOperand(1);
          Value = CV->getOperand(LHSValIdx);
          Idx = LHSIdx;
        }
      }
      if (RHSIdx < OpWidth && LHSUniform) {
        if (auto *CV = dyn_cast<ConstantVector>(Shuffle->getOperand(1))) {
          Op = Shuffle->getOperand(0);
          Value = CV->getOperand(RHSValIdx);
          Idx = RHSIdx;
        }
      }
      // Found constant vector with single element - convert to insertelement.
      if (Op && Value) {
        Instruction *New = InsertElementInst::Create(
            Op, Value, ConstantInt::get(Type::getInt64Ty(I->getContext()), Idx),
            Shuffle->getName());
        InsertNewInstWith(New, Shuffle->getIterator());
        return New;
      }
    }
    if (NewPoisonElts) {
      // Add additional discovered poison elements to the mask.
      SmallVector<int, 16> Elts;
      for (unsigned i = 0; i < VWidth; ++i) {
        if (PoisonElts[i])
          Elts.push_back(PoisonMaskElem);
        else
          Elts.push_back(Shuffle->getMaskValue(i));
      }
      Shuffle->setShuffleMask(Elts);
      MadeChange = true;
    }
    break;
  }
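  // Illustrative example (hypothetical IR, not from the original comments):
  //   %r = select <2 x i1> <i1 true, i1 false>, <2 x i8> %a, <2 x i8> %b
  // Lane 0 only reads %a and lane 1 only reads %b, so the constant-condition
  // handling below clears the lanes an arm can never contribute.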
  case Instruction::Select: {
    // If this is a vector select, try to transform the select condition based
    // on the current demanded elements.
    SelectInst *Sel = cast<SelectInst>(I);
    if (Sel->getCondition()->getType()->isVectorTy()) {
      // TODO: We are not doing anything with PoisonElts based on this call.
      // It is overwritten below based on the other select operands. If an
      // element of the select condition is known undef, then we are free to
      // choose the output value from either arm of the select. If we know that
      // one of those values is undef, then the output can be undef.
      simplifyAndSetOp(I, 0, DemandedElts, PoisonElts);
    }

    // Next, see if we can transform the arms of the select.
    APInt DemandedLHS(DemandedElts), DemandedRHS(DemandedElts);
    if (auto *CV = dyn_cast<ConstantVector>(Sel->getCondition())) {
      for (unsigned i = 0; i < VWidth; i++) {
        // isNullValue() always returns false when called on a ConstantExpr.
        // Skip constant expressions to avoid propagating incorrect information.
        Constant *CElt = CV->getAggregateElement(i);
        if (isa<ConstantExpr>(CElt))
          continue;
        // TODO: If a select condition element is undef, we can demand from
        // either side. If one side is known undef, choosing that side would
        // propagate undef.
        if (CElt->isNullValue())
          DemandedLHS.clearBit(i);
        else
          DemandedRHS.clearBit(i);
      }
    }

    simplifyAndSetOp(I, 1, DemandedLHS, PoisonElts2);
    simplifyAndSetOp(I, 2, DemandedRHS, PoisonElts3);

    // Output elements are undefined if the element from each arm is undefined.
    // TODO: This can be improved. See comment in select condition handling.
    PoisonElts = PoisonElts2 & PoisonElts3;
    break;
  }
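  // Illustrative example (hypothetical IR, not from the original comments):
  //   %c = bitcast <2 x i64> %x to <4 x i32>
  // Here Ratio is 2 and output lanes {2,3} both map onto input lane 1, so
  // demanding either of those output lanes demands that whole input element.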
  case Instruction::BitCast: {
    // Vector->vector casts only.
    VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType());
    if (!VTy) break;
    unsigned InVWidth = cast<FixedVectorType>(VTy)->getNumElements();
    APInt InputDemandedElts(InVWidth, 0);
    PoisonElts2 = APInt(InVWidth, 0);
    unsigned Ratio;

    if (VWidth == InVWidth) {
      // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
      // elements as are demanded of us.
      Ratio = 1;
      InputDemandedElts = DemandedElts;
    } else if ((VWidth % InVWidth) == 0) {
      // If the number of elements in the output is a multiple of the number of
      // elements in the input then an input element is live if any of the
      // corresponding output elements are live.
      Ratio = VWidth / InVWidth;
      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
        if (DemandedElts[OutIdx])
          InputDemandedElts.setBit(OutIdx / Ratio);
    } else if ((InVWidth % VWidth) == 0) {
      // If the number of elements in the input is a multiple of the number of
      // elements in the output then an input element is live if the
      // corresponding output element is live.
      Ratio = InVWidth / VWidth;
      for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
        if (DemandedElts[InIdx / Ratio])
          InputDemandedElts.setBit(InIdx);
    } else {
      // Unsupported so far.
      break;
    }

    simplifyAndSetOp(I, 0, InputDemandedElts, PoisonElts2);

    if (VWidth == InVWidth) {
      PoisonElts = PoisonElts2;
    } else if ((VWidth % InVWidth) == 0) {
      // If the number of elements in the output is a multiple of the number of
      // elements in the input then an output element is undef if the
      // corresponding input element is undef.
      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
        if (PoisonElts2[OutIdx / Ratio])
          PoisonElts.setBit(OutIdx);
    } else if ((InVWidth % VWidth) == 0) {
      // If the number of elements in the input is a multiple of the number of
      // elements in the output then an output element is undef if all of the
      // corresponding input elements are undef.
      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
        APInt SubUndef = PoisonElts2.lshr(OutIdx * Ratio).zextOrTrunc(Ratio);
        if (SubUndef.popcount() == Ratio)
          PoisonElts.setBit(OutIdx);
      }
    } else {
      llvm_unreachable("Unimp");
    }
    break;
  }
  case Instruction::FPTrunc:
  case Instruction::FPExt:
    simplifyAndSetOp(I, 0, DemandedElts, PoisonElts);
    break;

  case Instruction::Call: {
    IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
    if (!II) break;
    switch (II->getIntrinsicID()) {
    case Intrinsic::masked_gather: // fallthrough
    case Intrinsic::masked_load: {
      // Subtlety: If we load from a pointer, the pointer must be valid
      // regardless of whether the element is demanded. Doing otherwise risks
      // segfaults which didn't exist in the original program.
      APInt DemandedPtrs(APInt::getAllOnes(VWidth)),
          DemandedPassThrough(DemandedElts);
      if (auto *CV = dyn_cast<ConstantVector>(II->getOperand(2)))
        for (unsigned i = 0; i < VWidth; i++) {
          Constant *CElt = CV->getAggregateElement(i);
          if (CElt->isNullValue())
            DemandedPtrs.clearBit(i);
          else if (CElt->isAllOnesValue())
            DemandedPassThrough.clearBit(i);
        }
      if (II->getIntrinsicID() == Intrinsic::masked_gather)
        simplifyAndSetOp(II, 0, DemandedPtrs, PoisonElts2);
      simplifyAndSetOp(II, 3, DemandedPassThrough, PoisonElts3);

      // Output elements are undefined if the elements from both sources are.
      // TODO: can strengthen via mask as well.
      PoisonElts = PoisonElts2 & PoisonElts3;
      break;
    }
    default: {
      // Handle target specific intrinsics
      std::optional<Value *> V = targetSimplifyDemandedVectorEltsIntrinsic(
          *II, DemandedElts, PoisonElts, PoisonElts2, PoisonElts3,
          simplifyAndSetOp);
      if (V)
        return *V;
      break;
    }
    } // switch on IntrinsicID
    break;
  } // case Call
  } // switch on Opcode

  // TODO: We bail completely on integer div/rem and shifts because they have
  // UB/poison potential, but that should be refined.
  BinaryOperator *BO;
  if (match(I, m_BinOp(BO)) && !BO->isIntDivRem() && !BO->isShift()) {
    Value *X = BO->getOperand(0);
    Value *Y = BO->getOperand(1);

    // Look for an equivalent binop except that one operand has been shuffled.
    // If the demand for this binop only includes elements that are the same as
    // the other binop, then we may be able to replace this binop with a use of
    // the earlier one.
    //
    // Example:
    // %other_bo = bo (shuf X, {0}), Y
    // %this_extracted_bo = extelt (bo X, Y), 0
    // -->
    // %other_bo = bo (shuf X, {0}), Y
    // %this_extracted_bo = extelt %other_bo, 0
    //
    // TODO: Handle demand of an arbitrary single element or more than one
    // element instead of just element 0.
    // TODO: Unlike general demanded elements transforms, this should be safe
    // for any (div/rem/shift) opcode too.
    if (DemandedElts == 1 && !X->hasOneUse() && !Y->hasOneUse() &&
        BO->hasOneUse()) {

      auto findShufBO = [&](bool MatchShufAsOp0) -> User * {
        // Try to use shuffle-of-operand in place of an operand:
        // bo X, Y --> bo (shuf X), Y
        // bo X, Y --> bo X, (shuf Y)
        BinaryOperator::BinaryOps Opcode = BO->getOpcode();
        Value *ShufOp = MatchShufAsOp0 ? X : Y;
        Value *OtherOp = MatchShufAsOp0 ? Y : X;
        for (User *U : OtherOp->users()) {
          ArrayRef<int> Mask;
          auto Shuf = m_Shuffle(m_Specific(ShufOp), m_Value(), m_Mask(Mask));
          if (BO->isCommutative()
                  ? match(U, m_c_BinOp(Opcode, Shuf, m_Specific(OtherOp)))
                  : MatchShufAsOp0
                        ? match(U, m_BinOp(Opcode, Shuf, m_Specific(OtherOp)))
                        : match(U, m_BinOp(Opcode, m_Specific(OtherOp), Shuf)))
            if (match(Mask, m_ZeroMask()) && Mask[0] != PoisonMaskElem)
              if (DT.dominates(U, I))
                return U;
        }
        return nullptr;
      };

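      // Check both operand orders: the pre-existing binop may have the
      // shuffle wrapped around either X or Y.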
      if (User *ShufBO = findShufBO(/* MatchShufAsOp0 */ true))
        return ShufBO;
      if (User *ShufBO = findShufBO(/* MatchShufAsOp0 */ false))
        return ShufBO;
    }

    simplifyAndSetOp(I, 0, DemandedElts, PoisonElts);
    simplifyAndSetOp(I, 1, DemandedElts, PoisonElts2);

    // Output elements are undefined if both are undefined. Consider things
    // like undef & 0. The result is known zero, not undef.
    PoisonElts &= PoisonElts2;
  }

  // If we've proven all of the lanes poison, return a poison value.
  // TODO: Intersect w/demanded lanes
  if (PoisonElts.isAllOnes())
    return PoisonValue::get(I->getType());

  return MadeChange ? I : nullptr;
}

/// For floating-point classes that resolve to a single bit pattern, return that
/// value.
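///
/// For illustration (not part of the original comment): fcPosInf folds to
/// +infinity and fcNone folds to poison, while a mask spanning several bit
/// patterns (e.g. fcZero, which covers both signed zeros) has no single
/// constant and yields nullptr.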
static Constant *getFPClassConstant(Type *Ty, FPClassTest Mask) {
  switch (Mask) {
  case fcPosZero:
    return ConstantFP::getZero(Ty);
  case fcNegZero:
    return ConstantFP::getZero(Ty, true);
  case fcPosInf:
    return ConstantFP::getInfinity(Ty);
  case fcNegInf:
    return ConstantFP::getInfinity(Ty, true);
  case fcNone:
    return PoisonValue::get(Ty);
  default:
    return nullptr;
  }
}

Value *InstCombinerImpl::SimplifyDemandedUseFPClass(
    Value *V, const FPClassTest DemandedMask, KnownFPClass &Known,
    unsigned Depth, Instruction *CxtI) {
  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
  Type *VTy = V->getType();

  assert(Known == KnownFPClass() && "expected uninitialized state");

  if (DemandedMask == fcNone)
    return isa<UndefValue>(V) ? nullptr : PoisonValue::get(VTy);

  if (Depth == MaxAnalysisRecursionDepth)
    return nullptr;

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) {
    // Handle constants and arguments
    Known = computeKnownFPClass(V, fcAllFlags, CxtI, Depth + 1);
    Value *FoldedToConst =
        getFPClassConstant(VTy, DemandedMask & Known.KnownFPClasses);
    return FoldedToConst == V ? nullptr : FoldedToConst;
  }

  if (!I->hasOneUse())
    return nullptr;

  // TODO: Should account for nofpclass/FastMathFlags on current instruction
  switch (I->getOpcode()) {
  case Instruction::FNeg: {
    if (SimplifyDemandedFPClass(I, 0, llvm::fneg(DemandedMask), Known,
                                Depth + 1))
      return I;
    Known.fneg();
    break;
  }
  case Instruction::Call: {
    CallInst *CI = cast<CallInst>(I);
    switch (CI->getIntrinsicID()) {
    case Intrinsic::fabs:
      if (SimplifyDemandedFPClass(I, 0, llvm::inverse_fabs(DemandedMask), Known,
                                  Depth + 1))
        return I;
      Known.fabs();
      break;
    case Intrinsic::arithmetic_fence:
      if (SimplifyDemandedFPClass(I, 0, DemandedMask, Known, Depth + 1))
        return I;
      break;
    case Intrinsic::copysign: {
      // Flip on more potentially demanded classes
      const FPClassTest DemandedMaskAnySign = llvm::unknown_sign(DemandedMask);
      if (SimplifyDemandedFPClass(I, 0, DemandedMaskAnySign, Known, Depth + 1))
        return I;

      if ((DemandedMask & fcPositive) == fcNone) {
        // Roundabout way of replacing with fneg(fabs)
        I->setOperand(1, ConstantFP::get(VTy, -1.0));
        return I;
      }

      if ((DemandedMask & fcNegative) == fcNone) {
        // Roundabout way of replacing with fabs
        I->setOperand(1, ConstantFP::getZero(VTy));
        return I;
      }

      KnownFPClass KnownSign =
          computeKnownFPClass(I->getOperand(1), fcAllFlags, CxtI, Depth + 1);
      Known.copysign(KnownSign);
      break;
    }
    default:
      Known = computeKnownFPClass(I, ~DemandedMask, CxtI, Depth + 1);
      break;
    }

    break;
  }
  case Instruction::Select: {
    KnownFPClass KnownLHS, KnownRHS;
    if (SimplifyDemandedFPClass(I, 2, DemandedMask, KnownRHS, Depth + 1) ||
        SimplifyDemandedFPClass(I, 1, DemandedMask, KnownLHS, Depth + 1))
      return I;

    if (KnownLHS.isKnownNever(DemandedMask))
      return I->getOperand(2);
    if (KnownRHS.isKnownNever(DemandedMask))
      return I->getOperand(1);

    // TODO: Recognize clamping patterns
    Known = KnownLHS | KnownRHS;
    break;
  }
  default:
    Known = computeKnownFPClass(I, ~DemandedMask, CxtI, Depth + 1);
    break;
  }

  return getFPClassConstant(VTy, DemandedMask & Known.KnownFPClasses);
}

bool InstCombinerImpl::SimplifyDemandedFPClass(Instruction *I, unsigned OpNo,
                                               FPClassTest DemandedMask,
                                               KnownFPClass &Known,
                                               unsigned Depth) {
  Use &U = I->getOperandUse(OpNo);
  Value *NewVal =
      SimplifyDemandedUseFPClass(U.get(), DemandedMask, Known, Depth, I);
  if (!NewVal)
    return false;
  if (Instruction *OpInst = dyn_cast<Instruction>(U))
    salvageDebugInfo(*OpInst);

  replaceUse(U, NewVal);
  return true;
}