Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
35266 views
//===- InstCombineSelect.cpp ----------------------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file implements the visitSelect function.9//10//===----------------------------------------------------------------------===//1112#include "InstCombineInternal.h"13#include "llvm/ADT/APInt.h"14#include "llvm/ADT/STLExtras.h"15#include "llvm/ADT/SmallVector.h"16#include "llvm/Analysis/AssumptionCache.h"17#include "llvm/Analysis/CmpInstAnalysis.h"18#include "llvm/Analysis/InstructionSimplify.h"19#include "llvm/Analysis/OverflowInstAnalysis.h"20#include "llvm/Analysis/ValueTracking.h"21#include "llvm/Analysis/VectorUtils.h"22#include "llvm/IR/BasicBlock.h"23#include "llvm/IR/Constant.h"24#include "llvm/IR/ConstantRange.h"25#include "llvm/IR/Constants.h"26#include "llvm/IR/DerivedTypes.h"27#include "llvm/IR/IRBuilder.h"28#include "llvm/IR/InstrTypes.h"29#include "llvm/IR/Instruction.h"30#include "llvm/IR/Instructions.h"31#include "llvm/IR/IntrinsicInst.h"32#include "llvm/IR/Intrinsics.h"33#include "llvm/IR/Operator.h"34#include "llvm/IR/PatternMatch.h"35#include "llvm/IR/Type.h"36#include "llvm/IR/User.h"37#include "llvm/IR/Value.h"38#include "llvm/Support/Casting.h"39#include "llvm/Support/ErrorHandling.h"40#include "llvm/Support/KnownBits.h"41#include "llvm/Transforms/InstCombine/InstCombiner.h"42#include <cassert>43#include <utility>4445#define DEBUG_TYPE "instcombine"46#include "llvm/Transforms/Utils/InstructionWorklist.h"4748using namespace llvm;49using namespace PatternMatch;505152/// Replace a select operand based on an equality comparison with the identity53/// constant of a binop.54static Instruction *foldSelectBinOpIdentity(SelectInst &Sel,55const TargetLibraryInfo &TLI,56InstCombinerImpl &IC) {57// The select condition must be an equality compare with a constant operand.58Value *X;59Constant *C;60CmpInst::Predicate Pred;61if (!match(Sel.getCondition(), m_Cmp(Pred, m_Value(X), m_Constant(C))))62return nullptr;6364bool IsEq;65if (ICmpInst::isEquality(Pred))66IsEq = Pred == ICmpInst::ICMP_EQ;67else if (Pred == FCmpInst::FCMP_OEQ)68IsEq = true;69else if (Pred == FCmpInst::FCMP_UNE)70IsEq = false;71else72return nullptr;7374// A select operand must be a binop.75BinaryOperator *BO;76if (!match(Sel.getOperand(IsEq ? 1 : 2), m_BinOp(BO)))77return nullptr;7879// The compare constant must be the identity constant for that binop.80// If this a floating-point compare with 0.0, any zero constant will do.81Type *Ty = BO->getType();82Constant *IdC = ConstantExpr::getBinOpIdentity(BO->getOpcode(), Ty, true);83if (IdC != C) {84if (!IdC || !CmpInst::isFPPredicate(Pred))85return nullptr;86if (!match(IdC, m_AnyZeroFP()) || !match(C, m_AnyZeroFP()))87return nullptr;88}8990// Last, match the compare variable operand with a binop operand.91Value *Y;92if (!BO->isCommutative() && !match(BO, m_BinOp(m_Value(Y), m_Specific(X))))93return nullptr;94if (!match(BO, m_c_BinOp(m_Value(Y), m_Specific(X))))95return nullptr;9697// +0.0 compares equal to -0.0, and so it does not behave as required for this98// transform. Bail out if we can not exclude that possibility.99if (isa<FPMathOperator>(BO))100if (!BO->hasNoSignedZeros() &&101!cannotBeNegativeZero(Y, 0,102IC.getSimplifyQuery().getWithInstruction(&Sel)))103return nullptr;104105// BO = binop Y, X106// S = { select (cmp eq X, C), BO, ? } or { select (cmp ne X, C), ?, BO }107// =>108// S = { select (cmp eq X, C), Y, ? } or { select (cmp ne X, C), ?, Y }109return IC.replaceOperand(Sel, IsEq ? 1 : 2, Y);110}111112/// This folds:113/// select (icmp eq (and X, C1)), TC, FC114/// iff C1 is a power 2 and the difference between TC and FC is a power-of-2.115/// To something like:116/// (shr (and (X, C1)), (log2(C1) - log2(TC-FC))) + FC117/// Or:118/// (shl (and (X, C1)), (log2(TC-FC) - log2(C1))) + FC119/// With some variations depending if FC is larger than TC, or the shift120/// isn't needed, or the bit widths don't match.121static Value *foldSelectICmpAnd(SelectInst &Sel, ICmpInst *Cmp,122InstCombiner::BuilderTy &Builder) {123const APInt *SelTC, *SelFC;124if (!match(Sel.getTrueValue(), m_APInt(SelTC)) ||125!match(Sel.getFalseValue(), m_APInt(SelFC)))126return nullptr;127128// If this is a vector select, we need a vector compare.129Type *SelType = Sel.getType();130if (SelType->isVectorTy() != Cmp->getType()->isVectorTy())131return nullptr;132133Value *V;134APInt AndMask;135bool CreateAnd = false;136ICmpInst::Predicate Pred = Cmp->getPredicate();137if (ICmpInst::isEquality(Pred)) {138if (!match(Cmp->getOperand(1), m_Zero()))139return nullptr;140141V = Cmp->getOperand(0);142const APInt *AndRHS;143if (!match(V, m_And(m_Value(), m_Power2(AndRHS))))144return nullptr;145146AndMask = *AndRHS;147} else if (decomposeBitTestICmp(Cmp->getOperand(0), Cmp->getOperand(1),148Pred, V, AndMask)) {149assert(ICmpInst::isEquality(Pred) && "Not equality test?");150if (!AndMask.isPowerOf2())151return nullptr;152153CreateAnd = true;154} else {155return nullptr;156}157158// In general, when both constants are non-zero, we would need an offset to159// replace the select. This would require more instructions than we started160// with. But there's one special-case that we handle here because it can161// simplify/reduce the instructions.162APInt TC = *SelTC;163APInt FC = *SelFC;164if (!TC.isZero() && !FC.isZero()) {165// If the select constants differ by exactly one bit and that's the same166// bit that is masked and checked by the select condition, the select can167// be replaced by bitwise logic to set/clear one bit of the constant result.168if (TC.getBitWidth() != AndMask.getBitWidth() || (TC ^ FC) != AndMask)169return nullptr;170if (CreateAnd) {171// If we have to create an 'and', then we must kill the cmp to not172// increase the instruction count.173if (!Cmp->hasOneUse())174return nullptr;175V = Builder.CreateAnd(V, ConstantInt::get(SelType, AndMask));176}177bool ExtraBitInTC = TC.ugt(FC);178if (Pred == ICmpInst::ICMP_EQ) {179// If the masked bit in V is clear, clear or set the bit in the result:180// (V & AndMaskC) == 0 ? TC : FC --> (V & AndMaskC) ^ TC181// (V & AndMaskC) == 0 ? TC : FC --> (V & AndMaskC) | TC182Constant *C = ConstantInt::get(SelType, TC);183return ExtraBitInTC ? Builder.CreateXor(V, C) : Builder.CreateOr(V, C);184}185if (Pred == ICmpInst::ICMP_NE) {186// If the masked bit in V is set, set or clear the bit in the result:187// (V & AndMaskC) != 0 ? TC : FC --> (V & AndMaskC) | FC188// (V & AndMaskC) != 0 ? TC : FC --> (V & AndMaskC) ^ FC189Constant *C = ConstantInt::get(SelType, FC);190return ExtraBitInTC ? Builder.CreateOr(V, C) : Builder.CreateXor(V, C);191}192llvm_unreachable("Only expecting equality predicates");193}194195// Make sure one of the select arms is a power-of-2.196if (!TC.isPowerOf2() && !FC.isPowerOf2())197return nullptr;198199// Determine which shift is needed to transform result of the 'and' into the200// desired result.201const APInt &ValC = !TC.isZero() ? TC : FC;202unsigned ValZeros = ValC.logBase2();203unsigned AndZeros = AndMask.logBase2();204bool ShouldNotVal = !TC.isZero();205ShouldNotVal ^= Pred == ICmpInst::ICMP_NE;206207// If we would need to create an 'and' + 'shift' + 'xor' to replace a 'select'208// + 'icmp', then this transformation would result in more instructions and209// potentially interfere with other folding.210if (CreateAnd && ShouldNotVal && ValZeros != AndZeros)211return nullptr;212213// Insert the 'and' instruction on the input to the truncate.214if (CreateAnd)215V = Builder.CreateAnd(V, ConstantInt::get(V->getType(), AndMask));216217// If types don't match, we can still convert the select by introducing a zext218// or a trunc of the 'and'.219if (ValZeros > AndZeros) {220V = Builder.CreateZExtOrTrunc(V, SelType);221V = Builder.CreateShl(V, ValZeros - AndZeros);222} else if (ValZeros < AndZeros) {223V = Builder.CreateLShr(V, AndZeros - ValZeros);224V = Builder.CreateZExtOrTrunc(V, SelType);225} else {226V = Builder.CreateZExtOrTrunc(V, SelType);227}228229// Okay, now we know that everything is set up, we just don't know whether we230// have a icmp_ne or icmp_eq and whether the true or false val is the zero.231if (ShouldNotVal)232V = Builder.CreateXor(V, ValC);233234return V;235}236237/// We want to turn code that looks like this:238/// %C = or %A, %B239/// %D = select %cond, %C, %A240/// into:241/// %C = select %cond, %B, 0242/// %D = or %A, %C243///244/// Assuming that the specified instruction is an operand to the select, return245/// a bitmask indicating which operands of this instruction are foldable if they246/// equal the other incoming value of the select.247static unsigned getSelectFoldableOperands(BinaryOperator *I) {248switch (I->getOpcode()) {249case Instruction::Add:250case Instruction::FAdd:251case Instruction::Mul:252case Instruction::FMul:253case Instruction::And:254case Instruction::Or:255case Instruction::Xor:256return 3; // Can fold through either operand.257case Instruction::Sub: // Can only fold on the amount subtracted.258case Instruction::FSub:259case Instruction::FDiv: // Can only fold on the divisor amount.260case Instruction::Shl: // Can only fold on the shift amount.261case Instruction::LShr:262case Instruction::AShr:263return 1;264default:265return 0; // Cannot fold266}267}268269/// We have (select c, TI, FI), and we know that TI and FI have the same opcode.270Instruction *InstCombinerImpl::foldSelectOpOp(SelectInst &SI, Instruction *TI,271Instruction *FI) {272// Don't break up min/max patterns. The hasOneUse checks below prevent that273// for most cases, but vector min/max with bitcasts can be transformed. If the274// one-use restrictions are eased for other patterns, we still don't want to275// obfuscate min/max.276if ((match(&SI, m_SMin(m_Value(), m_Value())) ||277match(&SI, m_SMax(m_Value(), m_Value())) ||278match(&SI, m_UMin(m_Value(), m_Value())) ||279match(&SI, m_UMax(m_Value(), m_Value()))))280return nullptr;281282// If this is a cast from the same type, merge.283Value *Cond = SI.getCondition();284Type *CondTy = Cond->getType();285if (TI->getNumOperands() == 1 && TI->isCast()) {286Type *FIOpndTy = FI->getOperand(0)->getType();287if (TI->getOperand(0)->getType() != FIOpndTy)288return nullptr;289290// The select condition may be a vector. We may only change the operand291// type if the vector width remains the same (and matches the condition).292if (auto *CondVTy = dyn_cast<VectorType>(CondTy)) {293if (!FIOpndTy->isVectorTy() ||294CondVTy->getElementCount() !=295cast<VectorType>(FIOpndTy)->getElementCount())296return nullptr;297298// TODO: If the backend knew how to deal with casts better, we could299// remove this limitation. For now, there's too much potential to create300// worse codegen by promoting the select ahead of size-altering casts301// (PR28160).302//303// Note that ValueTracking's matchSelectPattern() looks through casts304// without checking 'hasOneUse' when it matches min/max patterns, so this305// transform may end up happening anyway.306if (TI->getOpcode() != Instruction::BitCast &&307(!TI->hasOneUse() || !FI->hasOneUse()))308return nullptr;309} else if (!TI->hasOneUse() || !FI->hasOneUse()) {310// TODO: The one-use restrictions for a scalar select could be eased if311// the fold of a select in visitLoadInst() was enhanced to match a pattern312// that includes a cast.313return nullptr;314}315316// Fold this by inserting a select from the input values.317Value *NewSI =318Builder.CreateSelect(Cond, TI->getOperand(0), FI->getOperand(0),319SI.getName() + ".v", &SI);320return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI,321TI->getType());322}323324Value *OtherOpT, *OtherOpF;325bool MatchIsOpZero;326auto getCommonOp = [&](Instruction *TI, Instruction *FI, bool Commute,327bool Swapped = false) -> Value * {328assert(!(Commute && Swapped) &&329"Commute and Swapped can't set at the same time");330if (!Swapped) {331if (TI->getOperand(0) == FI->getOperand(0)) {332OtherOpT = TI->getOperand(1);333OtherOpF = FI->getOperand(1);334MatchIsOpZero = true;335return TI->getOperand(0);336} else if (TI->getOperand(1) == FI->getOperand(1)) {337OtherOpT = TI->getOperand(0);338OtherOpF = FI->getOperand(0);339MatchIsOpZero = false;340return TI->getOperand(1);341}342}343344if (!Commute && !Swapped)345return nullptr;346347// If we are allowing commute or swap of operands, then348// allow a cross-operand match. In that case, MatchIsOpZero349// means that TI's operand 0 (FI's operand 1) is the common op.350if (TI->getOperand(0) == FI->getOperand(1)) {351OtherOpT = TI->getOperand(1);352OtherOpF = FI->getOperand(0);353MatchIsOpZero = true;354return TI->getOperand(0);355} else if (TI->getOperand(1) == FI->getOperand(0)) {356OtherOpT = TI->getOperand(0);357OtherOpF = FI->getOperand(1);358MatchIsOpZero = false;359return TI->getOperand(1);360}361return nullptr;362};363364if (TI->hasOneUse() || FI->hasOneUse()) {365// Cond ? -X : -Y --> -(Cond ? X : Y)366Value *X, *Y;367if (match(TI, m_FNeg(m_Value(X))) && match(FI, m_FNeg(m_Value(Y)))) {368// Intersect FMF from the fneg instructions and union those with the369// select.370FastMathFlags FMF = TI->getFastMathFlags();371FMF &= FI->getFastMathFlags();372FMF |= SI.getFastMathFlags();373Value *NewSel =374Builder.CreateSelect(Cond, X, Y, SI.getName() + ".v", &SI);375if (auto *NewSelI = dyn_cast<Instruction>(NewSel))376NewSelI->setFastMathFlags(FMF);377Instruction *NewFNeg = UnaryOperator::CreateFNeg(NewSel);378NewFNeg->setFastMathFlags(FMF);379return NewFNeg;380}381382// Min/max intrinsic with a common operand can have the common operand383// pulled after the select. This is the same transform as below for binops,384// but specialized for intrinsic matching and without the restrictive uses385// clause.386auto *TII = dyn_cast<IntrinsicInst>(TI);387auto *FII = dyn_cast<IntrinsicInst>(FI);388if (TII && FII && TII->getIntrinsicID() == FII->getIntrinsicID()) {389if (match(TII, m_MaxOrMin(m_Value(), m_Value()))) {390if (Value *MatchOp = getCommonOp(TI, FI, true)) {391Value *NewSel =392Builder.CreateSelect(Cond, OtherOpT, OtherOpF, "minmaxop", &SI);393return CallInst::Create(TII->getCalledFunction(), {NewSel, MatchOp});394}395}396397// select c, (ldexp v, e0), (ldexp v, e1) -> ldexp v, (select c, e0, e1)398// select c, (ldexp v0, e), (ldexp v1, e) -> ldexp (select c, v0, v1), e399//400// select c, (ldexp v0, e0), (ldexp v1, e1) ->401// ldexp (select c, v0, v1), (select c, e0, e1)402if (TII->getIntrinsicID() == Intrinsic::ldexp) {403Value *LdexpVal0 = TII->getArgOperand(0);404Value *LdexpExp0 = TII->getArgOperand(1);405Value *LdexpVal1 = FII->getArgOperand(0);406Value *LdexpExp1 = FII->getArgOperand(1);407if (LdexpExp0->getType() == LdexpExp1->getType()) {408FPMathOperator *SelectFPOp = cast<FPMathOperator>(&SI);409FastMathFlags FMF = cast<FPMathOperator>(TII)->getFastMathFlags();410FMF &= cast<FPMathOperator>(FII)->getFastMathFlags();411FMF |= SelectFPOp->getFastMathFlags();412413Value *SelectVal = Builder.CreateSelect(Cond, LdexpVal0, LdexpVal1);414Value *SelectExp = Builder.CreateSelect(Cond, LdexpExp0, LdexpExp1);415416CallInst *NewLdexp = Builder.CreateIntrinsic(417TII->getType(), Intrinsic::ldexp, {SelectVal, SelectExp});418NewLdexp->setFastMathFlags(FMF);419return replaceInstUsesWith(SI, NewLdexp);420}421}422}423424// icmp with a common operand also can have the common operand425// pulled after the select.426ICmpInst::Predicate TPred, FPred;427if (match(TI, m_ICmp(TPred, m_Value(), m_Value())) &&428match(FI, m_ICmp(FPred, m_Value(), m_Value()))) {429if (TPred == FPred || TPred == CmpInst::getSwappedPredicate(FPred)) {430bool Swapped = TPred != FPred;431if (Value *MatchOp =432getCommonOp(TI, FI, ICmpInst::isEquality(TPred), Swapped)) {433Value *NewSel = Builder.CreateSelect(Cond, OtherOpT, OtherOpF,434SI.getName() + ".v", &SI);435return new ICmpInst(436MatchIsOpZero ? TPred : CmpInst::getSwappedPredicate(TPred),437MatchOp, NewSel);438}439}440}441}442443// Only handle binary operators (including two-operand getelementptr) with444// one-use here. As with the cast case above, it may be possible to relax the445// one-use constraint, but that needs be examined carefully since it may not446// reduce the total number of instructions.447if (TI->getNumOperands() != 2 || FI->getNumOperands() != 2 ||448!TI->isSameOperationAs(FI) ||449(!isa<BinaryOperator>(TI) && !isa<GetElementPtrInst>(TI)) ||450!TI->hasOneUse() || !FI->hasOneUse())451return nullptr;452453// Figure out if the operations have any operands in common.454Value *MatchOp = getCommonOp(TI, FI, TI->isCommutative());455if (!MatchOp)456return nullptr;457458// If the select condition is a vector, the operands of the original select's459// operands also must be vectors. This may not be the case for getelementptr460// for example.461if (CondTy->isVectorTy() && (!OtherOpT->getType()->isVectorTy() ||462!OtherOpF->getType()->isVectorTy()))463return nullptr;464465// If we are sinking div/rem after a select, we may need to freeze the466// condition because div/rem may induce immediate UB with a poison operand.467// For example, the following transform is not safe if Cond can ever be poison468// because we can replace poison with zero and then we have div-by-zero that469// didn't exist in the original code:470// Cond ? x/y : x/z --> x / (Cond ? y : z)471auto *BO = dyn_cast<BinaryOperator>(TI);472if (BO && BO->isIntDivRem() && !isGuaranteedNotToBePoison(Cond)) {473// A udiv/urem with a common divisor is safe because UB can only occur with474// div-by-zero, and that would be present in the original code.475if (BO->getOpcode() == Instruction::SDiv ||476BO->getOpcode() == Instruction::SRem || MatchIsOpZero)477Cond = Builder.CreateFreeze(Cond);478}479480// If we reach here, they do have operations in common.481Value *NewSI = Builder.CreateSelect(Cond, OtherOpT, OtherOpF,482SI.getName() + ".v", &SI);483Value *Op0 = MatchIsOpZero ? MatchOp : NewSI;484Value *Op1 = MatchIsOpZero ? NewSI : MatchOp;485if (auto *BO = dyn_cast<BinaryOperator>(TI)) {486BinaryOperator *NewBO = BinaryOperator::Create(BO->getOpcode(), Op0, Op1);487NewBO->copyIRFlags(TI);488NewBO->andIRFlags(FI);489return NewBO;490}491if (auto *TGEP = dyn_cast<GetElementPtrInst>(TI)) {492auto *FGEP = cast<GetElementPtrInst>(FI);493Type *ElementType = TGEP->getSourceElementType();494return GetElementPtrInst::Create(495ElementType, Op0, Op1, TGEP->getNoWrapFlags() & FGEP->getNoWrapFlags());496}497llvm_unreachable("Expected BinaryOperator or GEP");498return nullptr;499}500501static bool isSelect01(const APInt &C1I, const APInt &C2I) {502if (!C1I.isZero() && !C2I.isZero()) // One side must be zero.503return false;504return C1I.isOne() || C1I.isAllOnes() || C2I.isOne() || C2I.isAllOnes();505}506507/// Try to fold the select into one of the operands to allow further508/// optimization.509Instruction *InstCombinerImpl::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,510Value *FalseVal) {511// See the comment above getSelectFoldableOperands for a description of the512// transformation we are doing here.513auto TryFoldSelectIntoOp = [&](SelectInst &SI, Value *TrueVal,514Value *FalseVal,515bool Swapped) -> Instruction * {516auto *TVI = dyn_cast<BinaryOperator>(TrueVal);517if (!TVI || !TVI->hasOneUse() || isa<Constant>(FalseVal))518return nullptr;519520unsigned SFO = getSelectFoldableOperands(TVI);521unsigned OpToFold = 0;522if ((SFO & 1) && FalseVal == TVI->getOperand(0))523OpToFold = 1;524else if ((SFO & 2) && FalseVal == TVI->getOperand(1))525OpToFold = 2;526527if (!OpToFold)528return nullptr;529530// TODO: We probably ought to revisit cases where the select and FP531// instructions have different flags and add tests to ensure the532// behaviour is correct.533FastMathFlags FMF;534if (isa<FPMathOperator>(&SI))535FMF = SI.getFastMathFlags();536Constant *C = ConstantExpr::getBinOpIdentity(537TVI->getOpcode(), TVI->getType(), true, FMF.noSignedZeros());538Value *OOp = TVI->getOperand(2 - OpToFold);539// Avoid creating select between 2 constants unless it's selecting540// between 0, 1 and -1.541const APInt *OOpC;542bool OOpIsAPInt = match(OOp, m_APInt(OOpC));543if (isa<Constant>(OOp) &&544(!OOpIsAPInt || !isSelect01(C->getUniqueInteger(), *OOpC)))545return nullptr;546547// If the false value is a NaN then we have that the floating point math548// operation in the transformed code may not preserve the exact NaN549// bit-pattern -- e.g. `fadd sNaN, 0.0 -> qNaN`.550// This makes the transformation incorrect since the original program would551// have preserved the exact NaN bit-pattern.552// Avoid the folding if the false value might be a NaN.553if (isa<FPMathOperator>(&SI) &&554!computeKnownFPClass(FalseVal, FMF, fcNan, &SI).isKnownNeverNaN())555return nullptr;556557Value *NewSel = Builder.CreateSelect(SI.getCondition(), Swapped ? C : OOp,558Swapped ? OOp : C, "", &SI);559if (isa<FPMathOperator>(&SI))560cast<Instruction>(NewSel)->setFastMathFlags(FMF);561NewSel->takeName(TVI);562BinaryOperator *BO =563BinaryOperator::Create(TVI->getOpcode(), FalseVal, NewSel);564BO->copyIRFlags(TVI);565return BO;566};567568if (Instruction *R = TryFoldSelectIntoOp(SI, TrueVal, FalseVal, false))569return R;570571if (Instruction *R = TryFoldSelectIntoOp(SI, FalseVal, TrueVal, true))572return R;573574return nullptr;575}576577/// We want to turn:578/// (select (icmp eq (and X, Y), 0), (and (lshr X, Z), 1), 1)579/// into:580/// zext (icmp ne i32 (and X, (or Y, (shl 1, Z))), 0)581/// Note:582/// Z may be 0 if lshr is missing.583/// Worst-case scenario is that we will replace 5 instructions with 5 different584/// instructions, but we got rid of select.585static Instruction *foldSelectICmpAndAnd(Type *SelType, const ICmpInst *Cmp,586Value *TVal, Value *FVal,587InstCombiner::BuilderTy &Builder) {588if (!(Cmp->hasOneUse() && Cmp->getOperand(0)->hasOneUse() &&589Cmp->getPredicate() == ICmpInst::ICMP_EQ &&590match(Cmp->getOperand(1), m_Zero()) && match(FVal, m_One())))591return nullptr;592593// The TrueVal has general form of: and %B, 1594Value *B;595if (!match(TVal, m_OneUse(m_And(m_Value(B), m_One()))))596return nullptr;597598// Where %B may be optionally shifted: lshr %X, %Z.599Value *X, *Z;600const bool HasShift = match(B, m_OneUse(m_LShr(m_Value(X), m_Value(Z))));601602// The shift must be valid.603// TODO: This restricts the fold to constant shift amounts. Is there a way to604// handle variable shifts safely? PR47012605if (HasShift &&606!match(Z, m_SpecificInt_ICMP(CmpInst::ICMP_ULT,607APInt(SelType->getScalarSizeInBits(),608SelType->getScalarSizeInBits()))))609return nullptr;610611if (!HasShift)612X = B;613614Value *Y;615if (!match(Cmp->getOperand(0), m_c_And(m_Specific(X), m_Value(Y))))616return nullptr;617618// ((X & Y) == 0) ? ((X >> Z) & 1) : 1 --> (X & (Y | (1 << Z))) != 0619// ((X & Y) == 0) ? (X & 1) : 1 --> (X & (Y | 1)) != 0620Constant *One = ConstantInt::get(SelType, 1);621Value *MaskB = HasShift ? Builder.CreateShl(One, Z) : One;622Value *FullMask = Builder.CreateOr(Y, MaskB);623Value *MaskedX = Builder.CreateAnd(X, FullMask);624Value *ICmpNeZero = Builder.CreateIsNotNull(MaskedX);625return new ZExtInst(ICmpNeZero, SelType);626}627628/// We want to turn:629/// (select (icmp eq (and X, C1), 0), 0, (shl [nsw/nuw] X, C2));630/// iff C1 is a mask and the number of its leading zeros is equal to C2631/// into:632/// shl X, C2633static Value *foldSelectICmpAndZeroShl(const ICmpInst *Cmp, Value *TVal,634Value *FVal,635InstCombiner::BuilderTy &Builder) {636ICmpInst::Predicate Pred;637Value *AndVal;638if (!match(Cmp, m_ICmp(Pred, m_Value(AndVal), m_Zero())))639return nullptr;640641if (Pred == ICmpInst::ICMP_NE) {642Pred = ICmpInst::ICMP_EQ;643std::swap(TVal, FVal);644}645646Value *X;647const APInt *C2, *C1;648if (Pred != ICmpInst::ICMP_EQ ||649!match(AndVal, m_And(m_Value(X), m_APInt(C1))) ||650!match(TVal, m_Zero()) || !match(FVal, m_Shl(m_Specific(X), m_APInt(C2))))651return nullptr;652653if (!C1->isMask() ||654C1->countLeadingZeros() != static_cast<unsigned>(C2->getZExtValue()))655return nullptr;656657auto *FI = dyn_cast<Instruction>(FVal);658if (!FI)659return nullptr;660661FI->setHasNoSignedWrap(false);662FI->setHasNoUnsignedWrap(false);663return FVal;664}665666/// We want to turn:667/// (select (icmp sgt x, C), lshr (X, Y), ashr (X, Y)); iff C s>= -1668/// (select (icmp slt x, C), ashr (X, Y), lshr (X, Y)); iff C s>= 0669/// into:670/// ashr (X, Y)671static Value *foldSelectICmpLshrAshr(const ICmpInst *IC, Value *TrueVal,672Value *FalseVal,673InstCombiner::BuilderTy &Builder) {674ICmpInst::Predicate Pred = IC->getPredicate();675Value *CmpLHS = IC->getOperand(0);676Value *CmpRHS = IC->getOperand(1);677if (!CmpRHS->getType()->isIntOrIntVectorTy())678return nullptr;679680Value *X, *Y;681unsigned Bitwidth = CmpRHS->getType()->getScalarSizeInBits();682if ((Pred != ICmpInst::ICMP_SGT ||683!match(CmpRHS,684m_SpecificInt_ICMP(ICmpInst::ICMP_SGE, APInt(Bitwidth, -1)))) &&685(Pred != ICmpInst::ICMP_SLT ||686!match(CmpRHS,687m_SpecificInt_ICMP(ICmpInst::ICMP_SGE, APInt(Bitwidth, 0)))))688return nullptr;689690// Canonicalize so that ashr is in FalseVal.691if (Pred == ICmpInst::ICMP_SLT)692std::swap(TrueVal, FalseVal);693694if (match(TrueVal, m_LShr(m_Value(X), m_Value(Y))) &&695match(FalseVal, m_AShr(m_Specific(X), m_Specific(Y))) &&696match(CmpLHS, m_Specific(X))) {697const auto *Ashr = cast<Instruction>(FalseVal);698// if lshr is not exact and ashr is, this new ashr must not be exact.699bool IsExact = Ashr->isExact() && cast<Instruction>(TrueVal)->isExact();700return Builder.CreateAShr(X, Y, IC->getName(), IsExact);701}702703return nullptr;704}705706/// We want to turn:707/// (select (icmp eq (and X, C1), 0), Y, (BinOp Y, C2))708/// into:709/// IF C2 u>= C1710/// (BinOp Y, (shl (and X, C1), C3))711/// ELSE712/// (BinOp Y, (lshr (and X, C1), C3))713/// iff:714/// 0 on the RHS is the identity value (i.e add, xor, shl, etc...)715/// C1 and C2 are both powers of 2716/// where:717/// IF C2 u>= C1718/// C3 = Log(C2) - Log(C1)719/// ELSE720/// C3 = Log(C1) - Log(C2)721///722/// This transform handles cases where:723/// 1. The icmp predicate is inverted724/// 2. The select operands are reversed725/// 3. The magnitude of C2 and C1 are flipped726static Value *foldSelectICmpAndBinOp(const ICmpInst *IC, Value *TrueVal,727Value *FalseVal,728InstCombiner::BuilderTy &Builder) {729// Only handle integer compares. Also, if this is a vector select, we need a730// vector compare.731if (!TrueVal->getType()->isIntOrIntVectorTy() ||732TrueVal->getType()->isVectorTy() != IC->getType()->isVectorTy())733return nullptr;734735Value *CmpLHS = IC->getOperand(0);736Value *CmpRHS = IC->getOperand(1);737738unsigned C1Log;739bool NeedAnd = false;740CmpInst::Predicate Pred = IC->getPredicate();741if (IC->isEquality()) {742if (!match(CmpRHS, m_Zero()))743return nullptr;744745const APInt *C1;746if (!match(CmpLHS, m_And(m_Value(), m_Power2(C1))))747return nullptr;748749C1Log = C1->logBase2();750} else {751APInt C1;752if (!decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, CmpLHS, C1) ||753!C1.isPowerOf2())754return nullptr;755756C1Log = C1.logBase2();757NeedAnd = true;758}759760Value *Y, *V = CmpLHS;761BinaryOperator *BinOp;762const APInt *C2;763bool NeedXor;764if (match(FalseVal, m_BinOp(m_Specific(TrueVal), m_Power2(C2)))) {765Y = TrueVal;766BinOp = cast<BinaryOperator>(FalseVal);767NeedXor = Pred == ICmpInst::ICMP_NE;768} else if (match(TrueVal, m_BinOp(m_Specific(FalseVal), m_Power2(C2)))) {769Y = FalseVal;770BinOp = cast<BinaryOperator>(TrueVal);771NeedXor = Pred == ICmpInst::ICMP_EQ;772} else {773return nullptr;774}775776// Check that 0 on RHS is identity value for this binop.777auto *IdentityC =778ConstantExpr::getBinOpIdentity(BinOp->getOpcode(), BinOp->getType(),779/*AllowRHSConstant*/ true);780if (IdentityC == nullptr || !IdentityC->isNullValue())781return nullptr;782783unsigned C2Log = C2->logBase2();784785bool NeedShift = C1Log != C2Log;786bool NeedZExtTrunc = Y->getType()->getScalarSizeInBits() !=787V->getType()->getScalarSizeInBits();788789// Make sure we don't create more instructions than we save.790if ((NeedShift + NeedXor + NeedZExtTrunc + NeedAnd) >791(IC->hasOneUse() + BinOp->hasOneUse()))792return nullptr;793794if (NeedAnd) {795// Insert the AND instruction on the input to the truncate.796APInt C1 = APInt::getOneBitSet(V->getType()->getScalarSizeInBits(), C1Log);797V = Builder.CreateAnd(V, ConstantInt::get(V->getType(), C1));798}799800if (C2Log > C1Log) {801V = Builder.CreateZExtOrTrunc(V, Y->getType());802V = Builder.CreateShl(V, C2Log - C1Log);803} else if (C1Log > C2Log) {804V = Builder.CreateLShr(V, C1Log - C2Log);805V = Builder.CreateZExtOrTrunc(V, Y->getType());806} else807V = Builder.CreateZExtOrTrunc(V, Y->getType());808809if (NeedXor)810V = Builder.CreateXor(V, *C2);811812return Builder.CreateBinOp(BinOp->getOpcode(), Y, V);813}814815/// Canonicalize a set or clear of a masked set of constant bits to816/// select-of-constants form.817static Instruction *foldSetClearBits(SelectInst &Sel,818InstCombiner::BuilderTy &Builder) {819Value *Cond = Sel.getCondition();820Value *T = Sel.getTrueValue();821Value *F = Sel.getFalseValue();822Type *Ty = Sel.getType();823Value *X;824const APInt *NotC, *C;825826// Cond ? (X & ~C) : (X | C) --> (X & ~C) | (Cond ? 0 : C)827if (match(T, m_And(m_Value(X), m_APInt(NotC))) &&828match(F, m_OneUse(m_Or(m_Specific(X), m_APInt(C)))) && *NotC == ~(*C)) {829Constant *Zero = ConstantInt::getNullValue(Ty);830Constant *OrC = ConstantInt::get(Ty, *C);831Value *NewSel = Builder.CreateSelect(Cond, Zero, OrC, "masksel", &Sel);832return BinaryOperator::CreateOr(T, NewSel);833}834835// Cond ? (X | C) : (X & ~C) --> (X & ~C) | (Cond ? C : 0)836if (match(F, m_And(m_Value(X), m_APInt(NotC))) &&837match(T, m_OneUse(m_Or(m_Specific(X), m_APInt(C)))) && *NotC == ~(*C)) {838Constant *Zero = ConstantInt::getNullValue(Ty);839Constant *OrC = ConstantInt::get(Ty, *C);840Value *NewSel = Builder.CreateSelect(Cond, OrC, Zero, "masksel", &Sel);841return BinaryOperator::CreateOr(F, NewSel);842}843844return nullptr;845}846847// select (x == 0), 0, x * y --> freeze(y) * x848// select (y == 0), 0, x * y --> freeze(x) * y849// select (x == 0), undef, x * y --> freeze(y) * x850// select (x == undef), 0, x * y --> freeze(y) * x851// Usage of mul instead of 0 will make the result more poisonous,852// so the operand that was not checked in the condition should be frozen.853// The latter folding is applied only when a constant compared with x is854// is a vector consisting of 0 and undefs. If a constant compared with x855// is a scalar undefined value or undefined vector then an expression856// should be already folded into a constant.857static Instruction *foldSelectZeroOrMul(SelectInst &SI, InstCombinerImpl &IC) {858auto *CondVal = SI.getCondition();859auto *TrueVal = SI.getTrueValue();860auto *FalseVal = SI.getFalseValue();861Value *X, *Y;862ICmpInst::Predicate Predicate;863864// Assuming that constant compared with zero is not undef (but it may be865// a vector with some undef elements). Otherwise (when a constant is undef)866// the select expression should be already simplified.867if (!match(CondVal, m_ICmp(Predicate, m_Value(X), m_Zero())) ||868!ICmpInst::isEquality(Predicate))869return nullptr;870871if (Predicate == ICmpInst::ICMP_NE)872std::swap(TrueVal, FalseVal);873874// Check that TrueVal is a constant instead of matching it with m_Zero()875// to handle the case when it is a scalar undef value or a vector containing876// non-zero elements that are masked by undef elements in the compare877// constant.878auto *TrueValC = dyn_cast<Constant>(TrueVal);879if (TrueValC == nullptr ||880!match(FalseVal, m_c_Mul(m_Specific(X), m_Value(Y))) ||881!isa<Instruction>(FalseVal))882return nullptr;883884auto *ZeroC = cast<Constant>(cast<Instruction>(CondVal)->getOperand(1));885auto *MergedC = Constant::mergeUndefsWith(TrueValC, ZeroC);886// If X is compared with 0 then TrueVal could be either zero or undef.887// m_Zero match vectors containing some undef elements, but for scalars888// m_Undef should be used explicitly.889if (!match(MergedC, m_Zero()) && !match(MergedC, m_Undef()))890return nullptr;891892auto *FalseValI = cast<Instruction>(FalseVal);893auto *FrY = IC.InsertNewInstBefore(new FreezeInst(Y, Y->getName() + ".fr"),894FalseValI->getIterator());895IC.replaceOperand(*FalseValI, FalseValI->getOperand(0) == Y ? 0 : 1, FrY);896return IC.replaceInstUsesWith(SI, FalseValI);897}898899/// Transform patterns such as (a > b) ? a - b : 0 into usub.sat(a, b).900/// There are 8 commuted/swapped variants of this pattern.901/// TODO: Also support a - UMIN(a,b) patterns.902static Value *canonicalizeSaturatedSubtract(const ICmpInst *ICI,903const Value *TrueVal,904const Value *FalseVal,905InstCombiner::BuilderTy &Builder) {906ICmpInst::Predicate Pred = ICI->getPredicate();907Value *A = ICI->getOperand(0);908Value *B = ICI->getOperand(1);909910// (b > a) ? 0 : a - b -> (b <= a) ? a - b : 0911// (a == 0) ? 0 : a - 1 -> (a != 0) ? a - 1 : 0912if (match(TrueVal, m_Zero())) {913Pred = ICmpInst::getInversePredicate(Pred);914std::swap(TrueVal, FalseVal);915}916917if (!match(FalseVal, m_Zero()))918return nullptr;919920// ugt 0 is canonicalized to ne 0 and requires special handling921// (a != 0) ? a + -1 : 0 -> usub.sat(a, 1)922if (Pred == ICmpInst::ICMP_NE) {923if (match(B, m_Zero()) && match(TrueVal, m_Add(m_Specific(A), m_AllOnes())))924return Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, A,925ConstantInt::get(A->getType(), 1));926return nullptr;927}928929if (!ICmpInst::isUnsigned(Pred))930return nullptr;931932if (Pred == ICmpInst::ICMP_ULE || Pred == ICmpInst::ICMP_ULT) {933// (b < a) ? a - b : 0 -> (a > b) ? a - b : 0934std::swap(A, B);935Pred = ICmpInst::getSwappedPredicate(Pred);936}937938assert((Pred == ICmpInst::ICMP_UGE || Pred == ICmpInst::ICMP_UGT) &&939"Unexpected isUnsigned predicate!");940941// Ensure the sub is of the form:942// (a > b) ? a - b : 0 -> usub.sat(a, b)943// (a > b) ? b - a : 0 -> -usub.sat(a, b)944// Checking for both a-b and a+(-b) as a constant.945bool IsNegative = false;946const APInt *C;947if (match(TrueVal, m_Sub(m_Specific(B), m_Specific(A))) ||948(match(A, m_APInt(C)) &&949match(TrueVal, m_Add(m_Specific(B), m_SpecificInt(-*C)))))950IsNegative = true;951else if (!match(TrueVal, m_Sub(m_Specific(A), m_Specific(B))) &&952!(match(B, m_APInt(C)) &&953match(TrueVal, m_Add(m_Specific(A), m_SpecificInt(-*C)))))954return nullptr;955956// If we are adding a negate and the sub and icmp are used anywhere else, we957// would end up with more instructions.958if (IsNegative && !TrueVal->hasOneUse() && !ICI->hasOneUse())959return nullptr;960961// (a > b) ? a - b : 0 -> usub.sat(a, b)962// (a > b) ? b - a : 0 -> -usub.sat(a, b)963Value *Result = Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, A, B);964if (IsNegative)965Result = Builder.CreateNeg(Result);966return Result;967}968969static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,970InstCombiner::BuilderTy &Builder) {971if (!Cmp->hasOneUse())972return nullptr;973974// Match unsigned saturated add with constant.975Value *Cmp0 = Cmp->getOperand(0);976Value *Cmp1 = Cmp->getOperand(1);977ICmpInst::Predicate Pred = Cmp->getPredicate();978Value *X;979const APInt *C, *CmpC;980if (Pred == ICmpInst::ICMP_ULT &&981match(TVal, m_Add(m_Value(X), m_APInt(C))) && X == Cmp0 &&982match(FVal, m_AllOnes()) && match(Cmp1, m_APInt(CmpC)) && *CmpC == ~*C) {983// (X u< ~C) ? (X + C) : -1 --> uadd.sat(X, C)984return Builder.CreateBinaryIntrinsic(985Intrinsic::uadd_sat, X, ConstantInt::get(X->getType(), *C));986}987988// Match unsigned saturated add of 2 variables with an unnecessary 'not'.989// There are 8 commuted variants.990// Canonicalize -1 (saturated result) to true value of the select.991if (match(FVal, m_AllOnes())) {992std::swap(TVal, FVal);993Pred = CmpInst::getInversePredicate(Pred);994}995if (!match(TVal, m_AllOnes()))996return nullptr;997998// Canonicalize predicate to less-than or less-or-equal-than.999if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {1000std::swap(Cmp0, Cmp1);1001Pred = CmpInst::getSwappedPredicate(Pred);1002}1003if (Pred != ICmpInst::ICMP_ULT && Pred != ICmpInst::ICMP_ULE)1004return nullptr;10051006// Match unsigned saturated add of 2 variables with an unnecessary 'not'.1007// Strictness of the comparison is irrelevant.1008Value *Y;1009if (match(Cmp0, m_Not(m_Value(X))) &&1010match(FVal, m_c_Add(m_Specific(X), m_Value(Y))) && Y == Cmp1) {1011// (~X u< Y) ? -1 : (X + Y) --> uadd.sat(X, Y)1012// (~X u< Y) ? -1 : (Y + X) --> uadd.sat(X, Y)1013return Builder.CreateBinaryIntrinsic(Intrinsic::uadd_sat, X, Y);1014}1015// The 'not' op may be included in the sum but not the compare.1016// Strictness of the comparison is irrelevant.1017X = Cmp0;1018Y = Cmp1;1019if (match(FVal, m_c_Add(m_Not(m_Specific(X)), m_Specific(Y)))) {1020// (X u< Y) ? -1 : (~X + Y) --> uadd.sat(~X, Y)1021// (X u< Y) ? -1 : (Y + ~X) --> uadd.sat(Y, ~X)1022BinaryOperator *BO = cast<BinaryOperator>(FVal);1023return Builder.CreateBinaryIntrinsic(1024Intrinsic::uadd_sat, BO->getOperand(0), BO->getOperand(1));1025}1026// The overflow may be detected via the add wrapping round.1027// This is only valid for strict comparison!1028if (Pred == ICmpInst::ICMP_ULT &&1029match(Cmp0, m_c_Add(m_Specific(Cmp1), m_Value(Y))) &&1030match(FVal, m_c_Add(m_Specific(Cmp1), m_Specific(Y)))) {1031// ((X + Y) u< X) ? -1 : (X + Y) --> uadd.sat(X, Y)1032// ((X + Y) u< Y) ? -1 : (X + Y) --> uadd.sat(X, Y)1033return Builder.CreateBinaryIntrinsic(Intrinsic::uadd_sat, Cmp1, Y);1034}10351036return nullptr;1037}10381039/// Try to match patterns with select and subtract as absolute difference.1040static Value *foldAbsDiff(ICmpInst *Cmp, Value *TVal, Value *FVal,1041InstCombiner::BuilderTy &Builder) {1042auto *TI = dyn_cast<Instruction>(TVal);1043auto *FI = dyn_cast<Instruction>(FVal);1044if (!TI || !FI)1045return nullptr;10461047// Normalize predicate to gt/lt rather than ge/le.1048ICmpInst::Predicate Pred = Cmp->getStrictPredicate();1049Value *A = Cmp->getOperand(0);1050Value *B = Cmp->getOperand(1);10511052// Normalize "A - B" as the true value of the select.1053if (match(FI, m_Sub(m_Specific(A), m_Specific(B)))) {1054std::swap(FI, TI);1055Pred = ICmpInst::getSwappedPredicate(Pred);1056}10571058// With any pair of no-wrap subtracts:1059// (A > B) ? (A - B) : (B - A) --> abs(A - B)1060if (Pred == CmpInst::ICMP_SGT &&1061match(TI, m_Sub(m_Specific(A), m_Specific(B))) &&1062match(FI, m_Sub(m_Specific(B), m_Specific(A))) &&1063(TI->hasNoSignedWrap() || TI->hasNoUnsignedWrap()) &&1064(FI->hasNoSignedWrap() || FI->hasNoUnsignedWrap())) {1065// The remaining subtract is not "nuw" any more.1066// If there's one use of the subtract (no other use than the use we are1067// about to replace), then we know that the sub is "nsw" in this context1068// even if it was only "nuw" before. If there's another use, then we can't1069// add "nsw" to the existing instruction because it may not be safe in the1070// other user's context.1071TI->setHasNoUnsignedWrap(false);1072if (!TI->hasNoSignedWrap())1073TI->setHasNoSignedWrap(TI->hasOneUse());1074return Builder.CreateBinaryIntrinsic(Intrinsic::abs, TI, Builder.getTrue());1075}10761077return nullptr;1078}10791080/// Fold the following code sequence:1081/// \code1082/// int a = ctlz(x & -x);1083// x ? 31 - a : a;1084// // or1085// x ? 31 - a : 32;1086/// \code1087///1088/// into:1089/// cttz(x)1090static Instruction *foldSelectCtlzToCttz(ICmpInst *ICI, Value *TrueVal,1091Value *FalseVal,1092InstCombiner::BuilderTy &Builder) {1093unsigned BitWidth = TrueVal->getType()->getScalarSizeInBits();1094if (!ICI->isEquality() || !match(ICI->getOperand(1), m_Zero()))1095return nullptr;10961097if (ICI->getPredicate() == ICmpInst::ICMP_NE)1098std::swap(TrueVal, FalseVal);10991100Value *Ctlz;1101if (!match(FalseVal,1102m_Xor(m_Value(Ctlz), m_SpecificInt(BitWidth - 1))))1103return nullptr;11041105if (!match(Ctlz, m_Intrinsic<Intrinsic::ctlz>()))1106return nullptr;11071108if (TrueVal != Ctlz && !match(TrueVal, m_SpecificInt(BitWidth)))1109return nullptr;11101111Value *X = ICI->getOperand(0);1112auto *II = cast<IntrinsicInst>(Ctlz);1113if (!match(II->getOperand(0), m_c_And(m_Specific(X), m_Neg(m_Specific(X)))))1114return nullptr;11151116Function *F = Intrinsic::getDeclaration(II->getModule(), Intrinsic::cttz,1117II->getType());1118return CallInst::Create(F, {X, II->getArgOperand(1)});1119}11201121/// Attempt to fold a cttz/ctlz followed by a icmp plus select into a single1122/// call to cttz/ctlz with flag 'is_zero_poison' cleared.1123///1124/// For example, we can fold the following code sequence:1125/// \code1126/// %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)1127/// %1 = icmp ne i32 %x, 01128/// %2 = select i1 %1, i32 %0, i32 321129/// \code1130///1131/// into:1132/// %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)1133static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal,1134InstCombinerImpl &IC) {1135ICmpInst::Predicate Pred = ICI->getPredicate();1136Value *CmpLHS = ICI->getOperand(0);1137Value *CmpRHS = ICI->getOperand(1);11381139// Check if the select condition compares a value for equality.1140if (!ICI->isEquality())1141return nullptr;11421143Value *SelectArg = FalseVal;1144Value *ValueOnZero = TrueVal;1145if (Pred == ICmpInst::ICMP_NE)1146std::swap(SelectArg, ValueOnZero);11471148// Skip zero extend/truncate.1149Value *Count = nullptr;1150if (!match(SelectArg, m_ZExt(m_Value(Count))) &&1151!match(SelectArg, m_Trunc(m_Value(Count))))1152Count = SelectArg;11531154// Check that 'Count' is a call to intrinsic cttz/ctlz. Also check that the1155// input to the cttz/ctlz is used as LHS for the compare instruction.1156Value *X;1157if (!match(Count, m_Intrinsic<Intrinsic::cttz>(m_Value(X))) &&1158!match(Count, m_Intrinsic<Intrinsic::ctlz>(m_Value(X))))1159return nullptr;11601161// (X == 0) ? BitWidth : ctz(X)1162// (X == -1) ? BitWidth : ctz(~X)1163if ((X != CmpLHS || !match(CmpRHS, m_Zero())) &&1164(!match(X, m_Not(m_Specific(CmpLHS))) || !match(CmpRHS, m_AllOnes())))1165return nullptr;11661167IntrinsicInst *II = cast<IntrinsicInst>(Count);11681169// Check if the value propagated on zero is a constant number equal to the1170// sizeof in bits of 'Count'.1171unsigned SizeOfInBits = Count->getType()->getScalarSizeInBits();1172if (match(ValueOnZero, m_SpecificInt(SizeOfInBits))) {1173// Explicitly clear the 'is_zero_poison' flag. It's always valid to go from1174// true to false on this flag, so we can replace it for all users.1175II->setArgOperand(1, ConstantInt::getFalse(II->getContext()));1176// A range annotation on the intrinsic may no longer be valid.1177II->dropPoisonGeneratingAnnotations();1178IC.addToWorklist(II);1179return SelectArg;1180}11811182// The ValueOnZero is not the bitwidth. But if the cttz/ctlz (and optional1183// zext/trunc) have one use (ending at the select), the cttz/ctlz result will1184// not be used if the input is zero. Relax to 'zero is poison' for that case.1185if (II->hasOneUse() && SelectArg->hasOneUse() &&1186!match(II->getArgOperand(1), m_One()))1187II->setArgOperand(1, ConstantInt::getTrue(II->getContext()));11881189return nullptr;1190}11911192static Value *canonicalizeSPF(ICmpInst &Cmp, Value *TrueVal, Value *FalseVal,1193InstCombinerImpl &IC) {1194Value *LHS, *RHS;1195// TODO: What to do with pointer min/max patterns?1196if (!TrueVal->getType()->isIntOrIntVectorTy())1197return nullptr;11981199SelectPatternFlavor SPF =1200matchDecomposedSelectPattern(&Cmp, TrueVal, FalseVal, LHS, RHS).Flavor;1201if (SPF == SelectPatternFlavor::SPF_ABS ||1202SPF == SelectPatternFlavor::SPF_NABS) {1203if (!Cmp.hasOneUse() && !RHS->hasOneUse())1204return nullptr; // TODO: Relax this restriction.12051206// Note that NSW flag can only be propagated for normal, non-negated abs!1207bool IntMinIsPoison = SPF == SelectPatternFlavor::SPF_ABS &&1208match(RHS, m_NSWNeg(m_Specific(LHS)));1209Constant *IntMinIsPoisonC =1210ConstantInt::get(Type::getInt1Ty(Cmp.getContext()), IntMinIsPoison);1211Value *Abs =1212IC.Builder.CreateBinaryIntrinsic(Intrinsic::abs, LHS, IntMinIsPoisonC);12131214if (SPF == SelectPatternFlavor::SPF_NABS)1215return IC.Builder.CreateNeg(Abs); // Always without NSW flag!1216return Abs;1217}12181219if (SelectPatternResult::isMinOrMax(SPF)) {1220Intrinsic::ID IntrinsicID;1221switch (SPF) {1222case SelectPatternFlavor::SPF_UMIN:1223IntrinsicID = Intrinsic::umin;1224break;1225case SelectPatternFlavor::SPF_UMAX:1226IntrinsicID = Intrinsic::umax;1227break;1228case SelectPatternFlavor::SPF_SMIN:1229IntrinsicID = Intrinsic::smin;1230break;1231case SelectPatternFlavor::SPF_SMAX:1232IntrinsicID = Intrinsic::smax;1233break;1234default:1235llvm_unreachable("Unexpected SPF");1236}1237return IC.Builder.CreateBinaryIntrinsic(IntrinsicID, LHS, RHS);1238}12391240return nullptr;1241}12421243bool InstCombinerImpl::replaceInInstruction(Value *V, Value *Old, Value *New,1244unsigned Depth) {1245// Conservatively limit replacement to two instructions upwards.1246if (Depth == 2)1247return false;12481249assert(!isa<Constant>(Old) && "Only replace non-constant values");12501251auto *I = dyn_cast<Instruction>(V);1252if (!I || !I->hasOneUse() ||1253!isSafeToSpeculativelyExecuteWithVariableReplaced(I))1254return false;12551256bool Changed = false;1257for (Use &U : I->operands()) {1258if (U == Old) {1259replaceUse(U, New);1260Worklist.add(I);1261Changed = true;1262} else {1263Changed |= replaceInInstruction(U, Old, New, Depth + 1);1264}1265}1266return Changed;1267}12681269/// If we have a select with an equality comparison, then we know the value in1270/// one of the arms of the select. See if substituting this value into an arm1271/// and simplifying the result yields the same value as the other arm.1272///1273/// To make this transform safe, we must drop poison-generating flags1274/// (nsw, etc) if we simplified to a binop because the select may be guarding1275/// that poison from propagating. If the existing binop already had no1276/// poison-generating flags, then this transform can be done by instsimplify.1277///1278/// Consider:1279/// %cmp = icmp eq i32 %x, 21474836471280/// %add = add nsw i32 %x, 11281/// %sel = select i1 %cmp, i32 -2147483648, i32 %add1282///1283/// We can't replace %sel with %add unless we strip away the flags.1284/// TODO: Wrapping flags could be preserved in some cases with better analysis.1285Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel,1286ICmpInst &Cmp) {1287if (!Cmp.isEquality())1288return nullptr;12891290// Canonicalize the pattern to ICMP_EQ by swapping the select operands.1291Value *TrueVal = Sel.getTrueValue(), *FalseVal = Sel.getFalseValue();1292bool Swapped = false;1293if (Cmp.getPredicate() == ICmpInst::ICMP_NE) {1294std::swap(TrueVal, FalseVal);1295Swapped = true;1296}12971298Value *CmpLHS = Cmp.getOperand(0), *CmpRHS = Cmp.getOperand(1);1299auto ReplaceOldOpWithNewOp = [&](Value *OldOp,1300Value *NewOp) -> Instruction * {1301// In X == Y ? f(X) : Z, try to evaluate f(Y) and replace the operand.1302// Take care to avoid replacing X == Y ? X : Z with X == Y ? Y : Z, as that1303// would lead to an infinite replacement cycle.1304// If we will be able to evaluate f(Y) to a constant, we can allow undef,1305// otherwise Y cannot be undef as we might pick different values for undef1306// in the icmp and in f(Y).1307if (TrueVal == OldOp)1308return nullptr;13091310if (Value *V = simplifyWithOpReplaced(TrueVal, OldOp, NewOp, SQ,1311/* AllowRefinement=*/true)) {1312// Need some guarantees about the new simplified op to ensure we don't inf1313// loop.1314// If we simplify to a constant, replace if we aren't creating new undef.1315if (match(V, m_ImmConstant()) &&1316isGuaranteedNotToBeUndef(V, SQ.AC, &Sel, &DT))1317return replaceOperand(Sel, Swapped ? 2 : 1, V);13181319// If NewOp is a constant and OldOp is not replace iff NewOp doesn't1320// contain and undef elements.1321if (match(NewOp, m_ImmConstant()) || NewOp == V) {1322if (isGuaranteedNotToBeUndef(NewOp, SQ.AC, &Sel, &DT))1323return replaceOperand(Sel, Swapped ? 2 : 1, V);1324return nullptr;1325}1326}13271328// Even if TrueVal does not simplify, we can directly replace a use of1329// CmpLHS with CmpRHS, as long as the instruction is not used anywhere1330// else and is safe to speculatively execute (we may end up executing it1331// with different operands, which should not cause side-effects or trigger1332// undefined behavior). Only do this if CmpRHS is a constant, as1333// profitability is not clear for other cases.1334// FIXME: Support vectors.1335if (OldOp == CmpLHS && match(NewOp, m_ImmConstant()) &&1336!match(OldOp, m_Constant()) && !Cmp.getType()->isVectorTy() &&1337isGuaranteedNotToBeUndef(NewOp, SQ.AC, &Sel, &DT))1338if (replaceInInstruction(TrueVal, OldOp, NewOp))1339return &Sel;1340return nullptr;1341};13421343if (Instruction *R = ReplaceOldOpWithNewOp(CmpLHS, CmpRHS))1344return R;1345if (Instruction *R = ReplaceOldOpWithNewOp(CmpRHS, CmpLHS))1346return R;13471348auto *FalseInst = dyn_cast<Instruction>(FalseVal);1349if (!FalseInst)1350return nullptr;13511352// InstSimplify already performed this fold if it was possible subject to1353// current poison-generating flags. Check whether dropping poison-generating1354// flags enables the transform.13551356// Try each equivalence substitution possibility.1357// We have an 'EQ' comparison, so the select's false value will propagate.1358// Example:1359// (X == 42) ? 43 : (X + 1) --> (X == 42) ? (X + 1) : (X + 1) --> X + 11360SmallVector<Instruction *> DropFlags;1361if (simplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, SQ,1362/* AllowRefinement */ false,1363&DropFlags) == TrueVal ||1364simplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, SQ,1365/* AllowRefinement */ false,1366&DropFlags) == TrueVal) {1367for (Instruction *I : DropFlags) {1368I->dropPoisonGeneratingAnnotations();1369Worklist.add(I);1370}13711372return replaceInstUsesWith(Sel, FalseVal);1373}13741375return nullptr;1376}13771378// See if this is a pattern like:1379// %old_cmp1 = icmp slt i32 %x, C21380// %old_replacement = select i1 %old_cmp1, i32 %target_low, i32 %target_high1381// %old_x_offseted = add i32 %x, C11382// %old_cmp0 = icmp ult i32 %old_x_offseted, C01383// %r = select i1 %old_cmp0, i32 %x, i32 %old_replacement1384// This can be rewritten as more canonical pattern:1385// %new_cmp1 = icmp slt i32 %x, -C11386// %new_cmp2 = icmp sge i32 %x, C0-C11387// %new_clamped_low = select i1 %new_cmp1, i32 %target_low, i32 %x1388// %r = select i1 %new_cmp2, i32 %target_high, i32 %new_clamped_low1389// Iff -C1 s<= C2 s<= C0-C11390// Also ULT predicate can also be UGT iff C0 != -1 (+invert result)1391// SLT predicate can also be SGT iff C2 != INT_MAX (+invert res.)1392static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,1393InstCombiner::BuilderTy &Builder,1394InstCombiner &IC) {1395Value *X = Sel0.getTrueValue();1396Value *Sel1 = Sel0.getFalseValue();13971398// First match the condition of the outermost select.1399// Said condition must be one-use.1400if (!Cmp0.hasOneUse())1401return nullptr;1402ICmpInst::Predicate Pred0 = Cmp0.getPredicate();1403Value *Cmp00 = Cmp0.getOperand(0);1404Constant *C0;1405if (!match(Cmp0.getOperand(1),1406m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C0))))1407return nullptr;14081409if (!isa<SelectInst>(Sel1)) {1410Pred0 = ICmpInst::getInversePredicate(Pred0);1411std::swap(X, Sel1);1412}14131414// Canonicalize Cmp0 into ult or uge.1415// FIXME: we shouldn't care about lanes that are 'undef' in the end?1416switch (Pred0) {1417case ICmpInst::Predicate::ICMP_ULT:1418case ICmpInst::Predicate::ICMP_UGE:1419// Although icmp ult %x, 0 is an unusual thing to try and should generally1420// have been simplified, it does not verify with undef inputs so ensure we1421// are not in a strange state.1422if (!match(C0, m_SpecificInt_ICMP(1423ICmpInst::Predicate::ICMP_NE,1424APInt::getZero(C0->getType()->getScalarSizeInBits()))))1425return nullptr;1426break; // Great!1427case ICmpInst::Predicate::ICMP_ULE:1428case ICmpInst::Predicate::ICMP_UGT:1429// We want to canonicalize it to 'ult' or 'uge', so we'll need to increment1430// C0, which again means it must not have any all-ones elements.1431if (!match(C0,1432m_SpecificInt_ICMP(1433ICmpInst::Predicate::ICMP_NE,1434APInt::getAllOnes(C0->getType()->getScalarSizeInBits()))))1435return nullptr; // Can't do, have all-ones element[s].1436Pred0 = ICmpInst::getFlippedStrictnessPredicate(Pred0);1437C0 = InstCombiner::AddOne(C0);1438break;1439default:1440return nullptr; // Unknown predicate.1441}14421443// Now that we've canonicalized the ICmp, we know the X we expect;1444// the select in other hand should be one-use.1445if (!Sel1->hasOneUse())1446return nullptr;14471448// If the types do not match, look through any truncs to the underlying1449// instruction.1450if (Cmp00->getType() != X->getType() && X->hasOneUse())1451match(X, m_TruncOrSelf(m_Value(X)));14521453// We now can finish matching the condition of the outermost select:1454// it should either be the X itself, or an addition of some constant to X.1455Constant *C1;1456if (Cmp00 == X)1457C1 = ConstantInt::getNullValue(X->getType());1458else if (!match(Cmp00,1459m_Add(m_Specific(X),1460m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C1)))))1461return nullptr;14621463Value *Cmp1;1464ICmpInst::Predicate Pred1;1465Constant *C2;1466Value *ReplacementLow, *ReplacementHigh;1467if (!match(Sel1, m_Select(m_Value(Cmp1), m_Value(ReplacementLow),1468m_Value(ReplacementHigh))) ||1469!match(Cmp1,1470m_ICmp(Pred1, m_Specific(X),1471m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C2)))))1472return nullptr;14731474if (!Cmp1->hasOneUse() && (Cmp00 == X || !Cmp00->hasOneUse()))1475return nullptr; // Not enough one-use instructions for the fold.1476// FIXME: this restriction could be relaxed if Cmp1 can be reused as one of1477// two comparisons we'll need to build.14781479// Canonicalize Cmp1 into the form we expect.1480// FIXME: we shouldn't care about lanes that are 'undef' in the end?1481switch (Pred1) {1482case ICmpInst::Predicate::ICMP_SLT:1483break;1484case ICmpInst::Predicate::ICMP_SLE:1485// We'd have to increment C2 by one, and for that it must not have signed1486// max element, but then it would have been canonicalized to 'slt' before1487// we get here. So we can't do anything useful with 'sle'.1488return nullptr;1489case ICmpInst::Predicate::ICMP_SGT:1490// We want to canonicalize it to 'slt', so we'll need to increment C2,1491// which again means it must not have any signed max elements.1492if (!match(C2,1493m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_NE,1494APInt::getSignedMaxValue(1495C2->getType()->getScalarSizeInBits()))))1496return nullptr; // Can't do, have signed max element[s].1497C2 = InstCombiner::AddOne(C2);1498[[fallthrough]];1499case ICmpInst::Predicate::ICMP_SGE:1500// Also non-canonical, but here we don't need to change C2,1501// so we don't have any restrictions on C2, so we can just handle it.1502Pred1 = ICmpInst::Predicate::ICMP_SLT;1503std::swap(ReplacementLow, ReplacementHigh);1504break;1505default:1506return nullptr; // Unknown predicate.1507}1508assert(Pred1 == ICmpInst::Predicate::ICMP_SLT &&1509"Unexpected predicate type.");15101511// The thresholds of this clamp-like pattern.1512auto *ThresholdLowIncl = ConstantExpr::getNeg(C1);1513auto *ThresholdHighExcl = ConstantExpr::getSub(C0, C1);15141515assert((Pred0 == ICmpInst::Predicate::ICMP_ULT ||1516Pred0 == ICmpInst::Predicate::ICMP_UGE) &&1517"Unexpected predicate type.");1518if (Pred0 == ICmpInst::Predicate::ICMP_UGE)1519std::swap(ThresholdLowIncl, ThresholdHighExcl);15201521// The fold has a precondition 1: C2 s>= ThresholdLow1522auto *Precond1 = ConstantFoldCompareInstOperands(1523ICmpInst::Predicate::ICMP_SGE, C2, ThresholdLowIncl, IC.getDataLayout());1524if (!Precond1 || !match(Precond1, m_One()))1525return nullptr;1526// The fold has a precondition 2: C2 s<= ThresholdHigh1527auto *Precond2 = ConstantFoldCompareInstOperands(1528ICmpInst::Predicate::ICMP_SLE, C2, ThresholdHighExcl, IC.getDataLayout());1529if (!Precond2 || !match(Precond2, m_One()))1530return nullptr;15311532// If we are matching from a truncated input, we need to sext the1533// ReplacementLow and ReplacementHigh values. Only do the transform if they1534// are free to extend due to being constants.1535if (X->getType() != Sel0.getType()) {1536Constant *LowC, *HighC;1537if (!match(ReplacementLow, m_ImmConstant(LowC)) ||1538!match(ReplacementHigh, m_ImmConstant(HighC)))1539return nullptr;1540const DataLayout &DL = Sel0.getDataLayout();1541ReplacementLow =1542ConstantFoldCastOperand(Instruction::SExt, LowC, X->getType(), DL);1543ReplacementHigh =1544ConstantFoldCastOperand(Instruction::SExt, HighC, X->getType(), DL);1545assert(ReplacementLow && ReplacementHigh &&1546"Constant folding of ImmConstant cannot fail");1547}15481549// All good, finally emit the new pattern.1550Value *ShouldReplaceLow = Builder.CreateICmpSLT(X, ThresholdLowIncl);1551Value *ShouldReplaceHigh = Builder.CreateICmpSGE(X, ThresholdHighExcl);1552Value *MaybeReplacedLow =1553Builder.CreateSelect(ShouldReplaceLow, ReplacementLow, X);15541555// Create the final select. If we looked through a truncate above, we will1556// need to retruncate the result.1557Value *MaybeReplacedHigh = Builder.CreateSelect(1558ShouldReplaceHigh, ReplacementHigh, MaybeReplacedLow);1559return Builder.CreateTrunc(MaybeReplacedHigh, Sel0.getType());1560}15611562// If we have1563// %cmp = icmp [canonical predicate] i32 %x, C01564// %r = select i1 %cmp, i32 %y, i32 C11565// Where C0 != C1 and %x may be different from %y, see if the constant that we1566// will have if we flip the strictness of the predicate (i.e. without changing1567// the result) is identical to the C1 in select. If it matches we can change1568// original comparison to one with swapped predicate, reuse the constant,1569// and swap the hands of select.1570static Instruction *1571tryToReuseConstantFromSelectInComparison(SelectInst &Sel, ICmpInst &Cmp,1572InstCombinerImpl &IC) {1573ICmpInst::Predicate Pred;1574Value *X;1575Constant *C0;1576if (!match(&Cmp, m_OneUse(m_ICmp(1577Pred, m_Value(X),1578m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C0))))))1579return nullptr;15801581// If comparison predicate is non-relational, we won't be able to do anything.1582if (ICmpInst::isEquality(Pred))1583return nullptr;15841585// If comparison predicate is non-canonical, then we certainly won't be able1586// to make it canonical; canonicalizeCmpWithConstant() already tried.1587if (!InstCombiner::isCanonicalPredicate(Pred))1588return nullptr;15891590// If the [input] type of comparison and select type are different, lets abort1591// for now. We could try to compare constants with trunc/[zs]ext though.1592if (C0->getType() != Sel.getType())1593return nullptr;15941595// ULT with 'add' of a constant is canonical. See foldICmpAddConstant().1596// FIXME: Are there more magic icmp predicate+constant pairs we must avoid?1597// Or should we just abandon this transform entirely?1598if (Pred == CmpInst::ICMP_ULT && match(X, m_Add(m_Value(), m_Constant())))1599return nullptr;160016011602Value *SelVal0, *SelVal1; // We do not care which one is from where.1603match(&Sel, m_Select(m_Value(), m_Value(SelVal0), m_Value(SelVal1)));1604// At least one of these values we are selecting between must be a constant1605// else we'll never succeed.1606if (!match(SelVal0, m_AnyIntegralConstant()) &&1607!match(SelVal1, m_AnyIntegralConstant()))1608return nullptr;16091610// Does this constant C match any of the `select` values?1611auto MatchesSelectValue = [SelVal0, SelVal1](Constant *C) {1612return C->isElementWiseEqual(SelVal0) || C->isElementWiseEqual(SelVal1);1613};16141615// If C0 *already* matches true/false value of select, we are done.1616if (MatchesSelectValue(C0))1617return nullptr;16181619// Check the constant we'd have with flipped-strictness predicate.1620auto FlippedStrictness =1621InstCombiner::getFlippedStrictnessPredicateAndConstant(Pred, C0);1622if (!FlippedStrictness)1623return nullptr;16241625// If said constant doesn't match either, then there is no hope,1626if (!MatchesSelectValue(FlippedStrictness->second))1627return nullptr;16281629// It matched! Lets insert the new comparison just before select.1630InstCombiner::BuilderTy::InsertPointGuard Guard(IC.Builder);1631IC.Builder.SetInsertPoint(&Sel);16321633Pred = ICmpInst::getSwappedPredicate(Pred); // Yes, swapped.1634Value *NewCmp = IC.Builder.CreateICmp(Pred, X, FlippedStrictness->second,1635Cmp.getName() + ".inv");1636IC.replaceOperand(Sel, 0, NewCmp);1637Sel.swapValues();1638Sel.swapProfMetadata();16391640return &Sel;1641}16421643static Instruction *foldSelectZeroOrOnes(ICmpInst *Cmp, Value *TVal,1644Value *FVal,1645InstCombiner::BuilderTy &Builder) {1646if (!Cmp->hasOneUse())1647return nullptr;16481649const APInt *CmpC;1650if (!match(Cmp->getOperand(1), m_APIntAllowPoison(CmpC)))1651return nullptr;16521653// (X u< 2) ? -X : -1 --> sext (X != 0)1654Value *X = Cmp->getOperand(0);1655if (Cmp->getPredicate() == ICmpInst::ICMP_ULT && *CmpC == 2 &&1656match(TVal, m_Neg(m_Specific(X))) && match(FVal, m_AllOnes()))1657return new SExtInst(Builder.CreateIsNotNull(X), TVal->getType());16581659// (X u> 1) ? -1 : -X --> sext (X != 0)1660if (Cmp->getPredicate() == ICmpInst::ICMP_UGT && *CmpC == 1 &&1661match(FVal, m_Neg(m_Specific(X))) && match(TVal, m_AllOnes()))1662return new SExtInst(Builder.CreateIsNotNull(X), TVal->getType());16631664return nullptr;1665}16661667static Value *foldSelectInstWithICmpConst(SelectInst &SI, ICmpInst *ICI,1668InstCombiner::BuilderTy &Builder) {1669const APInt *CmpC;1670Value *V;1671CmpInst::Predicate Pred;1672if (!match(ICI, m_ICmp(Pred, m_Value(V), m_APInt(CmpC))))1673return nullptr;16741675// Match clamp away from min/max value as a max/min operation.1676Value *TVal = SI.getTrueValue();1677Value *FVal = SI.getFalseValue();1678if (Pred == ICmpInst::ICMP_EQ && V == FVal) {1679// (V == UMIN) ? UMIN+1 : V --> umax(V, UMIN+1)1680if (CmpC->isMinValue() && match(TVal, m_SpecificInt(*CmpC + 1)))1681return Builder.CreateBinaryIntrinsic(Intrinsic::umax, V, TVal);1682// (V == UMAX) ? UMAX-1 : V --> umin(V, UMAX-1)1683if (CmpC->isMaxValue() && match(TVal, m_SpecificInt(*CmpC - 1)))1684return Builder.CreateBinaryIntrinsic(Intrinsic::umin, V, TVal);1685// (V == SMIN) ? SMIN+1 : V --> smax(V, SMIN+1)1686if (CmpC->isMinSignedValue() && match(TVal, m_SpecificInt(*CmpC + 1)))1687return Builder.CreateBinaryIntrinsic(Intrinsic::smax, V, TVal);1688// (V == SMAX) ? SMAX-1 : V --> smin(V, SMAX-1)1689if (CmpC->isMaxSignedValue() && match(TVal, m_SpecificInt(*CmpC - 1)))1690return Builder.CreateBinaryIntrinsic(Intrinsic::smin, V, TVal);1691}16921693BinaryOperator *BO;1694const APInt *C;1695CmpInst::Predicate CPred;1696if (match(&SI, m_Select(m_Specific(ICI), m_APInt(C), m_BinOp(BO))))1697CPred = ICI->getPredicate();1698else if (match(&SI, m_Select(m_Specific(ICI), m_BinOp(BO), m_APInt(C))))1699CPred = ICI->getInversePredicate();1700else1701return nullptr;17021703const APInt *BinOpC;1704if (!match(BO, m_BinOp(m_Specific(V), m_APInt(BinOpC))))1705return nullptr;17061707ConstantRange R = ConstantRange::makeExactICmpRegion(CPred, *CmpC)1708.binaryOp(BO->getOpcode(), *BinOpC);1709if (R == *C) {1710BO->dropPoisonGeneratingFlags();1711return BO;1712}1713return nullptr;1714}17151716static Instruction *foldSelectICmpEq(SelectInst &SI, ICmpInst *ICI,1717InstCombinerImpl &IC) {1718ICmpInst::Predicate Pred = ICI->getPredicate();1719if (!ICmpInst::isEquality(Pred))1720return nullptr;17211722Value *TrueVal = SI.getTrueValue();1723Value *FalseVal = SI.getFalseValue();1724Value *CmpLHS = ICI->getOperand(0);1725Value *CmpRHS = ICI->getOperand(1);17261727if (Pred == ICmpInst::ICMP_NE)1728std::swap(TrueVal, FalseVal);17291730// Transform (X == C) ? X : Y -> (X == C) ? C : Y1731// specific handling for Bitwise operation.1732// x&y -> (x|y) ^ (x^y) or (x|y) & ~(x^y)1733// x|y -> (x&y) | (x^y) or (x&y) ^ (x^y)1734// x^y -> (x|y) ^ (x&y) or (x|y) & ~(x&y)1735Value *X, *Y;1736if (!match(CmpLHS, m_BitwiseLogic(m_Value(X), m_Value(Y))) ||1737!match(TrueVal, m_c_BitwiseLogic(m_Specific(X), m_Specific(Y))))1738return nullptr;17391740const unsigned AndOps = Instruction::And, OrOps = Instruction::Or,1741XorOps = Instruction::Xor, NoOps = 0;1742enum NotMask { None = 0, NotInner, NotRHS };17431744auto matchFalseVal = [&](unsigned OuterOpc, unsigned InnerOpc,1745unsigned NotMask) {1746auto matchInner = m_c_BinOp(InnerOpc, m_Specific(X), m_Specific(Y));1747if (OuterOpc == NoOps)1748return match(CmpRHS, m_Zero()) && match(FalseVal, matchInner);17491750if (NotMask == NotInner) {1751return match(FalseVal, m_c_BinOp(OuterOpc, m_NotForbidPoison(matchInner),1752m_Specific(CmpRHS)));1753} else if (NotMask == NotRHS) {1754return match(FalseVal, m_c_BinOp(OuterOpc, matchInner,1755m_NotForbidPoison(m_Specific(CmpRHS))));1756} else {1757return match(FalseVal,1758m_c_BinOp(OuterOpc, matchInner, m_Specific(CmpRHS)));1759}1760};17611762// (X&Y)==C ? X|Y : X^Y -> (X^Y)|C : X^Y or (X^Y)^ C : X^Y1763// (X&Y)==C ? X^Y : X|Y -> (X|Y)^C : X|Y or (X|Y)&~C : X|Y1764if (match(CmpLHS, m_And(m_Value(X), m_Value(Y)))) {1765if (match(TrueVal, m_c_Or(m_Specific(X), m_Specific(Y)))) {1766// (X&Y)==C ? X|Y : (X^Y)|C -> (X^Y)|C : (X^Y)|C -> (X^Y)|C1767// (X&Y)==C ? X|Y : (X^Y)^C -> (X^Y)^C : (X^Y)^C -> (X^Y)^C1768if (matchFalseVal(OrOps, XorOps, None) ||1769matchFalseVal(XorOps, XorOps, None))1770return IC.replaceInstUsesWith(SI, FalseVal);1771} else if (match(TrueVal, m_c_Xor(m_Specific(X), m_Specific(Y)))) {1772// (X&Y)==C ? X^Y : (X|Y)^ C -> (X|Y)^ C : (X|Y)^ C -> (X|Y)^ C1773// (X&Y)==C ? X^Y : (X|Y)&~C -> (X|Y)&~C : (X|Y)&~C -> (X|Y)&~C1774if (matchFalseVal(XorOps, OrOps, None) ||1775matchFalseVal(AndOps, OrOps, NotRHS))1776return IC.replaceInstUsesWith(SI, FalseVal);1777}1778}17791780// (X|Y)==C ? X&Y : X^Y -> (X^Y)^C : X^Y or ~(X^Y)&C : X^Y1781// (X|Y)==C ? X^Y : X&Y -> (X&Y)^C : X&Y or ~(X&Y)&C : X&Y1782if (match(CmpLHS, m_Or(m_Value(X), m_Value(Y)))) {1783if (match(TrueVal, m_c_And(m_Specific(X), m_Specific(Y)))) {1784// (X|Y)==C ? X&Y: (X^Y)^C -> (X^Y)^C: (X^Y)^C -> (X^Y)^C1785// (X|Y)==C ? X&Y:~(X^Y)&C ->~(X^Y)&C:~(X^Y)&C -> ~(X^Y)&C1786if (matchFalseVal(XorOps, XorOps, None) ||1787matchFalseVal(AndOps, XorOps, NotInner))1788return IC.replaceInstUsesWith(SI, FalseVal);1789} else if (match(TrueVal, m_c_Xor(m_Specific(X), m_Specific(Y)))) {1790// (X|Y)==C ? X^Y : (X&Y)^C -> (X&Y)^C : (X&Y)^C -> (X&Y)^C1791// (X|Y)==C ? X^Y :~(X&Y)&C -> ~(X&Y)&C :~(X&Y)&C -> ~(X&Y)&C1792if (matchFalseVal(XorOps, AndOps, None) ||1793matchFalseVal(AndOps, AndOps, NotInner))1794return IC.replaceInstUsesWith(SI, FalseVal);1795}1796}17971798// (X^Y)==C ? X&Y : X|Y -> (X|Y)^C : X|Y or (X|Y)&~C : X|Y1799// (X^Y)==C ? X|Y : X&Y -> (X&Y)|C : X&Y or (X&Y)^ C : X&Y1800if (match(CmpLHS, m_Xor(m_Value(X), m_Value(Y)))) {1801if ((match(TrueVal, m_c_And(m_Specific(X), m_Specific(Y))))) {1802// (X^Y)==C ? X&Y : (X|Y)^C -> (X|Y)^C1803// (X^Y)==C ? X&Y : (X|Y)&~C -> (X|Y)&~C1804if (matchFalseVal(XorOps, OrOps, None) ||1805matchFalseVal(AndOps, OrOps, NotRHS))1806return IC.replaceInstUsesWith(SI, FalseVal);1807} else if (match(TrueVal, m_c_Or(m_Specific(X), m_Specific(Y)))) {1808// (X^Y)==C ? (X|Y) : (X&Y)|C -> (X&Y)|C1809// (X^Y)==C ? (X|Y) : (X&Y)^C -> (X&Y)^C1810if (matchFalseVal(OrOps, AndOps, None) ||1811matchFalseVal(XorOps, AndOps, None))1812return IC.replaceInstUsesWith(SI, FalseVal);1813}1814}18151816return nullptr;1817}18181819/// Visit a SelectInst that has an ICmpInst as its first operand.1820Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI,1821ICmpInst *ICI) {1822if (Instruction *NewSel = foldSelectValueEquivalence(SI, *ICI))1823return NewSel;18241825if (Value *V =1826canonicalizeSPF(*ICI, SI.getTrueValue(), SI.getFalseValue(), *this))1827return replaceInstUsesWith(SI, V);18281829if (Value *V = foldSelectInstWithICmpConst(SI, ICI, Builder))1830return replaceInstUsesWith(SI, V);18311832if (Value *V = canonicalizeClampLike(SI, *ICI, Builder, *this))1833return replaceInstUsesWith(SI, V);18341835if (Instruction *NewSel =1836tryToReuseConstantFromSelectInComparison(SI, *ICI, *this))1837return NewSel;18381839if (Value *V = foldSelectICmpAnd(SI, ICI, Builder))1840return replaceInstUsesWith(SI, V);18411842// NOTE: if we wanted to, this is where to detect integer MIN/MAX1843bool Changed = false;1844Value *TrueVal = SI.getTrueValue();1845Value *FalseVal = SI.getFalseValue();1846ICmpInst::Predicate Pred = ICI->getPredicate();1847Value *CmpLHS = ICI->getOperand(0);1848Value *CmpRHS = ICI->getOperand(1);1849if (CmpRHS != CmpLHS && isa<Constant>(CmpRHS) && !isa<Constant>(CmpLHS)) {1850if (CmpLHS == TrueVal && Pred == ICmpInst::ICMP_EQ) {1851// Transform (X == C) ? X : Y -> (X == C) ? C : Y1852replaceOperand(SI, 1, CmpRHS);1853Changed = true;1854} else if (CmpLHS == FalseVal && Pred == ICmpInst::ICMP_NE) {1855// Transform (X != C) ? Y : X -> (X != C) ? Y : C1856replaceOperand(SI, 2, CmpRHS);1857Changed = true;1858}1859}18601861if (Instruction *NewSel = foldSelectICmpEq(SI, ICI, *this))1862return NewSel;18631864// Canonicalize a signbit condition to use zero constant by swapping:1865// (CmpLHS > -1) ? TV : FV --> (CmpLHS < 0) ? FV : TV1866// To avoid conflicts (infinite loops) with other canonicalizations, this is1867// not applied with any constant select arm.1868if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, m_AllOnes()) &&1869!match(TrueVal, m_Constant()) && !match(FalseVal, m_Constant()) &&1870ICI->hasOneUse()) {1871InstCombiner::BuilderTy::InsertPointGuard Guard(Builder);1872Builder.SetInsertPoint(&SI);1873Value *IsNeg = Builder.CreateIsNeg(CmpLHS, ICI->getName());1874replaceOperand(SI, 0, IsNeg);1875SI.swapValues();1876SI.swapProfMetadata();1877return &SI;1878}18791880// FIXME: This code is nearly duplicated in InstSimplify. Using/refactoring1881// decomposeBitTestICmp() might help.1882if (TrueVal->getType()->isIntOrIntVectorTy()) {1883unsigned BitWidth =1884DL.getTypeSizeInBits(TrueVal->getType()->getScalarType());1885APInt MinSignedValue = APInt::getSignedMinValue(BitWidth);1886Value *X;1887const APInt *Y, *C;1888bool TrueWhenUnset;1889bool IsBitTest = false;1890if (ICmpInst::isEquality(Pred) &&1891match(CmpLHS, m_And(m_Value(X), m_Power2(Y))) &&1892match(CmpRHS, m_Zero())) {1893IsBitTest = true;1894TrueWhenUnset = Pred == ICmpInst::ICMP_EQ;1895} else if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, m_Zero())) {1896X = CmpLHS;1897Y = &MinSignedValue;1898IsBitTest = true;1899TrueWhenUnset = false;1900} else if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, m_AllOnes())) {1901X = CmpLHS;1902Y = &MinSignedValue;1903IsBitTest = true;1904TrueWhenUnset = true;1905}1906if (IsBitTest) {1907Value *V = nullptr;1908// (X & Y) == 0 ? X : X ^ Y --> X & ~Y1909if (TrueWhenUnset && TrueVal == X &&1910match(FalseVal, m_Xor(m_Specific(X), m_APInt(C))) && *Y == *C)1911V = Builder.CreateAnd(X, ~(*Y));1912// (X & Y) != 0 ? X ^ Y : X --> X & ~Y1913else if (!TrueWhenUnset && FalseVal == X &&1914match(TrueVal, m_Xor(m_Specific(X), m_APInt(C))) && *Y == *C)1915V = Builder.CreateAnd(X, ~(*Y));1916// (X & Y) == 0 ? X ^ Y : X --> X | Y1917else if (TrueWhenUnset && FalseVal == X &&1918match(TrueVal, m_Xor(m_Specific(X), m_APInt(C))) && *Y == *C)1919V = Builder.CreateOr(X, *Y);1920// (X & Y) != 0 ? X : X ^ Y --> X | Y1921else if (!TrueWhenUnset && TrueVal == X &&1922match(FalseVal, m_Xor(m_Specific(X), m_APInt(C))) && *Y == *C)1923V = Builder.CreateOr(X, *Y);19241925if (V)1926return replaceInstUsesWith(SI, V);1927}1928}19291930if (Instruction *V =1931foldSelectICmpAndAnd(SI.getType(), ICI, TrueVal, FalseVal, Builder))1932return V;19331934if (Value *V = foldSelectICmpAndZeroShl(ICI, TrueVal, FalseVal, Builder))1935return replaceInstUsesWith(SI, V);19361937if (Instruction *V = foldSelectCtlzToCttz(ICI, TrueVal, FalseVal, Builder))1938return V;19391940if (Instruction *V = foldSelectZeroOrOnes(ICI, TrueVal, FalseVal, Builder))1941return V;19421943if (Value *V = foldSelectICmpAndBinOp(ICI, TrueVal, FalseVal, Builder))1944return replaceInstUsesWith(SI, V);19451946if (Value *V = foldSelectICmpLshrAshr(ICI, TrueVal, FalseVal, Builder))1947return replaceInstUsesWith(SI, V);19481949if (Value *V = foldSelectCttzCtlz(ICI, TrueVal, FalseVal, *this))1950return replaceInstUsesWith(SI, V);19511952if (Value *V = canonicalizeSaturatedSubtract(ICI, TrueVal, FalseVal, Builder))1953return replaceInstUsesWith(SI, V);19541955if (Value *V = canonicalizeSaturatedAdd(ICI, TrueVal, FalseVal, Builder))1956return replaceInstUsesWith(SI, V);19571958if (Value *V = foldAbsDiff(ICI, TrueVal, FalseVal, Builder))1959return replaceInstUsesWith(SI, V);19601961return Changed ? &SI : nullptr;1962}19631964/// SI is a select whose condition is a PHI node (but the two may be in1965/// different blocks). See if the true/false values (V) are live in all of the1966/// predecessor blocks of the PHI. For example, cases like this can't be mapped:1967///1968/// X = phi [ C1, BB1], [C2, BB2]1969/// Y = add1970/// Z = select X, Y, 01971///1972/// because Y is not live in BB1/BB2.1973static bool canSelectOperandBeMappingIntoPredBlock(const Value *V,1974const SelectInst &SI) {1975// If the value is a non-instruction value like a constant or argument, it1976// can always be mapped.1977const Instruction *I = dyn_cast<Instruction>(V);1978if (!I) return true;19791980// If V is a PHI node defined in the same block as the condition PHI, we can1981// map the arguments.1982const PHINode *CondPHI = cast<PHINode>(SI.getCondition());19831984if (const PHINode *VP = dyn_cast<PHINode>(I))1985if (VP->getParent() == CondPHI->getParent())1986return true;19871988// Otherwise, if the PHI and select are defined in the same block and if V is1989// defined in a different block, then we can transform it.1990if (SI.getParent() == CondPHI->getParent() &&1991I->getParent() != CondPHI->getParent())1992return true;19931994// Otherwise we have a 'hard' case and we can't tell without doing more1995// detailed dominator based analysis, punt.1996return false;1997}19981999/// We have an SPF (e.g. a min or max) of an SPF of the form:2000/// SPF2(SPF1(A, B), C)2001Instruction *InstCombinerImpl::foldSPFofSPF(Instruction *Inner,2002SelectPatternFlavor SPF1, Value *A,2003Value *B, Instruction &Outer,2004SelectPatternFlavor SPF2,2005Value *C) {2006if (Outer.getType() != Inner->getType())2007return nullptr;20082009if (C == A || C == B) {2010// MAX(MAX(A, B), B) -> MAX(A, B)2011// MIN(MIN(a, b), a) -> MIN(a, b)2012// TODO: This could be done in instsimplify.2013if (SPF1 == SPF2 && SelectPatternResult::isMinOrMax(SPF1))2014return replaceInstUsesWith(Outer, Inner);2015}20162017return nullptr;2018}20192020/// Turn select C, (X + Y), (X - Y) --> (X + (select C, Y, (-Y))).2021/// This is even legal for FP.2022static Instruction *foldAddSubSelect(SelectInst &SI,2023InstCombiner::BuilderTy &Builder) {2024Value *CondVal = SI.getCondition();2025Value *TrueVal = SI.getTrueValue();2026Value *FalseVal = SI.getFalseValue();2027auto *TI = dyn_cast<Instruction>(TrueVal);2028auto *FI = dyn_cast<Instruction>(FalseVal);2029if (!TI || !FI || !TI->hasOneUse() || !FI->hasOneUse())2030return nullptr;20312032Instruction *AddOp = nullptr, *SubOp = nullptr;2033if ((TI->getOpcode() == Instruction::Sub &&2034FI->getOpcode() == Instruction::Add) ||2035(TI->getOpcode() == Instruction::FSub &&2036FI->getOpcode() == Instruction::FAdd)) {2037AddOp = FI;2038SubOp = TI;2039} else if ((FI->getOpcode() == Instruction::Sub &&2040TI->getOpcode() == Instruction::Add) ||2041(FI->getOpcode() == Instruction::FSub &&2042TI->getOpcode() == Instruction::FAdd)) {2043AddOp = TI;2044SubOp = FI;2045}20462047if (AddOp) {2048Value *OtherAddOp = nullptr;2049if (SubOp->getOperand(0) == AddOp->getOperand(0)) {2050OtherAddOp = AddOp->getOperand(1);2051} else if (SubOp->getOperand(0) == AddOp->getOperand(1)) {2052OtherAddOp = AddOp->getOperand(0);2053}20542055if (OtherAddOp) {2056// So at this point we know we have (Y -> OtherAddOp):2057// select C, (add X, Y), (sub X, Z)2058Value *NegVal; // Compute -Z2059if (SI.getType()->isFPOrFPVectorTy()) {2060NegVal = Builder.CreateFNeg(SubOp->getOperand(1));2061if (Instruction *NegInst = dyn_cast<Instruction>(NegVal)) {2062FastMathFlags Flags = AddOp->getFastMathFlags();2063Flags &= SubOp->getFastMathFlags();2064NegInst->setFastMathFlags(Flags);2065}2066} else {2067NegVal = Builder.CreateNeg(SubOp->getOperand(1));2068}20692070Value *NewTrueOp = OtherAddOp;2071Value *NewFalseOp = NegVal;2072if (AddOp != TI)2073std::swap(NewTrueOp, NewFalseOp);2074Value *NewSel = Builder.CreateSelect(CondVal, NewTrueOp, NewFalseOp,2075SI.getName() + ".p", &SI);20762077if (SI.getType()->isFPOrFPVectorTy()) {2078Instruction *RI =2079BinaryOperator::CreateFAdd(SubOp->getOperand(0), NewSel);20802081FastMathFlags Flags = AddOp->getFastMathFlags();2082Flags &= SubOp->getFastMathFlags();2083RI->setFastMathFlags(Flags);2084return RI;2085} else2086return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel);2087}2088}2089return nullptr;2090}20912092/// Turn X + Y overflows ? -1 : X + Y -> uadd_sat X, Y2093/// And X - Y overflows ? 0 : X - Y -> usub_sat X, Y2094/// Along with a number of patterns similar to:2095/// X + Y overflows ? (X < 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y2096/// X - Y overflows ? (X > 0 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y2097static Instruction *2098foldOverflowingAddSubSelect(SelectInst &SI, InstCombiner::BuilderTy &Builder) {2099Value *CondVal = SI.getCondition();2100Value *TrueVal = SI.getTrueValue();2101Value *FalseVal = SI.getFalseValue();21022103WithOverflowInst *II;2104if (!match(CondVal, m_ExtractValue<1>(m_WithOverflowInst(II))) ||2105!match(FalseVal, m_ExtractValue<0>(m_Specific(II))))2106return nullptr;21072108Value *X = II->getLHS();2109Value *Y = II->getRHS();21102111auto IsSignedSaturateLimit = [&](Value *Limit, bool IsAdd) {2112Type *Ty = Limit->getType();21132114ICmpInst::Predicate Pred;2115Value *TrueVal, *FalseVal, *Op;2116const APInt *C;2117if (!match(Limit, m_Select(m_ICmp(Pred, m_Value(Op), m_APInt(C)),2118m_Value(TrueVal), m_Value(FalseVal))))2119return false;21202121auto IsZeroOrOne = [](const APInt &C) { return C.isZero() || C.isOne(); };2122auto IsMinMax = [&](Value *Min, Value *Max) {2123APInt MinVal = APInt::getSignedMinValue(Ty->getScalarSizeInBits());2124APInt MaxVal = APInt::getSignedMaxValue(Ty->getScalarSizeInBits());2125return match(Min, m_SpecificInt(MinVal)) &&2126match(Max, m_SpecificInt(MaxVal));2127};21282129if (Op != X && Op != Y)2130return false;21312132if (IsAdd) {2133// X + Y overflows ? (X <s 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y2134// X + Y overflows ? (X <s 1 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y2135// X + Y overflows ? (Y <s 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y2136// X + Y overflows ? (Y <s 1 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y2137if (Pred == ICmpInst::ICMP_SLT && IsZeroOrOne(*C) &&2138IsMinMax(TrueVal, FalseVal))2139return true;2140// X + Y overflows ? (X >s 0 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y2141// X + Y overflows ? (X >s -1 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y2142// X + Y overflows ? (Y >s 0 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y2143// X + Y overflows ? (Y >s -1 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y2144if (Pred == ICmpInst::ICMP_SGT && IsZeroOrOne(*C + 1) &&2145IsMinMax(FalseVal, TrueVal))2146return true;2147} else {2148// X - Y overflows ? (X <s 0 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y2149// X - Y overflows ? (X <s -1 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y2150if (Op == X && Pred == ICmpInst::ICMP_SLT && IsZeroOrOne(*C + 1) &&2151IsMinMax(TrueVal, FalseVal))2152return true;2153// X - Y overflows ? (X >s -1 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y2154// X - Y overflows ? (X >s -2 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y2155if (Op == X && Pred == ICmpInst::ICMP_SGT && IsZeroOrOne(*C + 2) &&2156IsMinMax(FalseVal, TrueVal))2157return true;2158// X - Y overflows ? (Y <s 0 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y2159// X - Y overflows ? (Y <s 1 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y2160if (Op == Y && Pred == ICmpInst::ICMP_SLT && IsZeroOrOne(*C) &&2161IsMinMax(FalseVal, TrueVal))2162return true;2163// X - Y overflows ? (Y >s 0 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y2164// X - Y overflows ? (Y >s -1 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y2165if (Op == Y && Pred == ICmpInst::ICMP_SGT && IsZeroOrOne(*C + 1) &&2166IsMinMax(TrueVal, FalseVal))2167return true;2168}21692170return false;2171};21722173Intrinsic::ID NewIntrinsicID;2174if (II->getIntrinsicID() == Intrinsic::uadd_with_overflow &&2175match(TrueVal, m_AllOnes()))2176// X + Y overflows ? -1 : X + Y -> uadd_sat X, Y2177NewIntrinsicID = Intrinsic::uadd_sat;2178else if (II->getIntrinsicID() == Intrinsic::usub_with_overflow &&2179match(TrueVal, m_Zero()))2180// X - Y overflows ? 0 : X - Y -> usub_sat X, Y2181NewIntrinsicID = Intrinsic::usub_sat;2182else if (II->getIntrinsicID() == Intrinsic::sadd_with_overflow &&2183IsSignedSaturateLimit(TrueVal, /*IsAdd=*/true))2184// X + Y overflows ? (X <s 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y2185// X + Y overflows ? (X <s 1 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y2186// X + Y overflows ? (X >s 0 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y2187// X + Y overflows ? (X >s -1 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y2188// X + Y overflows ? (Y <s 0 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y2189// X + Y overflows ? (Y <s 1 ? INTMIN : INTMAX) : X + Y --> sadd_sat X, Y2190// X + Y overflows ? (Y >s 0 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y2191// X + Y overflows ? (Y >s -1 ? INTMAX : INTMIN) : X + Y --> sadd_sat X, Y2192NewIntrinsicID = Intrinsic::sadd_sat;2193else if (II->getIntrinsicID() == Intrinsic::ssub_with_overflow &&2194IsSignedSaturateLimit(TrueVal, /*IsAdd=*/false))2195// X - Y overflows ? (X <s 0 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y2196// X - Y overflows ? (X <s -1 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y2197// X - Y overflows ? (X >s -1 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y2198// X - Y overflows ? (X >s -2 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y2199// X - Y overflows ? (Y <s 0 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y2200// X - Y overflows ? (Y <s 1 ? INTMAX : INTMIN) : X - Y --> ssub_sat X, Y2201// X - Y overflows ? (Y >s 0 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y2202// X - Y overflows ? (Y >s -1 ? INTMIN : INTMAX) : X - Y --> ssub_sat X, Y2203NewIntrinsicID = Intrinsic::ssub_sat;2204else2205return nullptr;22062207Function *F =2208Intrinsic::getDeclaration(SI.getModule(), NewIntrinsicID, SI.getType());2209return CallInst::Create(F, {X, Y});2210}22112212Instruction *InstCombinerImpl::foldSelectExtConst(SelectInst &Sel) {2213Constant *C;2214if (!match(Sel.getTrueValue(), m_Constant(C)) &&2215!match(Sel.getFalseValue(), m_Constant(C)))2216return nullptr;22172218Instruction *ExtInst;2219if (!match(Sel.getTrueValue(), m_Instruction(ExtInst)) &&2220!match(Sel.getFalseValue(), m_Instruction(ExtInst)))2221return nullptr;22222223auto ExtOpcode = ExtInst->getOpcode();2224if (ExtOpcode != Instruction::ZExt && ExtOpcode != Instruction::SExt)2225return nullptr;22262227// If we are extending from a boolean type or if we can create a select that2228// has the same size operands as its condition, try to narrow the select.2229Value *X = ExtInst->getOperand(0);2230Type *SmallType = X->getType();2231Value *Cond = Sel.getCondition();2232auto *Cmp = dyn_cast<CmpInst>(Cond);2233if (!SmallType->isIntOrIntVectorTy(1) &&2234(!Cmp || Cmp->getOperand(0)->getType() != SmallType))2235return nullptr;22362237// If the constant is the same after truncation to the smaller type and2238// extension to the original type, we can narrow the select.2239Type *SelType = Sel.getType();2240Constant *TruncC = getLosslessTrunc(C, SmallType, ExtOpcode);2241if (TruncC && ExtInst->hasOneUse()) {2242Value *TruncCVal = cast<Value>(TruncC);2243if (ExtInst == Sel.getFalseValue())2244std::swap(X, TruncCVal);22452246// select Cond, (ext X), C --> ext(select Cond, X, C')2247// select Cond, C, (ext X) --> ext(select Cond, C', X)2248Value *NewSel = Builder.CreateSelect(Cond, X, TruncCVal, "narrow", &Sel);2249return CastInst::Create(Instruction::CastOps(ExtOpcode), NewSel, SelType);2250}22512252return nullptr;2253}22542255/// Try to transform a vector select with a constant condition vector into a2256/// shuffle for easier combining with other shuffles and insert/extract.2257static Instruction *canonicalizeSelectToShuffle(SelectInst &SI) {2258Value *CondVal = SI.getCondition();2259Constant *CondC;2260auto *CondValTy = dyn_cast<FixedVectorType>(CondVal->getType());2261if (!CondValTy || !match(CondVal, m_Constant(CondC)))2262return nullptr;22632264unsigned NumElts = CondValTy->getNumElements();2265SmallVector<int, 16> Mask;2266Mask.reserve(NumElts);2267for (unsigned i = 0; i != NumElts; ++i) {2268Constant *Elt = CondC->getAggregateElement(i);2269if (!Elt)2270return nullptr;22712272if (Elt->isOneValue()) {2273// If the select condition element is true, choose from the 1st vector.2274Mask.push_back(i);2275} else if (Elt->isNullValue()) {2276// If the select condition element is false, choose from the 2nd vector.2277Mask.push_back(i + NumElts);2278} else if (isa<UndefValue>(Elt)) {2279// Undef in a select condition (choose one of the operands) does not mean2280// the same thing as undef in a shuffle mask (any value is acceptable), so2281// give up.2282return nullptr;2283} else {2284// Bail out on a constant expression.2285return nullptr;2286}2287}22882289return new ShuffleVectorInst(SI.getTrueValue(), SI.getFalseValue(), Mask);2290}22912292/// If we have a select of vectors with a scalar condition, try to convert that2293/// to a vector select by splatting the condition. A splat may get folded with2294/// other operations in IR and having all operands of a select be vector types2295/// is likely better for vector codegen.2296static Instruction *canonicalizeScalarSelectOfVecs(SelectInst &Sel,2297InstCombinerImpl &IC) {2298auto *Ty = dyn_cast<VectorType>(Sel.getType());2299if (!Ty)2300return nullptr;23012302// We can replace a single-use extract with constant index.2303Value *Cond = Sel.getCondition();2304if (!match(Cond, m_OneUse(m_ExtractElt(m_Value(), m_ConstantInt()))))2305return nullptr;23062307// select (extelt V, Index), T, F --> select (splat V, Index), T, F2308// Splatting the extracted condition reduces code (we could directly create a2309// splat shuffle of the source vector to eliminate the intermediate step).2310return IC.replaceOperand(2311Sel, 0, IC.Builder.CreateVectorSplat(Ty->getElementCount(), Cond));2312}23132314/// Reuse bitcasted operands between a compare and select:2315/// select (cmp (bitcast C), (bitcast D)), (bitcast' C), (bitcast' D) -->2316/// bitcast (select (cmp (bitcast C), (bitcast D)), (bitcast C), (bitcast D))2317static Instruction *foldSelectCmpBitcasts(SelectInst &Sel,2318InstCombiner::BuilderTy &Builder) {2319Value *Cond = Sel.getCondition();2320Value *TVal = Sel.getTrueValue();2321Value *FVal = Sel.getFalseValue();23222323CmpInst::Predicate Pred;2324Value *A, *B;2325if (!match(Cond, m_Cmp(Pred, m_Value(A), m_Value(B))))2326return nullptr;23272328// The select condition is a compare instruction. If the select's true/false2329// values are already the same as the compare operands, there's nothing to do.2330if (TVal == A || TVal == B || FVal == A || FVal == B)2331return nullptr;23322333Value *C, *D;2334if (!match(A, m_BitCast(m_Value(C))) || !match(B, m_BitCast(m_Value(D))))2335return nullptr;23362337// select (cmp (bitcast C), (bitcast D)), (bitcast TSrc), (bitcast FSrc)2338Value *TSrc, *FSrc;2339if (!match(TVal, m_BitCast(m_Value(TSrc))) ||2340!match(FVal, m_BitCast(m_Value(FSrc))))2341return nullptr;23422343// If the select true/false values are *different bitcasts* of the same source2344// operands, make the select operands the same as the compare operands and2345// cast the result. This is the canonical select form for min/max.2346Value *NewSel;2347if (TSrc == C && FSrc == D) {2348// select (cmp (bitcast C), (bitcast D)), (bitcast' C), (bitcast' D) -->2349// bitcast (select (cmp A, B), A, B)2350NewSel = Builder.CreateSelect(Cond, A, B, "", &Sel);2351} else if (TSrc == D && FSrc == C) {2352// select (cmp (bitcast C), (bitcast D)), (bitcast' D), (bitcast' C) -->2353// bitcast (select (cmp A, B), B, A)2354NewSel = Builder.CreateSelect(Cond, B, A, "", &Sel);2355} else {2356return nullptr;2357}2358return CastInst::CreateBitOrPointerCast(NewSel, Sel.getType());2359}23602361/// Try to eliminate select instructions that test the returned flag of cmpxchg2362/// instructions.2363///2364/// If a select instruction tests the returned flag of a cmpxchg instruction and2365/// selects between the returned value of the cmpxchg instruction its compare2366/// operand, the result of the select will always be equal to its false value.2367/// For example:2368///2369/// %cmpxchg = cmpxchg ptr %ptr, i64 %compare, i64 %new_value seq_cst seq_cst2370/// %val = extractvalue { i64, i1 } %cmpxchg, 02371/// %success = extractvalue { i64, i1 } %cmpxchg, 12372/// %sel = select i1 %success, i64 %compare, i64 %val2373/// ret i64 %sel2374///2375/// The returned value of the cmpxchg instruction (%val) is the original value2376/// located at %ptr prior to any update. If the cmpxchg operation succeeds, %val2377/// must have been equal to %compare. Thus, the result of the select is always2378/// equal to %val, and the code can be simplified to:2379///2380/// %cmpxchg = cmpxchg ptr %ptr, i64 %compare, i64 %new_value seq_cst seq_cst2381/// %val = extractvalue { i64, i1 } %cmpxchg, 02382/// ret i64 %val2383///2384static Value *foldSelectCmpXchg(SelectInst &SI) {2385// A helper that determines if V is an extractvalue instruction whose2386// aggregate operand is a cmpxchg instruction and whose single index is equal2387// to I. If such conditions are true, the helper returns the cmpxchg2388// instruction; otherwise, a nullptr is returned.2389auto isExtractFromCmpXchg = [](Value *V, unsigned I) -> AtomicCmpXchgInst * {2390auto *Extract = dyn_cast<ExtractValueInst>(V);2391if (!Extract)2392return nullptr;2393if (Extract->getIndices()[0] != I)2394return nullptr;2395return dyn_cast<AtomicCmpXchgInst>(Extract->getAggregateOperand());2396};23972398// If the select has a single user, and this user is a select instruction that2399// we can simplify, skip the cmpxchg simplification for now.2400if (SI.hasOneUse())2401if (auto *Select = dyn_cast<SelectInst>(SI.user_back()))2402if (Select->getCondition() == SI.getCondition())2403if (Select->getFalseValue() == SI.getTrueValue() ||2404Select->getTrueValue() == SI.getFalseValue())2405return nullptr;24062407// Ensure the select condition is the returned flag of a cmpxchg instruction.2408auto *CmpXchg = isExtractFromCmpXchg(SI.getCondition(), 1);2409if (!CmpXchg)2410return nullptr;24112412// Check the true value case: The true value of the select is the returned2413// value of the same cmpxchg used by the condition, and the false value is the2414// cmpxchg instruction's compare operand.2415if (auto *X = isExtractFromCmpXchg(SI.getTrueValue(), 0))2416if (X == CmpXchg && X->getCompareOperand() == SI.getFalseValue())2417return SI.getFalseValue();24182419// Check the false value case: The false value of the select is the returned2420// value of the same cmpxchg used by the condition, and the true value is the2421// cmpxchg instruction's compare operand.2422if (auto *X = isExtractFromCmpXchg(SI.getFalseValue(), 0))2423if (X == CmpXchg && X->getCompareOperand() == SI.getTrueValue())2424return SI.getFalseValue();24252426return nullptr;2427}24282429/// Try to reduce a funnel/rotate pattern that includes a compare and select2430/// into a funnel shift intrinsic. Example:2431/// rotl32(a, b) --> (b == 0 ? a : ((a >> (32 - b)) | (a << b)))2432/// --> call llvm.fshl.i32(a, a, b)2433/// fshl32(a, b, c) --> (c == 0 ? a : ((b >> (32 - c)) | (a << c)))2434/// --> call llvm.fshl.i32(a, b, c)2435/// fshr32(a, b, c) --> (c == 0 ? b : ((a >> (32 - c)) | (b << c)))2436/// --> call llvm.fshr.i32(a, b, c)2437static Instruction *foldSelectFunnelShift(SelectInst &Sel,2438InstCombiner::BuilderTy &Builder) {2439// This must be a power-of-2 type for a bitmasking transform to be valid.2440unsigned Width = Sel.getType()->getScalarSizeInBits();2441if (!isPowerOf2_32(Width))2442return nullptr;24432444BinaryOperator *Or0, *Or1;2445if (!match(Sel.getFalseValue(), m_OneUse(m_Or(m_BinOp(Or0), m_BinOp(Or1)))))2446return nullptr;24472448Value *SV0, *SV1, *SA0, *SA1;2449if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(SV0),2450m_ZExtOrSelf(m_Value(SA0))))) ||2451!match(Or1, m_OneUse(m_LogicalShift(m_Value(SV1),2452m_ZExtOrSelf(m_Value(SA1))))) ||2453Or0->getOpcode() == Or1->getOpcode())2454return nullptr;24552456// Canonicalize to or(shl(SV0, SA0), lshr(SV1, SA1)).2457if (Or0->getOpcode() == BinaryOperator::LShr) {2458std::swap(Or0, Or1);2459std::swap(SV0, SV1);2460std::swap(SA0, SA1);2461}2462assert(Or0->getOpcode() == BinaryOperator::Shl &&2463Or1->getOpcode() == BinaryOperator::LShr &&2464"Illegal or(shift,shift) pair");24652466// Check the shift amounts to see if they are an opposite pair.2467Value *ShAmt;2468if (match(SA1, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(SA0)))))2469ShAmt = SA0;2470else if (match(SA0, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(SA1)))))2471ShAmt = SA1;2472else2473return nullptr;24742475// We should now have this pattern:2476// select ?, TVal, (or (shl SV0, SA0), (lshr SV1, SA1))2477// The false value of the select must be a funnel-shift of the true value:2478// IsFShl -> TVal must be SV0 else TVal must be SV1.2479bool IsFshl = (ShAmt == SA0);2480Value *TVal = Sel.getTrueValue();2481if ((IsFshl && TVal != SV0) || (!IsFshl && TVal != SV1))2482return nullptr;24832484// Finally, see if the select is filtering out a shift-by-zero.2485Value *Cond = Sel.getCondition();2486ICmpInst::Predicate Pred;2487if (!match(Cond, m_OneUse(m_ICmp(Pred, m_Specific(ShAmt), m_ZeroInt()))) ||2488Pred != ICmpInst::ICMP_EQ)2489return nullptr;24902491// If this is not a rotate then the select was blocking poison from the2492// 'shift-by-zero' non-TVal, but a funnel shift won't - so freeze it.2493if (SV0 != SV1) {2494if (IsFshl && !llvm::isGuaranteedNotToBePoison(SV1))2495SV1 = Builder.CreateFreeze(SV1);2496else if (!IsFshl && !llvm::isGuaranteedNotToBePoison(SV0))2497SV0 = Builder.CreateFreeze(SV0);2498}24992500// This is a funnel/rotate that avoids shift-by-bitwidth UB in a suboptimal way.2501// Convert to funnel shift intrinsic.2502Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;2503Function *F = Intrinsic::getDeclaration(Sel.getModule(), IID, Sel.getType());2504ShAmt = Builder.CreateZExt(ShAmt, Sel.getType());2505return CallInst::Create(F, { SV0, SV1, ShAmt });2506}25072508static Instruction *foldSelectToCopysign(SelectInst &Sel,2509InstCombiner::BuilderTy &Builder) {2510Value *Cond = Sel.getCondition();2511Value *TVal = Sel.getTrueValue();2512Value *FVal = Sel.getFalseValue();2513Type *SelType = Sel.getType();25142515// Match select ?, TC, FC where the constants are equal but negated.2516// TODO: Generalize to handle a negated variable operand?2517const APFloat *TC, *FC;2518if (!match(TVal, m_APFloatAllowPoison(TC)) ||2519!match(FVal, m_APFloatAllowPoison(FC)) ||2520!abs(*TC).bitwiseIsEqual(abs(*FC)))2521return nullptr;25222523assert(TC != FC && "Expected equal select arms to simplify");25242525Value *X;2526const APInt *C;2527bool IsTrueIfSignSet;2528ICmpInst::Predicate Pred;2529if (!match(Cond, m_OneUse(m_ICmp(Pred, m_ElementWiseBitCast(m_Value(X)),2530m_APInt(C)))) ||2531!isSignBitCheck(Pred, *C, IsTrueIfSignSet) || X->getType() != SelType)2532return nullptr;25332534// If needed, negate the value that will be the sign argument of the copysign:2535// (bitcast X) < 0 ? -TC : TC --> copysign(TC, X)2536// (bitcast X) < 0 ? TC : -TC --> copysign(TC, -X)2537// (bitcast X) >= 0 ? -TC : TC --> copysign(TC, -X)2538// (bitcast X) >= 0 ? TC : -TC --> copysign(TC, X)2539// Note: FMF from the select can not be propagated to the new instructions.2540if (IsTrueIfSignSet ^ TC->isNegative())2541X = Builder.CreateFNeg(X);25422543// Canonicalize the magnitude argument as the positive constant since we do2544// not care about its sign.2545Value *MagArg = ConstantFP::get(SelType, abs(*TC));2546Function *F = Intrinsic::getDeclaration(Sel.getModule(), Intrinsic::copysign,2547Sel.getType());2548return CallInst::Create(F, { MagArg, X });2549}25502551Instruction *InstCombinerImpl::foldVectorSelect(SelectInst &Sel) {2552if (!isa<VectorType>(Sel.getType()))2553return nullptr;25542555Value *Cond = Sel.getCondition();2556Value *TVal = Sel.getTrueValue();2557Value *FVal = Sel.getFalseValue();2558Value *C, *X, *Y;25592560if (match(Cond, m_VecReverse(m_Value(C)))) {2561auto createSelReverse = [&](Value *C, Value *X, Value *Y) {2562Value *V = Builder.CreateSelect(C, X, Y, Sel.getName(), &Sel);2563if (auto *I = dyn_cast<Instruction>(V))2564I->copyIRFlags(&Sel);2565Module *M = Sel.getModule();2566Function *F =2567Intrinsic::getDeclaration(M, Intrinsic::vector_reverse, V->getType());2568return CallInst::Create(F, V);2569};25702571if (match(TVal, m_VecReverse(m_Value(X)))) {2572// select rev(C), rev(X), rev(Y) --> rev(select C, X, Y)2573if (match(FVal, m_VecReverse(m_Value(Y))) &&2574(Cond->hasOneUse() || TVal->hasOneUse() || FVal->hasOneUse()))2575return createSelReverse(C, X, Y);25762577// select rev(C), rev(X), FValSplat --> rev(select C, X, FValSplat)2578if ((Cond->hasOneUse() || TVal->hasOneUse()) && isSplatValue(FVal))2579return createSelReverse(C, X, FVal);2580}2581// select rev(C), TValSplat, rev(Y) --> rev(select C, TValSplat, Y)2582else if (isSplatValue(TVal) && match(FVal, m_VecReverse(m_Value(Y))) &&2583(Cond->hasOneUse() || FVal->hasOneUse()))2584return createSelReverse(C, TVal, Y);2585}25862587auto *VecTy = dyn_cast<FixedVectorType>(Sel.getType());2588if (!VecTy)2589return nullptr;25902591unsigned NumElts = VecTy->getNumElements();2592APInt PoisonElts(NumElts, 0);2593APInt AllOnesEltMask(APInt::getAllOnes(NumElts));2594if (Value *V = SimplifyDemandedVectorElts(&Sel, AllOnesEltMask, PoisonElts)) {2595if (V != &Sel)2596return replaceInstUsesWith(Sel, V);2597return &Sel;2598}25992600// A select of a "select shuffle" with a common operand can be rearranged2601// to select followed by "select shuffle". Because of poison, this only works2602// in the case of a shuffle with no undefined mask elements.2603ArrayRef<int> Mask;2604if (match(TVal, m_OneUse(m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask)))) &&2605!is_contained(Mask, PoisonMaskElem) &&2606cast<ShuffleVectorInst>(TVal)->isSelect()) {2607if (X == FVal) {2608// select Cond, (shuf_sel X, Y), X --> shuf_sel X, (select Cond, Y, X)2609Value *NewSel = Builder.CreateSelect(Cond, Y, X, "sel", &Sel);2610return new ShuffleVectorInst(X, NewSel, Mask);2611}2612if (Y == FVal) {2613// select Cond, (shuf_sel X, Y), Y --> shuf_sel (select Cond, X, Y), Y2614Value *NewSel = Builder.CreateSelect(Cond, X, Y, "sel", &Sel);2615return new ShuffleVectorInst(NewSel, Y, Mask);2616}2617}2618if (match(FVal, m_OneUse(m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask)))) &&2619!is_contained(Mask, PoisonMaskElem) &&2620cast<ShuffleVectorInst>(FVal)->isSelect()) {2621if (X == TVal) {2622// select Cond, X, (shuf_sel X, Y) --> shuf_sel X, (select Cond, X, Y)2623Value *NewSel = Builder.CreateSelect(Cond, X, Y, "sel", &Sel);2624return new ShuffleVectorInst(X, NewSel, Mask);2625}2626if (Y == TVal) {2627// select Cond, Y, (shuf_sel X, Y) --> shuf_sel (select Cond, Y, X), Y2628Value *NewSel = Builder.CreateSelect(Cond, Y, X, "sel", &Sel);2629return new ShuffleVectorInst(NewSel, Y, Mask);2630}2631}26322633return nullptr;2634}26352636static Instruction *foldSelectToPhiImpl(SelectInst &Sel, BasicBlock *BB,2637const DominatorTree &DT,2638InstCombiner::BuilderTy &Builder) {2639// Find the block's immediate dominator that ends with a conditional branch2640// that matches select's condition (maybe inverted).2641auto *IDomNode = DT[BB]->getIDom();2642if (!IDomNode)2643return nullptr;2644BasicBlock *IDom = IDomNode->getBlock();26452646Value *Cond = Sel.getCondition();2647Value *IfTrue, *IfFalse;2648BasicBlock *TrueSucc, *FalseSucc;2649if (match(IDom->getTerminator(),2650m_Br(m_Specific(Cond), m_BasicBlock(TrueSucc),2651m_BasicBlock(FalseSucc)))) {2652IfTrue = Sel.getTrueValue();2653IfFalse = Sel.getFalseValue();2654} else if (match(IDom->getTerminator(),2655m_Br(m_Not(m_Specific(Cond)), m_BasicBlock(TrueSucc),2656m_BasicBlock(FalseSucc)))) {2657IfTrue = Sel.getFalseValue();2658IfFalse = Sel.getTrueValue();2659} else2660return nullptr;26612662// Make sure the branches are actually different.2663if (TrueSucc == FalseSucc)2664return nullptr;26652666// We want to replace select %cond, %a, %b with a phi that takes value %a2667// for all incoming edges that are dominated by condition `%cond == true`,2668// and value %b for edges dominated by condition `%cond == false`. If %a2669// or %b are also phis from the same basic block, we can go further and take2670// their incoming values from the corresponding blocks.2671BasicBlockEdge TrueEdge(IDom, TrueSucc);2672BasicBlockEdge FalseEdge(IDom, FalseSucc);2673DenseMap<BasicBlock *, Value *> Inputs;2674for (auto *Pred : predecessors(BB)) {2675// Check implication.2676BasicBlockEdge Incoming(Pred, BB);2677if (DT.dominates(TrueEdge, Incoming))2678Inputs[Pred] = IfTrue->DoPHITranslation(BB, Pred);2679else if (DT.dominates(FalseEdge, Incoming))2680Inputs[Pred] = IfFalse->DoPHITranslation(BB, Pred);2681else2682return nullptr;2683// Check availability.2684if (auto *Insn = dyn_cast<Instruction>(Inputs[Pred]))2685if (!DT.dominates(Insn, Pred->getTerminator()))2686return nullptr;2687}26882689Builder.SetInsertPoint(BB, BB->begin());2690auto *PN = Builder.CreatePHI(Sel.getType(), Inputs.size());2691for (auto *Pred : predecessors(BB))2692PN->addIncoming(Inputs[Pred], Pred);2693PN->takeName(&Sel);2694return PN;2695}26962697static Instruction *foldSelectToPhi(SelectInst &Sel, const DominatorTree &DT,2698InstCombiner::BuilderTy &Builder) {2699// Try to replace this select with Phi in one of these blocks.2700SmallSetVector<BasicBlock *, 4> CandidateBlocks;2701CandidateBlocks.insert(Sel.getParent());2702for (Value *V : Sel.operands())2703if (auto *I = dyn_cast<Instruction>(V))2704CandidateBlocks.insert(I->getParent());27052706for (BasicBlock *BB : CandidateBlocks)2707if (auto *PN = foldSelectToPhiImpl(Sel, BB, DT, Builder))2708return PN;2709return nullptr;2710}27112712/// Tries to reduce a pattern that arises when calculating the remainder of the2713/// Euclidean division. When the divisor is a power of two and is guaranteed not2714/// to be negative, a signed remainder can be folded with a bitwise and.2715///2716/// (x % n) < 0 ? (x % n) + n : (x % n)2717/// -> x & (n - 1)2718static Instruction *foldSelectWithSRem(SelectInst &SI, InstCombinerImpl &IC,2719IRBuilderBase &Builder) {2720Value *CondVal = SI.getCondition();2721Value *TrueVal = SI.getTrueValue();2722Value *FalseVal = SI.getFalseValue();27232724ICmpInst::Predicate Pred;2725Value *Op, *RemRes, *Remainder;2726const APInt *C;2727bool TrueIfSigned = false;27282729if (!(match(CondVal, m_ICmp(Pred, m_Value(RemRes), m_APInt(C))) &&2730isSignBitCheck(Pred, *C, TrueIfSigned)))2731return nullptr;27322733// If the sign bit is not set, we have a SGE/SGT comparison, and the operands2734// of the select are inverted.2735if (!TrueIfSigned)2736std::swap(TrueVal, FalseVal);27372738auto FoldToBitwiseAnd = [&](Value *Remainder) -> Instruction * {2739Value *Add = Builder.CreateAdd(2740Remainder, Constant::getAllOnesValue(RemRes->getType()));2741return BinaryOperator::CreateAnd(Op, Add);2742};27432744// Match the general case:2745// %rem = srem i32 %x, %n2746// %cnd = icmp slt i32 %rem, 02747// %add = add i32 %rem, %n2748// %sel = select i1 %cnd, i32 %add, i32 %rem2749if (match(TrueVal, m_Add(m_Specific(RemRes), m_Value(Remainder))) &&2750match(RemRes, m_SRem(m_Value(Op), m_Specific(Remainder))) &&2751IC.isKnownToBeAPowerOfTwo(Remainder, /*OrZero*/ true) &&2752FalseVal == RemRes)2753return FoldToBitwiseAnd(Remainder);27542755// Match the case where the one arm has been replaced by constant 1:2756// %rem = srem i32 %n, 22757// %cnd = icmp slt i32 %rem, 02758// %sel = select i1 %cnd, i32 1, i32 %rem2759if (match(TrueVal, m_One()) &&2760match(RemRes, m_SRem(m_Value(Op), m_SpecificInt(2))) &&2761FalseVal == RemRes)2762return FoldToBitwiseAnd(ConstantInt::get(RemRes->getType(), 2));27632764return nullptr;2765}27662767static Value *foldSelectWithFrozenICmp(SelectInst &Sel, InstCombiner::BuilderTy &Builder) {2768FreezeInst *FI = dyn_cast<FreezeInst>(Sel.getCondition());2769if (!FI)2770return nullptr;27712772Value *Cond = FI->getOperand(0);2773Value *TrueVal = Sel.getTrueValue(), *FalseVal = Sel.getFalseValue();27742775// select (freeze(x == y)), x, y --> y2776// select (freeze(x != y)), x, y --> x2777// The freeze should be only used by this select. Otherwise, remaining uses of2778// the freeze can observe a contradictory value.2779// c = freeze(x == y) ; Let's assume that y = poison & x = 42; c is 0 or 12780// a = select c, x, y ;2781// f(a, c) ; f(poison, 1) cannot happen, but if a is folded2782// ; to y, this can happen.2783CmpInst::Predicate Pred;2784if (FI->hasOneUse() &&2785match(Cond, m_c_ICmp(Pred, m_Specific(TrueVal), m_Specific(FalseVal))) &&2786(Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE)) {2787return Pred == ICmpInst::ICMP_EQ ? FalseVal : TrueVal;2788}27892790return nullptr;2791}27922793/// Given that \p CondVal is known to be \p CondIsTrue, try to simplify \p SI.2794static Value *simplifyNestedSelectsUsingImpliedCond(SelectInst &SI,2795Value *CondVal,2796bool CondIsTrue,2797const DataLayout &DL) {2798Value *InnerCondVal = SI.getCondition();2799Value *InnerTrueVal = SI.getTrueValue();2800Value *InnerFalseVal = SI.getFalseValue();2801assert(CondVal->getType() == InnerCondVal->getType() &&2802"The type of inner condition must match with the outer.");2803if (auto Implied = isImpliedCondition(CondVal, InnerCondVal, DL, CondIsTrue))2804return *Implied ? InnerTrueVal : InnerFalseVal;2805return nullptr;2806}28072808Instruction *InstCombinerImpl::foldAndOrOfSelectUsingImpliedCond(Value *Op,2809SelectInst &SI,2810bool IsAnd) {2811assert(Op->getType()->isIntOrIntVectorTy(1) &&2812"Op must be either i1 or vector of i1.");2813if (SI.getCondition()->getType() != Op->getType())2814return nullptr;2815if (Value *V = simplifyNestedSelectsUsingImpliedCond(SI, Op, IsAnd, DL))2816return SelectInst::Create(Op,2817IsAnd ? V : ConstantInt::getTrue(Op->getType()),2818IsAnd ? ConstantInt::getFalse(Op->getType()) : V);2819return nullptr;2820}28212822// Canonicalize select with fcmp to fabs(). -0.0 makes this tricky. We need2823// fast-math-flags (nsz) or fsub with +0.0 (not fneg) for this to work.2824static Instruction *foldSelectWithFCmpToFabs(SelectInst &SI,2825InstCombinerImpl &IC) {2826Value *CondVal = SI.getCondition();28272828bool ChangedFMF = false;2829for (bool Swap : {false, true}) {2830Value *TrueVal = SI.getTrueValue();2831Value *X = SI.getFalseValue();2832CmpInst::Predicate Pred;28332834if (Swap)2835std::swap(TrueVal, X);28362837if (!match(CondVal, m_FCmp(Pred, m_Specific(X), m_AnyZeroFP())))2838continue;28392840// fold (X <= +/-0.0) ? (0.0 - X) : X to fabs(X), when 'Swap' is false2841// fold (X > +/-0.0) ? X : (0.0 - X) to fabs(X), when 'Swap' is true2842if (match(TrueVal, m_FSub(m_PosZeroFP(), m_Specific(X)))) {2843if (!Swap && (Pred == FCmpInst::FCMP_OLE || Pred == FCmpInst::FCMP_ULE)) {2844Value *Fabs = IC.Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, &SI);2845return IC.replaceInstUsesWith(SI, Fabs);2846}2847if (Swap && (Pred == FCmpInst::FCMP_OGT || Pred == FCmpInst::FCMP_UGT)) {2848Value *Fabs = IC.Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, &SI);2849return IC.replaceInstUsesWith(SI, Fabs);2850}2851}28522853if (!match(TrueVal, m_FNeg(m_Specific(X))))2854return nullptr;28552856// Forward-propagate nnan and ninf from the fneg to the select.2857// If all inputs are not those values, then the select is not either.2858// Note: nsz is defined differently, so it may not be correct to propagate.2859FastMathFlags FMF = cast<FPMathOperator>(TrueVal)->getFastMathFlags();2860if (FMF.noNaNs() && !SI.hasNoNaNs()) {2861SI.setHasNoNaNs(true);2862ChangedFMF = true;2863}2864if (FMF.noInfs() && !SI.hasNoInfs()) {2865SI.setHasNoInfs(true);2866ChangedFMF = true;2867}28682869// With nsz, when 'Swap' is false:2870// fold (X < +/-0.0) ? -X : X or (X <= +/-0.0) ? -X : X to fabs(X)2871// fold (X > +/-0.0) ? -X : X or (X >= +/-0.0) ? -X : X to -fabs(x)2872// when 'Swap' is true:2873// fold (X > +/-0.0) ? X : -X or (X >= +/-0.0) ? X : -X to fabs(X)2874// fold (X < +/-0.0) ? X : -X or (X <= +/-0.0) ? X : -X to -fabs(X)2875//2876// Note: We require "nnan" for this fold because fcmp ignores the signbit2877// of NAN, but IEEE-754 specifies the signbit of NAN values with2878// fneg/fabs operations.2879if (!SI.hasNoSignedZeros() || !SI.hasNoNaNs())2880return nullptr;28812882if (Swap)2883Pred = FCmpInst::getSwappedPredicate(Pred);28842885bool IsLTOrLE = Pred == FCmpInst::FCMP_OLT || Pred == FCmpInst::FCMP_OLE ||2886Pred == FCmpInst::FCMP_ULT || Pred == FCmpInst::FCMP_ULE;2887bool IsGTOrGE = Pred == FCmpInst::FCMP_OGT || Pred == FCmpInst::FCMP_OGE ||2888Pred == FCmpInst::FCMP_UGT || Pred == FCmpInst::FCMP_UGE;28892890if (IsLTOrLE) {2891Value *Fabs = IC.Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, &SI);2892return IC.replaceInstUsesWith(SI, Fabs);2893}2894if (IsGTOrGE) {2895Value *Fabs = IC.Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, &SI);2896Instruction *NewFNeg = UnaryOperator::CreateFNeg(Fabs);2897NewFNeg->setFastMathFlags(SI.getFastMathFlags());2898return NewFNeg;2899}2900}29012902// Match select with (icmp slt (bitcast X to int), 0)2903// or (icmp sgt (bitcast X to int), -1)29042905for (bool Swap : {false, true}) {2906Value *TrueVal = SI.getTrueValue();2907Value *X = SI.getFalseValue();29082909if (Swap)2910std::swap(TrueVal, X);29112912CmpInst::Predicate Pred;2913const APInt *C;2914bool TrueIfSigned;2915if (!match(CondVal,2916m_ICmp(Pred, m_ElementWiseBitCast(m_Specific(X)), m_APInt(C))) ||2917!isSignBitCheck(Pred, *C, TrueIfSigned))2918continue;2919if (!match(TrueVal, m_FNeg(m_Specific(X))))2920return nullptr;2921if (Swap == TrueIfSigned && !CondVal->hasOneUse() && !TrueVal->hasOneUse())2922return nullptr;29232924// Fold (IsNeg ? -X : X) or (!IsNeg ? X : -X) to fabs(X)2925// Fold (IsNeg ? X : -X) or (!IsNeg ? -X : X) to -fabs(X)2926Value *Fabs = IC.Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, &SI);2927if (Swap != TrueIfSigned)2928return IC.replaceInstUsesWith(SI, Fabs);2929return UnaryOperator::CreateFNegFMF(Fabs, &SI);2930}29312932return ChangedFMF ? &SI : nullptr;2933}29342935// Match the following IR pattern:2936// %x.lowbits = and i8 %x, %lowbitmask2937// %x.lowbits.are.zero = icmp eq i8 %x.lowbits, 02938// %x.biased = add i8 %x, %bias2939// %x.biased.highbits = and i8 %x.biased, %highbitmask2940// %x.roundedup = select i1 %x.lowbits.are.zero, i8 %x, i8 %x.biased.highbits2941// Define:2942// %alignment = add i8 %lowbitmask, 12943// Iff 1. an %alignment is a power-of-two (aka, %lowbitmask is a low bit mask)2944// and 2. %bias is equal to either %lowbitmask or %alignment,2945// and 3. %highbitmask is equal to ~%lowbitmask (aka, to -%alignment)2946// then this pattern can be transformed into:2947// %x.offset = add i8 %x, %lowbitmask2948// %x.roundedup = and i8 %x.offset, %highbitmask2949static Value *2950foldRoundUpIntegerWithPow2Alignment(SelectInst &SI,2951InstCombiner::BuilderTy &Builder) {2952Value *Cond = SI.getCondition();2953Value *X = SI.getTrueValue();2954Value *XBiasedHighBits = SI.getFalseValue();29552956ICmpInst::Predicate Pred;2957Value *XLowBits;2958if (!match(Cond, m_ICmp(Pred, m_Value(XLowBits), m_ZeroInt())) ||2959!ICmpInst::isEquality(Pred))2960return nullptr;29612962if (Pred == ICmpInst::Predicate::ICMP_NE)2963std::swap(X, XBiasedHighBits);29642965// FIXME: we could support non non-splats here.29662967const APInt *LowBitMaskCst;2968if (!match(XLowBits, m_And(m_Specific(X), m_APIntAllowPoison(LowBitMaskCst))))2969return nullptr;29702971// Match even if the AND and ADD are swapped.2972const APInt *BiasCst, *HighBitMaskCst;2973if (!match(XBiasedHighBits,2974m_And(m_Add(m_Specific(X), m_APIntAllowPoison(BiasCst)),2975m_APIntAllowPoison(HighBitMaskCst))) &&2976!match(XBiasedHighBits,2977m_Add(m_And(m_Specific(X), m_APIntAllowPoison(HighBitMaskCst)),2978m_APIntAllowPoison(BiasCst))))2979return nullptr;29802981if (!LowBitMaskCst->isMask())2982return nullptr;29832984APInt InvertedLowBitMaskCst = ~*LowBitMaskCst;2985if (InvertedLowBitMaskCst != *HighBitMaskCst)2986return nullptr;29872988APInt AlignmentCst = *LowBitMaskCst + 1;29892990if (*BiasCst != AlignmentCst && *BiasCst != *LowBitMaskCst)2991return nullptr;29922993if (!XBiasedHighBits->hasOneUse()) {2994// We can't directly return XBiasedHighBits if it is more poisonous.2995if (*BiasCst == *LowBitMaskCst && impliesPoison(XBiasedHighBits, X))2996return XBiasedHighBits;2997return nullptr;2998}29993000// FIXME: could we preserve undef's here?3001Type *Ty = X->getType();3002Value *XOffset = Builder.CreateAdd(X, ConstantInt::get(Ty, *LowBitMaskCst),3003X->getName() + ".biased");3004Value *R = Builder.CreateAnd(XOffset, ConstantInt::get(Ty, *HighBitMaskCst));3005R->takeName(&SI);3006return R;3007}30083009namespace {3010struct DecomposedSelect {3011Value *Cond = nullptr;3012Value *TrueVal = nullptr;3013Value *FalseVal = nullptr;3014};3015} // namespace30163017/// Folds patterns like:3018/// select c2 (select c1 a b) (select c1 b a)3019/// into:3020/// select (xor c1 c2) b a3021static Instruction *3022foldSelectOfSymmetricSelect(SelectInst &OuterSelVal,3023InstCombiner::BuilderTy &Builder) {30243025Value *OuterCond, *InnerCond, *InnerTrueVal, *InnerFalseVal;3026if (!match(3027&OuterSelVal,3028m_Select(m_Value(OuterCond),3029m_OneUse(m_Select(m_Value(InnerCond), m_Value(InnerTrueVal),3030m_Value(InnerFalseVal))),3031m_OneUse(m_Select(m_Deferred(InnerCond),3032m_Deferred(InnerFalseVal),3033m_Deferred(InnerTrueVal))))))3034return nullptr;30353036if (OuterCond->getType() != InnerCond->getType())3037return nullptr;30383039Value *Xor = Builder.CreateXor(InnerCond, OuterCond);3040return SelectInst::Create(Xor, InnerFalseVal, InnerTrueVal);3041}30423043/// Look for patterns like3044/// %outer.cond = select i1 %inner.cond, i1 %alt.cond, i1 false3045/// %inner.sel = select i1 %inner.cond, i8 %inner.sel.t, i8 %inner.sel.f3046/// %outer.sel = select i1 %outer.cond, i8 %outer.sel.t, i8 %inner.sel3047/// and rewrite it as3048/// %inner.sel = select i1 %cond.alternative, i8 %sel.outer.t, i8 %sel.inner.t3049/// %sel.outer = select i1 %cond.inner, i8 %inner.sel, i8 %sel.inner.f3050static Instruction *foldNestedSelects(SelectInst &OuterSelVal,3051InstCombiner::BuilderTy &Builder) {3052// We must start with a `select`.3053DecomposedSelect OuterSel;3054match(&OuterSelVal,3055m_Select(m_Value(OuterSel.Cond), m_Value(OuterSel.TrueVal),3056m_Value(OuterSel.FalseVal)));30573058// Canonicalize inversion of the outermost `select`'s condition.3059if (match(OuterSel.Cond, m_Not(m_Value(OuterSel.Cond))))3060std::swap(OuterSel.TrueVal, OuterSel.FalseVal);30613062// The condition of the outermost select must be an `and`/`or`.3063if (!match(OuterSel.Cond, m_c_LogicalOp(m_Value(), m_Value())))3064return nullptr;30653066// Depending on the logical op, inner select might be in different hand.3067bool IsAndVariant = match(OuterSel.Cond, m_LogicalAnd());3068Value *InnerSelVal = IsAndVariant ? OuterSel.FalseVal : OuterSel.TrueVal;30693070// Profitability check - avoid increasing instruction count.3071if (none_of(ArrayRef<Value *>({OuterSelVal.getCondition(), InnerSelVal}),3072[](Value *V) { return V->hasOneUse(); }))3073return nullptr;30743075// The appropriate hand of the outermost `select` must be a select itself.3076DecomposedSelect InnerSel;3077if (!match(InnerSelVal,3078m_Select(m_Value(InnerSel.Cond), m_Value(InnerSel.TrueVal),3079m_Value(InnerSel.FalseVal))))3080return nullptr;30813082// Canonicalize inversion of the innermost `select`'s condition.3083if (match(InnerSel.Cond, m_Not(m_Value(InnerSel.Cond))))3084std::swap(InnerSel.TrueVal, InnerSel.FalseVal);30853086Value *AltCond = nullptr;3087auto matchOuterCond = [OuterSel, IsAndVariant, &AltCond](auto m_InnerCond) {3088// An unsimplified select condition can match both LogicalAnd and LogicalOr3089// (select true, true, false). Since below we assume that LogicalAnd implies3090// InnerSel match the FVal and vice versa for LogicalOr, we can't match the3091// alternative pattern here.3092return IsAndVariant ? match(OuterSel.Cond,3093m_c_LogicalAnd(m_InnerCond, m_Value(AltCond)))3094: match(OuterSel.Cond,3095m_c_LogicalOr(m_InnerCond, m_Value(AltCond)));3096};30973098// Finally, match the condition that was driving the outermost `select`,3099// it should be a logical operation between the condition that was driving3100// the innermost `select` (after accounting for the possible inversions3101// of the condition), and some other condition.3102if (matchOuterCond(m_Specific(InnerSel.Cond))) {3103// Done!3104} else if (Value * NotInnerCond; matchOuterCond(m_CombineAnd(3105m_Not(m_Specific(InnerSel.Cond)), m_Value(NotInnerCond)))) {3106// Done!3107std::swap(InnerSel.TrueVal, InnerSel.FalseVal);3108InnerSel.Cond = NotInnerCond;3109} else // Not the pattern we were looking for.3110return nullptr;31113112Value *SelInner = Builder.CreateSelect(3113AltCond, IsAndVariant ? OuterSel.TrueVal : InnerSel.FalseVal,3114IsAndVariant ? InnerSel.TrueVal : OuterSel.FalseVal);3115SelInner->takeName(InnerSelVal);3116return SelectInst::Create(InnerSel.Cond,3117IsAndVariant ? SelInner : InnerSel.TrueVal,3118!IsAndVariant ? SelInner : InnerSel.FalseVal);3119}31203121Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) {3122Value *CondVal = SI.getCondition();3123Value *TrueVal = SI.getTrueValue();3124Value *FalseVal = SI.getFalseValue();3125Type *SelType = SI.getType();31263127// Avoid potential infinite loops by checking for non-constant condition.3128// TODO: Can we assert instead by improving canonicalizeSelectToShuffle()?3129// Scalar select must have simplified?3130if (!SelType->isIntOrIntVectorTy(1) || isa<Constant>(CondVal) ||3131TrueVal->getType() != CondVal->getType())3132return nullptr;31333134auto *One = ConstantInt::getTrue(SelType);3135auto *Zero = ConstantInt::getFalse(SelType);3136Value *A, *B, *C, *D;31373138// Folding select to and/or i1 isn't poison safe in general. impliesPoison3139// checks whether folding it does not convert a well-defined value into3140// poison.3141if (match(TrueVal, m_One())) {3142if (impliesPoison(FalseVal, CondVal)) {3143// Change: A = select B, true, C --> A = or B, C3144return BinaryOperator::CreateOr(CondVal, FalseVal);3145}31463147if (match(CondVal, m_OneUse(m_Select(m_Value(A), m_One(), m_Value(B)))) &&3148impliesPoison(FalseVal, B)) {3149// (A || B) || C --> A || (B | C)3150return replaceInstUsesWith(3151SI, Builder.CreateLogicalOr(A, Builder.CreateOr(B, FalseVal)));3152}31533154if (auto *LHS = dyn_cast<FCmpInst>(CondVal))3155if (auto *RHS = dyn_cast<FCmpInst>(FalseVal))3156if (Value *V = foldLogicOfFCmps(LHS, RHS, /*IsAnd*/ false,3157/*IsSelectLogical*/ true))3158return replaceInstUsesWith(SI, V);31593160// (A && B) || (C && B) --> (A || C) && B3161if (match(CondVal, m_LogicalAnd(m_Value(A), m_Value(B))) &&3162match(FalseVal, m_LogicalAnd(m_Value(C), m_Value(D))) &&3163(CondVal->hasOneUse() || FalseVal->hasOneUse())) {3164bool CondLogicAnd = isa<SelectInst>(CondVal);3165bool FalseLogicAnd = isa<SelectInst>(FalseVal);3166auto AndFactorization = [&](Value *Common, Value *InnerCond,3167Value *InnerVal,3168bool SelFirst = false) -> Instruction * {3169Value *InnerSel = Builder.CreateSelect(InnerCond, One, InnerVal);3170if (SelFirst)3171std::swap(Common, InnerSel);3172if (FalseLogicAnd || (CondLogicAnd && Common == A))3173return SelectInst::Create(Common, InnerSel, Zero);3174else3175return BinaryOperator::CreateAnd(Common, InnerSel);3176};31773178if (A == C)3179return AndFactorization(A, B, D);3180if (A == D)3181return AndFactorization(A, B, C);3182if (B == C)3183return AndFactorization(B, A, D);3184if (B == D)3185return AndFactorization(B, A, C, CondLogicAnd && FalseLogicAnd);3186}3187}31883189if (match(FalseVal, m_Zero())) {3190if (impliesPoison(TrueVal, CondVal)) {3191// Change: A = select B, C, false --> A = and B, C3192return BinaryOperator::CreateAnd(CondVal, TrueVal);3193}31943195if (match(CondVal, m_OneUse(m_Select(m_Value(A), m_Value(B), m_Zero()))) &&3196impliesPoison(TrueVal, B)) {3197// (A && B) && C --> A && (B & C)3198return replaceInstUsesWith(3199SI, Builder.CreateLogicalAnd(A, Builder.CreateAnd(B, TrueVal)));3200}32013202if (auto *LHS = dyn_cast<FCmpInst>(CondVal))3203if (auto *RHS = dyn_cast<FCmpInst>(TrueVal))3204if (Value *V = foldLogicOfFCmps(LHS, RHS, /*IsAnd*/ true,3205/*IsSelectLogical*/ true))3206return replaceInstUsesWith(SI, V);32073208// (A || B) && (C || B) --> (A && C) || B3209if (match(CondVal, m_LogicalOr(m_Value(A), m_Value(B))) &&3210match(TrueVal, m_LogicalOr(m_Value(C), m_Value(D))) &&3211(CondVal->hasOneUse() || TrueVal->hasOneUse())) {3212bool CondLogicOr = isa<SelectInst>(CondVal);3213bool TrueLogicOr = isa<SelectInst>(TrueVal);3214auto OrFactorization = [&](Value *Common, Value *InnerCond,3215Value *InnerVal,3216bool SelFirst = false) -> Instruction * {3217Value *InnerSel = Builder.CreateSelect(InnerCond, InnerVal, Zero);3218if (SelFirst)3219std::swap(Common, InnerSel);3220if (TrueLogicOr || (CondLogicOr && Common == A))3221return SelectInst::Create(Common, One, InnerSel);3222else3223return BinaryOperator::CreateOr(Common, InnerSel);3224};32253226if (A == C)3227return OrFactorization(A, B, D);3228if (A == D)3229return OrFactorization(A, B, C);3230if (B == C)3231return OrFactorization(B, A, D);3232if (B == D)3233return OrFactorization(B, A, C, CondLogicOr && TrueLogicOr);3234}3235}32363237// We match the "full" 0 or 1 constant here to avoid a potential infinite3238// loop with vectors that may have undefined/poison elements.3239// select a, false, b -> select !a, b, false3240if (match(TrueVal, m_Specific(Zero))) {3241Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName());3242return SelectInst::Create(NotCond, FalseVal, Zero);3243}3244// select a, b, true -> select !a, true, b3245if (match(FalseVal, m_Specific(One))) {3246Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName());3247return SelectInst::Create(NotCond, One, TrueVal);3248}32493250// DeMorgan in select form: !a && !b --> !(a || b)3251// select !a, !b, false --> not (select a, true, b)3252if (match(&SI, m_LogicalAnd(m_Not(m_Value(A)), m_Not(m_Value(B)))) &&3253(CondVal->hasOneUse() || TrueVal->hasOneUse()) &&3254!match(A, m_ConstantExpr()) && !match(B, m_ConstantExpr()))3255return BinaryOperator::CreateNot(Builder.CreateSelect(A, One, B));32563257// DeMorgan in select form: !a || !b --> !(a && b)3258// select !a, true, !b --> not (select a, b, false)3259if (match(&SI, m_LogicalOr(m_Not(m_Value(A)), m_Not(m_Value(B)))) &&3260(CondVal->hasOneUse() || FalseVal->hasOneUse()) &&3261!match(A, m_ConstantExpr()) && !match(B, m_ConstantExpr()))3262return BinaryOperator::CreateNot(Builder.CreateSelect(A, B, Zero));32633264// select (select a, true, b), true, b -> select a, true, b3265if (match(CondVal, m_Select(m_Value(A), m_One(), m_Value(B))) &&3266match(TrueVal, m_One()) && match(FalseVal, m_Specific(B)))3267return replaceOperand(SI, 0, A);3268// select (select a, b, false), b, false -> select a, b, false3269if (match(CondVal, m_Select(m_Value(A), m_Value(B), m_Zero())) &&3270match(TrueVal, m_Specific(B)) && match(FalseVal, m_Zero()))3271return replaceOperand(SI, 0, A);32723273// ~(A & B) & (A | B) --> A ^ B3274if (match(&SI, m_c_LogicalAnd(m_Not(m_LogicalAnd(m_Value(A), m_Value(B))),3275m_c_LogicalOr(m_Deferred(A), m_Deferred(B)))))3276return BinaryOperator::CreateXor(A, B);32773278// select (~a | c), a, b -> select a, (select c, true, b), false3279if (match(CondVal,3280m_OneUse(m_c_Or(m_Not(m_Specific(TrueVal)), m_Value(C))))) {3281Value *OrV = Builder.CreateSelect(C, One, FalseVal);3282return SelectInst::Create(TrueVal, OrV, Zero);3283}3284// select (c & b), a, b -> select b, (select ~c, true, a), false3285if (match(CondVal, m_OneUse(m_c_And(m_Value(C), m_Specific(FalseVal))))) {3286if (Value *NotC = getFreelyInverted(C, C->hasOneUse(), &Builder)) {3287Value *OrV = Builder.CreateSelect(NotC, One, TrueVal);3288return SelectInst::Create(FalseVal, OrV, Zero);3289}3290}3291// select (a | c), a, b -> select a, true, (select ~c, b, false)3292if (match(CondVal, m_OneUse(m_c_Or(m_Specific(TrueVal), m_Value(C))))) {3293if (Value *NotC = getFreelyInverted(C, C->hasOneUse(), &Builder)) {3294Value *AndV = Builder.CreateSelect(NotC, FalseVal, Zero);3295return SelectInst::Create(TrueVal, One, AndV);3296}3297}3298// select (c & ~b), a, b -> select b, true, (select c, a, false)3299if (match(CondVal,3300m_OneUse(m_c_And(m_Value(C), m_Not(m_Specific(FalseVal)))))) {3301Value *AndV = Builder.CreateSelect(C, TrueVal, Zero);3302return SelectInst::Create(FalseVal, One, AndV);3303}33043305if (match(FalseVal, m_Zero()) || match(TrueVal, m_One())) {3306Use *Y = nullptr;3307bool IsAnd = match(FalseVal, m_Zero()) ? true : false;3308Value *Op1 = IsAnd ? TrueVal : FalseVal;3309if (isCheckForZeroAndMulWithOverflow(CondVal, Op1, IsAnd, Y)) {3310auto *FI = new FreezeInst(*Y, (*Y)->getName() + ".fr");3311InsertNewInstBefore(FI, cast<Instruction>(Y->getUser())->getIterator());3312replaceUse(*Y, FI);3313return replaceInstUsesWith(SI, Op1);3314}33153316if (auto *ICmp0 = dyn_cast<ICmpInst>(CondVal))3317if (auto *ICmp1 = dyn_cast<ICmpInst>(Op1))3318if (auto *V = foldAndOrOfICmps(ICmp0, ICmp1, SI, IsAnd,3319/* IsLogical */ true))3320return replaceInstUsesWith(SI, V);3321}33223323// select (a || b), c, false -> select a, c, false3324// select c, (a || b), false -> select c, a, false3325// if c implies that b is false.3326if (match(CondVal, m_LogicalOr(m_Value(A), m_Value(B))) &&3327match(FalseVal, m_Zero())) {3328std::optional<bool> Res = isImpliedCondition(TrueVal, B, DL);3329if (Res && *Res == false)3330return replaceOperand(SI, 0, A);3331}3332if (match(TrueVal, m_LogicalOr(m_Value(A), m_Value(B))) &&3333match(FalseVal, m_Zero())) {3334std::optional<bool> Res = isImpliedCondition(CondVal, B, DL);3335if (Res && *Res == false)3336return replaceOperand(SI, 1, A);3337}3338// select c, true, (a && b) -> select c, true, a3339// select (a && b), true, c -> select a, true, c3340// if c = false implies that b = true3341if (match(TrueVal, m_One()) &&3342match(FalseVal, m_LogicalAnd(m_Value(A), m_Value(B)))) {3343std::optional<bool> Res = isImpliedCondition(CondVal, B, DL, false);3344if (Res && *Res == true)3345return replaceOperand(SI, 2, A);3346}3347if (match(CondVal, m_LogicalAnd(m_Value(A), m_Value(B))) &&3348match(TrueVal, m_One())) {3349std::optional<bool> Res = isImpliedCondition(FalseVal, B, DL, false);3350if (Res && *Res == true)3351return replaceOperand(SI, 0, A);3352}33533354if (match(TrueVal, m_One())) {3355Value *C;33563357// (C && A) || (!C && B) --> sel C, A, B3358// (A && C) || (!C && B) --> sel C, A, B3359// (C && A) || (B && !C) --> sel C, A, B3360// (A && C) || (B && !C) --> sel C, A, B (may require freeze)3361if (match(FalseVal, m_c_LogicalAnd(m_Not(m_Value(C)), m_Value(B))) &&3362match(CondVal, m_c_LogicalAnd(m_Specific(C), m_Value(A)))) {3363auto *SelCond = dyn_cast<SelectInst>(CondVal);3364auto *SelFVal = dyn_cast<SelectInst>(FalseVal);3365bool MayNeedFreeze = SelCond && SelFVal &&3366match(SelFVal->getTrueValue(),3367m_Not(m_Specific(SelCond->getTrueValue())));3368if (MayNeedFreeze)3369C = Builder.CreateFreeze(C);3370return SelectInst::Create(C, A, B);3371}33723373// (!C && A) || (C && B) --> sel C, B, A3374// (A && !C) || (C && B) --> sel C, B, A3375// (!C && A) || (B && C) --> sel C, B, A3376// (A && !C) || (B && C) --> sel C, B, A (may require freeze)3377if (match(CondVal, m_c_LogicalAnd(m_Not(m_Value(C)), m_Value(A))) &&3378match(FalseVal, m_c_LogicalAnd(m_Specific(C), m_Value(B)))) {3379auto *SelCond = dyn_cast<SelectInst>(CondVal);3380auto *SelFVal = dyn_cast<SelectInst>(FalseVal);3381bool MayNeedFreeze = SelCond && SelFVal &&3382match(SelCond->getTrueValue(),3383m_Not(m_Specific(SelFVal->getTrueValue())));3384if (MayNeedFreeze)3385C = Builder.CreateFreeze(C);3386return SelectInst::Create(C, B, A);3387}3388}33893390return nullptr;3391}33923393// Return true if we can safely remove the select instruction for std::bit_ceil3394// pattern.3395static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0,3396const APInt *Cond1, Value *CtlzOp,3397unsigned BitWidth,3398bool &ShouldDropNUW) {3399// The challenge in recognizing std::bit_ceil(X) is that the operand is used3400// for the CTLZ proper and select condition, each possibly with some3401// operation like add and sub.3402//3403// Our aim is to make sure that -ctlz & (BitWidth - 1) == 0 even when the3404// select instruction would select 1, which allows us to get rid of the select3405// instruction.3406//3407// To see if we can do so, we do some symbolic execution with ConstantRange.3408// Specifically, we compute the range of values that Cond0 could take when3409// Cond == false. Then we successively transform the range until we obtain3410// the range of values that CtlzOp could take.3411//3412// Conceptually, we follow the def-use chain backward from Cond0 while3413// transforming the range for Cond0 until we meet the common ancestor of Cond03414// and CtlzOp. Then we follow the def-use chain forward until we obtain the3415// range for CtlzOp. That said, we only follow at most one ancestor from3416// Cond0. Likewise, we only follow at most one ancestor from CtrlOp.34173418ConstantRange CR = ConstantRange::makeExactICmpRegion(3419CmpInst::getInversePredicate(Pred), *Cond1);34203421ShouldDropNUW = false;34223423// Match the operation that's used to compute CtlzOp from CommonAncestor. If3424// CtlzOp == CommonAncestor, return true as no operation is needed. If a3425// match is found, execute the operation on CR, update CR, and return true.3426// Otherwise, return false.3427auto MatchForward = [&](Value *CommonAncestor) {3428const APInt *C = nullptr;3429if (CtlzOp == CommonAncestor)3430return true;3431if (match(CtlzOp, m_Add(m_Specific(CommonAncestor), m_APInt(C)))) {3432CR = CR.add(*C);3433return true;3434}3435if (match(CtlzOp, m_Sub(m_APInt(C), m_Specific(CommonAncestor)))) {3436ShouldDropNUW = true;3437CR = ConstantRange(*C).sub(CR);3438return true;3439}3440if (match(CtlzOp, m_Not(m_Specific(CommonAncestor)))) {3441CR = CR.binaryNot();3442return true;3443}3444return false;3445};34463447const APInt *C = nullptr;3448Value *CommonAncestor;3449if (MatchForward(Cond0)) {3450// Cond0 is either CtlzOp or CtlzOp's parent. CR has been updated.3451} else if (match(Cond0, m_Add(m_Value(CommonAncestor), m_APInt(C)))) {3452CR = CR.sub(*C);3453if (!MatchForward(CommonAncestor))3454return false;3455// Cond0's parent is either CtlzOp or CtlzOp's parent. CR has been updated.3456} else {3457return false;3458}34593460// Return true if all the values in the range are either 0 or negative (if3461// treated as signed). We do so by evaluating:3462//3463// CR - 1 u>= (1 << BitWidth) - 1.3464APInt IntMax = APInt::getSignMask(BitWidth) - 1;3465CR = CR.sub(APInt(BitWidth, 1));3466return CR.icmp(ICmpInst::ICMP_UGE, IntMax);3467}34683469// Transform the std::bit_ceil(X) pattern like:3470//3471// %dec = add i32 %x, -13472// %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false)3473// %sub = sub i32 32, %ctlz3474// %shl = shl i32 1, %sub3475// %ugt = icmp ugt i32 %x, 13476// %sel = select i1 %ugt, i32 %shl, i32 13477//3478// into:3479//3480// %dec = add i32 %x, -13481// %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false)3482// %neg = sub i32 0, %ctlz3483// %masked = and i32 %ctlz, 313484// %shl = shl i32 1, %sub3485//3486// Note that the select is optimized away while the shift count is masked with3487// 31. We handle some variations of the input operand like std::bit_ceil(X +3488// 1).3489static Instruction *foldBitCeil(SelectInst &SI, IRBuilderBase &Builder) {3490Type *SelType = SI.getType();3491unsigned BitWidth = SelType->getScalarSizeInBits();34923493Value *FalseVal = SI.getFalseValue();3494Value *TrueVal = SI.getTrueValue();3495ICmpInst::Predicate Pred;3496const APInt *Cond1;3497Value *Cond0, *Ctlz, *CtlzOp;3498if (!match(SI.getCondition(), m_ICmp(Pred, m_Value(Cond0), m_APInt(Cond1))))3499return nullptr;35003501if (match(TrueVal, m_One())) {3502std::swap(FalseVal, TrueVal);3503Pred = CmpInst::getInversePredicate(Pred);3504}35053506bool ShouldDropNUW;35073508if (!match(FalseVal, m_One()) ||3509!match(TrueVal,3510m_OneUse(m_Shl(m_One(), m_OneUse(m_Sub(m_SpecificInt(BitWidth),3511m_Value(Ctlz)))))) ||3512!match(Ctlz, m_Intrinsic<Intrinsic::ctlz>(m_Value(CtlzOp), m_Zero())) ||3513!isSafeToRemoveBitCeilSelect(Pred, Cond0, Cond1, CtlzOp, BitWidth,3514ShouldDropNUW))3515return nullptr;35163517if (ShouldDropNUW)3518cast<Instruction>(CtlzOp)->setHasNoUnsignedWrap(false);35193520// Build 1 << (-CTLZ & (BitWidth-1)). The negation likely corresponds to a3521// single hardware instruction as opposed to BitWidth - CTLZ, where BitWidth3522// is an integer constant. Masking with BitWidth-1 comes free on some3523// hardware as part of the shift instruction.3524Value *Neg = Builder.CreateNeg(Ctlz);3525Value *Masked =3526Builder.CreateAnd(Neg, ConstantInt::get(SelType, BitWidth - 1));3527return BinaryOperator::Create(Instruction::Shl, ConstantInt::get(SelType, 1),3528Masked);3529}35303531bool InstCombinerImpl::fmulByZeroIsZero(Value *MulVal, FastMathFlags FMF,3532const Instruction *CtxI) const {3533KnownFPClass Known = computeKnownFPClass(MulVal, FMF, fcNegative, CtxI);35343535return Known.isKnownNeverNaN() && Known.isKnownNeverInfinity() &&3536(FMF.noSignedZeros() || Known.signBitIsZeroOrNaN());3537}35383539static bool matchFMulByZeroIfResultEqZero(InstCombinerImpl &IC, Value *Cmp0,3540Value *Cmp1, Value *TrueVal,3541Value *FalseVal, Instruction &CtxI,3542bool SelectIsNSZ) {3543Value *MulRHS;3544if (match(Cmp1, m_PosZeroFP()) &&3545match(TrueVal, m_c_FMul(m_Specific(Cmp0), m_Value(MulRHS)))) {3546FastMathFlags FMF = cast<FPMathOperator>(TrueVal)->getFastMathFlags();3547// nsz must be on the select, it must be ignored on the multiply. We3548// need nnan and ninf on the multiply for the other value.3549FMF.setNoSignedZeros(SelectIsNSZ);3550return IC.fmulByZeroIsZero(MulRHS, FMF, &CtxI);3551}35523553return false;3554}35553556/// Check whether the KnownBits of a select arm may be affected by the3557/// select condition.3558static bool hasAffectedValue(Value *V, SmallPtrSetImpl<Value *> &Affected,3559unsigned Depth) {3560if (Depth == MaxAnalysisRecursionDepth)3561return false;35623563// Ignore the case where the select arm itself is affected. These cases3564// are handled more efficiently by other optimizations.3565if (Depth != 0 && Affected.contains(V))3566return true;35673568if (auto *I = dyn_cast<Instruction>(V)) {3569if (isa<PHINode>(I)) {3570if (Depth == MaxAnalysisRecursionDepth - 1)3571return false;3572Depth = MaxAnalysisRecursionDepth - 2;3573}3574return any_of(I->operands(), [&](Value *Op) {3575return Op->getType()->isIntOrIntVectorTy() &&3576hasAffectedValue(Op, Affected, Depth + 1);3577});3578}35793580return false;3581}35823583Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {3584Value *CondVal = SI.getCondition();3585Value *TrueVal = SI.getTrueValue();3586Value *FalseVal = SI.getFalseValue();3587Type *SelType = SI.getType();35883589if (Value *V = simplifySelectInst(CondVal, TrueVal, FalseVal,3590SQ.getWithInstruction(&SI)))3591return replaceInstUsesWith(SI, V);35923593if (Instruction *I = canonicalizeSelectToShuffle(SI))3594return I;35953596if (Instruction *I = canonicalizeScalarSelectOfVecs(SI, *this))3597return I;35983599// If the type of select is not an integer type or if the condition and3600// the selection type are not both scalar nor both vector types, there is no3601// point in attempting to match these patterns.3602Type *CondType = CondVal->getType();3603if (!isa<Constant>(CondVal) && SelType->isIntOrIntVectorTy() &&3604CondType->isVectorTy() == SelType->isVectorTy()) {3605if (Value *S = simplifyWithOpReplaced(TrueVal, CondVal,3606ConstantInt::getTrue(CondType), SQ,3607/* AllowRefinement */ true))3608return replaceOperand(SI, 1, S);36093610if (Value *S = simplifyWithOpReplaced(FalseVal, CondVal,3611ConstantInt::getFalse(CondType), SQ,3612/* AllowRefinement */ true))3613return replaceOperand(SI, 2, S);3614}36153616if (Instruction *R = foldSelectOfBools(SI))3617return R;36183619// Selecting between two integer or vector splat integer constants?3620//3621// Note that we don't handle a scalar select of vectors:3622// select i1 %c, <2 x i8> <1, 1>, <2 x i8> <0, 0>3623// because that may need 3 instructions to splat the condition value:3624// extend, insertelement, shufflevector.3625//3626// Do not handle i1 TrueVal and FalseVal otherwise would result in3627// zext/sext i1 to i1.3628if (SelType->isIntOrIntVectorTy() && !SelType->isIntOrIntVectorTy(1) &&3629CondVal->getType()->isVectorTy() == SelType->isVectorTy()) {3630// select C, 1, 0 -> zext C to int3631if (match(TrueVal, m_One()) && match(FalseVal, m_Zero()))3632return new ZExtInst(CondVal, SelType);36333634// select C, -1, 0 -> sext C to int3635if (match(TrueVal, m_AllOnes()) && match(FalseVal, m_Zero()))3636return new SExtInst(CondVal, SelType);36373638// select C, 0, 1 -> zext !C to int3639if (match(TrueVal, m_Zero()) && match(FalseVal, m_One())) {3640Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName());3641return new ZExtInst(NotCond, SelType);3642}36433644// select C, 0, -1 -> sext !C to int3645if (match(TrueVal, m_Zero()) && match(FalseVal, m_AllOnes())) {3646Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName());3647return new SExtInst(NotCond, SelType);3648}3649}36503651auto *SIFPOp = dyn_cast<FPMathOperator>(&SI);36523653if (auto *FCmp = dyn_cast<FCmpInst>(CondVal)) {3654FCmpInst::Predicate Pred = FCmp->getPredicate();3655Value *Cmp0 = FCmp->getOperand(0), *Cmp1 = FCmp->getOperand(1);3656// Are we selecting a value based on a comparison of the two values?3657if ((Cmp0 == TrueVal && Cmp1 == FalseVal) ||3658(Cmp0 == FalseVal && Cmp1 == TrueVal)) {3659// Canonicalize to use ordered comparisons by swapping the select3660// operands.3661//3662// e.g.3663// (X ugt Y) ? X : Y -> (X ole Y) ? Y : X3664if (FCmp->hasOneUse() && FCmpInst::isUnordered(Pred)) {3665FCmpInst::Predicate InvPred = FCmp->getInversePredicate();3666IRBuilder<>::FastMathFlagGuard FMFG(Builder);3667// FIXME: The FMF should propagate from the select, not the fcmp.3668Builder.setFastMathFlags(FCmp->getFastMathFlags());3669Value *NewCond = Builder.CreateFCmp(InvPred, Cmp0, Cmp1,3670FCmp->getName() + ".inv");3671Value *NewSel = Builder.CreateSelect(NewCond, FalseVal, TrueVal);3672return replaceInstUsesWith(SI, NewSel);3673}3674}36753676if (SIFPOp) {3677// Fold out scale-if-equals-zero pattern.3678//3679// This pattern appears in code with denormal range checks after it's3680// assumed denormals are treated as zero. This drops a canonicalization.36813682// TODO: Could relax the signed zero logic. We just need to know the sign3683// of the result matches (fmul x, y has the same sign as x).3684//3685// TODO: Handle always-canonicalizing variant that selects some value or 13686// scaling factor in the fmul visitor.36873688// TODO: Handle ldexp too36893690Value *MatchCmp0 = nullptr;3691Value *MatchCmp1 = nullptr;36923693// (select (fcmp [ou]eq x, 0.0), (fmul x, K), x => x3694// (select (fcmp [ou]ne x, 0.0), x, (fmul x, K) => x3695if (Pred == CmpInst::FCMP_OEQ || Pred == CmpInst::FCMP_UEQ) {3696MatchCmp0 = FalseVal;3697MatchCmp1 = TrueVal;3698} else if (Pred == CmpInst::FCMP_ONE || Pred == CmpInst::FCMP_UNE) {3699MatchCmp0 = TrueVal;3700MatchCmp1 = FalseVal;3701}37023703if (Cmp0 == MatchCmp0 &&3704matchFMulByZeroIfResultEqZero(*this, Cmp0, Cmp1, MatchCmp1, MatchCmp0,3705SI, SIFPOp->hasNoSignedZeros()))3706return replaceInstUsesWith(SI, Cmp0);3707}3708}37093710if (SIFPOp) {3711// TODO: Try to forward-propagate FMF from select arms to the select.37123713// Canonicalize select of FP values where NaN and -0.0 are not valid as3714// minnum/maxnum intrinsics.3715if (SIFPOp->hasNoNaNs() && SIFPOp->hasNoSignedZeros()) {3716Value *X, *Y;3717if (match(&SI, m_OrdFMax(m_Value(X), m_Value(Y))))3718return replaceInstUsesWith(3719SI, Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, X, Y, &SI));37203721if (match(&SI, m_OrdFMin(m_Value(X), m_Value(Y))))3722return replaceInstUsesWith(3723SI, Builder.CreateBinaryIntrinsic(Intrinsic::minnum, X, Y, &SI));3724}3725}37263727// Fold selecting to fabs.3728if (Instruction *Fabs = foldSelectWithFCmpToFabs(SI, *this))3729return Fabs;37303731// See if we are selecting two values based on a comparison of the two values.3732if (ICmpInst *ICI = dyn_cast<ICmpInst>(CondVal))3733if (Instruction *Result = foldSelectInstWithICmp(SI, ICI))3734return Result;37353736if (Instruction *Add = foldAddSubSelect(SI, Builder))3737return Add;3738if (Instruction *Add = foldOverflowingAddSubSelect(SI, Builder))3739return Add;3740if (Instruction *Or = foldSetClearBits(SI, Builder))3741return Or;3742if (Instruction *Mul = foldSelectZeroOrMul(SI, *this))3743return Mul;37443745// Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z))3746auto *TI = dyn_cast<Instruction>(TrueVal);3747auto *FI = dyn_cast<Instruction>(FalseVal);3748if (TI && FI && TI->getOpcode() == FI->getOpcode())3749if (Instruction *IV = foldSelectOpOp(SI, TI, FI))3750return IV;37513752if (Instruction *I = foldSelectExtConst(SI))3753return I;37543755if (Instruction *I = foldSelectWithSRem(SI, *this, Builder))3756return I;37573758// Fold (select C, (gep Ptr, Idx), Ptr) -> (gep Ptr, (select C, Idx, 0))3759// Fold (select C, Ptr, (gep Ptr, Idx)) -> (gep Ptr, (select C, 0, Idx))3760auto SelectGepWithBase = [&](GetElementPtrInst *Gep, Value *Base,3761bool Swap) -> GetElementPtrInst * {3762Value *Ptr = Gep->getPointerOperand();3763if (Gep->getNumOperands() != 2 || Gep->getPointerOperand() != Base ||3764!Gep->hasOneUse())3765return nullptr;3766Value *Idx = Gep->getOperand(1);3767if (isa<VectorType>(CondVal->getType()) && !isa<VectorType>(Idx->getType()))3768return nullptr;3769Type *ElementType = Gep->getSourceElementType();3770Value *NewT = Idx;3771Value *NewF = Constant::getNullValue(Idx->getType());3772if (Swap)3773std::swap(NewT, NewF);3774Value *NewSI =3775Builder.CreateSelect(CondVal, NewT, NewF, SI.getName() + ".idx", &SI);3776return GetElementPtrInst::Create(ElementType, Ptr, NewSI,3777Gep->getNoWrapFlags());3778};3779if (auto *TrueGep = dyn_cast<GetElementPtrInst>(TrueVal))3780if (auto *NewGep = SelectGepWithBase(TrueGep, FalseVal, false))3781return NewGep;3782if (auto *FalseGep = dyn_cast<GetElementPtrInst>(FalseVal))3783if (auto *NewGep = SelectGepWithBase(FalseGep, TrueVal, true))3784return NewGep;37853786// See if we can fold the select into one of our operands.3787if (SelType->isIntOrIntVectorTy() || SelType->isFPOrFPVectorTy()) {3788if (Instruction *FoldI = foldSelectIntoOp(SI, TrueVal, FalseVal))3789return FoldI;37903791Value *LHS, *RHS;3792Instruction::CastOps CastOp;3793SelectPatternResult SPR = matchSelectPattern(&SI, LHS, RHS, &CastOp);3794auto SPF = SPR.Flavor;3795if (SPF) {3796Value *LHS2, *RHS2;3797if (SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor)3798if (Instruction *R = foldSPFofSPF(cast<Instruction>(LHS), SPF2, LHS2,3799RHS2, SI, SPF, RHS))3800return R;3801if (SelectPatternFlavor SPF2 = matchSelectPattern(RHS, LHS2, RHS2).Flavor)3802if (Instruction *R = foldSPFofSPF(cast<Instruction>(RHS), SPF2, LHS2,3803RHS2, SI, SPF, LHS))3804return R;3805}38063807if (SelectPatternResult::isMinOrMax(SPF)) {3808// Canonicalize so that3809// - type casts are outside select patterns.3810// - float clamp is transformed to min/max pattern38113812bool IsCastNeeded = LHS->getType() != SelType;3813Value *CmpLHS = cast<CmpInst>(CondVal)->getOperand(0);3814Value *CmpRHS = cast<CmpInst>(CondVal)->getOperand(1);3815if (IsCastNeeded ||3816(LHS->getType()->isFPOrFPVectorTy() &&3817((CmpLHS != LHS && CmpLHS != RHS) ||3818(CmpRHS != LHS && CmpRHS != RHS)))) {3819CmpInst::Predicate MinMaxPred = getMinMaxPred(SPF, SPR.Ordered);38203821Value *Cmp;3822if (CmpInst::isIntPredicate(MinMaxPred)) {3823Cmp = Builder.CreateICmp(MinMaxPred, LHS, RHS);3824} else {3825IRBuilder<>::FastMathFlagGuard FMFG(Builder);3826auto FMF =3827cast<FPMathOperator>(SI.getCondition())->getFastMathFlags();3828Builder.setFastMathFlags(FMF);3829Cmp = Builder.CreateFCmp(MinMaxPred, LHS, RHS);3830}38313832Value *NewSI = Builder.CreateSelect(Cmp, LHS, RHS, SI.getName(), &SI);3833if (!IsCastNeeded)3834return replaceInstUsesWith(SI, NewSI);38353836Value *NewCast = Builder.CreateCast(CastOp, NewSI, SelType);3837return replaceInstUsesWith(SI, NewCast);3838}3839}3840}38413842// See if we can fold the select into a phi node if the condition is a select.3843if (auto *PN = dyn_cast<PHINode>(SI.getCondition()))3844// The true/false values have to be live in the PHI predecessor's blocks.3845if (canSelectOperandBeMappingIntoPredBlock(TrueVal, SI) &&3846canSelectOperandBeMappingIntoPredBlock(FalseVal, SI))3847if (Instruction *NV = foldOpIntoPhi(SI, PN))3848return NV;38493850if (SelectInst *TrueSI = dyn_cast<SelectInst>(TrueVal)) {3851if (TrueSI->getCondition()->getType() == CondVal->getType()) {3852// Fold nested selects if the inner condition can be implied by the outer3853// condition.3854if (Value *V = simplifyNestedSelectsUsingImpliedCond(3855*TrueSI, CondVal, /*CondIsTrue=*/true, DL))3856return replaceOperand(SI, 1, V);38573858// select(C0, select(C1, a, b), b) -> select(C0&C1, a, b)3859// We choose this as normal form to enable folding on the And and3860// shortening paths for the values (this helps getUnderlyingObjects() for3861// example).3862if (TrueSI->getFalseValue() == FalseVal && TrueSI->hasOneUse()) {3863Value *And = Builder.CreateLogicalAnd(CondVal, TrueSI->getCondition());3864replaceOperand(SI, 0, And);3865replaceOperand(SI, 1, TrueSI->getTrueValue());3866return &SI;3867}3868}3869}3870if (SelectInst *FalseSI = dyn_cast<SelectInst>(FalseVal)) {3871if (FalseSI->getCondition()->getType() == CondVal->getType()) {3872// Fold nested selects if the inner condition can be implied by the outer3873// condition.3874if (Value *V = simplifyNestedSelectsUsingImpliedCond(3875*FalseSI, CondVal, /*CondIsTrue=*/false, DL))3876return replaceOperand(SI, 2, V);38773878// select(C0, a, select(C1, a, b)) -> select(C0|C1, a, b)3879if (FalseSI->getTrueValue() == TrueVal && FalseSI->hasOneUse()) {3880Value *Or = Builder.CreateLogicalOr(CondVal, FalseSI->getCondition());3881replaceOperand(SI, 0, Or);3882replaceOperand(SI, 2, FalseSI->getFalseValue());3883return &SI;3884}3885}3886}38873888// Try to simplify a binop sandwiched between 2 selects with the same3889// condition. This is not valid for div/rem because the select might be3890// preventing a division-by-zero.3891// TODO: A div/rem restriction is conservative; use something like3892// isSafeToSpeculativelyExecute().3893// select(C, binop(select(C, X, Y), W), Z) -> select(C, binop(X, W), Z)3894BinaryOperator *TrueBO;3895if (match(TrueVal, m_OneUse(m_BinOp(TrueBO))) && !TrueBO->isIntDivRem()) {3896if (auto *TrueBOSI = dyn_cast<SelectInst>(TrueBO->getOperand(0))) {3897if (TrueBOSI->getCondition() == CondVal) {3898replaceOperand(*TrueBO, 0, TrueBOSI->getTrueValue());3899Worklist.push(TrueBO);3900return &SI;3901}3902}3903if (auto *TrueBOSI = dyn_cast<SelectInst>(TrueBO->getOperand(1))) {3904if (TrueBOSI->getCondition() == CondVal) {3905replaceOperand(*TrueBO, 1, TrueBOSI->getTrueValue());3906Worklist.push(TrueBO);3907return &SI;3908}3909}3910}39113912// select(C, Z, binop(select(C, X, Y), W)) -> select(C, Z, binop(Y, W))3913BinaryOperator *FalseBO;3914if (match(FalseVal, m_OneUse(m_BinOp(FalseBO))) && !FalseBO->isIntDivRem()) {3915if (auto *FalseBOSI = dyn_cast<SelectInst>(FalseBO->getOperand(0))) {3916if (FalseBOSI->getCondition() == CondVal) {3917replaceOperand(*FalseBO, 0, FalseBOSI->getFalseValue());3918Worklist.push(FalseBO);3919return &SI;3920}3921}3922if (auto *FalseBOSI = dyn_cast<SelectInst>(FalseBO->getOperand(1))) {3923if (FalseBOSI->getCondition() == CondVal) {3924replaceOperand(*FalseBO, 1, FalseBOSI->getFalseValue());3925Worklist.push(FalseBO);3926return &SI;3927}3928}3929}39303931Value *NotCond;3932if (match(CondVal, m_Not(m_Value(NotCond))) &&3933!InstCombiner::shouldAvoidAbsorbingNotIntoSelect(SI)) {3934replaceOperand(SI, 0, NotCond);3935SI.swapValues();3936SI.swapProfMetadata();3937return &SI;3938}39393940if (Instruction *I = foldVectorSelect(SI))3941return I;39423943// If we can compute the condition, there's no need for a select.3944// Like the above fold, we are attempting to reduce compile-time cost by3945// putting this fold here with limitations rather than in InstSimplify.3946// The motivation for this call into value tracking is to take advantage of3947// the assumption cache, so make sure that is populated.3948if (!CondVal->getType()->isVectorTy() && !AC.assumptions().empty()) {3949KnownBits Known(1);3950computeKnownBits(CondVal, Known, 0, &SI);3951if (Known.One.isOne())3952return replaceInstUsesWith(SI, TrueVal);3953if (Known.Zero.isOne())3954return replaceInstUsesWith(SI, FalseVal);3955}39563957if (Instruction *BitCastSel = foldSelectCmpBitcasts(SI, Builder))3958return BitCastSel;39593960// Simplify selects that test the returned flag of cmpxchg instructions.3961if (Value *V = foldSelectCmpXchg(SI))3962return replaceInstUsesWith(SI, V);39633964if (Instruction *Select = foldSelectBinOpIdentity(SI, TLI, *this))3965return Select;39663967if (Instruction *Funnel = foldSelectFunnelShift(SI, Builder))3968return Funnel;39693970if (Instruction *Copysign = foldSelectToCopysign(SI, Builder))3971return Copysign;39723973if (Instruction *PN = foldSelectToPhi(SI, DT, Builder))3974return replaceInstUsesWith(SI, PN);39753976if (Value *Fr = foldSelectWithFrozenICmp(SI, Builder))3977return replaceInstUsesWith(SI, Fr);39783979if (Value *V = foldRoundUpIntegerWithPow2Alignment(SI, Builder))3980return replaceInstUsesWith(SI, V);39813982// select(mask, mload(,,mask,0), 0) -> mload(,,mask,0)3983// Load inst is intentionally not checked for hasOneUse()3984if (match(FalseVal, m_Zero()) &&3985(match(TrueVal, m_MaskedLoad(m_Value(), m_Value(), m_Specific(CondVal),3986m_CombineOr(m_Undef(), m_Zero()))) ||3987match(TrueVal, m_MaskedGather(m_Value(), m_Value(), m_Specific(CondVal),3988m_CombineOr(m_Undef(), m_Zero()))))) {3989auto *MaskedInst = cast<IntrinsicInst>(TrueVal);3990if (isa<UndefValue>(MaskedInst->getArgOperand(3)))3991MaskedInst->setArgOperand(3, FalseVal /* Zero */);3992return replaceInstUsesWith(SI, MaskedInst);3993}39943995Value *Mask;3996if (match(TrueVal, m_Zero()) &&3997(match(FalseVal, m_MaskedLoad(m_Value(), m_Value(), m_Value(Mask),3998m_CombineOr(m_Undef(), m_Zero()))) ||3999match(FalseVal, m_MaskedGather(m_Value(), m_Value(), m_Value(Mask),4000m_CombineOr(m_Undef(), m_Zero())))) &&4001(CondVal->getType() == Mask->getType())) {4002// We can remove the select by ensuring the load zeros all lanes the4003// select would have. We determine this by proving there is no overlap4004// between the load and select masks.4005// (i.e (load_mask & select_mask) == 0 == no overlap)4006bool CanMergeSelectIntoLoad = false;4007if (Value *V = simplifyAndInst(CondVal, Mask, SQ.getWithInstruction(&SI)))4008CanMergeSelectIntoLoad = match(V, m_Zero());40094010if (CanMergeSelectIntoLoad) {4011auto *MaskedInst = cast<IntrinsicInst>(FalseVal);4012if (isa<UndefValue>(MaskedInst->getArgOperand(3)))4013MaskedInst->setArgOperand(3, TrueVal /* Zero */);4014return replaceInstUsesWith(SI, MaskedInst);4015}4016}40174018if (Instruction *I = foldSelectOfSymmetricSelect(SI, Builder))4019return I;40204021if (Instruction *I = foldNestedSelects(SI, Builder))4022return I;40234024// Match logical variants of the pattern,4025// and transform them iff that gets rid of inversions.4026// (~x) | y --> ~(x & (~y))4027// (~x) & y --> ~(x | (~y))4028if (sinkNotIntoOtherHandOfLogicalOp(SI))4029return &SI;40304031if (Instruction *I = foldBitCeil(SI, Builder))4032return I;40334034// Fold:4035// (select A && B, T, F) -> (select A, (select B, T, F), F)4036// (select A || B, T, F) -> (select A, T, (select B, T, F))4037// if (select B, T, F) is foldable.4038// TODO: preserve FMF flags4039auto FoldSelectWithAndOrCond = [&](bool IsAnd, Value *A,4040Value *B) -> Instruction * {4041if (Value *V = simplifySelectInst(B, TrueVal, FalseVal,4042SQ.getWithInstruction(&SI)))4043return SelectInst::Create(A, IsAnd ? V : TrueVal, IsAnd ? FalseVal : V);40444045// Is (select B, T, F) a SPF?4046if (CondVal->hasOneUse() && SelType->isIntOrIntVectorTy()) {4047if (ICmpInst *Cmp = dyn_cast<ICmpInst>(B))4048if (Value *V = canonicalizeSPF(*Cmp, TrueVal, FalseVal, *this))4049return SelectInst::Create(A, IsAnd ? V : TrueVal,4050IsAnd ? FalseVal : V);4051}40524053return nullptr;4054};40554056Value *LHS, *RHS;4057if (match(CondVal, m_And(m_Value(LHS), m_Value(RHS)))) {4058if (Instruction *I = FoldSelectWithAndOrCond(/*IsAnd*/ true, LHS, RHS))4059return I;4060if (Instruction *I = FoldSelectWithAndOrCond(/*IsAnd*/ true, RHS, LHS))4061return I;4062} else if (match(CondVal, m_Or(m_Value(LHS), m_Value(RHS)))) {4063if (Instruction *I = FoldSelectWithAndOrCond(/*IsAnd*/ false, LHS, RHS))4064return I;4065if (Instruction *I = FoldSelectWithAndOrCond(/*IsAnd*/ false, RHS, LHS))4066return I;4067} else {4068// We cannot swap the operands of logical and/or.4069// TODO: Can we swap the operands by inserting a freeze?4070if (match(CondVal, m_LogicalAnd(m_Value(LHS), m_Value(RHS)))) {4071if (Instruction *I = FoldSelectWithAndOrCond(/*IsAnd*/ true, LHS, RHS))4072return I;4073} else if (match(CondVal, m_LogicalOr(m_Value(LHS), m_Value(RHS)))) {4074if (Instruction *I = FoldSelectWithAndOrCond(/*IsAnd*/ false, LHS, RHS))4075return I;4076}4077}40784079// select Cond, !X, X -> xor Cond, X4080if (CondVal->getType() == SI.getType() && isKnownInversion(FalseVal, TrueVal))4081return BinaryOperator::CreateXor(CondVal, FalseVal);40824083// For vectors, this transform is only safe if the simplification does not4084// look through any lane-crossing operations. For now, limit to scalars only.4085if (SelType->isIntegerTy() &&4086(!isa<Constant>(TrueVal) || !isa<Constant>(FalseVal))) {4087// Try to simplify select arms based on KnownBits implied by the condition.4088CondContext CC(CondVal);4089findValuesAffectedByCondition(CondVal, /*IsAssume=*/false, [&](Value *V) {4090CC.AffectedValues.insert(V);4091});4092SimplifyQuery Q = SQ.getWithInstruction(&SI).getWithCondContext(CC);4093if (!CC.AffectedValues.empty()) {4094if (!isa<Constant>(TrueVal) &&4095hasAffectedValue(TrueVal, CC.AffectedValues, /*Depth=*/0)) {4096KnownBits Known = llvm::computeKnownBits(TrueVal, /*Depth=*/0, Q);4097if (Known.isConstant())4098return replaceOperand(SI, 1,4099ConstantInt::get(SelType, Known.getConstant()));4100}41014102CC.Invert = true;4103if (!isa<Constant>(FalseVal) &&4104hasAffectedValue(FalseVal, CC.AffectedValues, /*Depth=*/0)) {4105KnownBits Known = llvm::computeKnownBits(FalseVal, /*Depth=*/0, Q);4106if (Known.isConstant())4107return replaceOperand(SI, 2,4108ConstantInt::get(SelType, Known.getConstant()));4109}4110}4111}41124113return nullptr;4114}411541164117