Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
//===- InstCombineAndOrXor.cpp --------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visitAnd, visitOr, and visitXor functions.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/Analysis/CmpInstAnalysis.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/Local.h"

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "instcombine"

/// This is the complement of getICmpCode, which turns an opcode and two
/// operands into either a constant true or false, or a brand new ICmp
/// instruction. The sign is passed in to determine which kind of predicate to
/// use in the new icmp instruction.
static Value *getNewICmpValue(unsigned Code, bool Sign, Value *LHS, Value *RHS,
                              InstCombiner::BuilderTy &Builder) {
  ICmpInst::Predicate NewPred;
  if (Constant *TorF = getPredForICmpCode(Code, Sign, LHS->getType(), NewPred))
    return TorF;
  return Builder.CreateICmp(NewPred, LHS, RHS);
}

/// This is the complement of getFCmpCode, which turns an opcode and two
/// operands into either a FCmp instruction, or a true/false constant.
static Value *getFCmpValue(unsigned Code, Value *LHS, Value *RHS,
                           InstCombiner::BuilderTy &Builder) {
  FCmpInst::Predicate NewPred;
  if (Constant *TorF = getPredForFCmpCode(Code, LHS->getType(), NewPred))
    return TorF;
  return Builder.CreateFCmp(NewPred, LHS, RHS);
}

/// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise
/// (V < Lo || V >= Hi). This method expects that Lo < Hi. IsSigned indicates
/// whether to treat V, Lo, and Hi as signed or not.
Value *InstCombinerImpl::insertRangeTest(Value *V, const APInt &Lo,
                                         const APInt &Hi, bool isSigned,
                                         bool Inside) {
  assert((isSigned ? Lo.slt(Hi) : Lo.ult(Hi)) &&
         "Lo is not < Hi in range emission code!");

  Type *Ty = V->getType();

  // V >= Min && V <  Hi --> V <  Hi
  // V <  Min || V >= Hi --> V >= Hi
  ICmpInst::Predicate Pred = Inside ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGE;
  if (isSigned ? Lo.isMinSignedValue() : Lo.isMinValue()) {
    Pred = isSigned ? ICmpInst::getSignedPredicate(Pred) : Pred;
    return Builder.CreateICmp(Pred, V, ConstantInt::get(Ty, Hi));
  }

  // V >= Lo && V <  Hi --> V - Lo u<  Hi - Lo
  // V <  Lo || V >= Hi --> V - Lo u>= Hi - Lo
  Value *VMinusLo =
      Builder.CreateSub(V, ConstantInt::get(Ty, Lo), V->getName() + ".off");
  Constant *HiMinusLo = ConstantInt::get(Ty, Hi - Lo);
  return Builder.CreateICmp(Pred, VMinusLo, HiMinusLo);
}
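// Example: insertRangeTest with Lo = 5, Hi = 10, Inside = true emits
//   (V - 5) u< 5
// which is true exactly when 5 <= V < 10.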
/// Classify (icmp eq (A & B), C) and (icmp ne (A & B), C) as matching patterns
/// that can be simplified.
/// One of A and B is considered the mask. The other is the value. This is
/// described as the "AMask" or "BMask" part of the enum. If the enum contains
/// only "Mask", then both A and B can be considered masks. If A is the mask,
/// then it was proven that (A & C) == C. This is trivial if C == A or C == 0.
/// If both A and C are constants, this proof is also easy.
/// For the following explanations, we assume that A is the mask.
///
/// "AllOnes" declares that the comparison is true only if (A & B) == A or all
/// bits of A are set in B.
///   Example: (icmp eq (A & 3), 3) -> AMask_AllOnes
///
/// "AllZeros" declares that the comparison is true only if (A & B) == 0 or all
/// bits of A are cleared in B.
///   Example: (icmp eq (A & 3), 0) -> Mask_AllZeros
///
/// "Mixed" declares that (A & B) == C and C might or might not contain any
/// number of one bits and zero bits.
///   Example: (icmp eq (A & 3), 1) -> AMask_Mixed
///
/// "Not" means that in above descriptions "==" should be replaced by "!=".
///   Example: (icmp ne (A & 3), 3) -> AMask_NotAllOnes
///
/// If the mask A contains a single bit, then the following is equivalent:
///   (icmp eq (A & B), A) equals (icmp ne (A & B), 0)
///   (icmp ne (A & B), A) equals (icmp eq (A & B), 0)
enum MaskedICmpType {
  AMask_AllOnes = 1,
  AMask_NotAllOnes = 2,
  BMask_AllOnes = 4,
  BMask_NotAllOnes = 8,
  Mask_AllZeros = 16,
  Mask_NotAllZeros = 32,
  AMask_Mixed = 64,
  AMask_NotMixed = 128,
  BMask_Mixed = 256,
  BMask_NotMixed = 512
};

/// Return the set of patterns (from MaskedICmpType) that (icmp SCC (A & B), C)
/// satisfies.
static unsigned getMaskedICmpType(Value *A, Value *B, Value *C,
                                  ICmpInst::Predicate Pred) {
  const APInt *ConstA = nullptr, *ConstB = nullptr, *ConstC = nullptr;
  match(A, m_APInt(ConstA));
  match(B, m_APInt(ConstB));
  match(C, m_APInt(ConstC));
  bool IsEq = (Pred == ICmpInst::ICMP_EQ);
  bool IsAPow2 = ConstA && ConstA->isPowerOf2();
  bool IsBPow2 = ConstB && ConstB->isPowerOf2();
  unsigned MaskVal = 0;
  if (ConstC && ConstC->isZero()) {
    // if C is zero, then both A and B qualify as mask
    MaskVal |= (IsEq ? (Mask_AllZeros | AMask_Mixed | BMask_Mixed)
                     : (Mask_NotAllZeros | AMask_NotMixed | BMask_NotMixed));
    if (IsAPow2)
      MaskVal |= (IsEq ? (AMask_NotAllOnes | AMask_NotMixed)
                       : (AMask_AllOnes | AMask_Mixed));
    if (IsBPow2)
      MaskVal |= (IsEq ? (BMask_NotAllOnes | BMask_NotMixed)
                       : (BMask_AllOnes | BMask_Mixed));
    return MaskVal;
  }

  if (A == C) {
    MaskVal |= (IsEq ? (AMask_AllOnes | AMask_Mixed)
                     : (AMask_NotAllOnes | AMask_NotMixed));
    if (IsAPow2)
      MaskVal |= (IsEq ? (Mask_NotAllZeros | AMask_NotMixed)
                       : (Mask_AllZeros | AMask_Mixed));
  } else if (ConstA && ConstC && ConstC->isSubsetOf(*ConstA)) {
    MaskVal |= (IsEq ? AMask_Mixed : AMask_NotMixed);
  }

  if (B == C) {
    MaskVal |= (IsEq ? (BMask_AllOnes | BMask_Mixed)
                     : (BMask_NotAllOnes | BMask_NotMixed));
    if (IsBPow2)
      MaskVal |= (IsEq ? (Mask_NotAllZeros | BMask_NotMixed)
                       : (Mask_AllZeros | BMask_Mixed));
  } else if (ConstB && ConstC && ConstC->isSubsetOf(*ConstB)) {
    MaskVal |= (IsEq ? BMask_Mixed : BMask_NotMixed);
  }

  return MaskVal;
}
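// Example: (icmp eq (X & 3), 3) viewed with A = 3, B = X, C = 3 satisfies
// A == C, so getMaskedICmpType returns AMask_AllOnes | AMask_Mixed.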
/// Convert an analysis of a masked ICmp into its equivalent if all boolean
/// operations had the opposite sense. Since each "NotXXX" flag (recording !=)
/// is adjacent to the corresponding normal flag (recording ==), this just
/// involves swapping those bits over.
static unsigned conjugateICmpMask(unsigned Mask) {
  unsigned NewMask;
  NewMask = (Mask & (AMask_AllOnes | BMask_AllOnes | Mask_AllZeros |
                     AMask_Mixed | BMask_Mixed))
            << 1;

  NewMask |= (Mask & (AMask_NotAllOnes | BMask_NotAllOnes | Mask_NotAllZeros |
                      AMask_NotMixed | BMask_NotMixed))
             >> 1;

  return NewMask;
}

// Adapts the external decomposeBitTestICmp for local use.
static bool decomposeBitTestICmp(Value *LHS, Value *RHS,
                                 CmpInst::Predicate &Pred, Value *&X,
                                 Value *&Y, Value *&Z) {
  APInt Mask;
  if (!llvm::decomposeBitTestICmp(LHS, RHS, Pred, X, Mask))
    return false;

  Y = ConstantInt::get(X->getType(), Mask);
  Z = ConstantInt::get(X->getType(), 0);
  return true;
}

/// Handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E).
/// Return the pattern classes (from MaskedICmpType) for the left hand side and
/// the right hand side as a pair.
/// LHS and RHS are the left hand side and the right hand side ICmps and PredL
/// and PredR are their predicates, respectively.
static std::optional<std::pair<unsigned, unsigned>> getMaskedTypeForICmpPair(
    Value *&A, Value *&B, Value *&C, Value *&D, Value *&E, ICmpInst *LHS,
    ICmpInst *RHS, ICmpInst::Predicate &PredL, ICmpInst::Predicate &PredR) {
  // Don't allow pointers. Splat vectors are fine.
  if (!LHS->getOperand(0)->getType()->isIntOrIntVectorTy() ||
      !RHS->getOperand(0)->getType()->isIntOrIntVectorTy())
    return std::nullopt;

  // Here comes the tricky part:
  //  LHS might be of the form L11 & L12 == X, X == L21 & L22,
  //  and L11 & L12 == L21 & L22. The same goes for RHS.
  // Now we must find those components L** and R** that are equal, so
  // that we can extract the parameters A, B, C, D, and E for the canonical
  // form above.
  Value *L1 = LHS->getOperand(0);
  Value *L2 = LHS->getOperand(1);
  Value *L11, *L12, *L21, *L22;
  // Check whether the icmp can be decomposed into a bit test.
  if (decomposeBitTestICmp(L1, L2, PredL, L11, L12, L2)) {
    L21 = L22 = L1 = nullptr;
  } else {
    // Look for ANDs in the LHS icmp.
    if (!match(L1, m_And(m_Value(L11), m_Value(L12)))) {
      // Any icmp can be viewed as being trivially masked; if it allows us to
      // remove one, it's worth it.
      L11 = L1;
      L12 = Constant::getAllOnesValue(L1->getType());
    }

    if (!match(L2, m_And(m_Value(L21), m_Value(L22)))) {
      L21 = L2;
      L22 = Constant::getAllOnesValue(L2->getType());
    }
  }

  // Bail if LHS was an icmp that can't be decomposed into an equality.
  if (!ICmpInst::isEquality(PredL))
    return std::nullopt;

  Value *R1 = RHS->getOperand(0);
  Value *R2 = RHS->getOperand(1);
  Value *R11, *R12;
  bool Ok = false;
  if (decomposeBitTestICmp(R1, R2, PredR, R11, R12, R2)) {
    if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) {
      A = R11;
      D = R12;
    } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
      A = R12;
      D = R11;
    } else {
      return std::nullopt;
    }
    E = R2;
    R1 = nullptr;
    Ok = true;
  } else {
    if (!match(R1, m_And(m_Value(R11), m_Value(R12)))) {
      // As before, model no mask as a trivial mask if it'll let us do an
      // optimization.
      R11 = R1;
      R12 = Constant::getAllOnesValue(R1->getType());
    }

    if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) {
      A = R11;
      D = R12;
      E = R2;
      Ok = true;
    } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
      A = R12;
      D = R11;
      E = R2;
      Ok = true;
    }
  }

  // Bail if RHS was an icmp that can't be decomposed into an equality.
  if (!ICmpInst::isEquality(PredR))
    return std::nullopt;

  // Look for ANDs on the right side of the RHS icmp.
  if (!Ok) {
    if (!match(R2, m_And(m_Value(R11), m_Value(R12)))) {
      R11 = R2;
      R12 = Constant::getAllOnesValue(R2->getType());
    }

    if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) {
      A = R11;
      D = R12;
      E = R1;
      Ok = true;
    } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
      A = R12;
      D = R11;
      E = R1;
      Ok = true;
    } else {
      return std::nullopt;
    }

    assert(Ok && "Failed to find AND on the right side of the RHS icmp.");
  }

  if (L11 == A) {
    B = L12;
    C = L2;
  } else if (L12 == A) {
    B = L11;
    C = L2;
  } else if (L21 == A) {
    B = L22;
    C = L1;
  } else if (L22 == A) {
    B = L21;
    C = L1;
  }

  unsigned LeftType = getMaskedICmpType(A, B, C, PredL);
  unsigned RightType = getMaskedICmpType(A, D, E, PredR);
  return std::optional<std::pair<unsigned, unsigned>>(
      std::make_pair(LeftType, RightType));
}
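// Example: for the pair (icmp eq (X & 12), 0) and (icmp eq (X & 3), 1), the
// shared operand X becomes A, and the extracted parameters are B = 12, C = 0,
// D = 3, E = 1.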
/// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) into a single
/// (icmp(A & X) ==/!= Y), where the left-hand side is of type Mask_NotAllZeros
/// and the right hand side is of type BMask_Mixed. For example,
/// (icmp (A & 12) != 0) & (icmp (A & 15) == 8) -> (icmp (A & 15) == 8).
/// Also used for logical and/or, must be poison safe.
static Value *foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed(
    ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, Value *A, Value *B, Value *C,
    Value *D, Value *E, ICmpInst::Predicate PredL, ICmpInst::Predicate PredR,
    InstCombiner::BuilderTy &Builder) {
  // We are given the canonical form:
  //   (icmp ne (A & B), 0) & (icmp eq (A & D), E).
  // where D & E == E.
  //
  // If IsAnd is false, we get it in negated form:
  //   (icmp eq (A & B), 0) | (icmp ne (A & D), E) ->
  //     !((icmp ne (A & B), 0) & (icmp eq (A & D), E)).
  //
  // We currently handle the case where B, C, D, and E are constant.
  //
  const APInt *BCst, *CCst, *DCst, *OrigECst;
  if (!match(B, m_APInt(BCst)) || !match(C, m_APInt(CCst)) ||
      !match(D, m_APInt(DCst)) || !match(E, m_APInt(OrigECst)))
    return nullptr;

  ICmpInst::Predicate NewCC = IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE;

  // Update E to the canonical form when D is a power of two and RHS is
  // canonicalized as,
  // (icmp ne (A & D), 0) -> (icmp eq (A & D), D) or
  // (icmp ne (A & D), D) -> (icmp eq (A & D), 0).
  APInt ECst = *OrigECst;
  if (PredR != NewCC)
    ECst ^= *DCst;

  // If B or D is zero, skip, because LHS or RHS can then be trivially folded
  // by other rules and this pattern won't apply any more.
  if (*BCst == 0 || *DCst == 0)
    return nullptr;

  // If B and D don't intersect, i.e. (B & D) == 0, no folding because we can't
  // deduce anything from it.
  // For example,
  // (icmp ne (A & 12), 0) & (icmp eq (A & 3), 1) -> no folding.
  if ((*BCst & *DCst) == 0)
    return nullptr;

  // If the following two conditions are met:
  //
  // 1. mask B covers only a single bit that's not covered by mask D, that is,
  // (B & (B ^ D)) is a power of 2 (in other words, B minus the intersection of
  // B and D has only one bit set) and,
  //
  // 2. RHS (and E) indicates that the rest of B's bits are zero (in other
  // words, the intersection of B and D is zero), that is, ((B & D) & E) == 0
  //
  // then that single bit in B must be one and thus the whole expression can be
  // folded to
  //   (A & (B | D)) == (B & (B ^ D)) | E.
  //
  // For example,
  // (icmp ne (A & 12), 0) & (icmp eq (A & 7), 1) -> (icmp eq (A & 15), 9)
  // (icmp ne (A & 15), 0) & (icmp eq (A & 7), 0) -> (icmp eq (A & 15), 8)
  if ((((*BCst & *DCst) & ECst) == 0) &&
      (*BCst & (*BCst ^ *DCst)).isPowerOf2()) {
    APInt BorD = *BCst | *DCst;
    APInt BandBxorDorE = (*BCst & (*BCst ^ *DCst)) | ECst;
    Value *NewMask = ConstantInt::get(A->getType(), BorD);
    Value *NewMaskedValue = ConstantInt::get(A->getType(), BandBxorDorE);
    Value *NewAnd = Builder.CreateAnd(A, NewMask);
    return Builder.CreateICmp(NewCC, NewAnd, NewMaskedValue);
  }
  auto IsSubSetOrEqual = [](const APInt *C1, const APInt *C2) {
    return (*C1 & *C2) == *C1;
  };
  auto IsSuperSetOrEqual = [](const APInt *C1, const APInt *C2) {
    return (*C1 & *C2) == *C2;
  };

  // In the following, we consider only the cases where B is a superset of D, B
  // is a subset of D, or B == D because otherwise there's at least one bit
  // covered by B but not D, in which case we can't deduce much from it, so
  // no folding (aside from the single must-be-one bit case right above.)
  // For example,
  // (icmp ne (A & 14), 0) & (icmp eq (A & 3), 1) -> no folding.
  if (!IsSubSetOrEqual(BCst, DCst) && !IsSuperSetOrEqual(BCst, DCst))
    return nullptr;

  // At this point, either B is a superset of D, B is a subset of D or B == D.

  // If E is zero, if B is a subset of (or equal to) D, LHS and RHS contradict
  // and the whole expression becomes false (or true if negated), otherwise, no
  // folding.
  // For example,
  // (icmp ne (A & 3), 0) & (icmp eq (A & 7), 0) -> false.
  // (icmp ne (A & 15), 0) & (icmp eq (A & 3), 0) -> no folding.
  if (ECst.isZero()) {
    if (IsSubSetOrEqual(BCst, DCst))
      return ConstantInt::get(LHS->getType(), !IsAnd);
    return nullptr;
  }

  // At this point, B, D, E aren't zero and (B & D) == B, (B & D) == D or B ==
  // D. If B is a superset of (or equal to) D, since E is not zero, LHS is
  // subsumed by RHS (RHS implies LHS.) So the whole expression becomes
  // RHS. For example,
  // (icmp ne (A & 255), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8).
  // (icmp ne (A & 15), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8).
  if (IsSuperSetOrEqual(BCst, DCst))
    return RHS;
  // Otherwise, B is a subset of D. If B and E have a common bit set,
  // i.e. (B & E) != 0, then LHS is subsumed by RHS. For example,
  // (icmp ne (A & 12), 0) & (icmp eq (A & 15), 8) -> (icmp eq (A & 15), 8).
  assert(IsSubSetOrEqual(BCst, DCst) && "Precondition due to above code");
  if ((*BCst & ECst) != 0)
    return RHS;
  // Otherwise, LHS and RHS contradict and the whole expression becomes false
  // (or true if negated.) For example,
  // (icmp ne (A & 7), 0) & (icmp eq (A & 15), 8) -> false.
  // (icmp ne (A & 6), 0) & (icmp eq (A & 15), 8) -> false.
  return ConstantInt::get(LHS->getType(), !IsAnd);
}
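// Worked example of the single must-be-one-bit case above: B = 12 (0b1100),
// D = 7 (0b0111), E = 1. B & (B ^ D) = 8 is a power of 2 and (B & D) & E == 0,
// so (icmp ne (A & 12), 0) & (icmp eq (A & 7), 1) folds to
// (icmp eq (A & 15), 9), using B | D = 15 and 8 | E = 9.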
/// Try to fold (icmp(A & B) ==/!= 0) &/| (icmp(A & D) ==/!= E) into a single
/// (icmp(A & X) ==/!= Y), where the left-hand side and the right hand side
/// aren't of the common mask pattern type.
/// Also used for logical and/or, must be poison safe.
static Value *foldLogOpOfMaskedICmpsAsymmetric(
    ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, Value *A, Value *B, Value *C,
    Value *D, Value *E, ICmpInst::Predicate PredL, ICmpInst::Predicate PredR,
    unsigned LHSMask, unsigned RHSMask, InstCombiner::BuilderTy &Builder) {
  assert(ICmpInst::isEquality(PredL) && ICmpInst::isEquality(PredR) &&
         "Expected equality predicates for masked type of icmps.");
  // Handle Mask_NotAllZeros-BMask_Mixed cases.
  // (icmp ne/eq (A & B), C) &/| (icmp eq/ne (A & D), E), or
  // (icmp eq/ne (A & B), C) &/| (icmp ne/eq (A & D), E)
  //    which gets swapped to
  //    (icmp ne/eq (A & D), E) &/| (icmp eq/ne (A & B), C).
  if (!IsAnd) {
    LHSMask = conjugateICmpMask(LHSMask);
    RHSMask = conjugateICmpMask(RHSMask);
  }
  if ((LHSMask & Mask_NotAllZeros) && (RHSMask & BMask_Mixed)) {
    if (Value *V = foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed(
            LHS, RHS, IsAnd, A, B, C, D, E, PredL, PredR, Builder)) {
      return V;
    }
  } else if ((LHSMask & BMask_Mixed) && (RHSMask & Mask_NotAllZeros)) {
    if (Value *V = foldLogOpOfMaskedICmps_NotAllZeros_BMask_Mixed(
            RHS, LHS, IsAnd, A, D, E, B, C, PredR, PredL, Builder)) {
      return V;
    }
  }
  return nullptr;
}
/// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
/// into a single (icmp(A & X) ==/!= Y).
static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
                                     bool IsLogical,
                                     InstCombiner::BuilderTy &Builder) {
  Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr;
  ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
  std::optional<std::pair<unsigned, unsigned>> MaskPair =
      getMaskedTypeForICmpPair(A, B, C, D, E, LHS, RHS, PredL, PredR);
  if (!MaskPair)
    return nullptr;
  assert(ICmpInst::isEquality(PredL) && ICmpInst::isEquality(PredR) &&
         "Expected equality predicates for masked type of icmps.");
  unsigned LHSMask = MaskPair->first;
  unsigned RHSMask = MaskPair->second;
  unsigned Mask = LHSMask & RHSMask;
  if (Mask == 0) {
    // Even if the two sides don't share a common pattern, check if folding can
    // still happen.
    if (Value *V = foldLogOpOfMaskedICmpsAsymmetric(
            LHS, RHS, IsAnd, A, B, C, D, E, PredL, PredR, LHSMask, RHSMask,
            Builder))
      return V;
    return nullptr;
  }

  // In full generality:
  //     (icmp (A & B) Op C) | (icmp (A & D) Op E)
  // ==  ![ (icmp (A & B) !Op C) & (icmp (A & D) !Op E) ]
  //
  // If the latter can be converted into (icmp (A & X) Op Y) then the former is
  // equivalent to (icmp (A & X) !Op Y).
  //
  // Therefore, we can pretend for the rest of this function that we're dealing
  // with the conjunction, provided we flip the sense of any comparisons (both
  // input and output).

  // In most cases we're going to produce an EQ for the "&&" case.
  ICmpInst::Predicate NewCC = IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE;
  if (!IsAnd) {
    // Convert the masking analysis into its equivalent with negated
    // comparisons.
    Mask = conjugateICmpMask(Mask);
  }

  if (Mask & Mask_AllZeros) {
    // (icmp eq (A & B), 0) & (icmp eq (A & D), 0)
    // -> (icmp eq (A & (B|D)), 0)
    if (IsLogical && !isGuaranteedNotToBeUndefOrPoison(D))
      return nullptr; // TODO: Use freeze?
    Value *NewOr = Builder.CreateOr(B, D);
    Value *NewAnd = Builder.CreateAnd(A, NewOr);
    // We can't use C as zero because we might actually handle
    //   (icmp ne (A & B), B) & (icmp ne (A & D), D)
    // with B and D having a single bit set.
    Value *Zero = Constant::getNullValue(A->getType());
    return Builder.CreateICmp(NewCC, NewAnd, Zero);
  }
  if (Mask & BMask_AllOnes) {
    // (icmp eq (A & B), B) & (icmp eq (A & D), D)
    // -> (icmp eq (A & (B|D)), (B|D))
    if (IsLogical && !isGuaranteedNotToBeUndefOrPoison(D))
      return nullptr; // TODO: Use freeze?
    Value *NewOr = Builder.CreateOr(B, D);
    Value *NewAnd = Builder.CreateAnd(A, NewOr);
    return Builder.CreateICmp(NewCC, NewAnd, NewOr);
  }
  if (Mask & AMask_AllOnes) {
    // (icmp eq (A & B), A) & (icmp eq (A & D), A)
    // -> (icmp eq (A & (B&D)), A)
    if (IsLogical && !isGuaranteedNotToBeUndefOrPoison(D))
      return nullptr; // TODO: Use freeze?
    Value *NewAnd1 = Builder.CreateAnd(B, D);
    Value *NewAnd2 = Builder.CreateAnd(A, NewAnd1);
    return Builder.CreateICmp(NewCC, NewAnd2, A);
  }

  // Remaining cases assume at least that B and D are constant, and depend on
  // their actual values. This isn't strictly necessary, just a "handle the
  // easy cases for now" decision.
  const APInt *ConstB, *ConstD;
  if (!match(B, m_APInt(ConstB)) || !match(D, m_APInt(ConstD)))
    return nullptr;

  if (Mask & (Mask_NotAllZeros | BMask_NotAllOnes)) {
    // (icmp ne (A & B), 0) & (icmp ne (A & D), 0) and
    // (icmp ne (A & B), B) & (icmp ne (A & D), D)
    //     -> (icmp ne (A & B), 0) or (icmp ne (A & D), 0)
    // Only valid if one of the masks is a superset of the other (check "B&D" is
    // the same as either B or D).
    APInt NewMask = *ConstB & *ConstD;
    if (NewMask == *ConstB)
      return LHS;
    else if (NewMask == *ConstD)
      return RHS;
  }

  if (Mask & AMask_NotAllOnes) {
    // (icmp ne (A & B), B) & (icmp ne (A & D), D)
    //     -> (icmp ne (A & B), A) or (icmp ne (A & D), A)
    // Only valid if one of the masks is a superset of the other (check "B|D" is
    // the same as either B or D).
    APInt NewMask = *ConstB | *ConstD;
    if (NewMask == *ConstB)
      return LHS;
    else if (NewMask == *ConstD)
      return RHS;
  }

  if (Mask & (BMask_Mixed | BMask_NotMixed)) {
    // Mixed:
    // (icmp eq (A & B), C) & (icmp eq (A & D), E)
    // We already know that B & C == C && D & E == E.
    // If we can prove that (B & D) & (C ^ E) == 0, that is, the bits of
    // C and E, which are shared by both the mask B and the mask D, don't
    // contradict, then we can transform to
    // -> (icmp eq (A & (B|D)), (C|E))
    // Currently, we only handle the case of B, C, D, and E being constant.
    // We can't simply use C and E because we might actually handle
    //   (icmp ne (A & B), B) & (icmp eq (A & D), D)
    // with B and D having a single bit set.

    // NotMixed:
    // (icmp ne (A & B), C) & (icmp ne (A & D), E)
    // -> (icmp ne (A & (B & D)), (C & E))
    // Check the intersection (B & D) for inequality.
    // Assume that (B & D) == B || (B & D) == D, i.e. B/D is a subset of D/B,
    // and that (B & D) & (C ^ E) == 0, i.e. the bits of C and E shared by both
    // B and D don't contradict.
    // Note that we can assume (~B & C) == 0 && (~D & E) == 0: an earlier fold
    // should have deleted these icmps otherwise.

    const APInt *OldConstC, *OldConstE;
    if (!match(C, m_APInt(OldConstC)) || !match(E, m_APInt(OldConstE)))
      return nullptr;

    auto FoldBMixed = [&](ICmpInst::Predicate CC, bool IsNot) -> Value * {
      CC = IsNot ? CmpInst::getInversePredicate(CC) : CC;
      const APInt ConstC = PredL != CC ? *ConstB ^ *OldConstC : *OldConstC;
      const APInt ConstE = PredR != CC ? *ConstD ^ *OldConstE : *OldConstE;

      if (((*ConstB & *ConstD) & (ConstC ^ ConstE)).getBoolValue())
        return IsNot ? nullptr : ConstantInt::get(LHS->getType(), !IsAnd);

      if (IsNot && !ConstB->isSubsetOf(*ConstD) && !ConstD->isSubsetOf(*ConstB))
        return nullptr;

      APInt BD, CE;
      if (IsNot) {
        BD = *ConstB & *ConstD;
        CE = ConstC & ConstE;
      } else {
        BD = *ConstB | *ConstD;
        CE = ConstC | ConstE;
      }
      Value *NewAnd = Builder.CreateAnd(A, BD);
      Value *CEVal = ConstantInt::get(A->getType(), CE);
      return Builder.CreateICmp(CC, CEVal, NewAnd);
    };

    if (Mask & BMask_Mixed)
      return FoldBMixed(NewCC, false);
    if (Mask & BMask_NotMixed) // can be else also
      return FoldBMixed(NewCC, true);
  }
  return nullptr;
}
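// Example of the Mask_AllZeros fold above:
//   (icmp eq (A & 4), 0) & (icmp eq (A & 8), 0) -> (icmp eq (A & 12), 0)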
/// Try to fold a signed range check with lower bound 0 to an unsigned icmp.
/// Example: (icmp sge x, 0) & (icmp slt x, n) --> icmp ult x, n
/// If \p Inverted is true then the check is for the inverted range, e.g.
/// (icmp slt x, 0) | (icmp sgt x, n) --> icmp ugt x, n
Value *InstCombinerImpl::simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1,
                                            bool Inverted) {
  // Check the lower range comparison, e.g. x >= 0
  // InstCombine already ensured that if there is a constant it's on the RHS.
  ConstantInt *RangeStart = dyn_cast<ConstantInt>(Cmp0->getOperand(1));
  if (!RangeStart)
    return nullptr;

  ICmpInst::Predicate Pred0 = (Inverted ? Cmp0->getInversePredicate() :
                               Cmp0->getPredicate());

  // Accept x > -1 or x >= 0 (after potentially inverting the predicate).
  if (!((Pred0 == ICmpInst::ICMP_SGT && RangeStart->isMinusOne()) ||
        (Pred0 == ICmpInst::ICMP_SGE && RangeStart->isZero())))
    return nullptr;

  ICmpInst::Predicate Pred1 = (Inverted ? Cmp1->getInversePredicate() :
                               Cmp1->getPredicate());

  Value *Input = Cmp0->getOperand(0);
  Value *RangeEnd;
  if (Cmp1->getOperand(0) == Input) {
    // For the upper range compare we have: icmp x, n
    RangeEnd = Cmp1->getOperand(1);
  } else if (Cmp1->getOperand(1) == Input) {
    // For the upper range compare we have: icmp n, x
    RangeEnd = Cmp1->getOperand(0);
    Pred1 = ICmpInst::getSwappedPredicate(Pred1);
  } else {
    return nullptr;
  }

  // Check the upper range comparison, e.g. x < n
  ICmpInst::Predicate NewPred;
  switch (Pred1) {
  case ICmpInst::ICMP_SLT: NewPred = ICmpInst::ICMP_ULT; break;
  case ICmpInst::ICMP_SLE: NewPred = ICmpInst::ICMP_ULE; break;
  default: return nullptr;
  }

  // This simplification is only valid if the upper range is not negative.
  KnownBits Known = computeKnownBits(RangeEnd, /*Depth=*/0, Cmp1);
  if (!Known.isNonNegative())
    return nullptr;

  if (Inverted)
    NewPred = ICmpInst::getInversePredicate(NewPred);

  return Builder.CreateICmp(NewPred, Input, RangeEnd);
}
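// Example: (icmp sge i32 %x, 0) & (icmp slt i32 %x, %n) folds to
// (icmp ult i32 %x, %n), which is only valid because %n is known to be
// non-negative.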
// (or (icmp eq X, 0), (icmp eq X, Pow2OrZero))
//      -> (icmp eq (and X, Pow2OrZero), X)
// (and (icmp ne X, 0), (icmp ne X, Pow2OrZero))
//      -> (icmp ne (and X, Pow2OrZero), X)
static Value *
foldAndOrOfICmpsWithPow2AndWithZero(InstCombiner::BuilderTy &Builder,
                                    ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
                                    const SimplifyQuery &Q) {
  CmpInst::Predicate Pred = IsAnd ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
  // Make sure we have right compares for our op.
  if (LHS->getPredicate() != Pred || RHS->getPredicate() != Pred)
    return nullptr;

  // Make it so we can match LHS against the (icmp eq/ne X, 0) just for
  // simplicity.
  if (match(RHS->getOperand(1), m_Zero()))
    std::swap(LHS, RHS);

  Value *Pow2, *Op;
  // Match the desired pattern:
  // LHS: (icmp eq/ne X, 0)
  // RHS: (icmp eq/ne X, Pow2OrZero)
  // Skip if Pow2OrZero is 1. Either way it gets folded to (icmp ugt X, 1) but
  // this form ends up slightly less canonical.
  // We could potentially be more sophisticated than requiring LHS/RHS
  // be one-use. We don't create additional instructions if only one
  // of them is one-use. So cases where one is one-use and the other
  // is two-use might be profitable.
  if (!match(LHS, m_OneUse(m_ICmp(Pred, m_Value(Op), m_Zero()))) ||
      !match(RHS, m_OneUse(m_c_ICmp(Pred, m_Specific(Op), m_Value(Pow2)))) ||
      match(Pow2, m_One()) ||
      !isKnownToBeAPowerOfTwo(Pow2, Q.DL, /*OrZero=*/true, /*Depth=*/0, Q.AC,
                              Q.CxtI, Q.DT))
    return nullptr;

  Value *And = Builder.CreateAnd(Op, Pow2);
  return Builder.CreateICmp(Pred, And, Op);
}

// Fold (iszero(A & K1) | iszero(A & K2)) ->  (A & (K1 | K2)) != (K1 | K2)
// Fold (!iszero(A & K1) & !iszero(A & K2)) ->  (A & (K1 | K2)) == (K1 | K2)
Value *InstCombinerImpl::foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS,
                                                       ICmpInst *RHS,
                                                       Instruction *CxtI,
                                                       bool IsAnd,
                                                       bool IsLogical) {
  CmpInst::Predicate Pred = IsAnd ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
  if (LHS->getPredicate() != Pred || RHS->getPredicate() != Pred)
    return nullptr;

  if (!match(LHS->getOperand(1), m_Zero()) ||
      !match(RHS->getOperand(1), m_Zero()))
    return nullptr;

  Value *L1, *L2, *R1, *R2;
  if (match(LHS->getOperand(0), m_And(m_Value(L1), m_Value(L2))) &&
      match(RHS->getOperand(0), m_And(m_Value(R1), m_Value(R2)))) {
    if (L1 == R2 || L2 == R2)
      std::swap(R1, R2);
    if (L2 == R1)
      std::swap(L1, L2);

    if (L1 == R1 &&
        isKnownToBeAPowerOfTwo(L2, false, 0, CxtI) &&
        isKnownToBeAPowerOfTwo(R2, false, 0, CxtI)) {
      // If this is a logical and/or, then we must prevent propagation of a
      // poison value from the RHS by inserting freeze.
      if (IsLogical)
        R2 = Builder.CreateFreeze(R2);
      Value *Mask = Builder.CreateOr(L2, R2);
      Value *Masked = Builder.CreateAnd(L1, Mask);
      auto NewPred = IsAnd ? CmpInst::ICMP_EQ : CmpInst::ICMP_NE;
      return Builder.CreateICmp(NewPred, Masked, Mask);
    }
  }

  return nullptr;
}
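// Example: with K1 = 8 and K2 = 2 (both powers of two),
//   ((A & 8) == 0) | ((A & 2) == 0) -> (A & 10) != 10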
/// General pattern:
///   X & Y
///
/// Where Y is checking that all the high bits (covered by a mask 4294967168)
/// are uniform, i.e. %arg & 4294967168 can be either 4294967168 or 0
/// Pattern can be one of:
///   %t = add        i32 %arg,    128
///   %r = icmp   ult i32 %t,      256
/// Or
///   %t0 = shl       i32 %arg,    24
///   %t1 = ashr      i32 %t0,     24
///   %r  = icmp  eq  i32 %t1,     %arg
/// Or
///   %t0 = trunc     i32 %arg  to i8
///   %t1 = sext      i8  %t0   to i32
///   %r  = icmp  eq  i32 %t1,     %arg
/// This pattern is a signed truncation check.
///
/// And X is checking that some bit in that same mask is zero.
/// I.e. can be one of:
///   %r = icmp sgt i32 %arg,    -1
/// Or
///   %t = and      i32 %arg,    2147483648
///   %r = icmp eq  i32 %t,      0
///
/// Since we are checking that all the bits in that mask are the same,
/// and a particular bit is zero, what we are really checking is that all the
/// masked bits are zero.
/// So this should be transformed to:
///   %r = icmp ult i32 %arg, 128
static Value *foldSignedTruncationCheck(ICmpInst *ICmp0, ICmpInst *ICmp1,
                                        Instruction &CxtI,
                                        InstCombiner::BuilderTy &Builder) {
  assert(CxtI.getOpcode() == Instruction::And);

  // Match  icmp ult (add %arg, C01), C1  (C1 == C01 << 1; powers of two)
  auto tryToMatchSignedTruncationCheck = [](ICmpInst *ICmp, Value *&X,
                                            APInt &SignBitMask) -> bool {
    CmpInst::Predicate Pred;
    const APInt *I01, *I1; // powers of two; I1 == I01 << 1
    if (!(match(ICmp,
                m_ICmp(Pred, m_Add(m_Value(X), m_Power2(I01)), m_Power2(I1))) &&
          Pred == ICmpInst::ICMP_ULT && I1->ugt(*I01) && I01->shl(1) == *I1))
      return false;
    // Which bit is the new sign bit as per the 'signed truncation' pattern?
    SignBitMask = *I01;
    return true;
  };

  // One icmp needs to be 'signed truncation check'.
  // We need to match this first, else we will mismatch commutative cases.
  Value *X1;
  APInt HighestBit;
  ICmpInst *OtherICmp;
  if (tryToMatchSignedTruncationCheck(ICmp1, X1, HighestBit))
    OtherICmp = ICmp0;
  else if (tryToMatchSignedTruncationCheck(ICmp0, X1, HighestBit))
    OtherICmp = ICmp1;
  else
    return nullptr;

  assert(HighestBit.isPowerOf2() && "expected to be power of two (non-zero)");

  // Try to match/decompose into:  icmp eq (X & Mask), 0
  auto tryToDecompose = [](ICmpInst *ICmp, Value *&X,
                           APInt &UnsetBitsMask) -> bool {
    CmpInst::Predicate Pred = ICmp->getPredicate();
    // Can it be decomposed into  icmp eq (X & Mask), 0  ?
    if (llvm::decomposeBitTestICmp(ICmp->getOperand(0), ICmp->getOperand(1),
                                   Pred, X, UnsetBitsMask,
                                   /*LookThroughTrunc=*/false) &&
        Pred == ICmpInst::ICMP_EQ)
      return true;
    // Is it  icmp eq (X & Mask), 0  already?
    const APInt *Mask;
    if (match(ICmp, m_ICmp(Pred, m_And(m_Value(X), m_APInt(Mask)), m_Zero())) &&
        Pred == ICmpInst::ICMP_EQ) {
      UnsetBitsMask = *Mask;
      return true;
    }
    return false;
  };

  // And the other icmp needs to be decomposable into a bit test.
  Value *X0;
  APInt UnsetBitsMask;
  if (!tryToDecompose(OtherICmp, X0, UnsetBitsMask))
    return nullptr;

  assert(!UnsetBitsMask.isZero() && "empty mask makes no sense.");

  // Are they working on the same value?
  Value *X;
  if (X1 == X0) {
    // Ok as is.
    X = X1;
  } else if (match(X0, m_Trunc(m_Specific(X1)))) {
    UnsetBitsMask = UnsetBitsMask.zext(X1->getType()->getScalarSizeInBits());
    X = X1;
  } else
    return nullptr;

  // So which bits should be uniform as per the 'signed truncation check'?
  // (all the bits starting with (i.e. including) HighestBit)
  APInt SignBitsMask = ~(HighestBit - 1U);

  // UnsetBitsMask must have some common bits with SignBitsMask.
  if (!UnsetBitsMask.intersects(SignBitsMask))
    return nullptr;

  // Does UnsetBitsMask contain any bits outside of SignBitsMask?
  if (!UnsetBitsMask.isSubsetOf(SignBitsMask)) {
    APInt OtherHighestBit = (~UnsetBitsMask) + 1U;
    if (!OtherHighestBit.isPowerOf2())
      return nullptr;
    HighestBit = APIntOps::umin(HighestBit, OtherHighestBit);
  }
  // Else, if it does not, then all is ok as-is.

  // %r = icmp ult %X, SignBit
  return Builder.CreateICmpULT(X, ConstantInt::get(X->getType(), HighestBit),
                               CxtI.getName() + ".simplified");
}
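// Example: (icmp ult (add i32 %arg, 128), 256) & (icmp sgt i32 %arg, -1)
// simplifies to (icmp ult i32 %arg, 128).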
/// Fold (icmp eq ctpop(X) 1) | (icmp eq X 0) into (icmp ult ctpop(X) 2) and
/// fold (icmp ne ctpop(X) 1) & (icmp ne X 0) into (icmp ugt ctpop(X) 1).
/// Also used for logical and/or, must be poison safe.
static Value *foldIsPowerOf2OrZero(ICmpInst *Cmp0, ICmpInst *Cmp1, bool IsAnd,
                                   InstCombiner::BuilderTy &Builder) {
  CmpInst::Predicate Pred0, Pred1;
  Value *X;
  if (!match(Cmp0, m_ICmp(Pred0, m_Intrinsic<Intrinsic::ctpop>(m_Value(X)),
                          m_SpecificInt(1))) ||
      !match(Cmp1, m_ICmp(Pred1, m_Specific(X), m_ZeroInt())))
    return nullptr;

  Value *CtPop = Cmp0->getOperand(0);
  if (IsAnd && Pred0 == ICmpInst::ICMP_NE && Pred1 == ICmpInst::ICMP_NE)
    return Builder.CreateICmpUGT(CtPop, ConstantInt::get(CtPop->getType(), 1));
  if (!IsAnd && Pred0 == ICmpInst::ICMP_EQ && Pred1 == ICmpInst::ICMP_EQ)
    return Builder.CreateICmpULT(CtPop, ConstantInt::get(CtPop->getType(), 2));

  return nullptr;
}

/// Reduce a pair of compares that check if a value has exactly 1 bit set.
/// Also used for logical and/or, must be poison safe if range attributes are
/// dropped.
static Value *foldIsPowerOf2(ICmpInst *Cmp0, ICmpInst *Cmp1, bool JoinedByAnd,
                             InstCombiner::BuilderTy &Builder,
                             InstCombinerImpl &IC) {
  // Handle 'and' / 'or' commutation: make the equality check the first operand.
  if (JoinedByAnd && Cmp1->getPredicate() == ICmpInst::ICMP_NE)
    std::swap(Cmp0, Cmp1);
  else if (!JoinedByAnd && Cmp1->getPredicate() == ICmpInst::ICMP_EQ)
    std::swap(Cmp0, Cmp1);

  // (X != 0) && (ctpop(X) u< 2) --> ctpop(X) == 1
  CmpInst::Predicate Pred0, Pred1;
  Value *X;
  if (JoinedByAnd && match(Cmp0, m_ICmp(Pred0, m_Value(X), m_ZeroInt())) &&
      match(Cmp1, m_ICmp(Pred1, m_Intrinsic<Intrinsic::ctpop>(m_Specific(X)),
                         m_SpecificInt(2))) &&
      Pred0 == ICmpInst::ICMP_NE && Pred1 == ICmpInst::ICMP_ULT) {
    auto *CtPop = cast<Instruction>(Cmp1->getOperand(0));
    // Drop range attributes and re-infer them in the next iteration.
    CtPop->dropPoisonGeneratingAnnotations();
    IC.addToWorklist(CtPop);
    return Builder.CreateICmpEQ(CtPop, ConstantInt::get(CtPop->getType(), 1));
  }
  // (X == 0) || (ctpop(X) u> 1) --> ctpop(X) != 1
  if (!JoinedByAnd && match(Cmp0, m_ICmp(Pred0, m_Value(X), m_ZeroInt())) &&
      match(Cmp1, m_ICmp(Pred1, m_Intrinsic<Intrinsic::ctpop>(m_Specific(X)),
                         m_SpecificInt(1))) &&
      Pred0 == ICmpInst::ICMP_EQ && Pred1 == ICmpInst::ICMP_UGT) {
    auto *CtPop = cast<Instruction>(Cmp1->getOperand(0));
    // Drop range attributes and re-infer them in the next iteration.
    CtPop->dropPoisonGeneratingAnnotations();
    IC.addToWorklist(CtPop);
    return Builder.CreateICmpNE(CtPop, ConstantInt::get(CtPop->getType(), 1));
  }
  return nullptr;
}
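// Example: (icmp ne i32 %x, 0) & (icmp ult (ctpop i32 %x), 2) becomes
// (icmp eq (ctpop i32 %x), 1), the canonical "is a power of 2" test.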
/// Try to fold (icmp(A & B) == 0) & (icmp(A & D) != E) into (icmp A u< D) iff
/// B is a contiguous set of ones starting from the most significant bit
/// (negative power of 2), D and E are equal, and D is a contiguous set of ones
/// starting at the most significant zero bit in B. Parameter B supports masking
/// using undef/poison in either scalar or vector values.
static Value *foldNegativePower2AndShiftedMask(
    Value *A, Value *B, Value *D, Value *E, ICmpInst::Predicate PredL,
    ICmpInst::Predicate PredR, InstCombiner::BuilderTy &Builder) {
  assert(ICmpInst::isEquality(PredL) && ICmpInst::isEquality(PredR) &&
         "Expected equality predicates for masked type of icmps.");
  if (PredL != ICmpInst::ICMP_EQ || PredR != ICmpInst::ICMP_NE)
    return nullptr;

  if (!match(B, m_NegatedPower2()) || !match(D, m_ShiftedMask()) ||
      !match(E, m_ShiftedMask()))
    return nullptr;

  // Test scalar arguments for conversion. B has been validated earlier to be a
  // negative power of two and thus is guaranteed to have one or more contiguous
  // ones starting from the MSB followed by zero or more contiguous zeros. D has
  // been validated earlier to be a shifted set of one or more contiguous ones.
  // In order to match, B leading ones and D leading zeros should be equal. The
  // predicate that B be a negative power of 2 prevents the condition of there
  // ever being zero leading ones. Thus 0 == 0 cannot occur. The predicate that
  // D always be a shifted mask prevents the condition of D equaling 0. This
  // prevents matching the condition where B contains the maximum number of
  // leading one bits (-1) and D contains the maximum number of leading zero
  // bits (0).
  auto isReducible = [](const Value *B, const Value *D, const Value *E) {
    const APInt *BCst, *DCst, *ECst;
    return match(B, m_APIntAllowPoison(BCst)) && match(D, m_APInt(DCst)) &&
           match(E, m_APInt(ECst)) && *DCst == *ECst &&
           (isa<PoisonValue>(B) ||
            (BCst->countLeadingOnes() == DCst->countLeadingZeros()));
  };

  // Test vector type arguments for conversion.
  if (const auto *BVTy = dyn_cast<VectorType>(B->getType())) {
    const auto *BFVTy = dyn_cast<FixedVectorType>(BVTy);
    const auto *BConst = dyn_cast<Constant>(B);
    const auto *DConst = dyn_cast<Constant>(D);
    const auto *EConst = dyn_cast<Constant>(E);

    if (!BFVTy || !BConst || !DConst || !EConst)
      return nullptr;

    for (unsigned I = 0; I != BFVTy->getNumElements(); ++I) {
      const auto *BElt = BConst->getAggregateElement(I);
      const auto *DElt = DConst->getAggregateElement(I);
      const auto *EElt = EConst->getAggregateElement(I);

      if (!BElt || !DElt || !EElt)
        return nullptr;
      if (!isReducible(BElt, DElt, EElt))
        return nullptr;
    }
  } else {
    // Test scalar type arguments for conversion.
    if (!isReducible(B, D, E))
      return nullptr;
  }
  return Builder.CreateICmp(ICmpInst::ICMP_ULT, A, D);
}
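// Example (i8): B = -8 (0b11111000) and D = E = 7 (0b00000111), so
//   ((X & -8) == 0) & ((X & 7) != 7) -> (icmp ult X, 7)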
/// Try to fold ((icmp X u< P) & (icmp(X & M) != M)) or ((icmp X s> -1) &
/// (icmp(X & M) != M)) into (icmp X u< M). Where P is a power of 2, M < P, and
/// M is a contiguous shifted mask starting at the right most significant zero
/// bit in P. SGT is supported because, when P is the largest representable
/// power of 2, an earlier optimization converts the expression into
/// (icmp X s> -1). Parameter P supports masking using undef/poison in either
/// scalar or vector values.
static Value *foldPowerOf2AndShiftedMask(ICmpInst *Cmp0, ICmpInst *Cmp1,
                                         bool JoinedByAnd,
                                         InstCombiner::BuilderTy &Builder) {
  if (!JoinedByAnd)
    return nullptr;
  Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr;
  ICmpInst::Predicate CmpPred0 = Cmp0->getPredicate(),
                      CmpPred1 = Cmp1->getPredicate();
  // Assuming P is a 2^n, getMaskedTypeForICmpPair will normalize (icmp X u<
  // 2^n) into (icmp (X & ~(2^n-1)) == 0) and (icmp X s> -1) into (icmp (X &
  // SignMask) == 0).
  std::optional<std::pair<unsigned, unsigned>> MaskPair =
      getMaskedTypeForICmpPair(A, B, C, D, E, Cmp0, Cmp1, CmpPred0, CmpPred1);
  if (!MaskPair)
    return nullptr;

  const auto compareBMask = BMask_NotMixed | BMask_NotAllOnes;
  unsigned CmpMask0 = MaskPair->first;
  unsigned CmpMask1 = MaskPair->second;
  if ((CmpMask0 & Mask_AllZeros) && (CmpMask1 == compareBMask)) {
    if (Value *V = foldNegativePower2AndShiftedMask(A, B, D, E, CmpPred0,
                                                    CmpPred1, Builder))
      return V;
  } else if ((CmpMask0 == compareBMask) && (CmpMask1 & Mask_AllZeros)) {
    if (Value *V = foldNegativePower2AndShiftedMask(A, D, B, C, CmpPred1,
                                                    CmpPred0, Builder))
      return V;
  }
  return nullptr;
}

/// Commuted variants are assumed to be handled by calling this function again
/// with the parameters swapped.
static Value *foldUnsignedUnderflowCheck(ICmpInst *ZeroICmp,
                                         ICmpInst *UnsignedICmp, bool IsAnd,
                                         const SimplifyQuery &Q,
                                         InstCombiner::BuilderTy &Builder) {
  Value *ZeroCmpOp;
  ICmpInst::Predicate EqPred;
  if (!match(ZeroICmp, m_ICmp(EqPred, m_Value(ZeroCmpOp), m_Zero())) ||
      !ICmpInst::isEquality(EqPred))
    return nullptr;

  ICmpInst::Predicate UnsignedPred;

  Value *A, *B;
  if (match(UnsignedICmp,
            m_c_ICmp(UnsignedPred, m_Specific(ZeroCmpOp), m_Value(A))) &&
      match(ZeroCmpOp, m_c_Add(m_Specific(A), m_Value(B))) &&
      (ZeroICmp->hasOneUse() || UnsignedICmp->hasOneUse())) {
    auto GetKnownNonZeroAndOther = [&](Value *&NonZero, Value *&Other) {
      if (!isKnownNonZero(NonZero, Q))
        std::swap(NonZero, Other);
      return isKnownNonZero(NonZero, Q);
    };

    // Given  ZeroCmpOp = (A + B)
    //   ZeroCmpOp <  A && ZeroCmpOp != 0  -->  (0-X) <  Y  iff
    //   ZeroCmpOp >= A || ZeroCmpOp == 0  -->  (0-X) >= Y  iff
    // with X being the value (A/B) that is known to be non-zero,
    // and Y being remaining value.
    if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_NE &&
        IsAnd && GetKnownNonZeroAndOther(B, A))
      return Builder.CreateICmpULT(Builder.CreateNeg(B), A);
    if (UnsignedPred == ICmpInst::ICMP_UGE && EqPred == ICmpInst::ICMP_EQ &&
        !IsAnd && GetKnownNonZeroAndOther(B, A))
      return Builder.CreateICmpUGE(Builder.CreateNeg(B), A);
  }

  return nullptr;
}
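// Example: with %s = add %a, %b and %b known non-zero,
//   (icmp ult %s, %a) & (icmp ne %s, 0) -> (icmp ult (sub 0, %b), %a)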
struct IntPart {
  Value *From;
  unsigned StartBit;
  unsigned NumBits;
};

/// Match an extraction of bits from an integer.
static std::optional<IntPart> matchIntPart(Value *V) {
  Value *X;
  if (!match(V, m_OneUse(m_Trunc(m_Value(X)))))
    return std::nullopt;

  unsigned NumOriginalBits = X->getType()->getScalarSizeInBits();
  unsigned NumExtractedBits = V->getType()->getScalarSizeInBits();
  Value *Y;
  const APInt *Shift;
  // For a trunc(lshr Y, Shift) pattern, make sure we're only extracting bits
  // from Y, not any shifted-in zeroes.
  if (match(X, m_OneUse(m_LShr(m_Value(Y), m_APInt(Shift)))) &&
      Shift->ule(NumOriginalBits - NumExtractedBits))
    return {{Y, (unsigned)Shift->getZExtValue(), NumExtractedBits}};
  return {{X, 0, NumExtractedBits}};
}

/// Materialize an extraction of bits from an integer in IR.
static Value *extractIntPart(const IntPart &P, IRBuilderBase &Builder) {
  Value *V = P.From;
  if (P.StartBit)
    V = Builder.CreateLShr(V, P.StartBit);
  Type *TruncTy = V->getType()->getWithNewBitWidth(P.NumBits);
  if (TruncTy != V->getType())
    V = Builder.CreateTrunc(V, TruncTy);
  return V;
}
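// Example: extractIntPart({%x, 8, 16}, Builder) on an i32 %x emits
//   %1 = lshr i32 %x, 8
//   %2 = trunc i32 %1 to i16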
/// (icmp eq X0, Y0) & (icmp eq X1, Y1) -> icmp eq X01, Y01
/// (icmp ne X0, Y0) | (icmp ne X1, Y1) -> icmp ne X01, Y01
/// where X0, X1 and Y0, Y1 are adjacent parts extracted from an integer.
Value *InstCombinerImpl::foldEqOfParts(ICmpInst *Cmp0, ICmpInst *Cmp1,
                                       bool IsAnd) {
  if (!Cmp0->hasOneUse() || !Cmp1->hasOneUse())
    return nullptr;

  CmpInst::Predicate Pred = IsAnd ? CmpInst::ICMP_EQ : CmpInst::ICMP_NE;
  auto GetMatchPart = [&](ICmpInst *Cmp,
                          unsigned OpNo) -> std::optional<IntPart> {
    if (Pred == Cmp->getPredicate())
      return matchIntPart(Cmp->getOperand(OpNo));

    const APInt *C;
    // (icmp eq (lshr x, C), (lshr y, C)) gets optimized to:
    // (icmp ult (xor x, y), 1 << C) so also look for that.
    if (Pred == CmpInst::ICMP_EQ && Cmp->getPredicate() == CmpInst::ICMP_ULT) {
      if (!match(Cmp->getOperand(1), m_Power2(C)) ||
          !match(Cmp->getOperand(0), m_Xor(m_Value(), m_Value())))
        return std::nullopt;
    }

    // (icmp ne (lshr x, C), (lshr y, C)) gets optimized to:
    // (icmp ugt (xor x, y), (1 << C) - 1) so also look for that.
    else if (Pred == CmpInst::ICMP_NE &&
             Cmp->getPredicate() == CmpInst::ICMP_UGT) {
      if (!match(Cmp->getOperand(1), m_LowBitMask(C)) ||
          !match(Cmp->getOperand(0), m_Xor(m_Value(), m_Value())))
        return std::nullopt;
    } else {
      return std::nullopt;
    }

    unsigned From = Pred == CmpInst::ICMP_NE ? C->popcount() : C->countr_zero();
    Instruction *I = cast<Instruction>(Cmp->getOperand(0));
    return {{I->getOperand(OpNo), From, C->getBitWidth() - From}};
  };

  std::optional<IntPart> L0 = GetMatchPart(Cmp0, 0);
  std::optional<IntPart> R0 = GetMatchPart(Cmp0, 1);
  std::optional<IntPart> L1 = GetMatchPart(Cmp1, 0);
  std::optional<IntPart> R1 = GetMatchPart(Cmp1, 1);
  if (!L0 || !R0 || !L1 || !R1)
    return nullptr;

  // Make sure the LHS/RHS compare a part of the same value, possibly after
  // an operand swap.
  if (L0->From != L1->From || R0->From != R1->From) {
    if (L0->From != R1->From || R0->From != L1->From)
      return nullptr;
    std::swap(L1, R1);
  }

  // Make sure the extracted parts are adjacent, canonicalizing to L0/R0 being
  // the low part and L1/R1 being the high part.
  if (L0->StartBit + L0->NumBits != L1->StartBit ||
      R0->StartBit + R0->NumBits != R1->StartBit) {
    if (L1->StartBit + L1->NumBits != L0->StartBit ||
        R1->StartBit + R1->NumBits != R0->StartBit)
      return nullptr;
    std::swap(L0, L1);
    std::swap(R0, R1);
  }

  // We can simplify to a comparison of these larger parts of the integers.
  IntPart L = {L0->From, L0->StartBit, L0->NumBits + L1->NumBits};
  IntPart R = {R0->From, R0->StartBit, R0->NumBits + R1->NumBits};
  Value *LValue = extractIntPart(L, Builder);
  Value *RValue = extractIntPart(R, Builder);
  return Builder.CreateICmp(Pred, LValue, RValue);
}

/// Reduce logic-of-compares with equality to a constant by substituting a
/// common operand with the constant. Callers are expected to call this with
/// Cmp0/Cmp1 switched to handle logic op commutativity.
static Value *foldAndOrOfICmpsWithConstEq(ICmpInst *Cmp0, ICmpInst *Cmp1,
                                          bool IsAnd, bool IsLogical,
                                          InstCombiner::BuilderTy &Builder,
                                          const SimplifyQuery &Q) {
  // Match an equality compare with a non-poison constant as Cmp0.
  // Also, give up if the compare can be constant-folded to avoid looping.
  ICmpInst::Predicate Pred0;
  Value *X;
  Constant *C;
  if (!match(Cmp0, m_ICmp(Pred0, m_Value(X), m_Constant(C))) ||
      !isGuaranteedNotToBeUndefOrPoison(C) || isa<Constant>(X))
    return nullptr;
  if ((IsAnd && Pred0 != ICmpInst::ICMP_EQ) ||
      (!IsAnd && Pred0 != ICmpInst::ICMP_NE))
    return nullptr;

  // The other compare must include a common operand (X). Canonicalize the
  // common operand as operand 1 (Pred1 is swapped if the common operand was
  // operand 0).
  Value *Y;
  ICmpInst::Predicate Pred1;
  if (!match(Cmp1, m_c_ICmp(Pred1, m_Value(Y), m_Specific(X))))
    return nullptr;

  // Replace variable with constant value equivalence to remove a variable use:
  // (X == C) && (Y Pred1 X) --> (X == C) && (Y Pred1 C)
  // (X != C) || (Y Pred1 X) --> (X != C) || (Y Pred1 C)
  // Can think of the 'or' substitution with the 'and' bool equivalent:
  // A || B --> A || (!A && B)
  Value *SubstituteCmp = simplifyICmpInst(Pred1, Y, C, Q);
  if (!SubstituteCmp) {
    // If we need to create a new instruction, require that the old compare can
    // be removed.
    if (!Cmp1->hasOneUse())
      return nullptr;
    SubstituteCmp = Builder.CreateICmp(Pred1, Y, C);
  }
  if (IsLogical)
    return IsAnd ? Builder.CreateLogicalAnd(Cmp0, SubstituteCmp)
                 : Builder.CreateLogicalOr(Cmp0, SubstituteCmp);
  return Builder.CreateBinOp(IsAnd ? Instruction::And : Instruction::Or, Cmp0,
                             SubstituteCmp);
}
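// Example: (X == 42) && (Y u> X) becomes (X == 42) && (Y u> 42), substituting
// the constant for X in the second compare to remove a use of X.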
/// Fold (icmp Pred1 V1, C1) & (icmp Pred2 V2, C2)
/// or   (icmp Pred1 V1, C1) | (icmp Pred2 V2, C2)
/// into a single comparison using range-based reasoning.
/// NOTE: This is also used for logical and/or, must be poison-safe!
Value *InstCombinerImpl::foldAndOrOfICmpsUsingRanges(ICmpInst *ICmp1,
                                                     ICmpInst *ICmp2,
                                                     bool IsAnd) {
  ICmpInst::Predicate Pred1, Pred2;
  Value *V1, *V2;
  const APInt *C1, *C2;
  if (!match(ICmp1, m_ICmp(Pred1, m_Value(V1), m_APInt(C1))) ||
      !match(ICmp2, m_ICmp(Pred2, m_Value(V2), m_APInt(C2))))
    return nullptr;

  // Look through add of a constant offset on V1, V2, or both operands. This
  // allows us to interpret the V + C' < C'' range idiom into a proper range.
  const APInt *Offset1 = nullptr, *Offset2 = nullptr;
  if (V1 != V2) {
    Value *X;
    if (match(V1, m_Add(m_Value(X), m_APInt(Offset1))))
      V1 = X;
    if (match(V2, m_Add(m_Value(X), m_APInt(Offset2))))
      V2 = X;
  }

  if (V1 != V2)
    return nullptr;

  ConstantRange CR1 = ConstantRange::makeExactICmpRegion(
      IsAnd ? ICmpInst::getInversePredicate(Pred1) : Pred1, *C1);
  if (Offset1)
    CR1 = CR1.subtract(*Offset1);

  ConstantRange CR2 = ConstantRange::makeExactICmpRegion(
      IsAnd ? ICmpInst::getInversePredicate(Pred2) : Pred2, *C2);
  if (Offset2)
    CR2 = CR2.subtract(*Offset2);

  Type *Ty = V1->getType();
  Value *NewV = V1;
  std::optional<ConstantRange> CR = CR1.exactUnionWith(CR2);
  if (!CR) {
    if (!(ICmp1->hasOneUse() && ICmp2->hasOneUse()) || CR1.isWrappedSet() ||
        CR2.isWrappedSet())
      return nullptr;

    // Check whether we have equal-size ranges that only differ by one bit.
    // In that case we can apply a mask to map one range onto the other.
    APInt LowerDiff = CR1.getLower() ^ CR2.getLower();
    APInt UpperDiff = (CR1.getUpper() - 1) ^ (CR2.getUpper() - 1);
    APInt CR1Size = CR1.getUpper() - CR1.getLower();
    if (!LowerDiff.isPowerOf2() || LowerDiff != UpperDiff ||
        CR1Size != CR2.getUpper() - CR2.getLower())
      return nullptr;

    CR = CR1.getLower().ult(CR2.getLower()) ? CR1 : CR2;
    NewV = Builder.CreateAnd(NewV, ConstantInt::get(Ty, ~LowerDiff));
  }
  if (IsAnd)
    CR = CR->inverse();

  CmpInst::Predicate NewPred;
  APInt NewC, Offset;
  CR->getEquivalentICmp(NewPred, NewC, Offset);

  if (Offset != 0)
    NewV = Builder.CreateAdd(NewV, ConstantInt::get(Ty, Offset));
  return Builder.CreateICmp(NewPred, NewV, ConstantInt::get(Ty, NewC));
}
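// Example: (icmp ult X, 10) | (icmp eq X, 10) covers the union [0, 11), so it
// folds to (icmp ult X, 11).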
/// Ignore all operations which only change the sign of a value, returning the
/// underlying magnitude value.
static Value *stripSignOnlyFPOps(Value *Val) {
  match(Val, m_FNeg(m_Value(Val)));
  match(Val, m_FAbs(m_Value(Val)));
  match(Val, m_CopySign(m_Value(Val), m_Value()));
  return Val;
}

/// Matches canonical form of isnan, fcmp ord x, 0
static bool matchIsNotNaN(FCmpInst::Predicate P, Value *LHS, Value *RHS) {
  return P == FCmpInst::FCMP_ORD && match(RHS, m_AnyZeroFP());
}

/// Matches fcmp u__ x, +/-inf
static bool matchUnorderedInfCompare(FCmpInst::Predicate P, Value *LHS,
                                     Value *RHS) {
  return FCmpInst::isUnordered(P) && match(RHS, m_Inf());
}

/// and (fcmp ord x, 0), (fcmp u* x, inf) -> fcmp o* x, inf
///
/// Clang emits this pattern for doing an isfinite check in __builtin_isnormal.
static Value *matchIsFiniteTest(InstCombiner::BuilderTy &Builder, FCmpInst *LHS,
                                FCmpInst *RHS) {
  Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1);
  Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1);
  FCmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();

  if (!matchIsNotNaN(PredL, LHS0, LHS1) ||
      !matchUnorderedInfCompare(PredR, RHS0, RHS1))
    return nullptr;

  IRBuilder<>::FastMathFlagGuard FMFG(Builder);
  FastMathFlags FMF = LHS->getFastMathFlags();
  FMF &= RHS->getFastMathFlags();
  Builder.setFastMathFlags(FMF);

  return Builder.CreateFCmp(FCmpInst::getOrderedPredicate(PredR), RHS0, RHS1);
}

Value *InstCombinerImpl::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS,
                                          bool IsAnd, bool IsLogicalSelect) {
  Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1);
  Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1);
  FCmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();

  if (LHS0 == RHS1 && RHS0 == LHS1) {
    // Swap RHS operands to match LHS.
    PredR = FCmpInst::getSwappedPredicate(PredR);
    std::swap(RHS0, RHS1);
  }

  // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y).
  // Suppose the relation between x and y is R, where R is one of
  // U(1000), L(0100), G(0010) or E(0001), and CC0 and CC1 are the bitmasks for
  // testing the desired relations.
  //
  // Since (R & CC0) and (R & CC1) are either R or 0, we actually have this:
  //    bool(R & CC0) && bool(R & CC1)
  //  = bool((R & CC0) & (R & CC1))
  //  = bool(R & (CC0 & CC1)) <= by re-association, commutation, and idempotency
  //
  // Since (R & CC0) and (R & CC1) are either R or 0, we actually have this:
  //    bool(R & CC0) || bool(R & CC1)
  //  = bool((R & CC0) | (R & CC1))
  //  = bool(R & (CC0 | CC1)) <= by reversed distribution (contribution? ;)
  if (LHS0 == RHS0 && LHS1 == RHS1) {
    unsigned FCmpCodeL = getFCmpCode(PredL);
    unsigned FCmpCodeR = getFCmpCode(PredR);
    unsigned NewPred = IsAnd ? FCmpCodeL & FCmpCodeR : FCmpCodeL | FCmpCodeR;

    // Intersect the fast math flags.
    // TODO: We can union the fast math flags unless this is a logical select.
    IRBuilder<>::FastMathFlagGuard FMFG(Builder);
    FastMathFlags FMF = LHS->getFastMathFlags();
    FMF &= RHS->getFastMathFlags();
    Builder.setFastMathFlags(FMF);

    return getFCmpValue(NewPred, LHS0, LHS1, Builder);
  }

  // This transform is not valid for a logical select.
  if (!IsLogicalSelect &&
      ((PredL == FCmpInst::FCMP_ORD && PredR == FCmpInst::FCMP_ORD && IsAnd) ||
       (PredL == FCmpInst::FCMP_UNO && PredR == FCmpInst::FCMP_UNO &&
        !IsAnd))) {
    if (LHS0->getType() != RHS0->getType())
      return nullptr;

    // FCmp canonicalization ensures that (fcmp ord/uno X, X) and
    // (fcmp ord/uno X, C) will be transformed to (fcmp X, +0.0).
    if (match(LHS1, m_PosZeroFP()) && match(RHS1, m_PosZeroFP()))
      // Ignore the constants because they are obviously not NANs:
      // (fcmp ord x, 0.0) & (fcmp ord y, 0.0)  -> (fcmp ord x, y)
      // (fcmp uno x, 0.0) | (fcmp uno y, 0.0)  -> (fcmp uno x, y)
      return Builder.CreateFCmp(PredL, LHS0, RHS0);
  }

  if (IsAnd && stripSignOnlyFPOps(LHS0) == stripSignOnlyFPOps(RHS0)) {
    // and (fcmp ord x, 0), (fcmp u* x, inf) -> fcmp o* x, inf
    // and (fcmp ord x, 0), (fcmp u* fabs(x), inf) -> fcmp o* x, inf
    if (Value *Left = matchIsFiniteTest(Builder, LHS, RHS))
      return Left;
    if (Value *Right = matchIsFiniteTest(Builder, RHS, LHS))
      return Right;
  }

  // Turn at least two fcmps with constants into llvm.is.fpclass.
  //
  // If we can represent a combined value test with one class call, we can
  // potentially eliminate 4-6 instructions. If we can represent a test with a
  // single fcmp with fneg and fabs, that's likely a better canonical form.
  if (LHS->hasOneUse() && RHS->hasOneUse()) {
    auto [ClassValRHS, ClassMaskRHS] =
        fcmpToClassTest(PredR, *RHS->getFunction(), RHS0, RHS1);
    if (ClassValRHS) {
      auto [ClassValLHS, ClassMaskLHS] =
          fcmpToClassTest(PredL, *LHS->getFunction(), LHS0, LHS1);
      if (ClassValLHS == ClassValRHS) {
        unsigned CombinedMask = IsAnd ? (ClassMaskLHS & ClassMaskRHS)
                                      : (ClassMaskLHS | ClassMaskRHS);
        return Builder.CreateIntrinsic(
            Intrinsic::is_fpclass, {ClassValLHS->getType()},
            {ClassValLHS, Builder.getInt32(CombinedMask)});
      }
    }
  }

  // Canonicalize the range check idiom:
  // and (fcmp olt/ole/ult/ule x, C), (fcmp ogt/oge/ugt/uge x, -C)
  // --> fabs(x) olt/ole/ult/ule C
  // or  (fcmp ogt/oge/ugt/uge x, C), (fcmp olt/ole/ult/ule x, -C)
  // --> fabs(x) ogt/oge/ugt/uge C
  // TODO: Generalize to handle a negated variable operand?
  const APFloat *LHSC, *RHSC;
  if (LHS0 == RHS0 && LHS->hasOneUse() && RHS->hasOneUse() &&
      FCmpInst::getSwappedPredicate(PredL) == PredR &&
      match(LHS1, m_APFloatAllowPoison(LHSC)) &&
      match(RHS1, m_APFloatAllowPoison(RHSC)) &&
      LHSC->bitwiseIsEqual(neg(*RHSC))) {
    auto IsLessThanOrLessEqual = [](FCmpInst::Predicate Pred) {
      switch (Pred) {
      case FCmpInst::FCMP_OLT:
      case FCmpInst::FCMP_OLE:
      case FCmpInst::FCMP_ULT:
      case FCmpInst::FCMP_ULE:
        return true;
      default:
        return false;
      }
    };
    if (IsLessThanOrLessEqual(IsAnd ? PredR : PredL)) {
      std::swap(LHSC, RHSC);
      std::swap(PredL, PredR);
    }
    if (IsLessThanOrLessEqual(IsAnd ? PredL : PredR)) {
      BuilderTy::FastMathFlagGuard Guard(Builder);
      Builder.setFastMathFlags(LHS->getFastMathFlags() |
                               RHS->getFastMathFlags());

      Value *FAbs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, LHS0);
      return Builder.CreateFCmp(PredL, FAbs,
                                ConstantFP::get(LHS0->getType(), *LHSC));
    }
  }

  return nullptr;
}
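// Example of the fabs range-check canonicalization above:
//   (fcmp olt x, 4.0) & (fcmp ogt x, -4.0) -> (fcmp olt (fabs x), 4.0)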
PredL : PredR)) {1492BuilderTy::FastMathFlagGuard Guard(Builder);1493Builder.setFastMathFlags(LHS->getFastMathFlags() |1494RHS->getFastMathFlags());14951496Value *FAbs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, LHS0);1497return Builder.CreateFCmp(PredL, FAbs,1498ConstantFP::get(LHS0->getType(), *LHSC));1499}1500}15011502return nullptr;1503}15041505/// Match an fcmp against a special value that performs a test possible by1506/// llvm.is.fpclass.1507static bool matchIsFPClassLikeFCmp(Value *Op, Value *&ClassVal,1508uint64_t &ClassMask) {1509auto *FCmp = dyn_cast<FCmpInst>(Op);1510if (!FCmp || !FCmp->hasOneUse())1511return false;15121513std::tie(ClassVal, ClassMask) =1514fcmpToClassTest(FCmp->getPredicate(), *FCmp->getParent()->getParent(),1515FCmp->getOperand(0), FCmp->getOperand(1));1516return ClassVal != nullptr;1517}15181519/// or (is_fpclass x, mask0), (is_fpclass x, mask1)1520/// -> is_fpclass x, (mask0 | mask1)1521/// and (is_fpclass x, mask0), (is_fpclass x, mask1)1522/// -> is_fpclass x, (mask0 & mask1)1523/// xor (is_fpclass x, mask0), (is_fpclass x, mask1)1524/// -> is_fpclass x, (mask0 ^ mask1)1525Instruction *InstCombinerImpl::foldLogicOfIsFPClass(BinaryOperator &BO,1526Value *Op0, Value *Op1) {1527Value *ClassVal0 = nullptr;1528Value *ClassVal1 = nullptr;1529uint64_t ClassMask0, ClassMask1;15301531// Restrict to folding one fcmp into one is.fpclass for now, don't introduce a1532// new class.1533//1534// TODO: Support forming is.fpclass out of 2 separate fcmps when codegen is1535// better.15361537bool IsLHSClass =1538match(Op0, m_OneUse(m_Intrinsic<Intrinsic::is_fpclass>(1539m_Value(ClassVal0), m_ConstantInt(ClassMask0))));1540bool IsRHSClass =1541match(Op1, m_OneUse(m_Intrinsic<Intrinsic::is_fpclass>(1542m_Value(ClassVal1), m_ConstantInt(ClassMask1))));1543if ((((IsLHSClass || matchIsFPClassLikeFCmp(Op0, ClassVal0, ClassMask0)) &&1544(IsRHSClass || matchIsFPClassLikeFCmp(Op1, ClassVal1, ClassMask1)))) &&1545ClassVal0 == ClassVal1) {1546unsigned NewClassMask;1547switch (BO.getOpcode()) {1548case Instruction::And:1549NewClassMask = ClassMask0 & ClassMask1;1550break;1551case Instruction::Or:1552NewClassMask = ClassMask0 | ClassMask1;1553break;1554case Instruction::Xor:1555NewClassMask = ClassMask0 ^ ClassMask1;1556break;1557default:1558llvm_unreachable("not a binary logic operator");1559}15601561if (IsLHSClass) {1562auto *II = cast<IntrinsicInst>(Op0);1563II->setArgOperand(15641, ConstantInt::get(II->getArgOperand(1)->getType(), NewClassMask));1565return replaceInstUsesWith(BO, II);1566}15671568if (IsRHSClass) {1569auto *II = cast<IntrinsicInst>(Op1);1570II->setArgOperand(15711, ConstantInt::get(II->getArgOperand(1)->getType(), NewClassMask));1572return replaceInstUsesWith(BO, II);1573}15741575CallInst *NewClass =1576Builder.CreateIntrinsic(Intrinsic::is_fpclass, {ClassVal0->getType()},1577{ClassVal0, Builder.getInt32(NewClassMask)});1578return replaceInstUsesWith(BO, NewClass);1579}15801581return nullptr;1582}15831584/// Look for the pattern that conditionally negates a value via math operations:1585/// cond.splat = sext i1 cond1586/// sub = add cond.splat, x1587/// xor = xor sub, cond.splat1588/// and rewrite it to do the same, but via logical operations:1589/// value.neg = sub 0, value1590/// cond = select i1 neg, value.neg, value1591Instruction *InstCombinerImpl::canonicalizeConditionalNegationViaMathToSelect(1592BinaryOperator &I) {1593assert(I.getOpcode() == BinaryOperator::Xor && "Only for xor!");1594Value *Cond, *X;1595// As per complexity ordering, `xor` is not 
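// Concretely, using llvm.is.fpclass test-mask bits (0x3 = nan, 0x204 = +/-inf):
//   or (is.fpclass %x, 0x3), (is.fpclass %x, 0x204) -> is.fpclass %x, 0x207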
/// Look for the pattern that conditionally negates a value via math operations:
///   cond.splat = sext i1 cond
///   sub = add cond.splat, x
///   xor = xor sub, cond.splat
/// and rewrite it to do the same, but via logical operations:
///   value.neg = sub 0, value
///   res = select i1 cond, value.neg, value
Instruction *InstCombinerImpl::canonicalizeConditionalNegationViaMathToSelect(
    BinaryOperator &I) {
  assert(I.getOpcode() == BinaryOperator::Xor && "Only for xor!");
  Value *Cond, *X;
  // As per complexity ordering, `xor` is not commutative here.
  if (!match(&I, m_c_BinOp(m_OneUse(m_Value()), m_Value())) ||
      !match(I.getOperand(1), m_SExt(m_Value(Cond))) ||
      !Cond->getType()->isIntOrIntVectorTy(1) ||
      !match(I.getOperand(0), m_c_Add(m_SExt(m_Specific(Cond)), m_Value(X))))
    return nullptr;
  return SelectInst::Create(Cond, Builder.CreateNeg(X, X->getName() + ".neg"),
                            X);
}
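// For example:
//   %splat = sext i1 %cond to i32
//   %sub = add i32 %splat, %x
//   %res = xor i32 %sub, %splat
// is rewritten to:
//   %x.neg = sub i32 0, %x
//   %res = select i1 %cond, i32 %x.neg, i32 %x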
/// This is a limited reassociation for a special case (see above) where we are
/// checking if two values are either both NAN (unordered) or not-NAN (ordered).
/// This could be handled more generally in '-reassociation', but it seems like
/// an unlikely pattern for a large number of logic ops and fcmps.
static Instruction *reassociateFCmps(BinaryOperator &BO,
                                     InstCombiner::BuilderTy &Builder) {
  Instruction::BinaryOps Opcode = BO.getOpcode();
  assert((Opcode == Instruction::And || Opcode == Instruction::Or) &&
         "Expecting and/or op for fcmp transform");

  // There are 4 commuted variants of the pattern. Canonicalize operands of this
  // logic op so an fcmp is operand 0 and a matching logic op is operand 1.
  Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1), *X;
  FCmpInst::Predicate Pred;
  if (match(Op1, m_FCmp(Pred, m_Value(), m_AnyZeroFP())))
    std::swap(Op0, Op1);

  // Match inner binop and the predicate for combining 2 NAN checks into 1.
  Value *BO10, *BO11;
  FCmpInst::Predicate NanPred = Opcode == Instruction::And ? FCmpInst::FCMP_ORD
                                                           : FCmpInst::FCMP_UNO;
  if (!match(Op0, m_FCmp(Pred, m_Value(X), m_AnyZeroFP())) || Pred != NanPred ||
      !match(Op1, m_BinOp(Opcode, m_Value(BO10), m_Value(BO11))))
    return nullptr;

  // The inner logic op must have a matching fcmp operand.
  Value *Y;
  if (!match(BO10, m_FCmp(Pred, m_Value(Y), m_AnyZeroFP())) ||
      Pred != NanPred || X->getType() != Y->getType())
    std::swap(BO10, BO11);

  if (!match(BO10, m_FCmp(Pred, m_Value(Y), m_AnyZeroFP())) ||
      Pred != NanPred || X->getType() != Y->getType())
    return nullptr;

  // and (fcmp ord X, 0), (and (fcmp ord Y, 0), Z) --> and (fcmp ord X, Y), Z
  // or (fcmp uno X, 0), (or (fcmp uno Y, 0), Z) --> or (fcmp uno X, Y), Z
  Value *NewFCmp = Builder.CreateFCmp(Pred, X, Y);
  if (auto *NewFCmpInst = dyn_cast<FCmpInst>(NewFCmp)) {
    // Intersect FMF from the 2 source fcmps.
    NewFCmpInst->copyIRFlags(Op0);
    NewFCmpInst->andIRFlags(BO10);
  }
  return BinaryOperator::Create(Opcode, NewFCmp, BO11);
}

/// Match variations of De Morgan's Laws:
///   (~A & ~B) == (~(A | B))
///   (~A | ~B) == (~(A & B))
static Instruction *matchDeMorgansLaws(BinaryOperator &I,
                                       InstCombiner &IC) {
  const Instruction::BinaryOps Opcode = I.getOpcode();
  assert((Opcode == Instruction::And || Opcode == Instruction::Or) &&
         "Trying to match De Morgan's Laws with something other than and/or");

  // Flip the logic operation.
  const Instruction::BinaryOps FlippedOpcode =
      (Opcode == Instruction::And) ? Instruction::Or : Instruction::And;

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  Value *A, *B;
  if (match(Op0, m_OneUse(m_Not(m_Value(A)))) &&
      match(Op1, m_OneUse(m_Not(m_Value(B)))) &&
      !IC.isFreeToInvert(A, A->hasOneUse()) &&
      !IC.isFreeToInvert(B, B->hasOneUse())) {
    Value *AndOr =
        IC.Builder.CreateBinOp(FlippedOpcode, A, B, I.getName() + ".demorgan");
    return BinaryOperator::CreateNot(AndOr);
  }

  // The 'not' ops may require reassociation.
  // (A & ~B) & ~C --> A & ~(B | C)
  // (~B & A) & ~C --> A & ~(B | C)
  // (A | ~B) | ~C --> A | ~(B & C)
  // (~B | A) | ~C --> A | ~(B & C)
  Value *C;
  if (match(Op0, m_OneUse(m_c_BinOp(Opcode, m_Value(A), m_Not(m_Value(B))))) &&
      match(Op1, m_Not(m_Value(C)))) {
    Value *FlippedBO = IC.Builder.CreateBinOp(FlippedOpcode, B, C);
    return BinaryOperator::Create(Opcode, A, IC.Builder.CreateNot(FlippedBO));
  }

  return nullptr;
}

bool InstCombinerImpl::shouldOptimizeCast(CastInst *CI) {
  Value *CastSrc = CI->getOperand(0);

  // Noop casts and casts of constants should be eliminated trivially.
  if (CI->getSrcTy() == CI->getDestTy() || isa<Constant>(CastSrc))
    return false;

  // If this cast is paired with another cast that can be eliminated, we prefer
  // to have it eliminated.
  if (const auto *PrecedingCI = dyn_cast<CastInst>(CastSrc))
    if (isEliminableCastPair(PrecedingCI, CI))
      return false;

  return true;
}

/// Fold {and,or,xor} (cast X), C.
static Instruction *foldLogicCastConstant(BinaryOperator &Logic, CastInst *Cast,
                                          InstCombinerImpl &IC) {
  Constant *C = dyn_cast<Constant>(Logic.getOperand(1));
  if (!C)
    return nullptr;

  auto LogicOpc = Logic.getOpcode();
  Type *DestTy = Logic.getType();
  Type *SrcTy = Cast->getSrcTy();

  // Move the logic operation ahead of a zext or sext if the constant is
  // unchanged in the smaller source type. Performing the logic in a smaller
  // type may provide more information to later folds, and the smaller logic
  // instruction may be cheaper (particularly in the case of vectors).
  Value *X;
  if (match(Cast, m_OneUse(m_ZExt(m_Value(X))))) {
    if (Constant *TruncC = IC.getLosslessUnsignedTrunc(C, SrcTy)) {
      // LogicOpc (zext X), C --> zext (LogicOpc X, C)
      Value *NewOp = IC.Builder.CreateBinOp(LogicOpc, X, TruncC);
      return new ZExtInst(NewOp, DestTy);
    }
  }

  if (match(Cast, m_OneUse(m_SExtLike(m_Value(X))))) {
    if (Constant *TruncC = IC.getLosslessSignedTrunc(C, SrcTy)) {
      // LogicOpc (sext X), C --> sext (LogicOpc X, C)
      Value *NewOp = IC.Builder.CreateBinOp(LogicOpc, X, TruncC);
      return new SExtInst(NewOp, DestTy);
    }
  }

  return nullptr;
}

/// Fold {and,or,xor} (cast X), Y.
Instruction *InstCombinerImpl::foldCastedBitwiseLogic(BinaryOperator &I) {
  auto LogicOpc = I.getOpcode();
  assert(I.isBitwiseLogicOp() && "Unexpected opcode for bitwise logic folding");

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);

  // fold bitwise(A >> BW - 1, zext(icmp)) (BW is the scalar bits of the
  // type of A)
  //   -> bitwise(zext(A < 0), zext(icmp))
  //   -> zext(bitwise(A < 0, icmp))
  auto FoldBitwiseICmpZeroWithICmp = [&](Value *Op0,
                                         Value *Op1) -> Instruction * {
    ICmpInst::Predicate Pred;
    Value *A;
    bool IsMatched =
        match(Op0,
              m_OneUse(m_LShr(
                  m_Value(A),
                  m_SpecificInt(Op0->getType()->getScalarSizeInBits() - 1)))) &&
        match(Op1, m_OneUse(m_ZExt(m_ICmp(Pred, m_Value(), m_Value()))));

    if (!IsMatched)
      return nullptr;

    auto *ICmpL =
        Builder.CreateICmpSLT(A, Constant::getNullValue(A->getType()));
    auto *ICmpR = cast<ZExtInst>(Op1)->getOperand(0);
    auto *BitwiseOp = Builder.CreateBinOp(LogicOpc, ICmpL, ICmpR);

    return new ZExtInst(BitwiseOp, Op0->getType());
  };

  if (auto *Ret = FoldBitwiseICmpZeroWithICmp(Op0, Op1))
    return Ret;

  if (auto *Ret = FoldBitwiseICmpZeroWithICmp(Op1, Op0))
    return Ret;

  CastInst *Cast0 = dyn_cast<CastInst>(Op0);
  if (!Cast0)
    return nullptr;

  // This must be a cast from an integer or integer vector source type to allow
  // transformation of the logic operation to the source type.
  Type *DestTy = I.getType();
  Type *SrcTy = Cast0->getSrcTy();
  if (!SrcTy->isIntOrIntVectorTy())
    return nullptr;

  if (Instruction *Ret = foldLogicCastConstant(I, Cast0, *this))
    return Ret;

  CastInst *Cast1 = dyn_cast<CastInst>(Op1);
  if (!Cast1)
    return nullptr;

  // Both operands of the logic operation are casts. The casts must be the
  // same kind for reduction.
  Instruction::CastOps CastOpcode = Cast0->getOpcode();
  if (CastOpcode != Cast1->getOpcode())
    return nullptr;

  // If the source types do not match, but the casts are matching extends, we
  // can still narrow the logic op.
  if (SrcTy != Cast1->getSrcTy()) {
    Value *X, *Y;
    if (match(Cast0, m_OneUse(m_ZExtOrSExt(m_Value(X)))) &&
        match(Cast1, m_OneUse(m_ZExtOrSExt(m_Value(Y))))) {
      // Cast the narrower source to the wider source type.
      unsigned XNumBits = X->getType()->getScalarSizeInBits();
      unsigned YNumBits = Y->getType()->getScalarSizeInBits();
      if (XNumBits < YNumBits)
        X = Builder.CreateCast(CastOpcode, X, Y->getType());
      else
        Y = Builder.CreateCast(CastOpcode, Y, X->getType());
      // Do the logic op in the intermediate width, then widen more.
      Value *NarrowLogic = Builder.CreateBinOp(LogicOpc, X, Y);
      return CastInst::Create(CastOpcode, NarrowLogic, DestTy);
    }

    // Give up for other cast opcodes.
    return nullptr;
  }

  Value *Cast0Src = Cast0->getOperand(0);
  Value *Cast1Src = Cast1->getOperand(0);

  // fold logic(cast(A), cast(B)) -> cast(logic(A, B))
  if ((Cast0->hasOneUse() || Cast1->hasOneUse()) &&
      shouldOptimizeCast(Cast0) && shouldOptimizeCast(Cast1)) {
    Value *NewOp = Builder.CreateBinOp(LogicOpc, Cast0Src, Cast1Src,
                                       I.getName());
    return CastInst::Create(CastOpcode, NewOp, DestTy);
  }

  return nullptr;
}
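// A simple instance of the logic(cast(A), cast(B)) -> cast(logic(A, B))
// reduction above:
//   and (zext i8 %x to i32), (zext i8 %y to i32)
//     --> %narrow = and i8 %x, %y
//         zext i8 %narrow to i32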
static Instruction *foldAndToXor(BinaryOperator &I,
                                 InstCombiner::BuilderTy &Builder) {
  assert(I.getOpcode() == Instruction::And);
  Value *Op0 = I.getOperand(0);
  Value *Op1 = I.getOperand(1);
  Value *A, *B;

  // Operand complexity canonicalization guarantees that the 'or' is Op0.
  // (A | B) & ~(A & B) --> A ^ B
  // (A | B) & ~(B & A) --> A ^ B
  if (match(&I, m_BinOp(m_Or(m_Value(A), m_Value(B)),
                        m_Not(m_c_And(m_Deferred(A), m_Deferred(B))))))
    return BinaryOperator::CreateXor(A, B);

  // (A | ~B) & (~A | B) --> ~(A ^ B)
  // (A | ~B) & (B | ~A) --> ~(A ^ B)
  // (~B | A) & (~A | B) --> ~(A ^ B)
  // (~B | A) & (B | ~A) --> ~(A ^ B)
  if (Op0->hasOneUse() || Op1->hasOneUse())
    if (match(&I, m_BinOp(m_c_Or(m_Value(A), m_Not(m_Value(B))),
                          m_c_Or(m_Not(m_Deferred(A)), m_Deferred(B)))))
      return BinaryOperator::CreateNot(Builder.CreateXor(A, B));

  return nullptr;
}

static Instruction *foldOrToXor(BinaryOperator &I,
                                InstCombiner::BuilderTy &Builder) {
  assert(I.getOpcode() == Instruction::Or);
  Value *Op0 = I.getOperand(0);
  Value *Op1 = I.getOperand(1);
  Value *A, *B;

  // Operand complexity canonicalization guarantees that the 'and' is Op0.
  // (A & B) | ~(A | B) --> ~(A ^ B)
  // (A & B) | ~(B | A) --> ~(A ^ B)
  if (Op0->hasOneUse() || Op1->hasOneUse())
    if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
        match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))))
      return BinaryOperator::CreateNot(Builder.CreateXor(A, B));

  // Operand complexity canonicalization guarantees that the 'xor' is Op0.
  // (A ^ B) | ~(A | B) --> ~(A & B)
  // (A ^ B) | ~(B | A) --> ~(A & B)
  if (Op0->hasOneUse() || Op1->hasOneUse())
    if (match(Op0, m_Xor(m_Value(A), m_Value(B))) &&
        match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))))
      return BinaryOperator::CreateNot(Builder.CreateAnd(A, B));

  // (A & ~B) | (~A & B) --> A ^ B
  // (A & ~B) | (B & ~A) --> A ^ B
  // (~B & A) | (~A & B) --> A ^ B
  // (~B & A) | (B & ~A) --> A ^ B
  if (match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) &&
      match(Op1, m_c_And(m_Not(m_Specific(A)), m_Specific(B))))
    return BinaryOperator::CreateXor(A, B);

  return nullptr;
}

/// Return true if a constant shift amount is always less than the specified
/// bit-width. If not, the shift could create poison in the narrower type.
static bool canNarrowShiftAmt(Constant *C, unsigned BitWidth) {
  APInt Threshold(C->getType()->getScalarSizeInBits(), BitWidth);
  return match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold));
}

/// Try to use narrower ops (sink zext ops) for an 'and' with binop operand and
/// a common zext operand: and (binop (zext X), C), (zext X).
Instruction *InstCombinerImpl::narrowMaskedBinOp(BinaryOperator &And) {
  // This transform could also apply to {or, and, xor}, but there are better
  // folds for those cases, so we don't expect those patterns here. AShr is not
  // handled because it should always be transformed to LShr in this sequence.
  // The subtract transform is different because it has a constant on the left.
  // Add/mul commute the constant to RHS; sub with constant RHS becomes add.
  Value *Op0 = And.getOperand(0), *Op1 = And.getOperand(1);
  Constant *C;
  if (!match(Op0, m_OneUse(m_Add(m_Specific(Op1), m_Constant(C)))) &&
      !match(Op0, m_OneUse(m_Mul(m_Specific(Op1), m_Constant(C)))) &&
      !match(Op0, m_OneUse(m_LShr(m_Specific(Op1), m_Constant(C)))) &&
      !match(Op0, m_OneUse(m_Shl(m_Specific(Op1), m_Constant(C)))) &&
      !match(Op0, m_OneUse(m_Sub(m_Constant(C), m_Specific(Op1)))))
    return nullptr;

  Value *X;
  if (!match(Op1, m_ZExt(m_Value(X))) || Op1->hasNUsesOrMore(3))
    return nullptr;

  Type *Ty = And.getType();
  if (!isa<VectorType>(Ty) && !shouldChangeType(Ty, X->getType()))
    return nullptr;

  // If we're narrowing a shift, the shift amount must be safe (less than the
  // width) in the narrower type. If the shift amount is greater, instsimplify
  // usually handles that case, but we can't guarantee/assert it.
  Instruction::BinaryOps Opc = cast<BinaryOperator>(Op0)->getOpcode();
  if (Opc == Instruction::LShr || Opc == Instruction::Shl)
    if (!canNarrowShiftAmt(C, X->getType()->getScalarSizeInBits()))
      return nullptr;

  // and (sub C, (zext X)), (zext X) --> zext (and (sub C', X), X)
  // and (binop (zext X), C), (zext X) --> zext (and (binop X, C'), X)
  Value *NewC = ConstantExpr::getTrunc(C, X->getType());
  Value *NewBO = Opc == Instruction::Sub ? Builder.CreateBinOp(Opc, NewC, X)
                                         : Builder.CreateBinOp(Opc, X, NewC);
  return new ZExtInst(Builder.CreateAnd(NewBO, X), Ty);
}
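// For example, assuming narrowing i32 to i8 is considered profitable and the
// zext has exactly two uses:
//   and (add (zext i8 %x to i32), 12), (zext i8 %x to i32)
//     --> zext (and (add i8 %x, 12), %x) to i32
// Any carry into the high bits of the wide add is masked off by the zero
// upper bits of the other operand anyway.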
/// Try folding relatively complex patterns for both And and Or operations
/// with all And and Or swapped.
static Instruction *foldComplexAndOrPatterns(BinaryOperator &I,
                                             InstCombiner::BuilderTy &Builder) {
  const Instruction::BinaryOps Opcode = I.getOpcode();
  assert(Opcode == Instruction::And || Opcode == Instruction::Or);

  // Flip the logic operation.
  const Instruction::BinaryOps FlippedOpcode =
      (Opcode == Instruction::And) ? Instruction::Or : Instruction::And;

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  Value *A, *B, *C, *X, *Y, *Dummy;

  // Match following expressions:
  // (~(A | B) & C)
  // (~(A & B) | C)
  // Captures X = ~(A | B) or ~(A & B)
  const auto matchNotOrAnd =
      [Opcode, FlippedOpcode](Value *Op, auto m_A, auto m_B, auto m_C,
                              Value *&X, bool CountUses = false) -> bool {
    if (CountUses && !Op->hasOneUse())
      return false;

    if (match(Op, m_c_BinOp(FlippedOpcode,
                            m_CombineAnd(m_Value(X),
                                         m_Not(m_c_BinOp(Opcode, m_A, m_B))),
                            m_C)))
      return !CountUses || X->hasOneUse();

    return false;
  };

  // (~(A | B) & C) | ... --> ...
  // (~(A & B) | C) & ... --> ...
  // TODO: One use checks are conservative. We just need to check that a total
  //       number of multiple used values does not exceed reduction
  //       in operations.
  if (matchNotOrAnd(Op0, m_Value(A), m_Value(B), m_Value(C), X)) {
    // (~(A | B) & C) | (~(A | C) & B) --> (B ^ C) & ~A
    // (~(A & B) | C) & (~(A & C) | B) --> ~((B ^ C) & A)
    if (matchNotOrAnd(Op1, m_Specific(A), m_Specific(C), m_Specific(B), Dummy,
                      true)) {
      Value *Xor = Builder.CreateXor(B, C);
      return (Opcode == Instruction::Or)
                 ? BinaryOperator::CreateAnd(Xor, Builder.CreateNot(A))
                 : BinaryOperator::CreateNot(Builder.CreateAnd(Xor, A));
    }

    // (~(A | B) & C) | (~(B | C) & A) --> (A ^ C) & ~B
    // (~(A & B) | C) & (~(B & C) | A) --> ~((A ^ C) & B)
    if (matchNotOrAnd(Op1, m_Specific(B), m_Specific(C), m_Specific(A), Dummy,
                      true)) {
      Value *Xor = Builder.CreateXor(A, C);
      return (Opcode == Instruction::Or)
                 ? BinaryOperator::CreateAnd(Xor, Builder.CreateNot(B))
                 : BinaryOperator::CreateNot(Builder.CreateAnd(Xor, B));
    }

    // (~(A | B) & C) | ~(A | C) --> ~((B & C) | A)
    // (~(A & B) | C) & ~(A & C) --> ~((B | C) & A)
    if (match(Op1, m_OneUse(m_Not(m_OneUse(
                       m_c_BinOp(Opcode, m_Specific(A), m_Specific(C)))))))
      return BinaryOperator::CreateNot(Builder.CreateBinOp(
          Opcode, Builder.CreateBinOp(FlippedOpcode, B, C), A));

    // (~(A | B) & C) | ~(B | C) --> ~((A & C) | B)
    // (~(A & B) | C) & ~(B & C) --> ~((A | C) & B)
    if (match(Op1, m_OneUse(m_Not(m_OneUse(
                       m_c_BinOp(Opcode, m_Specific(B), m_Specific(C)))))))
      return BinaryOperator::CreateNot(Builder.CreateBinOp(
          Opcode, Builder.CreateBinOp(FlippedOpcode, A, C), B));

    // (~(A | B) & C) | ~(C | (A ^ B)) --> ~((A | B) & (C | (A ^ B)))
    // Note, the pattern with swapped and/or is not handled because the
    // result is more undefined than a source:
    // (~(A & B) | C) & ~(C & (A ^ B)) --> (A ^ B ^ C) | ~(A | C) is invalid.
    if (Opcode == Instruction::Or && Op0->hasOneUse() &&
        match(Op1, m_OneUse(m_Not(m_CombineAnd(
                       m_Value(Y),
                       m_c_BinOp(Opcode, m_Specific(C),
                                 m_c_Xor(m_Specific(A), m_Specific(B)))))))) {
      // X = ~(A | B)
      // Y = (C | (A ^ B))
      Value *Or = cast<BinaryOperator>(X)->getOperand(0);
      return BinaryOperator::CreateNot(Builder.CreateAnd(Or, Y));
    }
  }

  // (~A & B & C) | ... --> ...
  // (~A | B | C) | ... --> ...
  // TODO: One use checks are conservative. We just need to check that a total
  //       number of multiple used values does not exceed reduction
  //       in operations.
  if (match(Op0,
            m_OneUse(m_c_BinOp(FlippedOpcode,
                               m_BinOp(FlippedOpcode, m_Value(B), m_Value(C)),
                               m_CombineAnd(m_Value(X), m_Not(m_Value(A)))))) ||
      match(Op0, m_OneUse(m_c_BinOp(
                     FlippedOpcode,
                     m_c_BinOp(FlippedOpcode, m_Value(C),
                               m_CombineAnd(m_Value(X), m_Not(m_Value(A)))),
                     m_Value(B))))) {
    // X = ~A
    // (~A & B & C) | ~(A | B | C) --> ~(A | (B ^ C))
    // (~A | B | C) & ~(A & B & C) --> (~A | (B ^ C))
    if (match(Op1, m_OneUse(m_Not(m_c_BinOp(
                       Opcode, m_c_BinOp(Opcode, m_Specific(A), m_Specific(B)),
                       m_Specific(C))))) ||
        match(Op1, m_OneUse(m_Not(m_c_BinOp(
                       Opcode, m_c_BinOp(Opcode, m_Specific(B), m_Specific(C)),
                       m_Specific(A))))) ||
        match(Op1, m_OneUse(m_Not(m_c_BinOp(
                       Opcode, m_c_BinOp(Opcode, m_Specific(A), m_Specific(C)),
                       m_Specific(B)))))) {
      Value *Xor = Builder.CreateXor(B, C);
      return (Opcode == Instruction::Or)
                 ? BinaryOperator::CreateNot(Builder.CreateOr(Xor, A))
                 : BinaryOperator::CreateOr(Xor, X);
    }

    // (~A & B & C) | ~(A | B) --> (C | ~B) & ~A
    // (~A | B | C) & ~(A & B) --> (C & ~B) | ~A
    if (match(Op1, m_OneUse(m_Not(m_OneUse(
                       m_c_BinOp(Opcode, m_Specific(A), m_Specific(B)))))))
      return BinaryOperator::Create(
          FlippedOpcode, Builder.CreateBinOp(Opcode, C, Builder.CreateNot(B)),
          X);

    // (~A & B & C) | ~(A | C) --> (B | ~C) & ~A
    // (~A | B | C) & ~(A & C) --> (B & ~C) | ~A
    if (match(Op1, m_OneUse(m_Not(m_OneUse(
                       m_c_BinOp(Opcode, m_Specific(A), m_Specific(C)))))))
      return BinaryOperator::Create(
          FlippedOpcode, Builder.CreateBinOp(Opcode, B, Builder.CreateNot(C)),
          X);
  }

  return nullptr;
}

/// Try to reassociate a pair of binops so that values with one use only are
/// part of the same instruction. This may enable folds that are limited with
/// multi-use restrictions and makes it more likely to match other patterns that
/// are looking for a common operand.
static Instruction *reassociateForUses(BinaryOperator &BO,
                                       InstCombinerImpl::BuilderTy &Builder) {
  Instruction::BinaryOps Opcode = BO.getOpcode();
  Value *X, *Y, *Z;
  if (match(&BO,
            m_c_BinOp(Opcode, m_OneUse(m_BinOp(Opcode, m_Value(X), m_Value(Y))),
                      m_OneUse(m_Value(Z))))) {
    if (!isa<Constant>(X) && !isa<Constant>(Y) && !isa<Constant>(Z)) {
      // (X op Y) op Z --> (Y op Z) op X
      if (!X->hasOneUse()) {
        Value *YZ = Builder.CreateBinOp(Opcode, Y, Z);
        return BinaryOperator::Create(Opcode, YZ, X);
      }
      // (X op Y) op Z --> (X op Z) op Y
      if (!Y->hasOneUse()) {
        Value *XZ = Builder.CreateBinOp(Opcode, X, Z);
        return BinaryOperator::Create(Opcode, XZ, Y);
      }
    }
  }

  return nullptr;
}
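// For example, when X has extra uses but Y and Z are single-use:
//   (X & Y) & Z --> (Y & Z) & X
// so the one-use values end up paired in the same instruction.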
// Match
// (X + C2) | C
// (X + C2) ^ C
// (X + C2) & C
// and convert to do the bitwise logic first:
// (X | C) + C2
// (X ^ C) + C2
// (X & C) + C2
// iff bits affected by logic op are lower than last bit affected by math op
static Instruction *canonicalizeLogicFirst(BinaryOperator &I,
                                           InstCombiner::BuilderTy &Builder) {
  Type *Ty = I.getType();
  Instruction::BinaryOps OpC = I.getOpcode();
  Value *Op0 = I.getOperand(0);
  Value *Op1 = I.getOperand(1);
  Value *X;
  const APInt *C, *C2;

  if (!(match(Op0, m_OneUse(m_Add(m_Value(X), m_APInt(C2)))) &&
        match(Op1, m_APInt(C))))
    return nullptr;

  unsigned Width = Ty->getScalarSizeInBits();
  unsigned LastOneMath = Width - C2->countr_zero();

  switch (OpC) {
  case Instruction::And:
    if (C->countl_one() < LastOneMath)
      return nullptr;
    break;
  case Instruction::Xor:
  case Instruction::Or:
    if (C->countl_zero() < LastOneMath)
      return nullptr;
    break;
  default:
    llvm_unreachable("Unexpected BinaryOp!");
  }

  Value *NewBinOp = Builder.CreateBinOp(OpC, X, ConstantInt::get(Ty, *C));
  return BinaryOperator::CreateWithCopiedFlags(Instruction::Add, NewBinOp,
                                               ConstantInt::get(Ty, *C2), Op0);
}
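// For example, for i8 operands:
//   ((X + 8) | 1) --> ((X | 1) + 8)
// The add only affects bit 3 and above, while the or only touches bit 0,
// so the two operations can be reordered.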
// binop(shift(ShiftedC1, ShAmt), shift(ShiftedC2, add(ShAmt, AddC))) ->
// shift(binop(ShiftedC1, shift(ShiftedC2, AddC)), ShAmt)
// where both shifts are the same and AddC is a valid shift amount.
Instruction *InstCombinerImpl::foldBinOpOfDisplacedShifts(BinaryOperator &I) {
  assert((I.isBitwiseLogicOp() || I.getOpcode() == Instruction::Add) &&
         "Unexpected opcode");

  Value *ShAmt;
  Constant *ShiftedC1, *ShiftedC2, *AddC;
  Type *Ty = I.getType();
  unsigned BitWidth = Ty->getScalarSizeInBits();
  if (!match(&I, m_c_BinOp(m_Shift(m_ImmConstant(ShiftedC1), m_Value(ShAmt)),
                           m_Shift(m_ImmConstant(ShiftedC2),
                                   m_AddLike(m_Deferred(ShAmt),
                                             m_ImmConstant(AddC))))))
    return nullptr;

  // Make sure the add constant is a valid shift amount.
  if (!match(AddC,
             m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(BitWidth, BitWidth))))
    return nullptr;

  // Avoid constant expressions.
  auto *Op0Inst = dyn_cast<Instruction>(I.getOperand(0));
  auto *Op1Inst = dyn_cast<Instruction>(I.getOperand(1));
  if (!Op0Inst || !Op1Inst)
    return nullptr;

  // Both shifts must be the same.
  Instruction::BinaryOps ShiftOp =
      static_cast<Instruction::BinaryOps>(Op0Inst->getOpcode());
  if (ShiftOp != Op1Inst->getOpcode())
    return nullptr;

  // For adds, only left shifts are supported.
  if (I.getOpcode() == Instruction::Add && ShiftOp != Instruction::Shl)
    return nullptr;

  Value *NewC = Builder.CreateBinOp(
      I.getOpcode(), ShiftedC1, Builder.CreateBinOp(ShiftOp, ShiftedC2, AddC));
  return BinaryOperator::Create(ShiftOp, NewC, ShAmt);
}
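// For example, with left shifts and an 'or':
//   (2 << %x) | (1 << (%x + 4)) --> 0x12 << %x
// because (1 << 4) | 2 constant-folds to 0x12.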
// Fold and/or/xor with two equal intrinsic IDs:
// bitwise(fshl (A, B, ShAmt), fshl(C, D, ShAmt))
// -> fshl(bitwise(A, C), bitwise(B, D), ShAmt)
// bitwise(fshr (A, B, ShAmt), fshr(C, D, ShAmt))
// -> fshr(bitwise(A, C), bitwise(B, D), ShAmt)
// bitwise(bswap(A), bswap(B)) -> bswap(bitwise(A, B))
// bitwise(bswap(A), C) -> bswap(bitwise(A, bswap(C)))
// bitwise(bitreverse(A), bitreverse(B)) -> bitreverse(bitwise(A, B))
// bitwise(bitreverse(A), C) -> bitreverse(bitwise(A, bitreverse(C)))
static Instruction *
foldBitwiseLogicWithIntrinsics(BinaryOperator &I,
                               InstCombiner::BuilderTy &Builder) {
  assert(I.isBitwiseLogicOp() && "Should be and/or/xor");
  if (!I.getOperand(0)->hasOneUse())
    return nullptr;
  IntrinsicInst *X = dyn_cast<IntrinsicInst>(I.getOperand(0));
  if (!X)
    return nullptr;

  IntrinsicInst *Y = dyn_cast<IntrinsicInst>(I.getOperand(1));
  if (Y && (!Y->hasOneUse() || X->getIntrinsicID() != Y->getIntrinsicID()))
    return nullptr;

  Intrinsic::ID IID = X->getIntrinsicID();
  const APInt *RHSC;
  // Try to match constant RHS.
  if (!Y && (!(IID == Intrinsic::bswap || IID == Intrinsic::bitreverse) ||
             !match(I.getOperand(1), m_APInt(RHSC))))
    return nullptr;

  switch (IID) {
  case Intrinsic::fshl:
  case Intrinsic::fshr: {
    if (X->getOperand(2) != Y->getOperand(2))
      return nullptr;
    Value *NewOp0 =
        Builder.CreateBinOp(I.getOpcode(), X->getOperand(0), Y->getOperand(0));
    Value *NewOp1 =
        Builder.CreateBinOp(I.getOpcode(), X->getOperand(1), Y->getOperand(1));
    Function *F = Intrinsic::getDeclaration(I.getModule(), IID, I.getType());
    return CallInst::Create(F, {NewOp0, NewOp1, X->getOperand(2)});
  }
  case Intrinsic::bswap:
  case Intrinsic::bitreverse: {
    Value *NewOp0 = Builder.CreateBinOp(
        I.getOpcode(), X->getOperand(0),
        Y ? Y->getOperand(0)
          : ConstantInt::get(I.getType(), IID == Intrinsic::bswap
                                              ? RHSC->byteSwap()
                                              : RHSC->reverseBits()));
    Function *F = Intrinsic::getDeclaration(I.getModule(), IID, I.getType());
    return CallInst::Create(F, {NewOp0});
  }
  default:
    return nullptr;
  }
}

// Try to simplify V by replacing occurrences of Op with RepOp, but only look
// through bitwise operations. In particular, for X | Y we try to replace Y with
// 0 inside X and for X & Y we try to replace Y with -1 inside X.
// Return the simplified result of X if successful, and nullptr otherwise.
// If SimplifyOnly is true, no new instructions will be created.
static Value *simplifyAndOrWithOpReplaced(Value *V, Value *Op, Value *RepOp,
                                          bool SimplifyOnly,
                                          InstCombinerImpl &IC,
                                          unsigned Depth = 0) {
  if (Op == RepOp)
    return nullptr;

  if (V == Op)
    return RepOp;

  auto *I = dyn_cast<BinaryOperator>(V);
  if (!I || !I->isBitwiseLogicOp() || Depth >= 3)
    return nullptr;

  if (!I->hasOneUse())
    SimplifyOnly = true;

  Value *NewOp0 = simplifyAndOrWithOpReplaced(I->getOperand(0), Op, RepOp,
                                              SimplifyOnly, IC, Depth + 1);
  Value *NewOp1 = simplifyAndOrWithOpReplaced(I->getOperand(1), Op, RepOp,
                                              SimplifyOnly, IC, Depth + 1);
  if (!NewOp0 && !NewOp1)
    return nullptr;

  if (!NewOp0)
    NewOp0 = I->getOperand(0);
  if (!NewOp1)
    NewOp1 = I->getOperand(1);

  if (Value *Res = simplifyBinOp(I->getOpcode(), NewOp0, NewOp1,
                                 IC.getSimplifyQuery().getWithInstruction(I)))
    return Res;

  if (SimplifyOnly)
    return nullptr;
  return IC.Builder.CreateBinOp(I->getOpcode(), NewOp0, NewOp1);
}
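// For example, given:
//   and (or %x, %y), %y
// replacing %y with -1 inside the 'or' folds it to -1, so the whole
// expression simplifies to %y (absorption).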
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
// here. We should standardize that construct where it is needed or choose some
// other way to ensure that commutated variants of patterns are not missed.
Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
  Type *Ty = I.getType();

  if (Value *V = simplifyAndInst(I.getOperand(0), I.getOperand(1),
                                 SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  if (SimplifyAssociativeOrCommutative(I))
    return &I;

  if (Instruction *X = foldVectorBinop(I))
    return X;

  if (Instruction *Phi = foldBinopWithPhiOperands(I))
    return Phi;

  // See if we can simplify any instructions used by the instruction whose sole
  // purpose is to compute bits we don't care about.
  if (SimplifyDemandedInstructionBits(I))
    return &I;

  // Do this before using distributive laws to catch simple and/or/not patterns.
  if (Instruction *Xor = foldAndToXor(I, Builder))
    return Xor;

  if (Instruction *X = foldComplexAndOrPatterns(I, Builder))
    return X;

  // (A|B)&(A|C) -> A|(B&C) etc
  if (Value *V = foldUsingDistributiveLaws(I))
    return replaceInstUsesWith(I, V);

  if (Instruction *R = foldBinOpShiftWithShift(I))
    return R;

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);

  Value *X, *Y;
  const APInt *C;
  if ((match(Op0, m_OneUse(m_LogicalShift(m_One(), m_Value(X)))) ||
       (match(Op0, m_OneUse(m_Shl(m_APInt(C), m_Value(X)))) && (*C)[0])) &&
      match(Op1, m_One())) {
    // (1 >> X) & 1 --> zext(X == 0)
    // (C << X) & 1 --> zext(X == 0), when C is odd
    Value *IsZero = Builder.CreateICmpEQ(X, ConstantInt::get(Ty, 0));
    return new ZExtInst(IsZero, Ty);
  }

  // (-(X & 1)) & Y --> (X & 1) == 0 ? 0 : Y
  Value *Neg;
  if (match(&I,
            m_c_And(m_CombineAnd(m_Value(Neg),
                                 m_OneUse(m_Neg(m_And(m_Value(), m_One())))),
                    m_Value(Y)))) {
    Value *Cmp = Builder.CreateIsNull(Neg);
    return SelectInst::Create(Cmp, ConstantInt::getNullValue(Ty), Y);
  }

  // Canonicalize:
  // (X +/- Y) & Y --> ~X & Y when Y is a power of 2.
  if (match(&I, m_c_And(m_Value(Y), m_OneUse(m_CombineOr(
                                        m_c_Add(m_Value(X), m_Deferred(Y)),
                                        m_Sub(m_Value(X), m_Deferred(Y)))))) &&
      isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, /*Depth*/ 0, &I))
    return BinaryOperator::CreateAnd(Builder.CreateNot(X), Y);

  if (match(Op1, m_APInt(C))) {
    const APInt *XorC;
    if (match(Op0, m_OneUse(m_Xor(m_Value(X), m_APInt(XorC))))) {
      // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2)
      Constant *NewC = ConstantInt::get(Ty, *C & *XorC);
      Value *And = Builder.CreateAnd(X, Op1);
      And->takeName(Op0);
      return BinaryOperator::CreateXor(And, NewC);
    }

    const APInt *OrC;
    if (match(Op0, m_OneUse(m_Or(m_Value(X), m_APInt(OrC))))) {
      // (X | C1) & C2 --> (X & C2^(C1&C2)) | (C1&C2)
      // NOTE: This reduces the number of bits set in the & mask, which
      // can expose opportunities for store narrowing for scalars.
      // NOTE: SimplifyDemandedBits should have already removed bits from C1
      // that aren't set in C2. Meaning we can replace (C1&C2) with C1 in
      // above, but this feels safer.
      APInt Together = *C & *OrC;
      Value *And = Builder.CreateAnd(X, ConstantInt::get(Ty, Together ^ *C));
      And->takeName(Op0);
      return BinaryOperator::CreateOr(And, ConstantInt::get(Ty, Together));
    }

    unsigned Width = Ty->getScalarSizeInBits();
    const APInt *ShiftC;
    if (match(Op0, m_OneUse(m_SExt(m_AShr(m_Value(X), m_APInt(ShiftC))))) &&
        ShiftC->ult(Width)) {
      if (*C == APInt::getLowBitsSet(Width, Width - ShiftC->getZExtValue())) {
        // We are clearing high bits that were potentially set by sext+ashr:
        // and (sext (ashr X, ShiftC)), C --> lshr (sext X), ShiftC
        Value *Sext = Builder.CreateSExt(X, Ty);
        Constant *ShAmtC = ConstantInt::get(Ty, ShiftC->zext(Width));
        return BinaryOperator::CreateLShr(Sext, ShAmtC);
      }
    }

    // If this 'and' clears the sign-bits added by ashr, replace with lshr:
    // and (ashr X, ShiftC), C --> lshr X, ShiftC
    if (match(Op0, m_AShr(m_Value(X), m_APInt(ShiftC))) && ShiftC->ult(Width) &&
        C->isMask(Width - ShiftC->getZExtValue()))
      return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, *ShiftC));

    const APInt *AddC;
    if (match(Op0, m_Add(m_Value(X), m_APInt(AddC)))) {
      // If we are masking the result of the add down to exactly one bit and
      // the constant we are adding has no bits set below that bit, then the
      // add is flipping a single bit. Example:
      // (X + 4) & 4 --> (X & 4) ^ 4
      if (Op0->hasOneUse() && C->isPowerOf2() && (*AddC & (*C - 1)) == 0) {
        assert((*C & *AddC) != 0 && "Expected common bit");
        Value *NewAnd = Builder.CreateAnd(X, Op1);
        return BinaryOperator::CreateXor(NewAnd, Op1);
      }
    }

    // ((C1 OP zext(X)) & C2) -> zext((C1 OP X) & C2) if C2 fits in the
    // bitwidth of X and OP behaves well when given trunc(C1) and X.
    auto isNarrowableBinOpcode = [](BinaryOperator *B) {
      switch (B->getOpcode()) {
      case Instruction::Xor:
      case Instruction::Or:
      case Instruction::Mul:
      case Instruction::Add:
      case Instruction::Sub:
        return true;
      default:
        return false;
      }
    };
    BinaryOperator *BO;
    if (match(Op0, m_OneUse(m_BinOp(BO))) && isNarrowableBinOpcode(BO)) {
      Instruction::BinaryOps BOpcode = BO->getOpcode();
      Value *X;
      const APInt *C1;
      // TODO: The one-use restrictions could be relaxed a little if the AND
      // is going to be removed.
      // Try to narrow the 'and' and a binop with constant operand:
      // and (bo (zext X), C1), C --> zext (and (bo X, TruncC1), TruncC)
      if (match(BO, m_c_BinOp(m_OneUse(m_ZExt(m_Value(X))), m_APInt(C1))) &&
          C->isIntN(X->getType()->getScalarSizeInBits())) {
        unsigned XWidth = X->getType()->getScalarSizeInBits();
        Constant *TruncC1 = ConstantInt::get(X->getType(), C1->trunc(XWidth));
        Value *BinOp = isa<ZExtInst>(BO->getOperand(0))
                           ? Builder.CreateBinOp(BOpcode, X, TruncC1)
                           : Builder.CreateBinOp(BOpcode, TruncC1, X);
        Constant *TruncC = ConstantInt::get(X->getType(), C->trunc(XWidth));
        Value *And = Builder.CreateAnd(BinOp, TruncC);
        return new ZExtInst(And, Ty);
      }

      // Similar to above: if the mask matches the zext input width, then the
      // 'and' can be eliminated, so we can truncate the other variable op:
      // and (bo (zext X), Y), C --> zext (bo X, (trunc Y))
      if (isa<Instruction>(BO->getOperand(0)) &&
          match(BO->getOperand(0), m_OneUse(m_ZExt(m_Value(X)))) &&
          C->isMask(X->getType()->getScalarSizeInBits())) {
        Y = BO->getOperand(1);
        Value *TrY = Builder.CreateTrunc(Y, X->getType(), Y->getName() + ".tr");
        Value *NewBO =
            Builder.CreateBinOp(BOpcode, X, TrY, BO->getName() + ".narrow");
        return new ZExtInst(NewBO, Ty);
      }
      // and (bo Y, (zext X)), C --> zext (bo (trunc Y), X)
      if (isa<Instruction>(BO->getOperand(1)) &&
          match(BO->getOperand(1), m_OneUse(m_ZExt(m_Value(X)))) &&
          C->isMask(X->getType()->getScalarSizeInBits())) {
        Y = BO->getOperand(0);
        Value *TrY = Builder.CreateTrunc(Y, X->getType(), Y->getName() + ".tr");
        Value *NewBO =
            Builder.CreateBinOp(BOpcode, TrY, X, BO->getName() + ".narrow");
        return new ZExtInst(NewBO, Ty);
      }
    }

    // This is intentionally placed after the narrowing transforms for
    // efficiency (transform directly to the narrow logic op if possible).
    // If the mask is only needed on one incoming arm, push the 'and' op up.
    if (match(Op0, m_OneUse(m_Xor(m_Value(X), m_Value(Y)))) ||
        match(Op0, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) {
      APInt NotAndMask(~(*C));
      BinaryOperator::BinaryOps BinOp = cast<BinaryOperator>(Op0)->getOpcode();
      if (MaskedValueIsZero(X, NotAndMask, 0, &I)) {
        // Not masking anything out for the LHS, move mask to RHS.
        // and ({x}or X, Y), C --> {x}or X, (and Y, C)
        Value *NewRHS = Builder.CreateAnd(Y, Op1, Y->getName() + ".masked");
        return BinaryOperator::Create(BinOp, X, NewRHS);
      }
      if (!isa<Constant>(Y) && MaskedValueIsZero(Y, NotAndMask, 0, &I)) {
        // Not masking anything out for the RHS, move mask to LHS.
        // and ({x}or X, Y), C --> {x}or (and X, C), Y
        Value *NewLHS = Builder.CreateAnd(X, Op1, X->getName() + ".masked");
        return BinaryOperator::Create(BinOp, NewLHS, Y);
      }
    }

    // When the mask is a power-of-2 constant and op0 is a shifted-power-of-2
    // constant, test if the shift amount equals the offset bit index:
    // (ShiftC << X) & C --> X == (log2(C) - log2(ShiftC)) ? C : 0
    // (ShiftC >> X) & C --> X == (log2(ShiftC) - log2(C)) ? C : 0
    if (C->isPowerOf2() &&
        match(Op0, m_OneUse(m_LogicalShift(m_Power2(ShiftC), m_Value(X))))) {
      int Log2ShiftC = ShiftC->exactLogBase2();
      int Log2C = C->exactLogBase2();
      bool IsShiftLeft =
          cast<BinaryOperator>(Op0)->getOpcode() == Instruction::Shl;
      int BitNum = IsShiftLeft ? Log2C - Log2ShiftC : Log2ShiftC - Log2C;
      assert(BitNum >= 0 && "Expected demanded bits to handle impossible mask");
      Value *Cmp = Builder.CreateICmpEQ(X, ConstantInt::get(Ty, BitNum));
      return SelectInst::Create(Cmp, ConstantInt::get(Ty, *C),
                                ConstantInt::getNullValue(Ty));
    }

    Constant *C1, *C2;
    const APInt *C3 = C;
    Value *X;
    if (C3->isPowerOf2()) {
      Constant *Log2C3 = ConstantInt::get(Ty, C3->countr_zero());
      if (match(Op0, m_OneUse(m_LShr(m_Shl(m_ImmConstant(C1), m_Value(X)),
                                     m_ImmConstant(C2)))) &&
          match(C1, m_Power2())) {
        Constant *Log2C1 = ConstantExpr::getExactLogBase2(C1);
        Constant *LshrC = ConstantExpr::getAdd(C2, Log2C3);
        KnownBits KnownLShrc = computeKnownBits(LshrC, 0, nullptr);
        if (KnownLShrc.getMaxValue().ult(Width)) {
          // iff C1,C3 is pow2 and C2 + cttz(C3) < BitWidth:
          // ((C1 << X) >> C2) & C3 -> X == (cttz(C3)+C2-cttz(C1)) ? C3 : 0
          Constant *CmpC = ConstantExpr::getSub(LshrC, Log2C1);
          Value *Cmp = Builder.CreateICmpEQ(X, CmpC);
          return SelectInst::Create(Cmp, ConstantInt::get(Ty, *C3),
                                    ConstantInt::getNullValue(Ty));
        }
      }

      if (match(Op0, m_OneUse(m_Shl(m_LShr(m_ImmConstant(C1), m_Value(X)),
                                    m_ImmConstant(C2)))) &&
          match(C1, m_Power2())) {
        Constant *Log2C1 = ConstantExpr::getExactLogBase2(C1);
        Constant *Cmp =
            ConstantFoldCompareInstOperands(ICmpInst::ICMP_ULT, Log2C3, C2, DL);
        if (Cmp && Cmp->isZeroValue()) {
          // iff C1,C3 is pow2 and Log2(C3) >= C2:
          // ((C1 >> X) << C2) & C3 -> X == (cttz(C1)+C2-cttz(C3)) ? C3 : 0
          Constant *ShlC = ConstantExpr::getAdd(C2, Log2C1);
          Constant *CmpC = ConstantExpr::getSub(ShlC, Log2C3);
          Value *Cmp = Builder.CreateICmpEQ(X, CmpC);
          return SelectInst::Create(Cmp, ConstantInt::get(Ty, *C3),
                                    ConstantInt::getNullValue(Ty));
        }
      }
    }
  }

  // If we are clearing the sign bit of a floating-point value, convert this to
  // fabs, then cast back to integer.
  //
  // This is a generous interpretation for noimplicitfloat, this is not a true
  // floating-point operation.
  //
  // Assumes any IEEE-represented type has the sign bit in the high bit.
  // TODO: Unify with APInt matcher. This version allows undef unlike m_APInt
  Value *CastOp;
  if (match(Op0, m_ElementWiseBitCast(m_Value(CastOp))) &&
      match(Op1, m_MaxSignedValue()) &&
      !Builder.GetInsertBlock()->getParent()->hasFnAttribute(
          Attribute::NoImplicitFloat)) {
    Type *EltTy = CastOp->getType()->getScalarType();
    if (EltTy->isFloatingPointTy() && EltTy->isIEEE()) {
      Value *FAbs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, CastOp);
      return new BitCastInst(FAbs, I.getType());
    }
  }

  // and(shl(zext(X), Y), SignMask) -> and(sext(X), SignMask)
  // where Y is a valid shift amount.
  if (match(&I, m_And(m_OneUse(m_Shl(m_ZExt(m_Value(X)), m_Value(Y))),
                      m_SignMask())) &&
      match(Y, m_SpecificInt_ICMP(
                   ICmpInst::Predicate::ICMP_EQ,
                   APInt(Ty->getScalarSizeInBits(),
                         Ty->getScalarSizeInBits() -
                             X->getType()->getScalarSizeInBits())))) {
    auto *SExt = Builder.CreateSExt(X, Ty, X->getName() + ".signext");
    return BinaryOperator::CreateAnd(SExt, Op1);
  }

  if (Instruction *Z = narrowMaskedBinOp(I))
    return Z;

  if (I.getType()->isIntOrIntVectorTy(1)) {
    if (auto *SI0 = dyn_cast<SelectInst>(Op0)) {
      if (auto *R =
              foldAndOrOfSelectUsingImpliedCond(Op1, *SI0, /* IsAnd */ true))
        return R;
    }
    if (auto *SI1 = dyn_cast<SelectInst>(Op1)) {
      if (auto *R =
              foldAndOrOfSelectUsingImpliedCond(Op0, *SI1, /* IsAnd */ true))
        return R;
    }
  }

  if (Instruction *FoldedLogic = foldBinOpIntoSelectOrPhi(I))
    return FoldedLogic;

  if (Instruction *DeMorgan = matchDeMorgansLaws(I, *this))
    return DeMorgan;

  {
    Value *A, *B, *C;
    // A & ~(A ^ B) --> A & B
    if (match(Op1, m_Not(m_c_Xor(m_Specific(Op0), m_Value(B)))))
      return BinaryOperator::CreateAnd(Op0, B);
    // ~(A ^ B) & A --> A & B
    if (match(Op0, m_Not(m_c_Xor(m_Specific(Op1), m_Value(B)))))
      return BinaryOperator::CreateAnd(Op1, B);

    // (A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C
    if (match(Op0, m_Xor(m_Value(A), m_Value(B))) &&
        match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A)))) {
      Value *NotC = Op1->hasOneUse()
                        ? Builder.CreateNot(C)
                        : getFreelyInverted(C, C->hasOneUse(), &Builder);
      if (NotC != nullptr)
        return BinaryOperator::CreateAnd(Op0, NotC);
    }

    // ((A ^ C) ^ B) & (B ^ A) -> (B ^ A) & ~C
    if (match(Op0, m_Xor(m_Xor(m_Value(A), m_Value(C)), m_Value(B))) &&
        match(Op1, m_Xor(m_Specific(B), m_Specific(A)))) {
      Value *NotC = Op0->hasOneUse()
                        ? Builder.CreateNot(C)
                        : getFreelyInverted(C, C->hasOneUse(), &Builder);
      if (NotC != nullptr)
        return BinaryOperator::CreateAnd(Op1, NotC);
    }

    // (A | B) & (~A ^ B) -> A & B
    // (A | B) & (B ^ ~A) -> A & B
    // (B | A) & (~A ^ B) -> A & B
    // (B | A) & (B ^ ~A) -> A & B
    if (match(Op1, m_c_Xor(m_Not(m_Value(A)), m_Value(B))) &&
        match(Op0, m_c_Or(m_Specific(A), m_Specific(B))))
      return BinaryOperator::CreateAnd(A, B);

    // (~A ^ B) & (A | B) -> A & B
    // (~A ^ B) & (B | A) -> A & B
    // (B ^ ~A) & (A | B) -> A & B
    // (B ^ ~A) & (B | A) -> A & B
    if (match(Op0, m_c_Xor(m_Not(m_Value(A)), m_Value(B))) &&
        match(Op1, m_c_Or(m_Specific(A), m_Specific(B))))
      return BinaryOperator::CreateAnd(A, B);

    // (~A | B) & (A ^ B) -> ~A & B
    // (~A | B) & (B ^ A) -> ~A & B
    // (B | ~A) & (A ^ B) -> ~A & B
    // (B | ~A) & (B ^ A) -> ~A & B
    if (match(Op0, m_c_Or(m_Not(m_Value(A)), m_Value(B))) &&
        match(Op1, m_c_Xor(m_Specific(A), m_Specific(B))))
      return BinaryOperator::CreateAnd(Builder.CreateNot(A), B);

    // (A ^ B) & (~A | B) -> ~A & B
    // (B ^ A) & (~A | B) -> ~A & B
    // (A ^ B) & (B | ~A) -> ~A & B
    // (B ^ A) & (B | ~A) -> ~A & B
    if (match(Op1, m_c_Or(m_Not(m_Value(A)), m_Value(B))) &&
        match(Op0, m_c_Xor(m_Specific(A), m_Specific(B))))
      return BinaryOperator::CreateAnd(Builder.CreateNot(A), B);
  }

  {
    ICmpInst *LHS = dyn_cast<ICmpInst>(Op0);
    ICmpInst *RHS = dyn_cast<ICmpInst>(Op1);
    if (LHS && RHS)
      if (Value *Res = foldAndOrOfICmps(LHS, RHS, I, /* IsAnd */ true))
        return replaceInstUsesWith(I, Res);

    // TODO: Make this recursive; it's a little tricky because an arbitrary
    // number of 'and' instructions might have to be created.
    if (LHS && match(Op1, m_OneUse(m_LogicalAnd(m_Value(X), m_Value(Y))))) {
      bool IsLogical = isa<SelectInst>(Op1);
      // LHS & (X && Y) --> (LHS && X) && Y
      if (auto *Cmp = dyn_cast<ICmpInst>(X))
        if (Value *Res =
                foldAndOrOfICmps(LHS, Cmp, I, /* IsAnd */ true, IsLogical))
          return replaceInstUsesWith(I, IsLogical
                                            ? Builder.CreateLogicalAnd(Res, Y)
                                            : Builder.CreateAnd(Res, Y));
      // LHS & (X && Y) --> X && (LHS & Y)
      if (auto *Cmp = dyn_cast<ICmpInst>(Y))
        if (Value *Res = foldAndOrOfICmps(LHS, Cmp, I, /* IsAnd */ true,
                                          /* IsLogical */ false))
          return replaceInstUsesWith(I, IsLogical
                                            ? Builder.CreateLogicalAnd(X, Res)
                                            : Builder.CreateAnd(X, Res));
    }
    if (RHS && match(Op0, m_OneUse(m_LogicalAnd(m_Value(X), m_Value(Y))))) {
      bool IsLogical = isa<SelectInst>(Op0);
      // (X && Y) & RHS --> (X && RHS) && Y
      if (auto *Cmp = dyn_cast<ICmpInst>(X))
        if (Value *Res =
                foldAndOrOfICmps(Cmp, RHS, I, /* IsAnd */ true, IsLogical))
          return replaceInstUsesWith(I, IsLogical
                                            ? Builder.CreateLogicalAnd(Res, Y)
                                            : Builder.CreateAnd(Res, Y));
      // (X && Y) & RHS --> X && (Y & RHS)
      if (auto *Cmp = dyn_cast<ICmpInst>(Y))
        if (Value *Res = foldAndOrOfICmps(Cmp, RHS, I, /* IsAnd */ true,
                                          /* IsLogical */ false))
          return replaceInstUsesWith(I, IsLogical
                                            ? Builder.CreateLogicalAnd(X, Res)
                                            : Builder.CreateAnd(X, Res));
    }
  }

  if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
    if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
      if (Value *Res = foldLogicOfFCmps(LHS, RHS, /*IsAnd*/ true))
        return replaceInstUsesWith(I, Res);

  if (Instruction *FoldedFCmps = reassociateFCmps(I, Builder))
    return FoldedFCmps;

  if (Instruction *CastedAnd = foldCastedBitwiseLogic(I))
    return CastedAnd;

  if (Instruction *Sel = foldBinopOfSextBoolToSelect(I))
    return Sel;

  // and(sext(A), B) / and(B, sext(A)) --> A ? B : 0, where A is i1 or <N x i1>.
  // TODO: Move this into foldBinopOfSextBoolToSelect as a more generalized fold
  //       with binop identity constant. But creating a select with non-constant
  //       arm may not be reversible due to poison semantics. Is that a good
  //       canonicalization?
  Value *A, *B;
  if (match(&I, m_c_And(m_SExt(m_Value(A)), m_Value(B))) &&
      A->getType()->isIntOrIntVectorTy(1))
    return SelectInst::Create(A, B, Constant::getNullValue(Ty));

  // Similarly, a 'not' of the bool translates to a swap of the select arms:
  // ~sext(A) & B / B & ~sext(A) --> A ? 0 : B
  if (match(&I, m_c_And(m_Not(m_SExt(m_Value(A))), m_Value(B))) &&
      A->getType()->isIntOrIntVectorTy(1))
    return SelectInst::Create(A, Constant::getNullValue(Ty), B);

  // and(zext(A), B) -> A ? (B & 1) : 0
  if (match(&I, m_c_And(m_OneUse(m_ZExt(m_Value(A))), m_Value(B))) &&
      A->getType()->isIntOrIntVectorTy(1))
    return SelectInst::Create(A, Builder.CreateAnd(B, ConstantInt::get(Ty, 1)),
                              Constant::getNullValue(Ty));

  // (-1 + A) & B --> A ? 0 : B where A is 0/1.
  if (match(&I, m_c_And(m_OneUse(m_Add(m_ZExtOrSelf(m_Value(A)), m_AllOnes())),
                        m_Value(B)))) {
    if (A->getType()->isIntOrIntVectorTy(1))
      return SelectInst::Create(A, Constant::getNullValue(Ty), B);
    if (computeKnownBits(A, /* Depth */ 0, &I).countMaxActiveBits() <= 1) {
      return SelectInst::Create(
          Builder.CreateICmpEQ(A, Constant::getNullValue(A->getType())), B,
          Constant::getNullValue(Ty));
    }
  }

  // (iN X s>> (N-1)) & Y --> (X s< 0) ? Y : 0 -- with optional sext
  if (match(&I, m_c_And(m_OneUse(m_SExtOrSelf(
                            m_AShr(m_Value(X), m_APIntAllowPoison(C)))),
                        m_Value(Y))) &&
      *C == X->getType()->getScalarSizeInBits() - 1) {
    Value *IsNeg = Builder.CreateIsNeg(X, "isneg");
    return SelectInst::Create(IsNeg, Y, ConstantInt::getNullValue(Ty));
  }
  // If there's a 'not' of the shifted value, swap the select operands:
  // ~(iN X s>> (N-1)) & Y --> (X s< 0) ? 0 : Y -- with optional sext
  if (match(&I, m_c_And(m_OneUse(m_SExtOrSelf(
                            m_Not(m_AShr(m_Value(X), m_APIntAllowPoison(C))))),
                        m_Value(Y))) &&
      *C == X->getType()->getScalarSizeInBits() - 1) {
    Value *IsNeg = Builder.CreateIsNeg(X, "isneg");
    return SelectInst::Create(IsNeg, ConstantInt::getNullValue(Ty), Y);
  }

  // (~x) & y --> ~(x | (~y)) iff that gets rid of inversions
  if (sinkNotIntoOtherHandOfLogicalOp(I))
    return &I;

  // An and recurrence w/loop invariant step is equivalent to (and start, step)
  PHINode *PN = nullptr;
  Value *Start = nullptr, *Step = nullptr;
  if (matchSimpleRecurrence(&I, PN, Start, Step) && DT.dominates(Step, PN))
    return replaceInstUsesWith(I, Builder.CreateAnd(Start, Step));

  if (Instruction *R = reassociateForUses(I, Builder))
    return R;

  if (Instruction *Canonicalized = canonicalizeLogicFirst(I, Builder))
    return Canonicalized;

  if (Instruction *Folded = foldLogicOfIsFPClass(I, Op0, Op1))
    return Folded;

  if (Instruction *Res = foldBinOpOfDisplacedShifts(I))
    return Res;

  if (Instruction *Res = foldBitwiseLogicWithIntrinsics(I, Builder))
    return Res;

  if (Value *V =
          simplifyAndOrWithOpReplaced(Op0, Op1, Constant::getAllOnesValue(Ty),
                                      /*SimplifyOnly*/ false, *this))
    return BinaryOperator::CreateAnd(V, Op1);
  if (Value *V =
          simplifyAndOrWithOpReplaced(Op1, Op0, Constant::getAllOnesValue(Ty),
                                      /*SimplifyOnly*/ false, *this))
    return BinaryOperator::CreateAnd(Op0, V);

  return nullptr;
}
Instruction *InstCombinerImpl::matchBSwapOrBitReverse(Instruction &I,
                                                      bool MatchBSwaps,
                                                      bool MatchBitReversals) {
  SmallVector<Instruction *, 4> Insts;
  if (!recognizeBSwapOrBitReverseIdiom(&I, MatchBSwaps, MatchBitReversals,
                                       Insts))
    return nullptr;
  Instruction *LastInst = Insts.pop_back_val();
  LastInst->removeFromParent();

  for (auto *Inst : Insts)
    Worklist.push(Inst);
  return LastInst;
}

std::optional<std::pair<Intrinsic::ID, SmallVector<Value *, 3>>>
InstCombinerImpl::convertOrOfShiftsToFunnelShift(Instruction &Or) {
  // TODO: Can we reduce the code duplication between this and the related
  // rotate matching code under visitSelect and visitTrunc?
  assert(Or.getOpcode() == BinaryOperator::Or && "Expecting or instruction");

  unsigned Width = Or.getType()->getScalarSizeInBits();

  Instruction *Or0, *Or1;
  if (!match(Or.getOperand(0), m_Instruction(Or0)) ||
      !match(Or.getOperand(1), m_Instruction(Or1)))
    return std::nullopt;

  bool IsFshl = true; // Sub on LSHR.
  SmallVector<Value *, 3> FShiftArgs;

  // First, find an or'd pair of opposite shifts:
  // or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1)
  if (isa<BinaryOperator>(Or0) && isa<BinaryOperator>(Or1)) {
    Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
    if (!match(Or0,
               m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
        !match(Or1,
               m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
        Or0->getOpcode() == Or1->getOpcode())
      return std::nullopt;

    // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
    if (Or0->getOpcode() == BinaryOperator::LShr) {
      std::swap(Or0, Or1);
      std::swap(ShVal0, ShVal1);
      std::swap(ShAmt0, ShAmt1);
    }
    assert(Or0->getOpcode() == BinaryOperator::Shl &&
           Or1->getOpcode() == BinaryOperator::LShr &&
           "Illegal or(shift,shift) pair");

    // Match the shift amount operands for a funnel shift pattern. This always
    // matches a subtraction on the R operand.
    auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
      // Check for constant shift amounts that sum to the bitwidth.
      const APInt *LI, *RI;
      if (match(L, m_APIntAllowPoison(LI)) && match(R, m_APIntAllowPoison(RI)))
        if (LI->ult(Width) && RI->ult(Width) && (*LI + *RI) == Width)
          return ConstantInt::get(L->getType(), *LI);

      Constant *LC, *RC;
      if (match(L, m_Constant(LC)) && match(R, m_Constant(RC)) &&
          match(L,
                m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) &&
          match(R,
                m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(Width, Width))) &&
          match(ConstantExpr::getAdd(LC, RC), m_SpecificIntAllowPoison(Width)))
        return ConstantExpr::mergeUndefsWith(LC, RC);

      // (shl ShVal, X) | (lshr ShVal, (Width - X)) iff X < Width.
      // We limit this to X < Width in case the backend re-expands the
      // intrinsic, and has to reintroduce a shift modulo operation (InstCombine
      // might remove it after this fold). This still doesn't guarantee that the
      // final codegen will match this original pattern.
      if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L))))) {
        KnownBits KnownL = computeKnownBits(L, /*Depth*/ 0, &Or);
        return KnownL.getMaxValue().ult(Width) ? L : nullptr;
      }

      // For non-constant cases, the following patterns currently only work for
      // rotation patterns.
      // TODO: Add general funnel-shift compatible patterns.
      if (ShVal0 != ShVal1)
        return nullptr;

      // For non-constant cases we don't support non-pow2 shift masks.
      // TODO: Is it worth matching urem as well?
      if (!isPowerOf2_32(Width))
        return nullptr;

      // The shift amount may be masked with negation:
      // (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
      Value *X;
      unsigned Mask = Width - 1;
      if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
          match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
        return X;

      // (shl ShVal, X) | (lshr ShVal, ((-X) & (Width - 1)))
      if (match(R, m_And(m_Neg(m_Specific(L)), m_SpecificInt(Mask))))
        return L;

      // Similar to above, but the shift amount may be extended after masking,
      // so return the extended value as the parameter for the intrinsic.
      if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
          match(R,
                m_And(m_Neg(m_ZExt(m_And(m_Specific(X), m_SpecificInt(Mask)))),
                      m_SpecificInt(Mask))))
        return L;

      if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
          match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
        return L;

      return nullptr;
    };

    Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, Width);
    if (!ShAmt) {
      ShAmt = matchShiftAmount(ShAmt1, ShAmt0, Width);
      IsFshl = false; // Sub on SHL.
    }
    if (!ShAmt)
      return std::nullopt;

    FShiftArgs = {ShVal0, ShVal1, ShAmt};
  } else if (isa<ZExtInst>(Or0) || isa<ZExtInst>(Or1)) {
    // If there are two 'or' instructions that concat variables in opposite
    // order:
    //
    // Slot1 and Slot2 are all zero bits.
    // | Slot1 | Low | Slot2 | High |
    // LowHigh = or (shl (zext Low), ZextLowShlAmt), (zext High)
    // | Slot2 | High | Slot1 | Low |
    // HighLow = or (shl (zext High), ZextHighShlAmt), (zext Low)
    //
    // the latter 'or' can be safely converted to
    // -> HighLow = fshl LowHigh, LowHigh, ZextHighShlAmt
    // if ZextLowShlAmt + ZextHighShlAmt == Width.
    if (!isa<ZExtInst>(Or1))
      std::swap(Or0, Or1);

    Value *High, *ZextHigh, *Low;
    const APInt *ZextHighShlAmt;
    if (!match(Or0,
               m_OneUse(m_Shl(m_Value(ZextHigh), m_APInt(ZextHighShlAmt)))))
      return std::nullopt;

    if (!match(Or1, m_ZExt(m_Value(Low))) ||
        !match(ZextHigh, m_ZExt(m_Value(High))))
      return std::nullopt;

    unsigned HighSize = High->getType()->getScalarSizeInBits();
    unsigned LowSize = Low->getType()->getScalarSizeInBits();
    // Make sure High does not overlap with Low and most significant bits of
    // High aren't shifted out.
    if (ZextHighShlAmt->ult(LowSize) || ZextHighShlAmt->ugt(Width - HighSize))
      return std::nullopt;

    for (User *U : ZextHigh->users()) {
      Value *X, *Y;
      if (!match(U, m_Or(m_Value(X), m_Value(Y))))
        continue;

      if (!isa<ZExtInst>(Y))
        std::swap(X, Y);

      const APInt *ZextLowShlAmt;
      if (!match(X, m_Shl(m_Specific(Or1), m_APInt(ZextLowShlAmt))) ||
          !match(Y, m_Specific(ZextHigh)) || !DT.dominates(U, &Or))
        continue;

      // HighLow is a good concat. If sum of two shifts amount equals to Width,
      // LowHigh must also be a good concat.
      if (*ZextLowShlAmt + *ZextHighShlAmt != Width)
        continue;

      // Low must not overlap with High and most significant bits of Low must
      // not be shifted out.
      assert(ZextLowShlAmt->uge(HighSize) &&
             ZextLowShlAmt->ule(Width - LowSize) && "Invalid concat");

      FShiftArgs = {U, U, ConstantInt::get(Or0->getType(), *ZextHighShlAmt)};
      break;
    }
  }

  if (FShiftArgs.empty())
    return std::nullopt;

  Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
  return std::make_pair(IID, FShiftArgs);
}
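// For example, a masked rotate of an i32 value (operands written loosely):
//   %shl = shl i32 %x, (and i32 %amt, 31)
//   %shr = lshr i32 %x, (and i32 (sub i32 0, %amt), 31)
//   %or = or i32 %shl, %shr
// becomes:
//   %or = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %amt)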
/// Match UB-safe variants of the funnel shift intrinsic.
static Instruction *matchFunnelShift(Instruction &Or, InstCombinerImpl &IC) {
  if (auto Opt = IC.convertOrOfShiftsToFunnelShift(Or)) {
    auto [IID, FShiftArgs] = *Opt;
    Function *F = Intrinsic::getDeclaration(Or.getModule(), IID, Or.getType());
    return CallInst::Create(F, FShiftArgs);
  }

  return nullptr;
}

/// Attempt to combine or(zext(x), shl(zext(y), bw/2)) concat packing patterns.
static Instruction *matchOrConcat(Instruction &Or,
                                  InstCombiner::BuilderTy &Builder) {
  assert(Or.getOpcode() == Instruction::Or && "bswap requires an 'or'");
  Value *Op0 = Or.getOperand(0), *Op1 = Or.getOperand(1);
  Type *Ty = Or.getType();

  unsigned Width = Ty->getScalarSizeInBits();
  if ((Width & 1) != 0)
    return nullptr;
  unsigned HalfWidth = Width / 2;

  // Canonicalize zext (lower half) to LHS.
  if (!isa<ZExtInst>(Op0))
    std::swap(Op0, Op1);

  // Find lower/upper half.
  Value *LowerSrc, *ShlVal, *UpperSrc;
  const APInt *C;
  if (!match(Op0, m_OneUse(m_ZExt(m_Value(LowerSrc)))) ||
      !match(Op1, m_OneUse(m_Shl(m_Value(ShlVal), m_APInt(C)))) ||
      !match(ShlVal, m_OneUse(m_ZExt(m_Value(UpperSrc)))))
    return nullptr;
  if (*C != HalfWidth || LowerSrc->getType() != UpperSrc->getType() ||
      LowerSrc->getType()->getScalarSizeInBits() != HalfWidth)
    return nullptr;

  auto ConcatIntrinsicCalls = [&](Intrinsic::ID id, Value *Lo, Value *Hi) {
    Value *NewLower = Builder.CreateZExt(Lo, Ty);
    Value *NewUpper = Builder.CreateZExt(Hi, Ty);
    NewUpper = Builder.CreateShl(NewUpper, HalfWidth);
    Value *BinOp = Builder.CreateOr(NewLower, NewUpper);
    Function *F = Intrinsic::getDeclaration(Or.getModule(), id, Ty);
    return Builder.CreateCall(F, BinOp);
  };

  // BSWAP: Push the concat down, swapping the lower/upper sources.
  // concat(bswap(x),bswap(y)) -> bswap(concat(x,y))
  Value *LowerBSwap, *UpperBSwap;
  if (match(LowerSrc, m_BSwap(m_Value(LowerBSwap))) &&
      match(UpperSrc, m_BSwap(m_Value(UpperBSwap))))
    return ConcatIntrinsicCalls(Intrinsic::bswap, UpperBSwap, LowerBSwap);

  // BITREVERSE: Push the concat down, swapping the lower/upper sources.
  // concat(bitreverse(x),bitreverse(y)) -> bitreverse(concat(x,y))
  Value *LowerBRev, *UpperBRev;
  if (match(LowerSrc, m_BitReverse(m_Value(LowerBRev))) &&
      match(UpperSrc, m_BitReverse(m_Value(UpperBRev))))
    return ConcatIntrinsicCalls(Intrinsic::bitreverse, UpperBRev, LowerBRev);

  return nullptr;
}
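// For example, concatenating two byte-swapped i16 halves into an i32:
//   or (zext (bswap i16 %x)), (shl (zext (bswap i16 %y)), 16)
//     --> bswap (or (zext i16 %y), (shl (zext i16 %x), 16))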
  // BSWAP: Push the concat down, swapping the lower/upper sources.
  // concat(bswap(x),bswap(y)) -> bswap(concat(x,y))
  Value *LowerBSwap, *UpperBSwap;
  if (match(LowerSrc, m_BSwap(m_Value(LowerBSwap))) &&
      match(UpperSrc, m_BSwap(m_Value(UpperBSwap))))
    return ConcatIntrinsicCalls(Intrinsic::bswap, UpperBSwap, LowerBSwap);

  // BITREVERSE: Push the concat down, swapping the lower/upper sources.
  // concat(bitreverse(x),bitreverse(y)) -> bitreverse(concat(x,y))
  Value *LowerBRev, *UpperBRev;
  if (match(LowerSrc, m_BitReverse(m_Value(LowerBRev))) &&
      match(UpperSrc, m_BitReverse(m_Value(UpperBRev))))
    return ConcatIntrinsicCalls(Intrinsic::bitreverse, UpperBRev, LowerBRev);

  return nullptr;
}

/// If all elements of two constant vectors are 0/-1 and inverses, return true.
static bool areInverseVectorBitmasks(Constant *C1, Constant *C2) {
  unsigned NumElts = cast<FixedVectorType>(C1->getType())->getNumElements();
  for (unsigned i = 0; i != NumElts; ++i) {
    Constant *EltC1 = C1->getAggregateElement(i);
    Constant *EltC2 = C2->getAggregateElement(i);
    if (!EltC1 || !EltC2)
      return false;

    // One element must be all ones, and the other must be all zeros.
    if (!((match(EltC1, m_Zero()) && match(EltC2, m_AllOnes())) ||
          (match(EltC2, m_Zero()) && match(EltC1, m_AllOnes()))))
      return false;
  }
  return true;
}

/// We have an expression of the form (A & C) | (B & D). If A is a scalar or
/// vector composed of all-zeros or all-ones values and is the bitwise 'not' of
/// B, it can be used as the condition operand of a select instruction.
/// We will detect (A & C) | ~(B | D) when the flag ABIsTheSame is enabled.
Value *InstCombinerImpl::getSelectCondition(Value *A, Value *B,
                                            bool ABIsTheSame) {
  // We may have peeked through bitcasts in the caller.
  // Exit immediately if we don't have (vector) integer types.
  Type *Ty = A->getType();
  if (!Ty->isIntOrIntVectorTy() || !B->getType()->isIntOrIntVectorTy())
    return nullptr;

  // If A is the 'not' operand of B and has enough signbits, we have our
  // answer.
  if (ABIsTheSame ? (A == B) : match(B, m_Not(m_Specific(A)))) {
    // If these are scalars or vectors of i1, A can be used directly.
    if (Ty->isIntOrIntVectorTy(1))
      return A;

    // If we look through a vector bitcast, the caller will bitcast the
    // operands to match the condition's number of bits (N x i1).
    // To make this poison-safe, disallow bitcast from wide element to narrow
    // element. That could allow poison in lanes where it was not present in
    // the original code.
    A = peekThroughBitcast(A);
    if (A->getType()->isIntOrIntVectorTy()) {
      unsigned NumSignBits = ComputeNumSignBits(A);
      if (NumSignBits == A->getType()->getScalarSizeInBits() &&
          NumSignBits <= Ty->getScalarSizeInBits())
        return Builder.CreateTrunc(A, CmpInst::makeCmpResultType(A->getType()));
    }
    return nullptr;
  }

  // TODO: add support for sext and constant case
  if (ABIsTheSame)
    return nullptr;

  // If both operands are constants, see if the constants are inverse bitmasks.
  Constant *AConst, *BConst;
  if (match(A, m_Constant(AConst)) && match(B, m_Constant(BConst)))
    if (AConst == ConstantExpr::getNot(BConst) &&
        ComputeNumSignBits(A) == Ty->getScalarSizeInBits())
      return Builder.CreateZExtOrTrunc(A, CmpInst::makeCmpResultType(Ty));
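  // Editorial illustration of the constant case above (not upstream text;
  // the <2 x i32> constants are assumed): with A = <i32 -1, i32 0> and
  // B = <i32 0, i32 -1>, A == ~B and every element of A is all-sign-bits,
  // so the condition becomes trunc A to <2 x i1>, i.e. <i1 true, i1 false>.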
  // Look for more complex patterns. The 'not' op may be hidden behind various
  // casts. Look through sexts and bitcasts to find the booleans.
  Value *Cond;
  Value *NotB;
  if (match(A, m_SExt(m_Value(Cond))) &&
      Cond->getType()->isIntOrIntVectorTy(1)) {
    // A = sext i1 Cond; B = sext (not (i1 Cond))
    if (match(B, m_SExt(m_Not(m_Specific(Cond)))))
      return Cond;

    // A = sext i1 Cond; B = not ({bitcast} (sext (i1 Cond)))
    // TODO: The one-use checks are unnecessary or misplaced. If the caller
    //       checked for uses on logic ops/casts, that should be enough to
    //       make this transform worthwhile.
    if (match(B, m_OneUse(m_Not(m_Value(NotB))))) {
      NotB = peekThroughBitcast(NotB, true);
      if (match(NotB, m_SExt(m_Specific(Cond))))
        return Cond;
    }
  }

  // All scalar (and most vector) possibilities should be handled now.
  // Try more matches that only apply to non-splat constant vectors.
  if (!Ty->isVectorTy())
    return nullptr;

  // If both operands are xor'd with constants using the same sexted boolean
  // operand, see if the constants are inverse bitmasks.
  // TODO: Use ConstantExpr::getNot()?
  if (match(A, (m_Xor(m_SExt(m_Value(Cond)), m_Constant(AConst)))) &&
      match(B, (m_Xor(m_SExt(m_Specific(Cond)), m_Constant(BConst)))) &&
      Cond->getType()->isIntOrIntVectorTy(1) &&
      areInverseVectorBitmasks(AConst, BConst)) {
    AConst = ConstantExpr::getTrunc(AConst, CmpInst::makeCmpResultType(Ty));
    return Builder.CreateXor(Cond, AConst);
  }
  return nullptr;
}

/// We have an expression of the form (A & B) | (C & D). Try to simplify this
/// to "A' ? B : D", where A' is a boolean or vector of booleans.
/// When InvertFalseVal is set to true, we try to match the pattern
/// where we have peeked through a 'not' op and A and C are the same:
/// (A & B) | ~(A | D) --> (A & B) | (~A & ~D) --> A' ? B : ~D
Value *InstCombinerImpl::matchSelectFromAndOr(Value *A, Value *B, Value *C,
                                              Value *D, bool InvertFalseVal) {
  // The potential condition of the select may be bitcasted. In that case, look
  // through its bitcast and the corresponding bitcast of the 'not' condition.
  Type *OrigType = A->getType();
  A = peekThroughBitcast(A, true);
  C = peekThroughBitcast(C, true);
  if (Value *Cond = getSelectCondition(A, C, InvertFalseVal)) {
    // ((bc Cond) & B) | ((bc ~Cond) & D) --> bc (select Cond, (bc B), (bc D))
    // If this is a vector, we may need to cast to match the condition's
    // length. The bitcasts will either all exist or all not exist.
    // The builder will not create unnecessary casts if the types already
    // match.
    Type *SelTy = A->getType();
    if (auto *VecTy = dyn_cast<VectorType>(Cond->getType())) {
      // For a fixed or scalable vector, get N from <{vscale x} N x iM>.
      unsigned Elts = VecTy->getElementCount().getKnownMinValue();
      // For a fixed or scalable vector, get the size in bits of N x iM; for a
      // scalar this is just M.
      unsigned SelEltSize = SelTy->getPrimitiveSizeInBits().getKnownMinValue();
      Type *EltTy = Builder.getIntNTy(SelEltSize / Elts);
      SelTy = VectorType::get(EltTy, VecTy->getElementCount());
    }
    Value *BitcastB = Builder.CreateBitCast(B, SelTy);
    if (InvertFalseVal)
      D = Builder.CreateNot(D);
    Value *BitcastD = Builder.CreateBitCast(D, SelTy);
    Value *Select = Builder.CreateSelect(Cond, BitcastB, BitcastD);
    return Builder.CreateBitCast(Select, OrigType);
  }

  return nullptr;
}

// (icmp eq X, C) | (icmp ult Other, (X - C)) -> (icmp ule Other, (X - (C + 1)))
// (icmp ne X, C) & (icmp uge Other, (X - C)) -> (icmp ugt Other, (X - (C + 1)))
static Value *foldAndOrOfICmpEqConstantAndICmp(ICmpInst *LHS, ICmpInst *RHS,
                                               bool IsAnd, bool IsLogical,
                                               IRBuilderBase &Builder) {
  Value *LHS0 = LHS->getOperand(0);
  Value *RHS0 = RHS->getOperand(0);
  Value *RHS1 = RHS->getOperand(1);

  ICmpInst::Predicate LPred =
      IsAnd ? LHS->getInversePredicate() : LHS->getPredicate();
  ICmpInst::Predicate RPred =
      IsAnd ? RHS->getInversePredicate() : RHS->getPredicate();

  const APInt *CInt;
  if (LPred != ICmpInst::ICMP_EQ ||
      !match(LHS->getOperand(1), m_APIntAllowPoison(CInt)) ||
      !LHS0->getType()->isIntOrIntVectorTy() ||
      !(LHS->hasOneUse() || RHS->hasOneUse()))
    return nullptr;

  auto MatchRHSOp = [LHS0, CInt](const Value *RHSOp) {
    return match(RHSOp,
                 m_Add(m_Specific(LHS0), m_SpecificIntAllowPoison(-*CInt))) ||
           (CInt->isZero() && RHSOp == LHS0);
  };

  Value *Other;
  if (RPred == ICmpInst::ICMP_ULT && MatchRHSOp(RHS1))
    Other = RHS0;
  else if (RPred == ICmpInst::ICMP_UGT && MatchRHSOp(RHS0))
    Other = RHS1;
  else
    return nullptr;

  if (IsLogical)
    Other = Builder.CreateFreeze(Other);
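  // Editorial illustration of this fold (not upstream text; i8 and C == 3
  // are assumed):
  //   (icmp eq i8 %x, 3) | (icmp ult i8 %o, (add i8 %x, -3))
  // becomes
  //   icmp uge i8 (sub i8 %x, 4), %o    ; i.e. %o u<= %x - 4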
  return Builder.CreateICmp(
      IsAnd ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGE,
      Builder.CreateSub(LHS0, ConstantInt::get(LHS0->getType(), *CInt + 1)),
      Other);
}

/// Fold (icmp)&(icmp) or (icmp)|(icmp) if possible.
/// If IsLogical is true, then the and/or is in select form and the transform
/// must be poison-safe.
Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
                                          Instruction &I, bool IsAnd,
                                          bool IsLogical) {
  const SimplifyQuery Q = SQ.getWithInstruction(&I);

  // Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2)
  // Fold (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2)
  // if K1 and K2 are a one-bit mask.
  if (Value *V = foldAndOrOfICmpsOfAndWithPow2(LHS, RHS, &I, IsAnd, IsLogical))
    return V;

  ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
  Value *LHS0 = LHS->getOperand(0), *RHS0 = RHS->getOperand(0);
  Value *LHS1 = LHS->getOperand(1), *RHS1 = RHS->getOperand(1);

  const APInt *LHSC = nullptr, *RHSC = nullptr;
  match(LHS1, m_APInt(LHSC));
  match(RHS1, m_APInt(RHSC));

  // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
  // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
  if (predicatesFoldable(PredL, PredR)) {
    if (LHS0 == RHS1 && LHS1 == RHS0) {
      PredL = ICmpInst::getSwappedPredicate(PredL);
      std::swap(LHS0, LHS1);
    }
    if (LHS0 == RHS0 && LHS1 == RHS1) {
      unsigned Code = IsAnd ? getICmpCode(PredL) & getICmpCode(PredR)
                            : getICmpCode(PredL) | getICmpCode(PredR);
      bool IsSigned = LHS->isSigned() || RHS->isSigned();
      return getNewICmpValue(Code, IsSigned, LHS0, LHS1, Builder);
    }
  }

  // handle (roughly):
  // (icmp ne (A & B), C) | (icmp ne (A & D), E)
  // (icmp eq (A & B), C) & (icmp eq (A & D), E)
  if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, IsAnd, IsLogical, Builder))
    return V;

  if (Value *V =
          foldAndOrOfICmpEqConstantAndICmp(LHS, RHS, IsAnd, IsLogical, Builder))
    return V;
  // We can treat logical like bitwise here, because both operands are used on
  // the LHS, and as such poison from both will propagate.
  if (Value *V = foldAndOrOfICmpEqConstantAndICmp(RHS, LHS, IsAnd,
                                                  /*IsLogical*/ false, Builder))
    return V;

  if (Value *V =
          foldAndOrOfICmpsWithConstEq(LHS, RHS, IsAnd, IsLogical, Builder, Q))
    return V;
  // We can convert this case to bitwise and, because both operands are used
  // on the LHS, and as such poison from both will propagate.
  if (Value *V = foldAndOrOfICmpsWithConstEq(RHS, LHS, IsAnd,
                                             /*IsLogical*/ false, Builder, Q))
    return V;

  if (Value *V = foldIsPowerOf2OrZero(LHS, RHS, IsAnd, Builder))
    return V;
  if (Value *V = foldIsPowerOf2OrZero(RHS, LHS, IsAnd, Builder))
    return V;

  // TODO: One of these directions is fine with logical and/or, the other could
  // be supported by inserting freeze.
  if (!IsLogical) {
    // E.g. (icmp slt x, 0) | (icmp sgt x, n) --> icmp ugt x, n
    // E.g. (icmp sge x, 0) & (icmp slt x, n) --> icmp ult x, n
    if (Value *V = simplifyRangeCheck(LHS, RHS, /*Inverted=*/!IsAnd))
      return V;
    // E.g. (icmp sgt x, n) | (icmp slt x, 0) --> icmp ugt x, n
    // E.g. (icmp slt x, n) & (icmp sge x, 0) --> icmp ult x, n
    if (Value *V = simplifyRangeCheck(RHS, LHS, /*Inverted=*/!IsAnd))
      return V;
  }

  // TODO: Add conjugated or fold, check whether it is safe for logical and/or.
  if (IsAnd && !IsLogical)
    if (Value *V = foldSignedTruncationCheck(LHS, RHS, I, Builder))
      return V;

  if (Value *V = foldIsPowerOf2(LHS, RHS, IsAnd, Builder, *this))
    return V;

  if (Value *V = foldPowerOf2AndShiftedMask(LHS, RHS, IsAnd, Builder))
    return V;

  // TODO: Verify whether this is safe for logical and/or.
  if (!IsLogical) {
    if (Value *X = foldUnsignedUnderflowCheck(LHS, RHS, IsAnd, Q, Builder))
      return X;
    if (Value *X = foldUnsignedUnderflowCheck(RHS, LHS, IsAnd, Q, Builder))
      return X;
  }

  if (Value *X = foldEqOfParts(LHS, RHS, IsAnd))
    return X;

  // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
  // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0)
  // TODO: Remove this and below when foldLogOpOfMaskedICmps can handle undefs.
  if (!IsLogical && PredL == (IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) &&
      PredL == PredR && match(LHS1, m_ZeroInt()) && match(RHS1, m_ZeroInt()) &&
      LHS0->getType() == RHS0->getType()) {
    Value *NewOr = Builder.CreateOr(LHS0, RHS0);
    return Builder.CreateICmp(PredL, NewOr,
                              Constant::getNullValue(NewOr->getType()));
  }

  // (icmp ne A, -1) | (icmp ne B, -1) --> (icmp ne (A&B), -1)
  // (icmp eq A, -1) & (icmp eq B, -1) --> (icmp eq (A&B), -1)
  if (!IsLogical && PredL == (IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) &&
      PredL == PredR && match(LHS1, m_AllOnes()) && match(RHS1, m_AllOnes()) &&
      LHS0->getType() == RHS0->getType()) {
    Value *NewAnd = Builder.CreateAnd(LHS0, RHS0);
    return Builder.CreateICmp(PredL, NewAnd,
                              Constant::getAllOnesValue(LHS0->getType()));
  }

  if (!IsLogical)
    if (Value *V =
            foldAndOrOfICmpsWithPow2AndWithZero(Builder, LHS, RHS, IsAnd, Q))
      return V;

  // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
  if (!LHSC || !RHSC)
    return nullptr;

  // (trunc x) == C1 & (and x, CA) == C2 -> (and x, CA|CMAX) == C1|C2
  // (trunc x) != C1 | (and x, CA) != C2 -> (and x, CA|CMAX) != C1|C2
  // where CMAX is the all-ones value for the truncated type,
  // iff the lower bits of C2 and CA are zero.
  if (PredL == (IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) &&
      PredL == PredR && LHS->hasOneUse() && RHS->hasOneUse()) {
    Value *V;
    const APInt *AndC, *SmallC = nullptr, *BigC = nullptr;

    // (trunc x) == C1 & (and x, CA) == C2
    // (and x, CA) == C2 & (trunc x) == C1
    if (match(RHS0, m_Trunc(m_Value(V))) &&
        match(LHS0, m_And(m_Specific(V), m_APInt(AndC)))) {
      SmallC = RHSC;
      BigC = LHSC;
    } else if (match(LHS0, m_Trunc(m_Value(V))) &&
               match(RHS0, m_And(m_Specific(V), m_APInt(AndC)))) {
      SmallC = LHSC;
      BigC = RHSC;
    }

    if (SmallC && BigC) {
      unsigned BigBitSize = BigC->getBitWidth();
      unsigned SmallBitSize = SmallC->getBitWidth();

      // Check that the low bits are zero.
      APInt Low = APInt::getLowBitsSet(BigBitSize, SmallBitSize);
      if ((Low & *AndC).isZero() && (Low & *BigC).isZero()) {
        Value *NewAnd = Builder.CreateAnd(V, Low | *AndC);
        APInt N = SmallC->zext(BigBitSize) | *BigC;
        Value *NewVal = ConstantInt::get(NewAnd->getType(), N);
        return Builder.CreateICmp(PredL, NewAnd, NewVal);
      }
    }
  }

  // Match the naive pattern (and its inverted form) for checking if two
  // values share the same sign.
  // An example of the pattern:
  // (icmp slt (X & Y), 0) | (icmp sgt (X | Y), -1) -> (icmp sgt (X ^ Y), -1)
  // Inverted form (example):
  // (icmp slt (X | Y), 0) & (icmp sgt (X & Y), -1) -> (icmp slt (X ^ Y), 0)
  bool TrueIfSignedL, TrueIfSignedR;
  if (isSignBitCheck(PredL, *LHSC, TrueIfSignedL) &&
      isSignBitCheck(PredR, *RHSC, TrueIfSignedR) &&
      (RHS->hasOneUse() || LHS->hasOneUse())) {
    Value *X, *Y;
    if (IsAnd) {
      if ((TrueIfSignedL && !TrueIfSignedR &&
           match(LHS0, m_Or(m_Value(X), m_Value(Y))) &&
           match(RHS0, m_c_And(m_Specific(X), m_Specific(Y)))) ||
          (!TrueIfSignedL && TrueIfSignedR &&
           match(LHS0, m_And(m_Value(X), m_Value(Y))) &&
           match(RHS0, m_c_Or(m_Specific(X), m_Specific(Y))))) {
        Value *NewXor = Builder.CreateXor(X, Y);
        return Builder.CreateIsNeg(NewXor);
      }
    } else {
      if ((TrueIfSignedL && !TrueIfSignedR &&
           match(LHS0, m_And(m_Value(X), m_Value(Y))) &&
           match(RHS0, m_c_Or(m_Specific(X), m_Specific(Y)))) ||
          (!TrueIfSignedL && TrueIfSignedR &&
           match(LHS0, m_Or(m_Value(X), m_Value(Y))) &&
           match(RHS0, m_c_And(m_Specific(X), m_Specific(Y))))) {
        Value *NewXor = Builder.CreateXor(X, Y);
        return Builder.CreateIsNotNeg(NewXor);
      }
    }
  }

  return foldAndOrOfICmpsUsingRanges(LHS, RHS, IsAnd);
}

static Value *foldOrOfInversions(BinaryOperator &I,
                                 InstCombiner::BuilderTy &Builder) {
  assert(I.getOpcode() == Instruction::Or &&
         "Simplification only supports or at the moment.");

  Value *Cmp1, *Cmp2, *Cmp3, *Cmp4;
  if (!match(I.getOperand(0), m_And(m_Value(Cmp1), m_Value(Cmp2))) ||
      !match(I.getOperand(1), m_And(m_Value(Cmp3), m_Value(Cmp4))))
    return nullptr;

  // Check if any two pairs of the 'and' operations are inversions of each
  // other.
  if (isKnownInversion(Cmp1, Cmp3) && isKnownInversion(Cmp2, Cmp4))
    return Builder.CreateXor(Cmp1, Cmp4);
  if (isKnownInversion(Cmp1, Cmp4) && isKnownInversion(Cmp2, Cmp3))
    return Builder.CreateXor(Cmp1, Cmp3);

  return nullptr;
}
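// Editorial illustration of foldOrOfInversions (not upstream text; i1 values
// with known inversions are assumed): if %c3 == not %c1 and %c4 == not %c2,
//   (%c1 & %c2) | (%c3 & %c4)
// is (A & B) | (~A & ~B), the 'xnor' of %c1 and %c2, and is emitted as
//   xor %c1, %c4    ; == xor %c1, (not %c2)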
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
// here. We should standardize that construct where it is needed or choose some
// other way to ensure that commutated variants of patterns are not missed.
Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
  if (Value *V = simplifyOrInst(I.getOperand(0), I.getOperand(1),
                                SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  if (SimplifyAssociativeOrCommutative(I))
    return &I;

  if (Instruction *X = foldVectorBinop(I))
    return X;

  if (Instruction *Phi = foldBinopWithPhiOperands(I))
    return Phi;

  // See if we can simplify any instructions used by the instruction whose sole
  // purpose is to compute bits we don't care about.
  if (SimplifyDemandedInstructionBits(I))
    return &I;

  // Do this before using distributive laws to catch simple and/or/not
  // patterns.
  if (Instruction *Xor = foldOrToXor(I, Builder))
    return Xor;

  if (Instruction *X = foldComplexAndOrPatterns(I, Builder))
    return X;

  // (A & B) | (C & D) -> A ^ D where A == ~C && B == ~D
  // (A & B) | (C & D) -> A ^ C where A == ~D && B == ~C
  if (Value *V = foldOrOfInversions(I, Builder))
    return replaceInstUsesWith(I, V);

  // (A&B)|(A&C) -> A&(B|C) etc
  if (Value *V = foldUsingDistributiveLaws(I))
    return replaceInstUsesWith(I, V);

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  Type *Ty = I.getType();
  if (Ty->isIntOrIntVectorTy(1)) {
    if (auto *SI0 = dyn_cast<SelectInst>(Op0)) {
      if (auto *R =
              foldAndOrOfSelectUsingImpliedCond(Op1, *SI0, /* IsAnd */ false))
        return R;
    }
    if (auto *SI1 = dyn_cast<SelectInst>(Op1)) {
      if (auto *R =
              foldAndOrOfSelectUsingImpliedCond(Op0, *SI1, /* IsAnd */ false))
        return R;
    }
  }

  if (Instruction *FoldedLogic = foldBinOpIntoSelectOrPhi(I))
    return FoldedLogic;

  if (Instruction *BitOp = matchBSwapOrBitReverse(I, /*MatchBSwaps*/ true,
                                                  /*MatchBitReversals*/ true))
    return BitOp;

  if (Instruction *Funnel = matchFunnelShift(I, *this))
    return Funnel;

  if (Instruction *Concat = matchOrConcat(I, Builder))
    return replaceInstUsesWith(I, Concat);

  if (Instruction *R = foldBinOpShiftWithShift(I))
    return R;

  if (Instruction *R = tryFoldInstWithCtpopWithNot(&I))
    return R;

  Value *X, *Y;
  const APInt *CV;
  if (match(&I, m_c_Or(m_OneUse(m_Xor(m_Value(X), m_APInt(CV))), m_Value(Y))) &&
      !CV->isAllOnes() && MaskedValueIsZero(Y, *CV, 0, &I)) {
    // (X ^ C) | Y -> (X | Y) ^ C iff Y & C == 0
    // The check for a 'not' op is for efficiency (if Y is known zero --> ~X).
    Value *Or = Builder.CreateOr(X, Y);
    return BinaryOperator::CreateXor(Or, ConstantInt::get(Ty, *CV));
  }

  // If the operands have no common bits set:
  // or (mul X, Y), X --> add (mul X, Y), X --> mul X, (Y + 1)
  if (match(&I, m_c_DisjointOr(m_OneUse(m_Mul(m_Value(X), m_Value(Y))),
                               m_Deferred(X)))) {
    Value *IncrementY = Builder.CreateAdd(Y, ConstantInt::get(Ty, 1));
    return BinaryOperator::CreateMul(X, IncrementY);
  }

  // (A & C) | (B & D)
  Value *A, *B, *C, *D;
  if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
      match(Op1, m_And(m_Value(B), m_Value(D)))) {

    // (A & C0) | (B & C1)
    const APInt *C0, *C1;
    if (match(C, m_APInt(C0)) && match(D, m_APInt(C1))) {
      Value *X;
      if (*C0 == ~*C1) {
        // ((X | B) & MaskC) | (B & ~MaskC) -> (X & MaskC) | B
        if (match(A, m_c_Or(m_Value(X), m_Specific(B))))
          return BinaryOperator::CreateOr(Builder.CreateAnd(X, *C0), B);
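        // Editorial illustration (not upstream text; i8 with MaskC == 15 is
        // assumed): ((%x | %b) & 15) | (%b & 240) keeps the low nibble of
        // %b either way, so it folds to (%x & 15) | %b.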
        // (A & MaskC) | ((X | A) & ~MaskC) -> (X & ~MaskC) | A
        if (match(B, m_c_Or(m_Specific(A), m_Value(X))))
          return BinaryOperator::CreateOr(Builder.CreateAnd(X, *C1), A);

        // ((X ^ B) & MaskC) | (B & ~MaskC) -> (X & MaskC) ^ B
        if (match(A, m_c_Xor(m_Value(X), m_Specific(B))))
          return BinaryOperator::CreateXor(Builder.CreateAnd(X, *C0), B);
        // (A & MaskC) | ((X ^ A) & ~MaskC) -> (X & ~MaskC) ^ A
        if (match(B, m_c_Xor(m_Specific(A), m_Value(X))))
          return BinaryOperator::CreateXor(Builder.CreateAnd(X, *C1), A);
      }

      if ((*C0 & *C1).isZero()) {
        // ((X | B) & C0) | (B & C1) --> (X | B) & (C0 | C1)
        // iff (C0 & C1) == 0 and (X & ~C0) == 0
        if (match(A, m_c_Or(m_Value(X), m_Specific(B))) &&
            MaskedValueIsZero(X, ~*C0, 0, &I)) {
          Constant *C01 = ConstantInt::get(Ty, *C0 | *C1);
          return BinaryOperator::CreateAnd(A, C01);
        }
        // (A & C0) | ((X | A) & C1) --> (X | A) & (C0 | C1)
        // iff (C0 & C1) == 0 and (X & ~C1) == 0
        if (match(B, m_c_Or(m_Value(X), m_Specific(A))) &&
            MaskedValueIsZero(X, ~*C1, 0, &I)) {
          Constant *C01 = ConstantInt::get(Ty, *C0 | *C1);
          return BinaryOperator::CreateAnd(B, C01);
        }
        // ((X | C2) & C0) | ((X | C3) & C1) --> (X | C2 | C3) & (C0 | C1)
        // iff (C0 & C1) == 0 and (C2 & ~C0) == 0 and (C3 & ~C1) == 0.
        const APInt *C2, *C3;
        if (match(A, m_Or(m_Value(X), m_APInt(C2))) &&
            match(B, m_Or(m_Specific(X), m_APInt(C3))) &&
            (*C2 & ~*C0).isZero() && (*C3 & ~*C1).isZero()) {
          Value *Or = Builder.CreateOr(X, *C2 | *C3, "bitfield");
          Constant *C01 = ConstantInt::get(Ty, *C0 | *C1);
          return BinaryOperator::CreateAnd(Or, C01);
        }
      }
    }

    // Don't try to form a select if it's unlikely that we'll get rid of at
    // least one of the operands. A select is generally more expensive than the
    // 'or' that it is replacing.
    if (Op0->hasOneUse() || Op1->hasOneUse()) {
      // (Cond & C) | (~Cond & D) -> Cond ? C : D, and commuted variants.
      if (Value *V = matchSelectFromAndOr(A, C, B, D))
        return replaceInstUsesWith(I, V);
      if (Value *V = matchSelectFromAndOr(A, C, D, B))
        return replaceInstUsesWith(I, V);
      if (Value *V = matchSelectFromAndOr(C, A, B, D))
        return replaceInstUsesWith(I, V);
      if (Value *V = matchSelectFromAndOr(C, A, D, B))
        return replaceInstUsesWith(I, V);
      if (Value *V = matchSelectFromAndOr(B, D, A, C))
        return replaceInstUsesWith(I, V);
      if (Value *V = matchSelectFromAndOr(B, D, C, A))
        return replaceInstUsesWith(I, V);
      if (Value *V = matchSelectFromAndOr(D, B, A, C))
        return replaceInstUsesWith(I, V);
      if (Value *V = matchSelectFromAndOr(D, B, C, A))
        return replaceInstUsesWith(I, V);
    }
  }
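  // Editorial illustration of the select formation above (not upstream text;
  // a <4 x i32> mask is assumed): with %m = sext <4 x i1> %c to <4 x i32> and
  // %notm = xor <4 x i32> %m, splat(-1),
  //   (%m & %b) | (%notm & %d) --> select <4 x i1> %c, <4 x i32> %b, %d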
  if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
      match(Op1, m_Not(m_Or(m_Value(B), m_Value(D)))) &&
      (Op0->hasOneUse() || Op1->hasOneUse())) {
    // (Cond & C) | ~(Cond | D) -> Cond ? C : ~D
    if (Value *V = matchSelectFromAndOr(A, C, B, D, true))
      return replaceInstUsesWith(I, V);
    if (Value *V = matchSelectFromAndOr(A, C, D, B, true))
      return replaceInstUsesWith(I, V);
    if (Value *V = matchSelectFromAndOr(C, A, B, D, true))
      return replaceInstUsesWith(I, V);
    if (Value *V = matchSelectFromAndOr(C, A, D, B, true))
      return replaceInstUsesWith(I, V);
  }

  // (A ^ B) | ((B ^ C) ^ A) -> (A ^ B) | C
  if (match(Op0, m_Xor(m_Value(A), m_Value(B))))
    if (match(Op1,
              m_c_Xor(m_c_Xor(m_Specific(B), m_Value(C)), m_Specific(A))) ||
        match(Op1, m_c_Xor(m_c_Xor(m_Specific(A), m_Value(C)), m_Specific(B))))
      return BinaryOperator::CreateOr(Op0, C);

  // ((B ^ C) ^ A) | (A ^ B) -> (A ^ B) | C
  if (match(Op1, m_Xor(m_Value(A), m_Value(B))))
    if (match(Op0,
              m_c_Xor(m_c_Xor(m_Specific(B), m_Value(C)), m_Specific(A))) ||
        match(Op0, m_c_Xor(m_c_Xor(m_Specific(A), m_Value(C)), m_Specific(B))))
      return BinaryOperator::CreateOr(Op1, C);

  if (Instruction *DeMorgan = matchDeMorgansLaws(I, *this))
    return DeMorgan;

  // Canonicalize xor to the RHS.
  bool SwappedForXor = false;
  if (match(Op0, m_Xor(m_Value(), m_Value()))) {
    std::swap(Op0, Op1);
    SwappedForXor = true;
  }

  if (match(Op1, m_Xor(m_Value(A), m_Value(B)))) {
    // (A | ?) | (A ^ B) --> (A | ?) | B
    // (B | ?) | (A ^ B) --> (B | ?) | A
    if (match(Op0, m_c_Or(m_Specific(A), m_Value())))
      return BinaryOperator::CreateOr(Op0, B);
    if (match(Op0, m_c_Or(m_Specific(B), m_Value())))
      return BinaryOperator::CreateOr(Op0, A);

    // (A & B) | (A ^ B) --> A | B
    // (B & A) | (A ^ B) --> A | B
    if (match(Op0, m_c_And(m_Specific(A), m_Specific(B))))
      return BinaryOperator::CreateOr(A, B);

    // ~A | (A ^ B) --> ~(A & B)
    // ~B | (A ^ B) --> ~(A & B)
    // The swap above should always make Op0 the 'not'.
    if ((Op0->hasOneUse() || Op1->hasOneUse()) &&
        (match(Op0, m_Not(m_Specific(A))) || match(Op0, m_Not(m_Specific(B)))))
      return BinaryOperator::CreateNot(Builder.CreateAnd(A, B));
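    // Editorial illustration (not upstream text; i32 is assumed): per bit,
    // when a bit of %a is 0, the result of (~%a) | (%a ^ %b) is 1, and when
    // it is 1, the result is the complement of %b's bit; both agree with
    // ~(%a & %b), so (xor %a, -1) | (xor %a, %b) --> xor (and %a, %b), -1.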
    // Same as above, but peek through an 'and' to the common operand:
    // ~(A & ?) | (A ^ B) --> ~((A & ?) & B)
    // ~(B & ?) | (A ^ B) --> ~((B & ?) & A)
    Instruction *And;
    if ((Op0->hasOneUse() || Op1->hasOneUse()) &&
        match(Op0, m_Not(m_CombineAnd(m_Instruction(And),
                                      m_c_And(m_Specific(A), m_Value())))))
      return BinaryOperator::CreateNot(Builder.CreateAnd(And, B));
    if ((Op0->hasOneUse() || Op1->hasOneUse()) &&
        match(Op0, m_Not(m_CombineAnd(m_Instruction(And),
                                      m_c_And(m_Specific(B), m_Value())))))
      return BinaryOperator::CreateNot(Builder.CreateAnd(And, A));

    // (~A | C) | (A ^ B) --> ~(A & B) | C
    // (~B | C) | (A ^ B) --> ~(A & B) | C
    if (Op0->hasOneUse() && Op1->hasOneUse() &&
        (match(Op0, m_c_Or(m_Not(m_Specific(A)), m_Value(C))) ||
         match(Op0, m_c_Or(m_Not(m_Specific(B)), m_Value(C))))) {
      Value *Nand = Builder.CreateNot(Builder.CreateAnd(A, B), "nand");
      return BinaryOperator::CreateOr(Nand, C);
    }
  }

  if (SwappedForXor)
    std::swap(Op0, Op1);

  {
    ICmpInst *LHS = dyn_cast<ICmpInst>(Op0);
    ICmpInst *RHS = dyn_cast<ICmpInst>(Op1);
    if (LHS && RHS)
      if (Value *Res = foldAndOrOfICmps(LHS, RHS, I, /* IsAnd */ false))
        return replaceInstUsesWith(I, Res);

    // TODO: Make this recursive; it's a little tricky because an arbitrary
    // number of 'or' instructions might have to be created.
    Value *X, *Y;
    if (LHS && match(Op1, m_OneUse(m_LogicalOr(m_Value(X), m_Value(Y))))) {
      bool IsLogical = isa<SelectInst>(Op1);
      // LHS | (X || Y) --> (LHS || X) || Y
      if (auto *Cmp = dyn_cast<ICmpInst>(X))
        if (Value *Res =
                foldAndOrOfICmps(LHS, Cmp, I, /* IsAnd */ false, IsLogical))
          return replaceInstUsesWith(I, IsLogical
                                            ? Builder.CreateLogicalOr(Res, Y)
                                            : Builder.CreateOr(Res, Y));
      // LHS | (X || Y) --> X || (LHS | Y)
      if (auto *Cmp = dyn_cast<ICmpInst>(Y))
        if (Value *Res = foldAndOrOfICmps(LHS, Cmp, I, /* IsAnd */ false,
                                          /* IsLogical */ false))
          return replaceInstUsesWith(I, IsLogical
                                            ? Builder.CreateLogicalOr(X, Res)
                                            : Builder.CreateOr(X, Res));
    }
    if (RHS && match(Op0, m_OneUse(m_LogicalOr(m_Value(X), m_Value(Y))))) {
      bool IsLogical = isa<SelectInst>(Op0);
      // (X || Y) | RHS --> (X || RHS) || Y
      if (auto *Cmp = dyn_cast<ICmpInst>(X))
        if (Value *Res =
                foldAndOrOfICmps(Cmp, RHS, I, /* IsAnd */ false, IsLogical))
          return replaceInstUsesWith(I, IsLogical
                                            ? Builder.CreateLogicalOr(Res, Y)
                                            : Builder.CreateOr(Res, Y));
      // (X || Y) | RHS --> X || (Y | RHS)
      if (auto *Cmp = dyn_cast<ICmpInst>(Y))
        if (Value *Res = foldAndOrOfICmps(Cmp, RHS, I, /* IsAnd */ false,
                                          /* IsLogical */ false))
          return replaceInstUsesWith(I, IsLogical
                                            ? Builder.CreateLogicalOr(X, Res)
                                            : Builder.CreateOr(X, Res));
    }
  }

  if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0)))
    if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1)))
      if (Value *Res = foldLogicOfFCmps(LHS, RHS, /*IsAnd*/ false))
        return replaceInstUsesWith(I, Res);

  if (Instruction *FoldedFCmps = reassociateFCmps(I, Builder))
    return FoldedFCmps;

  if (Instruction *CastedOr = foldCastedBitwiseLogic(I))
    return CastedOr;

  if (Instruction *Sel = foldBinopOfSextBoolToSelect(I))
    return Sel;
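  // Editorial illustration of the sext-to-select fold below (not upstream
  // text; i8 is assumed):
  //   or (sext i1 %a to i8), %b  -->  select i1 %a, i8 -1, i8 %b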
  // or(sext(A), B) / or(B, sext(A)) --> A ? -1 : B, where A is i1 or <N x i1>.
  // TODO: Move this into foldBinopOfSextBoolToSelect as a more generalized fold
  // with binop identity constant. But creating a select with a non-constant
  // arm may not be reversible due to poison semantics. Is that a good
  // canonicalization?
  if (match(&I, m_c_Or(m_OneUse(m_SExt(m_Value(A))), m_Value(B))) &&
      A->getType()->isIntOrIntVectorTy(1))
    return SelectInst::Create(A, ConstantInt::getAllOnesValue(Ty), B);

  // Note: If we've gotten to the point of visiting the outer OR, then the
  // inner one couldn't be simplified. If it was a constant, then it won't
  // be simplified by a later pass either, so we try swapping the inner/outer
  // ORs in the hopes that we'll be able to simplify it this way.
  // (X|C) | V --> (X|V) | C
  ConstantInt *CI;
  if (Op0->hasOneUse() && !match(Op1, m_ConstantInt()) &&
      match(Op0, m_Or(m_Value(A), m_ConstantInt(CI)))) {
    Value *Inner = Builder.CreateOr(A, Op1);
    Inner->takeName(Op0);
    return BinaryOperator::CreateOr(Inner, CI);
  }

  // Change (or (bool?A:B),(bool?C:D)) --> (bool?(or A,C):(or B,D))
  // Since this OR statement hasn't been optimized further yet, we hope
  // that this transformation will allow the new ORs to be optimized.
  {
    Value *X = nullptr, *Y = nullptr;
    if (Op0->hasOneUse() && Op1->hasOneUse() &&
        match(Op0, m_Select(m_Value(X), m_Value(A), m_Value(B))) &&
        match(Op1, m_Select(m_Value(Y), m_Value(C), m_Value(D))) && X == Y) {
      Value *orTrue = Builder.CreateOr(A, C);
      Value *orFalse = Builder.CreateOr(B, D);
      return SelectInst::Create(X, orTrue, orFalse);
    }
  }

  // or(ashr(subNSW(Y, X), ScalarSizeInBits(Y) - 1), X) --> X s> Y ? -1 : X.
  {
    Value *X, *Y;
    if (match(&I, m_c_Or(m_OneUse(m_AShr(
                             m_NSWSub(m_Value(Y), m_Value(X)),
                             m_SpecificInt(Ty->getScalarSizeInBits() - 1))),
                         m_Deferred(X)))) {
      Value *NewICmpInst = Builder.CreateICmpSGT(X, Y);
      Value *AllOnes = ConstantInt::getAllOnesValue(Ty);
      return SelectInst::Create(NewICmpInst, AllOnes, X);
    }
  }

  {
    // ((A & B) ^ A) | ((A & B) ^ B) -> A ^ B
    // (A ^ (A & B)) | (B ^ (A & B)) -> A ^ B
    // ((A & B) ^ B) | ((A & B) ^ A) -> A ^ B
    // (B ^ (A & B)) | (A ^ (A & B)) -> A ^ B
    const auto TryXorOpt = [&](Value *Lhs, Value *Rhs) -> Instruction * {
      if (match(Lhs, m_c_Xor(m_And(m_Value(A), m_Value(B)), m_Deferred(A))) &&
          match(Rhs,
                m_c_Xor(m_And(m_Specific(A), m_Specific(B)), m_Specific(B)))) {
        return BinaryOperator::CreateXor(A, B);
      }
      return nullptr;
    };

    if (Instruction *Result = TryXorOpt(Op0, Op1))
      return Result;
    if (Instruction *Result = TryXorOpt(Op1, Op0))
      return Result;
  }
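  // Editorial illustration of the xor fold above (not upstream text; i32 and
  // %ab = and i32 %a, %b are assumed): (%ab ^ %a) is %a & ~%b and (%ab ^ %b)
  // is %b & ~%a; OR-ing those disjoint halves yields exactly %a ^ %b.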
  if (Instruction *V =
          canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(I))
    return V;

  CmpInst::Predicate Pred;
  Value *Mul, *Ov, *MulIsNotZero, *UMulWithOv;
  // Check if the OR weakens the overflow condition for umul.with.overflow by
  // treating any non-zero result as overflow. In that case, we overflow if
  // both umul.with.overflow operands are != 0, as in that case the result can
  // only be 0, iff the multiplication overflows.
  if (match(&I,
            m_c_Or(m_CombineAnd(m_ExtractValue<1>(m_Value(UMulWithOv)),
                                m_Value(Ov)),
                   m_CombineAnd(m_ICmp(Pred,
                                       m_CombineAnd(m_ExtractValue<0>(
                                                        m_Deferred(UMulWithOv)),
                                                    m_Value(Mul)),
                                       m_ZeroInt()),
                                m_Value(MulIsNotZero)))) &&
      (Ov->hasOneUse() || (MulIsNotZero->hasOneUse() && Mul->hasOneUse())) &&
      Pred == CmpInst::ICMP_NE) {
    Value *A, *B;
    if (match(UMulWithOv, m_Intrinsic<Intrinsic::umul_with_overflow>(
                              m_Value(A), m_Value(B)))) {
      Value *NotNullA = Builder.CreateIsNotNull(A);
      Value *NotNullB = Builder.CreateIsNotNull(B);
      return BinaryOperator::CreateAnd(NotNullA, NotNullB);
    }
  }

  /// Res, Overflow = xxx_with_overflow X, C1
  /// Try to canonicalize the pattern "Overflow | icmp pred Res, C2" into
  /// "Overflow | icmp pred X, C2 +/- C1".
  const WithOverflowInst *WO;
  const Value *WOV;
  const APInt *C1, *C2;
  if (match(&I, m_c_Or(m_CombineAnd(m_ExtractValue<1>(m_CombineAnd(
                                        m_WithOverflowInst(WO), m_Value(WOV))),
                                    m_Value(Ov)),
                       m_OneUse(m_ICmp(Pred, m_ExtractValue<0>(m_Deferred(WOV)),
                                       m_APInt(C2))))) &&
      (WO->getBinaryOp() == Instruction::Add ||
       WO->getBinaryOp() == Instruction::Sub) &&
      (ICmpInst::isEquality(Pred) ||
       WO->isSigned() == ICmpInst::isSigned(Pred)) &&
      match(WO->getRHS(), m_APInt(C1))) {
    bool Overflow;
    APInt NewC = WO->getBinaryOp() == Instruction::Add
                     ? (ICmpInst::isSigned(Pred) ? C2->ssub_ov(*C1, Overflow)
                                                 : C2->usub_ov(*C1, Overflow))
                     : (ICmpInst::isSigned(Pred) ? C2->sadd_ov(*C1, Overflow)
                                                 : C2->uadd_ov(*C1, Overflow));
    if (!Overflow || ICmpInst::isEquality(Pred)) {
      Value *NewCmp = Builder.CreateICmp(
          Pred, WO->getLHS(), ConstantInt::get(WO->getLHS()->getType(), NewC));
      return BinaryOperator::CreateOr(Ov, NewCmp);
    }
  }

  // (~x) | y --> ~(x & (~y)) iff that gets rid of inversions
  if (sinkNotIntoOtherHandOfLogicalOp(I))
    return &I;

  // Improve "get low bit mask up to and including bit X" pattern:
  // (1 << X) | ((1 << X) + -1) --> -1 l>> (bitwidth(x) - 1 - X)
  if (match(&I, m_c_Or(m_Add(m_Shl(m_One(), m_Value(X)), m_AllOnes()),
                       m_Shl(m_One(), m_Deferred(X)))) &&
      match(&I, m_c_Or(m_OneUse(m_Value()), m_Value()))) {
    Value *Sub = Builder.CreateSub(
        ConstantInt::get(Ty, Ty->getScalarSizeInBits() - 1), X);
    return BinaryOperator::CreateLShr(Constant::getAllOnesValue(Ty), Sub);
  }

  // An 'or' recurrence with a loop-invariant step is equivalent to
  // (or start, step).
  PHINode *PN = nullptr;
  Value *Start = nullptr, *Step = nullptr;
  if (matchSimpleRecurrence(&I, PN, Start, Step) && DT.dominates(Step, PN))
    return replaceInstUsesWith(I, Builder.CreateOr(Start, Step));
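  // Editorial illustration of the recurrence fold above (not upstream text;
  // the loop structure is assumed):
  //   %r = phi i32 [ %start, %entry ], [ %r.next, %loop ]
  //   %r.next = or i32 %r, %step        ; %step is loop-invariant
  // Since 'or' is idempotent, every iterate equals (or %start, %step), and
  // the recurrence collapses to that single value.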
  // (A & B) | (C | D) or (C | D) | (A & B)
  // Can be combined if C or D is of type (A/B & X).
  if (match(&I, m_c_Or(m_OneUse(m_And(m_Value(A), m_Value(B))),
                       m_OneUse(m_Or(m_Value(C), m_Value(D)))))) {
    // (A & B) | (C | ?) -> C | (? | (A & B))
    // (A & B) | (C | ?) -> C | (? | (A & B))
    // (A & B) | (C | ?) -> C | (? | (A & B))
    // (A & B) | (C | ?) -> C | (? | (A & B))
    // (C | ?) | (A & B) -> C | (? | (A & B))
    // (C | ?) | (A & B) -> C | (? | (A & B))
    // (C | ?) | (A & B) -> C | (? | (A & B))
    // (C | ?) | (A & B) -> C | (? | (A & B))
    if (match(D, m_OneUse(m_c_And(m_Specific(A), m_Value()))) ||
        match(D, m_OneUse(m_c_And(m_Specific(B), m_Value()))))
      return BinaryOperator::CreateOr(
          C, Builder.CreateOr(D, Builder.CreateAnd(A, B)));
    // (A & B) | (? | D) -> (? | (A & B)) | D
    // (A & B) | (? | D) -> (? | (A & B)) | D
    // (A & B) | (? | D) -> (? | (A & B)) | D
    // (A & B) | (? | D) -> (? | (A & B)) | D
    // (? | D) | (A & B) -> (? | (A & B)) | D
    // (? | D) | (A & B) -> (? | (A & B)) | D
    // (? | D) | (A & B) -> (? | (A & B)) | D
    // (? | D) | (A & B) -> (? | (A & B)) | D
    if (match(C, m_OneUse(m_c_And(m_Specific(A), m_Value()))) ||
        match(C, m_OneUse(m_c_And(m_Specific(B), m_Value()))))
      return BinaryOperator::CreateOr(
          Builder.CreateOr(C, Builder.CreateAnd(A, B)), D);
  }

  if (Instruction *R = reassociateForUses(I, Builder))
    return R;

  if (Instruction *Canonicalized = canonicalizeLogicFirst(I, Builder))
    return Canonicalized;

  if (Instruction *Folded = foldLogicOfIsFPClass(I, Op0, Op1))
    return Folded;

  if (Instruction *Res = foldBinOpOfDisplacedShifts(I))
    return Res;

  // If we are setting the sign bit of a floating-point value, convert
  // this to fneg(fabs), then cast back to integer.
  //
  // If the result isn't immediately cast back to a float, this will increase
  // the number of instructions. This is still probably a better canonical form
  // as it enables FP value tracking.
  //
  // Assumes any IEEE-represented type has the sign bit in the high bit.
  //
  // This is a generous interpretation of noimplicitfloat; this is not a true
  // floating-point operation.
  Value *CastOp;
  if (match(Op0, m_ElementWiseBitCast(m_Value(CastOp))) &&
      match(Op1, m_SignMask()) &&
      !Builder.GetInsertBlock()->getParent()->hasFnAttribute(
          Attribute::NoImplicitFloat)) {
    Type *EltTy = CastOp->getType()->getScalarType();
    if (EltTy->isFloatingPointTy() && EltTy->isIEEE()) {
      Value *FAbs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, CastOp);
      Value *FNegFAbs = Builder.CreateFNeg(FAbs);
      return new BitCastInst(FNegFAbs, I.getType());
    }
  }

  // (X & C1) | C2 -> X & (C1 | C2) iff (X & C2) == C2
  if (match(Op0, m_OneUse(m_And(m_Value(X), m_APInt(C1)))) &&
      match(Op1, m_APInt(C2))) {
    KnownBits KnownX = computeKnownBits(X, /*Depth*/ 0, &I);
    if ((KnownX.One & *C2) == *C2)
      return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *C1 | *C2));
  }

  if (Instruction *Res = foldBitwiseLogicWithIntrinsics(I, Builder))
    return Res;

  if (Value *V =
          simplifyAndOrWithOpReplaced(Op0, Op1, Constant::getNullValue(Ty),
                                      /*SimplifyOnly*/ false, *this))
    return BinaryOperator::CreateOr(V, Op1);
  if (Value *V =
          simplifyAndOrWithOpReplaced(Op1, Op0, Constant::getNullValue(Ty),
                                      /*SimplifyOnly*/ false, *this))
    return BinaryOperator::CreateOr(Op0, V);

  if (cast<PossiblyDisjointInst>(I).isDisjoint())
    if (Value *V = SimplifyAddWithRemainder(I))
      return replaceInstUsesWith(I, V);

  return nullptr;
}
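// Editorial illustration of the sign-bit fold near the end of visitOr above
// (not upstream text; f32 is assumed):
//   %i = bitcast float %f to i32
//   %r = or i32 %i, -2147483648        ; set the sign bit
// becomes
//   %a = call float @llvm.fabs.f32(float %f)
//   %n = fneg float %a
//   %r = bitcast float %n to i32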
/// A ^ B can be specified using other logic ops in a variety of patterns. We
/// can fold these early and efficiently by morphing an existing instruction.
static Instruction *foldXorToXor(BinaryOperator &I,
                                 InstCombiner::BuilderTy &Builder) {
  assert(I.getOpcode() == Instruction::Xor);
  Value *Op0 = I.getOperand(0);
  Value *Op1 = I.getOperand(1);
  Value *A, *B;

  // There are 4 commuted variants for each of the basic patterns.

  // (A & B) ^ (A | B) -> A ^ B
  // (A & B) ^ (B | A) -> A ^ B
  // (A | B) ^ (A & B) -> A ^ B
  // (A | B) ^ (B & A) -> A ^ B
  if (match(&I, m_c_Xor(m_And(m_Value(A), m_Value(B)),
                        m_c_Or(m_Deferred(A), m_Deferred(B)))))
    return BinaryOperator::CreateXor(A, B);

  // (A | ~B) ^ (~A | B) -> A ^ B
  // (~B | A) ^ (~A | B) -> A ^ B
  // (~A | B) ^ (A | ~B) -> A ^ B
  // (B | ~A) ^ (A | ~B) -> A ^ B
  if (match(&I, m_Xor(m_c_Or(m_Value(A), m_Not(m_Value(B))),
                      m_c_Or(m_Not(m_Deferred(A)), m_Deferred(B)))))
    return BinaryOperator::CreateXor(A, B);

  // (A & ~B) ^ (~A & B) -> A ^ B
  // (~B & A) ^ (~A & B) -> A ^ B
  // (~A & B) ^ (A & ~B) -> A ^ B
  // (B & ~A) ^ (A & ~B) -> A ^ B
  if (match(&I, m_Xor(m_c_And(m_Value(A), m_Not(m_Value(B))),
                      m_c_And(m_Not(m_Deferred(A)), m_Deferred(B)))))
    return BinaryOperator::CreateXor(A, B);

  // For the remaining cases we need to get rid of one of the operands.
  if (!Op0->hasOneUse() && !Op1->hasOneUse())
    return nullptr;

  // (A | B) ^ ~(A & B) -> ~(A ^ B)
  // (A | B) ^ ~(B & A) -> ~(A ^ B)
  // (A & B) ^ ~(A | B) -> ~(A ^ B)
  // (A & B) ^ ~(B | A) -> ~(A ^ B)
  // Complexity sorting ensures the not will be on the right side.
  if ((match(Op0, m_Or(m_Value(A), m_Value(B))) &&
       match(Op1, m_Not(m_c_And(m_Specific(A), m_Specific(B))))) ||
      (match(Op0, m_And(m_Value(A), m_Value(B))) &&
       match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))))
    return BinaryOperator::CreateNot(Builder.CreateXor(A, B));

  return nullptr;
}

Value *InstCombinerImpl::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS,
                                        BinaryOperator &I) {
  assert(I.getOpcode() == Instruction::Xor && I.getOperand(0) == LHS &&
         I.getOperand(1) == RHS && "Should be 'xor' with these operands");

  ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
  Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1);
  Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1);

  if (predicatesFoldable(PredL, PredR)) {
    if (LHS0 == RHS1 && LHS1 == RHS0) {
      std::swap(LHS0, LHS1);
      PredL = ICmpInst::getSwappedPredicate(PredL);
    }
    if (LHS0 == RHS0 && LHS1 == RHS1) {
      // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)
      unsigned Code = getICmpCode(PredL) ^ getICmpCode(PredR);
      bool IsSigned = LHS->isSigned() || RHS->isSigned();
      return getNewICmpValue(Code, IsSigned, LHS0, LHS1, Builder);
    }
  }
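  // Editorial illustration of the predicate-code fold above (not upstream
  // text; i32 is assumed):
  //   (icmp slt i32 %a, %b) ^ (icmp sgt i32 %a, %b) --> icmp ne i32 %a, %b
  // since exactly one of 'slt' and 'sgt' holds precisely when %a != %b.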
  // TODO: This can be generalized to compares of non-signbits using
  // decomposeBitTestICmp(). It could be enhanced more by using (something
  // like) foldLogOpOfMaskedICmps().
  const APInt *LC, *RC;
  if (match(LHS1, m_APInt(LC)) && match(RHS1, m_APInt(RC)) &&
      LHS0->getType() == RHS0->getType() &&
      LHS0->getType()->isIntOrIntVectorTy()) {
    // Convert xor of signbit tests to signbit test of xor'd values:
    // (X > -1) ^ (Y > -1) --> (X ^ Y) < 0
    // (X < 0) ^ (Y < 0) --> (X ^ Y) < 0
    // (X > -1) ^ (Y < 0) --> (X ^ Y) > -1
    // (X < 0) ^ (Y > -1) --> (X ^ Y) > -1
    bool TrueIfSignedL, TrueIfSignedR;
    if ((LHS->hasOneUse() || RHS->hasOneUse()) &&
        isSignBitCheck(PredL, *LC, TrueIfSignedL) &&
        isSignBitCheck(PredR, *RC, TrueIfSignedR)) {
      Value *XorLR = Builder.CreateXor(LHS0, RHS0);
      return TrueIfSignedL == TrueIfSignedR ? Builder.CreateIsNeg(XorLR)
                                            : Builder.CreateIsNotNeg(XorLR);
    }

    // Fold (icmp pred1 X, C1) ^ (icmp pred2 X, C2)
    // into a single comparison using range-based reasoning.
    if (LHS0 == RHS0) {
      ConstantRange CR1 = ConstantRange::makeExactICmpRegion(PredL, *LC);
      ConstantRange CR2 = ConstantRange::makeExactICmpRegion(PredR, *RC);
      auto CRUnion = CR1.exactUnionWith(CR2);
      auto CRIntersect = CR1.exactIntersectWith(CR2);
      if (CRUnion && CRIntersect)
        if (auto CR = CRUnion->exactIntersectWith(CRIntersect->inverse())) {
          if (CR->isFullSet())
            return ConstantInt::getTrue(I.getType());
          if (CR->isEmptySet())
            return ConstantInt::getFalse(I.getType());

          CmpInst::Predicate NewPred;
          APInt NewC, Offset;
          CR->getEquivalentICmp(NewPred, NewC, Offset);

          if ((Offset.isZero() && (LHS->hasOneUse() || RHS->hasOneUse())) ||
              (LHS->hasOneUse() && RHS->hasOneUse())) {
            Value *NewV = LHS0;
            Type *Ty = LHS0->getType();
            if (!Offset.isZero())
              NewV = Builder.CreateAdd(NewV, ConstantInt::get(Ty, Offset));
            return Builder.CreateICmp(NewPred, NewV,
                                      ConstantInt::get(Ty, NewC));
          }
        }
    }
  }
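  // Editorial illustration of the range-based fold above (not upstream text;
  // i8 with one-use compares is assumed):
  //   (icmp ult i8 %x, 10) ^ (icmp ult i8 %x, 20)
  // is true exactly for 10 <= %x < 20, so it becomes
  //   icmp ult i8 (add i8 %x, -10), 10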
  // Instead of trying to imitate the folds for and/or, decompose this 'xor'
  // into those logic ops. That is, try to turn this into an and-of-icmps
  // because we have many folds for that pattern.
  //
  // This is based on a truth table definition of xor:
  // X ^ Y --> (X | Y) & !(X & Y)
  if (Value *OrICmp = simplifyBinOp(Instruction::Or, LHS, RHS, SQ)) {
    // TODO: If OrICmp is true, then the definition of xor simplifies to
    //       !(X&Y).
    // TODO: If OrICmp is false, the whole thing is false (InstSimplify?).
    if (Value *AndICmp = simplifyBinOp(Instruction::And, LHS, RHS, SQ)) {
      // TODO: Independently handle cases where the 'and' side is a constant.
      ICmpInst *X = nullptr, *Y = nullptr;
      if (OrICmp == LHS && AndICmp == RHS) {
        // (LHS | RHS) & !(LHS & RHS) --> LHS & !RHS --> X & !Y
        X = LHS;
        Y = RHS;
      }
      if (OrICmp == RHS && AndICmp == LHS) {
        // !(LHS & RHS) & (LHS | RHS) --> !LHS & RHS --> !Y & X
        X = RHS;
        Y = LHS;
      }
      if (X && Y && (Y->hasOneUse() || canFreelyInvertAllUsersOf(Y, &I))) {
        // Invert the predicate of 'Y', thus inverting its output.
        Y->setPredicate(Y->getInversePredicate());
        // So, are there other uses of Y?
        if (!Y->hasOneUse()) {
          // We need to adapt other uses of Y though. Get a value that matches
          // the original value of Y before inversion. While this increases
          // immediate instruction count, we have just ensured that all the
          // users are freely-invertible, so that 'not' *will* get folded away.
          BuilderTy::InsertPointGuard Guard(Builder);
          // Set insertion point to right after the Y.
          Builder.SetInsertPoint(Y->getParent(), ++(Y->getIterator()));
          Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not");
          // Replace all uses of Y (excluding the one in NotY!) with NotY.
          Worklist.pushUsersToWorkList(*Y);
          Y->replaceUsesWithIf(NotY,
                               [NotY](Use &U) { return U.getUser() != NotY; });
        }
        // All done.
        return Builder.CreateAnd(LHS, RHS);
      }
    }
  }

  return nullptr;
}

/// If we have a masked merge, in the canonical form of:
/// (assuming that A only has one use.)
///   |        A  |  |B|
///   ((x ^ y) & M) ^ y
///    |  D  |
/// * If M is inverted:
///      |  D  |
///   ((x ^ y) & ~M) ^ y
///   We can canonicalize by swapping the final xor operand
///   to eliminate the 'not' of the mask.
///   ((x ^ y) & M) ^ x
/// * If M is a constant, and D has one use, we transform to 'and' / 'or' ops
///   because that shortens the dependency chain and improves analysis:
///   (x & M) | (y & ~M)
static Instruction *visitMaskedMerge(BinaryOperator &I,
                                     InstCombiner::BuilderTy &Builder) {
  Value *B, *X, *D;
  Value *M;
  if (!match(&I, m_c_Xor(m_Value(B),
                         m_OneUse(m_c_And(
                             m_CombineAnd(m_c_Xor(m_Deferred(B), m_Value(X)),
                                          m_Value(D)),
                             m_Value(M))))))
    return nullptr;

  Value *NotM;
  if (match(M, m_Not(m_Value(NotM)))) {
    // De-invert the mask and swap the value in the B part.
    Value *NewA = Builder.CreateAnd(D, NotM);
    return BinaryOperator::CreateXor(NewA, X);
  }

  Constant *C;
  if (D->hasOneUse() && match(M, m_Constant(C))) {
    // Propagating undef is unsafe. Clamp undef elements to -1.
    Type *EltTy = C->getType()->getScalarType();
    C = Constant::replaceUndefsWith(C, ConstantInt::getAllOnesValue(EltTy));
    // Unfold.
    Value *LHS = Builder.CreateAnd(X, C);
    Value *NotC = Builder.CreateNot(C);
    Value *RHS = Builder.CreateAnd(B, NotC);
    return BinaryOperator::CreateOr(LHS, RHS);
  }

  return nullptr;
}

static Instruction *foldNotXor(BinaryOperator &I,
                               InstCombiner::BuilderTy &Builder) {
  Value *X, *Y;
  // FIXME: one-use check is not needed in general, but currently we are unable
  // to fold 'not' into 'icmp', if that 'icmp' has multiple uses. (D35182)
  if (!match(&I, m_Not(m_OneUse(m_Xor(m_Value(X), m_Value(Y))))))
    return nullptr;

  auto hasCommonOperand = [](Value *A, Value *B, Value *C, Value *D) {
    return A == C || A == D || B == C || B == D;
  };

  Value *A, *B, *C, *D;
  // Canonicalize ~((A & B) ^ (A | ?)) -> (A & B) | ~(A | ?)
  // 4 commuted variants
  if (match(X, m_And(m_Value(A), m_Value(B))) &&
      match(Y, m_Or(m_Value(C), m_Value(D))) && hasCommonOperand(A, B, C, D)) {
    Value *NotY = Builder.CreateNot(Y);
    return BinaryOperator::CreateOr(X, NotY);
  };
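  // Editorial illustration of this canonicalization (not upstream text; i32
  // with the shared operand %a is assumed): since (%a & %b) implies
  // (%a | %c), the xor reduces to (%a | %c) & ~(%a & %b), and
  //   ~((%a & %b) ^ (%a | %c)) --> (%a & %b) | ~(%a | %c)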
  // Canonicalize ~((A | ?) ^ (A & B)) -> (A & B) | ~(A | ?)
  // 4 commuted variants
  if (match(Y, m_And(m_Value(A), m_Value(B))) &&
      match(X, m_Or(m_Value(C), m_Value(D))) && hasCommonOperand(A, B, C, D)) {
    Value *NotX = Builder.CreateNot(X);
    return BinaryOperator::CreateOr(Y, NotX);
  };

  return nullptr;
}

/// Canonicalize a shifty way to code absolute value to the more common pattern
/// that uses negation and select.
static Instruction *canonicalizeAbs(BinaryOperator &Xor,
                                    InstCombiner::BuilderTy &Builder) {
  assert(Xor.getOpcode() == Instruction::Xor && "Expected an xor instruction.");

  // There are 4 potential commuted variants. Move the 'ashr' candidate to Op1.
  // We're relying on the fact that we only do this transform when the shift
  // has exactly 2 uses and the add has exactly 1 use (otherwise, we might
  // increase instructions).
  Value *Op0 = Xor.getOperand(0), *Op1 = Xor.getOperand(1);
  if (Op0->hasNUses(2))
    std::swap(Op0, Op1);

  Type *Ty = Xor.getType();
  Value *A;
  const APInt *ShAmt;
  if (match(Op1, m_AShr(m_Value(A), m_APInt(ShAmt))) &&
      Op1->hasNUses(2) && *ShAmt == Ty->getScalarSizeInBits() - 1 &&
      match(Op0, m_OneUse(m_c_Add(m_Specific(A), m_Specific(Op1))))) {
    // Op1 = ashr i32 A, 31   ; smear the sign bit
    // xor (add A, Op1), Op1  ; add -1 and flip bits if negative
    // --> (A < 0) ? -A : A
    Value *IsNeg = Builder.CreateIsNeg(A);
    // Copy the nsw flags from the add to the negate.
    auto *Add = cast<BinaryOperator>(Op0);
    Value *NegA = Add->hasNoUnsignedWrap()
                      ? Constant::getNullValue(A->getType())
                      : Builder.CreateNeg(A, "", Add->hasNoSignedWrap());
    return SelectInst::Create(IsNeg, NegA, A);
  }
  return nullptr;
}

static bool canFreelyInvert(InstCombiner &IC, Value *Op,
                            Instruction *IgnoredUser) {
  auto *I = dyn_cast<Instruction>(Op);
  return I && IC.isFreeToInvert(I, /*WillInvertAllUses=*/true) &&
         IC.canFreelyInvertAllUsersOf(I, IgnoredUser);
}

static Value *freelyInvert(InstCombinerImpl &IC, Value *Op,
                           Instruction *IgnoredUser) {
  auto *I = cast<Instruction>(Op);
  IC.Builder.SetInsertPoint(*I->getInsertionPointAfterDef());
  Value *NotOp = IC.Builder.CreateNot(Op, Op->getName() + ".not");
  Op->replaceUsesWithIf(NotOp,
                        [NotOp](Use &U) { return U.getUser() != NotOp; });
  IC.freelyInvertAllUsersOf(NotOp, IgnoredUser);
  return NotOp;
}

// Transform
//   z = ~(x &/| y)
// into:
//   z = ((~x) |/& (~y))
// iff both x and y are free to invert and all uses of z can be freely updated.
bool InstCombinerImpl::sinkNotIntoLogicalOp(Instruction &I) {
  Value *Op0, *Op1;
  if (!match(&I, m_LogicalOp(m_Value(Op0), m_Value(Op1))))
    return false;

  // If this logic op has not been simplified yet, just bail out and let that
  // happen first. Otherwise, the code below may wrongly invert.
  if (Op0 == Op1)
    return false;
  Instruction::BinaryOps NewOpc =
      match(&I, m_LogicalAnd()) ? Instruction::Or : Instruction::And;
  bool IsBinaryOp = isa<BinaryOperator>(I);

  // Can our users be adapted?
  if (!InstCombiner::canFreelyInvertAllUsersOf(&I, /*IgnoredUser=*/nullptr))
    return false;

  // And can the operands be adapted?
  if (!canFreelyInvert(*this, Op0, &I) || !canFreelyInvert(*this, Op1, &I))
    return false;

  Op0 = freelyInvert(*this, Op0, &I);
  Op1 = freelyInvert(*this, Op1, &I);

  Builder.SetInsertPoint(*I.getInsertionPointAfterDef());
  Value *NewLogicOp;
  if (IsBinaryOp)
    NewLogicOp = Builder.CreateBinOp(NewOpc, Op0, Op1, I.getName() + ".not");
  else
    NewLogicOp =
        Builder.CreateLogicalOp(NewOpc, Op0, Op1, I.getName() + ".not");

  replaceInstUsesWith(I, NewLogicOp);
  // We cannot just create an outer `not`: it will most likely be immediately
  // folded back, reconstructing our initial pattern, and causing an
  // infinite combine loop, so immediately manually fold it away.
  freelyInvertAllUsersOf(NewLogicOp);
  return true;
}

// Transform
//   z = (~x) &/| y
// into:
//   z = ~(x |/& (~y))
// iff y is free to invert and all uses of z can be freely updated.
bool InstCombinerImpl::sinkNotIntoOtherHandOfLogicalOp(Instruction &I) {
  Value *Op0, *Op1;
  if (!match(&I, m_LogicalOp(m_Value(Op0), m_Value(Op1))))
    return false;
  Instruction::BinaryOps NewOpc =
      match(&I, m_LogicalAnd()) ? Instruction::Or : Instruction::And;
  bool IsBinaryOp = isa<BinaryOperator>(I);

  Value *NotOp0 = nullptr;
  Value *NotOp1 = nullptr;
  Value **OpToInvert = nullptr;
  if (match(Op0, m_Not(m_Value(NotOp0))) && canFreelyInvert(*this, Op1, &I)) {
    Op0 = NotOp0;
    OpToInvert = &Op1;
  } else if (match(Op1, m_Not(m_Value(NotOp1))) &&
             canFreelyInvert(*this, Op0, &I)) {
    Op1 = NotOp1;
    OpToInvert = &Op0;
  } else
    return false;

  // And can our users be adapted?
  if (!InstCombiner::canFreelyInvertAllUsersOf(&I, /*IgnoredUser=*/nullptr))
    return false;

  *OpToInvert = freelyInvert(*this, *OpToInvert, &I);

  Builder.SetInsertPoint(*I.getInsertionPointAfterDef());
  Value *NewBinOp;
  if (IsBinaryOp)
    NewBinOp = Builder.CreateBinOp(NewOpc, Op0, Op1, I.getName() + ".not");
  else
    NewBinOp = Builder.CreateLogicalOp(NewOpc, Op0, Op1, I.getName() + ".not");
  replaceInstUsesWith(I, NewBinOp);
  // We cannot just create an outer `not`: it will most likely be immediately
  // folded back, reconstructing our initial pattern, and causing an
  // infinite combine loop, so immediately manually fold it away.
  freelyInvertAllUsersOf(NewBinOp);
  return true;
}

Instruction *InstCombinerImpl::foldNot(BinaryOperator &I) {
  Value *NotOp;
  if (!match(&I, m_Not(m_Value(NotOp))))
    return nullptr;

  // Apply DeMorgan's Law for 'nand' / 'nor' logic with an inverted operand.
  // We must eliminate the and/or (one-use) for these transforms to not
  // increase the instruction count.
  //
  // ~(~X & Y) --> (X | ~Y)
  // ~(Y & ~X) --> (X | ~Y)
  //
  // Note: The logical matches do not check for the commuted patterns because
  //       those are handled via SimplifySelectsFeedingBinaryOp().
  Type *Ty = I.getType();
  Value *X, *Y;
  if (match(NotOp, m_OneUse(m_c_And(m_Not(m_Value(X)), m_Value(Y))))) {
    Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not");
    return BinaryOperator::CreateOr(X, NotY);
  }
  if (match(NotOp, m_OneUse(m_LogicalAnd(m_Not(m_Value(X)), m_Value(Y))))) {
".not");4468return SelectInst::Create(X, ConstantInt::getTrue(Ty), NotY);4469}44704471// ~(~X | Y) --> (X & ~Y)4472// ~(Y | ~X) --> (X & ~Y)4473if (match(NotOp, m_OneUse(m_c_Or(m_Not(m_Value(X)), m_Value(Y))))) {4474Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not");4475return BinaryOperator::CreateAnd(X, NotY);4476}4477if (match(NotOp, m_OneUse(m_LogicalOr(m_Not(m_Value(X)), m_Value(Y))))) {4478Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not");4479return SelectInst::Create(X, NotY, ConstantInt::getFalse(Ty));4480}44814482// Is this a 'not' (~) fed by a binary operator?4483BinaryOperator *NotVal;4484if (match(NotOp, m_BinOp(NotVal))) {4485// ~((-X) | Y) --> (X - 1) & (~Y)4486if (match(NotVal,4487m_OneUse(m_c_Or(m_OneUse(m_Neg(m_Value(X))), m_Value(Y))))) {4488Value *DecX = Builder.CreateAdd(X, ConstantInt::getAllOnesValue(Ty));4489Value *NotY = Builder.CreateNot(Y);4490return BinaryOperator::CreateAnd(DecX, NotY);4491}44924493// ~(~X >>s Y) --> (X >>s Y)4494if (match(NotVal, m_AShr(m_Not(m_Value(X)), m_Value(Y))))4495return BinaryOperator::CreateAShr(X, Y);44964497// Treat lshr with non-negative operand as ashr.4498// ~(~X >>u Y) --> (X >>s Y) iff X is known negative4499if (match(NotVal, m_LShr(m_Not(m_Value(X)), m_Value(Y))) &&4500isKnownNegative(X, SQ.getWithInstruction(NotVal)))4501return BinaryOperator::CreateAShr(X, Y);45024503// Bit-hack form of a signbit test for iN type:4504// ~(X >>s (N - 1)) --> sext i1 (X > -1) to iN4505unsigned FullShift = Ty->getScalarSizeInBits() - 1;4506if (match(NotVal, m_OneUse(m_AShr(m_Value(X), m_SpecificInt(FullShift))))) {4507Value *IsNotNeg = Builder.CreateIsNotNeg(X, "isnotneg");4508return new SExtInst(IsNotNeg, Ty);4509}45104511// If we are inverting a right-shifted constant, we may be able to eliminate4512// the 'not' by inverting the constant and using the opposite shift type.4513// Canonicalization rules ensure that only a negative constant uses 'ashr',4514// but we must check that in case that transform has not fired yet.45154516// ~(C >>s Y) --> ~C >>u Y (when inverting the replicated sign bits)4517Constant *C;4518if (match(NotVal, m_AShr(m_Constant(C), m_Value(Y))) &&4519match(C, m_Negative()))4520return BinaryOperator::CreateLShr(ConstantExpr::getNot(C), Y);45214522// ~(C >>u Y) --> ~C >>s Y (when inverting the replicated sign bits)4523if (match(NotVal, m_LShr(m_Constant(C), m_Value(Y))) &&4524match(C, m_NonNegative()))4525return BinaryOperator::CreateAShr(ConstantExpr::getNot(C), Y);45264527// ~(X + C) --> ~C - X4528if (match(NotVal, m_Add(m_Value(X), m_ImmConstant(C))))4529return BinaryOperator::CreateSub(ConstantExpr::getNot(C), X);45304531// ~(X - Y) --> ~X + Y4532// FIXME: is it really beneficial to sink the `not` here?4533if (match(NotVal, m_Sub(m_Value(X), m_Value(Y))))4534if (isa<Constant>(X) || NotVal->hasOneUse())4535return BinaryOperator::CreateAdd(Builder.CreateNot(X), Y);45364537// ~(~X + Y) --> X - Y4538if (match(NotVal, m_c_Add(m_Not(m_Value(X)), m_Value(Y))))4539return BinaryOperator::CreateWithCopiedFlags(Instruction::Sub, X, Y,4540NotVal);4541}45424543// not (cmp A, B) = !cmp A, B4544CmpInst::Predicate Pred;4545if (match(NotOp, m_Cmp(Pred, m_Value(), m_Value())) &&4546(NotOp->hasOneUse() ||4547InstCombiner::canFreelyInvertAllUsersOf(cast<Instruction>(NotOp),4548/*IgnoredUser=*/nullptr))) {4549cast<CmpInst>(NotOp)->setPredicate(CmpInst::getInversePredicate(Pred));4550freelyInvertAllUsersOf(NotOp);4551return &I;4552}45534554// Move a 'not' ahead of casts of a bool to enable logic reduction:4555// not (bitcast 

  // Move a 'not' ahead of casts of a bool to enable logic reduction:
  // not (bitcast (sext i1 X)) --> bitcast (sext (not i1 X))
  if (match(NotOp, m_OneUse(m_BitCast(m_OneUse(m_SExt(m_Value(X)))))) &&
      X->getType()->isIntOrIntVectorTy(1)) {
    Type *SextTy = cast<BitCastOperator>(NotOp)->getSrcTy();
    Value *NotX = Builder.CreateNot(X);
    Value *Sext = Builder.CreateSExt(NotX, SextTy);
    return CastInst::CreateBitOrPointerCast(Sext, Ty);
  }

  if (auto *NotOpI = dyn_cast<Instruction>(NotOp))
    if (sinkNotIntoLogicalOp(*NotOpI))
      return &I;

  // Eliminate a bitwise 'not' op of 'not' min/max by inverting the min/max:
  // ~min(~X, ~Y) --> max(X, Y)
  // ~max(~X, Y) --> min(X, ~Y)
  auto *II = dyn_cast<IntrinsicInst>(NotOp);
  if (II && II->hasOneUse()) {
    if (match(NotOp, m_c_MaxOrMin(m_Not(m_Value(X)), m_Value(Y)))) {
      Intrinsic::ID InvID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
      Value *NotY = Builder.CreateNot(Y);
      Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, NotY);
      return replaceInstUsesWith(I, InvMaxMin);
    }

    if (II->getIntrinsicID() == Intrinsic::is_fpclass) {
      ConstantInt *ClassMask = cast<ConstantInt>(II->getArgOperand(1));
      II->setArgOperand(
          1, ConstantInt::get(ClassMask->getType(),
                              ~ClassMask->getZExtValue() & fcAllFlags));
      return replaceInstUsesWith(I, II);
    }
  }
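
  // A sketch of the min/max inversion on concrete IR (hypothetical names):
  //   %x.not = xor i8 %x, -1
  //   %m     = call i8 @llvm.smin.i8(i8 %x.not, i8 %y)
  //   %r     = xor i8 %m, -1
  // becomes (~smin(~x, y) == smax(x, ~y)):
  //   %y.not = xor i8 %y, -1
  //   %r     = call i8 @llvm.smax.i8(i8 %x, i8 %y.not)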

  if (NotOp->hasOneUse()) {
    // Pull 'not' into operands of select if both operands are one-use compares
    // or one is a one-use compare and the other one is a constant.
    // Inverting the predicates eliminates the 'not' operation.
    // Example:
    //   not (select ?, (cmp TPred, ?, ?), (cmp FPred, ?, ?)) -->
    //     select ?, (cmp InvTPred, ?, ?), (cmp InvFPred, ?, ?)
    //   not (select ?, (cmp TPred, ?, ?), true) -->
    //     select ?, (cmp InvTPred, ?, ?), false
    if (auto *Sel = dyn_cast<SelectInst>(NotOp)) {
      Value *TV = Sel->getTrueValue();
      Value *FV = Sel->getFalseValue();
      auto *CmpT = dyn_cast<CmpInst>(TV);
      auto *CmpF = dyn_cast<CmpInst>(FV);
      bool InvertibleT = (CmpT && CmpT->hasOneUse()) || isa<Constant>(TV);
      bool InvertibleF = (CmpF && CmpF->hasOneUse()) || isa<Constant>(FV);
      if (InvertibleT && InvertibleF) {
        if (CmpT)
          CmpT->setPredicate(CmpT->getInversePredicate());
        else
          Sel->setTrueValue(ConstantExpr::getNot(cast<Constant>(TV)));
        if (CmpF)
          CmpF->setPredicate(CmpF->getInversePredicate());
        else
          Sel->setFalseValue(ConstantExpr::getNot(cast<Constant>(FV)));
        return replaceInstUsesWith(I, Sel);
      }
    }
  }

  if (Instruction *NewXor = foldNotXor(I, Builder))
    return NewXor;

  // TODO: Could handle multi-use better by checking if all uses of NotOp
  // (other than I) can be inverted.
  if (Value *R = getFreelyInverted(NotOp, NotOp->hasOneUse(), &Builder))
    return replaceInstUsesWith(I, R);

  return nullptr;
}

// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
// here. We should standardize that construct where it is needed or choose
// some other way to ensure that commuted variants of patterns are not missed.
Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
  if (Value *V = simplifyXorInst(I.getOperand(0), I.getOperand(1),
                                 SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  if (SimplifyAssociativeOrCommutative(I))
    return &I;

  if (Instruction *X = foldVectorBinop(I))
    return X;

  if (Instruction *Phi = foldBinopWithPhiOperands(I))
    return Phi;

  if (Instruction *NewXor = foldXorToXor(I, Builder))
    return NewXor;

  // (A&B)^(A&C) -> A&(B^C), etc.
  if (Value *V = foldUsingDistributiveLaws(I))
    return replaceInstUsesWith(I, V);

  // See if we can simplify any instructions used by the instruction whose
  // sole purpose is to compute bits we don't care about.
  if (SimplifyDemandedInstructionBits(I))
    return &I;

  if (Instruction *R = foldNot(I))
    return R;

  if (Instruction *R = foldBinOpShiftWithShift(I))
    return R;

  // Fold (X & M) ^ (Y & ~M) -> (X & M) | (Y & ~M)
  // This is a special case in haveNoCommonBitsSet, but the computeKnownBits
  // calls in there are unnecessary as SimplifyDemandedInstructionBits should
  // have already taken care of those cases.
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  Value *M;
  if (match(&I, m_c_Xor(m_c_And(m_Not(m_Value(M)), m_Value()),
                        m_c_And(m_Deferred(M), m_Value())))) {
    if (isGuaranteedNotToBeUndef(M))
      return BinaryOperator::CreateDisjointOr(Op0, Op1);
    else
      return BinaryOperator::CreateOr(Op0, Op1);
  }
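
  // A sketch of the masked-merge fold on concrete IR (hypothetical names):
  //   %m.not = xor i8 %m, -1
  //   %a     = and i8 %x, %m
  //   %b     = and i8 %y, %m.not
  //   %r     = xor i8 %a, %b
  // becomes (%a and %b share no set bits, so xor equals or; the 'disjoint'
  // flag is only added when %m is known not to be undef/poison):
  //   %r     = or disjoint i8 %a, %b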

  if (Instruction *Xor = visitMaskedMerge(I, Builder))
    return Xor;

  Value *X, *Y;
  Constant *C1;
  if (match(Op1, m_Constant(C1))) {
    Constant *C2;

    if (match(Op0, m_OneUse(m_Or(m_Value(X), m_ImmConstant(C2)))) &&
        match(C1, m_ImmConstant())) {
      // (X | C2) ^ C1 --> (X & ~C2) ^ (C1 ^ C2)
      C2 = Constant::replaceUndefsWith(
          C2, Constant::getAllOnesValue(C2->getType()->getScalarType()));
      Value *And = Builder.CreateAnd(
          X, Constant::mergeUndefsWith(ConstantExpr::getNot(C2), C1));
      return BinaryOperator::CreateXor(
          And, Constant::mergeUndefsWith(ConstantExpr::getXor(C1, C2), C1));
    }

    // Use DeMorgan and reassociation to eliminate a 'not' op.
    if (match(Op0, m_OneUse(m_Or(m_Not(m_Value(X)), m_Constant(C2))))) {
      // (~X | C2) ^ C1 --> ((X & ~C2) ^ -1) ^ C1 --> (X & ~C2) ^ ~C1
      Value *And = Builder.CreateAnd(X, ConstantExpr::getNot(C2));
      return BinaryOperator::CreateXor(And, ConstantExpr::getNot(C1));
    }
    if (match(Op0, m_OneUse(m_And(m_Not(m_Value(X)), m_Constant(C2))))) {
      // (~X & C2) ^ C1 --> ((X | ~C2) ^ -1) ^ C1 --> (X | ~C2) ^ ~C1
      Value *Or = Builder.CreateOr(X, ConstantExpr::getNot(C2));
      return BinaryOperator::CreateXor(Or, ConstantExpr::getNot(C1));
    }

    // Convert xor ([trunc] (ashr X, BW-1)), C =>
    //   select(X >s -1, C, ~C)
    // The ashr produces an all-zeros or all-ones value, so the xor either
    // leaves the constant alone or inverts it, depending on whether the
    // input is less than zero.
    const APInt *CA;
    if (match(Op0, m_OneUse(m_TruncOrSelf(
                       m_AShr(m_Value(X), m_APIntAllowPoison(CA))))) &&
        *CA == X->getType()->getScalarSizeInBits() - 1 &&
        !match(C1, m_AllOnes())) {
      assert(!C1->isZeroValue() && "Unexpected xor with 0");
      Value *IsNotNeg = Builder.CreateIsNotNeg(X);
      return SelectInst::Create(IsNotNeg, Op1, Builder.CreateNot(Op1));
    }
  }

  Type *Ty = I.getType();
  {
    const APInt *RHSC;
    if (match(Op1, m_APInt(RHSC))) {
      Value *X;
      const APInt *C;
      // (C - X) ^ signmaskC --> (C + signmaskC) - X
      if (RHSC->isSignMask() && match(Op0, m_Sub(m_APInt(C), m_Value(X))))
        return BinaryOperator::CreateSub(ConstantInt::get(Ty, *C + *RHSC), X);

      // (X + C) ^ signmaskC --> X + (C + signmaskC)
      if (RHSC->isSignMask() && match(Op0, m_Add(m_Value(X), m_APInt(C))))
        return BinaryOperator::CreateAdd(X, ConstantInt::get(Ty, *C + *RHSC));

      // (X | C) ^ RHSC --> X ^ (C ^ RHSC) iff X & C == 0
      if (match(Op0, m_Or(m_Value(X), m_APInt(C))) &&
          MaskedValueIsZero(X, *C, 0, &I))
        return BinaryOperator::CreateXor(X, ConstantInt::get(Ty, *C ^ *RHSC));

      // When X is a power-of-two or zero and zero input is poison:
      // ctlz(i32 X) ^ 31 --> cttz(X)
      // cttz(i32 X) ^ 31 --> ctlz(X)
      auto *II = dyn_cast<IntrinsicInst>(Op0);
      if (II && II->hasOneUse() && *RHSC == Ty->getScalarSizeInBits() - 1) {
        Intrinsic::ID IID = II->getIntrinsicID();
        if ((IID == Intrinsic::ctlz || IID == Intrinsic::cttz) &&
            match(II->getArgOperand(1), m_One()) &&
            isKnownToBeAPowerOfTwo(II->getArgOperand(0), /*OrZero=*/true)) {
          IID = (IID == Intrinsic::ctlz) ? Intrinsic::cttz : Intrinsic::ctlz;
          Function *F = Intrinsic::getDeclaration(II->getModule(), IID, Ty);
          return CallInst::Create(F, {II->getArgOperand(0), Builder.getTrue()});
        }
      }
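
      // A sketch of the count-zeros fold on concrete IR (hypothetical names;
      // %p is known to be a power of two):
      //   %z = call i8 @llvm.ctlz.i8(i8 %p, i1 true)
      //   %r = xor i8 %z, 7
      // becomes (for %p == 1 << k: ctlz gives 7 - k, and (7 - k) ^ 7 == k):
      //   %r = call i8 @llvm.cttz.i8(i8 %p, i1 true)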

      // If RHSC is inverting the remaining bits of shifted X,
      // canonicalize to a 'not' before the shift to help SCEV and codegen:
      // (X << C) ^ RHSC --> ~X << C
      if (match(Op0, m_OneUse(m_Shl(m_Value(X), m_APInt(C)))) &&
          *RHSC == APInt::getAllOnes(Ty->getScalarSizeInBits()).shl(*C)) {
        Value *NotX = Builder.CreateNot(X);
        return BinaryOperator::CreateShl(NotX, ConstantInt::get(Ty, *C));
      }
      // (X >>u C) ^ RHSC --> ~X >>u C
      if (match(Op0, m_OneUse(m_LShr(m_Value(X), m_APInt(C)))) &&
          *RHSC == APInt::getAllOnes(Ty->getScalarSizeInBits()).lshr(*C)) {
        Value *NotX = Builder.CreateNot(X);
        return BinaryOperator::CreateLShr(NotX, ConstantInt::get(Ty, *C));
      }
      // TODO: We could handle 'ashr' here as well. That would be matching
      //       a 'not' op and moving it before the shift. Doing that requires
      //       preventing the inverse fold in canShiftBinOpWithConstantRHS().
    }

    // If we are XORing the sign bit of a floating-point value, convert
    // this to fneg, then cast back to integer.
    //
    // This is a generous interpretation of noimplicitfloat; this is not a
    // true floating-point operation.
    //
    // Assumes any IEEE-represented type has the sign bit in the high bit.
    // TODO: Unify with the APInt matcher. This version allows undef, unlike
    // m_APInt.
    Value *CastOp;
    if (match(Op0, m_ElementWiseBitCast(m_Value(CastOp))) &&
        match(Op1, m_SignMask()) &&
        !Builder.GetInsertBlock()->getParent()->hasFnAttribute(
            Attribute::NoImplicitFloat)) {
      Type *EltTy = CastOp->getType()->getScalarType();
      if (EltTy->isFloatingPointTy() && EltTy->isIEEE()) {
        Value *FNeg = Builder.CreateFNeg(CastOp);
        return new BitCastInst(FNeg, I.getType());
      }
    }
  }

  // FIXME: This should not be limited to scalar (pull into APInt match above).
  {
    Value *X;
    ConstantInt *C1, *C2, *C3;
    // ((X ^ C1) >> C2) ^ C3 -> (X >> C2) ^ ((C1 >> C2) ^ C3)
    if (match(Op1, m_ConstantInt(C3)) &&
        match(Op0, m_LShr(m_Xor(m_Value(X), m_ConstantInt(C1)),
                          m_ConstantInt(C2))) &&
        Op0->hasOneUse()) {
      // Fold (C1 >> C2) ^ C3.
      APInt FoldConst = C1->getValue().lshr(C2->getValue());
      FoldConst ^= C3->getValue();
      // Prepare the two operands.
      auto *Opnd0 = Builder.CreateLShr(X, C2);
      Opnd0->takeName(Op0);
      return BinaryOperator::CreateXor(Opnd0, ConstantInt::get(Ty, FoldConst));
    }
  }

  if (Instruction *FoldedLogic = foldBinOpIntoSelectOrPhi(I))
    return FoldedLogic;

  // Y ^ (X | Y) --> X & ~Y
  // Y ^ (Y | X) --> X & ~Y
  if (match(Op1, m_OneUse(m_c_Or(m_Value(X), m_Specific(Op0)))))
    return BinaryOperator::CreateAnd(X, Builder.CreateNot(Op0));
  // (X | Y) ^ Y --> X & ~Y
  // (Y | X) ^ Y --> X & ~Y
  if (match(Op0, m_OneUse(m_c_Or(m_Value(X), m_Specific(Op1)))))
    return BinaryOperator::CreateAnd(X, Builder.CreateNot(Op1));

  // Y ^ (X & Y) --> ~X & Y
  // Y ^ (Y & X) --> ~X & Y
  if (match(Op1, m_OneUse(m_c_And(m_Value(X), m_Specific(Op0)))))
    return BinaryOperator::CreateAnd(Op0, Builder.CreateNot(X));
  // (X & Y) ^ Y --> ~X & Y
  // (Y & X) ^ Y --> ~X & Y
  // Canonical form is (X & C) ^ C; don't touch that.
  // TODO: A 'not' op is better for analysis and codegen, but demanded bits
  //       must be fixed to prefer that (otherwise we get infinite looping).
  if (!match(Op1, m_Constant()) &&
      match(Op0, m_OneUse(m_c_And(m_Value(X), m_Specific(Op1)))))
    return BinaryOperator::CreateAnd(Op1, Builder.CreateNot(X));

  Value *A, *B, *C;
  // (A ^ B) ^ (A | C) --> (~A & C) ^ B -- There are 4 commuted variants.
  if (match(&I, m_c_Xor(m_OneUse(m_Xor(m_Value(A), m_Value(B))),
                        m_OneUse(m_c_Or(m_Deferred(A), m_Value(C))))))
    return BinaryOperator::CreateXor(
        Builder.CreateAnd(Builder.CreateNot(A), C), B);

  // (A ^ B) ^ (B | C) --> (~B & C) ^ A -- There are 4 commuted variants.
  if (match(&I, m_c_Xor(m_OneUse(m_Xor(m_Value(A), m_Value(B))),
                        m_OneUse(m_c_Or(m_Deferred(B), m_Value(C))))))
    return BinaryOperator::CreateXor(
        Builder.CreateAnd(Builder.CreateNot(B), C), A);

  // (A & B) ^ (A ^ B) -> (A | B)
  if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
      match(Op1, m_c_Xor(m_Specific(A), m_Specific(B))))
    return BinaryOperator::CreateOr(A, B);
  // (A ^ B) ^ (A & B) -> (A | B)
  if (match(Op0, m_Xor(m_Value(A), m_Value(B))) &&
      match(Op1, m_c_And(m_Specific(A), m_Specific(B))))
    return BinaryOperator::CreateOr(A, B);

  // (A & ~B) ^ ~A -> ~(A & B)
  // (~B & A) ^ ~A -> ~(A & B)
  if (match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) &&
      match(Op1, m_Not(m_Specific(A))))
    return BinaryOperator::CreateNot(Builder.CreateAnd(A, B));

  // (~A & B) ^ A --> A | B -- There are 4 commuted variants.
  if (match(&I, m_c_Xor(m_c_And(m_Not(m_Value(A)), m_Value(B)), m_Deferred(A))))
    return BinaryOperator::CreateOr(A, B);
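
  // A sketch of the fold above on concrete IR (hypothetical names):
  //   %a.not = xor i8 %a, -1
  //   %t     = and i8 %a.not, %b
  //   %r     = xor i8 %t, %a
  // becomes (per bit: a set %a bit forces the result bit to 1, and a clear
  // %a bit passes the %b bit through):
  //   %r     = or i8 %a, %b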

  // (~A | B) ^ A --> ~(A & B)
  if (match(Op0, m_OneUse(m_c_Or(m_Not(m_Specific(Op1)), m_Value(B)))))
    return BinaryOperator::CreateNot(Builder.CreateAnd(Op1, B));

  // A ^ (~A | B) --> ~(A & B)
  if (match(Op1, m_OneUse(m_c_Or(m_Not(m_Specific(Op0)), m_Value(B)))))
    return BinaryOperator::CreateNot(Builder.CreateAnd(Op0, B));

  // (A | B) ^ (A | C) --> (B ^ C) & ~A -- There are 4 commuted variants.
  // TODO: Loosen one-use restriction if common operand is a constant.
  Value *D;
  if (match(Op0, m_OneUse(m_Or(m_Value(A), m_Value(B)))) &&
      match(Op1, m_OneUse(m_Or(m_Value(C), m_Value(D))))) {
    if (B == C || B == D)
      std::swap(A, B);
    if (A == C)
      std::swap(C, D);
    if (A == D) {
      Value *NotA = Builder.CreateNot(A);
      return BinaryOperator::CreateAnd(Builder.CreateXor(B, C), NotA);
    }
  }

  // (A & B) ^ (A | C) --> A ? ~B : C -- There are 4 commuted variants.
  if (I.getType()->isIntOrIntVectorTy(1) &&
      match(Op0, m_OneUse(m_LogicalAnd(m_Value(A), m_Value(B)))) &&
      match(Op1, m_OneUse(m_LogicalOr(m_Value(C), m_Value(D))))) {
    bool NeedFreeze = isa<SelectInst>(Op0) && isa<SelectInst>(Op1) && B == D;
    if (B == C || B == D)
      std::swap(A, B);
    if (A == C)
      std::swap(C, D);
    if (A == D) {
      if (NeedFreeze)
        A = Builder.CreateFreeze(A);
      Value *NotB = Builder.CreateNot(B);
      return SelectInst::Create(A, NotB, C);
    }
  }

  if (auto *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
    if (auto *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
      if (Value *V = foldXorOfICmps(LHS, RHS, I))
        return replaceInstUsesWith(I, V);

  if (Instruction *CastedXor = foldCastedBitwiseLogic(I))
    return CastedXor;

  if (Instruction *Abs = canonicalizeAbs(I, Builder))
    return Abs;

  // Otherwise, if all else has failed, try to hoist the xor-by-constant:
  //   (X ^ C) ^ Y --> (X ^ Y) ^ C
  // Just like we do in other places, we completely avoid the fold
  // for constantexprs, at least to avoid an endless combine loop.
  if (match(&I, m_c_Xor(m_OneUse(m_Xor(m_CombineAnd(m_Value(X),
                                                    m_Unless(m_ConstantExpr())),
                                       m_ImmConstant(C1))),
                        m_Value(Y))))
    return BinaryOperator::CreateXor(Builder.CreateXor(X, Y), C1);

  if (Instruction *R = reassociateForUses(I, Builder))
    return R;

  if (Instruction *Canonicalized = canonicalizeLogicFirst(I, Builder))
    return Canonicalized;

  if (Instruction *Folded = foldLogicOfIsFPClass(I, Op0, Op1))
    return Folded;

  if (Instruction *Folded = canonicalizeConditionalNegationViaMathToSelect(I))
    return Folded;

  if (Instruction *Res = foldBinOpOfDisplacedShifts(I))
    return Res;

  if (Instruction *Res = foldBitwiseLogicWithIntrinsics(I, Builder))
    return Res;

  return nullptr;
}