//===- InstCombineAddSub.cpp ------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visit functions for add, fadd, sub, and fsub.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AlignOf.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <cassert>
#include <utility>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "instcombine"

namespace {

/// Class representing the coefficient of a floating-point addend.
/// This class needs to be highly efficient, which is especially true for
/// the constructor. As of this writing, the cost of the default constructor
/// is merely a 4-byte zero store (assuming the compiler is able to perform
/// write-merging).
///
class FAddendCoef {
public:
  // The constructor has to initialize an APFloat, which is unnecessary for
  // most addends, whose coefficient is either 1 or -1. So, the constructor
  // is expensive. In order to avoid that cost, we reuse some instances
  // whenever possible. The pre-created instances FAddCombine::Add[0-5]
  // embody this idea.
  FAddendCoef() = default;
  ~FAddendCoef();

  // If possible, don't define operator+/operator- etc because these
  // operators inevitably call FAddendCoef's constructor which is not cheap.
  void operator=(const FAddendCoef &A);
  void operator+=(const FAddendCoef &A);
  void operator*=(const FAddendCoef &S);

  void set(short C) {
    assert(!insaneIntVal(C) && "Insane coefficient");
    IsFp = false; IntVal = C;
  }

  void set(const APFloat& C);

  void negate();

  bool isZero() const { return isInt() ? !IntVal : getFpVal().isZero(); }
  Value *getValue(Type *) const;

  bool isOne() const { return isInt() && IntVal == 1; }
  bool isTwo() const { return isInt() && IntVal == 2; }
  bool isMinusOne() const { return isInt() && IntVal == -1; }
  bool isMinusTwo() const { return isInt() && IntVal == -2; }

private:
  bool insaneIntVal(int V) { return V > 4 || V < -4; }

  APFloat *getFpValPtr() { return reinterpret_cast<APFloat *>(&FpValBuf); }

  const APFloat *getFpValPtr() const {
    return reinterpret_cast<const APFloat *>(&FpValBuf);
  }

  const APFloat &getFpVal() const {
    assert(IsFp && BufHasFpVal && "Incorrect state");
    return *getFpValPtr();
  }

  APFloat &getFpVal() {
    assert(IsFp && BufHasFpVal && "Incorrect state");
    return *getFpValPtr();
  }

  bool isInt() const { return !IsFp; }

  // If the coefficient is represented by an integer, promote it to a
  // floating point.
  void convertToFpType(const fltSemantics &Sem);

  // Construct an APFloat from a signed integer.
  // TODO: We should get rid of this function when APFloat can be constructed
  // from a *SIGNED* integer.
  APFloat createAPFloatFromInt(const fltSemantics &Sem, int Val);

  bool IsFp = false;

  // True iff FpValBuf contains an instance of APFloat.
  bool BufHasFpVal = false;

  // The integer coefficient of an individual addend is either 1 or -1,
  // and we try to simplify at most 4 addends from at most two neighboring
  // instructions. So the range of <IntVal> falls in [-4, 4]. APInt is
  // overkill for this purpose.
  short IntVal = 0;

  AlignedCharArrayUnion<APFloat> FpValBuf;
};

/// FAddend is used to represent a floating-point addend. An addend is
/// represented as <C, V>, where V is a symbolic value, and C is a
/// constant coefficient. A constant addend is represented as <C, 0>.
class FAddend {
public:
  FAddend() = default;

  void operator+=(const FAddend &T) {
    assert((Val == T.Val) && "Symbolic-values disagree");
    Coeff += T.Coeff;
  }

  Value *getSymVal() const { return Val; }
  const FAddendCoef &getCoef() const { return Coeff; }

  bool isConstant() const { return Val == nullptr; }
  bool isZero() const { return Coeff.isZero(); }

  void set(short Coefficient, Value *V) {
    Coeff.set(Coefficient);
    Val = V;
  }
  void set(const APFloat &Coefficient, Value *V) {
    Coeff.set(Coefficient);
    Val = V;
  }
  void set(const ConstantFP *Coefficient, Value *V) {
    Coeff.set(Coefficient->getValueAPF());
    Val = V;
  }

  void negate() { Coeff.negate(); }

  /// Drill down the U-D chain one step to find the definition of V, and
  /// try to break the definition into one or two addends.
  static unsigned drillValueDownOneStep(Value* V, FAddend &A0, FAddend &A1);

  /// Similar to FAddend::drillDownOneStep() except that the value being
  /// split is the addend itself.
  unsigned drillAddendDownOneStep(FAddend &Addend0, FAddend &Addend1) const;

private:
  void Scale(const FAddendCoef& ScaleAmt) { Coeff *= ScaleAmt; }

  // This addend has the value of "Coeff * Val".
  Value *Val = nullptr;
  FAddendCoef Coeff;
};
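// Illustrative example (editor's note, not part of the original source): for
// "fsub float %x, 2.5", drillValueDownOneStep() produces the two addends
// <1, %x> and <-2.5, nullptr>; for "fmul float %x, 3.0" it produces the
// single addend <3.0, %x>.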

/// FAddCombine is the class for optimizing an unsafe fadd/fsub along
/// with at most two of its neighboring instructions.
///
class FAddCombine {
public:
  FAddCombine(InstCombiner::BuilderTy &B) : Builder(B) {}

  Value *simplify(Instruction *FAdd);

private:
  using AddendVect = SmallVector<const FAddend *, 4>;

  Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota);

  /// Convert the given addend to a Value.
  Value *createAddendVal(const FAddend &A, bool& NeedNeg);

  /// Return the number of instructions needed to emit the N-ary addition.
  unsigned calcInstrNumber(const AddendVect& Vect);

  Value *createFSub(Value *Opnd0, Value *Opnd1);
  Value *createFAdd(Value *Opnd0, Value *Opnd1);
  Value *createFMul(Value *Opnd0, Value *Opnd1);
  Value *createFNeg(Value *V);
  Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota);
  void createInstPostProc(Instruction *NewInst, bool NoNumber = false);

  // Debugging stuff is clustered here.
#ifndef NDEBUG
  unsigned CreateInstrNum;
  void initCreateInstNum() { CreateInstrNum = 0; }
  void incCreateInstNum() { CreateInstrNum++; }
#else
  void initCreateInstNum() {}
  void incCreateInstNum() {}
#endif

  InstCombiner::BuilderTy &Builder;
  Instruction *Instr = nullptr;
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
//
// Implementation of
//    {FAddendCoef, FAddend, FAddition, FAddCombine}.
//
//===----------------------------------------------------------------------===//
FAddendCoef::~FAddendCoef() {
  if (BufHasFpVal)
    getFpValPtr()->~APFloat();
}

void FAddendCoef::set(const APFloat& C) {
  APFloat *P = getFpValPtr();

  if (isInt()) {
    // As the buffer is a meaningless byte stream, we cannot call
    // APFloat::operator=().
    new(P) APFloat(C);
  } else
    *P = C;

  IsFp = BufHasFpVal = true;
}

void FAddendCoef::convertToFpType(const fltSemantics &Sem) {
  if (!isInt())
    return;

  APFloat *P = getFpValPtr();
  if (IntVal > 0)
    new(P) APFloat(Sem, IntVal);
  else {
    new(P) APFloat(Sem, 0 - IntVal);
    P->changeSign();
  }
  IsFp = BufHasFpVal = true;
}

APFloat FAddendCoef::createAPFloatFromInt(const fltSemantics &Sem, int Val) {
  if (Val >= 0)
    return APFloat(Sem, Val);

  APFloat T(Sem, 0 - Val);
  T.changeSign();

  return T;
}

void FAddendCoef::operator=(const FAddendCoef &That) {
  if (That.isInt())
    set(That.IntVal);
  else
    set(That.getFpVal());
}

void FAddendCoef::operator+=(const FAddendCoef &That) {
  RoundingMode RndMode = RoundingMode::NearestTiesToEven;
  if (isInt() == That.isInt()) {
    if (isInt())
      IntVal += That.IntVal;
    else
      getFpVal().add(That.getFpVal(), RndMode);
    return;
  }

  if (isInt()) {
    const APFloat &T = That.getFpVal();
    convertToFpType(T.getSemantics());
    getFpVal().add(T, RndMode);
    return;
  }

  APFloat &T = getFpVal();
  T.add(createAPFloatFromInt(T.getSemantics(), That.IntVal), RndMode);
}

void FAddendCoef::operator*=(const FAddendCoef &That) {
  if (That.isOne())
    return;

  if (That.isMinusOne()) {
    negate();
    return;
  }

  if (isInt() && That.isInt()) {
    int Res = IntVal * (int)That.IntVal;
    assert(!insaneIntVal(Res) && "Insane int value");
    IntVal = Res;
    return;
  }

  const fltSemantics &Semantic =
      isInt() ? That.getFpVal().getSemantics() : getFpVal().getSemantics();

  if (isInt())
    convertToFpType(Semantic);
  APFloat &F0 = getFpVal();

  if (That.isInt())
    F0.multiply(createAPFloatFromInt(Semantic, That.IntVal),
                APFloat::rmNearestTiesToEven);
  else
    F0.multiply(That.getFpVal(), APFloat::rmNearestTiesToEven);
}

void FAddendCoef::negate() {
  if (isInt())
    IntVal = 0 - IntVal;
  else
    getFpVal().changeSign();
}

Value *FAddendCoef::getValue(Type *Ty) const {
  return isInt() ?
    ConstantFP::get(Ty, float(IntVal)) :
    ConstantFP::get(Ty->getContext(), getFpVal());
}

// The definition of <Val>     Addends
// =========================================
//  A +  B                     <1, A>, <1, B>
//  A -  B                     <1, A>, <-1, B>
//  0 -  B                     <-1, B>
//  C *  A                     <C, A>
//  A +  C                     <1, A>, <C, NULL>
//  0 +/- 0                    <0, NULL> (corner case)
//
// Legend: A and B are not constant, C is constant
unsigned FAddend::drillValueDownOneStep
  (Value *Val, FAddend &Addend0, FAddend &Addend1) {
  Instruction *I = nullptr;
  if (!Val || !(I = dyn_cast<Instruction>(Val)))
    return 0;

  unsigned Opcode = I->getOpcode();

  if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub) {
    ConstantFP *C0, *C1;
    Value *Opnd0 = I->getOperand(0);
    Value *Opnd1 = I->getOperand(1);
    if ((C0 = dyn_cast<ConstantFP>(Opnd0)) && C0->isZero())
      Opnd0 = nullptr;

    if ((C1 = dyn_cast<ConstantFP>(Opnd1)) && C1->isZero())
      Opnd1 = nullptr;

    if (Opnd0) {
      if (!C0)
        Addend0.set(1, Opnd0);
      else
        Addend0.set(C0, nullptr);
    }

    if (Opnd1) {
      FAddend &Addend = Opnd0 ? Addend1 : Addend0;
      if (!C1)
        Addend.set(1, Opnd1);
      else
        Addend.set(C1, nullptr);
      if (Opcode == Instruction::FSub)
        Addend.negate();
    }

    if (Opnd0 || Opnd1)
      return Opnd0 && Opnd1 ? 2 : 1;

    // Both operands are zero. Weird!
    Addend0.set(APFloat(C0->getValueAPF().getSemantics()), nullptr);
    return 1;
  }

  if (I->getOpcode() == Instruction::FMul) {
    Value *V0 = I->getOperand(0);
    Value *V1 = I->getOperand(1);
    if (ConstantFP *C = dyn_cast<ConstantFP>(V0)) {
      Addend0.set(C, V1);
      return 1;
    }

    if (ConstantFP *C = dyn_cast<ConstantFP>(V1)) {
      Addend0.set(C, V0);
      return 1;
    }
  }

  return 0;
}

// Try to break *this* addend into two addends. e.g. Suppose this addend is
// <2.3, V>, and V = X + Y; by calling this function, we obtain two addends,
// i.e. <2.3, X> and <2.3, Y>.
unsigned FAddend::drillAddendDownOneStep
  (FAddend &Addend0, FAddend &Addend1) const {
  if (isConstant())
    return 0;

  unsigned BreakNum = FAddend::drillValueDownOneStep(Val, Addend0, Addend1);
  if (!BreakNum || Coeff.isOne())
    return BreakNum;

  Addend0.Scale(Coeff);

  if (BreakNum == 2)
    Addend1.Scale(Coeff);

  return BreakNum;
}

Value *FAddCombine::simplify(Instruction *I) {
  assert(I->hasAllowReassoc() && I->hasNoSignedZeros() &&
         "Expected 'reassoc'+'nsz' instruction");

  // Currently we are not able to handle vector types.
  if (I->getType()->isVectorTy())
    return nullptr;

  assert((I->getOpcode() == Instruction::FAdd ||
          I->getOpcode() == Instruction::FSub) && "Expect add/sub");

  // Save the instruction before calling other member-functions.
  Instr = I;

  FAddend Opnd0, Opnd1, Opnd0_0, Opnd0_1, Opnd1_0, Opnd1_1;

  unsigned OpndNum = FAddend::drillValueDownOneStep(I, Opnd0, Opnd1);

  // Step 1: Expand the 1st addend into Opnd0_0 and Opnd0_1.
  unsigned Opnd0_ExpNum = 0;
  unsigned Opnd1_ExpNum = 0;

  if (!Opnd0.isConstant())
    Opnd0_ExpNum = Opnd0.drillAddendDownOneStep(Opnd0_0, Opnd0_1);

  // Step 2: Expand the 2nd addend into Opnd1_0 and Opnd1_1.
  if (OpndNum == 2 && !Opnd1.isConstant())
    Opnd1_ExpNum = Opnd1.drillAddendDownOneStep(Opnd1_0, Opnd1_1);

  // Step 3: Try to optimize Opnd0_0 + Opnd0_1 + Opnd1_0 + Opnd1_1
  if (Opnd0_ExpNum && Opnd1_ExpNum) {
    AddendVect AllOpnds;
    AllOpnds.push_back(&Opnd0_0);
    AllOpnds.push_back(&Opnd1_0);
    if (Opnd0_ExpNum == 2)
      AllOpnds.push_back(&Opnd0_1);
    if (Opnd1_ExpNum == 2)
      AllOpnds.push_back(&Opnd1_1);

    // Compute instruction quota. We should save at least one instruction.
    unsigned InstQuota = 0;

    Value *V0 = I->getOperand(0);
    Value *V1 = I->getOperand(1);
    InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) &&
                 (!isa<Constant>(V1) && V1->hasOneUse())) ? 2 : 1;

    if (Value *R = simplifyFAdd(AllOpnds, InstQuota))
      return R;
  }

  if (OpndNum != 2) {
    // The input instruction is : "I = 0.0 +/- V". If the "V" were able to be
    // split into two addends, say "V = X - Y", the instruction would have
    // been optimized into "I = Y - X" in the previous steps.
    //
    const FAddendCoef &CE = Opnd0.getCoef();
    return CE.isOne() ? Opnd0.getSymVal() : nullptr;
  }

  // Step 4: Try to optimize Opnd0 + Opnd1_0 [+ Opnd1_1]
  if (Opnd1_ExpNum) {
    AddendVect AllOpnds;
    AllOpnds.push_back(&Opnd0);
    AllOpnds.push_back(&Opnd1_0);
    if (Opnd1_ExpNum == 2)
      AllOpnds.push_back(&Opnd1_1);

    if (Value *R = simplifyFAdd(AllOpnds, 1))
      return R;
  }

  // Step 5: Try to optimize Opnd1 + Opnd0_0 [+ Opnd0_1]
  if (Opnd0_ExpNum) {
    AddendVect AllOpnds;
    AllOpnds.push_back(&Opnd1);
    AllOpnds.push_back(&Opnd0_0);
    if (Opnd0_ExpNum == 2)
      AllOpnds.push_back(&Opnd0_1);

    if (Value *R = simplifyFAdd(AllOpnds, 1))
      return R;
  }

  return nullptr;
}
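// Illustrative example (editor's sketch, not part of the original source):
// given reassoc+nsz IR such as
//   %t = fmul reassoc nsz float %x, 3.000000e+00
//   %r = fadd reassoc nsz float %t, %x
// the addends collected here are <3, %x> and <1, %x>, which simplifyFAdd()
// folds into <4, %x>, i.e. "fmul reassoc nsz float %x, 4.000000e+00".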

Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
  unsigned AddendNum = Addends.size();
  assert(AddendNum <= 4 && "Too many addends");

  // For saving intermediate results.
  unsigned NextTmpIdx = 0;
  FAddend TmpResult[3];

  // Simplified addends are placed in <SimpVect>.
  AddendVect SimpVect;

  // The outer loop works on one symbolic-value at a time. Suppose the input
  // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ...
  // The symbolic-values will be processed in this order: x, y, z.
  for (unsigned SymIdx = 0; SymIdx < AddendNum; SymIdx++) {

    const FAddend *ThisAddend = Addends[SymIdx];
    if (!ThisAddend) {
      // This addend was processed before.
      continue;
    }

    Value *Val = ThisAddend->getSymVal();

    // If the resulting expr has a constant addend, it is desirable for this
    // constant addend to reside at the top of the resulting expression tree.
    // Placing constants close to super-expr(s) will potentially reveal some
    // optimization opportunities in super-expr(s). Here we do not implement
    // this logic intentionally and rely on the
    // SimplifyAssociativeOrCommutative call later.

    unsigned StartIdx = SimpVect.size();
    SimpVect.push_back(ThisAddend);

    // The inner loop collects addends sharing the same symbolic-value, and
    // these addends will later be folded into a single addend. Following the
    // above example, if the symbolic value "y" is being processed, the inner
    // loop will collect the two addends "<b1,y>" and "<b2,y>". These two
    // addends will later be folded into "<b1+b2, y>".
    for (unsigned SameSymIdx = SymIdx + 1;
         SameSymIdx < AddendNum; SameSymIdx++) {
      const FAddend *T = Addends[SameSymIdx];
      if (T && T->getSymVal() == Val) {
        // Set null such that the next iteration of the outer loop will not
        // process this addend again.
        Addends[SameSymIdx] = nullptr;
        SimpVect.push_back(T);
      }
    }

    // If multiple addends share the same symbolic value, fold them together.
    if (StartIdx + 1 != SimpVect.size()) {
      FAddend &R = TmpResult[NextTmpIdx ++];
      R = *SimpVect[StartIdx];
      for (unsigned Idx = StartIdx + 1; Idx < SimpVect.size(); Idx++)
        R += *SimpVect[Idx];

      // Pop all addends being folded and push the resulting folded addend.
      SimpVect.resize(StartIdx);
      if (!R.isZero()) {
        SimpVect.push_back(&R);
      }
    }
  }

  assert((NextTmpIdx <= std::size(TmpResult) + 1) && "out-of-bound access");

  Value *Result;
  if (!SimpVect.empty())
    Result = createNaryFAdd(SimpVect, InstrQuota);
  else {
    // The addition is folded to 0.0.
    Result = ConstantFP::get(Instr->getType(), 0.0);
  }

  return Result;
}

Value *FAddCombine::createNaryFAdd
  (const AddendVect &Opnds, unsigned InstrQuota) {
  assert(!Opnds.empty() && "Expect at least one addend");

  // Step 1: Check if the # of instructions needed exceeds the quota.

  unsigned InstrNeeded = calcInstrNumber(Opnds);
  if (InstrNeeded > InstrQuota)
    return nullptr;

  initCreateInstNum();

  // Step 2: Emit the N-ary addition.
  // Note that at most three instructions are involved in Fadd-InstCombine: the
  // addition in question, and at most two neighboring instructions.
  // The resulting optimized addition should have at least one less instruction
  // than the original addition expression tree. This implies that the
  // resulting N-ary addition has at most two instructions, and we don't need
  // to worry about tree-height when constructing the N-ary addition.

  Value *LastVal = nullptr;
  bool LastValNeedNeg = false;

  // Iterate the addends, creating fadd/fsub using adjacent two addends.
  for (const FAddend *Opnd : Opnds) {
    bool NeedNeg;
    Value *V = createAddendVal(*Opnd, NeedNeg);
    if (!LastVal) {
      LastVal = V;
      LastValNeedNeg = NeedNeg;
      continue;
    }

    if (LastValNeedNeg == NeedNeg) {
      LastVal = createFAdd(LastVal, V);
      continue;
    }

    if (LastValNeedNeg)
      LastVal = createFSub(V, LastVal);
    else
      LastVal = createFSub(LastVal, V);

    LastValNeedNeg = false;
  }

  if (LastValNeedNeg) {
    LastVal = createFNeg(LastVal);
  }

#ifndef NDEBUG
  assert(CreateInstrNum == InstrNeeded &&
         "Inconsistent instruction numbers");
#endif

  return LastVal;
}

Value *FAddCombine::createFSub(Value *Opnd0, Value *Opnd1) {
  Value *V = Builder.CreateFSub(Opnd0, Opnd1);
  if (Instruction *I = dyn_cast<Instruction>(V))
    createInstPostProc(I);
  return V;
}

Value *FAddCombine::createFNeg(Value *V) {
  Value *NewV = Builder.CreateFNeg(V);
  if (Instruction *I = dyn_cast<Instruction>(NewV))
    createInstPostProc(I, true); // fneg's don't receive instruction numbers.
  return NewV;
}

Value *FAddCombine::createFAdd(Value *Opnd0, Value *Opnd1) {
  Value *V = Builder.CreateFAdd(Opnd0, Opnd1);
  if (Instruction *I = dyn_cast<Instruction>(V))
    createInstPostProc(I);
  return V;
}

Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) {
  Value *V = Builder.CreateFMul(Opnd0, Opnd1);
  if (Instruction *I = dyn_cast<Instruction>(V))
    createInstPostProc(I);
  return V;
}

void FAddCombine::createInstPostProc(Instruction *NewInstr, bool NoNumber) {
  NewInstr->setDebugLoc(Instr->getDebugLoc());

  // Keep track of the number of instructions created.
  if (!NoNumber)
    incCreateInstNum();

  // Propagate fast-math flags
  NewInstr->setFastMathFlags(Instr->getFastMathFlags());
}

// Return the number of instructions needed to emit the N-ary addition.
// NOTE: Keep this function in sync with createAddendVal().
unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) {
  unsigned OpndNum = Opnds.size();
  unsigned InstrNeeded = OpndNum - 1;

  // Adjust the number of instructions needed to emit the N-ary add.
  for (const FAddend *Opnd : Opnds) {
    if (Opnd->isConstant())
      continue;

    // The constant check above is really for a few special constant
    // coefficients.
    if (isa<UndefValue>(Opnd->getSymVal()))
      continue;

    const FAddendCoef &CE = Opnd->getCoef();
    // Let the addend be "c * x". If "c == +/-1", the value of the addend
    // is immediately available; otherwise, it needs exactly one instruction
    // to evaluate the value.
    if (!CE.isMinusOne() && !CE.isOne())
      InstrNeeded++;
  }
  return InstrNeeded;
}

// Input Addend       Value           NeedNeg(output)
// ================================================================
//  Constant C        C               false
//  <+/-1, V>         V               coefficient is -1
//  <2/-2, V>         "fadd V, V"     coefficient is -2
//  <C, V>            "fmul V, C"     false
//
// NOTE: Keep this function in sync with FAddCombine::calcInstrNumber.
Value *FAddCombine::createAddendVal(const FAddend &Opnd, bool &NeedNeg) {
  const FAddendCoef &Coeff = Opnd.getCoef();

  if (Opnd.isConstant()) {
    NeedNeg = false;
    return Coeff.getValue(Instr->getType());
  }

  Value *OpndVal = Opnd.getSymVal();

  if (Coeff.isMinusOne() || Coeff.isOne()) {
    NeedNeg = Coeff.isMinusOne();
    return OpndVal;
  }

  if (Coeff.isTwo() || Coeff.isMinusTwo()) {
    NeedNeg = Coeff.isMinusTwo();
    return createFAdd(OpndVal, OpndVal);
  }

  NeedNeg = false;
  return createFMul(OpndVal, Coeff.getValue(Instr->getType()));
}

// Checks if any operand is negative and we can convert add to sub.
// This function checks for the following negative patterns
//   ADD(XOR(OR(Z, NOT(C)), C), 1) == NEG(AND(Z, C))
//   ADD(XOR(AND(Z, C), C), 1) == NEG(OR(Z, ~C))
//   XOR(AND(Z, C), (C + 1)) == NEG(OR(Z, ~C)) if C is even
static Value *checkForNegativeOperand(BinaryOperator &I,
                                      InstCombiner::BuilderTy &Builder) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);

  // This function creates 2 instructions to replace ADD; we need at least one
  // of LHS or RHS to have one use to ensure benefit in the transform.
  if (!LHS->hasOneUse() && !RHS->hasOneUse())
    return nullptr;

  Value *X = nullptr, *Y = nullptr, *Z = nullptr;
  const APInt *C1 = nullptr, *C2 = nullptr;

  // if ONE is on the other side, swap
  if (match(RHS, m_Add(m_Value(X), m_One())))
    std::swap(LHS, RHS);

  if (match(LHS, m_Add(m_Value(X), m_One()))) {
    // if XOR is on the other side, swap
    if (match(RHS, m_Xor(m_Value(Y), m_APInt(C1))))
      std::swap(X, RHS);

    if (match(X, m_Xor(m_Value(Y), m_APInt(C1)))) {
      // X = XOR(Y, C1), Y = OR(Z, C2), C2 = NOT(C1) ==> X == NOT(AND(Z, C1))
      // ADD(ADD(X, 1), RHS) == ADD(X, ADD(RHS, 1)) == SUB(RHS, AND(Z, C1))
      if (match(Y, m_Or(m_Value(Z), m_APInt(C2))) && (*C2 == ~(*C1))) {
        Value *NewAnd = Builder.CreateAnd(Z, *C1);
        return Builder.CreateSub(RHS, NewAnd, "sub");
      } else if (match(Y, m_And(m_Value(Z), m_APInt(C2))) && (*C1 == *C2)) {
        // X = XOR(Y, C1), Y = AND(Z, C2), C2 == C1 ==> X == NOT(OR(Z, ~C1))
        // ADD(ADD(X, 1), RHS) == ADD(X, ADD(RHS, 1)) == SUB(RHS, OR(Z, ~C1))
        Value *NewOr = Builder.CreateOr(Z, ~(*C1));
        return Builder.CreateSub(RHS, NewOr, "sub");
      }
    }
  }

  // Restore LHS and RHS
  LHS = I.getOperand(0);
  RHS = I.getOperand(1);

  // if XOR is on the other side, swap
  if (match(RHS, m_Xor(m_Value(Y), m_APInt(C1))))
    std::swap(LHS, RHS);

  // C2 is ODD
  // LHS = XOR(Y, C1), Y = AND(Z, C2), C1 == (C2 + 1) => LHS == NEG(OR(Z, ~C2))
  // ADD(LHS, RHS) == SUB(RHS, OR(Z, ~C2))
  if (match(LHS, m_Xor(m_Value(Y), m_APInt(C1))))
    if (C1->countr_zero() == 0)
      if (match(Y, m_And(m_Value(Z), m_APInt(C2))) && *C1 == (*C2 + 1)) {
        Value *NewOr = Builder.CreateOr(Z, ~(*C2));
        return Builder.CreateSub(RHS, NewOr, "sub");
      }
  return nullptr;
}
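// Illustrative example (editor's sketch, not part of the original source):
// for the second pattern above with C1 == C2 == 3,
//   %y   = and i8 %z, 3
//   %x   = xor i8 %y, 3
//   %x1  = add i8 %x, 1
//   %add = add i8 %x1, %rhs
// is rewritten to
//   %or  = or i8 %z, -4        ; -4 == ~3
//   %sub = sub i8 %rhs, %or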

/// Wrapping flags may allow combining constants separated by an extend.
static Instruction *foldNoWrapAdd(BinaryOperator &Add,
                                  InstCombiner::BuilderTy &Builder) {
  Value *Op0 = Add.getOperand(0), *Op1 = Add.getOperand(1);
  Type *Ty = Add.getType();
  Constant *Op1C;
  if (!match(Op1, m_Constant(Op1C)))
    return nullptr;

  // Try this match first because it results in an add in the narrow type.
  // (zext (X +nuw C2)) + C1 --> zext (X + (C2 + trunc(C1)))
  Value *X;
  const APInt *C1, *C2;
  if (match(Op1, m_APInt(C1)) &&
      match(Op0, m_ZExt(m_NUWAddLike(m_Value(X), m_APInt(C2)))) &&
      C1->isNegative() && C1->sge(-C2->sext(C1->getBitWidth()))) {
    APInt NewC = *C2 + C1->trunc(C2->getBitWidth());
    // If the smaller add will fold to zero, we don't need to check one use.
    if (NewC.isZero())
      return new ZExtInst(X, Ty);
    // Otherwise only do this if the existing zero extend will be removed.
    if (Op0->hasOneUse())
      return new ZExtInst(
          Builder.CreateNUWAdd(X, ConstantInt::get(X->getType(), NewC)), Ty);
  }

  // More general combining of constants in the wide type.
  // (sext (X +nsw NarrowC)) + C --> (sext X) + (sext(NarrowC) + C)
  // or (zext nneg (X +nsw NarrowC)) + C --> (sext X) + (sext(NarrowC) + C)
  Constant *NarrowC;
  if (match(Op0, m_OneUse(m_SExtLike(
                     m_NSWAddLike(m_Value(X), m_Constant(NarrowC)))))) {
    Value *WideC = Builder.CreateSExt(NarrowC, Ty);
    Value *NewC = Builder.CreateAdd(WideC, Op1C);
    Value *WideX = Builder.CreateSExt(X, Ty);
    return BinaryOperator::CreateAdd(WideX, NewC);
  }
  // (zext (X +nuw NarrowC)) + C --> (zext X) + (zext(NarrowC) + C)
  if (match(Op0,
            m_OneUse(m_ZExt(m_NUWAddLike(m_Value(X), m_Constant(NarrowC)))))) {
    Value *WideC = Builder.CreateZExt(NarrowC, Ty);
    Value *NewC = Builder.CreateAdd(WideC, Op1C);
    Value *WideX = Builder.CreateZExt(X, Ty);
    return BinaryOperator::CreateAdd(WideX, NewC);
  }
  return nullptr;
}
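// Illustrative example (editor's sketch, not part of the original source):
// for the "(sext (X +nsw NarrowC)) + C" fold above,
//   %a   = add nsw i8 %x, 10
//   %s   = sext i8 %a to i32
//   %add = add i32 %s, 100
// becomes
//   %wx  = sext i8 %x to i32
//   %add = add i32 %wx, 110    ; sext(10) + 100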

Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) {
  Value *Op0 = Add.getOperand(0), *Op1 = Add.getOperand(1);
  Type *Ty = Add.getType();
  Constant *Op1C;
  if (!match(Op1, m_ImmConstant(Op1C)))
    return nullptr;

  if (Instruction *NV = foldBinOpIntoSelectOrPhi(Add))
    return NV;

  Value *X;
  Constant *Op00C;

  // add (sub C1, X), C2 --> sub (add C1, C2), X
  if (match(Op0, m_Sub(m_Constant(Op00C), m_Value(X))))
    return BinaryOperator::CreateSub(ConstantExpr::getAdd(Op00C, Op1C), X);

  Value *Y;

  // add (sub X, Y), -1 --> add (not Y), X
  if (match(Op0, m_OneUse(m_Sub(m_Value(X), m_Value(Y)))) &&
      match(Op1, m_AllOnes()))
    return BinaryOperator::CreateAdd(Builder.CreateNot(Y), X);

  // zext(bool) + C -> bool ? C + 1 : C
  if (match(Op0, m_ZExt(m_Value(X))) &&
      X->getType()->getScalarSizeInBits() == 1)
    return SelectInst::Create(X, InstCombiner::AddOne(Op1C), Op1);
  // sext(bool) + C -> bool ? C - 1 : C
  if (match(Op0, m_SExt(m_Value(X))) &&
      X->getType()->getScalarSizeInBits() == 1)
    return SelectInst::Create(X, InstCombiner::SubOne(Op1C), Op1);

  // ~X + C --> (C-1) - X
  if (match(Op0, m_Not(m_Value(X)))) {
    // ~X + C has NSW and (C-1) won't overflow => (C-1)-X can have NSW
    auto *COne = ConstantInt::get(Op1C->getType(), 1);
    bool WillNotSOV = willNotOverflowSignedSub(Op1C, COne, Add);
    BinaryOperator *Res =
        BinaryOperator::CreateSub(ConstantExpr::getSub(Op1C, COne), X);
    Res->setHasNoSignedWrap(Add.hasNoSignedWrap() && WillNotSOV);
    return Res;
  }

  // (iN X s>> (N - 1)) + 1 --> zext (X > -1)
  const APInt *C;
  unsigned BitWidth = Ty->getScalarSizeInBits();
  if (match(Op0, m_OneUse(m_AShr(m_Value(X),
                                 m_SpecificIntAllowPoison(BitWidth - 1)))) &&
      match(Op1, m_One()))
    return new ZExtInst(Builder.CreateIsNotNeg(X, "isnotneg"), Ty);

  if (!match(Op1, m_APInt(C)))
    return nullptr;

  // (X | Op01C) + Op1C --> X + (Op01C + Op1C) iff the `or` is actually an `add`
  Constant *Op01C;
  if (match(Op0, m_DisjointOr(m_Value(X), m_ImmConstant(Op01C)))) {
    BinaryOperator *NewAdd =
        BinaryOperator::CreateAdd(X, ConstantExpr::getAdd(Op01C, Op1C));
    NewAdd->setHasNoSignedWrap(Add.hasNoSignedWrap() &&
                               willNotOverflowSignedAdd(Op01C, Op1C, Add));
    NewAdd->setHasNoUnsignedWrap(Add.hasNoUnsignedWrap());
    return NewAdd;
  }

  // (X | C2) + C --> (X | C2) ^ C2 iff (C2 == -C)
  const APInt *C2;
  if (match(Op0, m_Or(m_Value(), m_APInt(C2))) && *C2 == -*C)
    return BinaryOperator::CreateXor(Op0, ConstantInt::get(Add.getType(), *C2));

  if (C->isSignMask()) {
    // If wrapping is not allowed, then the addition must set the sign bit:
    // X + (signmask) --> X | signmask
    if (Add.hasNoSignedWrap() || Add.hasNoUnsignedWrap())
      return BinaryOperator::CreateOr(Op0, Op1);

    // If wrapping is allowed, then the addition flips the sign bit of LHS:
    // X + (signmask) --> X ^ signmask
    return BinaryOperator::CreateXor(Op0, Op1);
  }

  // Is this add the last step in a convoluted sext?
  // add(zext(xor i16 X, -32768), -32768) --> sext X
  if (match(Op0, m_ZExt(m_Xor(m_Value(X), m_APInt(C2)))) &&
      C2->isMinSignedValue() && C2->sext(Ty->getScalarSizeInBits()) == *C)
    return CastInst::Create(Instruction::SExt, X, Ty);

  if (match(Op0, m_Xor(m_Value(X), m_APInt(C2)))) {
    // (X ^ signmask) + C --> (X + (signmask ^ C))
    if (C2->isSignMask())
      return BinaryOperator::CreateAdd(X, ConstantInt::get(Ty, *C2 ^ *C));

    // If X has no high-bits set above an xor mask:
    // add (xor X, LowMaskC), C --> sub (LowMaskC + C), X
    if (C2->isMask()) {
      KnownBits LHSKnown = computeKnownBits(X, 0, &Add);
      if ((*C2 | LHSKnown.Zero).isAllOnes())
        return BinaryOperator::CreateSub(ConstantInt::get(Ty, *C2 + *C), X);
    }
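    // Illustrative example (editor's sketch, not part of the original
    // source): if %x is known to fit in 8 bits, the fold above rewrites
    //   %t   = xor i32 %x, 255
    //   %add = add i32 %t, 42
    // into "sub i32 297, %x" (255 + 42 == 297).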

    // Look for a math+logic pattern that corresponds to sext-in-register of a
    // value with cleared high bits. Convert that into a pair of shifts:
    // add (xor X, 0x80), 0xF..F80 --> (X << ShAmtC) >>s ShAmtC
    // add (xor X, 0xF..F80), 0x80 --> (X << ShAmtC) >>s ShAmtC
    if (Op0->hasOneUse() && *C2 == -(*C)) {
      unsigned BitWidth = Ty->getScalarSizeInBits();
      unsigned ShAmt = 0;
      if (C->isPowerOf2())
        ShAmt = BitWidth - C->logBase2() - 1;
      else if (C2->isPowerOf2())
        ShAmt = BitWidth - C2->logBase2() - 1;
      if (ShAmt && MaskedValueIsZero(X, APInt::getHighBitsSet(BitWidth, ShAmt),
                                     0, &Add)) {
        Constant *ShAmtC = ConstantInt::get(Ty, ShAmt);
        Value *NewShl = Builder.CreateShl(X, ShAmtC, "sext");
        return BinaryOperator::CreateAShr(NewShl, ShAmtC);
      }
    }
  }

  if (C->isOne() && Op0->hasOneUse()) {
    // add (sext i1 X), 1 --> zext (not X)
    // TODO: The smallest IR representation is (select X, 0, 1), and that would
    // not require the one-use check. But we need to remove a transform in
    // visitSelect and make sure that IR value tracking for select is equal or
    // better than for these ops.
    if (match(Op0, m_SExt(m_Value(X))) &&
        X->getType()->getScalarSizeInBits() == 1)
      return new ZExtInst(Builder.CreateNot(X), Ty);

    // Shifts and add used to flip and mask off the low bit:
    // add (ashr (shl i32 X, 31), 31), 1 --> and (not X), 1
    const APInt *C3;
    if (match(Op0, m_AShr(m_Shl(m_Value(X), m_APInt(C2)), m_APInt(C3))) &&
        C2 == C3 && *C2 == Ty->getScalarSizeInBits() - 1) {
      Value *NotX = Builder.CreateNot(X);
      return BinaryOperator::CreateAnd(NotX, ConstantInt::get(Ty, 1));
    }
  }

  // Fold (add (zext (add X, -1)), 1) -> (zext X) if X is non-zero.
  // TODO: There's a general form for any constant on the outer add.
  if (C->isOne()) {
    if (match(Op0, m_ZExt(m_Add(m_Value(X), m_AllOnes())))) {
      const SimplifyQuery Q = SQ.getWithInstruction(&Add);
      if (llvm::isKnownNonZero(X, Q))
        return new ZExtInst(X, Ty);
    }
  }

  return nullptr;
}
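// Illustrative example (editor's sketch, not part of the original source):
// the "~X + C --> (C-1) - X" fold in foldAddWithConstant() rewrites
//   %not = xor i32 %x, -1
//   %add = add i32 %not, 42
// into "sub i32 41, %x".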

// Match variations of a^2 + 2*a*b + b^2
//
// To reuse the code between the FP and Int versions, the instruction OpCodes
// and constant types have been turned into template parameters.
//
// Mul2Rhs: The constant to perform the multiplicative equivalent of X*2 with;
// should be `m_SpecificFP(2.0)` for FP and `m_SpecificInt(1)` for Int
// (we're matching `X<<1` instead of `X*2` for Int)
template <bool FP, typename Mul2Rhs>
static bool matchesSquareSum(BinaryOperator &I, Mul2Rhs M2Rhs, Value *&A,
                             Value *&B) {
  constexpr unsigned MulOp = FP ? Instruction::FMul : Instruction::Mul;
  constexpr unsigned AddOp = FP ? Instruction::FAdd : Instruction::Add;
  constexpr unsigned Mul2Op = FP ? Instruction::FMul : Instruction::Shl;

  // (a * a) + (((a * 2) + b) * b)
  if (match(&I, m_c_BinOp(
                    AddOp, m_OneUse(m_BinOp(MulOp, m_Value(A), m_Deferred(A))),
                    m_OneUse(m_c_BinOp(
                        MulOp,
                        m_c_BinOp(AddOp, m_BinOp(Mul2Op, m_Deferred(A), M2Rhs),
                                  m_Value(B)),
                        m_Deferred(B))))))
    return true;

  // ((a * b) * 2)  or  ((a * 2) * b)
  // +
  // (a * a + b * b)  or  (b * b + a * a)
  return match(
      &I, m_c_BinOp(
              AddOp,
              m_CombineOr(
                  m_OneUse(m_BinOp(
                      Mul2Op, m_BinOp(MulOp, m_Value(A), m_Value(B)), M2Rhs)),
                  m_OneUse(m_c_BinOp(MulOp, m_BinOp(Mul2Op, m_Value(A), M2Rhs),
                                     m_Value(B)))),
              m_OneUse(
                  m_c_BinOp(AddOp, m_BinOp(MulOp, m_Deferred(A), m_Deferred(A)),
                            m_BinOp(MulOp, m_Deferred(B), m_Deferred(B))))));
}

// Fold integer variations of a^2 + 2*a*b + b^2 -> (a + b)^2
Instruction *InstCombinerImpl::foldSquareSumInt(BinaryOperator &I) {
  Value *A, *B;
  if (matchesSquareSum</*FP*/ false>(I, m_SpecificInt(1), A, B)) {
    Value *AB = Builder.CreateAdd(A, B);
    return BinaryOperator::CreateMul(AB, AB);
  }
  return nullptr;
}

// Fold floating point variations of a^2 + 2*a*b + b^2 -> (a + b)^2
// Requires `nsz` and `reassoc`.
Instruction *InstCombinerImpl::foldSquareSumFP(BinaryOperator &I) {
  assert(I.hasAllowReassoc() && I.hasNoSignedZeros() && "Assumption mismatch");
  Value *A, *B;
  if (matchesSquareSum</*FP*/ true>(I, m_SpecificFP(2.0), A, B)) {
    Value *AB = Builder.CreateFAddFMF(A, B, &I);
    return BinaryOperator::CreateFMulFMF(AB, AB, &I);
  }
  return nullptr;
}
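// Illustrative example (editor's sketch, not part of the original source):
// foldSquareSumInt() rewrites
//   %aa  = mul i32 %a, %a
//   %ab  = mul i32 %a, %b
//   %ab2 = shl i32 %ab, 1
//   %bb  = mul i32 %b, %b
//   %t   = add i32 %aa, %bb
//   %r   = add i32 %ab2, %t
// into
//   %s = add i32 %a, %b
//   %r = mul i32 %s, %s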

// Matches multiplication expression Op * C where C is a constant. Returns the
// constant value in C and the other operand in Op. Returns true if such a
// match is found.
static bool MatchMul(Value *E, Value *&Op, APInt &C) {
  const APInt *AI;
  if (match(E, m_Mul(m_Value(Op), m_APInt(AI)))) {
    C = *AI;
    return true;
  }
  if (match(E, m_Shl(m_Value(Op), m_APInt(AI)))) {
    C = APInt(AI->getBitWidth(), 1);
    C <<= *AI;
    return true;
  }
  return false;
}

// Matches remainder expression Op % C where C is a constant. Returns the
// constant value in C and the other operand in Op. Returns the signedness of
// the remainder operation in IsSigned. Returns true if such a match is
// found.
static bool MatchRem(Value *E, Value *&Op, APInt &C, bool &IsSigned) {
  const APInt *AI;
  IsSigned = false;
  if (match(E, m_SRem(m_Value(Op), m_APInt(AI)))) {
    IsSigned = true;
    C = *AI;
    return true;
  }
  if (match(E, m_URem(m_Value(Op), m_APInt(AI)))) {
    C = *AI;
    return true;
  }
  if (match(E, m_And(m_Value(Op), m_APInt(AI))) && (*AI + 1).isPowerOf2()) {
    C = *AI + 1;
    return true;
  }
  return false;
}

// Matches division expression Op / C with the given signedness as indicated
// by IsSigned, where C is a constant. Returns the constant value in C and the
// other operand in Op. Returns true if such a match is found.
static bool MatchDiv(Value *E, Value *&Op, APInt &C, bool IsSigned) {
  const APInt *AI;
  if (IsSigned && match(E, m_SDiv(m_Value(Op), m_APInt(AI)))) {
    C = *AI;
    return true;
  }
  if (!IsSigned) {
    if (match(E, m_UDiv(m_Value(Op), m_APInt(AI)))) {
      C = *AI;
      return true;
    }
    if (match(E, m_LShr(m_Value(Op), m_APInt(AI)))) {
      C = APInt(AI->getBitWidth(), 1);
      C <<= *AI;
      return true;
    }
  }
  return false;
}

// Returns whether C0 * C1 with the given signedness overflows.
static bool MulWillOverflow(APInt &C0, APInt &C1, bool IsSigned) {
  bool overflow;
  if (IsSigned)
    (void)C0.smul_ov(C1, overflow);
  else
    (void)C0.umul_ov(C1, overflow);
  return overflow;
}

// Simplifies X % C0 + (( X / C0 ) % C1) * C0 to X % (C0 * C1), where (C0 * C1)
// does not overflow.
// Simplifies (X / C0) * C1 + (X % C0) * C2 to
// (X / C0) * (C1 - C2 * C0) + X * C2
Value *InstCombinerImpl::SimplifyAddWithRemainder(BinaryOperator &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  Value *X, *MulOpV;
  APInt C0, MulOpC;
  bool IsSigned;
  // Match I = X % C0 + MulOpV * C0
  if (((MatchRem(LHS, X, C0, IsSigned) && MatchMul(RHS, MulOpV, MulOpC)) ||
       (MatchRem(RHS, X, C0, IsSigned) && MatchMul(LHS, MulOpV, MulOpC))) &&
      C0 == MulOpC) {
    Value *RemOpV;
    APInt C1;
    bool Rem2IsSigned;
    // Match MulOpV = RemOpV % C1
    if (MatchRem(MulOpV, RemOpV, C1, Rem2IsSigned) &&
        IsSigned == Rem2IsSigned) {
      Value *DivOpV;
      APInt DivOpC;
      // Match RemOpV = X / C0
      if (MatchDiv(RemOpV, DivOpV, DivOpC, IsSigned) && X == DivOpV &&
          C0 == DivOpC && !MulWillOverflow(C0, C1, IsSigned)) {
        Value *NewDivisor = ConstantInt::get(X->getType(), C0 * C1);
        return IsSigned ? Builder.CreateSRem(X, NewDivisor, "srem")
                        : Builder.CreateURem(X, NewDivisor, "urem");
      }
    }
  }

  // Match I = (X / C0) * C1 + (X % C0) * C2
  Value *Div, *Rem;
  APInt C1, C2;
  if (!LHS->hasOneUse() || !MatchMul(LHS, Div, C1))
    Div = LHS, C1 = APInt(I.getType()->getScalarSizeInBits(), 1);
  if (!RHS->hasOneUse() || !MatchMul(RHS, Rem, C2))
    Rem = RHS, C2 = APInt(I.getType()->getScalarSizeInBits(), 1);
  if (match(Div, m_IRem(m_Value(), m_Value()))) {
    std::swap(Div, Rem);
    std::swap(C1, C2);
  }
  Value *DivOpV;
  APInt DivOpC;
  if (MatchRem(Rem, X, C0, IsSigned) &&
      MatchDiv(Div, DivOpV, DivOpC, IsSigned) && X == DivOpV && C0 == DivOpC) {
    APInt NewC = C1 - C2 * C0;
    if (!NewC.isZero() && !Rem->hasOneUse())
      return nullptr;
    if (!isGuaranteedNotToBeUndef(X, &AC, &I, &DT))
      return nullptr;
    Value *MulXC2 = Builder.CreateMul(X, ConstantInt::get(X->getType(), C2));
    if (NewC.isZero())
      return MulXC2;
    return Builder.CreateAdd(
        Builder.CreateMul(Div, ConstantInt::get(X->getType(), NewC)), MulXC2);
  }

  return nullptr;
}
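// Illustrative example (editor's sketch, not part of the original source):
// the first form above folds
//   %rem0 = urem i32 %x, 3
//   %div  = udiv i32 %x, 3
//   %rem1 = urem i32 %div, 5
//   %mul  = mul i32 %rem1, 3
//   %r    = add i32 %rem0, %mul
// into "%r = urem i32 %x, 15" (3 * 5 == 15, which does not overflow i32).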

/// Fold
///   (1 << NBits) - 1
/// Into:
///   ~(-(1 << NBits))
/// Because a 'not' is better for bit-tracking analysis and other transforms
/// than an 'add'. The new shl is always nsw, and is nuw if the old `add` was.
static Instruction *canonicalizeLowbitMask(BinaryOperator &I,
                                           InstCombiner::BuilderTy &Builder) {
  Value *NBits;
  if (!match(&I, m_Add(m_OneUse(m_Shl(m_One(), m_Value(NBits))), m_AllOnes())))
    return nullptr;

  Constant *MinusOne = Constant::getAllOnesValue(NBits->getType());
  Value *NotMask = Builder.CreateShl(MinusOne, NBits, "notmask");
  // Be wary of constant folding.
  if (auto *BOp = dyn_cast<BinaryOperator>(NotMask)) {
    // Always NSW. But NUW propagates from `add`.
    BOp->setHasNoSignedWrap();
    BOp->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
  }

  return BinaryOperator::CreateNot(NotMask, I.getName());
}

static Instruction *foldToUnsignedSaturatedAdd(BinaryOperator &I) {
  assert(I.getOpcode() == Instruction::Add && "Expecting add instruction");
  Type *Ty = I.getType();
  auto getUAddSat = [&]() {
    return Intrinsic::getDeclaration(I.getModule(), Intrinsic::uadd_sat, Ty);
  };

  // add (umin X, ~Y), Y --> uaddsat X, Y
  Value *X, *Y;
  if (match(&I, m_c_Add(m_c_UMin(m_Value(X), m_Not(m_Value(Y))),
                        m_Deferred(Y))))
    return CallInst::Create(getUAddSat(), { X, Y });

  // add (umin X, ~C), C --> uaddsat X, C
  const APInt *C, *NotC;
  if (match(&I, m_Add(m_UMin(m_Value(X), m_APInt(NotC)), m_APInt(C))) &&
      *C == ~*NotC)
    return CallInst::Create(getUAddSat(), { X, ConstantInt::get(Ty, *C) });

  return nullptr;
}

// Transform:
//  (add A, (shl (neg B), Y))
//      -> (sub A, (shl B, Y))
static Instruction *combineAddSubWithShlAddSub(InstCombiner::BuilderTy &Builder,
                                               const BinaryOperator &I) {
  Value *A, *B, *Cnt;
  if (match(&I,
            m_c_Add(m_OneUse(m_Shl(m_OneUse(m_Neg(m_Value(B))), m_Value(Cnt))),
                    m_Value(A)))) {
    Value *NewShl = Builder.CreateShl(B, Cnt);
    return BinaryOperator::CreateSub(A, NewShl);
  }
  return nullptr;
}
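// Illustrative example (editor's sketch, not part of the original source):
// foldToUnsignedSaturatedAdd() rewrites
//   %noty = xor i32 %y, -1
//   %min  = call i32 @llvm.umin.i32(i32 %x, i32 %noty)
//   %r    = add i32 %min, %y
// into "%r = call i32 @llvm.uadd.sat.i32(i32 %x, i32 %y)".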

/// Try to reduce signed division by a power-of-2 to an arithmetic shift right.
static Instruction *foldAddToAshr(BinaryOperator &Add) {
  // Division must be by power-of-2, but not the minimum signed value.
  Value *X;
  const APInt *DivC;
  if (!match(Add.getOperand(0), m_SDiv(m_Value(X), m_Power2(DivC))) ||
      DivC->isNegative())
    return nullptr;

  // Rounding is done by adding -1 if the dividend (X) is negative and has any
  // low bits set. It recognizes two canonical patterns:
  // 1. For an 'ugt' cmp with the signed minimum value (SMIN), the
  //    pattern is: sext (icmp ugt (X & (DivC - 1)), SMIN).
  // 2. For an 'eq' cmp, the pattern is:
  //    sext (icmp eq X & (SMIN + 1), SMIN + 1).
  // Note that, by the time we end up here, if possible, ugt has been
  // canonicalized into eq.
  const APInt *MaskC, *MaskCCmp;
  ICmpInst::Predicate Pred;
  if (!match(Add.getOperand(1),
             m_SExt(m_ICmp(Pred, m_And(m_Specific(X), m_APInt(MaskC)),
                           m_APInt(MaskCCmp)))))
    return nullptr;

  if ((Pred != ICmpInst::ICMP_UGT || !MaskCCmp->isSignMask()) &&
      (Pred != ICmpInst::ICMP_EQ || *MaskCCmp != *MaskC))
    return nullptr;

  APInt SMin = APInt::getSignedMinValue(Add.getType()->getScalarSizeInBits());
  bool IsMaskValid = Pred == ICmpInst::ICMP_UGT
                         ? (*MaskC == (SMin | (*DivC - 1)))
                         : (*DivC == 2 && *MaskC == SMin + 1);
  if (!IsMaskValid)
    return nullptr;

  // (X / DivC) + sext ((X & (SMin | (DivC - 1)) >u SMin) --> X >>s log2(DivC)
  return BinaryOperator::CreateAShr(
      X, ConstantInt::get(Add.getType(), DivC->exactLogBase2()));
}
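// Illustrative example (editor's sketch, not part of the original source):
// for i32 and DivC == 4, foldAddToAshr() rewrites
//   %div  = sdiv i32 %x, 4
//   %and  = and i32 %x, -2147483645        ; SMin | (4 - 1)
//   %cmp  = icmp ugt i32 %and, -2147483648 ; SMin
//   %sext = sext i1 %cmp to i32
//   %r    = add i32 %div, %sext
// into "%r = ashr i32 %x, 2".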

Instruction *InstCombinerImpl::
    canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(
        BinaryOperator &I) {
  assert((I.getOpcode() == Instruction::Add ||
          I.getOpcode() == Instruction::Or ||
          I.getOpcode() == Instruction::Sub) &&
         "Expecting add/or/sub instruction");

  // We have a subtraction/addition between a (potentially truncated) *logical*
  // right-shift of X and a "select".
  Value *X, *Select;
  Instruction *LowBitsToSkip, *Extract;
  if (!match(&I, m_c_BinOp(m_TruncOrSelf(m_CombineAnd(
                               m_LShr(m_Value(X), m_Instruction(LowBitsToSkip)),
                               m_Instruction(Extract))),
                           m_Value(Select))))
    return nullptr;

  // `add`/`or` is commutative; but for `sub`, "select" *must* be on RHS.
  if (I.getOpcode() == Instruction::Sub && I.getOperand(1) != Select)
    return nullptr;

  Type *XTy = X->getType();
  bool HadTrunc = I.getType() != XTy;

  // If there was a truncation of the extracted value, then we'll need to
  // produce one extra instruction, so we need to ensure one instruction will
  // go away.
  if (HadTrunc && !match(&I, m_c_BinOp(m_OneUse(m_Value()), m_Value())))
    return nullptr;

  // Extraction should extract high NBits bits, with shift amount calculated as:
  //   low bits to skip = shift bitwidth - high bits to extract
  // The shift amount itself may be extended, and we need to look past zero-ext
  // when matching NBits; that will matter for matching later.
  Constant *C;
  Value *NBits;
  if (!match(
          LowBitsToSkip,
          m_ZExtOrSelf(m_Sub(m_Constant(C), m_ZExtOrSelf(m_Value(NBits))))) ||
      !match(C, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_EQ,
                                   APInt(C->getType()->getScalarSizeInBits(),
                                         X->getType()->getScalarSizeInBits()))))
    return nullptr;

  // Sign-extending value can be zero-extended if we `sub`tract it,
  // or sign-extended otherwise.
  auto SkipExtInMagic = [&I](Value *&V) {
    if (I.getOpcode() == Instruction::Sub)
      match(V, m_ZExtOrSelf(m_Value(V)));
    else
      match(V, m_SExtOrSelf(m_Value(V)));
  };

  // Now, finally validate the sign-extending magic.
  // `select` itself may be appropriately extended, look past that.
  SkipExtInMagic(Select);

  ICmpInst::Predicate Pred;
  const APInt *Thr;
  Value *SignExtendingValue, *Zero;
  bool ShouldSignext;
  // It must be a select between two values we will later establish to be a
  // sign-extending value and a zero constant. The condition guarding the
  // sign-extension must be based on a sign bit of the same X we had in `lshr`.
  if (!match(Select, m_Select(m_ICmp(Pred, m_Specific(X), m_APInt(Thr)),
                              m_Value(SignExtendingValue), m_Value(Zero))) ||
      !isSignBitCheck(Pred, *Thr, ShouldSignext))
    return nullptr;

  // icmp-select pair is commutative.
  if (!ShouldSignext)
    std::swap(SignExtendingValue, Zero);

  // If we should not perform sign-extension then we must add/or/subtract zero.
  if (!match(Zero, m_Zero()))
    return nullptr;
  // Otherwise, it should be some constant, left-shifted by the same NBits we
  // had in `lshr`. Said left-shift can also be appropriately extended.
  // Again, we must look past zero-ext when looking for NBits.
  SkipExtInMagic(SignExtendingValue);
  Constant *SignExtendingValueBaseConstant;
  if (!match(SignExtendingValue,
             m_Shl(m_Constant(SignExtendingValueBaseConstant),
                   m_ZExtOrSelf(m_Specific(NBits)))))
    return nullptr;
  // If we `sub`, then the constant should be one, else it should be all-ones.
  if (I.getOpcode() == Instruction::Sub
          ? !match(SignExtendingValueBaseConstant, m_One())
          : !match(SignExtendingValueBaseConstant, m_AllOnes()))
    return nullptr;

  auto *NewAShr = BinaryOperator::CreateAShr(X, LowBitsToSkip,
                                             Extract->getName() + ".sext");
  NewAShr->copyIRFlags(Extract); // Preserve `exact`-ness.
  if (!HadTrunc)
    return NewAShr;

  Builder.Insert(NewAShr);
  return TruncInst::CreateTruncOrBitCast(NewAShr, I.getType());
}

/// This is a specialization of a more general transform from
/// foldUsingDistributiveLaws. If that code can be made to work optimally
/// for multi-use cases or propagating nsw/nuw, then we would not need this.
static Instruction *factorizeMathWithShlOps(BinaryOperator &I,
                                            InstCombiner::BuilderTy &Builder) {
  // TODO: Also handle mul by doubling the shift amount?
  assert((I.getOpcode() == Instruction::Add ||
          I.getOpcode() == Instruction::Sub) &&
         "Expected add/sub");
  auto *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0));
  auto *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1));
  if (!Op0 || !Op1 || !(Op0->hasOneUse() || Op1->hasOneUse()))
    return nullptr;

  Value *X, *Y, *ShAmt;
  if (!match(Op0, m_Shl(m_Value(X), m_Value(ShAmt))) ||
      !match(Op1, m_Shl(m_Value(Y), m_Specific(ShAmt))))
    return nullptr;

  // No-wrap propagates only when all ops have no-wrap.
  bool HasNSW = I.hasNoSignedWrap() && Op0->hasNoSignedWrap() &&
                Op1->hasNoSignedWrap();
  bool HasNUW = I.hasNoUnsignedWrap() && Op0->hasNoUnsignedWrap() &&
                Op1->hasNoUnsignedWrap();

  // add/sub (X << ShAmt), (Y << ShAmt) --> (add/sub X, Y) << ShAmt
  Value *NewMath = Builder.CreateBinOp(I.getOpcode(), X, Y);
  if (auto *NewI = dyn_cast<BinaryOperator>(NewMath)) {
    NewI->setHasNoSignedWrap(HasNSW);
    NewI->setHasNoUnsignedWrap(HasNUW);
  }
  auto *NewShl = BinaryOperator::CreateShl(NewMath, ShAmt);
  NewShl->setHasNoSignedWrap(HasNSW);
  NewShl->setHasNoUnsignedWrap(HasNUW);
  return NewShl;
}
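// Illustrative example (editor's sketch, not part of the original source):
// factorizeMathWithShlOps() rewrites
//   %a = shl i32 %x, %n
//   %b = shl i32 %y, %n
//   %r = add i32 %a, %b
// into
//   %s = add i32 %x, %y
//   %r = shl i32 %s, %n
// (nuw/nsw are propagated only if present on all three instructions).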

/// Reduce a sequence of masked half-width multiplies to a single multiply.
/// (((XLow * YHigh) + (YLow * XHigh)) << HalfBits) + (XLow * YLow) --> X * Y
static Instruction *foldBoxMultiply(BinaryOperator &I) {
  unsigned BitWidth = I.getType()->getScalarSizeInBits();
  // Skip the odd bitwidth types.
  if ((BitWidth & 0x1))
    return nullptr;

  unsigned HalfBits = BitWidth >> 1;
  APInt HalfMask = APInt::getMaxValue(HalfBits);

  // ResLo = (CrossSum << HalfBits) + (YLo * XLo)
  Value *XLo, *YLo;
  Value *CrossSum;
  // Require one-use on the multiply to avoid increasing the number of
  // multiplications.
  if (!match(&I, m_c_Add(m_Shl(m_Value(CrossSum), m_SpecificInt(HalfBits)),
                         m_OneUse(m_Mul(m_Value(YLo), m_Value(XLo))))))
    return nullptr;

  // XLo = X & HalfMask
  // YLo = Y & HalfMask
  // TODO: Refactor with SimplifyDemandedBits or KnownBits known leading zeros
  // to enhance robustness
  Value *X, *Y;
  if (!match(XLo, m_And(m_Value(X), m_SpecificInt(HalfMask))) ||
      !match(YLo, m_And(m_Value(Y), m_SpecificInt(HalfMask))))
    return nullptr;

  // CrossSum = (X' * (Y >> HalfBits)) + (Y' * (X >> HalfBits))
  // X' can be either X or XLo in the pattern (and the same for Y')
  if (match(CrossSum,
            m_c_Add(m_c_Mul(m_LShr(m_Specific(Y), m_SpecificInt(HalfBits)),
                            m_CombineOr(m_Specific(X), m_Specific(XLo))),
                    m_c_Mul(m_LShr(m_Specific(X), m_SpecificInt(HalfBits)),
                            m_CombineOr(m_Specific(Y), m_Specific(YLo))))))
    return BinaryOperator::CreateMul(X, Y);

  return nullptr;
}
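// Illustrative example (editor's sketch, not part of the original source):
// for i32 (HalfBits == 16), foldBoxMultiply() folds
//   %xlo   = and i32 %x, 65535
//   %ylo   = and i32 %y, 65535
//   %xhi   = lshr i32 %x, 16
//   %yhi   = lshr i32 %y, 16
//   %m0    = mul i32 %yhi, %x
//   %m1    = mul i32 %xhi, %y
//   %cross = add i32 %m0, %m1
//   %shl   = shl i32 %cross, 16
//   %lomul = mul i32 %ylo, %xlo
//   %r     = add i32 %shl, %lomul
// into "%r = mul i32 %x, %y".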

Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
  if (Value *V = simplifyAddInst(I.getOperand(0), I.getOperand(1),
                                 I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
                                 SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  if (SimplifyAssociativeOrCommutative(I))
    return &I;

  if (Instruction *X = foldVectorBinop(I))
    return X;

  if (Instruction *Phi = foldBinopWithPhiOperands(I))
    return Phi;

  // (A*B)+(A*C) -> A*(B+C) etc
  if (Value *V = foldUsingDistributiveLaws(I))
    return replaceInstUsesWith(I, V);

  if (Instruction *R = foldBoxMultiply(I))
    return R;

  if (Instruction *R = factorizeMathWithShlOps(I, Builder))
    return R;

  if (Instruction *X = foldAddWithConstant(I))
    return X;

  if (Instruction *X = foldNoWrapAdd(I, Builder))
    return X;

  if (Instruction *R = foldBinOpShiftWithShift(I))
    return R;

  if (Instruction *R = combineAddSubWithShlAddSub(Builder, I))
    return R;

  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  Type *Ty = I.getType();
  if (Ty->isIntOrIntVectorTy(1))
    return BinaryOperator::CreateXor(LHS, RHS);

  // X + X --> X << 1
  if (LHS == RHS) {
    auto *Shl = BinaryOperator::CreateShl(LHS, ConstantInt::get(Ty, 1));
    Shl->setHasNoSignedWrap(I.hasNoSignedWrap());
    Shl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
    return Shl;
  }

  Value *A, *B;
  if (match(LHS, m_Neg(m_Value(A)))) {
    // -A + -B --> -(A + B)
    if (match(RHS, m_Neg(m_Value(B))))
      return BinaryOperator::CreateNeg(Builder.CreateAdd(A, B));

    // -A + B --> B - A
    auto *Sub = BinaryOperator::CreateSub(RHS, A);
    auto *OB0 = cast<OverflowingBinaryOperator>(LHS);
    Sub->setHasNoSignedWrap(I.hasNoSignedWrap() && OB0->hasNoSignedWrap());

    return Sub;
  }

  // A + -B --> A - B
  if (match(RHS, m_Neg(m_Value(B))))
    return BinaryOperator::CreateSub(LHS, B);

  if (Value *V = checkForNegativeOperand(I, Builder))
    return replaceInstUsesWith(I, V);

  // (A + 1) + ~B --> A - B
  // ~B + (A + 1) --> A - B
  // (~B + A) + 1 --> A - B
  // (A + ~B) + 1 --> A - B
  if (match(&I, m_c_BinOp(m_Add(m_Value(A), m_One()), m_Not(m_Value(B)))) ||
      match(&I, m_BinOp(m_c_Add(m_Not(m_Value(B)), m_Value(A)), m_One())))
    return BinaryOperator::CreateSub(A, B);

  // (A + RHS) + RHS --> A + (RHS << 1)
  if (match(LHS, m_OneUse(m_c_Add(m_Value(A), m_Specific(RHS)))))
    return BinaryOperator::CreateAdd(A, Builder.CreateShl(RHS, 1, "reass.add"));

  // LHS + (A + LHS) --> A + (LHS << 1)
  if (match(RHS, m_OneUse(m_c_Add(m_Value(A), m_Specific(LHS)))))
    return BinaryOperator::CreateAdd(A, Builder.CreateShl(LHS, 1, "reass.add"));

  {
    // (A + C1) + (C2 - B) --> (A - B) + (C1 + C2)
    Constant *C1, *C2;
    if (match(&I, m_c_Add(m_Add(m_Value(A), m_ImmConstant(C1)),
                          m_Sub(m_ImmConstant(C2), m_Value(B)))) &&
        (LHS->hasOneUse() || RHS->hasOneUse())) {
      Value *Sub = Builder.CreateSub(A, B);
      return BinaryOperator::CreateAdd(Sub, ConstantExpr::getAdd(C1, C2));
    }

    // Canonicalize a constant sub operand as an add operand for better folding:
    // (C1 - A) + B --> (B - A) + C1
    if (match(&I, m_c_Add(m_OneUse(m_Sub(m_ImmConstant(C1), m_Value(A))),
                          m_Value(B)))) {
      Value *Sub = Builder.CreateSub(B, A, "reass.sub");
      return BinaryOperator::CreateAdd(Sub, C1);
    }
  }

  // X % C0 + (( X / C0 ) % C1) * C0 => X % (C0 * C1)
  if (Value *V = SimplifyAddWithRemainder(I)) return replaceInstUsesWith(I, V);

  // ((X s/ C1) << C2) + X => X s% -C1 where -C1 is 1 << C2
  const APInt *C1, *C2;
  if (match(LHS, m_Shl(m_SDiv(m_Specific(RHS), m_APInt(C1)), m_APInt(C2)))) {
    APInt one(C2->getBitWidth(), 1);
    APInt minusC1 = -(*C1);
    if (minusC1 == (one << *C2)) {
      Constant *NewRHS = ConstantInt::get(RHS->getType(), minusC1);
      return BinaryOperator::CreateSRem(RHS, NewRHS);
    }
  }

  // (A & 2^C1) + A => A & (2^C1 - 1) iff bit C1 in A is a sign bit
  if (match(&I, m_c_Add(m_And(m_Value(A), m_APInt(C1)), m_Deferred(A))) &&
      C1->isPowerOf2() && (ComputeNumSignBits(A) > C1->countl_zero())) {
    Constant *NewMask = ConstantInt::get(RHS->getType(), *C1 - 1);
    return BinaryOperator::CreateAnd(A, NewMask);
  }

  // ZExt (B - A) + ZExt(A) --> ZExt(B)
  if ((match(RHS, m_ZExt(m_Value(A))) &&
       match(LHS, m_ZExt(m_NUWSub(m_Value(B), m_Specific(A))))) ||
      (match(LHS, m_ZExt(m_Value(A))) &&
       match(RHS, m_ZExt(m_NUWSub(m_Value(B), m_Specific(A))))))
    return new ZExtInst(B, LHS->getType());

  // zext(A) + sext(A) --> 0 if A is i1
  if (match(&I, m_c_BinOp(m_ZExt(m_Value(A)), m_SExt(m_Deferred(A)))) &&
      A->getType()->isIntOrIntVectorTy(1))
    return replaceInstUsesWith(I, Constant::getNullValue(I.getType()));

  // A+B --> A|B iff A and B have no bits set in common.
  WithCache<const Value *> LHSCache(LHS), RHSCache(RHS);
  if (haveNoCommonBitsSet(LHSCache, RHSCache, SQ.getWithInstruction(&I)))
    return BinaryOperator::CreateDisjointOr(LHS, RHS);

  if (Instruction *Ext = narrowMathIfNoOverflow(I))
    return Ext;

  // (add (xor A, B) (and A, B)) --> (or A, B)
  // (add (and A, B) (xor A, B)) --> (or A, B)
  if (match(&I, m_c_BinOp(m_Xor(m_Value(A), m_Value(B)),
                          m_c_And(m_Deferred(A), m_Deferred(B)))))
    return BinaryOperator::CreateOr(A, B);

  // (add (or A, B) (and A, B)) --> (add A, B)
  // (add (and A, B) (or A, B)) --> (add A, B)
  if (match(&I, m_c_BinOp(m_Or(m_Value(A), m_Value(B)),
                          m_c_And(m_Deferred(A), m_Deferred(B))))) {
    // Replacing operands in-place to preserve nuw/nsw flags.
    replaceOperand(I, 0, A);
    replaceOperand(I, 1, B);
    return &I;
  }

  // (add A (or A, -A)) --> (and (add A, -1) A)
  // (add A (or -A, A)) --> (and (add A, -1) A)
  // (add (or A, -A) A) --> (and (add A, -1) A)
  // (add (or -A, A) A) --> (and (add A, -1) A)
  if (match(&I, m_c_BinOp(m_Value(A), m_OneUse(m_c_Or(m_Neg(m_Deferred(A)),
                                                      m_Deferred(A)))))) {
    Value *Add =
        Builder.CreateAdd(A, Constant::getAllOnesValue(A->getType()), "",
                          I.hasNoUnsignedWrap(), I.hasNoSignedWrap());
    return BinaryOperator::CreateAnd(Add, A);
  }

  // Canonicalize ((A & -A) - 1) --> ((A - 1) & ~A)
  // Forms all commutable operations, and simplifies ctpop -> cttz folds.
  if (match(&I,
            m_Add(m_OneUse(m_c_And(m_Value(A), m_OneUse(m_Neg(m_Deferred(A))))),
                  m_AllOnes()))) {
    Constant *AllOnes = ConstantInt::getAllOnesValue(RHS->getType());
    Value *Dec = Builder.CreateAdd(A, AllOnes);
    Value *Not = Builder.CreateXor(A, AllOnes);
    return BinaryOperator::CreateAnd(Dec, Not);
  }

  // Disguised reassociation/factorization:
  //   ~(A * C1) + A
  //   ((A * -C1) - 1) + A
  //   ((A * -C1) + A) - 1
  //   (A * (1 - C1)) - 1
  if (match(&I,
            m_c_Add(m_OneUse(m_Not(m_OneUse(m_Mul(m_Value(A), m_APInt(C1))))),
                    m_Deferred(A)))) {
    Type *Ty = I.getType();
    Constant *NewMulC = ConstantInt::get(Ty, 1 - *C1);
    Value *NewMul = Builder.CreateMul(A, NewMulC);
    return BinaryOperator::CreateAdd(NewMul, ConstantInt::getAllOnesValue(Ty));
  }

  // (A * -2**C) + B --> B - (A << C)
  const APInt *NegPow2C;
  if (match(&I, m_c_Add(m_OneUse(m_Mul(m_Value(A), m_NegatedPower2(NegPow2C))),
                        m_Value(B)))) {
    Constant *ShiftAmtC = ConstantInt::get(Ty, NegPow2C->countr_zero());
    Value *Shl = Builder.CreateShl(A, ShiftAmtC);
    return BinaryOperator::CreateSub(B, Shl);
  }

  // Canonicalize signum variant that ends in add:
  // (A s>> (BW - 1)) + (zext (A s> 0)) --> (A s>> (BW - 1)) | (zext (A != 0))
  ICmpInst::Predicate Pred;
  uint64_t BitWidth = Ty->getScalarSizeInBits();
  if (match(LHS, m_AShr(m_Value(A), m_SpecificIntAllowPoison(BitWidth - 1))) &&
      match(RHS, m_OneUse(m_ZExt(
                     m_OneUse(m_ICmp(Pred, m_Specific(A), m_ZeroInt()))))) &&
      Pred == CmpInst::ICMP_SGT) {
    Value *NotZero = Builder.CreateIsNotNull(A, "isnotnull");
    Value *Zext = Builder.CreateZExt(NotZero, Ty, "isnotnull.zext");
    return BinaryOperator::CreateOr(LHS, Zext);
  }
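  // Illustrative example (editor's sketch, not part of the original source):
  // the signum canonicalization above rewrites
  //   %sh   = ashr i32 %a, 31
  //   %cmp  = icmp sgt i32 %a, 0
  //   %zext = zext i1 %cmp to i32
  //   %r    = add i32 %sh, %zext
  // into
  //   %isnotnull      = icmp ne i32 %a, 0
  //   %isnotnull.zext = zext i1 %isnotnull to i32
  //   %r              = or i32 %sh, %isnotnull.zext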
      Value *Add = isa<ZExtInst>(Ext) ? InstCombiner::AddOne(C)
                                      : InstCombiner::SubOne(C);
      return replaceInstUsesWith(I, Builder.CreateSelect(Cond, Add, A));
    }
  }

  if (Instruction *Ashr = foldAddToAshr(I))
    return Ashr;

  // (~X) + (~Y) --> -2 - (X + Y)
  {
    // To be sure this saves instructions, we need to consume both LHS and RHS
    // (i.e. they each have a `not`).
    bool ConsumesLHS, ConsumesRHS;
    if (isFreeToInvert(LHS, LHS->hasOneUse(), ConsumesLHS) && ConsumesLHS &&
        isFreeToInvert(RHS, RHS->hasOneUse(), ConsumesRHS) && ConsumesRHS) {
      Value *NotLHS = getFreelyInverted(LHS, LHS->hasOneUse(), &Builder);
      Value *NotRHS = getFreelyInverted(RHS, RHS->hasOneUse(), &Builder);
      assert(NotLHS != nullptr && NotRHS != nullptr &&
             "isFreeToInvert desynced with getFreelyInverted");
      Value *LHSPlusRHS = Builder.CreateAdd(NotLHS, NotRHS);
      return BinaryOperator::CreateSub(
          ConstantInt::getSigned(RHS->getType(), -2), LHSPlusRHS);
    }
  }

  if (Instruction *R = tryFoldInstWithCtpopWithNot(&I))
    return R;

  // TODO(jingyue): Consider willNotOverflowSignedAdd and
  // willNotOverflowUnsignedAdd to reduce the number of invocations of
  // computeKnownBits.
  bool Changed = false;
  if (!I.hasNoSignedWrap() && willNotOverflowSignedAdd(LHSCache, RHSCache, I)) {
    Changed = true;
    I.setHasNoSignedWrap(true);
  }
  if (!I.hasNoUnsignedWrap() &&
      willNotOverflowUnsignedAdd(LHSCache, RHSCache, I)) {
    Changed = true;
    I.setHasNoUnsignedWrap(true);
  }

  if (Instruction *V = canonicalizeLowbitMask(I, Builder))
    return V;

  if (Instruction *V =
          canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(I))
    return V;

  if (Instruction *SatAdd = foldToUnsignedSaturatedAdd(I))
    return SatAdd;

  // usub.sat(A, B) + B => umax(A, B)
  if (match(&I, m_c_BinOp(
                    m_OneUse(m_Intrinsic<Intrinsic::usub_sat>(m_Value(A),
                                                              m_Value(B))),
                    m_Deferred(B)))) {
    return replaceInstUsesWith(
        I, Builder.CreateIntrinsic(Intrinsic::umax, {I.getType()}, {A, B}));
  }

  // ctpop(A) + ctpop(B) => ctpop(A | B) if A and B have no bits set in common.
  if (match(LHS, m_OneUse(m_Intrinsic<Intrinsic::ctpop>(m_Value(A)))) &&
      match(RHS, m_OneUse(m_Intrinsic<Intrinsic::ctpop>(m_Value(B)))) &&
      haveNoCommonBitsSet(A, B, SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(
        I, Builder.CreateIntrinsic(Intrinsic::ctpop, {I.getType()},
                                   {Builder.CreateOr(A, B)}));

  // Fold the log2_ceil idiom:
  // zext(ctpop(A) >u/!= 1) + (ctlz(A, true) ^ (BW - 1))
  // -->
  // BW - ctlz(A - 1, false)
  const APInt *XorC;
  if (match(&I,
            m_c_Add(
                m_ZExt(m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Value(A)),
                              m_One())),
                m_OneUse(m_ZExtOrSelf(m_OneUse(m_Xor(
                    m_OneUse(m_TruncOrSelf(m_OneUse(
                        m_Intrinsic<Intrinsic::ctlz>(m_Deferred(A), m_One())))),
                    m_APInt(XorC))))))) &&
      (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_NE) &&
      *XorC == A->getType()->getScalarSizeInBits() - 1) {
    Value *Sub = Builder.CreateAdd(A, Constant::getAllOnesValue(A->getType()));
    Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {A->getType()},
                                          {Sub, Builder.getFalse()});
    Value *Ret = Builder.CreateSub(
        ConstantInt::get(A->getType(), A->getType()->getScalarSizeInBits()),
        Ctlz, "", /*HasNUW*/ true, /*HasNSW*/ true);
    return replaceInstUsesWith(I, Builder.CreateZExtOrTrunc(Ret, I.getType()));
  }
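
  // Worked example of the log2_ceil fold above (for illustration), i32 A = 9:
  //   ctpop(9) = 2, so zext(2 u> 1) = 1; ctlz(9, true) = 28, and 28 ^ 31 = 3;
  //   the idiom sums to 4 == ceil(log2(9)). The replacement computes
  //   32 - ctlz(8, false) = 32 - 28 = 4.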
  if (Instruction *Res = foldSquareSumInt(I))
    return Res;

  if (Instruction *Res = foldBinOpOfDisplacedShifts(I))
    return Res;

  if (Instruction *Res = foldBinOpOfSelectAndCastOfSelectCondition(I))
    return Res;

  return Changed ? &I : nullptr;
}

/// Eliminate an op from a linear interpolation (lerp) pattern.
static Instruction *factorizeLerp(BinaryOperator &I,
                                  InstCombiner::BuilderTy &Builder) {
  Value *X, *Y, *Z;
  if (!match(&I, m_c_FAdd(m_OneUse(m_c_FMul(m_Value(Y),
                                            m_OneUse(m_FSub(m_FPOne(),
                                                            m_Value(Z))))),
                          m_OneUse(m_c_FMul(m_Value(X), m_Deferred(Z))))))
    return nullptr;

  // (Y * (1.0 - Z)) + (X * Z) --> Y + Z * (X - Y) [8 commuted variants]
  Value *XY = Builder.CreateFSubFMF(X, Y, &I);
  Value *MulZ = Builder.CreateFMulFMF(Z, XY, &I);
  return BinaryOperator::CreateFAddFMF(Y, MulZ, &I);
}
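
// The lerp identity used above: Y * (1.0 - Z) + X * Z
//   = Y - Y * Z + X * Z = Y + Z * (X - Y),
// which eliminates one fmul. This is only valid under reassoc/nsz, which the
// caller (factorizeFAddFSub) has already asserted.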
/// Factor a common operand out of fadd/fsub of fmul/fdiv.
static Instruction *factorizeFAddFSub(BinaryOperator &I,
                                      InstCombiner::BuilderTy &Builder) {
  assert((I.getOpcode() == Instruction::FAdd ||
          I.getOpcode() == Instruction::FSub) && "Expecting fadd/fsub");
  assert(I.hasAllowReassoc() && I.hasNoSignedZeros() &&
         "FP factorization requires FMF");

  if (Instruction *Lerp = factorizeLerp(I, Builder))
    return Lerp;

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  if (!Op0->hasOneUse() || !Op1->hasOneUse())
    return nullptr;

  Value *X, *Y, *Z;
  bool IsFMul;
  if ((match(Op0, m_FMul(m_Value(X), m_Value(Z))) &&
       match(Op1, m_c_FMul(m_Value(Y), m_Specific(Z)))) ||
      (match(Op0, m_FMul(m_Value(Z), m_Value(X))) &&
       match(Op1, m_c_FMul(m_Value(Y), m_Specific(Z)))))
    IsFMul = true;
  else if (match(Op0, m_FDiv(m_Value(X), m_Value(Z))) &&
           match(Op1, m_FDiv(m_Value(Y), m_Specific(Z))))
    IsFMul = false;
  else
    return nullptr;

  // (X * Z) + (Y * Z) --> (X + Y) * Z
  // (X * Z) - (Y * Z) --> (X - Y) * Z
  // (X / Z) + (Y / Z) --> (X + Y) / Z
  // (X / Z) - (Y / Z) --> (X - Y) / Z
  bool IsFAdd = I.getOpcode() == Instruction::FAdd;
  Value *XY = IsFAdd ? Builder.CreateFAddFMF(X, Y, &I)
                     : Builder.CreateFSubFMF(X, Y, &I);

  // Bail out if we just created a denormal constant.
  // TODO: This is copied from a previous implementation. Is it necessary?
  const APFloat *C;
  if (match(XY, m_APFloat(C)) && !C->isNormal())
    return nullptr;

  return IsFMul ? BinaryOperator::CreateFMulFMF(XY, Z, &I)
                : BinaryOperator::CreateFDivFMF(XY, Z, &I);
}

Instruction *InstCombinerImpl::visitFAdd(BinaryOperator &I) {
  if (Value *V = simplifyFAddInst(I.getOperand(0), I.getOperand(1),
                                  I.getFastMathFlags(),
                                  SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  if (SimplifyAssociativeOrCommutative(I))
    return &I;

  if (Instruction *X = foldVectorBinop(I))
    return X;

  if (Instruction *Phi = foldBinopWithPhiOperands(I))
    return Phi;

  if (Instruction *FoldedFAdd = foldBinOpIntoSelectOrPhi(I))
    return FoldedFAdd;

  // (-X) + Y --> Y - X
  Value *X, *Y;
  if (match(&I, m_c_FAdd(m_FNeg(m_Value(X)), m_Value(Y))))
    return BinaryOperator::CreateFSubFMF(Y, X, &I);

  // Similar to above, but look through fmul/fdiv for the negated term.
  // (-X * Y) + Z --> Z - (X * Y) [4 commuted variants]
  Value *Z;
  if (match(&I, m_c_FAdd(m_OneUse(m_c_FMul(m_FNeg(m_Value(X)), m_Value(Y))),
                         m_Value(Z)))) {
    Value *XY = Builder.CreateFMulFMF(X, Y, &I);
    return BinaryOperator::CreateFSubFMF(Z, XY, &I);
  }
  // (-X / Y) + Z --> Z - (X / Y) [2 commuted variants]
  // (X / -Y) + Z --> Z - (X / Y) [2 commuted variants]
  if (match(&I, m_c_FAdd(m_OneUse(m_FDiv(m_FNeg(m_Value(X)), m_Value(Y))),
                         m_Value(Z))) ||
      match(&I, m_c_FAdd(m_OneUse(m_FDiv(m_Value(X), m_FNeg(m_Value(Y)))),
                         m_Value(Z)))) {
    Value *XY = Builder.CreateFDivFMF(X, Y, &I);
    return BinaryOperator::CreateFSubFMF(Z, XY, &I);
  }

  // Check for (fadd double (sitofp x), y), see if we can merge this into an
  // integer add followed by a promotion.
  if (Instruction *R = foldFBinOpOfIntCasts(I))
    return R;

  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  // Handle special cases for FAdd with selects feeding the operation
  if (Value *V = SimplifySelectsFeedingBinaryOp(I, LHS, RHS))
    return replaceInstUsesWith(I, V);

  if (I.hasAllowReassoc() && I.hasNoSignedZeros()) {
    if (Instruction *F = factorizeFAddFSub(I, Builder))
      return F;

    if (Instruction *F = foldSquareSumFP(I))
      return F;

    // Try to fold fadd into start value of reduction intrinsic.
    if (match(&I, m_c_FAdd(m_OneUse(m_Intrinsic<Intrinsic::vector_reduce_fadd>(
                               m_AnyZeroFP(), m_Value(X))),
                           m_Value(Y)))) {
      // fadd (rdx 0.0, X), Y --> rdx Y, X
      return replaceInstUsesWith(
          I, Builder.CreateIntrinsic(Intrinsic::vector_reduce_fadd,
                                     {X->getType()}, {Y, X}, &I));
    }
    const APFloat *StartC, *C;
    if (match(LHS, m_OneUse(m_Intrinsic<Intrinsic::vector_reduce_fadd>(
                       m_APFloat(StartC), m_Value(X)))) &&
        match(RHS, m_APFloat(C))) {
      // fadd (rdx StartC, X), C --> rdx (C + StartC), X
      Constant *NewStartC = ConstantFP::get(I.getType(), *C + *StartC);
      return replaceInstUsesWith(
          I, Builder.CreateIntrinsic(Intrinsic::vector_reduce_fadd,
                                     {X->getType()}, {NewStartC, X}, &I));
    }

    // (X * MulC) + X --> X * (MulC + 1.0)
    Constant *MulC;
    if (match(&I, m_c_FAdd(m_FMul(m_Value(X), m_ImmConstant(MulC)),
                           m_Deferred(X)))) {
      if (Constant *NewMulC = ConstantFoldBinaryOpOperands(
              Instruction::FAdd, MulC, ConstantFP::get(I.getType(), 1.0), DL))
        return BinaryOperator::CreateFMulFMF(X, NewMulC, &I);
    }

    // (-X - Y) + (X + Z) --> Z - Y
    if (match(&I, m_c_FAdd(m_FSub(m_FNeg(m_Value(X)), m_Value(Y)),
                           m_c_FAdd(m_Deferred(X), m_Value(Z)))))
      return BinaryOperator::CreateFSubFMF(Z, Y, &I);

    if (Value *V = FAddCombine(Builder).simplify(&I))
      return replaceInstUsesWith(I, V);
  }
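
  // For the fold below: maximum(X, Y) and minimum(X, Y) together return X and
  // Y in some order, and if either input is NaN both calls return NaN (in
  // which case X + Y is NaN as well), so the sum is unchanged.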
  // minimum(X, Y) + maximum(X, Y) => X + Y.
  if (match(&I,
            m_c_FAdd(m_Intrinsic<Intrinsic::maximum>(m_Value(X), m_Value(Y)),
                     m_c_Intrinsic<Intrinsic::minimum>(m_Deferred(X),
                                                       m_Deferred(Y))))) {
    BinaryOperator *Result = BinaryOperator::CreateFAddFMF(X, Y, &I);
    // We cannot preserve ninf if the nnan flag is not set.
    // If X is NaN and Y is Inf, the original program computed NaN + NaN,
    // while the optimized version computes NaN + Inf, which is poison with
    // the ninf flag.
    if (!Result->hasNoNaNs())
      Result->setHasNoInfs(false);
    return Result;
  }

  return nullptr;
}

/// Optimize differences of pointers into the same array into a size. Consider:
///  &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer
/// operands to the ptrtoint instructions for the LHS/RHS of the subtract.
Value *InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS,
                                                   Type *Ty, bool IsNUW) {
  // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
  // this.
  bool Swapped = false;
  GEPOperator *GEP1 = nullptr, *GEP2 = nullptr;
  if (!isa<GEPOperator>(LHS) && isa<GEPOperator>(RHS)) {
    std::swap(LHS, RHS);
    Swapped = true;
  }

  // Require at least one GEP with a common base pointer on both sides.
  if (auto *LHSGEP = dyn_cast<GEPOperator>(LHS)) {
    // (gep X, ...) - X
    if (LHSGEP->getOperand(0)->stripPointerCasts() ==
        RHS->stripPointerCasts()) {
      GEP1 = LHSGEP;
    } else if (auto *RHSGEP = dyn_cast<GEPOperator>(RHS)) {
      // (gep X, ...) - (gep X, ...)
      if (LHSGEP->getOperand(0)->stripPointerCasts() ==
          RHSGEP->getOperand(0)->stripPointerCasts()) {
        GEP1 = LHSGEP;
        GEP2 = RHSGEP;
      }
    }
  }

  if (!GEP1)
    return nullptr;

  // To avoid duplicating the offset arithmetic, rewrite the GEP to use the
  // computed offset. This may erase the original GEP, so be sure to cache the
  // inbounds flag before emitting the offset.
  // TODO: We should probably do this even if there is only one GEP.
  bool RewriteGEPs = GEP2 != nullptr;

  // Emit the offset of the GEP as an intptr_t.
  bool GEP1IsInBounds = GEP1->isInBounds();
  Value *Result = EmitGEPOffset(GEP1, RewriteGEPs);

  // If this is a single inbounds GEP and the original sub was nuw,
  // then the final multiplication is also nuw.
  if (auto *I = dyn_cast<Instruction>(Result))
    if (IsNUW && !GEP2 && !Swapped && GEP1IsInBounds &&
        I->getOpcode() == Instruction::Mul)
      I->setHasNoUnsignedWrap();

  // If we have a 2nd GEP of the same base pointer, subtract the offsets.
  // If both GEPs are inbounds, then the subtract does not have signed overflow.
  if (GEP2) {
    bool GEP2IsInBounds = GEP2->isInBounds();
    Value *Offset = EmitGEPOffset(GEP2, RewriteGEPs);
    Result = Builder.CreateSub(Result, Offset, "gepdiff", /* NUW */ false,
                               GEP1IsInBounds && GEP2IsInBounds);
  }
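
  // Illustrative (not in the original source): with a common base p and
  // inbounds GEPs,
  //   ptrtoint (gep inbounds i32, p, 10) - ptrtoint (gep inbounds i32, p, 4)
  // reduces to the byte-offset difference 40 - 16 = 24, and the sub above
  // gets the nsw flag because both GEPs are inbounds.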
  // If we have p - gep(p, ...) then we have to negate the result.
  if (Swapped)
    Result = Builder.CreateNeg(Result, "diff.neg");

  return Builder.CreateIntCast(Result, Ty, true);
}

static Instruction *foldSubOfMinMax(BinaryOperator &I,
                                    InstCombiner::BuilderTy &Builder) {
  Value *Op0 = I.getOperand(0);
  Value *Op1 = I.getOperand(1);
  Type *Ty = I.getType();
  auto *MinMax = dyn_cast<MinMaxIntrinsic>(Op1);
  if (!MinMax)
    return nullptr;

  // sub(add(X,Y), s/umin(X,Y)) --> s/umax(X,Y)
  // sub(add(X,Y), s/umax(X,Y)) --> s/umin(X,Y)
  Value *X = MinMax->getLHS();
  Value *Y = MinMax->getRHS();
  if (match(Op0, m_c_Add(m_Specific(X), m_Specific(Y))) &&
      (Op0->hasOneUse() || Op1->hasOneUse())) {
    Intrinsic::ID InvID = getInverseMinMaxIntrinsic(MinMax->getIntrinsicID());
    Function *F = Intrinsic::getDeclaration(I.getModule(), InvID, Ty);
    return CallInst::Create(F, {X, Y});
  }

  // sub(add(X,Y),umin(Y,Z)) --> add(X,usub.sat(Y,Z))
  // sub(add(X,Z),umin(Y,Z)) --> add(X,usub.sat(Z,Y))
  Value *Z;
  if (match(Op1, m_OneUse(m_UMin(m_Value(Y), m_Value(Z))))) {
    if (match(Op0, m_OneUse(m_c_Add(m_Specific(Y), m_Value(X))))) {
      Value *USub = Builder.CreateIntrinsic(Intrinsic::usub_sat, Ty, {Y, Z});
      return BinaryOperator::CreateAdd(X, USub);
    }
    if (match(Op0, m_OneUse(m_c_Add(m_Specific(Z), m_Value(X))))) {
      Value *USub = Builder.CreateIntrinsic(Intrinsic::usub_sat, Ty, {Z, Y});
      return BinaryOperator::CreateAdd(X, USub);
    }
  }

  // sub Op0, smin((sub nsw Op0, Z), 0) --> smax Op0, Z
  // sub Op0, smax((sub nsw Op0, Z), 0) --> smin Op0, Z
  if (MinMax->isSigned() && match(Y, m_ZeroInt()) &&
      match(X, m_NSWSub(m_Specific(Op0), m_Value(Z)))) {
    Intrinsic::ID InvID = getInverseMinMaxIntrinsic(MinMax->getIntrinsicID());
    Function *F = Intrinsic::getDeclaration(I.getModule(), InvID, Ty);
    return CallInst::Create(F, {Op0, Z});
  }

  return nullptr;
}

Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
  if (Value *V = simplifySubInst(I.getOperand(0), I.getOperand(1),
                                 I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
                                 SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  if (Instruction *X = foldVectorBinop(I))
    return X;

  if (Instruction *Phi = foldBinopWithPhiOperands(I))
    return Phi;

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);

  // If this is a 'B = x-(-A)', change to B = x+A.
  // We deal with this without involving Negator to preserve NSW flag.
  if (Value *V = dyn_castNegVal(Op1)) {
    BinaryOperator *Res = BinaryOperator::CreateAdd(Op0, V);

    if (const auto *BO = dyn_cast<BinaryOperator>(Op1)) {
      assert(BO->getOpcode() == Instruction::Sub &&
             "Expected a subtraction operator!");
      if (BO->hasNoSignedWrap() && I.hasNoSignedWrap())
        Res->setHasNoSignedWrap(true);
    } else {
      if (cast<Constant>(Op1)->isNotMinSignedValue() && I.hasNoSignedWrap())
        Res->setHasNoSignedWrap(true);
    }

    return Res;
  }

  // Try this before Negator to preserve NSW flag.
  if (Instruction *R = factorizeMathWithShlOps(I, Builder))
    return R;

  Constant *C;
  if (match(Op0, m_ImmConstant(C))) {
    Value *X;
    Constant *C2;

    // C-(X+C2) --> (C-C2)-X
    if (match(Op1, m_Add(m_Value(X), m_ImmConstant(C2)))) {
      // C-C2 never overflows. If both the outer sub C-(X+C2) and the inner
      // add (X+C2) have NSW/NUW, then (C-C2)-X can keep NSW/NUW.
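      // e.g. 10 - (X + 3) --> 7 - X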
      bool WillNotSOV = willNotOverflowSignedSub(C, C2, I);
      BinaryOperator *Res =
          BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X);
      auto *OBO1 = cast<OverflowingBinaryOperator>(Op1);
      Res->setHasNoSignedWrap(I.hasNoSignedWrap() && OBO1->hasNoSignedWrap() &&
                              WillNotSOV);
      Res->setHasNoUnsignedWrap(I.hasNoUnsignedWrap() &&
                                OBO1->hasNoUnsignedWrap());
      return Res;
    }
  }

  auto TryToNarrowDeduceFlags = [this, &I, &Op0, &Op1]() -> Instruction * {
    if (Instruction *Ext = narrowMathIfNoOverflow(I))
      return Ext;

    bool Changed = false;
    if (!I.hasNoSignedWrap() && willNotOverflowSignedSub(Op0, Op1, I)) {
      Changed = true;
      I.setHasNoSignedWrap(true);
    }
    if (!I.hasNoUnsignedWrap() && willNotOverflowUnsignedSub(Op0, Op1, I)) {
      Changed = true;
      I.setHasNoUnsignedWrap(true);
    }

    return Changed ? &I : nullptr;
  };

  // First, let's try to interpret `sub a, b` as `add a, (sub 0, b)`,
  // and let's try to sink `(sub 0, b)` into `b` itself. But only if this isn't
  // a pure negation used by a select that looks like abs/nabs.
  bool IsNegation = match(Op0, m_ZeroInt());
  if (!IsNegation || none_of(I.users(), [&I, Op1](const User *U) {
        const Instruction *UI = dyn_cast<Instruction>(U);
        if (!UI)
          return false;
        return match(UI,
                     m_Select(m_Value(), m_Specific(Op1), m_Specific(&I))) ||
               match(UI, m_Select(m_Value(), m_Specific(&I), m_Specific(Op1)));
      })) {
    if (Value *NegOp1 = Negator::Negate(IsNegation, /* IsNSW */ IsNegation &&
                                                        I.hasNoSignedWrap(),
                                        Op1, *this))
      return BinaryOperator::CreateAdd(NegOp1, Op0);
  }
  if (IsNegation)
    return TryToNarrowDeduceFlags(); // Should have been handled in Negator!

  // (A*B)-(A*C) -> A*(B-C) etc
  if (Value *V = foldUsingDistributiveLaws(I))
    return replaceInstUsesWith(I, V);

  if (I.getType()->isIntOrIntVectorTy(1))
    return BinaryOperator::CreateXor(Op0, Op1);

  // Replace (-1 - A) with (~A).
  if (match(Op0, m_AllOnes()))
    return BinaryOperator::CreateNot(Op1);

  // (X + -1) - Y --> ~Y + X
  Value *X, *Y;
  if (match(Op0, m_OneUse(m_Add(m_Value(X), m_AllOnes()))))
    return BinaryOperator::CreateAdd(Builder.CreateNot(Op1), X);

  // Reassociate sub/add sequences to create more add instructions and
  // reduce dependency chains:
  // ((X - Y) + Z) - Op1 --> (X + Z) - (Y + Op1)
  Value *Z;
  if (match(Op0, m_OneUse(m_c_Add(m_OneUse(m_Sub(m_Value(X), m_Value(Y))),
                                  m_Value(Z))))) {
    Value *XZ = Builder.CreateAdd(X, Z);
    Value *YW = Builder.CreateAdd(Y, Op1);
    return BinaryOperator::CreateSub(XZ, YW);
  }

  // ((X - Y) - Op1)  -->  X - (Y + Op1)
  if (match(Op0, m_OneUse(m_Sub(m_Value(X), m_Value(Y))))) {
    OverflowingBinaryOperator *LHSSub = cast<OverflowingBinaryOperator>(Op0);
    bool HasNUW = I.hasNoUnsignedWrap() && LHSSub->hasNoUnsignedWrap();
    bool HasNSW = HasNUW && I.hasNoSignedWrap() && LHSSub->hasNoSignedWrap();
    Value *Add = Builder.CreateAdd(Y, Op1, "", /* HasNUW */ HasNUW,
                                   /* HasNSW */ HasNSW);
    BinaryOperator *Sub = BinaryOperator::CreateSub(X, Add);
    Sub->setHasNoUnsignedWrap(HasNUW);
    Sub->setHasNoSignedWrap(HasNSW);
    return Sub;
  }

  {
    // (X + Z) - (Y + Z) --> (X - Y)
    // This is done in other passes, but we want to be able to consume this
    // pattern in InstCombine so we can generate it without creating infinite
    // loops.
    if (match(Op0, m_Add(m_Value(X), m_Value(Z))) &&
        match(Op1, m_c_Add(m_Value(Y), m_Specific(Z))))
      return BinaryOperator::CreateSub(X, Y);
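
    // e.g. for the next fold: (X + 5) - (Y + 2) --> (X - Y) + 3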
    // (X + C0) - (Y + C1) --> (X - Y) + (C0 - C1)
    Constant *CX, *CY;
    if (match(Op0, m_OneUse(m_Add(m_Value(X), m_ImmConstant(CX)))) &&
        match(Op1, m_OneUse(m_Add(m_Value(Y), m_ImmConstant(CY))))) {
      Value *OpsSub = Builder.CreateSub(X, Y);
      Constant *ConstsSub = ConstantExpr::getSub(CX, CY);
      return BinaryOperator::CreateAdd(OpsSub, ConstsSub);
    }
  }

  // (~X) - (~Y) --> Y - X
  {
    // Need to ensure we can consume at least one of the `not` instructions,
    // otherwise this can inf loop.
    bool ConsumesOp0, ConsumesOp1;
    if (isFreeToInvert(Op0, Op0->hasOneUse(), ConsumesOp0) &&
        isFreeToInvert(Op1, Op1->hasOneUse(), ConsumesOp1) &&
        (ConsumesOp0 || ConsumesOp1)) {
      Value *NotOp0 = getFreelyInverted(Op0, Op0->hasOneUse(), &Builder);
      Value *NotOp1 = getFreelyInverted(Op1, Op1->hasOneUse(), &Builder);
      assert(NotOp0 != nullptr && NotOp1 != nullptr &&
             "isFreeToInvert desynced with getFreelyInverted");
      return BinaryOperator::CreateSub(NotOp1, NotOp0);
    }
  }

  auto m_AddRdx = [](Value *&Vec) {
    return m_OneUse(m_Intrinsic<Intrinsic::vector_reduce_add>(m_Value(Vec)));
  };
  Value *V0, *V1;
  if (match(Op0, m_AddRdx(V0)) && match(Op1, m_AddRdx(V1)) &&
      V0->getType() == V1->getType()) {
    // Difference of sums is sum of differences:
    // add_rdx(V0) - add_rdx(V1) --> add_rdx(V0 - V1)
    Value *Sub = Builder.CreateSub(V0, V1);
    Value *Rdx = Builder.CreateIntrinsic(Intrinsic::vector_reduce_add,
                                         {Sub->getType()}, {Sub});
    return replaceInstUsesWith(I, Rdx);
  }

  if (Constant *C = dyn_cast<Constant>(Op0)) {
    Value *X;
    if (match(Op1, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1))
      // C - (zext bool) --> bool ? C - 1 : C
      return SelectInst::Create(X, InstCombiner::SubOne(C), C);
    if (match(Op1, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1))
      // C - (sext bool) --> bool ? C + 1 : C
      return SelectInst::Create(X, InstCombiner::AddOne(C), C);

    // C - ~X == X + (1+C)
    if (match(Op1, m_Not(m_Value(X))))
      return BinaryOperator::CreateAdd(X, InstCombiner::AddOne(C));

    // Try to fold constant sub into select arguments.
    if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
      if (Instruction *R = FoldOpIntoSelect(I, SI))
        return R;

    // Try to fold constant sub into PHI values.
    if (PHINode *PN = dyn_cast<PHINode>(Op1))
      if (Instruction *R = foldOpIntoPhi(I, PN))
        return R;

    Constant *C2;

    // C-(C2-X) --> X+(C-C2)
    if (match(Op1, m_Sub(m_ImmConstant(C2), m_Value(X))))
      return BinaryOperator::CreateAdd(X, ConstantExpr::getSub(C, C2));
  }

  const APInt *Op0C;
  if (match(Op0, m_APInt(Op0C))) {
    if (Op0C->isMask()) {
      // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
      // zero. We don't use information from dominating conditions so this
      // transform is easier to reverse if necessary.
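      // e.g. if Op0 is 15 and the high bits of Op1 are known zero, then Op1
      // is in [0, 15] and 15 - Op1 == 15 ^ Op1 (no borrows leave the low
      // 4 bits).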
      KnownBits RHSKnown = llvm::computeKnownBits(
          Op1, 0, SQ.getWithInstruction(&I).getWithoutDomCondCache());
      if ((*Op0C | RHSKnown.Zero).isAllOnes())
        return BinaryOperator::CreateXor(Op1, Op0);
    }

    // C - ((C3 -nuw X) & C2) --> (C - (C2 & C3)) + (X & C2) when:
    // (C3 - ((C2 & C3) - 1)) is pow2
    // ((C2 + C3) & ((C2 & C3) - 1)) == ((C2 & C3) - 1)
    // C2 is negative pow2 || sub nuw
    const APInt *C2, *C3;
    BinaryOperator *InnerSub;
    if (match(Op1, m_OneUse(m_And(m_BinOp(InnerSub), m_APInt(C2)))) &&
        match(InnerSub, m_Sub(m_APInt(C3), m_Value(X))) &&
        (InnerSub->hasNoUnsignedWrap() || C2->isNegatedPowerOf2())) {
      APInt C2AndC3 = *C2 & *C3;
      APInt C2AndC3Minus1 = C2AndC3 - 1;
      APInt C2AddC3 = *C2 + *C3;
      if ((*C3 - C2AndC3Minus1).isPowerOf2() &&
          C2AndC3Minus1.isSubsetOf(C2AddC3)) {
        Value *And = Builder.CreateAnd(X, ConstantInt::get(I.getType(), *C2));
        return BinaryOperator::CreateAdd(
            And, ConstantInt::get(I.getType(), *Op0C - C2AndC3));
      }
    }
  }

  {
    Value *Y;
    // X-(X+Y) == -Y    X-(Y+X) == -Y
    if (match(Op1, m_c_Add(m_Specific(Op0), m_Value(Y))))
      return BinaryOperator::CreateNeg(Y);

    // (X-Y)-X == -Y
    if (match(Op0, m_Sub(m_Specific(Op1), m_Value(Y))))
      return BinaryOperator::CreateNeg(Y);
  }

  // (sub (or A, B) (and A, B)) --> (xor A, B)
  {
    Value *A, *B;
    if (match(Op1, m_And(m_Value(A), m_Value(B))) &&
        match(Op0, m_c_Or(m_Specific(A), m_Specific(B))))
      return BinaryOperator::CreateXor(A, B);
  }

  // (sub (add A, B) (or A, B)) --> (and A, B)
  {
    Value *A, *B;
    if (match(Op0, m_Add(m_Value(A), m_Value(B))) &&
        match(Op1, m_c_Or(m_Specific(A), m_Specific(B))))
      return BinaryOperator::CreateAnd(A, B);
  }

  // (sub (add A, B) (and A, B)) --> (or A, B)
  {
    Value *A, *B;
    if (match(Op0, m_Add(m_Value(A), m_Value(B))) &&
        match(Op1, m_c_And(m_Specific(A), m_Specific(B))))
      return BinaryOperator::CreateOr(A, B);
  }

  // (sub (and A, B) (or A, B)) --> neg (xor A, B)
  {
    Value *A, *B;
    if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
        match(Op1, m_c_Or(m_Specific(A), m_Specific(B))) &&
        (Op0->hasOneUse() || Op1->hasOneUse()))
      return BinaryOperator::CreateNeg(Builder.CreateXor(A, B));
  }

  // (sub (or A, B), (xor A, B)) --> (and A, B)
  {
    Value *A, *B;
    if (match(Op1, m_Xor(m_Value(A), m_Value(B))) &&
        match(Op0, m_c_Or(m_Specific(A), m_Specific(B))))
      return BinaryOperator::CreateAnd(A, B);
  }

  // (sub (xor A, B) (or A, B)) --> neg (and A, B)
  {
    Value *A, *B;
    if (match(Op0, m_Xor(m_Value(A), m_Value(B))) &&
        match(Op1, m_c_Or(m_Specific(A), m_Specific(B))) &&
        (Op0->hasOneUse() || Op1->hasOneUse()))
      return BinaryOperator::CreateNeg(Builder.CreateAnd(A, B));
  }

  {
    Value *Y;
    // ((X | Y) - X) --> (~X & Y)
    if (match(Op0, m_OneUse(m_c_Or(m_Value(Y), m_Specific(Op1)))))
      return BinaryOperator::CreateAnd(
          Y, Builder.CreateNot(Op1, Op1->getName() + ".not"));
  }

  {
    // (sub (and Op1, (neg X)), Op1) --> neg (and Op1, (add X, -1))
    Value *X;
    if (match(Op0, m_OneUse(m_c_And(m_Specific(Op1),
                                    m_OneUse(m_Neg(m_Value(X))))))) {
      return BinaryOperator::CreateNeg(Builder.CreateAnd(
          Op1, Builder.CreateAdd(X, Constant::getAllOnesValue(I.getType()))));
    }
  }
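
  // For the next fold: Op1 == (Op1 & C) + (Op1 & ~C) with no carries (the two
  // masked halves have disjoint bits), so (Op1 & C) - Op1 == -(Op1 & ~C).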
  {
    // (sub (and Op1, C), Op1) --> neg (and Op1, ~C)
    Constant *C;
    if (match(Op0, m_OneUse(m_And(m_Specific(Op1), m_Constant(C))))) {
      return BinaryOperator::CreateNeg(
          Builder.CreateAnd(Op1, Builder.CreateNot(C)));
    }
  }
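
  // For the next fold: when C is true, sext(C) is all-ones, so X ^ -1 == ~X
  // and ~X - (-1) == ~X + 1 == -X; when C is false, the xor and the sub are
  // both no-ops and the result is X.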
  {
    // (sub (xor X, (sext C)), (sext C)) => (select C, (neg X), X)
    // (sub (sext C), (xor X, (sext C))) => (select C, X, (neg X))
    Value *C, *X;
    auto m_SubXorCmp = [&C, &X](Value *LHS, Value *RHS) {
      return match(LHS, m_OneUse(m_c_Xor(m_Value(X), m_Specific(RHS)))) &&
             match(RHS, m_SExt(m_Value(C))) &&
             (C->getType()->getScalarSizeInBits() == 1);
    };
    if (m_SubXorCmp(Op0, Op1))
      return SelectInst::Create(C, Builder.CreateNeg(X), X);
    if (m_SubXorCmp(Op1, Op0))
      return SelectInst::Create(C, X, Builder.CreateNeg(X));
  }

  if (Instruction *R = tryFoldInstWithCtpopWithNot(&I))
    return R;

  if (Instruction *R = foldSubOfMinMax(I, Builder))
    return R;

  {
    // If we have a subtraction between some value and a select between
    // said value and something else, sink subtraction into select hands, i.e.:
    //   sub (select %Cond, %TrueVal, %FalseVal), %Op1
    //     ->
    //   select %Cond, (sub %TrueVal, %Op1), (sub %FalseVal, %Op1)
    //  or
    //   sub %Op0, (select %Cond, %TrueVal, %FalseVal)
    //     ->
    //   select %Cond, (sub %Op0, %TrueVal), (sub %Op0, %FalseVal)
    // This will result in select between new subtraction and 0.
    auto SinkSubIntoSelect =
        [Ty = I.getType()](Value *Select, Value *OtherHandOfSub,
                           auto SubBuilder) -> Instruction * {
      Value *Cond, *TrueVal, *FalseVal;
      if (!match(Select, m_OneUse(m_Select(m_Value(Cond), m_Value(TrueVal),
                                           m_Value(FalseVal)))))
        return nullptr;
      if (OtherHandOfSub != TrueVal && OtherHandOfSub != FalseVal)
        return nullptr;
      // While it is really tempting to just create two subtractions and let
      // InstCombine fold one of those to 0, it isn't possible to do so
      // because of worklist visitation order. So ugly it is.
      bool OtherHandOfSubIsTrueVal = OtherHandOfSub == TrueVal;
      Value *NewSub = SubBuilder(OtherHandOfSubIsTrueVal ? FalseVal : TrueVal);
      Constant *Zero = Constant::getNullValue(Ty);
      SelectInst *NewSel =
          SelectInst::Create(Cond, OtherHandOfSubIsTrueVal ? Zero : NewSub,
                             OtherHandOfSubIsTrueVal ? NewSub : Zero);
      // Preserve prof metadata if any.
      NewSel->copyMetadata(cast<Instruction>(*Select));
      return NewSel;
    };
    if (Instruction *NewSel = SinkSubIntoSelect(
            /*Select=*/Op0, /*OtherHandOfSub=*/Op1,
            [Builder = &Builder, Op1](Value *OtherHandOfSelect) {
              return Builder->CreateSub(OtherHandOfSelect,
                                        /*OtherHandOfSub=*/Op1);
            }))
      return NewSel;
    if (Instruction *NewSel = SinkSubIntoSelect(
            /*Select=*/Op1, /*OtherHandOfSub=*/Op0,
            [Builder = &Builder, Op0](Value *OtherHandOfSelect) {
              return Builder->CreateSub(/*OtherHandOfSub=*/Op0,
                                        OtherHandOfSelect);
            }))
      return NewSel;
  }

  // (X - (X & Y)) --> (X & ~Y)
  if (match(Op1, m_c_And(m_Specific(Op0), m_Value(Y))) &&
      (Op1->hasOneUse() || isa<Constant>(Y)))
    return BinaryOperator::CreateAnd(
        Op0, Builder.CreateNot(Y, Y->getName() + ".not"));

  // ~X - Min/Max(~X, Y) -> ~Min/Max(X, ~Y) - X
  // ~X - Min/Max(Y, ~X) -> ~Min/Max(X, ~Y) - X
  // Min/Max(~X, Y) - ~X -> X - ~Min/Max(X, ~Y)
  // Min/Max(Y, ~X) - ~X -> X - ~Min/Max(X, ~Y)
  // As long as Y is freely invertible, this will be neutral or a win.
  // Note: We don't generate the inverse max/min, just create the 'not' of
  // it and let other folds do the rest.
  if (match(Op0, m_Not(m_Value(X))) &&
      match(Op1, m_c_MaxOrMin(m_Specific(Op0), m_Value(Y))) &&
      !Op0->hasNUsesOrMore(3) && isFreeToInvert(Y, Y->hasOneUse())) {
    Value *Not = Builder.CreateNot(Op1);
    return BinaryOperator::CreateSub(Not, X);
  }
  if (match(Op1, m_Not(m_Value(X))) &&
      match(Op0, m_c_MaxOrMin(m_Specific(Op1), m_Value(Y))) &&
      !Op1->hasNUsesOrMore(3) && isFreeToInvert(Y, Y->hasOneUse())) {
    Value *Not = Builder.CreateNot(Op0);
    return BinaryOperator::CreateSub(X, Not);
  }

  // Optimize pointer differences into the same array into a size. Consider:
  //  &A[10] - &A[0]: we should compile this to "10".
  Value *LHSOp, *RHSOp;
  if (match(Op0, m_PtrToInt(m_Value(LHSOp))) &&
      match(Op1, m_PtrToInt(m_Value(RHSOp))))
    if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType(),
                                               I.hasNoUnsignedWrap()))
      return replaceInstUsesWith(I, Res);

  // trunc(p)-trunc(q) -> trunc(p-q)
  if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) &&
      match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp)))))
    if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType(),
                                               /* IsNUW */ false))
      return replaceInstUsesWith(I, Res);

  // Canonicalize a shifty way to code absolute value to the common pattern.
  // There are 2 potential commuted variants.
  // We're relying on the fact that we only do this transform when the shift has
  // exactly 2 uses and the xor has exactly 1 use (otherwise, we might increase
  // instructions).
  Value *A;
  const APInt *ShAmt;
  Type *Ty = I.getType();
  unsigned BitWidth = Ty->getScalarSizeInBits();
  if (match(Op1, m_AShr(m_Value(A), m_APInt(ShAmt))) &&
      Op1->hasNUses(2) && *ShAmt == BitWidth - 1 &&
      match(Op0, m_OneUse(m_c_Xor(m_Specific(A), m_Specific(Op1))))) {
    // B = ashr i32 A, 31  ; smear the sign bit
    // sub (xor A, B), B   ; flip bits if negative and subtract -1 (add 1)
    // --> (A < 0) ? -A : A
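    // If the sub had nuw, the negative case cannot occur: for A < 0 the
    // pattern computes ~A - (-1), which wraps unsigned unless A == 0. The
    // negated arm is therefore dead, and a null value suffices for it.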
    Value *IsNeg = Builder.CreateIsNeg(A);
    // Copy the nsw flags from the sub to the negate.
    Value *NegA = I.hasNoUnsignedWrap()
                      ? Constant::getNullValue(A->getType())
                      : Builder.CreateNeg(A, "", I.hasNoSignedWrap());
    return SelectInst::Create(IsNeg, NegA, A);
  }

  // If we are subtracting a low-bit masked subset of some value from an add
  // of that same value with no low bits changed, that is clearing some low bits
  // of the sum:
  // sub (X + AddC), (X & AndC) --> and (X + AddC), ~AndC
  const APInt *AddC, *AndC;
  if (match(Op0, m_Add(m_Value(X), m_APInt(AddC))) &&
      match(Op1, m_And(m_Specific(X), m_APInt(AndC)))) {
    unsigned Cttz = AddC->countr_zero();
    APInt HighMask(APInt::getHighBitsSet(BitWidth, BitWidth - Cttz));
    if ((HighMask & *AndC).isZero())
      return BinaryOperator::CreateAnd(Op0, ConstantInt::get(Ty, ~(*AndC)));
  }

  if (Instruction *V =
          canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(I))
    return V;

  // X - usub.sat(X, Y) => umin(X, Y)
  if (match(Op1, m_OneUse(m_Intrinsic<Intrinsic::usub_sat>(m_Specific(Op0),
                                                           m_Value(Y)))))
    return replaceInstUsesWith(
        I, Builder.CreateIntrinsic(Intrinsic::umin, {I.getType()}, {Op0, Y}));

  // umax(X, Op1) - Op1 --> usub.sat(X, Op1)
  // TODO: The one-use restriction is not strictly necessary, but it may
  //       require improving other pattern matching and/or codegen.
  if (match(Op0, m_OneUse(m_c_UMax(m_Value(X), m_Specific(Op1)))))
    return replaceInstUsesWith(
        I, Builder.CreateIntrinsic(Intrinsic::usub_sat, {Ty}, {X, Op1}));

  // Op0 - umin(X, Op0) --> usub.sat(Op0, X)
  if (match(Op1, m_OneUse(m_c_UMin(m_Value(X), m_Specific(Op0)))))
    return replaceInstUsesWith(
        I, Builder.CreateIntrinsic(Intrinsic::usub_sat, {Ty}, {Op0, X}));

  // Op0 - umax(X, Op0) --> 0 - usub.sat(X, Op0)
  if (match(Op1, m_OneUse(m_c_UMax(m_Value(X), m_Specific(Op0))))) {
    Value *USub = Builder.CreateIntrinsic(Intrinsic::usub_sat, {Ty}, {X, Op0});
    return BinaryOperator::CreateNeg(USub);
  }

  // umin(X, Op1) - Op1 --> 0 - usub.sat(Op1, X)
  if (match(Op0, m_OneUse(m_c_UMin(m_Value(X), m_Specific(Op1))))) {
    Value *USub = Builder.CreateIntrinsic(Intrinsic::usub_sat, {Ty}, {Op1, X});
    return BinaryOperator::CreateNeg(USub);
  }

  // C - ctpop(X) => ctpop(~X) if C is bitwidth
  if (match(Op0, m_SpecificInt(BitWidth)) &&
      match(Op1, m_OneUse(m_Intrinsic<Intrinsic::ctpop>(m_Value(X)))))
    return replaceInstUsesWith(
        I, Builder.CreateIntrinsic(Intrinsic::ctpop, {I.getType()},
                                   {Builder.CreateNot(X)}));

  // Reduce multiplies for difference-of-squares by factoring:
  // (X * X) - (Y * Y) --> (X + Y) * (X - Y)
  if (match(Op0, m_OneUse(m_Mul(m_Value(X), m_Deferred(X)))) &&
      match(Op1, m_OneUse(m_Mul(m_Value(Y), m_Deferred(Y))))) {
    auto *OBO0 = cast<OverflowingBinaryOperator>(Op0);
    auto *OBO1 = cast<OverflowingBinaryOperator>(Op1);
    bool PropagateNSW = I.hasNoSignedWrap() && OBO0->hasNoSignedWrap() &&
                        OBO1->hasNoSignedWrap() && BitWidth > 2;
    bool PropagateNUW = I.hasNoUnsignedWrap() && OBO0->hasNoUnsignedWrap() &&
                        OBO1->hasNoUnsignedWrap() && BitWidth > 1;
    Value *Add = Builder.CreateAdd(X, Y, "add", PropagateNUW, PropagateNSW);
    Value *Sub = Builder.CreateSub(X, Y, "sub", PropagateNUW, PropagateNSW);
    Value *Mul = Builder.CreateMul(Add, Sub, "", PropagateNUW, PropagateNSW);
    return replaceInstUsesWith(I, Mul);
  }
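
  // For the next fold: smax(X, Y) - smin(X, Y) is |X - Y|. Either wrap flag on
  // the outer sub implies |X - Y| fits in the non-negative signed range, so
  // the new X - Y cannot overflow signed and abs can treat INT_MIN as poison.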
  // max(X,Y) nsw/nuw - min(X,Y) --> abs(X nsw - Y)
  if (match(Op0, m_OneUse(m_c_SMax(m_Value(X), m_Value(Y)))) &&
      match(Op1, m_OneUse(m_c_SMin(m_Specific(X), m_Specific(Y))))) {
    if (I.hasNoUnsignedWrap() || I.hasNoSignedWrap()) {
      Value *Sub =
          Builder.CreateSub(X, Y, "sub", /*HasNUW=*/false, /*HasNSW=*/true);
      Value *Call =
          Builder.CreateBinaryIntrinsic(Intrinsic::abs, Sub, Builder.getTrue());
      return replaceInstUsesWith(I, Call);
    }
  }

  if (Instruction *Res = foldBinOpOfSelectAndCastOfSelectCondition(I))
    return Res;

  return TryToNarrowDeduceFlags();
}

/// This eliminates floating-point negation in either 'fneg(X)' or
/// 'fsub(-0.0, X)' form by combining into a constant operand.
static Instruction *foldFNegIntoConstant(Instruction &I, const DataLayout &DL) {
  // This is limited with one-use because fneg is assumed better for
  // reassociation and cheaper in codegen than fmul/fdiv.
  // TODO: Should the m_OneUse restriction be removed?
  Instruction *FNegOp;
  if (!match(&I, m_FNeg(m_OneUse(m_Instruction(FNegOp)))))
    return nullptr;

  Value *X;
  Constant *C;

  // Fold negation into constant operand.
  // -(X * C) --> X * (-C)
  if (match(FNegOp, m_FMul(m_Value(X), m_Constant(C))))
    if (Constant *NegC = ConstantFoldUnaryOpOperand(Instruction::FNeg, C, DL))
      return BinaryOperator::CreateFMulFMF(X, NegC, &I);
  // -(X / C) --> X / (-C)
  if (match(FNegOp, m_FDiv(m_Value(X), m_Constant(C))))
    if (Constant *NegC = ConstantFoldUnaryOpOperand(Instruction::FNeg, C, DL))
      return BinaryOperator::CreateFDivFMF(X, NegC, &I);
  // -(C / X) --> (-C) / X
  if (match(FNegOp, m_FDiv(m_Constant(C), m_Value(X))))
    if (Constant *NegC = ConstantFoldUnaryOpOperand(Instruction::FNeg, C, DL)) {
      Instruction *FDiv = BinaryOperator::CreateFDivFMF(NegC, X, &I);

      // Intersect 'nsz' and 'ninf' because those special value exceptions may
      // not apply to the fdiv. Everything else propagates from the fneg.
      // TODO: We could propagate nsz/ninf from fdiv alone?
      FastMathFlags FMF = I.getFastMathFlags();
      FastMathFlags OpFMF = FNegOp->getFastMathFlags();
      FDiv->setHasNoSignedZeros(FMF.noSignedZeros() && OpFMF.noSignedZeros());
      FDiv->setHasNoInfs(FMF.noInfs() && OpFMF.noInfs());
      return FDiv;
    }
  // With NSZ [ counter-example with -0.0: -(-0.0 + 0.0) != 0.0 + -0.0 ]:
  // -(X + C) --> -X + -C --> -C - X
  if (I.hasNoSignedZeros() && match(FNegOp, m_FAdd(m_Value(X), m_Constant(C))))
    if (Constant *NegC = ConstantFoldUnaryOpOperand(Instruction::FNeg, C, DL))
      return BinaryOperator::CreateFSubFMF(NegC, X, &I);

  return nullptr;
}

Instruction *InstCombinerImpl::hoistFNegAboveFMulFDiv(Value *FNegOp,
                                                      Instruction &FMFSource) {
  Value *X, *Y;
  if (match(FNegOp, m_FMul(m_Value(X), m_Value(Y)))) {
    return cast<Instruction>(Builder.CreateFMulFMF(
        Builder.CreateFNegFMF(X, &FMFSource), Y, &FMFSource));
  }

  if (match(FNegOp, m_FDiv(m_Value(X), m_Value(Y)))) {
    return cast<Instruction>(Builder.CreateFDivFMF(
        Builder.CreateFNegFMF(X, &FMFSource), Y, &FMFSource));
  }

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(FNegOp)) {
    // Make sure to preserve flags and metadata on the call.
    if (II->getIntrinsicID() == Intrinsic::ldexp) {
      FastMathFlags FMF = FMFSource.getFastMathFlags() | II->getFastMathFlags();
      IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
      Builder.setFastMathFlags(FMF);

      CallInst *New = Builder.CreateCall(
          II->getCalledFunction(),
          {Builder.CreateFNeg(II->getArgOperand(0)), II->getArgOperand(1)});
      New->copyMetadata(*II);
      return New;
    }
  }

  return nullptr;
}

Instruction *InstCombinerImpl::visitFNeg(UnaryOperator &I) {
  Value *Op = I.getOperand(0);

  if (Value *V = simplifyFNegInst(Op, I.getFastMathFlags(),
                                  getSimplifyQuery().getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  if (Instruction *X = foldFNegIntoConstant(I, DL))
    return X;

  Value *X, *Y;

  // If we can ignore the sign of zeros: -(X - Y) --> (Y - X)
  if (I.hasNoSignedZeros() &&
      match(Op, m_OneUse(m_FSub(m_Value(X), m_Value(Y)))))
    return BinaryOperator::CreateFSubFMF(Y, X, &I);

  Value *OneUse;
  if (!match(Op, m_OneUse(m_Value(OneUse))))
    return nullptr;

  if (Instruction *R = hoistFNegAboveFMulFDiv(OneUse, I))
    return replaceInstUsesWith(I, R);

  // Try to eliminate fneg if at least 1 arm of the select is negated.
  Value *Cond;
  if (match(OneUse, m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))) {
    // Unlike most transforms, this one is not safe to propagate nsz unless
    // it is present on the original select. We union the flags from the select
    // and fneg and then remove nsz if needed.
    auto propagateSelectFMF = [&](SelectInst *S, bool CommonOperand) {
      S->copyFastMathFlags(&I);
      if (auto *OldSel = dyn_cast<SelectInst>(Op)) {
        FastMathFlags FMF = I.getFastMathFlags() | OldSel->getFastMathFlags();
        S->setFastMathFlags(FMF);
        if (!OldSel->hasNoSignedZeros() && !CommonOperand &&
            !isGuaranteedNotToBeUndefOrPoison(OldSel->getCondition()))
          S->setHasNoSignedZeros(false);
      }
    };
    // -(Cond ? -P : Y) --> Cond ? P : -Y
    Value *P;
    if (match(X, m_FNeg(m_Value(P)))) {
      Value *NegY = Builder.CreateFNegFMF(Y, &I, Y->getName() + ".neg");
      SelectInst *NewSel = SelectInst::Create(Cond, P, NegY);
      propagateSelectFMF(NewSel, P == Y);
      return NewSel;
    }
    // -(Cond ? X : -P) --> Cond ? -X : P
    if (match(Y, m_FNeg(m_Value(P)))) {
      Value *NegX = Builder.CreateFNegFMF(X, &I, X->getName() + ".neg");
      SelectInst *NewSel = SelectInst::Create(Cond, NegX, P);
      propagateSelectFMF(NewSel, P == X);
      return NewSel;
    }

    // -(Cond ? X : C) --> Cond ? -X : -C
    // -(Cond ? C : Y) --> Cond ? -C : -Y
    if (match(X, m_ImmConstant()) || match(Y, m_ImmConstant())) {
      Value *NegX = Builder.CreateFNegFMF(X, &I, X->getName() + ".neg");
      Value *NegY = Builder.CreateFNegFMF(Y, &I, Y->getName() + ".neg");
      SelectInst *NewSel = SelectInst::Create(Cond, NegX, NegY);
      propagateSelectFMF(NewSel, /*CommonOperand=*/true);
      return NewSel;
    }
  }
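
  // For the fold below: copysign takes its sign bit from the second operand
  // alone, so negating Y is equivalent to negating the whole result.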
  // fneg (copysign x, y) -> copysign x, (fneg y)
  if (match(OneUse, m_CopySign(m_Value(X), m_Value(Y)))) {
    // The source copysign has an additional value input, so we can't propagate
    // flags the copysign doesn't also have.
    FastMathFlags FMF = I.getFastMathFlags();
    FMF &= cast<FPMathOperator>(OneUse)->getFastMathFlags();

    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
    Builder.setFastMathFlags(FMF);

    Value *NegY = Builder.CreateFNeg(Y);
    Value *NewCopySign = Builder.CreateCopySign(X, NegY);
    return replaceInstUsesWith(I, NewCopySign);
  }

  return nullptr;
}

Instruction *InstCombinerImpl::visitFSub(BinaryOperator &I) {
  if (Value *V = simplifyFSubInst(I.getOperand(0), I.getOperand(1),
                                  I.getFastMathFlags(),
                                  getSimplifyQuery().getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  if (Instruction *X = foldVectorBinop(I))
    return X;

  if (Instruction *Phi = foldBinopWithPhiOperands(I))
    return Phi;

  // Subtraction from -0.0 is the canonical form of fneg.
  // fsub -0.0, X ==> fneg X
  // fsub nsz 0.0, X ==> fneg nsz X
  //
  // FIXME: This matcher does not respect FTZ or DAZ yet:
  // fsub -0.0, Denorm ==> +-0
  // fneg Denorm ==> -Denorm
  Value *Op;
  if (match(&I, m_FNeg(m_Value(Op))))
    return UnaryOperator::CreateFNegFMF(Op, &I);

  if (Instruction *X = foldFNegIntoConstant(I, DL))
    return X;

  if (Instruction *R = foldFBinOpOfIntCasts(I))
    return R;

  Value *X, *Y;
  Constant *C;

  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
  // If Op0 is not -0.0 or we can ignore -0.0: Z - (X - Y) --> Z + (Y - X)
  // Canonicalize to fadd to make analysis easier.
  // This can also help codegen because fadd is commutative.
  // Note that if this fsub was really an fneg, the fadd with -0.0 will get
  // killed later. We still limit that particular transform with 'hasOneUse'
  // because an fneg is assumed better/cheaper than a generic fsub.
  if (I.hasNoSignedZeros() ||
      cannotBeNegativeZero(Op0, 0, getSimplifyQuery().getWithInstruction(&I))) {
    if (match(Op1, m_OneUse(m_FSub(m_Value(X), m_Value(Y))))) {
      Value *NewSub = Builder.CreateFSubFMF(Y, X, &I);
      return BinaryOperator::CreateFAddFMF(Op0, NewSub, &I);
    }
  }

  // (-X) - Op1 --> -(X + Op1)
  if (I.hasNoSignedZeros() && !isa<ConstantExpr>(Op0) &&
      match(Op0, m_OneUse(m_FNeg(m_Value(X))))) {
    Value *FAdd = Builder.CreateFAddFMF(X, Op1, &I);
    return UnaryOperator::CreateFNegFMF(FAdd, &I);
  }

  if (isa<Constant>(Op0))
    if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
      if (Instruction *NV = FoldOpIntoSelect(I, SI))
        return NV;

  // X - C --> X + (-C)
  // But don't transform constant expressions because there's an inverse fold
  // for X + (-Y) --> X - Y.
  if (match(Op1, m_ImmConstant(C)))
    if (Constant *NegC = ConstantFoldUnaryOpOperand(Instruction::FNeg, C, DL))
      return BinaryOperator::CreateFAddFMF(Op0, NegC, &I);

  // X - (-Y) --> X + Y
  if (match(Op1, m_FNeg(m_Value(Y))))
    return BinaryOperator::CreateFAddFMF(Op0, Y, &I);

  // Similar to above, but look through a cast of the negated value:
  // X - (fptrunc(-Y)) --> X + fptrunc(Y)
  Type *Ty = I.getType();
  if (match(Op1, m_OneUse(m_FPTrunc(m_FNeg(m_Value(Y))))))
    return BinaryOperator::CreateFAddFMF(Op0, Builder.CreateFPTrunc(Y, Ty), &I);

  // X - (fpext(-Y)) --> X + fpext(Y)
  if (match(Op1, m_OneUse(m_FPExt(m_FNeg(m_Value(Y))))))
    return BinaryOperator::CreateFAddFMF(Op0, Builder.CreateFPExt(Y, Ty), &I);

  // Similar to above, but look through fmul/fdiv of the negated value:
  // Op0 - (-X * Y) --> Op0 + (X * Y)
  // Op0 - (Y * -X) --> Op0 + (X * Y)
  if (match(Op1, m_OneUse(m_c_FMul(m_FNeg(m_Value(X)), m_Value(Y))))) {
    Value *FMul = Builder.CreateFMulFMF(X, Y, &I);
    return BinaryOperator::CreateFAddFMF(Op0, FMul, &I);
  }
  // Op0 - (-X / Y) --> Op0 + (X / Y)
  // Op0 - (X / -Y) --> Op0 + (X / Y)
  if (match(Op1, m_OneUse(m_FDiv(m_FNeg(m_Value(X)), m_Value(Y)))) ||
      match(Op1, m_OneUse(m_FDiv(m_Value(X), m_FNeg(m_Value(Y)))))) {
    Value *FDiv = Builder.CreateFDivFMF(X, Y, &I);
    return BinaryOperator::CreateFAddFMF(Op0, FDiv, &I);
  }

  // Handle special cases for FSub with selects feeding the operation
  if (Value *V = SimplifySelectsFeedingBinaryOp(I, Op0, Op1))
    return replaceInstUsesWith(I, V);

  if (I.hasAllowReassoc() && I.hasNoSignedZeros()) {
    // (Y - X) - Y --> -X
    if (match(Op0, m_FSub(m_Specific(Op1), m_Value(X))))
      return UnaryOperator::CreateFNegFMF(X, &I);

    // Y - (X + Y) --> -X
    // Y - (Y + X) --> -X
    if (match(Op1, m_c_FAdd(m_Specific(Op0), m_Value(X))))
      return UnaryOperator::CreateFNegFMF(X, &I);

    // (X * C) - X --> X * (C - 1.0)
    if (match(Op0, m_FMul(m_Specific(Op1), m_Constant(C)))) {
      if (Constant *CSubOne = ConstantFoldBinaryOpOperands(
              Instruction::FSub, C, ConstantFP::get(Ty, 1.0), DL))
        return BinaryOperator::CreateFMulFMF(Op1, CSubOne, &I);
    }
    // X - (X * C) --> X * (1.0 - C)
    if (match(Op1, m_FMul(m_Specific(Op0), m_Constant(C)))) {
      if (Constant *OneSubC = ConstantFoldBinaryOpOperands(
              Instruction::FSub, ConstantFP::get(Ty, 1.0), C, DL))
        return BinaryOperator::CreateFMulFMF(Op0, OneSubC, &I);
    }
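
    // e.g. for the two folds above: (X * 3.0) - X --> X * 2.0 and
    // X - (X * 3.0) --> X * -2.0 (both require reassoc and nsz).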

    // Reassociate fsub/fadd sequences to create more fadd instructions and
    // reduce dependency chains:
    // ((X - Y) + Z) - Op1 --> (X + Z) - (Y + Op1)
    Value *Z;
    if (match(Op0, m_OneUse(m_c_FAdd(m_OneUse(m_FSub(m_Value(X), m_Value(Y))),
                                     m_Value(Z))))) {
      Value *XZ = Builder.CreateFAddFMF(X, Z, &I);
      Value *YW = Builder.CreateFAddFMF(Y, Op1, &I);
      return BinaryOperator::CreateFSubFMF(XZ, YW, &I);
    }

    auto m_FaddRdx = [](Value *&Sum, Value *&Vec) {
      return m_OneUse(m_Intrinsic<Intrinsic::vector_reduce_fadd>(m_Value(Sum),
                                                                 m_Value(Vec)));
    };
    Value *A0, *A1, *V0, *V1;
    if (match(Op0, m_FaddRdx(A0, V0)) && match(Op1, m_FaddRdx(A1, V1)) &&
        V0->getType() == V1->getType()) {
      // Difference of sums is sum of differences:
      // add_rdx(A0, V0) - add_rdx(A1, V1) --> add_rdx(A0, V0 - V1) - A1
      Value *Sub = Builder.CreateFSubFMF(V0, V1, &I);
      Value *Rdx = Builder.CreateIntrinsic(Intrinsic::vector_reduce_fadd,
                                           {Sub->getType()}, {A0, Sub}, &I);
      return BinaryOperator::CreateFSubFMF(Rdx, A1, &I);
    }

    if (Instruction *F = factorizeFAddFSub(I, Builder))
      return F;

    // TODO: This performs reassociative folds for FP ops. Some fraction of the
    // functionality has been subsumed by simple pattern matching here and in
    // InstSimplify. We should let a dedicated reassociation pass handle more
    // complex pattern matching and remove this from InstCombine.
    if (Value *V = FAddCombine(Builder).simplify(&I))
      return replaceInstUsesWith(I, V);

    // (X - Y) - Op1 --> X - (Y + Op1)
    if (match(Op0, m_OneUse(m_FSub(m_Value(X), m_Value(Y))))) {
      Value *FAdd = Builder.CreateFAddFMF(Y, Op1, &I);
      return BinaryOperator::CreateFSubFMF(X, FAdd, &I);
    }
  }

  return nullptr;
}