GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
//===- InstCombineAddSub.cpp ------------------------------------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements the visit functions for add, fadd, sub, and fsub.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "InstCombineInternal.h"
14
#include "llvm/ADT/APFloat.h"
15
#include "llvm/ADT/APInt.h"
16
#include "llvm/ADT/STLExtras.h"
17
#include "llvm/ADT/SmallVector.h"
18
#include "llvm/Analysis/InstructionSimplify.h"
19
#include "llvm/Analysis/ValueTracking.h"
20
#include "llvm/IR/Constant.h"
21
#include "llvm/IR/Constants.h"
22
#include "llvm/IR/InstrTypes.h"
23
#include "llvm/IR/Instruction.h"
24
#include "llvm/IR/Instructions.h"
25
#include "llvm/IR/Operator.h"
26
#include "llvm/IR/PatternMatch.h"
27
#include "llvm/IR/Type.h"
28
#include "llvm/IR/Value.h"
29
#include "llvm/Support/AlignOf.h"
30
#include "llvm/Support/Casting.h"
31
#include "llvm/Support/KnownBits.h"
32
#include "llvm/Transforms/InstCombine/InstCombiner.h"
33
#include <cassert>
34
#include <utility>
35
36
using namespace llvm;
37
using namespace PatternMatch;
38
39
#define DEBUG_TYPE "instcombine"
40
41
namespace {
42
43
/// Class representing the coefficient of a floating-point addend.
/// This class needs to be highly efficient, which is especially true for
/// the constructor. As of this writing, the cost of the default
/// constructor is merely a 4-byte store of zero (assuming the compiler is
/// able to perform write-merging).
48
///
49
class FAddendCoef {
50
public:
51
// The constructor has to initialize an APFloat, which is unnecessary for
// most addends, which have a coefficient of either 1 or -1. So the
// constructor is expensive. In order to avoid the cost of the constructor,
// we should reuse some instances whenever possible. The pre-created
// instances FAddCombine::Add[0-5] embody this idea.
56
FAddendCoef() = default;
57
~FAddendCoef();
58
59
// If possible, don't define operator+/operator- etc because these
60
// operators inevitably call FAddendCoef's constructor which is not cheap.
61
void operator=(const FAddendCoef &A);
62
void operator+=(const FAddendCoef &A);
63
void operator*=(const FAddendCoef &S);
64
65
void set(short C) {
66
assert(!insaneIntVal(C) && "Insane coefficient");
67
IsFp = false; IntVal = C;
68
}
69
70
void set(const APFloat& C);
71
72
void negate();
73
74
bool isZero() const { return isInt() ? !IntVal : getFpVal().isZero(); }
75
Value *getValue(Type *) const;
76
77
bool isOne() const { return isInt() && IntVal == 1; }
78
bool isTwo() const { return isInt() && IntVal == 2; }
79
bool isMinusOne() const { return isInt() && IntVal == -1; }
80
bool isMinusTwo() const { return isInt() && IntVal == -2; }
81
82
private:
83
bool insaneIntVal(int V) { return V > 4 || V < -4; }
84
85
APFloat *getFpValPtr() { return reinterpret_cast<APFloat *>(&FpValBuf); }
86
87
const APFloat *getFpValPtr() const {
88
return reinterpret_cast<const APFloat *>(&FpValBuf);
89
}
90
91
const APFloat &getFpVal() const {
92
assert(IsFp && BufHasFpVal && "Incorrect state");
93
return *getFpValPtr();
94
}
95
96
APFloat &getFpVal() {
97
assert(IsFp && BufHasFpVal && "Incorrect state");
98
return *getFpValPtr();
99
}
100
101
bool isInt() const { return !IsFp; }
102
103
// If the coefficient is represented by an integer, promote it to a
104
// floating point.
105
void convertToFpType(const fltSemantics &Sem);
106
107
// Construct an APFloat from a signed integer.
108
// TODO: We should get rid of this function when APFloat can be constructed
109
// from a *SIGNED* integer.
110
APFloat createAPFloatFromInt(const fltSemantics &Sem, int Val);
111
112
bool IsFp = false;
113
114
// True iff FpValBuf contains an instance of APFloat.
115
bool BufHasFpVal = false;
116
117
// The integer coefficient of an individual addend is either 1 or -1,
// and we try to simplify at most 4 addends from at most two neighboring
// instructions. So the range of <IntVal> falls in [-4, 4]. APInt
// is overkill for this purpose.
121
short IntVal = 0;
122
123
AlignedCharArrayUnion<APFloat> FpValBuf;
124
};
125
126
/// FAddend is used to represent a floating-point addend. An addend is
/// represented as <C, V>, where V is a symbolic value, and C is a
/// constant coefficient. A constant addend is represented as <C, 0>.
129
class FAddend {
130
public:
131
FAddend() = default;
132
133
void operator+=(const FAddend &T) {
134
assert((Val == T.Val) && "Symbolic-values disagree");
135
Coeff += T.Coeff;
136
}
137
138
Value *getSymVal() const { return Val; }
139
const FAddendCoef &getCoef() const { return Coeff; }
140
141
bool isConstant() const { return Val == nullptr; }
142
bool isZero() const { return Coeff.isZero(); }
143
144
void set(short Coefficient, Value *V) {
145
Coeff.set(Coefficient);
146
Val = V;
147
}
148
void set(const APFloat &Coefficient, Value *V) {
149
Coeff.set(Coefficient);
150
Val = V;
151
}
152
void set(const ConstantFP *Coefficient, Value *V) {
153
Coeff.set(Coefficient->getValueAPF());
154
Val = V;
155
}
156
157
void negate() { Coeff.negate(); }
158
159
/// Drill down the U-D chain one step to find the definition of V, and
160
/// try to break the definition into one or two addends.
161
static unsigned drillValueDownOneStep(Value* V, FAddend &A0, FAddend &A1);
162
163
/// Similar to FAddend::drillDownOneStep() except that the value being
164
/// split is the addend itself.
165
unsigned drillAddendDownOneStep(FAddend &Addend0, FAddend &Addend1) const;
166
167
private:
168
void Scale(const FAddendCoef& ScaleAmt) { Coeff *= ScaleAmt; }
169
170
// This addend has the value of "Coeff * Val".
171
Value *Val = nullptr;
172
FAddendCoef Coeff;
173
};
174
175
/// FAddCombine is the class for optimizing an unsafe fadd/fsub along
/// with at most two of its neighboring instructions.
177
///
178
class FAddCombine {
179
public:
180
FAddCombine(InstCombiner::BuilderTy &B) : Builder(B) {}
181
182
Value *simplify(Instruction *FAdd);
183
184
private:
185
using AddendVect = SmallVector<const FAddend *, 4>;
186
187
Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota);
188
189
/// Convert given addend to a Value
190
Value *createAddendVal(const FAddend &A, bool& NeedNeg);
191
192
/// Return the number of instructions needed to emit the N-ary addition.
193
unsigned calcInstrNumber(const AddendVect& Vect);
194
195
Value *createFSub(Value *Opnd0, Value *Opnd1);
196
Value *createFAdd(Value *Opnd0, Value *Opnd1);
197
Value *createFMul(Value *Opnd0, Value *Opnd1);
198
Value *createFNeg(Value *V);
199
Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota);
200
void createInstPostProc(Instruction *NewInst, bool NoNumber = false);
201
202
// Debugging stuff is clustered here.
203
#ifndef NDEBUG
204
unsigned CreateInstrNum;
205
void initCreateInstNum() { CreateInstrNum = 0; }
206
void incCreateInstNum() { CreateInstrNum++; }
207
#else
208
void initCreateInstNum() {}
209
void incCreateInstNum() {}
210
#endif
211
212
InstCombiner::BuilderTy &Builder;
213
Instruction *Instr = nullptr;
214
};
215
216
} // end anonymous namespace
217
218
//===----------------------------------------------------------------------===//
219
//
220
// Implementation of
221
// {FAddendCoef, FAddend, FAddCombine}.
222
//
223
//===----------------------------------------------------------------------===//
224
FAddendCoef::~FAddendCoef() {
225
if (BufHasFpVal)
226
getFpValPtr()->~APFloat();
227
}
228
229
void FAddendCoef::set(const APFloat& C) {
230
APFloat *P = getFpValPtr();
231
232
if (isInt()) {
233
// As the buffer is a meaningless byte stream, we cannot call
234
// APFloat::operator=().
235
new(P) APFloat(C);
236
} else
237
*P = C;
238
239
IsFp = BufHasFpVal = true;
240
}
241
242
void FAddendCoef::convertToFpType(const fltSemantics &Sem) {
243
if (!isInt())
244
return;
245
246
APFloat *P = getFpValPtr();
247
if (IntVal > 0)
248
new(P) APFloat(Sem, IntVal);
249
else {
250
new(P) APFloat(Sem, 0 - IntVal);
251
P->changeSign();
252
}
253
IsFp = BufHasFpVal = true;
254
}
255
256
APFloat FAddendCoef::createAPFloatFromInt(const fltSemantics &Sem, int Val) {
257
if (Val >= 0)
258
return APFloat(Sem, Val);
259
260
APFloat T(Sem, 0 - Val);
261
T.changeSign();
262
263
return T;
264
}
265
266
void FAddendCoef::operator=(const FAddendCoef &That) {
267
if (That.isInt())
268
set(That.IntVal);
269
else
270
set(That.getFpVal());
271
}
272
273
void FAddendCoef::operator+=(const FAddendCoef &That) {
274
RoundingMode RndMode = RoundingMode::NearestTiesToEven;
275
if (isInt() == That.isInt()) {
276
if (isInt())
277
IntVal += That.IntVal;
278
else
279
getFpVal().add(That.getFpVal(), RndMode);
280
return;
281
}
282
283
if (isInt()) {
284
const APFloat &T = That.getFpVal();
285
convertToFpType(T.getSemantics());
286
getFpVal().add(T, RndMode);
287
return;
288
}
289
290
APFloat &T = getFpVal();
291
T.add(createAPFloatFromInt(T.getSemantics(), That.IntVal), RndMode);
292
}
293
294
void FAddendCoef::operator*=(const FAddendCoef &That) {
295
if (That.isOne())
296
return;
297
298
if (That.isMinusOne()) {
299
negate();
300
return;
301
}
302
303
if (isInt() && That.isInt()) {
304
int Res = IntVal * (int)That.IntVal;
305
assert(!insaneIntVal(Res) && "Insane int value");
306
IntVal = Res;
307
return;
308
}
309
310
const fltSemantics &Semantic =
311
isInt() ? That.getFpVal().getSemantics() : getFpVal().getSemantics();
312
313
if (isInt())
314
convertToFpType(Semantic);
315
APFloat &F0 = getFpVal();
316
317
if (That.isInt())
318
F0.multiply(createAPFloatFromInt(Semantic, That.IntVal),
319
APFloat::rmNearestTiesToEven);
320
else
321
F0.multiply(That.getFpVal(), APFloat::rmNearestTiesToEven);
322
}
323
324
void FAddendCoef::negate() {
325
if (isInt())
326
IntVal = 0 - IntVal;
327
else
328
getFpVal().changeSign();
329
}
330
331
Value *FAddendCoef::getValue(Type *Ty) const {
332
return isInt() ?
333
ConstantFP::get(Ty, float(IntVal)) :
334
ConstantFP::get(Ty->getContext(), getFpVal());
335
}
336
337
// The definition of <Val>          Addends
// =========================================
//  A + B                          <1, A>, <1, B>
//  A - B                          <1, A>, <-1, B>
//  0 - B                          <-1, B>
//  C * A                          <C, A>
//  A + C                          <1, A>, <C, NULL>
//  0 +/- 0                        <0, NULL> (corner case)
//
// Legend: A and B are not constant, C is constant
347
unsigned FAddend::drillValueDownOneStep
348
(Value *Val, FAddend &Addend0, FAddend &Addend1) {
349
Instruction *I = nullptr;
350
if (!Val || !(I = dyn_cast<Instruction>(Val)))
351
return 0;
352
353
unsigned Opcode = I->getOpcode();
354
355
if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub) {
356
ConstantFP *C0, *C1;
357
Value *Opnd0 = I->getOperand(0);
358
Value *Opnd1 = I->getOperand(1);
359
if ((C0 = dyn_cast<ConstantFP>(Opnd0)) && C0->isZero())
360
Opnd0 = nullptr;
361
362
if ((C1 = dyn_cast<ConstantFP>(Opnd1)) && C1->isZero())
363
Opnd1 = nullptr;
364
365
if (Opnd0) {
366
if (!C0)
367
Addend0.set(1, Opnd0);
368
else
369
Addend0.set(C0, nullptr);
370
}
371
372
if (Opnd1) {
373
FAddend &Addend = Opnd0 ? Addend1 : Addend0;
374
if (!C1)
375
Addend.set(1, Opnd1);
376
else
377
Addend.set(C1, nullptr);
378
if (Opcode == Instruction::FSub)
379
Addend.negate();
380
}
381
382
if (Opnd0 || Opnd1)
383
return Opnd0 && Opnd1 ? 2 : 1;
384
385
// Both operands are zero. Weird!
386
Addend0.set(APFloat(C0->getValueAPF().getSemantics()), nullptr);
387
return 1;
388
}
389
390
if (I->getOpcode() == Instruction::FMul) {
391
Value *V0 = I->getOperand(0);
392
Value *V1 = I->getOperand(1);
393
if (ConstantFP *C = dyn_cast<ConstantFP>(V0)) {
394
Addend0.set(C, V1);
395
return 1;
396
}
397
398
if (ConstantFP *C = dyn_cast<ConstantFP>(V1)) {
399
Addend0.set(C, V0);
400
return 1;
401
}
402
}
403
404
return 0;
405
}
406
407
// Try to break *this* addend into two addends. e.g. Suppose this addend is
408
// <2.3, V>, and V = X + Y; by calling this function, we obtain two addends,
409
// i.e. <2.3, X> and <2.3, Y>.
410
unsigned FAddend::drillAddendDownOneStep
411
(FAddend &Addend0, FAddend &Addend1) const {
412
if (isConstant())
413
return 0;
414
415
unsigned BreakNum = FAddend::drillValueDownOneStep(Val, Addend0, Addend1);
416
if (!BreakNum || Coeff.isOne())
417
return BreakNum;
418
419
Addend0.Scale(Coeff);
420
421
if (BreakNum == 2)
422
Addend1.Scale(Coeff);
423
424
return BreakNum;
425
}
426
427
Value *FAddCombine::simplify(Instruction *I) {
428
assert(I->hasAllowReassoc() && I->hasNoSignedZeros() &&
429
"Expected 'reassoc'+'nsz' instruction");
430
431
// Currently we are not able to handle vector types.
432
if (I->getType()->isVectorTy())
433
return nullptr;
434
435
assert((I->getOpcode() == Instruction::FAdd ||
436
I->getOpcode() == Instruction::FSub) && "Expect add/sub");
437
438
// Save the instruction before calling other member-functions.
439
Instr = I;
440
441
FAddend Opnd0, Opnd1, Opnd0_0, Opnd0_1, Opnd1_0, Opnd1_1;
442
443
unsigned OpndNum = FAddend::drillValueDownOneStep(I, Opnd0, Opnd1);
444
445
// Step 1: Expand the 1st addend into Opnd0_0 and Opnd0_1.
446
unsigned Opnd0_ExpNum = 0;
447
unsigned Opnd1_ExpNum = 0;
448
449
if (!Opnd0.isConstant())
450
Opnd0_ExpNum = Opnd0.drillAddendDownOneStep(Opnd0_0, Opnd0_1);
451
452
// Step 2: Expand the 2nd addend into Opnd1_0 and Opnd1_1.
453
if (OpndNum == 2 && !Opnd1.isConstant())
454
Opnd1_ExpNum = Opnd1.drillAddendDownOneStep(Opnd1_0, Opnd1_1);
455
456
// Step 3: Try to optimize Opnd0_0 + Opnd0_1 + Opnd1_0 + Opnd1_1
457
if (Opnd0_ExpNum && Opnd1_ExpNum) {
458
AddendVect AllOpnds;
459
AllOpnds.push_back(&Opnd0_0);
460
AllOpnds.push_back(&Opnd1_0);
461
if (Opnd0_ExpNum == 2)
462
AllOpnds.push_back(&Opnd0_1);
463
if (Opnd1_ExpNum == 2)
464
AllOpnds.push_back(&Opnd1_1);
465
466
// Compute instruction quota. We should save at least one instruction.
467
unsigned InstQuota = 0;
468
469
Value *V0 = I->getOperand(0);
470
Value *V1 = I->getOperand(1);
471
InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) &&
472
(!isa<Constant>(V1) && V1->hasOneUse())) ? 2 : 1;
473
474
if (Value *R = simplifyFAdd(AllOpnds, InstQuota))
475
return R;
476
}
477
478
if (OpndNum != 2) {
479
// The input instruction is: "I = 0.0 +/- V". If "V" could have been
// split into two addends, say "V = X - Y", the instruction would have
// been optimized into "I = Y - X" in the previous steps.
482
//
483
const FAddendCoef &CE = Opnd0.getCoef();
484
return CE.isOne() ? Opnd0.getSymVal() : nullptr;
485
}
486
487
// step 4: Try to optimize Opnd0 + Opnd1_0 [+ Opnd1_1]
488
if (Opnd1_ExpNum) {
489
AddendVect AllOpnds;
490
AllOpnds.push_back(&Opnd0);
491
AllOpnds.push_back(&Opnd1_0);
492
if (Opnd1_ExpNum == 2)
493
AllOpnds.push_back(&Opnd1_1);
494
495
if (Value *R = simplifyFAdd(AllOpnds, 1))
496
return R;
497
}
498
499
// step 5: Try to optimize Opnd1 + Opnd0_0 [+ Opnd0_1]
500
if (Opnd0_ExpNum) {
501
AddendVect AllOpnds;
502
AllOpnds.push_back(&Opnd1);
503
AllOpnds.push_back(&Opnd0_0);
504
if (Opnd0_ExpNum == 2)
505
AllOpnds.push_back(&Opnd0_1);
506
507
if (Value *R = simplifyFAdd(AllOpnds, 1))
508
return R;
509
}
510
511
return nullptr;
512
}
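// Illustrative example (editor's sketch, not part of the upstream source;
// SSA names are hypothetical). With 'reassoc' and 'nsz', the two constant
// addends cancel and simplify() can return %x directly:
//   %t = fadd reassoc nsz float %x, 3.0
//   %r = fadd reassoc nsz float %t, -3.0
//   ==>
//   %x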
513
514
Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
515
unsigned AddendNum = Addends.size();
516
assert(AddendNum <= 4 && "Too many addends");
517
518
// For saving intermediate results;
519
unsigned NextTmpIdx = 0;
520
FAddend TmpResult[3];
521
522
// Simplified addends are placed in <SimpVect>.
523
AddendVect SimpVect;
524
525
// The outer loop works on one symbolic-value at a time. Suppose the input
526
// addends are: <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ...
527
// The symbolic-values will be processed in this order: x, y, z.
528
for (unsigned SymIdx = 0; SymIdx < AddendNum; SymIdx++) {
529
530
const FAddend *ThisAddend = Addends[SymIdx];
531
if (!ThisAddend) {
532
// This addend was processed before.
533
continue;
534
}
535
536
Value *Val = ThisAddend->getSymVal();
537
538
// If the resulting expression has a constant addend, it is desirable for
// that constant to reside at the top of the resulting expression tree.
// Placing constants close to the super-expression(s) will potentially
// reveal some optimization opportunities in the super-expression(s).
// Here we intentionally do not implement this logic and instead rely on
// the SimplifyAssociativeOrCommutative call later.
544
545
unsigned StartIdx = SimpVect.size();
546
SimpVect.push_back(ThisAddend);
547
548
// The inner loop collects addends sharing the same symbolic value; these
// addends will later be folded into a single addend. Following the above
// example, if the symbolic value "y" is being processed, the inner loop
// will collect the two addends "<b1, y>" and "<b2, y>". These two addends
// will later be folded into "<b1+b2, y>".
553
for (unsigned SameSymIdx = SymIdx + 1;
554
SameSymIdx < AddendNum; SameSymIdx++) {
555
const FAddend *T = Addends[SameSymIdx];
556
if (T && T->getSymVal() == Val) {
557
// Set null such that next iteration of the outer loop will not process
558
// this addend again.
559
Addends[SameSymIdx] = nullptr;
560
SimpVect.push_back(T);
561
}
562
}
563
564
// If multiple addends share the same symbolic value, fold them together.
565
if (StartIdx + 1 != SimpVect.size()) {
566
FAddend &R = TmpResult[NextTmpIdx ++];
567
R = *SimpVect[StartIdx];
568
for (unsigned Idx = StartIdx + 1; Idx < SimpVect.size(); Idx++)
569
R += *SimpVect[Idx];
570
571
// Pop all addends being folded and push the resulting folded addend.
572
SimpVect.resize(StartIdx);
573
if (!R.isZero()) {
574
SimpVect.push_back(&R);
575
}
576
}
577
}
578
579
assert((NextTmpIdx <= std::size(TmpResult) + 1) && "out-of-bound access");
580
581
Value *Result;
582
if (!SimpVect.empty())
583
Result = createNaryFAdd(SimpVect, InstrQuota);
584
else {
585
// The addition is folded to 0.0.
586
Result = ConstantFP::get(Instr->getType(), 0.0);
587
}
588
589
return Result;
590
}
591
592
Value *FAddCombine::createNaryFAdd
593
(const AddendVect &Opnds, unsigned InstrQuota) {
594
assert(!Opnds.empty() && "Expect at least one addend");
595
596
// Step 1: Check if the # of instructions needed exceeds the quota.
597
598
unsigned InstrNeeded = calcInstrNumber(Opnds);
599
if (InstrNeeded > InstrQuota)
600
return nullptr;
601
602
initCreateInstNum();
603
604
// step 2: Emit the N-ary addition.
605
// Note that at most three instructions are involved in Fadd-InstCombine: the
606
// addition in question, and at most two neighboring instructions.
607
// The resulting optimized addition should have at least one less instruction
608
// than the original addition expression tree. This implies that the resulting
609
// N-ary addition has at most two instructions, and we don't need to worry
610
// about tree-height when constructing the N-ary addition.
611
612
Value *LastVal = nullptr;
613
bool LastValNeedNeg = false;
614
615
// Iterate the addends, creating fadd/fsub using two adjacent addends.
616
for (const FAddend *Opnd : Opnds) {
617
bool NeedNeg;
618
Value *V = createAddendVal(*Opnd, NeedNeg);
619
if (!LastVal) {
620
LastVal = V;
621
LastValNeedNeg = NeedNeg;
622
continue;
623
}
624
625
if (LastValNeedNeg == NeedNeg) {
626
LastVal = createFAdd(LastVal, V);
627
continue;
628
}
629
630
if (LastValNeedNeg)
631
LastVal = createFSub(V, LastVal);
632
else
633
LastVal = createFSub(LastVal, V);
634
635
LastValNeedNeg = false;
636
}
637
638
if (LastValNeedNeg) {
639
LastVal = createFNeg(LastVal);
640
}
641
642
#ifndef NDEBUG
643
assert(CreateInstrNum == InstrNeeded &&
644
"Inconsistent in instruction numbers");
645
#endif
646
647
return LastVal;
648
}
649
650
Value *FAddCombine::createFSub(Value *Opnd0, Value *Opnd1) {
651
Value *V = Builder.CreateFSub(Opnd0, Opnd1);
652
if (Instruction *I = dyn_cast<Instruction>(V))
653
createInstPostProc(I);
654
return V;
655
}
656
657
Value *FAddCombine::createFNeg(Value *V) {
658
Value *NewV = Builder.CreateFNeg(V);
659
if (Instruction *I = dyn_cast<Instruction>(NewV))
660
createInstPostProc(I, true); // fneg's don't receive instruction numbers.
661
return NewV;
662
}
663
664
Value *FAddCombine::createFAdd(Value *Opnd0, Value *Opnd1) {
665
Value *V = Builder.CreateFAdd(Opnd0, Opnd1);
666
if (Instruction *I = dyn_cast<Instruction>(V))
667
createInstPostProc(I);
668
return V;
669
}
670
671
Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) {
672
Value *V = Builder.CreateFMul(Opnd0, Opnd1);
673
if (Instruction *I = dyn_cast<Instruction>(V))
674
createInstPostProc(I);
675
return V;
676
}
677
678
void FAddCombine::createInstPostProc(Instruction *NewInstr, bool NoNumber) {
679
NewInstr->setDebugLoc(Instr->getDebugLoc());
680
681
// Keep track of the number of instructions created.
682
if (!NoNumber)
683
incCreateInstNum();
684
685
// Propagate fast-math flags
686
NewInstr->setFastMathFlags(Instr->getFastMathFlags());
687
}
688
689
// Return the number of instructions needed to emit the N-ary addition.
690
// NOTE: Keep this function in sync with createAddendVal().
691
unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) {
692
unsigned OpndNum = Opnds.size();
693
unsigned InstrNeeded = OpndNum - 1;
694
695
// Adjust the number of instructions needed to emit the N-ary add.
696
for (const FAddend *Opnd : Opnds) {
697
if (Opnd->isConstant())
698
continue;
699
700
// The constant check above is really for a few special constant
701
// coefficients.
702
if (isa<UndefValue>(Opnd->getSymVal()))
703
continue;
704
705
const FAddendCoef &CE = Opnd->getCoef();
706
// Let the addend be "c * x". If "c == +/-1", the value of the addend
707
// is immediately available; otherwise, it needs exactly one instruction
708
// to evaluate the value.
709
if (!CE.isMinusOne() && !CE.isOne())
710
InstrNeeded++;
711
}
712
return InstrNeeded;
713
}
714
715
// Input Addend            Value             NeedNeg(output)
// ================================================================
//  Constant C             C                 false
//  <+/-1, V>              V                 coefficient is -1
//  <2/-2, V>              "fadd V, V"       coefficient is -2
//  <C, V>                 "fmul V, C"       false
721
//
722
// NOTE: Keep this function in sync with FAddCombine::calcInstrNumber.
723
Value *FAddCombine::createAddendVal(const FAddend &Opnd, bool &NeedNeg) {
724
const FAddendCoef &Coeff = Opnd.getCoef();
725
726
if (Opnd.isConstant()) {
727
NeedNeg = false;
728
return Coeff.getValue(Instr->getType());
729
}
730
731
Value *OpndVal = Opnd.getSymVal();
732
733
if (Coeff.isMinusOne() || Coeff.isOne()) {
734
NeedNeg = Coeff.isMinusOne();
735
return OpndVal;
736
}
737
738
if (Coeff.isTwo() || Coeff.isMinusTwo()) {
739
NeedNeg = Coeff.isMinusTwo();
740
return createFAdd(OpndVal, OpndVal);
741
}
742
743
NeedNeg = false;
744
return createFMul(OpndVal, Coeff.getValue(Instr->getType()));
745
}
746
747
// Checks if any operand is negative and we can convert add to sub.
// This function checks for the following negative patterns
// ADD(XOR(OR(Z, NOT(C)), C), 1) == NEG(AND(Z, C))
750
// ADD(XOR(AND(Z, C), C), 1) == NEG(OR(Z, ~C))
751
// XOR(AND(Z, C), (C + 1)) == NEG(OR(Z, ~C)) if C is even
752
static Value *checkForNegativeOperand(BinaryOperator &I,
753
InstCombiner::BuilderTy &Builder) {
754
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
755
756
// This function creates 2 instructions to replace ADD; we need at least one
// of LHS or RHS to have one use to ensure a benefit from the transform.
758
if (!LHS->hasOneUse() && !RHS->hasOneUse())
759
return nullptr;
760
761
Value *X = nullptr, *Y = nullptr, *Z = nullptr;
762
const APInt *C1 = nullptr, *C2 = nullptr;
763
764
// if ONE is on other side, swap
765
if (match(RHS, m_Add(m_Value(X), m_One())))
766
std::swap(LHS, RHS);
767
768
if (match(LHS, m_Add(m_Value(X), m_One()))) {
769
// if XOR on other side, swap
770
if (match(RHS, m_Xor(m_Value(Y), m_APInt(C1))))
771
std::swap(X, RHS);
772
773
if (match(X, m_Xor(m_Value(Y), m_APInt(C1)))) {
774
// X = XOR(Y, C1), Y = OR(Z, C2), C2 = NOT(C1) ==> X == NOT(AND(Z, C1))
775
// ADD(ADD(X, 1), RHS) == ADD(X, ADD(RHS, 1)) == SUB(RHS, AND(Z, C1))
776
if (match(Y, m_Or(m_Value(Z), m_APInt(C2))) && (*C2 == ~(*C1))) {
777
Value *NewAnd = Builder.CreateAnd(Z, *C1);
778
return Builder.CreateSub(RHS, NewAnd, "sub");
779
} else if (match(Y, m_And(m_Value(Z), m_APInt(C2))) && (*C1 == *C2)) {
780
// X = XOR(Y, C1), Y = AND(Z, C2), C2 == C1 ==> X == NOT(OR(Z, ~C1))
781
// ADD(ADD(X, 1), RHS) == ADD(X, ADD(RHS, 1)) == SUB(RHS, OR(Z, ~C1))
782
Value *NewOr = Builder.CreateOr(Z, ~(*C1));
783
return Builder.CreateSub(RHS, NewOr, "sub");
784
}
785
}
786
}
787
788
// Restore LHS and RHS
789
LHS = I.getOperand(0);
790
RHS = I.getOperand(1);
791
792
// if XOR is on other side, swap
793
if (match(RHS, m_Xor(m_Value(Y), m_APInt(C1))))
794
std::swap(LHS, RHS);
795
796
// C1 is odd (so C2 = C1 - 1 is even)
797
// LHS = XOR(Y, C1), Y = AND(Z, C2), C1 == (C2 + 1) => LHS == NEG(OR(Z, ~C2))
798
// ADD(LHS, RHS) == SUB(RHS, OR(Z, ~C2))
799
if (match(LHS, m_Xor(m_Value(Y), m_APInt(C1))))
800
if (C1->countr_zero() == 0)
801
if (match(Y, m_And(m_Value(Z), m_APInt(C2))) && *C1 == (*C2 + 1)) {
802
Value *NewOr = Builder.CreateOr(Z, ~(*C2));
803
return Builder.CreateSub(RHS, NewOr, "sub");
804
}
805
return nullptr;
806
}
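// Illustrative example of the first pattern (editor's sketch, not part of
// the upstream source; SSA names are hypothetical), with C1 = 15 and
// C2 = ~C1 = -16:
//   %o  = or  i32 %z, -16
//   %x  = xor i32 %o, 15        ; == ~(%z & 15)
//   %x1 = add i32 %x, 1         ; == -(%z & 15)
//   %r  = add i32 %x1, %rhs
//   ==>
//   %a  = and i32 %z, 15
//   %r  = sub i32 %rhs, %a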
807
808
/// Wrapping flags may allow combining constants separated by an extend.
809
static Instruction *foldNoWrapAdd(BinaryOperator &Add,
810
InstCombiner::BuilderTy &Builder) {
811
Value *Op0 = Add.getOperand(0), *Op1 = Add.getOperand(1);
812
Type *Ty = Add.getType();
813
Constant *Op1C;
814
if (!match(Op1, m_Constant(Op1C)))
815
return nullptr;
816
817
// Try this match first because it results in an add in the narrow type.
818
// (zext (X +nuw C2)) + C1 --> zext (X + (C2 + trunc(C1)))
819
Value *X;
820
const APInt *C1, *C2;
821
if (match(Op1, m_APInt(C1)) &&
822
match(Op0, m_ZExt(m_NUWAddLike(m_Value(X), m_APInt(C2)))) &&
823
C1->isNegative() && C1->sge(-C2->sext(C1->getBitWidth()))) {
824
APInt NewC = *C2 + C1->trunc(C2->getBitWidth());
825
// If the smaller add will fold to zero, we don't need to check one use.
826
if (NewC.isZero())
827
return new ZExtInst(X, Ty);
828
// Otherwise only do this if the existing zero extend will be removed.
829
if (Op0->hasOneUse())
830
return new ZExtInst(
831
Builder.CreateNUWAdd(X, ConstantInt::get(X->getType(), NewC)), Ty);
832
}
833
834
// More general combining of constants in the wide type.
835
// (sext (X +nsw NarrowC)) + C --> (sext X) + (sext(NarrowC) + C)
836
// or (zext nneg (X +nsw NarrowC)) + C --> (sext X) + (sext(NarrowC) + C)
837
Constant *NarrowC;
838
if (match(Op0, m_OneUse(m_SExtLike(
839
m_NSWAddLike(m_Value(X), m_Constant(NarrowC)))))) {
840
Value *WideC = Builder.CreateSExt(NarrowC, Ty);
841
Value *NewC = Builder.CreateAdd(WideC, Op1C);
842
Value *WideX = Builder.CreateSExt(X, Ty);
843
return BinaryOperator::CreateAdd(WideX, NewC);
844
}
845
// (zext (X +nuw NarrowC)) + C --> (zext X) + (zext(NarrowC) + C)
846
if (match(Op0,
847
m_OneUse(m_ZExt(m_NUWAddLike(m_Value(X), m_Constant(NarrowC)))))) {
848
Value *WideC = Builder.CreateZExt(NarrowC, Ty);
849
Value *NewC = Builder.CreateAdd(WideC, Op1C);
850
Value *WideX = Builder.CreateZExt(X, Ty);
851
return BinaryOperator::CreateAdd(WideX, NewC);
852
}
853
return nullptr;
854
}
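// Illustrative example of the sext case (editor's sketch, not part of the
// upstream source; SSA names are hypothetical). The 'nsw' on the narrow add
// allows the constants to be combined in the wide type:
//   %a = add nsw i8 %x, 10
//   %s = sext i8 %a to i32
//   %r = add i32 %s, 100
//   ==>
//   %sx = sext i8 %x to i32
//   %r  = add i32 %sx, 110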
855
856
Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) {
857
Value *Op0 = Add.getOperand(0), *Op1 = Add.getOperand(1);
858
Type *Ty = Add.getType();
859
Constant *Op1C;
860
if (!match(Op1, m_ImmConstant(Op1C)))
861
return nullptr;
862
863
if (Instruction *NV = foldBinOpIntoSelectOrPhi(Add))
864
return NV;
865
866
Value *X;
867
Constant *Op00C;
868
869
// add (sub C1, X), C2 --> sub (add C1, C2), X
870
if (match(Op0, m_Sub(m_Constant(Op00C), m_Value(X))))
871
return BinaryOperator::CreateSub(ConstantExpr::getAdd(Op00C, Op1C), X);
872
873
Value *Y;
874
875
// add (sub X, Y), -1 --> add (not Y), X
876
if (match(Op0, m_OneUse(m_Sub(m_Value(X), m_Value(Y)))) &&
877
match(Op1, m_AllOnes()))
878
return BinaryOperator::CreateAdd(Builder.CreateNot(Y), X);
879
880
// zext(bool) + C -> bool ? C + 1 : C
881
if (match(Op0, m_ZExt(m_Value(X))) &&
882
X->getType()->getScalarSizeInBits() == 1)
883
return SelectInst::Create(X, InstCombiner::AddOne(Op1C), Op1);
884
// sext(bool) + C -> bool ? C - 1 : C
885
if (match(Op0, m_SExt(m_Value(X))) &&
886
X->getType()->getScalarSizeInBits() == 1)
887
return SelectInst::Create(X, InstCombiner::SubOne(Op1C), Op1);
888
889
// ~X + C --> (C-1) - X
890
if (match(Op0, m_Not(m_Value(X)))) {
891
// ~X + C has NSW and (C-1) won't overflow => (C-1)-X can have NSW
892
auto *COne = ConstantInt::get(Op1C->getType(), 1);
893
bool WillNotSOV = willNotOverflowSignedSub(Op1C, COne, Add);
894
BinaryOperator *Res =
895
BinaryOperator::CreateSub(ConstantExpr::getSub(Op1C, COne), X);
896
Res->setHasNoSignedWrap(Add.hasNoSignedWrap() && WillNotSOV);
897
return Res;
898
}
899
900
// (iN X s>> (N - 1)) + 1 --> zext (X > -1)
901
const APInt *C;
902
unsigned BitWidth = Ty->getScalarSizeInBits();
903
if (match(Op0, m_OneUse(m_AShr(m_Value(X),
904
m_SpecificIntAllowPoison(BitWidth - 1)))) &&
905
match(Op1, m_One()))
906
return new ZExtInst(Builder.CreateIsNotNeg(X, "isnotneg"), Ty);
907
908
if (!match(Op1, m_APInt(C)))
909
return nullptr;
910
911
// (X | Op01C) + Op1C --> X + (Op01C + Op1C) iff the `or` is actually an `add`
912
Constant *Op01C;
913
if (match(Op0, m_DisjointOr(m_Value(X), m_ImmConstant(Op01C)))) {
914
BinaryOperator *NewAdd =
915
BinaryOperator::CreateAdd(X, ConstantExpr::getAdd(Op01C, Op1C));
916
NewAdd->setHasNoSignedWrap(Add.hasNoSignedWrap() &&
917
willNotOverflowSignedAdd(Op01C, Op1C, Add));
918
NewAdd->setHasNoUnsignedWrap(Add.hasNoUnsignedWrap());
919
return NewAdd;
920
}
921
922
// (X | C2) + C --> (X | C2) ^ C2 iff (C2 == -C)
923
const APInt *C2;
924
if (match(Op0, m_Or(m_Value(), m_APInt(C2))) && *C2 == -*C)
925
return BinaryOperator::CreateXor(Op0, ConstantInt::get(Add.getType(), *C2));
926
927
if (C->isSignMask()) {
928
// If wrapping is not allowed, then the addition must set the sign bit:
929
// X + (signmask) --> X | signmask
930
if (Add.hasNoSignedWrap() || Add.hasNoUnsignedWrap())
931
return BinaryOperator::CreateOr(Op0, Op1);
932
933
// If wrapping is allowed, then the addition flips the sign bit of LHS:
934
// X + (signmask) --> X ^ signmask
935
return BinaryOperator::CreateXor(Op0, Op1);
936
}
937
938
// Is this add the last step in a convoluted sext?
939
// add(zext(xor i16 X, -32768), -32768) --> sext X
940
if (match(Op0, m_ZExt(m_Xor(m_Value(X), m_APInt(C2)))) &&
941
C2->isMinSignedValue() && C2->sext(Ty->getScalarSizeInBits()) == *C)
942
return CastInst::Create(Instruction::SExt, X, Ty);
943
944
if (match(Op0, m_Xor(m_Value(X), m_APInt(C2)))) {
945
// (X ^ signmask) + C --> (X + (signmask ^ C))
946
if (C2->isSignMask())
947
return BinaryOperator::CreateAdd(X, ConstantInt::get(Ty, *C2 ^ *C));
948
949
// If X has no high-bits set above an xor mask:
950
// add (xor X, LowMaskC), C --> sub (LowMaskC + C), X
951
if (C2->isMask()) {
952
KnownBits LHSKnown = computeKnownBits(X, 0, &Add);
953
if ((*C2 | LHSKnown.Zero).isAllOnes())
954
return BinaryOperator::CreateSub(ConstantInt::get(Ty, *C2 + *C), X);
955
}
956
957
// Look for a math+logic pattern that corresponds to sext-in-register of a
958
// value with cleared high bits. Convert that into a pair of shifts:
959
// add (xor X, 0x80), 0xF..F80 --> (X << ShAmtC) >>s ShAmtC
960
// add (xor X, 0xF..F80), 0x80 --> (X << ShAmtC) >>s ShAmtC
961
if (Op0->hasOneUse() && *C2 == -(*C)) {
962
unsigned BitWidth = Ty->getScalarSizeInBits();
963
unsigned ShAmt = 0;
964
if (C->isPowerOf2())
965
ShAmt = BitWidth - C->logBase2() - 1;
966
else if (C2->isPowerOf2())
967
ShAmt = BitWidth - C2->logBase2() - 1;
968
if (ShAmt && MaskedValueIsZero(X, APInt::getHighBitsSet(BitWidth, ShAmt),
969
0, &Add)) {
970
Constant *ShAmtC = ConstantInt::get(Ty, ShAmt);
971
Value *NewShl = Builder.CreateShl(X, ShAmtC, "sext");
972
return BinaryOperator::CreateAShr(NewShl, ShAmtC);
973
}
974
}
975
}
976
977
if (C->isOne() && Op0->hasOneUse()) {
978
// add (sext i1 X), 1 --> zext (not X)
979
// TODO: The smallest IR representation is (select X, 0, 1), and that would
980
// not require the one-use check. But we need to remove a transform in
981
// visitSelect and make sure that IR value tracking for select is equal or
982
// better than for these ops.
983
if (match(Op0, m_SExt(m_Value(X))) &&
984
X->getType()->getScalarSizeInBits() == 1)
985
return new ZExtInst(Builder.CreateNot(X), Ty);
986
987
// Shifts and add used to flip and mask off the low bit:
988
// add (ashr (shl i32 X, 31), 31), 1 --> and (not X), 1
989
const APInt *C3;
990
if (match(Op0, m_AShr(m_Shl(m_Value(X), m_APInt(C2)), m_APInt(C3))) &&
991
C2 == C3 && *C2 == Ty->getScalarSizeInBits() - 1) {
992
Value *NotX = Builder.CreateNot(X);
993
return BinaryOperator::CreateAnd(NotX, ConstantInt::get(Ty, 1));
994
}
995
}
996
997
// Fold (add (zext (add X, -1)), 1) -> (zext X) if X is non-zero.
998
// TODO: There's a general form for any constant on the outer add.
999
if (C->isOne()) {
1000
if (match(Op0, m_ZExt(m_Add(m_Value(X), m_AllOnes())))) {
1001
const SimplifyQuery Q = SQ.getWithInstruction(&Add);
1002
if (llvm::isKnownNonZero(X, Q))
1003
return new ZExtInst(X, Ty);
1004
}
1005
}
1006
1007
return nullptr;
1008
}
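// Illustrative examples for two of the folds above (editor's sketch, not
// part of the upstream source; SSA names are hypothetical):
//   zext(bool) + C --> bool ? C + 1 : C
//     %z = zext i1 %b to i32
//     %r = add i32 %z, 41       ; ==> %r = select i1 %b, i32 42, i32 41
//   ~X + C --> (C - 1) - X
//     %n = xor i32 %x, -1
//     %r = add i32 %n, 10       ; ==> %r = sub i32 9, %x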
1009
1010
// match variations of a^2 + 2*a*b + b^2
1011
//
1012
// to reuse the code between the FP and Int versions, the instruction OpCodes
1013
// and constant types have been turned into template parameters.
1014
//
1015
// Mul2Rhs: The constant to perform the multiplicative equivalent of X*2 with;
1016
// should be `m_SpecificFP(2.0)` for FP and `m_SpecificInt(1)` for Int
1017
// (we're matching `X<<1` instead of `X*2` for Int)
1018
template <bool FP, typename Mul2Rhs>
1019
static bool matchesSquareSum(BinaryOperator &I, Mul2Rhs M2Rhs, Value *&A,
1020
Value *&B) {
1021
constexpr unsigned MulOp = FP ? Instruction::FMul : Instruction::Mul;
1022
constexpr unsigned AddOp = FP ? Instruction::FAdd : Instruction::Add;
1023
constexpr unsigned Mul2Op = FP ? Instruction::FMul : Instruction::Shl;
1024
1025
// (a * a) + (((a * 2) + b) * b)
1026
if (match(&I, m_c_BinOp(
1027
AddOp, m_OneUse(m_BinOp(MulOp, m_Value(A), m_Deferred(A))),
1028
m_OneUse(m_c_BinOp(
1029
MulOp,
1030
m_c_BinOp(AddOp, m_BinOp(Mul2Op, m_Deferred(A), M2Rhs),
1031
m_Value(B)),
1032
m_Deferred(B))))))
1033
return true;
1034
1035
// ((a * b) * 2) or ((a * 2) * b)
1036
// +
1037
// (a * a + b * b) or (b * b + a * a)
1038
return match(
1039
&I, m_c_BinOp(
1040
AddOp,
1041
m_CombineOr(
1042
m_OneUse(m_BinOp(
1043
Mul2Op, m_BinOp(MulOp, m_Value(A), m_Value(B)), M2Rhs)),
1044
m_OneUse(m_c_BinOp(MulOp, m_BinOp(Mul2Op, m_Value(A), M2Rhs),
1045
m_Value(B)))),
1046
m_OneUse(
1047
m_c_BinOp(AddOp, m_BinOp(MulOp, m_Deferred(A), m_Deferred(A)),
1048
m_BinOp(MulOp, m_Deferred(B), m_Deferred(B))))));
1049
}
1050
1051
// Fold integer variations of a^2 + 2*a*b + b^2 -> (a + b)^2
1052
Instruction *InstCombinerImpl::foldSquareSumInt(BinaryOperator &I) {
1053
Value *A, *B;
1054
if (matchesSquareSum</*FP*/ false>(I, m_SpecificInt(1), A, B)) {
1055
Value *AB = Builder.CreateAdd(A, B);
1056
return BinaryOperator::CreateMul(AB, AB);
1057
}
1058
return nullptr;
1059
}
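// Illustrative example (editor's sketch, not part of the upstream source;
// SSA names are hypothetical). The identity holds modulo 2^32, so no
// nsw/nuw is required:
//   %aa  = mul i32 %a, %a
//   %bb  = mul i32 %b, %b
//   %ab  = mul i32 %a, %b
//   %ab2 = shl i32 %ab, 1
//   %sum = add i32 %aa, %bb
//   %r   = add i32 %ab2, %sum
//   ==>
//   %apb = add i32 %a, %b
//   %r   = mul i32 %apb, %apb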
1060
1061
// Fold floating point variations of a^2 + 2*a*b + b^2 -> (a + b)^2
1062
// Requires `nsz` and `reassoc`.
1063
Instruction *InstCombinerImpl::foldSquareSumFP(BinaryOperator &I) {
1064
assert(I.hasAllowReassoc() && I.hasNoSignedZeros() && "Assumption mismatch");
1065
Value *A, *B;
1066
if (matchesSquareSum</*FP*/ true>(I, m_SpecificFP(2.0), A, B)) {
1067
Value *AB = Builder.CreateFAddFMF(A, B, &I);
1068
return BinaryOperator::CreateFMulFMF(AB, AB, &I);
1069
}
1070
return nullptr;
1071
}
1072
1073
// Matches multiplication expression Op * C where C is a constant. Returns the
1074
// constant value in C and the other operand in Op. Returns true if such a
1075
// match is found.
1076
static bool MatchMul(Value *E, Value *&Op, APInt &C) {
1077
const APInt *AI;
1078
if (match(E, m_Mul(m_Value(Op), m_APInt(AI)))) {
1079
C = *AI;
1080
return true;
1081
}
1082
if (match(E, m_Shl(m_Value(Op), m_APInt(AI)))) {
1083
C = APInt(AI->getBitWidth(), 1);
1084
C <<= *AI;
1085
return true;
1086
}
1087
return false;
1088
}
1089
1090
// Matches remainder expression Op % C where C is a constant. Returns the
1091
// constant value in C and the other operand in Op. Returns the signedness of
1092
// the remainder operation in IsSigned. Returns true if such a match is
1093
// found.
1094
static bool MatchRem(Value *E, Value *&Op, APInt &C, bool &IsSigned) {
1095
const APInt *AI;
1096
IsSigned = false;
1097
if (match(E, m_SRem(m_Value(Op), m_APInt(AI)))) {
1098
IsSigned = true;
1099
C = *AI;
1100
return true;
1101
}
1102
if (match(E, m_URem(m_Value(Op), m_APInt(AI)))) {
1103
C = *AI;
1104
return true;
1105
}
1106
if (match(E, m_And(m_Value(Op), m_APInt(AI))) && (*AI + 1).isPowerOf2()) {
1107
C = *AI + 1;
1108
return true;
1109
}
1110
return false;
1111
}
1112
1113
// Matches division expression Op / C with the given signedness as indicated
1114
// by IsSigned, where C is a constant. Returns the constant value in C and the
1115
// other operand in Op. Returns true if such a match is found.
1116
static bool MatchDiv(Value *E, Value *&Op, APInt &C, bool IsSigned) {
1117
const APInt *AI;
1118
if (IsSigned && match(E, m_SDiv(m_Value(Op), m_APInt(AI)))) {
1119
C = *AI;
1120
return true;
1121
}
1122
if (!IsSigned) {
1123
if (match(E, m_UDiv(m_Value(Op), m_APInt(AI)))) {
1124
C = *AI;
1125
return true;
1126
}
1127
if (match(E, m_LShr(m_Value(Op), m_APInt(AI)))) {
1128
C = APInt(AI->getBitWidth(), 1);
1129
C <<= *AI;
1130
return true;
1131
}
1132
}
1133
return false;
1134
}
1135
1136
// Returns whether C0 * C1 with the given signedness overflows.
1137
static bool MulWillOverflow(APInt &C0, APInt &C1, bool IsSigned) {
1138
bool overflow;
1139
if (IsSigned)
1140
(void)C0.smul_ov(C1, overflow);
1141
else
1142
(void)C0.umul_ov(C1, overflow);
1143
return overflow;
1144
}
1145
1146
// Simplifies X % C0 + (( X / C0 ) % C1) * C0 to X % (C0 * C1), where (C0 * C1)
1147
// does not overflow.
1148
// Simplifies (X / C0) * C1 + (X % C0) * C2 to
1149
// (X / C0) * (C1 - C2 * C0) + X * C2
1150
Value *InstCombinerImpl::SimplifyAddWithRemainder(BinaryOperator &I) {
1151
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1152
Value *X, *MulOpV;
1153
APInt C0, MulOpC;
1154
bool IsSigned;
1155
// Match I = X % C0 + MulOpV * C0
1156
if (((MatchRem(LHS, X, C0, IsSigned) && MatchMul(RHS, MulOpV, MulOpC)) ||
1157
(MatchRem(RHS, X, C0, IsSigned) && MatchMul(LHS, MulOpV, MulOpC))) &&
1158
C0 == MulOpC) {
1159
Value *RemOpV;
1160
APInt C1;
1161
bool Rem2IsSigned;
1162
// Match MulOpV = RemOpV % C1
1163
if (MatchRem(MulOpV, RemOpV, C1, Rem2IsSigned) &&
1164
IsSigned == Rem2IsSigned) {
1165
Value *DivOpV;
1166
APInt DivOpC;
1167
// Match RemOpV = X / C0
1168
if (MatchDiv(RemOpV, DivOpV, DivOpC, IsSigned) && X == DivOpV &&
1169
C0 == DivOpC && !MulWillOverflow(C0, C1, IsSigned)) {
1170
Value *NewDivisor = ConstantInt::get(X->getType(), C0 * C1);
1171
return IsSigned ? Builder.CreateSRem(X, NewDivisor, "srem")
1172
: Builder.CreateURem(X, NewDivisor, "urem");
1173
}
1174
}
1175
}
1176
1177
// Match I = (X / C0) * C1 + (X % C0) * C2
1178
Value *Div, *Rem;
1179
APInt C1, C2;
1180
if (!LHS->hasOneUse() || !MatchMul(LHS, Div, C1))
1181
Div = LHS, C1 = APInt(I.getType()->getScalarSizeInBits(), 1);
1182
if (!RHS->hasOneUse() || !MatchMul(RHS, Rem, C2))
1183
Rem = RHS, C2 = APInt(I.getType()->getScalarSizeInBits(), 1);
1184
if (match(Div, m_IRem(m_Value(), m_Value()))) {
1185
std::swap(Div, Rem);
1186
std::swap(C1, C2);
1187
}
1188
Value *DivOpV;
1189
APInt DivOpC;
1190
if (MatchRem(Rem, X, C0, IsSigned) &&
1191
MatchDiv(Div, DivOpV, DivOpC, IsSigned) && X == DivOpV && C0 == DivOpC) {
1192
APInt NewC = C1 - C2 * C0;
1193
if (!NewC.isZero() && !Rem->hasOneUse())
1194
return nullptr;
1195
if (!isGuaranteedNotToBeUndef(X, &AC, &I, &DT))
1196
return nullptr;
1197
Value *MulXC2 = Builder.CreateMul(X, ConstantInt::get(X->getType(), C2));
1198
if (NewC.isZero())
1199
return MulXC2;
1200
return Builder.CreateAdd(
1201
Builder.CreateMul(Div, ConstantInt::get(X->getType(), NewC)), MulXC2);
1202
}
1203
1204
return nullptr;
1205
}
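// Illustrative example of the first simplification (editor's sketch, not
// part of the upstream source; SSA names are hypothetical), with C0 = 10
// and C1 = 10, where 10 * 10 does not overflow:
//   %r0 = urem i32 %x, 10
//   %d  = udiv i32 %x, 10
//   %r1 = urem i32 %d, 10
//   %m  = mul  i32 %r1, 10
//   %r  = add  i32 %r0, %m
//   ==>
//   %r  = urem i32 %x, 100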
1206
1207
/// Fold
1208
/// (1 << NBits) - 1
1209
/// Into:
1210
/// ~(-(1 << NBits))
1211
/// Because a 'not' is better for bit-tracking analysis and other transforms
1212
/// than an 'add'. The new shl is always nsw, and is nuw if the old `add` was.
1213
static Instruction *canonicalizeLowbitMask(BinaryOperator &I,
1214
InstCombiner::BuilderTy &Builder) {
1215
Value *NBits;
1216
if (!match(&I, m_Add(m_OneUse(m_Shl(m_One(), m_Value(NBits))), m_AllOnes())))
1217
return nullptr;
1218
1219
Constant *MinusOne = Constant::getAllOnesValue(NBits->getType());
1220
Value *NotMask = Builder.CreateShl(MinusOne, NBits, "notmask");
1221
// Be wary of constant folding.
1222
if (auto *BOp = dyn_cast<BinaryOperator>(NotMask)) {
1223
// Always NSW. But NUW propagates from `add`.
1224
BOp->setHasNoSignedWrap();
1225
BOp->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
1226
}
1227
1228
return BinaryOperator::CreateNot(NotMask, I.getName());
1229
}
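// Illustrative example (editor's sketch, not part of the upstream source;
// SSA names are hypothetical):
//   %shl  = shl i32 1, %n
//   %mask = add i32 %shl, -1
//   ==>
//   %notmask = shl i32 -1, %n
//   %mask    = xor i32 %notmask, -1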
1230
1231
static Instruction *foldToUnsignedSaturatedAdd(BinaryOperator &I) {
1232
assert(I.getOpcode() == Instruction::Add && "Expecting add instruction");
1233
Type *Ty = I.getType();
1234
auto getUAddSat = [&]() {
1235
return Intrinsic::getDeclaration(I.getModule(), Intrinsic::uadd_sat, Ty);
1236
};
1237
1238
// add (umin X, ~Y), Y --> uaddsat X, Y
1239
Value *X, *Y;
1240
if (match(&I, m_c_Add(m_c_UMin(m_Value(X), m_Not(m_Value(Y))),
1241
m_Deferred(Y))))
1242
return CallInst::Create(getUAddSat(), { X, Y });
1243
1244
// add (umin X, ~C), C --> uaddsat X, C
1245
const APInt *C, *NotC;
1246
if (match(&I, m_Add(m_UMin(m_Value(X), m_APInt(NotC)), m_APInt(C))) &&
1247
*C == ~*NotC)
1248
return CallInst::Create(getUAddSat(), { X, ConstantInt::get(Ty, *C) });
1249
1250
return nullptr;
1251
}
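// Illustrative example (editor's sketch, not part of the upstream source;
// SSA names are hypothetical). umin(X, ~Y) + Y never wraps and clamps at
// UINT_MAX exactly when X + Y would overflow:
//   %noty = xor i32 %y, -1
//   %m    = call i32 @llvm.umin.i32(i32 %x, i32 %noty)
//   %r    = add i32 %m, %y
//   ==>
//   %r    = call i32 @llvm.uadd.sat.i32(i32 %x, i32 %y)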
1252
1253
// Transform:
1254
// (add A, (shl (neg B), Y))
1255
// -> (sub A, (shl B, Y))
1256
static Instruction *combineAddSubWithShlAddSub(InstCombiner::BuilderTy &Builder,
1257
const BinaryOperator &I) {
1258
Value *A, *B, *Cnt;
1259
if (match(&I,
1260
m_c_Add(m_OneUse(m_Shl(m_OneUse(m_Neg(m_Value(B))), m_Value(Cnt))),
1261
m_Value(A)))) {
1262
Value *NewShl = Builder.CreateShl(B, Cnt);
1263
return BinaryOperator::CreateSub(A, NewShl);
1264
}
1265
return nullptr;
1266
}
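// Illustrative example (editor's sketch, not part of the upstream source;
// SSA names are hypothetical). (-B) << Y equals -(B << Y) modulo 2^N, so
// adding it is the same as subtracting B << Y:
//   %nb = sub i32 0, %b
//   %sh = shl i32 %nb, %y
//   %r  = add i32 %a, %sh
//   ==>
//   %sh = shl i32 %b, %y
//   %r  = sub i32 %a, %sh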
1267
1268
/// Try to reduce signed division by power-of-2 to an arithmetic shift right.
1269
static Instruction *foldAddToAshr(BinaryOperator &Add) {
1270
// Division must be by power-of-2, but not the minimum signed value.
1271
Value *X;
1272
const APInt *DivC;
1273
if (!match(Add.getOperand(0), m_SDiv(m_Value(X), m_Power2(DivC))) ||
1274
DivC->isNegative())
1275
return nullptr;
1276
1277
// Rounding is done by adding -1 if the dividend (X) is negative and has any
1278
// low bits set. It recognizes two canonical patterns:
1279
// 1. For an 'ugt' cmp with the signed minimum value (SMIN), the
1280
// pattern is: sext (icmp ugt (X & (DivC - 1)), SMIN).
1281
// 2. For an 'eq' cmp, the pattern is: sext (icmp eq (X & (SMIN + 1)), SMIN + 1).
1282
// Note that, by the time we end up here, if possible, ugt has been
1283
// canonicalized into eq.
1284
const APInt *MaskC, *MaskCCmp;
1285
ICmpInst::Predicate Pred;
1286
if (!match(Add.getOperand(1),
1287
m_SExt(m_ICmp(Pred, m_And(m_Specific(X), m_APInt(MaskC)),
1288
m_APInt(MaskCCmp)))))
1289
return nullptr;
1290
1291
if ((Pred != ICmpInst::ICMP_UGT || !MaskCCmp->isSignMask()) &&
1292
(Pred != ICmpInst::ICMP_EQ || *MaskCCmp != *MaskC))
1293
return nullptr;
1294
1295
APInt SMin = APInt::getSignedMinValue(Add.getType()->getScalarSizeInBits());
1296
bool IsMaskValid = Pred == ICmpInst::ICMP_UGT
1297
? (*MaskC == (SMin | (*DivC - 1)))
1298
: (*DivC == 2 && *MaskC == SMin + 1);
1299
if (!IsMaskValid)
1300
return nullptr;
1301
1302
// (X / DivC) + sext ((X & (SMin | (DivC - 1)) >u SMin) --> X >>s log2(DivC)
1303
return BinaryOperator::CreateAShr(
1304
X, ConstantInt::get(Add.getType(), DivC->exactLogBase2()));
1305
}
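// Illustrative example of the 'eq' form with DivC = 2 (editor's sketch, not
// part of the upstream source; SSA names are hypothetical). -2147483647 is
// SMIN + 1 for i32. sdiv rounds toward zero; subtracting 1 when %x is
// negative and odd yields the floor that ashr computes:
//   %d = sdiv i32 %x, 2
//   %a = and  i32 %x, -2147483647
//   %c = icmp eq i32 %a, -2147483647
//   %s = sext i1 %c to i32
//   %r = add  i32 %d, %s
//   ==>
//   %r = ashr i32 %x, 1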
1306
1307
Instruction *InstCombinerImpl::
1308
canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(
1309
BinaryOperator &I) {
1310
assert((I.getOpcode() == Instruction::Add ||
1311
I.getOpcode() == Instruction::Or ||
1312
I.getOpcode() == Instruction::Sub) &&
1313
"Expecting add/or/sub instruction");
1314
1315
// We have a subtraction/addition between a (potentially truncated) *logical*
1316
// right-shift of X and a "select".
1317
Value *X, *Select;
1318
Instruction *LowBitsToSkip, *Extract;
1319
if (!match(&I, m_c_BinOp(m_TruncOrSelf(m_CombineAnd(
1320
m_LShr(m_Value(X), m_Instruction(LowBitsToSkip)),
1321
m_Instruction(Extract))),
1322
m_Value(Select))))
1323
return nullptr;
1324
1325
// `add`/`or` is commutative; but for `sub`, "select" *must* be on RHS.
1326
if (I.getOpcode() == Instruction::Sub && I.getOperand(1) != Select)
1327
return nullptr;
1328
1329
Type *XTy = X->getType();
1330
bool HadTrunc = I.getType() != XTy;
1331
1332
// If there was a truncation of extracted value, then we'll need to produce
1333
// one extra instruction, so we need to ensure one instruction will go away.
1334
if (HadTrunc && !match(&I, m_c_BinOp(m_OneUse(m_Value()), m_Value())))
1335
return nullptr;
1336
1337
// Extraction should extract high NBits bits, with shift amount calculated as:
1338
// low bits to skip = shift bitwidth - high bits to extract
1339
// The shift amount itself may be extended, and we need to look past zero-ext
1340
// when matching NBits, that will matter for matching later.
1341
Constant *C;
1342
Value *NBits;
1343
if (!match(
1344
LowBitsToSkip,
1345
m_ZExtOrSelf(m_Sub(m_Constant(C), m_ZExtOrSelf(m_Value(NBits))))) ||
1346
!match(C, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_EQ,
1347
APInt(C->getType()->getScalarSizeInBits(),
1348
X->getType()->getScalarSizeInBits()))))
1349
return nullptr;
1350
1351
// Sign-extending value can be zero-extended if we `sub`tract it,
1352
// or sign-extended otherwise.
1353
auto SkipExtInMagic = [&I](Value *&V) {
1354
if (I.getOpcode() == Instruction::Sub)
1355
match(V, m_ZExtOrSelf(m_Value(V)));
1356
else
1357
match(V, m_SExtOrSelf(m_Value(V)));
1358
};
1359
1360
// Now, finally validate the sign-extending magic.
1361
// `select` itself may be appropriately extended, look past that.
1362
SkipExtInMagic(Select);
1363
1364
ICmpInst::Predicate Pred;
1365
const APInt *Thr;
1366
Value *SignExtendingValue, *Zero;
1367
bool ShouldSignext;
1368
// It must be a select between two values we will later establish to be a
1369
// sign-extending value and a zero constant. The condition guarding the
1370
// sign-extension must be based on a sign bit of the same X we had in `lshr`.
1371
if (!match(Select, m_Select(m_ICmp(Pred, m_Specific(X), m_APInt(Thr)),
1372
m_Value(SignExtendingValue), m_Value(Zero))) ||
1373
!isSignBitCheck(Pred, *Thr, ShouldSignext))
1374
return nullptr;
1375
1376
// icmp-select pair is commutative.
1377
if (!ShouldSignext)
1378
std::swap(SignExtendingValue, Zero);
1379
1380
// If we should not perform sign-extension then we must add/or/subtract zero.
1381
if (!match(Zero, m_Zero()))
1382
return nullptr;
1383
// Otherwise, it should be some constant, left-shifted by the same NBits we
1384
// had in `lshr`. Said left-shift can also be appropriately extended.
1385
// Again, we must look past zero-ext when looking for NBits.
1386
SkipExtInMagic(SignExtendingValue);
1387
Constant *SignExtendingValueBaseConstant;
1388
if (!match(SignExtendingValue,
1389
m_Shl(m_Constant(SignExtendingValueBaseConstant),
1390
m_ZExtOrSelf(m_Specific(NBits)))))
1391
return nullptr;
1392
// If we `sub`, then the constant should be one, else it should be all-ones.
1393
if (I.getOpcode() == Instruction::Sub
1394
? !match(SignExtendingValueBaseConstant, m_One())
1395
: !match(SignExtendingValueBaseConstant, m_AllOnes()))
1396
return nullptr;
1397
1398
auto *NewAShr = BinaryOperator::CreateAShr(X, LowBitsToSkip,
1399
Extract->getName() + ".sext");
1400
NewAShr->copyIRFlags(Extract); // Preserve `exact`-ness.
1401
if (!HadTrunc)
1402
return NewAShr;
1403
1404
Builder.Insert(NewAShr);
1405
return TruncInst::CreateTruncOrBitCast(NewAShr, I.getType());
1406
}
1407
1408
/// This is a specialization of a more general transform from
1409
/// foldUsingDistributiveLaws. If that code can be made to work optimally
1410
/// for multi-use cases or propagating nsw/nuw, then we would not need this.
1411
static Instruction *factorizeMathWithShlOps(BinaryOperator &I,
1412
InstCombiner::BuilderTy &Builder) {
1413
// TODO: Also handle mul by doubling the shift amount?
1414
assert((I.getOpcode() == Instruction::Add ||
1415
I.getOpcode() == Instruction::Sub) &&
1416
"Expected add/sub");
1417
auto *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0));
1418
auto *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1));
1419
if (!Op0 || !Op1 || !(Op0->hasOneUse() || Op1->hasOneUse()))
1420
return nullptr;
1421
1422
Value *X, *Y, *ShAmt;
1423
if (!match(Op0, m_Shl(m_Value(X), m_Value(ShAmt))) ||
1424
!match(Op1, m_Shl(m_Value(Y), m_Specific(ShAmt))))
1425
return nullptr;
1426
1427
// No-wrap propagates only when all ops have no-wrap.
1428
bool HasNSW = I.hasNoSignedWrap() && Op0->hasNoSignedWrap() &&
1429
Op1->hasNoSignedWrap();
1430
bool HasNUW = I.hasNoUnsignedWrap() && Op0->hasNoUnsignedWrap() &&
1431
Op1->hasNoUnsignedWrap();
1432
1433
// add/sub (X << ShAmt), (Y << ShAmt) --> (add/sub X, Y) << ShAmt
1434
Value *NewMath = Builder.CreateBinOp(I.getOpcode(), X, Y);
1435
if (auto *NewI = dyn_cast<BinaryOperator>(NewMath)) {
1436
NewI->setHasNoSignedWrap(HasNSW);
1437
NewI->setHasNoUnsignedWrap(HasNUW);
1438
}
1439
auto *NewShl = BinaryOperator::CreateShl(NewMath, ShAmt);
1440
NewShl->setHasNoSignedWrap(HasNSW);
1441
NewShl->setHasNoUnsignedWrap(HasNUW);
1442
return NewShl;
1443
}
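// Illustrative example (editor's sketch, not part of the upstream source;
// SSA names are hypothetical). nuw/nsw carry over only if all of the
// original instructions had them:
//   %xs = shl i32 %x, %n
//   %ys = shl i32 %y, %n
//   %r  = add i32 %xs, %ys
//   ==>
//   %s  = add i32 %x, %y
//   %r  = shl i32 %s, %n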
1444
1445
/// Reduce a sequence of masked half-width multiplies to a single multiply.
1446
/// (((XLow * YHigh) + (YLow * XHigh)) << HalfBits) + (XLow * YLow) --> X * Y
1447
static Instruction *foldBoxMultiply(BinaryOperator &I) {
1448
unsigned BitWidth = I.getType()->getScalarSizeInBits();
1449
// Skip the odd bitwidth types.
1450
if ((BitWidth & 0x1))
1451
return nullptr;
1452
1453
unsigned HalfBits = BitWidth >> 1;
1454
APInt HalfMask = APInt::getMaxValue(HalfBits);
1455
1456
// ResLo = (CrossSum << HalfBits) + (YLo * XLo)
1457
Value *XLo, *YLo;
1458
Value *CrossSum;
1459
// Require one-use on the multiply to avoid increasing the number of
1460
// multiplications.
1461
if (!match(&I, m_c_Add(m_Shl(m_Value(CrossSum), m_SpecificInt(HalfBits)),
1462
m_OneUse(m_Mul(m_Value(YLo), m_Value(XLo))))))
1463
return nullptr;
1464
1465
// XLo = X & HalfMask
1466
// YLo = Y & HalfMask
1467
// TODO: Refactor with SimplifyDemandedBits or KnownBits known leading zeros
1468
// to enhance robustness
1469
Value *X, *Y;
1470
if (!match(XLo, m_And(m_Value(X), m_SpecificInt(HalfMask))) ||
1471
!match(YLo, m_And(m_Value(Y), m_SpecificInt(HalfMask))))
1472
return nullptr;
1473
1474
// CrossSum = (X' * (Y >> Halfbits)) + (Y' * (X >> HalfBits))
1475
// X' can be either X or XLo in the pattern (and the same for Y')
1476
if (match(CrossSum,
1477
m_c_Add(m_c_Mul(m_LShr(m_Specific(Y), m_SpecificInt(HalfBits)),
1478
m_CombineOr(m_Specific(X), m_Specific(XLo))),
1479
m_c_Mul(m_LShr(m_Specific(X), m_SpecificInt(HalfBits)),
1480
m_CombineOr(m_Specific(Y), m_Specific(YLo))))))
1481
return BinaryOperator::CreateMul(X, Y);
1482
1483
return nullptr;
1484
}
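// Illustrative example for i32 with HalfBits = 16 (editor's sketch, not
// part of the upstream source; SSA names are hypothetical). The product's
// high half that would be shifted out of range is irrelevant modulo 2^32:
//   %xl = and  i32 %x, 65535
//   %yl = and  i32 %y, 65535
//   %xh = lshr i32 %x, 16
//   %yh = lshr i32 %y, 16
//   %m1 = mul  i32 %xl, %yh
//   %m2 = mul  i32 %yl, %xh
//   %cs = add  i32 %m1, %m2
//   %hi = shl  i32 %cs, 16
//   %lo = mul  i32 %yl, %xl
//   %r  = add  i32 %hi, %lo
//   ==>
//   %r  = mul  i32 %x, %y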
1485
1486
Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
1487
if (Value *V = simplifyAddInst(I.getOperand(0), I.getOperand(1),
1488
I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
1489
SQ.getWithInstruction(&I)))
1490
return replaceInstUsesWith(I, V);
1491
1492
if (SimplifyAssociativeOrCommutative(I))
1493
return &I;
1494
1495
if (Instruction *X = foldVectorBinop(I))
1496
return X;
1497
1498
if (Instruction *Phi = foldBinopWithPhiOperands(I))
1499
return Phi;
1500
1501
// (A*B)+(A*C) -> A*(B+C) etc
1502
if (Value *V = foldUsingDistributiveLaws(I))
1503
return replaceInstUsesWith(I, V);
1504
1505
if (Instruction *R = foldBoxMultiply(I))
1506
return R;
1507
1508
if (Instruction *R = factorizeMathWithShlOps(I, Builder))
1509
return R;
1510
1511
if (Instruction *X = foldAddWithConstant(I))
1512
return X;
1513
1514
if (Instruction *X = foldNoWrapAdd(I, Builder))
1515
return X;
1516
1517
if (Instruction *R = foldBinOpShiftWithShift(I))
1518
return R;
1519
1520
if (Instruction *R = combineAddSubWithShlAddSub(Builder, I))
1521
return R;
1522
1523
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1524
Type *Ty = I.getType();
1525
if (Ty->isIntOrIntVectorTy(1))
1526
return BinaryOperator::CreateXor(LHS, RHS);
1527
1528
// X + X --> X << 1
1529
if (LHS == RHS) {
1530
auto *Shl = BinaryOperator::CreateShl(LHS, ConstantInt::get(Ty, 1));
1531
Shl->setHasNoSignedWrap(I.hasNoSignedWrap());
1532
Shl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
1533
return Shl;
1534
}
1535
1536
Value *A, *B;
1537
if (match(LHS, m_Neg(m_Value(A)))) {
1538
// -A + -B --> -(A + B)
1539
if (match(RHS, m_Neg(m_Value(B))))
1540
return BinaryOperator::CreateNeg(Builder.CreateAdd(A, B));
1541
1542
// -A + B --> B - A
1543
auto *Sub = BinaryOperator::CreateSub(RHS, A);
1544
auto *OB0 = cast<OverflowingBinaryOperator>(LHS);
1545
Sub->setHasNoSignedWrap(I.hasNoSignedWrap() && OB0->hasNoSignedWrap());
1546
1547
return Sub;
1548
}
1549
1550
// A + -B --> A - B
1551
if (match(RHS, m_Neg(m_Value(B))))
1552
return BinaryOperator::CreateSub(LHS, B);
1553
1554
if (Value *V = checkForNegativeOperand(I, Builder))
1555
return replaceInstUsesWith(I, V);
1556
1557
// (A + 1) + ~B --> A - B
1558
// ~B + (A + 1) --> A - B
1559
// (~B + A) + 1 --> A - B
1560
// (A + ~B) + 1 --> A - B
1561
if (match(&I, m_c_BinOp(m_Add(m_Value(A), m_One()), m_Not(m_Value(B)))) ||
1562
match(&I, m_BinOp(m_c_Add(m_Not(m_Value(B)), m_Value(A)), m_One())))
1563
return BinaryOperator::CreateSub(A, B);
1564
1565
// (A + RHS) + RHS --> A + (RHS << 1)
1566
if (match(LHS, m_OneUse(m_c_Add(m_Value(A), m_Specific(RHS)))))
1567
return BinaryOperator::CreateAdd(A, Builder.CreateShl(RHS, 1, "reass.add"));
1568
1569
// LHS + (A + LHS) --> A + (LHS << 1)
1570
if (match(RHS, m_OneUse(m_c_Add(m_Value(A), m_Specific(LHS)))))
1571
return BinaryOperator::CreateAdd(A, Builder.CreateShl(LHS, 1, "reass.add"));
1572
1573
{
1574
// (A + C1) + (C2 - B) --> (A - B) + (C1 + C2)
1575
Constant *C1, *C2;
1576
if (match(&I, m_c_Add(m_Add(m_Value(A), m_ImmConstant(C1)),
1577
m_Sub(m_ImmConstant(C2), m_Value(B)))) &&
1578
(LHS->hasOneUse() || RHS->hasOneUse())) {
1579
Value *Sub = Builder.CreateSub(A, B);
1580
return BinaryOperator::CreateAdd(Sub, ConstantExpr::getAdd(C1, C2));
1581
}
1582
1583
// Canonicalize a constant sub operand as an add operand for better folding:
1584
// (C1 - A) + B --> (B - A) + C1
1585
if (match(&I, m_c_Add(m_OneUse(m_Sub(m_ImmConstant(C1), m_Value(A))),
1586
m_Value(B)))) {
1587
Value *Sub = Builder.CreateSub(B, A, "reass.sub");
1588
return BinaryOperator::CreateAdd(Sub, C1);
1589
}
1590
}
1591
1592
// X % C0 + (( X / C0 ) % C1) * C0 => X % (C0 * C1)
1593
if (Value *V = SimplifyAddWithRemainder(I)) return replaceInstUsesWith(I, V);
1594
1595
// ((X s/ C1) << C2) + X => X s% -C1 where -C1 is 1 << C2
1596
const APInt *C1, *C2;
1597
if (match(LHS, m_Shl(m_SDiv(m_Specific(RHS), m_APInt(C1)), m_APInt(C2)))) {
1598
APInt one(C2->getBitWidth(), 1);
1599
APInt minusC1 = -(*C1);
1600
if (minusC1 == (one << *C2)) {
1601
Constant *NewRHS = ConstantInt::get(RHS->getType(), minusC1);
1602
return BinaryOperator::CreateSRem(RHS, NewRHS);
1603
}
1604
}
1605
1606
// (A & 2^C1) + A => A & (2^C1 - 1) iff bit C1 in A is a sign bit
1607
if (match(&I, m_c_Add(m_And(m_Value(A), m_APInt(C1)), m_Deferred(A))) &&
1608
C1->isPowerOf2() && (ComputeNumSignBits(A) > C1->countl_zero())) {
1609
Constant *NewMask = ConstantInt::get(RHS->getType(), *C1 - 1);
1610
return BinaryOperator::CreateAnd(A, NewMask);
1611
}
1612
1613
// ZExt (B - A) + ZExt(A) --> ZExt(B)
1614
if ((match(RHS, m_ZExt(m_Value(A))) &&
1615
match(LHS, m_ZExt(m_NUWSub(m_Value(B), m_Specific(A))))) ||
1616
(match(LHS, m_ZExt(m_Value(A))) &&
1617
match(RHS, m_ZExt(m_NUWSub(m_Value(B), m_Specific(A))))))
1618
return new ZExtInst(B, LHS->getType());
1619
1620
// zext(A) + sext(A) --> 0 if A is i1
1621
if (match(&I, m_c_BinOp(m_ZExt(m_Value(A)), m_SExt(m_Deferred(A)))) &&
1622
A->getType()->isIntOrIntVectorTy(1))
1623
return replaceInstUsesWith(I, Constant::getNullValue(I.getType()));
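// For example, A = i1 1 gives zext(A) = 1 and sext(A) = -1, which sum to 0;
// A = i1 0 gives 0 + 0 = 0.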
1624
1625
// A+B --> A|B iff A and B have no bits set in common.
1626
WithCache<const Value *> LHSCache(LHS), RHSCache(RHS);
1627
if (haveNoCommonBitsSet(LHSCache, RHSCache, SQ.getWithInstruction(&I)))
1628
return BinaryOperator::CreateDisjointOr(LHS, RHS);
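// For example, 0b0101 + 0b1010 = 0b1111: with no bits in common there are no
// carries, so the add behaves exactly like an or (and can be marked disjoint).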
1629
1630
if (Instruction *Ext = narrowMathIfNoOverflow(I))
1631
return Ext;
1632
1633
// (add (xor A, B) (and A, B)) --> (or A, B)
1634
// (add (and A, B) (xor A, B)) --> (or A, B)
1635
if (match(&I, m_c_BinOp(m_Xor(m_Value(A), m_Value(B)),
1636
m_c_And(m_Deferred(A), m_Deferred(B)))))
1637
return BinaryOperator::CreateOr(A, B);
1638
1639
// (add (or A, B) (and A, B)) --> (add A, B)
1640
// (add (and A, B) (or A, B)) --> (add A, B)
1641
if (match(&I, m_c_BinOp(m_Or(m_Value(A), m_Value(B)),
1642
m_c_And(m_Deferred(A), m_Deferred(B))))) {
1643
// Replacing operands in-place to preserve nuw/nsw flags.
1644
replaceOperand(I, 0, A);
1645
replaceOperand(I, 1, B);
1646
return &I;
1647
}
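// Both of the folds above follow from the identities
// A + B == (A ^ B) + 2*(A & B) and (A | B) == (A ^ B) + (A & B).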
1648
1649
// (add A (or A, -A)) --> (and (add A, -1) A)
1650
// (add A (or -A, A)) --> (and (add A, -1) A)
1651
// (add (or A, -A) A) --> (and (add A, -1) A)
1652
// (add (or -A, A) A) --> (and (add A, -1) A)
1653
if (match(&I, m_c_BinOp(m_Value(A), m_OneUse(m_c_Or(m_Neg(m_Deferred(A)),
1654
m_Deferred(A)))))) {
1655
Value *Add =
1656
Builder.CreateAdd(A, Constant::getAllOnesValue(A->getType()), "",
1657
I.hasNoUnsignedWrap(), I.hasNoSignedWrap());
1658
return BinaryOperator::CreateAnd(Add, A);
1659
}
1660
1661
// Canonicalize ((A & -A) - 1) --> ((A - 1) & ~A)
1662
// Forms all commutable operations, and simplifies ctpop -> cttz folds.
1663
if (match(&I,
1664
m_Add(m_OneUse(m_c_And(m_Value(A), m_OneUse(m_Neg(m_Deferred(A))))),
1665
m_AllOnes()))) {
1666
Constant *AllOnes = ConstantInt::getAllOnesValue(RHS->getType());
1667
Value *Dec = Builder.CreateAdd(A, AllOnes);
1668
Value *Not = Builder.CreateXor(A, AllOnes);
1669
return BinaryOperator::CreateAnd(Dec, Not);
1670
}
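// For example, A = 0b1100: (A & -A) - 1 = 4 - 1 = 3, and the canonical form
// gives (A - 1) & ~A = 0b1011 & ~0b1100 = 0b0011 = 3.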
1671
1672
// Disguised reassociation/factorization:
1673
// ~(A * C1) + A
1674
// ((A * -C1) - 1) + A
1675
// ((A * -C1) + A) - 1
1676
// (A * (1 - C1)) - 1
1677
if (match(&I,
1678
m_c_Add(m_OneUse(m_Not(m_OneUse(m_Mul(m_Value(A), m_APInt(C1))))),
1679
m_Deferred(A)))) {
1680
Type *Ty = I.getType();
1681
Constant *NewMulC = ConstantInt::get(Ty, 1 - *C1);
1682
Value *NewMul = Builder.CreateMul(A, NewMulC);
1683
return BinaryOperator::CreateAdd(NewMul, ConstantInt::getAllOnesValue(Ty));
1684
}
1685
1686
// (A * -2**C) + B --> B - (A << C)
1687
const APInt *NegPow2C;
1688
if (match(&I, m_c_Add(m_OneUse(m_Mul(m_Value(A), m_NegatedPower2(NegPow2C))),
1689
m_Value(B)))) {
1690
Constant *ShiftAmtC = ConstantInt::get(Ty, NegPow2C->countr_zero());
1691
Value *Shl = Builder.CreateShl(A, ShiftAmtC);
1692
return BinaryOperator::CreateSub(B, Shl);
1693
}
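// For example, with C = 3: (A * -8) + B = B - 8*A = B - (A << 3).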
1694
1695
// Canonicalize signum variant that ends in add:
1696
// (A s>> (BW - 1)) + (zext (A s> 0)) --> (A s>> (BW - 1)) | (zext (A != 0))
1697
ICmpInst::Predicate Pred;
1698
uint64_t BitWidth = Ty->getScalarSizeInBits();
1699
if (match(LHS, m_AShr(m_Value(A), m_SpecificIntAllowPoison(BitWidth - 1))) &&
1700
match(RHS, m_OneUse(m_ZExt(
1701
m_OneUse(m_ICmp(Pred, m_Specific(A), m_ZeroInt()))))) &&
1702
Pred == CmpInst::ICMP_SGT) {
1703
Value *NotZero = Builder.CreateIsNotNull(A, "isnotnull");
1704
Value *Zext = Builder.CreateZExt(NotZero, Ty, "isnotnull.zext");
1705
return BinaryOperator::CreateOr(LHS, Zext);
1706
}
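// Case analysis: A > 0 gives 0 + 1 = 1, A == 0 gives 0 + 0 = 0, and A < 0
// gives -1 + 0 = -1. The replacement (A s>> (BW-1)) | zext(A != 0) yields the
// same three values: 0 | 1, 0 | 0, and -1 | 1.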
1707
1708
{
1709
Value *Cond, *Ext;
1710
Constant *C;
1711
// (add X, (sext/zext (icmp eq X, C)))
1712
// -> (select (icmp eq X, C), (add C, (sext/zext 1)), X)
1713
auto CondMatcher = m_CombineAnd(
1714
m_Value(Cond), m_ICmp(Pred, m_Deferred(A), m_ImmConstant(C)));
1715
1716
if (match(&I,
1717
m_c_Add(m_Value(A),
1718
m_CombineAnd(m_Value(Ext), m_ZExtOrSExt(CondMatcher)))) &&
1719
Pred == ICmpInst::ICMP_EQ && Ext->hasOneUse()) {
1720
Value *Add = isa<ZExtInst>(Ext) ? InstCombiner::AddOne(C)
1721
: InstCombiner::SubOne(C);
1722
return replaceInstUsesWith(I, Builder.CreateSelect(Cond, Add, A));
1723
}
1724
}
1725
1726
if (Instruction *Ashr = foldAddToAshr(I))
1727
return Ashr;
1728
1729
// (~X) + (~Y) --> -2 - (X + Y)
1730
{
1731
// To save instructions, we need to consume both LHS and RHS
1732
// (i.e., they both have a `not`).
1733
bool ConsumesLHS, ConsumesRHS;
1734
if (isFreeToInvert(LHS, LHS->hasOneUse(), ConsumesLHS) && ConsumesLHS &&
1735
isFreeToInvert(RHS, RHS->hasOneUse(), ConsumesRHS) && ConsumesRHS) {
1736
Value *NotLHS = getFreelyInverted(LHS, LHS->hasOneUse(), &Builder);
1737
Value *NotRHS = getFreelyInverted(RHS, RHS->hasOneUse(), &Builder);
1738
assert(NotLHS != nullptr && NotRHS != nullptr &&
1739
"isFreeToInvert desynced with getFreelyInverted");
1740
Value *LHSPlusRHS = Builder.CreateAdd(NotLHS, NotRHS);
1741
return BinaryOperator::CreateSub(
1742
ConstantInt::getSigned(RHS->getType(), -2), LHSPlusRHS);
1743
}
1744
}
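// The fold above follows from ~V == -V - 1, so
// (~X) + (~Y) == (-X - 1) + (-Y - 1) == -2 - (X + Y).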
1745
1746
if (Instruction *R = tryFoldInstWithCtpopWithNot(&I))
1747
return R;
1748
1749
// TODO(jingyue): Consider willNotOverflowSignedAdd and
1750
// willNotOverflowUnsignedAdd to reduce the number of invocations of
1751
// computeKnownBits.
1752
bool Changed = false;
1753
if (!I.hasNoSignedWrap() && willNotOverflowSignedAdd(LHSCache, RHSCache, I)) {
1754
Changed = true;
1755
I.setHasNoSignedWrap(true);
1756
}
1757
if (!I.hasNoUnsignedWrap() &&
1758
willNotOverflowUnsignedAdd(LHSCache, RHSCache, I)) {
1759
Changed = true;
1760
I.setHasNoUnsignedWrap(true);
1761
}
1762
1763
if (Instruction *V = canonicalizeLowbitMask(I, Builder))
1764
return V;
1765
1766
if (Instruction *V =
1767
canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(I))
1768
return V;
1769
1770
if (Instruction *SatAdd = foldToUnsignedSaturatedAdd(I))
1771
return SatAdd;
1772
1773
// usub.sat(A, B) + B => umax(A, B)
1774
if (match(&I, m_c_BinOp(
1775
m_OneUse(m_Intrinsic<Intrinsic::usub_sat>(m_Value(A), m_Value(B))),
1776
m_Deferred(B)))) {
1777
return replaceInstUsesWith(I,
1778
Builder.CreateIntrinsic(Intrinsic::umax, {I.getType()}, {A, B}));
1779
}
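// This holds because usub.sat(A, B) == umax(A, B) - B, so adding B back
// produces umax(A, B).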
1780
1781
// ctpop(A) + ctpop(B) => ctpop(A | B) if A and B have no bits set in common.
1782
if (match(LHS, m_OneUse(m_Intrinsic<Intrinsic::ctpop>(m_Value(A)))) &&
1783
match(RHS, m_OneUse(m_Intrinsic<Intrinsic::ctpop>(m_Value(B)))) &&
1784
haveNoCommonBitsSet(A, B, SQ.getWithInstruction(&I)))
1785
return replaceInstUsesWith(
1786
I, Builder.CreateIntrinsic(Intrinsic::ctpop, {I.getType()},
1787
{Builder.CreateOr(A, B)}));
1788
1789
// Fold the log2_ceil idiom:
1790
// zext(ctpop(A) >u/!= 1) + (ctlz(A, true) ^ (BW - 1))
1791
// -->
1792
// BW - ctlz(A - 1, false)
1793
const APInt *XorC;
1794
if (match(&I,
1795
m_c_Add(
1796
m_ZExt(m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Value(A)),
1797
m_One())),
1798
m_OneUse(m_ZExtOrSelf(m_OneUse(m_Xor(
1799
m_OneUse(m_TruncOrSelf(m_OneUse(
1800
m_Intrinsic<Intrinsic::ctlz>(m_Deferred(A), m_One())))),
1801
m_APInt(XorC))))))) &&
1802
(Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_NE) &&
1803
*XorC == A->getType()->getScalarSizeInBits() - 1) {
1804
Value *Sub = Builder.CreateAdd(A, Constant::getAllOnesValue(A->getType()));
1805
Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {A->getType()},
1806
{Sub, Builder.getFalse()});
1807
Value *Ret = Builder.CreateSub(
1808
ConstantInt::get(A->getType(), A->getType()->getScalarSizeInBits()),
1809
Ctlz, "", /*HasNUW*/ true, /*HasNSW*/ true);
1810
return replaceInstUsesWith(I, Builder.CreateZExtOrTrunc(Ret, I.getType()));
1811
}
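// For example, i32 A = 5: ctpop(5) = 2 (not 1) contributes 1, and
// ctlz(5, true) ^ 31 = 29 ^ 31 = 2, so the idiom computes 3 = ceil(log2(5)).
// The replacement computes 32 - ctlz(4, false) = 32 - 29 = 3 as well.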
1812
1813
if (Instruction *Res = foldSquareSumInt(I))
1814
return Res;
1815
1816
if (Instruction *Res = foldBinOpOfDisplacedShifts(I))
1817
return Res;
1818
1819
if (Instruction *Res = foldBinOpOfSelectAndCastOfSelectCondition(I))
1820
return Res;
1821
1822
return Changed ? &I : nullptr;
1823
}
1824
1825
/// Eliminate an op from a linear interpolation (lerp) pattern.
1826
static Instruction *factorizeLerp(BinaryOperator &I,
1827
InstCombiner::BuilderTy &Builder) {
1828
Value *X, *Y, *Z;
1829
if (!match(&I, m_c_FAdd(m_OneUse(m_c_FMul(m_Value(Y),
1830
m_OneUse(m_FSub(m_FPOne(),
1831
m_Value(Z))))),
1832
m_OneUse(m_c_FMul(m_Value(X), m_Deferred(Z))))))
1833
return nullptr;
1834
1835
// (Y * (1.0 - Z)) + (X * Z) --> Y + Z * (X - Y) [8 commuted variants]
1836
Value *XY = Builder.CreateFSubFMF(X, Y, &I);
1837
Value *MulZ = Builder.CreateFMulFMF(Z, XY, &I);
1838
return BinaryOperator::CreateFAddFMF(Y, MulZ, &I);
1839
}
1840
1841
/// Factor a common operand out of fadd/fsub of fmul/fdiv.
1842
static Instruction *factorizeFAddFSub(BinaryOperator &I,
1843
InstCombiner::BuilderTy &Builder) {
1844
assert((I.getOpcode() == Instruction::FAdd ||
1845
I.getOpcode() == Instruction::FSub) && "Expecting fadd/fsub");
1846
assert(I.hasAllowReassoc() && I.hasNoSignedZeros() &&
1847
"FP factorization requires FMF");
1848
1849
if (Instruction *Lerp = factorizeLerp(I, Builder))
1850
return Lerp;
1851
1852
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
1853
if (!Op0->hasOneUse() || !Op1->hasOneUse())
1854
return nullptr;
1855
1856
Value *X, *Y, *Z;
1857
bool IsFMul;
1858
if ((match(Op0, m_FMul(m_Value(X), m_Value(Z))) &&
1859
match(Op1, m_c_FMul(m_Value(Y), m_Specific(Z)))) ||
1860
(match(Op0, m_FMul(m_Value(Z), m_Value(X))) &&
1861
match(Op1, m_c_FMul(m_Value(Y), m_Specific(Z)))))
1862
IsFMul = true;
1863
else if (match(Op0, m_FDiv(m_Value(X), m_Value(Z))) &&
1864
match(Op1, m_FDiv(m_Value(Y), m_Specific(Z))))
1865
IsFMul = false;
1866
else
1867
return nullptr;
1868
1869
// (X * Z) + (Y * Z) --> (X + Y) * Z
1870
// (X * Z) - (Y * Z) --> (X - Y) * Z
1871
// (X / Z) + (Y / Z) --> (X + Y) / Z
1872
// (X / Z) - (Y / Z) --> (X - Y) / Z
1873
bool IsFAdd = I.getOpcode() == Instruction::FAdd;
1874
Value *XY = IsFAdd ? Builder.CreateFAddFMF(X, Y, &I)
1875
: Builder.CreateFSubFMF(X, Y, &I);
1876
1877
// Bail out if we just created a denormal constant.
1878
// TODO: This is copied from a previous implementation. Is it necessary?
1879
const APFloat *C;
1880
if (match(XY, m_APFloat(C)) && !C->isNormal())
1881
return nullptr;
1882
1883
return IsFMul ? BinaryOperator::CreateFMulFMF(XY, Z, &I)
1884
: BinaryOperator::CreateFDivFMF(XY, Z, &I);
1885
}
1886
1887
Instruction *InstCombinerImpl::visitFAdd(BinaryOperator &I) {
1888
if (Value *V = simplifyFAddInst(I.getOperand(0), I.getOperand(1),
1889
I.getFastMathFlags(),
1890
SQ.getWithInstruction(&I)))
1891
return replaceInstUsesWith(I, V);
1892
1893
if (SimplifyAssociativeOrCommutative(I))
1894
return &I;
1895
1896
if (Instruction *X = foldVectorBinop(I))
1897
return X;
1898
1899
if (Instruction *Phi = foldBinopWithPhiOperands(I))
1900
return Phi;
1901
1902
if (Instruction *FoldedFAdd = foldBinOpIntoSelectOrPhi(I))
1903
return FoldedFAdd;
1904
1905
// (-X) + Y --> Y - X
1906
Value *X, *Y;
1907
if (match(&I, m_c_FAdd(m_FNeg(m_Value(X)), m_Value(Y))))
1908
return BinaryOperator::CreateFSubFMF(Y, X, &I);
1909
1910
// Similar to above, but look through fmul/fdiv for the negated term.
1911
// (-X * Y) + Z --> Z - (X * Y) [4 commuted variants]
1912
Value *Z;
1913
if (match(&I, m_c_FAdd(m_OneUse(m_c_FMul(m_FNeg(m_Value(X)), m_Value(Y))),
1914
m_Value(Z)))) {
1915
Value *XY = Builder.CreateFMulFMF(X, Y, &I);
1916
return BinaryOperator::CreateFSubFMF(Z, XY, &I);
1917
}
1918
// (-X / Y) + Z --> Z - (X / Y) [2 commuted variants]
1919
// (X / -Y) + Z --> Z - (X / Y) [2 commuted variants]
1920
if (match(&I, m_c_FAdd(m_OneUse(m_FDiv(m_FNeg(m_Value(X)), m_Value(Y))),
1921
m_Value(Z))) ||
1922
match(&I, m_c_FAdd(m_OneUse(m_FDiv(m_Value(X), m_FNeg(m_Value(Y)))),
1923
m_Value(Z)))) {
1924
Value *XY = Builder.CreateFDivFMF(X, Y, &I);
1925
return BinaryOperator::CreateFSubFMF(Z, XY, &I);
1926
}
1927
1928
// Check for (fadd double (sitofp x), y), see if we can merge this into an
1929
// integer add followed by a promotion.
1930
if (Instruction *R = foldFBinOpOfIntCasts(I))
1931
return R;
1932
1933
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1934
// Handle special cases for FAdd with selects feeding the operation
1935
if (Value *V = SimplifySelectsFeedingBinaryOp(I, LHS, RHS))
1936
return replaceInstUsesWith(I, V);
1937
1938
if (I.hasAllowReassoc() && I.hasNoSignedZeros()) {
1939
if (Instruction *F = factorizeFAddFSub(I, Builder))
1940
return F;
1941
1942
if (Instruction *F = foldSquareSumFP(I))
1943
return F;
1944
1945
// Try to fold fadd into start value of reduction intrinsic.
1946
if (match(&I, m_c_FAdd(m_OneUse(m_Intrinsic<Intrinsic::vector_reduce_fadd>(
1947
m_AnyZeroFP(), m_Value(X))),
1948
m_Value(Y)))) {
1949
// fadd (rdx 0.0, X), Y --> rdx Y, X
1950
return replaceInstUsesWith(
1951
I, Builder.CreateIntrinsic(Intrinsic::vector_reduce_fadd,
1952
{X->getType()}, {Y, X}, &I));
1953
}
1954
const APFloat *StartC, *C;
1955
if (match(LHS, m_OneUse(m_Intrinsic<Intrinsic::vector_reduce_fadd>(
1956
m_APFloat(StartC), m_Value(X)))) &&
1957
match(RHS, m_APFloat(C))) {
1958
// fadd (rdx StartC, X), C --> rdx (C + StartC), X
1959
Constant *NewStartC = ConstantFP::get(I.getType(), *C + *StartC);
1960
return replaceInstUsesWith(
1961
I, Builder.CreateIntrinsic(Intrinsic::vector_reduce_fadd,
1962
{X->getType()}, {NewStartC, X}, &I));
1963
}
1964
1965
// (X * MulC) + X --> X * (MulC + 1.0)
1966
Constant *MulC;
1967
if (match(&I, m_c_FAdd(m_FMul(m_Value(X), m_ImmConstant(MulC)),
1968
m_Deferred(X)))) {
1969
if (Constant *NewMulC = ConstantFoldBinaryOpOperands(
1970
Instruction::FAdd, MulC, ConstantFP::get(I.getType(), 1.0), DL))
1971
return BinaryOperator::CreateFMulFMF(X, NewMulC, &I);
1972
}
1973
1974
// (-X - Y) + (X + Z) --> Z - Y
1975
if (match(&I, m_c_FAdd(m_FSub(m_FNeg(m_Value(X)), m_Value(Y)),
1976
m_c_FAdd(m_Deferred(X), m_Value(Z)))))
1977
return BinaryOperator::CreateFSubFMF(Z, Y, &I);
1978
1979
if (Value *V = FAddCombine(Builder).simplify(&I))
1980
return replaceInstUsesWith(I, V);
1981
}
1982
1983
// minimum(X, Y) + maximum(X, Y) => X + Y.
1984
if (match(&I,
1985
m_c_FAdd(m_Intrinsic<Intrinsic::maximum>(m_Value(X), m_Value(Y)),
1986
m_c_Intrinsic<Intrinsic::minimum>(m_Deferred(X),
1987
m_Deferred(Y))))) {
1988
BinaryOperator *Result = BinaryOperator::CreateFAddFMF(X, Y, &I);
1989
// We cannot preserve ninf if the nnan flag is not set.
1990
// If X is NaN and Y is Inf, the original program computed NaN + NaN,
1991
// while the optimized version computes NaN + Inf, which is poison under ninf.
1992
if (!Result->hasNoNaNs())
1993
Result->setHasNoInfs(false);
1994
return Result;
1995
}
1996
1997
return nullptr;
1998
}
1999
2000
/// Optimize pointer differences within the same array into a size. Consider:
2001
/// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer
2002
/// operands to the ptrtoint instructions for the LHS/RHS of the subtract.
2003
Value *InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS,
2004
Type *Ty, bool IsNUW) {
2005
// If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
2006
// this.
2007
bool Swapped = false;
2008
GEPOperator *GEP1 = nullptr, *GEP2 = nullptr;
2009
if (!isa<GEPOperator>(LHS) && isa<GEPOperator>(RHS)) {
2010
std::swap(LHS, RHS);
2011
Swapped = true;
2012
}
2013
2014
// Require at least one GEP with a common base pointer on both sides.
2015
if (auto *LHSGEP = dyn_cast<GEPOperator>(LHS)) {
2016
// (gep X, ...) - X
2017
if (LHSGEP->getOperand(0)->stripPointerCasts() ==
2018
RHS->stripPointerCasts()) {
2019
GEP1 = LHSGEP;
2020
} else if (auto *RHSGEP = dyn_cast<GEPOperator>(RHS)) {
2021
// (gep X, ...) - (gep X, ...)
2022
if (LHSGEP->getOperand(0)->stripPointerCasts() ==
2023
RHSGEP->getOperand(0)->stripPointerCasts()) {
2024
GEP1 = LHSGEP;
2025
GEP2 = RHSGEP;
2026
}
2027
}
2028
}
2029
2030
if (!GEP1)
2031
return nullptr;
2032
2033
// To avoid duplicating the offset arithmetic, rewrite the GEP to use the
2034
// computed offset. This may erase the original GEP, so be sure to cache the
2035
// inbounds flag before emitting the offset.
2036
// TODO: We should probably do this even if there is only one GEP.
2037
bool RewriteGEPs = GEP2 != nullptr;
2038
2039
// Emit the offset of the GEP as an intptr_t.
2040
bool GEP1IsInBounds = GEP1->isInBounds();
2041
Value *Result = EmitGEPOffset(GEP1, RewriteGEPs);
2042
2043
// If this is a single inbounds GEP and the original sub was nuw,
2044
// then the final multiplication is also nuw.
2045
if (auto *I = dyn_cast<Instruction>(Result))
2046
if (IsNUW && !GEP2 && !Swapped && GEP1IsInBounds &&
2047
I->getOpcode() == Instruction::Mul)
2048
I->setHasNoUnsignedWrap();
2049
2050
// If we have a 2nd GEP of the same base pointer, subtract the offsets.
2051
// If both GEPs are inbounds, then the subtract does not have signed overflow.
2052
if (GEP2) {
2053
bool GEP2IsInBounds = GEP2->isInBounds();
2054
Value *Offset = EmitGEPOffset(GEP2, RewriteGEPs);
2055
Result = Builder.CreateSub(Result, Offset, "gepdiff", /* NUW */ false,
2056
GEP1IsInBounds && GEP2IsInBounds);
2057
}
2058
2059
// If we have p - gep(p, ...) then we have to negate the result.
2060
if (Swapped)
2061
Result = Builder.CreateNeg(Result, "diff.neg");
2062
2063
return Builder.CreateIntCast(Result, Ty, true);
2064
}
2065
2066
static Instruction *foldSubOfMinMax(BinaryOperator &I,
2067
InstCombiner::BuilderTy &Builder) {
2068
Value *Op0 = I.getOperand(0);
2069
Value *Op1 = I.getOperand(1);
2070
Type *Ty = I.getType();
2071
auto *MinMax = dyn_cast<MinMaxIntrinsic>(Op1);
2072
if (!MinMax)
2073
return nullptr;
2074
2075
// sub(add(X,Y), s/umin(X,Y)) --> s/umax(X,Y)
2076
// sub(add(X,Y), s/umax(X,Y)) --> s/umin(X,Y)
2077
Value *X = MinMax->getLHS();
2078
Value *Y = MinMax->getRHS();
2079
if (match(Op0, m_c_Add(m_Specific(X), m_Specific(Y))) &&
2080
(Op0->hasOneUse() || Op1->hasOneUse())) {
2081
Intrinsic::ID InvID = getInverseMinMaxIntrinsic(MinMax->getIntrinsicID());
2082
Function *F = Intrinsic::getDeclaration(I.getModule(), InvID, Ty);
2083
return CallInst::Create(F, {X, Y});
2084
}
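// This follows from X + Y == min(X, Y) + max(X, Y), so subtracting the min
// leaves the max and subtracting the max leaves the min.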
2085
2086
// sub(add(X,Y),umin(Y,Z)) --> add(X,usub.sat(Y,Z))
2087
// sub(add(X,Z),umin(Y,Z)) --> add(X,usub.sat(Z,Y))
2088
Value *Z;
2089
if (match(Op1, m_OneUse(m_UMin(m_Value(Y), m_Value(Z))))) {
2090
if (match(Op0, m_OneUse(m_c_Add(m_Specific(Y), m_Value(X))))) {
2091
Value *USub = Builder.CreateIntrinsic(Intrinsic::usub_sat, Ty, {Y, Z});
2092
return BinaryOperator::CreateAdd(X, USub);
2093
}
2094
if (match(Op0, m_OneUse(m_c_Add(m_Specific(Z), m_Value(X))))) {
2095
Value *USub = Builder.CreateIntrinsic(Intrinsic::usub_sat, Ty, {Z, Y});
2096
return BinaryOperator::CreateAdd(X, USub);
2097
}
2098
}
2099
2100
// sub Op0, smin((sub nsw Op0, Z), 0) --> smax Op0, Z
2101
// sub Op0, smax((sub nsw Op0, Z), 0) --> smin Op0, Z
2102
if (MinMax->isSigned() && match(Y, m_ZeroInt()) &&
2103
match(X, m_NSWSub(m_Specific(Op0), m_Value(Z)))) {
2104
Intrinsic::ID InvID = getInverseMinMaxIntrinsic(MinMax->getIntrinsicID());
2105
Function *F = Intrinsic::getDeclaration(I.getModule(), InvID, Ty);
2106
return CallInst::Create(F, {Op0, Z});
2107
}
2108
2109
return nullptr;
2110
}
2111
2112
Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
2113
if (Value *V = simplifySubInst(I.getOperand(0), I.getOperand(1),
2114
I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
2115
SQ.getWithInstruction(&I)))
2116
return replaceInstUsesWith(I, V);
2117
2118
if (Instruction *X = foldVectorBinop(I))
2119
return X;
2120
2121
if (Instruction *Phi = foldBinopWithPhiOperands(I))
2122
return Phi;
2123
2124
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
2125
2126
// If this is 'B = x - (-A)', change it to 'B = x + A'.
2127
// We handle this without involving Negator to preserve the NSW flag.
2128
if (Value *V = dyn_castNegVal(Op1)) {
2129
BinaryOperator *Res = BinaryOperator::CreateAdd(Op0, V);
2130
2131
if (const auto *BO = dyn_cast<BinaryOperator>(Op1)) {
2132
assert(BO->getOpcode() == Instruction::Sub &&
2133
"Expected a subtraction operator!");
2134
if (BO->hasNoSignedWrap() && I.hasNoSignedWrap())
2135
Res->setHasNoSignedWrap(true);
2136
} else {
2137
if (cast<Constant>(Op1)->isNotMinSignedValue() && I.hasNoSignedWrap())
2138
Res->setHasNoSignedWrap(true);
2139
}
2140
2141
return Res;
2142
}
2143
2144
// Try this before Negator to preserve NSW flag.
2145
if (Instruction *R = factorizeMathWithShlOps(I, Builder))
2146
return R;
2147
2148
Constant *C;
2149
if (match(Op0, m_ImmConstant(C))) {
2150
Value *X;
2151
Constant *C2;
2152
2153
// C-(X+C2) --> (C-C2)-X
2154
if (match(Op1, m_Add(m_Value(X), m_ImmConstant(C2)))) {
2155
// C-C2 never overflow, and C-(X+C2), (X+C2) has NSW/NUW
2156
// => (C-C2)-X can have NSW/NUW
2157
bool WillNotSOV = willNotOverflowSignedSub(C, C2, I);
2158
BinaryOperator *Res =
2159
BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X);
2160
auto *OBO1 = cast<OverflowingBinaryOperator>(Op1);
2161
Res->setHasNoSignedWrap(I.hasNoSignedWrap() && OBO1->hasNoSignedWrap() &&
2162
WillNotSOV);
2163
Res->setHasNoUnsignedWrap(I.hasNoUnsignedWrap() &&
2164
OBO1->hasNoUnsignedWrap());
2165
return Res;
2166
}
2167
}
2168
2169
auto TryToNarrowDeduceFlags = [this, &I, &Op0, &Op1]() -> Instruction * {
2170
if (Instruction *Ext = narrowMathIfNoOverflow(I))
2171
return Ext;
2172
2173
bool Changed = false;
2174
if (!I.hasNoSignedWrap() && willNotOverflowSignedSub(Op0, Op1, I)) {
2175
Changed = true;
2176
I.setHasNoSignedWrap(true);
2177
}
2178
if (!I.hasNoUnsignedWrap() && willNotOverflowUnsignedSub(Op0, Op1, I)) {
2179
Changed = true;
2180
I.setHasNoUnsignedWrap(true);
2181
}
2182
2183
return Changed ? &I : nullptr;
2184
};
2185
2186
// First, let's try to interpret `sub a, b` as `add a, (sub 0, b)`,
2187
// and let's try to sink `(sub 0, b)` into `b` itself. But only if this isn't
2188
// a pure negation used by a select that looks like abs/nabs.
2189
bool IsNegation = match(Op0, m_ZeroInt());
2190
if (!IsNegation || none_of(I.users(), [&I, Op1](const User *U) {
2191
const Instruction *UI = dyn_cast<Instruction>(U);
2192
if (!UI)
2193
return false;
2194
return match(UI,
2195
m_Select(m_Value(), m_Specific(Op1), m_Specific(&I))) ||
2196
match(UI, m_Select(m_Value(), m_Specific(&I), m_Specific(Op1)));
2197
})) {
2198
if (Value *NegOp1 = Negator::Negate(IsNegation, /* IsNSW */ IsNegation &&
2199
I.hasNoSignedWrap(),
2200
Op1, *this))
2201
return BinaryOperator::CreateAdd(NegOp1, Op0);
2202
}
2203
if (IsNegation)
2204
return TryToNarrowDeduceFlags(); // Should have been handled in Negator!
2205
2206
// (A*B)-(A*C) -> A*(B-C) etc
2207
if (Value *V = foldUsingDistributiveLaws(I))
2208
return replaceInstUsesWith(I, V);
2209
2210
if (I.getType()->isIntOrIntVectorTy(1))
2211
return BinaryOperator::CreateXor(Op0, Op1);
2212
2213
// Replace (-1 - A) with (~A).
2214
if (match(Op0, m_AllOnes()))
2215
return BinaryOperator::CreateNot(Op1);
2216
2217
// (X + -1) - Y --> ~Y + X
2218
Value *X, *Y;
2219
if (match(Op0, m_OneUse(m_Add(m_Value(X), m_AllOnes()))))
2220
return BinaryOperator::CreateAdd(Builder.CreateNot(Op1), X);
2221
2222
// Reassociate sub/add sequences to create more add instructions and
2223
// reduce dependency chains:
2224
// ((X - Y) + Z) - Op1 --> (X + Z) - (Y + Op1)
2225
Value *Z;
2226
if (match(Op0, m_OneUse(m_c_Add(m_OneUse(m_Sub(m_Value(X), m_Value(Y))),
2227
m_Value(Z))))) {
2228
Value *XZ = Builder.CreateAdd(X, Z);
2229
Value *YW = Builder.CreateAdd(Y, Op1);
2230
return BinaryOperator::CreateSub(XZ, YW);
2231
}
2232
2233
// ((X - Y) - Op1) --> X - (Y + Op1)
2234
if (match(Op0, m_OneUse(m_Sub(m_Value(X), m_Value(Y))))) {
2235
OverflowingBinaryOperator *LHSSub = cast<OverflowingBinaryOperator>(Op0);
2236
bool HasNUW = I.hasNoUnsignedWrap() && LHSSub->hasNoUnsignedWrap();
2237
bool HasNSW = HasNUW && I.hasNoSignedWrap() && LHSSub->hasNoSignedWrap();
2238
Value *Add = Builder.CreateAdd(Y, Op1, "", /* HasNUW */ HasNUW,
2239
/* HasNSW */ HasNSW);
2240
BinaryOperator *Sub = BinaryOperator::CreateSub(X, Add);
2241
Sub->setHasNoUnsignedWrap(HasNUW);
2242
Sub->setHasNoSignedWrap(HasNSW);
2243
return Sub;
2244
}
2245
2246
{
2247
// (X + Z) - (Y + Z) --> (X - Y)
2248
// This is done in other passes, but we want to be able to consume this
2249
// pattern in InstCombine so we can generate it without creating infinite
2250
// loops.
2251
if (match(Op0, m_Add(m_Value(X), m_Value(Z))) &&
2252
match(Op1, m_c_Add(m_Value(Y), m_Specific(Z))))
2253
return BinaryOperator::CreateSub(X, Y);
2254
2255
// (X + C0) - (Y + C1) --> (X - Y) + (C0 - C1)
2256
Constant *CX, *CY;
2257
if (match(Op0, m_OneUse(m_Add(m_Value(X), m_ImmConstant(CX)))) &&
2258
match(Op1, m_OneUse(m_Add(m_Value(Y), m_ImmConstant(CY))))) {
2259
Value *OpsSub = Builder.CreateSub(X, Y);
2260
Constant *ConstsSub = ConstantExpr::getSub(CX, CY);
2261
return BinaryOperator::CreateAdd(OpsSub, ConstsSub);
2262
}
2263
}
2264
2265
// (~X) - (~Y) --> Y - X
2266
{
2267
// Need to ensure we can consume at least one of the `not` instructions,
2268
// otherwise this can loop infinitely.
2269
bool ConsumesOp0, ConsumesOp1;
2270
if (isFreeToInvert(Op0, Op0->hasOneUse(), ConsumesOp0) &&
2271
isFreeToInvert(Op1, Op1->hasOneUse(), ConsumesOp1) &&
2272
(ConsumesOp0 || ConsumesOp1)) {
2273
Value *NotOp0 = getFreelyInverted(Op0, Op0->hasOneUse(), &Builder);
2274
Value *NotOp1 = getFreelyInverted(Op1, Op1->hasOneUse(), &Builder);
2275
assert(NotOp0 != nullptr && NotOp1 != nullptr &&
2276
"isFreeToInvert desynced with getFreelyInverted");
2277
return BinaryOperator::CreateSub(NotOp1, NotOp0);
2278
}
2279
}
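// The fold follows from ~V == -V - 1:
// (~X) - (~Y) == (-X - 1) - (-Y - 1) == Y - X.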
2280
2281
auto m_AddRdx = [](Value *&Vec) {
2282
return m_OneUse(m_Intrinsic<Intrinsic::vector_reduce_add>(m_Value(Vec)));
2283
};
2284
Value *V0, *V1;
2285
if (match(Op0, m_AddRdx(V0)) && match(Op1, m_AddRdx(V1)) &&
2286
V0->getType() == V1->getType()) {
2287
// Difference of sums is sum of differences:
2288
// add_rdx(V0) - add_rdx(V1) --> add_rdx(V0 - V1)
2289
Value *Sub = Builder.CreateSub(V0, V1);
2290
Value *Rdx = Builder.CreateIntrinsic(Intrinsic::vector_reduce_add,
2291
{Sub->getType()}, {Sub});
2292
return replaceInstUsesWith(I, Rdx);
2293
}
2294
2295
if (Constant *C = dyn_cast<Constant>(Op0)) {
2296
Value *X;
2297
if (match(Op1, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1))
2298
// C - (zext bool) --> bool ? C - 1 : C
2299
return SelectInst::Create(X, InstCombiner::SubOne(C), C);
2300
if (match(Op1, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1))
2301
// C - (sext bool) --> bool ? C + 1 : C
2302
return SelectInst::Create(X, InstCombiner::AddOne(C), C);
2303
2304
// C - ~X == X + (1+C)
2305
if (match(Op1, m_Not(m_Value(X))))
2306
return BinaryOperator::CreateAdd(X, InstCombiner::AddOne(C));
2307
2308
// Try to fold constant sub into select arguments.
2309
if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
2310
if (Instruction *R = FoldOpIntoSelect(I, SI))
2311
return R;
2312
2313
// Try to fold constant sub into PHI values.
2314
if (PHINode *PN = dyn_cast<PHINode>(Op1))
2315
if (Instruction *R = foldOpIntoPhi(I, PN))
2316
return R;
2317
2318
Constant *C2;
2319
2320
// C-(C2-X) --> X+(C-C2)
2321
if (match(Op1, m_Sub(m_ImmConstant(C2), m_Value(X))))
2322
return BinaryOperator::CreateAdd(X, ConstantExpr::getSub(C, C2));
2323
}
2324
2325
const APInt *Op0C;
2326
if (match(Op0, m_APInt(Op0C))) {
2327
if (Op0C->isMask()) {
2328
// Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
2329
// zero. We don't use information from dominating conditions so this
2330
// transform is easier to reverse if necessary.
2331
KnownBits RHSKnown = llvm::computeKnownBits(
2332
Op1, 0, SQ.getWithInstruction(&I).getWithoutDomCondCache());
2333
if ((*Op0C | RHSKnown.Zero).isAllOnes())
2334
return BinaryOperator::CreateXor(Op1, Op0);
2335
}
2336
2337
// C - ((C3 -nuw X) & C2) --> (C - (C2 & C3)) + (X & C2) when:
2338
// (C3 - ((C2 & C3) - 1)) is pow2
2339
// ((C2 + C3) & ((C2 & C3) - 1)) == ((C2 & C3) - 1)
2340
// C2 is negative pow2 || sub nuw
2341
const APInt *C2, *C3;
2342
BinaryOperator *InnerSub;
2343
if (match(Op1, m_OneUse(m_And(m_BinOp(InnerSub), m_APInt(C2)))) &&
2344
match(InnerSub, m_Sub(m_APInt(C3), m_Value(X))) &&
2345
(InnerSub->hasNoUnsignedWrap() || C2->isNegatedPowerOf2())) {
2346
APInt C2AndC3 = *C2 & *C3;
2347
APInt C2AndC3Minus1 = C2AndC3 - 1;
2348
APInt C2AddC3 = *C2 + *C3;
2349
if ((*C3 - C2AndC3Minus1).isPowerOf2() &&
2350
C2AndC3Minus1.isSubsetOf(C2AddC3)) {
2351
Value *And = Builder.CreateAnd(X, ConstantInt::get(I.getType(), *C2));
2352
return BinaryOperator::CreateAdd(
2353
And, ConstantInt::get(I.getType(), *Op0C - C2AndC3));
2354
}
2355
}
2356
}
2357
2358
{
2359
Value *Y;
2360
// X-(X+Y) == -Y X-(Y+X) == -Y
2361
if (match(Op1, m_c_Add(m_Specific(Op0), m_Value(Y))))
2362
return BinaryOperator::CreateNeg(Y);
2363
2364
// (X-Y)-X == -Y
2365
if (match(Op0, m_Sub(m_Specific(Op1), m_Value(Y))))
2366
return BinaryOperator::CreateNeg(Y);
2367
}
2368
2369
// (sub (or A, B) (and A, B)) --> (xor A, B)
2370
{
2371
Value *A, *B;
2372
if (match(Op1, m_And(m_Value(A), m_Value(B))) &&
2373
match(Op0, m_c_Or(m_Specific(A), m_Specific(B))))
2374
return BinaryOperator::CreateXor(A, B);
2375
}
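// This follows from (A | B) == (A ^ B) + (A & B), so subtracting (A & B)
// from (A | B) leaves (A ^ B).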
2376
2377
// (sub (add A, B) (or A, B)) --> (and A, B)
2378
{
2379
Value *A, *B;
2380
if (match(Op0, m_Add(m_Value(A), m_Value(B))) &&
2381
match(Op1, m_c_Or(m_Specific(A), m_Specific(B))))
2382
return BinaryOperator::CreateAnd(A, B);
2383
}
2384
2385
// (sub (add A, B) (and A, B)) --> (or A, B)
2386
{
2387
Value *A, *B;
2388
if (match(Op0, m_Add(m_Value(A), m_Value(B))) &&
2389
match(Op1, m_c_And(m_Specific(A), m_Specific(B))))
2390
return BinaryOperator::CreateOr(A, B);
2391
}
2392
2393
// (sub (and A, B) (or A, B)) --> neg (xor A, B)
2394
{
2395
Value *A, *B;
2396
if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
2397
match(Op1, m_c_Or(m_Specific(A), m_Specific(B))) &&
2398
(Op0->hasOneUse() || Op1->hasOneUse()))
2399
return BinaryOperator::CreateNeg(Builder.CreateXor(A, B));
2400
}
2401
2402
// (sub (or A, B), (xor A, B)) --> (and A, B)
2403
{
2404
Value *A, *B;
2405
if (match(Op1, m_Xor(m_Value(A), m_Value(B))) &&
2406
match(Op0, m_c_Or(m_Specific(A), m_Specific(B))))
2407
return BinaryOperator::CreateAnd(A, B);
2408
}
2409
2410
// (sub (xor A, B) (or A, B)) --> neg (and A, B)
2411
{
2412
Value *A, *B;
2413
if (match(Op0, m_Xor(m_Value(A), m_Value(B))) &&
2414
match(Op1, m_c_Or(m_Specific(A), m_Specific(B))) &&
2415
(Op0->hasOneUse() || Op1->hasOneUse()))
2416
return BinaryOperator::CreateNeg(Builder.CreateAnd(A, B));
2417
}
2418
2419
{
2420
Value *Y;
2421
// ((X | Y) - X) --> (~X & Y)
2422
if (match(Op0, m_OneUse(m_c_Or(m_Value(Y), m_Specific(Op1)))))
2423
return BinaryOperator::CreateAnd(
2424
Y, Builder.CreateNot(Op1, Op1->getName() + ".not"));
2425
}
2426
2427
{
2428
// (sub (and Op1, (neg X)), Op1) --> neg (and Op1, (add X, -1))
2429
Value *X;
2430
if (match(Op0, m_OneUse(m_c_And(m_Specific(Op1),
2431
m_OneUse(m_Neg(m_Value(X))))))) {
2432
return BinaryOperator::CreateNeg(Builder.CreateAnd(
2433
Op1, Builder.CreateAdd(X, Constant::getAllOnesValue(I.getType()))));
2434
}
2435
}
2436
2437
{
2438
// (sub (and Op1, C), Op1) --> neg (and Op1, ~C)
2439
Constant *C;
2440
if (match(Op0, m_OneUse(m_And(m_Specific(Op1), m_Constant(C))))) {
2441
return BinaryOperator::CreateNeg(
2442
Builder.CreateAnd(Op1, Builder.CreateNot(C)));
2443
}
2444
}
2445
2446
{
2447
// (sub (xor X, (sext C)), (sext C)) => (select C, (neg X), X)
2448
// (sub (sext C), (xor X, (sext C))) => (select C, X, (neg X))
2449
Value *C, *X;
2450
auto m_SubXorCmp = [&C, &X](Value *LHS, Value *RHS) {
2451
return match(LHS, m_OneUse(m_c_Xor(m_Value(X), m_Specific(RHS)))) &&
2452
match(RHS, m_SExt(m_Value(C))) &&
2453
(C->getType()->getScalarSizeInBits() == 1);
2454
};
2455
if (m_SubXorCmp(Op0, Op1))
2456
return SelectInst::Create(C, Builder.CreateNeg(X), X);
2457
if (m_SubXorCmp(Op1, Op0))
2458
return SelectInst::Create(C, X, Builder.CreateNeg(X));
2459
}
2460
2461
if (Instruction *R = tryFoldInstWithCtpopWithNot(&I))
2462
return R;
2463
2464
if (Instruction *R = foldSubOfMinMax(I, Builder))
2465
return R;
2466
2467
{
2468
// If we have a subtraction between some value and a select between
2469
// said value and something else, sink subtraction into select hands, i.e.:
2470
// sub (select %Cond, %TrueVal, %FalseVal), %Op1
2471
// ->
2472
// select %Cond, (sub %TrueVal, %Op1), (sub %FalseVal, %Op1)
2473
// or
2474
// sub %Op0, (select %Cond, %TrueVal, %FalseVal)
2475
// ->
2476
// select %Cond, (sub %Op0, %TrueVal), (sub %Op0, %FalseVal)
2477
// This will result in select between new subtraction and 0.
2478
auto SinkSubIntoSelect =
2479
[Ty = I.getType()](Value *Select, Value *OtherHandOfSub,
2480
auto SubBuilder) -> Instruction * {
2481
Value *Cond, *TrueVal, *FalseVal;
2482
if (!match(Select, m_OneUse(m_Select(m_Value(Cond), m_Value(TrueVal),
2483
m_Value(FalseVal)))))
2484
return nullptr;
2485
if (OtherHandOfSub != TrueVal && OtherHandOfSub != FalseVal)
2486
return nullptr;
2487
// While it is really tempting to just create two subtractions and let
2488
// InstCombine fold one of those to 0, it isn't possible to do so
2489
// because of worklist visitation order. So ugly it is.
2490
bool OtherHandOfSubIsTrueVal = OtherHandOfSub == TrueVal;
2491
Value *NewSub = SubBuilder(OtherHandOfSubIsTrueVal ? FalseVal : TrueVal);
2492
Constant *Zero = Constant::getNullValue(Ty);
2493
SelectInst *NewSel =
2494
SelectInst::Create(Cond, OtherHandOfSubIsTrueVal ? Zero : NewSub,
2495
OtherHandOfSubIsTrueVal ? NewSub : Zero);
2496
// Preserve prof metadata if any.
2497
NewSel->copyMetadata(cast<Instruction>(*Select));
2498
return NewSel;
2499
};
2500
if (Instruction *NewSel = SinkSubIntoSelect(
2501
/*Select=*/Op0, /*OtherHandOfSub=*/Op1,
2502
[Builder = &Builder, Op1](Value *OtherHandOfSelect) {
2503
return Builder->CreateSub(OtherHandOfSelect,
2504
/*OtherHandOfSub=*/Op1);
2505
}))
2506
return NewSel;
2507
if (Instruction *NewSel = SinkSubIntoSelect(
2508
/*Select=*/Op1, /*OtherHandOfSub=*/Op0,
2509
[Builder = &Builder, Op0](Value *OtherHandOfSelect) {
2510
return Builder->CreateSub(/*OtherHandOfSub=*/Op0,
2511
OtherHandOfSelect);
2512
}))
2513
return NewSel;
2514
}
2515
2516
// (X - (X & Y)) --> (X & ~Y)
2517
if (match(Op1, m_c_And(m_Specific(Op0), m_Value(Y))) &&
2518
(Op1->hasOneUse() || isa<Constant>(Y)))
2519
return BinaryOperator::CreateAnd(
2520
Op0, Builder.CreateNot(Y, Y->getName() + ".not"));
2521
2522
// ~X - Min/Max(~X, Y) -> ~Min/Max(X, ~Y) - X
2523
// ~X - Min/Max(Y, ~X) -> ~Min/Max(X, ~Y) - X
2524
// Min/Max(~X, Y) - ~X -> X - ~Min/Max(X, ~Y)
2525
// Min/Max(Y, ~X) - ~X -> X - ~Min/Max(X, ~Y)
2526
// As long as Y is freely invertible, this will be neutral or a win.
2527
// Note: We don't generate the inverse max/min, just create the 'not' of
2528
// it and let other folds do the rest.
2529
if (match(Op0, m_Not(m_Value(X))) &&
2530
match(Op1, m_c_MaxOrMin(m_Specific(Op0), m_Value(Y))) &&
2531
!Op0->hasNUsesOrMore(3) && isFreeToInvert(Y, Y->hasOneUse())) {
2532
Value *Not = Builder.CreateNot(Op1);
2533
return BinaryOperator::CreateSub(Not, X);
2534
}
2535
if (match(Op1, m_Not(m_Value(X))) &&
2536
match(Op0, m_c_MaxOrMin(m_Specific(Op1), m_Value(Y))) &&
2537
!Op1->hasNUsesOrMore(3) && isFreeToInvert(Y, Y->hasOneUse())) {
2538
Value *Not = Builder.CreateNot(Op0);
2539
return BinaryOperator::CreateSub(X, Not);
2540
}
2541
2542
// Optimize pointer differences into the same array into a size. Consider:
2543
// &A[10] - &A[0]: we should compile this to "10".
2544
Value *LHSOp, *RHSOp;
2545
if (match(Op0, m_PtrToInt(m_Value(LHSOp))) &&
2546
match(Op1, m_PtrToInt(m_Value(RHSOp))))
2547
if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType(),
2548
I.hasNoUnsignedWrap()))
2549
return replaceInstUsesWith(I, Res);
2550
2551
// trunc(p)-trunc(q) -> trunc(p-q)
2552
if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) &&
2553
match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp)))))
2554
if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType(),
2555
/* IsNUW */ false))
2556
return replaceInstUsesWith(I, Res);
2557
2558
// Canonicalize a shifty way to code absolute value to the common pattern.
2559
// There are 2 potential commuted variants.
2560
// We're relying on the fact that we only do this transform when the shift has
2561
// exactly 2 uses and the xor has exactly 1 use (otherwise, we might increase
2562
// instructions).
2563
Value *A;
2564
const APInt *ShAmt;
2565
Type *Ty = I.getType();
2566
unsigned BitWidth = Ty->getScalarSizeInBits();
2567
if (match(Op1, m_AShr(m_Value(A), m_APInt(ShAmt))) &&
2568
Op1->hasNUses(2) && *ShAmt == BitWidth - 1 &&
2569
match(Op0, m_OneUse(m_c_Xor(m_Specific(A), m_Specific(Op1))))) {
2570
// B = ashr i32 A, 31 ; smear the sign bit
2571
// sub (xor A, B), B ; flip bits if negative and subtract -1 (add 1)
2572
// --> (A < 0) ? -A : A
2573
Value *IsNeg = Builder.CreateIsNeg(A);
2574
// Copy the nsw flags from the sub to the negate.
2575
Value *NegA = I.hasNoUnsignedWrap()
2576
? Constant::getNullValue(A->getType())
2577
: Builder.CreateNeg(A, "", I.hasNoSignedWrap());
2578
return SelectInst::Create(IsNeg, NegA, A);
2579
}
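// For example, i32 A = -5: B = -5 s>> 31 = -1, xor(-5, -1) = 4, and
// 4 - (-1) = 5 = |A|. For A = 5: B = 0 and the result is 5 unchanged.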
2580
2581
// If we are subtracting a low-bit masked subset of some value from an add
2582
// of that same value with no low bits changed, that is clearing some low bits
2583
// of the sum:
2584
// sub (X + AddC), (X & AndC) --> and (X + AddC), ~AndC
2585
const APInt *AddC, *AndC;
2586
if (match(Op0, m_Add(m_Value(X), m_APInt(AddC))) &&
2587
match(Op1, m_And(m_Specific(X), m_APInt(AndC)))) {
2588
unsigned Cttz = AddC->countr_zero();
2589
APInt HighMask(APInt::getHighBitsSet(BitWidth, BitWidth - Cttz));
2590
if ((HighMask & *AndC).isZero())
2591
return BinaryOperator::CreateAnd(Op0, ConstantInt::get(Ty, ~(*AndC)));
2592
}
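// For example, AddC = 16 and AndC = 7: X + 16 leaves the low four bits of X
// untouched, so subtracting (X & 7) simply clears the low three bits of the
// sum, i.e. (X + 16) & ~7.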
2593
2594
if (Instruction *V =
2595
canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(I))
2596
return V;
2597
2598
// X - usub.sat(X, Y) => umin(X, Y)
2599
if (match(Op1, m_OneUse(m_Intrinsic<Intrinsic::usub_sat>(m_Specific(Op0),
2600
m_Value(Y)))))
2601
return replaceInstUsesWith(
2602
I, Builder.CreateIntrinsic(Intrinsic::umin, {I.getType()}, {Op0, Y}));
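// This holds because usub.sat(X, Y) == X - umin(X, Y), so subtracting it
// from X leaves umin(X, Y).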
2603
2604
// umax(X, Op1) - Op1 --> usub.sat(X, Op1)
2605
// TODO: The one-use restriction is not strictly necessary, but it may
2606
// require improving other pattern matching and/or codegen.
2607
if (match(Op0, m_OneUse(m_c_UMax(m_Value(X), m_Specific(Op1)))))
2608
return replaceInstUsesWith(
2609
I, Builder.CreateIntrinsic(Intrinsic::usub_sat, {Ty}, {X, Op1}));
2610
2611
// Op0 - umin(X, Op0) --> usub.sat(Op0, X)
2612
if (match(Op1, m_OneUse(m_c_UMin(m_Value(X), m_Specific(Op0)))))
2613
return replaceInstUsesWith(
2614
I, Builder.CreateIntrinsic(Intrinsic::usub_sat, {Ty}, {Op0, X}));
2615
2616
// Op0 - umax(X, Op0) --> 0 - usub.sat(X, Op0)
2617
if (match(Op1, m_OneUse(m_c_UMax(m_Value(X), m_Specific(Op0))))) {
2618
Value *USub = Builder.CreateIntrinsic(Intrinsic::usub_sat, {Ty}, {X, Op0});
2619
return BinaryOperator::CreateNeg(USub);
2620
}
2621
2622
// umin(X, Op1) - Op1 --> 0 - usub.sat(Op1, X)
2623
if (match(Op0, m_OneUse(m_c_UMin(m_Value(X), m_Specific(Op1))))) {
2624
Value *USub = Builder.CreateIntrinsic(Intrinsic::usub_sat, {Ty}, {Op1, X});
2625
return BinaryOperator::CreateNeg(USub);
2626
}
2627
2628
// C - ctpop(X) => ctpop(~X) if C is bitwidth
2629
if (match(Op0, m_SpecificInt(BitWidth)) &&
2630
match(Op1, m_OneUse(m_Intrinsic<Intrinsic::ctpop>(m_Value(X)))))
2631
return replaceInstUsesWith(
2632
I, Builder.CreateIntrinsic(Intrinsic::ctpop, {I.getType()},
2633
{Builder.CreateNot(X)}));
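// This follows from ctpop(X) + ctpop(~X) == BitWidth for any X, so
// BitWidth - ctpop(X) == ctpop(~X).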
2634
2635
// Reduce multiplies for difference-of-squares by factoring:
2636
// (X * X) - (Y * Y) --> (X + Y) * (X - Y)
2637
if (match(Op0, m_OneUse(m_Mul(m_Value(X), m_Deferred(X)))) &&
2638
match(Op1, m_OneUse(m_Mul(m_Value(Y), m_Deferred(Y))))) {
2639
auto *OBO0 = cast<OverflowingBinaryOperator>(Op0);
2640
auto *OBO1 = cast<OverflowingBinaryOperator>(Op1);
2641
bool PropagateNSW = I.hasNoSignedWrap() && OBO0->hasNoSignedWrap() &&
2642
OBO1->hasNoSignedWrap() && BitWidth > 2;
2643
bool PropagateNUW = I.hasNoUnsignedWrap() && OBO0->hasNoUnsignedWrap() &&
2644
OBO1->hasNoUnsignedWrap() && BitWidth > 1;
2645
Value *Add = Builder.CreateAdd(X, Y, "add", PropagateNUW, PropagateNSW);
2646
Value *Sub = Builder.CreateSub(X, Y, "sub", PropagateNUW, PropagateNSW);
2647
Value *Mul = Builder.CreateMul(Add, Sub, "", PropagateNUW, PropagateNSW);
2648
return replaceInstUsesWith(I, Mul);
2649
}
2650
2651
// max(X,Y) nsw/nuw - min(X,Y) --> abs(X nsw - Y)
2652
if (match(Op0, m_OneUse(m_c_SMax(m_Value(X), m_Value(Y)))) &&
2653
match(Op1, m_OneUse(m_c_SMin(m_Specific(X), m_Specific(Y))))) {
2654
if (I.hasNoUnsignedWrap() || I.hasNoSignedWrap()) {
2655
Value *Sub =
2656
Builder.CreateSub(X, Y, "sub", /*HasNUW=*/false, /*HasNSW=*/true);
2657
Value *Call =
2658
Builder.CreateBinaryIntrinsic(Intrinsic::abs, Sub, Builder.getTrue());
2659
return replaceInstUsesWith(I, Call);
2660
}
2661
}
2662
2663
if (Instruction *Res = foldBinOpOfSelectAndCastOfSelectCondition(I))
2664
return Res;
2665
2666
return TryToNarrowDeduceFlags();
2667
}
2668
2669
/// This eliminates floating-point negation in either 'fneg(X)' or
2670
/// 'fsub(-0.0, X)' form by combining into a constant operand.
2671
static Instruction *foldFNegIntoConstant(Instruction &I, const DataLayout &DL) {
2672
// This is limited to one-use because fneg is assumed better for
2673
// reassociation and cheaper in codegen than fmul/fdiv.
2674
// TODO: Should the m_OneUse restriction be removed?
2675
Instruction *FNegOp;
2676
if (!match(&I, m_FNeg(m_OneUse(m_Instruction(FNegOp)))))
2677
return nullptr;
2678
2679
Value *X;
2680
Constant *C;
2681
2682
// Fold negation into constant operand.
2683
// -(X * C) --> X * (-C)
2684
if (match(FNegOp, m_FMul(m_Value(X), m_Constant(C))))
2685
if (Constant *NegC = ConstantFoldUnaryOpOperand(Instruction::FNeg, C, DL))
2686
return BinaryOperator::CreateFMulFMF(X, NegC, &I);
2687
// -(X / C) --> X / (-C)
2688
if (match(FNegOp, m_FDiv(m_Value(X), m_Constant(C))))
2689
if (Constant *NegC = ConstantFoldUnaryOpOperand(Instruction::FNeg, C, DL))
2690
return BinaryOperator::CreateFDivFMF(X, NegC, &I);
2691
// -(C / X) --> (-C) / X
2692
if (match(FNegOp, m_FDiv(m_Constant(C), m_Value(X))))
2693
if (Constant *NegC = ConstantFoldUnaryOpOperand(Instruction::FNeg, C, DL)) {
2694
Instruction *FDiv = BinaryOperator::CreateFDivFMF(NegC, X, &I);
2695
2696
// Intersect 'nsz' and 'ninf' because those special value exceptions may
2697
// not apply to the fdiv. Everything else propagates from the fneg.
2698
// TODO: We could propagate nsz/ninf from fdiv alone?
2699
FastMathFlags FMF = I.getFastMathFlags();
2700
FastMathFlags OpFMF = FNegOp->getFastMathFlags();
2701
FDiv->setHasNoSignedZeros(FMF.noSignedZeros() && OpFMF.noSignedZeros());
2702
FDiv->setHasNoInfs(FMF.noInfs() && OpFMF.noInfs());
2703
return FDiv;
2704
}
2705
// With NSZ [ counter-example with -0.0: -(-0.0 + 0.0) != 0.0 + -0.0 ]:
2706
// -(X + C) --> -X + -C --> -C - X
2707
if (I.hasNoSignedZeros() && match(FNegOp, m_FAdd(m_Value(X), m_Constant(C))))
2708
if (Constant *NegC = ConstantFoldUnaryOpOperand(Instruction::FNeg, C, DL))
2709
return BinaryOperator::CreateFSubFMF(NegC, X, &I);
2710
2711
return nullptr;
2712
}
2713
2714
Instruction *InstCombinerImpl::hoistFNegAboveFMulFDiv(Value *FNegOp,
2715
Instruction &FMFSource) {
2716
Value *X, *Y;
2717
if (match(FNegOp, m_FMul(m_Value(X), m_Value(Y)))) {
2718
return cast<Instruction>(Builder.CreateFMulFMF(
2719
Builder.CreateFNegFMF(X, &FMFSource), Y, &FMFSource));
2720
}
2721
2722
if (match(FNegOp, m_FDiv(m_Value(X), m_Value(Y)))) {
2723
return cast<Instruction>(Builder.CreateFDivFMF(
2724
Builder.CreateFNegFMF(X, &FMFSource), Y, &FMFSource));
2725
}
2726
2727
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(FNegOp)) {
2728
// Make sure to preserve flags and metadata on the call.
2729
if (II->getIntrinsicID() == Intrinsic::ldexp) {
2730
FastMathFlags FMF = FMFSource.getFastMathFlags() | II->getFastMathFlags();
2731
IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
2732
Builder.setFastMathFlags(FMF);
2733
2734
CallInst *New = Builder.CreateCall(
2735
II->getCalledFunction(),
2736
{Builder.CreateFNeg(II->getArgOperand(0)), II->getArgOperand(1)});
2737
New->copyMetadata(*II);
2738
return New;
2739
}
2740
}
2741
2742
return nullptr;
2743
}
2744
2745
Instruction *InstCombinerImpl::visitFNeg(UnaryOperator &I) {
2746
Value *Op = I.getOperand(0);
2747
2748
if (Value *V = simplifyFNegInst(Op, I.getFastMathFlags(),
2749
getSimplifyQuery().getWithInstruction(&I)))
2750
return replaceInstUsesWith(I, V);
2751
2752
if (Instruction *X = foldFNegIntoConstant(I, DL))
2753
return X;
2754
2755
Value *X, *Y;
2756
2757
// If we can ignore the sign of zeros: -(X - Y) --> (Y - X)
2758
if (I.hasNoSignedZeros() &&
2759
match(Op, m_OneUse(m_FSub(m_Value(X), m_Value(Y)))))
2760
return BinaryOperator::CreateFSubFMF(Y, X, &I);
2761
2762
Value *OneUse;
2763
if (!match(Op, m_OneUse(m_Value(OneUse))))
2764
return nullptr;
2765
2766
if (Instruction *R = hoistFNegAboveFMulFDiv(OneUse, I))
2767
return replaceInstUsesWith(I, R);
2768
2769
// Try to eliminate fneg if at least 1 arm of the select is negated.
2770
Value *Cond;
2771
if (match(OneUse, m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))) {
2772
// Unlike most transforms, this one is not safe to propagate nsz unless
2773
// it is present on the original select. We union the flags from the select
2774
// and fneg and then remove nsz if needed.
2775
auto propagateSelectFMF = [&](SelectInst *S, bool CommonOperand) {
2776
S->copyFastMathFlags(&I);
2777
if (auto *OldSel = dyn_cast<SelectInst>(Op)) {
2778
FastMathFlags FMF = I.getFastMathFlags() | OldSel->getFastMathFlags();
2779
S->setFastMathFlags(FMF);
2780
if (!OldSel->hasNoSignedZeros() && !CommonOperand &&
2781
!isGuaranteedNotToBeUndefOrPoison(OldSel->getCondition()))
2782
S->setHasNoSignedZeros(false);
2783
}
2784
};
2785
// -(Cond ? -P : Y) --> Cond ? P : -Y
2786
Value *P;
2787
if (match(X, m_FNeg(m_Value(P)))) {
2788
Value *NegY = Builder.CreateFNegFMF(Y, &I, Y->getName() + ".neg");
2789
SelectInst *NewSel = SelectInst::Create(Cond, P, NegY);
2790
propagateSelectFMF(NewSel, P == Y);
2791
return NewSel;
2792
}
2793
// -(Cond ? X : -P) --> Cond ? -X : P
2794
if (match(Y, m_FNeg(m_Value(P)))) {
2795
Value *NegX = Builder.CreateFNegFMF(X, &I, X->getName() + ".neg");
2796
SelectInst *NewSel = SelectInst::Create(Cond, NegX, P);
2797
propagateSelectFMF(NewSel, P == X);
2798
return NewSel;
2799
}
2800
2801
// -(Cond ? X : C) --> Cond ? -X : -C
2802
// -(Cond ? C : Y) --> Cond ? -C : -Y
2803
if (match(X, m_ImmConstant()) || match(Y, m_ImmConstant())) {
2804
Value *NegX = Builder.CreateFNegFMF(X, &I, X->getName() + ".neg");
2805
Value *NegY = Builder.CreateFNegFMF(Y, &I, Y->getName() + ".neg");
2806
SelectInst *NewSel = SelectInst::Create(Cond, NegX, NegY);
2807
propagateSelectFMF(NewSel, /*CommonOperand=*/true);
2808
return NewSel;
2809
}
2810
}
2811
2812
// fneg (copysign x, y) -> copysign x, (fneg y)
2813
if (match(OneUse, m_CopySign(m_Value(X), m_Value(Y)))) {
2814
// The source copysign has an additional value input, so we can't propagate
2815
// flags the copysign doesn't also have.
2816
FastMathFlags FMF = I.getFastMathFlags();
2817
FMF &= cast<FPMathOperator>(OneUse)->getFastMathFlags();
2818
2819
IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
2820
Builder.setFastMathFlags(FMF);
2821
2822
Value *NegY = Builder.CreateFNeg(Y);
2823
Value *NewCopySign = Builder.CreateCopySign(X, NegY);
2824
return replaceInstUsesWith(I, NewCopySign);
2825
}
2826
2827
return nullptr;
2828
}
2829
2830
Instruction *InstCombinerImpl::visitFSub(BinaryOperator &I) {
2831
if (Value *V = simplifyFSubInst(I.getOperand(0), I.getOperand(1),
2832
I.getFastMathFlags(),
2833
getSimplifyQuery().getWithInstruction(&I)))
2834
return replaceInstUsesWith(I, V);
2835
2836
if (Instruction *X = foldVectorBinop(I))
2837
return X;
2838
2839
if (Instruction *Phi = foldBinopWithPhiOperands(I))
2840
return Phi;
2841
2842
// Subtraction from -0.0 is the canonical form of fneg.
2843
// fsub -0.0, X ==> fneg X
2844
// fsub nsz 0.0, X ==> fneg nsz X
2845
//
2846
// FIXME This matcher does not respect FTZ or DAZ yet:
2847
// fsub -0.0, Denorm ==> +-0
2848
// fneg Denorm ==> -Denorm
2849
Value *Op;
2850
if (match(&I, m_FNeg(m_Value(Op))))
2851
return UnaryOperator::CreateFNegFMF(Op, &I);
2852
2853
if (Instruction *X = foldFNegIntoConstant(I, DL))
2854
return X;
2855
2856
if (Instruction *R = foldFBinOpOfIntCasts(I))
2857
return R;
2858
2859
Value *X, *Y;
2860
Constant *C;
2861
2862
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
2863
// If Op0 is not -0.0 or we can ignore -0.0: Z - (X - Y) --> Z + (Y - X)
2864
// Canonicalize to fadd to make analysis easier.
2865
// This can also help codegen because fadd is commutative.
2866
// Note that if this fsub was really an fneg, the fadd with -0.0 will get
2867
// killed later. We still limit that particular transform with 'hasOneUse'
2868
// because an fneg is assumed better/cheaper than a generic fsub.
2869
if (I.hasNoSignedZeros() ||
2870
cannotBeNegativeZero(Op0, 0, getSimplifyQuery().getWithInstruction(&I))) {
2871
if (match(Op1, m_OneUse(m_FSub(m_Value(X), m_Value(Y))))) {
2872
Value *NewSub = Builder.CreateFSubFMF(Y, X, &I);
2873
return BinaryOperator::CreateFAddFMF(Op0, NewSub, &I);
2874
}
2875
}
2876
2877
// (-X) - Op1 --> -(X + Op1)
2878
if (I.hasNoSignedZeros() && !isa<ConstantExpr>(Op0) &&
2879
match(Op0, m_OneUse(m_FNeg(m_Value(X))))) {
2880
Value *FAdd = Builder.CreateFAddFMF(X, Op1, &I);
2881
return UnaryOperator::CreateFNegFMF(FAdd, &I);
2882
}
2883
2884
if (isa<Constant>(Op0))
2885
if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
2886
if (Instruction *NV = FoldOpIntoSelect(I, SI))
2887
return NV;
2888
2889
// X - C --> X + (-C)
2890
// But don't transform constant expressions because there's an inverse fold
2891
// for X + (-Y) --> X - Y.
2892
if (match(Op1, m_ImmConstant(C)))
2893
if (Constant *NegC = ConstantFoldUnaryOpOperand(Instruction::FNeg, C, DL))
2894
return BinaryOperator::CreateFAddFMF(Op0, NegC, &I);
2895
2896
// X - (-Y) --> X + Y
2897
if (match(Op1, m_FNeg(m_Value(Y))))
2898
return BinaryOperator::CreateFAddFMF(Op0, Y, &I);
2899
2900
// Similar to above, but look through a cast of the negated value:
2901
// X - (fptrunc(-Y)) --> X + fptrunc(Y)
2902
Type *Ty = I.getType();
2903
if (match(Op1, m_OneUse(m_FPTrunc(m_FNeg(m_Value(Y))))))
2904
return BinaryOperator::CreateFAddFMF(Op0, Builder.CreateFPTrunc(Y, Ty), &I);
2905
2906
// X - (fpext(-Y)) --> X + fpext(Y)
2907
if (match(Op1, m_OneUse(m_FPExt(m_FNeg(m_Value(Y))))))
2908
return BinaryOperator::CreateFAddFMF(Op0, Builder.CreateFPExt(Y, Ty), &I);
2909
2910
// Similar to above, but look through fmul/fdiv of the negated value:
2911
// Op0 - (-X * Y) --> Op0 + (X * Y)
2912
// Op0 - (Y * -X) --> Op0 + (X * Y)
2913
if (match(Op1, m_OneUse(m_c_FMul(m_FNeg(m_Value(X)), m_Value(Y))))) {
2914
Value *FMul = Builder.CreateFMulFMF(X, Y, &I);
2915
return BinaryOperator::CreateFAddFMF(Op0, FMul, &I);
2916
}
2917
// Op0 - (-X / Y) --> Op0 + (X / Y)
2918
// Op0 - (X / -Y) --> Op0 + (X / Y)
2919
if (match(Op1, m_OneUse(m_FDiv(m_FNeg(m_Value(X)), m_Value(Y)))) ||
2920
match(Op1, m_OneUse(m_FDiv(m_Value(X), m_FNeg(m_Value(Y)))))) {
2921
Value *FDiv = Builder.CreateFDivFMF(X, Y, &I);
2922
return BinaryOperator::CreateFAddFMF(Op0, FDiv, &I);
2923
}
2924
2925
// Handle special cases for FSub with selects feeding the operation
2926
if (Value *V = SimplifySelectsFeedingBinaryOp(I, Op0, Op1))
2927
return replaceInstUsesWith(I, V);
2928
2929
if (I.hasAllowReassoc() && I.hasNoSignedZeros()) {
2930
// (Y - X) - Y --> -X
2931
if (match(Op0, m_FSub(m_Specific(Op1), m_Value(X))))
2932
return UnaryOperator::CreateFNegFMF(X, &I);
2933
2934
// Y - (X + Y) --> -X
2935
// Y - (Y + X) --> -X
2936
if (match(Op1, m_c_FAdd(m_Specific(Op0), m_Value(X))))
2937
return UnaryOperator::CreateFNegFMF(X, &I);
2938
2939
// (X * C) - X --> X * (C - 1.0)
2940
if (match(Op0, m_FMul(m_Specific(Op1), m_Constant(C)))) {
2941
if (Constant *CSubOne = ConstantFoldBinaryOpOperands(
2942
Instruction::FSub, C, ConstantFP::get(Ty, 1.0), DL))
2943
return BinaryOperator::CreateFMulFMF(Op1, CSubOne, &I);
2944
}
2945
// X - (X * C) --> X * (1.0 - C)
2946
if (match(Op1, m_FMul(m_Specific(Op0), m_Constant(C)))) {
2947
if (Constant *OneSubC = ConstantFoldBinaryOpOperands(
2948
Instruction::FSub, ConstantFP::get(Ty, 1.0), C, DL))
2949
return BinaryOperator::CreateFMulFMF(Op0, OneSubC, &I);
2950
}
2951
2952
// Reassociate fsub/fadd sequences to create more fadd instructions and
2953
// reduce dependency chains:
2954
// ((X - Y) + Z) - Op1 --> (X + Z) - (Y + Op1)
2955
Value *Z;
2956
if (match(Op0, m_OneUse(m_c_FAdd(m_OneUse(m_FSub(m_Value(X), m_Value(Y))),
2957
m_Value(Z))))) {
2958
Value *XZ = Builder.CreateFAddFMF(X, Z, &I);
2959
Value *YW = Builder.CreateFAddFMF(Y, Op1, &I);
2960
return BinaryOperator::CreateFSubFMF(XZ, YW, &I);
2961
}
2962
2963
auto m_FaddRdx = [](Value *&Sum, Value *&Vec) {
2964
return m_OneUse(m_Intrinsic<Intrinsic::vector_reduce_fadd>(m_Value(Sum),
2965
m_Value(Vec)));
2966
};
2967
Value *A0, *A1, *V0, *V1;
2968
if (match(Op0, m_FaddRdx(A0, V0)) && match(Op1, m_FaddRdx(A1, V1)) &&
2969
V0->getType() == V1->getType()) {
2970
// Difference of sums is sum of differences:
2971
// add_rdx(A0, V0) - add_rdx(A1, V1) --> add_rdx(A0, V0 - V1) - A1
2972
Value *Sub = Builder.CreateFSubFMF(V0, V1, &I);
2973
Value *Rdx = Builder.CreateIntrinsic(Intrinsic::vector_reduce_fadd,
2974
{Sub->getType()}, {A0, Sub}, &I);
2975
return BinaryOperator::CreateFSubFMF(Rdx, A1, &I);
2976
}
2977
2978
if (Instruction *F = factorizeFAddFSub(I, Builder))
2979
return F;
2980
2981
// TODO: This performs reassociative folds for FP ops. Some fraction of the
2982
// functionality has been subsumed by simple pattern matching here and in
2983
// InstSimplify. We should let a dedicated reassociation pass handle more
2984
// complex pattern matching and remove this from InstCombine.
2985
if (Value *V = FAddCombine(Builder).simplify(&I))
2986
return replaceInstUsesWith(I, V);
2987
2988
// (X - Y) - Op1 --> X - (Y + Op1)
2989
if (match(Op0, m_OneUse(m_FSub(m_Value(X), m_Value(Y))))) {
2990
Value *FAdd = Builder.CreateFAddFMF(Y, Op1, &I);
2991
return BinaryOperator::CreateFSubFMF(X, FAdd, &I);
2992
}
2993
}
2994
2995
return nullptr;
2996
}
2997
2998