Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
35271 views
1
//===- BypassSlowDivision.cpp - Bypass slow division ----------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file contains an optimization for div and rem on architectures that
10
// execute short instructions significantly faster than longer instructions.
11
// For example, on Intel Atom 32-bit divides are slow enough that during
12
// runtime it is profitable to check the value of the operands, and if they are
13
// positive and less than 256 use an unsigned 8-bit divide.
14
//
15
//===----------------------------------------------------------------------===//
16
17
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
18
#include "llvm/ADT/DenseMap.h"
19
#include "llvm/ADT/STLExtras.h"
20
#include "llvm/ADT/SmallPtrSet.h"
21
#include "llvm/Transforms/Utils/Local.h"
22
#include "llvm/Analysis/ValueTracking.h"
23
#include "llvm/IR/BasicBlock.h"
24
#include "llvm/IR/Constants.h"
25
#include "llvm/IR/DerivedTypes.h"
26
#include "llvm/IR/Function.h"
27
#include "llvm/IR/IRBuilder.h"
28
#include "llvm/IR/Instruction.h"
29
#include "llvm/IR/Instructions.h"
30
#include "llvm/IR/Module.h"
31
#include "llvm/IR/Type.h"
32
#include "llvm/IR/Value.h"
33
#include "llvm/Support/Casting.h"
34
#include "llvm/Support/KnownBits.h"
35
#include <cassert>
36
#include <cstdint>
37
38
using namespace llvm;
39
40
#define DEBUG_TYPE "bypass-slow-division"
41
42
namespace {
43
44
struct QuotRemPair {
45
Value *Quotient;
46
Value *Remainder;
47
48
QuotRemPair(Value *InQuotient, Value *InRemainder)
49
: Quotient(InQuotient), Remainder(InRemainder) {}
50
};
51
52
/// A quotient and remainder, plus a BB from which they logically "originate".
53
/// If you use Quotient or Remainder in a Phi node, you should use BB as its
54
/// corresponding predecessor.
55
struct QuotRemWithBB {
56
BasicBlock *BB = nullptr;
57
Value *Quotient = nullptr;
58
Value *Remainder = nullptr;
59
};
60
61
using DivCacheTy = DenseMap<DivRemMapKey, QuotRemPair>;
62
using BypassWidthsTy = DenseMap<unsigned, unsigned>;
63
using VisitedSetTy = SmallPtrSet<Instruction *, 4>;
64
65
enum ValueRange {
66
/// Operand definitely fits into BypassType. No runtime checks are needed.
67
VALRNG_KNOWN_SHORT,
68
/// A runtime check is required, as value range is unknown.
69
VALRNG_UNKNOWN,
70
/// Operand is unlikely to fit into BypassType. The bypassing should be
71
/// disabled.
72
VALRNG_LIKELY_LONG
73
};
74
75
class FastDivInsertionTask {
76
bool IsValidTask = false;
77
Instruction *SlowDivOrRem = nullptr;
78
IntegerType *BypassType = nullptr;
79
BasicBlock *MainBB = nullptr;
80
81
bool isHashLikeValue(Value *V, VisitedSetTy &Visited);
82
ValueRange getValueRange(Value *Op, VisitedSetTy &Visited);
83
QuotRemWithBB createSlowBB(BasicBlock *Successor);
84
QuotRemWithBB createFastBB(BasicBlock *Successor);
85
QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS,
86
BasicBlock *PhiBB);
87
Value *insertOperandRuntimeCheck(Value *Op1, Value *Op2);
88
std::optional<QuotRemPair> insertFastDivAndRem();
89
90
bool isSignedOp() {
91
return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
92
SlowDivOrRem->getOpcode() == Instruction::SRem;
93
}
94
95
bool isDivisionOp() {
96
return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
97
SlowDivOrRem->getOpcode() == Instruction::UDiv;
98
}
99
100
Type *getSlowType() { return SlowDivOrRem->getType(); }
101
102
public:
103
FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths);
104
105
Value *getReplacement(DivCacheTy &Cache);
106
};
107
108
} // end anonymous namespace
109
110
FastDivInsertionTask::FastDivInsertionTask(Instruction *I,
111
const BypassWidthsTy &BypassWidths) {
112
switch (I->getOpcode()) {
113
case Instruction::UDiv:
114
case Instruction::SDiv:
115
case Instruction::URem:
116
case Instruction::SRem:
117
SlowDivOrRem = I;
118
break;
119
default:
120
// I is not a div/rem operation.
121
return;
122
}
123
124
// Skip division on vector types. Only optimize integer instructions.
125
IntegerType *SlowType = dyn_cast<IntegerType>(SlowDivOrRem->getType());
126
if (!SlowType)
127
return;
128
129
// Skip if this bitwidth is not bypassed.
130
auto BI = BypassWidths.find(SlowType->getBitWidth());
131
if (BI == BypassWidths.end())
132
return;
133
134
// Get type for div/rem instruction with bypass bitwidth.
135
IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
136
BypassType = BT;
137
138
// The original basic block.
139
MainBB = I->getParent();
140
141
// The instruction is indeed a slow div or rem operation.
142
IsValidTask = true;
143
}
144
145
/// Reuses previously-computed dividend or remainder from the current BB if
146
/// operands and operation are identical. Otherwise calls insertFastDivAndRem to
147
/// perform the optimization and caches the resulting dividend and remainder.
148
/// If no replacement can be generated, nullptr is returned.
149
Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) {
150
// First, make sure that the task is valid.
151
if (!IsValidTask)
152
return nullptr;
153
154
// Then, look for a value in Cache.
155
Value *Dividend = SlowDivOrRem->getOperand(0);
156
Value *Divisor = SlowDivOrRem->getOperand(1);
157
DivRemMapKey Key(isSignedOp(), Dividend, Divisor);
158
auto CacheI = Cache.find(Key);
159
160
if (CacheI == Cache.end()) {
161
// If previous instance does not exist, try to insert fast div.
162
std::optional<QuotRemPair> OptResult = insertFastDivAndRem();
163
// Bail out if insertFastDivAndRem has failed.
164
if (!OptResult)
165
return nullptr;
166
CacheI = Cache.insert({Key, *OptResult}).first;
167
}
168
169
QuotRemPair &Value = CacheI->second;
170
return isDivisionOp() ? Value.Quotient : Value.Remainder;
171
}
172
173
/// Check if a value looks like a hash.
174
///
175
/// The routine is expected to detect values computed using the most common hash
176
/// algorithms. Typically, hash computations end with one of the following
177
/// instructions:
178
///
179
/// 1) MUL with a constant wider than BypassType
180
/// 2) XOR instruction
181
///
182
/// And even if we are wrong and the value is not a hash, it is still quite
183
/// unlikely that such values will fit into BypassType.
184
///
185
/// To detect string hash algorithms like FNV we have to look through PHI-nodes.
186
/// It is implemented as a depth-first search for values that look neither long
187
/// nor hash-like.
188
bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) {
189
Instruction *I = dyn_cast<Instruction>(V);
190
if (!I)
191
return false;
192
193
switch (I->getOpcode()) {
194
case Instruction::Xor:
195
return true;
196
case Instruction::Mul: {
197
// After Constant Hoisting pass, long constants may be represented as
198
// bitcast instructions. As a result, some constants may look like an
199
// instruction at first, and an additional check is necessary to find out if
200
// an operand is actually a constant.
201
Value *Op1 = I->getOperand(1);
202
ConstantInt *C = dyn_cast<ConstantInt>(Op1);
203
if (!C && isa<BitCastInst>(Op1))
204
C = dyn_cast<ConstantInt>(cast<BitCastInst>(Op1)->getOperand(0));
205
return C && C->getValue().getSignificantBits() > BypassType->getBitWidth();
206
}
207
case Instruction::PHI:
208
// Stop IR traversal in case of a crazy input code. This limits recursion
209
// depth.
210
if (Visited.size() >= 16)
211
return false;
212
// Do not visit nodes that have been visited already. We return true because
213
// it means that we couldn't find any value that doesn't look hash-like.
214
if (!Visited.insert(I).second)
215
return true;
216
return llvm::all_of(cast<PHINode>(I)->incoming_values(), [&](Value *V) {
217
// Ignore undef values as they probably don't affect the division
218
// operands.
219
return getValueRange(V, Visited) == VALRNG_LIKELY_LONG ||
220
isa<UndefValue>(V);
221
});
222
default:
223
return false;
224
}
225
}
226
227
/// Check if an integer value fits into our bypass type.
228
ValueRange FastDivInsertionTask::getValueRange(Value *V,
229
VisitedSetTy &Visited) {
230
unsigned ShortLen = BypassType->getBitWidth();
231
unsigned LongLen = V->getType()->getIntegerBitWidth();
232
233
assert(LongLen > ShortLen && "Value type must be wider than BypassType");
234
unsigned HiBits = LongLen - ShortLen;
235
236
const DataLayout &DL = SlowDivOrRem->getDataLayout();
237
KnownBits Known(LongLen);
238
239
computeKnownBits(V, Known, DL);
240
241
if (Known.countMinLeadingZeros() >= HiBits)
242
return VALRNG_KNOWN_SHORT;
243
244
if (Known.countMaxLeadingZeros() < HiBits)
245
return VALRNG_LIKELY_LONG;
246
247
// Long integer divisions are often used in hashtable implementations. It's
248
// not worth bypassing such divisions because hash values are extremely
249
// unlikely to have enough leading zeros. The call below tries to detect
250
// values that are unlikely to fit BypassType (including hashes).
251
if (isHashLikeValue(V, Visited))
252
return VALRNG_LIKELY_LONG;
253
254
return VALRNG_UNKNOWN;
255
}
256
257
/// Add new basic block for slow div and rem operations and put it before
258
/// SuccessorBB.
259
QuotRemWithBB FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) {
260
QuotRemWithBB DivRemPair;
261
DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
262
MainBB->getParent(), SuccessorBB);
263
IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
264
Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
265
266
Value *Dividend = SlowDivOrRem->getOperand(0);
267
Value *Divisor = SlowDivOrRem->getOperand(1);
268
269
if (isSignedOp()) {
270
DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor);
271
DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor);
272
} else {
273
DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor);
274
DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor);
275
}
276
277
Builder.CreateBr(SuccessorBB);
278
return DivRemPair;
279
}
280
281
/// Add new basic block for fast div and rem operations and put it before
282
/// SuccessorBB.
283
QuotRemWithBB FastDivInsertionTask::createFastBB(BasicBlock *SuccessorBB) {
284
QuotRemWithBB DivRemPair;
285
DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
286
MainBB->getParent(), SuccessorBB);
287
IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
288
Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
289
290
Value *Dividend = SlowDivOrRem->getOperand(0);
291
Value *Divisor = SlowDivOrRem->getOperand(1);
292
Value *ShortDivisorV =
293
Builder.CreateCast(Instruction::Trunc, Divisor, BypassType);
294
Value *ShortDividendV =
295
Builder.CreateCast(Instruction::Trunc, Dividend, BypassType);
296
297
// udiv/urem because this optimization only handles positive numbers.
298
Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV);
299
Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV);
300
DivRemPair.Quotient =
301
Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType());
302
DivRemPair.Remainder =
303
Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType());
304
Builder.CreateBr(SuccessorBB);
305
306
return DivRemPair;
307
}
308
309
/// Creates Phi nodes for result of Div and Rem.
310
QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS,
311
QuotRemWithBB &RHS,
312
BasicBlock *PhiBB) {
313
IRBuilder<> Builder(PhiBB, PhiBB->begin());
314
Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
315
PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2);
316
QuoPhi->addIncoming(LHS.Quotient, LHS.BB);
317
QuoPhi->addIncoming(RHS.Quotient, RHS.BB);
318
PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2);
319
RemPhi->addIncoming(LHS.Remainder, LHS.BB);
320
RemPhi->addIncoming(RHS.Remainder, RHS.BB);
321
return QuotRemPair(QuoPhi, RemPhi);
322
}
323
324
/// Creates a runtime check to test whether both the divisor and dividend fit
325
/// into BypassType. The check is inserted at the end of MainBB. True return
326
/// value means that the operands fit. Either of the operands may be NULL if it
327
/// doesn't need a runtime check.
328
Value *FastDivInsertionTask::insertOperandRuntimeCheck(Value *Op1, Value *Op2) {
329
assert((Op1 || Op2) && "Nothing to check");
330
IRBuilder<> Builder(MainBB, MainBB->end());
331
Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
332
333
Value *OrV;
334
if (Op1 && Op2)
335
OrV = Builder.CreateOr(Op1, Op2);
336
else
337
OrV = Op1 ? Op1 : Op2;
338
339
// BitMask is inverted to check if the operands are
340
// larger than the bypass type
341
uint64_t BitMask = ~BypassType->getBitMask();
342
Value *AndV = Builder.CreateAnd(OrV, BitMask);
343
344
// Compare operand values
345
Value *ZeroV = ConstantInt::getSigned(getSlowType(), 0);
346
return Builder.CreateICmpEQ(AndV, ZeroV);
347
}
348
349
/// Substitutes the div/rem instruction with code that checks the value of the
350
/// operands and uses a shorter-faster div/rem instruction when possible.
351
std::optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
352
Value *Dividend = SlowDivOrRem->getOperand(0);
353
Value *Divisor = SlowDivOrRem->getOperand(1);
354
355
VisitedSetTy SetL;
356
ValueRange DividendRange = getValueRange(Dividend, SetL);
357
if (DividendRange == VALRNG_LIKELY_LONG)
358
return std::nullopt;
359
360
VisitedSetTy SetR;
361
ValueRange DivisorRange = getValueRange(Divisor, SetR);
362
if (DivisorRange == VALRNG_LIKELY_LONG)
363
return std::nullopt;
364
365
bool DividendShort = (DividendRange == VALRNG_KNOWN_SHORT);
366
bool DivisorShort = (DivisorRange == VALRNG_KNOWN_SHORT);
367
368
if (DividendShort && DivisorShort) {
369
// If both operands are known to be short then just replace the long
370
// division with a short one in-place. Since we're not introducing control
371
// flow in this case, narrowing the division is always a win, even if the
372
// divisor is a constant (and will later get replaced by a multiplication).
373
374
IRBuilder<> Builder(SlowDivOrRem);
375
Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType);
376
Value *TruncDivisor = Builder.CreateTrunc(Divisor, BypassType);
377
Value *TruncDiv = Builder.CreateUDiv(TruncDividend, TruncDivisor);
378
Value *TruncRem = Builder.CreateURem(TruncDividend, TruncDivisor);
379
Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType());
380
Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType());
381
return QuotRemPair(ExtDiv, ExtRem);
382
}
383
384
if (isa<ConstantInt>(Divisor)) {
385
// If the divisor is not a constant, DAGCombiner will convert it to a
386
// multiplication by a magic constant. It isn't clear if it is worth
387
// introducing control flow to get a narrower multiply.
388
return std::nullopt;
389
}
390
391
// After Constant Hoisting pass, long constants may be represented as
392
// bitcast instructions. As a result, some constants may look like an
393
// instruction at first, and an additional check is necessary to find out if
394
// an operand is actually a constant.
395
if (auto *BCI = dyn_cast<BitCastInst>(Divisor))
396
if (BCI->getParent() == SlowDivOrRem->getParent() &&
397
isa<ConstantInt>(BCI->getOperand(0)))
398
return std::nullopt;
399
400
IRBuilder<> Builder(MainBB, MainBB->end());
401
Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
402
403
if (DividendShort && !isSignedOp()) {
404
// If the division is unsigned and Dividend is known to be short, then
405
// either
406
// 1) Divisor is less or equal to Dividend, and the result can be computed
407
// with a short division.
408
// 2) Divisor is greater than Dividend. In this case, no division is needed
409
// at all: The quotient is 0 and the remainder is equal to Dividend.
410
//
411
// So instead of checking at runtime whether Divisor fits into BypassType,
412
// we emit a runtime check to differentiate between these two cases. This
413
// lets us entirely avoid a long div.
414
415
// Split the basic block before the div/rem.
416
BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
417
// Remove the unconditional branch from MainBB to SuccessorBB.
418
MainBB->back().eraseFromParent();
419
QuotRemWithBB Long;
420
Long.BB = MainBB;
421
Long.Quotient = ConstantInt::get(getSlowType(), 0);
422
Long.Remainder = Dividend;
423
QuotRemWithBB Fast = createFastBB(SuccessorBB);
424
QuotRemPair Result = createDivRemPhiNodes(Fast, Long, SuccessorBB);
425
Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor);
426
Builder.CreateCondBr(CmpV, Fast.BB, SuccessorBB);
427
return Result;
428
} else {
429
// General case. Create both slow and fast div/rem pairs and choose one of
430
// them at runtime.
431
432
// Split the basic block before the div/rem.
433
BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
434
// Remove the unconditional branch from MainBB to SuccessorBB.
435
MainBB->back().eraseFromParent();
436
QuotRemWithBB Fast = createFastBB(SuccessorBB);
437
QuotRemWithBB Slow = createSlowBB(SuccessorBB);
438
QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB);
439
Value *CmpV = insertOperandRuntimeCheck(DividendShort ? nullptr : Dividend,
440
DivisorShort ? nullptr : Divisor);
441
Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB);
442
return Result;
443
}
444
}
445
446
/// This optimization identifies DIV/REM instructions in a BB that can be
447
/// profitably bypassed and carried out with a shorter, faster divide.
448
bool llvm::bypassSlowDivision(BasicBlock *BB,
449
const BypassWidthsTy &BypassWidths) {
450
DivCacheTy PerBBDivCache;
451
452
bool MadeChange = false;
453
Instruction *Next = &*BB->begin();
454
while (Next != nullptr) {
455
// We may add instructions immediately after I, but we want to skip over
456
// them.
457
Instruction *I = Next;
458
Next = Next->getNextNode();
459
460
// Ignore dead code to save time and avoid bugs.
461
if (I->hasNUses(0))
462
continue;
463
464
FastDivInsertionTask Task(I, BypassWidths);
465
if (Value *Replacement = Task.getReplacement(PerBBDivCache)) {
466
I->replaceAllUsesWith(Replacement);
467
I->eraseFromParent();
468
MadeChange = true;
469
}
470
}
471
472
// Above we eagerly create divs and rems, as pairs, so that we can efficiently
473
// create divrem machine instructions. Now erase any unused divs / rems so we
474
// don't leave extra instructions sitting around.
475
for (auto &KV : PerBBDivCache)
476
for (Value *V : {KV.second.Quotient, KV.second.Remainder})
477
RecursivelyDeleteTriviallyDeadInstructions(V);
478
479
return MadeChange;
480
}
481
482