Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp
35233 views
1
//===- ExpandReductions.cpp - Expand reduction intrinsics -----------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This pass implements IR expansion for reduction intrinsics, allowing targets
10
// to enable the intrinsics until just before codegen.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "llvm/CodeGen/ExpandReductions.h"
15
#include "llvm/Analysis/TargetTransformInfo.h"
16
#include "llvm/CodeGen/Passes.h"
17
#include "llvm/IR/IRBuilder.h"
18
#include "llvm/IR/InstIterator.h"
19
#include "llvm/IR/IntrinsicInst.h"
20
#include "llvm/IR/Intrinsics.h"
21
#include "llvm/InitializePasses.h"
22
#include "llvm/Pass.h"
23
#include "llvm/Transforms/Utils/LoopUtils.h"
24
25
using namespace llvm;
26
27
namespace {
28
29
bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
30
bool Changed = false;
31
SmallVector<IntrinsicInst *, 4> Worklist;
32
for (auto &I : instructions(F)) {
33
if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
34
switch (II->getIntrinsicID()) {
35
default: break;
36
case Intrinsic::vector_reduce_fadd:
37
case Intrinsic::vector_reduce_fmul:
38
case Intrinsic::vector_reduce_add:
39
case Intrinsic::vector_reduce_mul:
40
case Intrinsic::vector_reduce_and:
41
case Intrinsic::vector_reduce_or:
42
case Intrinsic::vector_reduce_xor:
43
case Intrinsic::vector_reduce_smax:
44
case Intrinsic::vector_reduce_smin:
45
case Intrinsic::vector_reduce_umax:
46
case Intrinsic::vector_reduce_umin:
47
case Intrinsic::vector_reduce_fmax:
48
case Intrinsic::vector_reduce_fmin:
49
if (TTI->shouldExpandReduction(II))
50
Worklist.push_back(II);
51
52
break;
53
}
54
}
55
}
56
57
for (auto *II : Worklist) {
58
FastMathFlags FMF =
59
isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
60
Intrinsic::ID ID = II->getIntrinsicID();
61
RecurKind RK = getMinMaxReductionRecurKind(ID);
62
TargetTransformInfo::ReductionShuffle RS =
63
TTI->getPreferredExpandedReductionShuffle(II);
64
65
Value *Rdx = nullptr;
66
IRBuilder<> Builder(II);
67
IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
68
Builder.setFastMathFlags(FMF);
69
switch (ID) {
70
default: llvm_unreachable("Unexpected intrinsic!");
71
case Intrinsic::vector_reduce_fadd:
72
case Intrinsic::vector_reduce_fmul: {
73
// FMFs must be attached to the call, otherwise it's an ordered reduction
74
// and it can't be handled by generating a shuffle sequence.
75
Value *Acc = II->getArgOperand(0);
76
Value *Vec = II->getArgOperand(1);
77
unsigned RdxOpcode = getArithmeticReductionInstruction(ID);
78
if (!FMF.allowReassoc())
79
Rdx = getOrderedReduction(Builder, Acc, Vec, RdxOpcode, RK);
80
else {
81
if (!isPowerOf2_32(
82
cast<FixedVectorType>(Vec->getType())->getNumElements()))
83
continue;
84
Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK);
85
Rdx = Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, Acc, Rdx,
86
"bin.rdx");
87
}
88
break;
89
}
90
case Intrinsic::vector_reduce_and:
91
case Intrinsic::vector_reduce_or: {
92
// Canonicalize logical or/and reductions:
93
// Or reduction for i1 is represented as:
94
// %val = bitcast <ReduxWidth x i1> to iReduxWidth
95
// %res = cmp ne iReduxWidth %val, 0
96
// And reduction for i1 is represented as:
97
// %val = bitcast <ReduxWidth x i1> to iReduxWidth
98
// %res = cmp eq iReduxWidth %val, 11111
99
Value *Vec = II->getArgOperand(0);
100
auto *FTy = cast<FixedVectorType>(Vec->getType());
101
unsigned NumElts = FTy->getNumElements();
102
if (!isPowerOf2_32(NumElts))
103
continue;
104
105
if (FTy->getElementType() == Builder.getInt1Ty()) {
106
Rdx = Builder.CreateBitCast(Vec, Builder.getIntNTy(NumElts));
107
if (ID == Intrinsic::vector_reduce_and) {
108
Rdx = Builder.CreateICmpEQ(
109
Rdx, ConstantInt::getAllOnesValue(Rdx->getType()));
110
} else {
111
assert(ID == Intrinsic::vector_reduce_or && "Expected or reduction.");
112
Rdx = Builder.CreateIsNotNull(Rdx);
113
}
114
break;
115
}
116
unsigned RdxOpcode = getArithmeticReductionInstruction(ID);
117
Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK);
118
break;
119
}
120
case Intrinsic::vector_reduce_add:
121
case Intrinsic::vector_reduce_mul:
122
case Intrinsic::vector_reduce_xor:
123
case Intrinsic::vector_reduce_smax:
124
case Intrinsic::vector_reduce_smin:
125
case Intrinsic::vector_reduce_umax:
126
case Intrinsic::vector_reduce_umin: {
127
Value *Vec = II->getArgOperand(0);
128
if (!isPowerOf2_32(
129
cast<FixedVectorType>(Vec->getType())->getNumElements()))
130
continue;
131
unsigned RdxOpcode = getArithmeticReductionInstruction(ID);
132
Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK);
133
break;
134
}
135
case Intrinsic::vector_reduce_fmax:
136
case Intrinsic::vector_reduce_fmin: {
137
// We require "nnan" to use a shuffle reduction; "nsz" is implied by the
138
// semantics of the reduction.
139
Value *Vec = II->getArgOperand(0);
140
if (!isPowerOf2_32(
141
cast<FixedVectorType>(Vec->getType())->getNumElements()) ||
142
!FMF.noNaNs())
143
continue;
144
unsigned RdxOpcode = getArithmeticReductionInstruction(ID);
145
Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK);
146
break;
147
}
148
}
149
II->replaceAllUsesWith(Rdx);
150
II->eraseFromParent();
151
Changed = true;
152
}
153
return Changed;
154
}
155
156
class ExpandReductions : public FunctionPass {
157
public:
158
static char ID;
159
ExpandReductions() : FunctionPass(ID) {
160
initializeExpandReductionsPass(*PassRegistry::getPassRegistry());
161
}
162
163
bool runOnFunction(Function &F) override {
164
const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
165
return expandReductions(F, TTI);
166
}
167
168
void getAnalysisUsage(AnalysisUsage &AU) const override {
169
AU.addRequired<TargetTransformInfoWrapperPass>();
170
AU.setPreservesCFG();
171
}
172
};
173
}
174
175
char ExpandReductions::ID;
176
INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions",
177
"Expand reduction intrinsics", false, false)
178
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
179
INITIALIZE_PASS_END(ExpandReductions, "expand-reductions",
180
"Expand reduction intrinsics", false, false)
181
182
FunctionPass *llvm::createExpandReductionsPass() {
183
return new ExpandReductions();
184
}
185
186
PreservedAnalyses ExpandReductionsPass::run(Function &F,
187
FunctionAnalysisManager &AM) {
188
const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
189
if (!expandReductions(F, &TTI))
190
return PreservedAnalyses::all();
191
PreservedAnalyses PA;
192
PA.preserveSet<CFGAnalyses>();
193
return PA;
194
}
195
196