Path: blob/main/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
//===----- RISCVCodeGenPrepare.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is a RISC-V specific version of CodeGenPrepare.
// It munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-codegenprepare"
#define PASS_NAME "RISC-V CodeGenPrepare"

namespace {

class RISCVCodeGenPrepare : public FunctionPass,
                            public InstVisitor<RISCVCodeGenPrepare, bool> {
  const DataLayout *DL;
  const DominatorTree *DT;
  const RISCVSubtarget *ST;

public:
  static char ID;

  RISCVCodeGenPrepare() : FunctionPass(ID) {}

  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override { return PASS_NAME; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetPassConfig>();
  }

  bool visitInstruction(Instruction &I) { return false; }
  bool visitAnd(BinaryOperator &BO);
  bool visitIntrinsicInst(IntrinsicInst &I);
  bool expandVPStrideLoad(IntrinsicInst &I);
};

} // end anonymous namespace

// Try to optimize (i64 (and (zext nneg (i32 X)), C1)) if C1 has bit 31 set,
// but bits 63:32 are zero. If we know that bit 31 of X is 0, we can fill
// the upper 32 bits with ones.
bool RISCVCodeGenPrepare::visitAnd(BinaryOperator &BO) {
  if (!ST->is64Bit())
    return false;

  if (!BO.getType()->isIntegerTy(64))
    return false;

  using namespace PatternMatch;

  // Left hand side should be a zext nneg.
  Value *LHSSrc;
  if (!match(BO.getOperand(0), m_NNegZExt(m_Value(LHSSrc))))
    return false;

  if (!LHSSrc->getType()->isIntegerTy(32))
    return false;

  // Right hand side should be a constant.
  Value *RHS = BO.getOperand(1);

  auto *CI = dyn_cast<ConstantInt>(RHS);
  if (!CI)
    return false;
  uint64_t C = CI->getZExtValue();

  // Look for constants that fit in 32 bits but not simm12, and can be made
  // into simm12 by sign extending bit 31. This will allow use of ANDI.
  // TODO: Is it worth making simm32?
  if (!isUInt<32>(C) || isInt<12>(C) || !isInt<12>(SignExtend64<32>(C)))
    return false;

  // Sign extend the constant and replace the And operand.
  C = SignExtend64<32>(C);
  BO.setOperand(1, ConstantInt::get(RHS->getType(), C));

  return true;
}
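
// Illustrative example of the rewrite above (an editor's sketch, not from the
// upstream source; value names are made up):
//
//   %e = zext nneg i32 %x to i64
//   %r = and i64 %e, 4294967280   ; 0x00000000FFFFFFF0, does not fit simm12
//
// becomes
//
//   %e = zext nneg i32 %x to i64
//   %r = and i64 %e, -16          ; 0xFFFFFFFFFFFFFFF0, fits simm12 -> ANDI
//
// This is sound because the nneg flag guarantees bit 31 of %x is zero, so
// bits 63:32 of %e are already zero and the extra mask bits cannot change
// the result.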

// LLVM vector reduction intrinsics return a scalar result, but on RISC-V
// vector reduction instructions write the result in the first element of a
// vector register. So when a reduction in a loop uses a scalar phi, we end up
// with unnecessary scalar moves:
//
// loop:
// vfmv.s.f v10, fa0
// vfredosum.vs v8, v8, v10
// vfmv.f.s fa0, v8
//
// This mainly affects ordered fadd reductions, since other types of reduction
// typically use element-wise vectorization in the loop body. This tries to
// vectorize any scalar phis that feed into a fadd reduction:
//
// loop:
// %phi = phi float [ ..., %entry ], [ %acc, %loop ]
// %acc = call float @llvm.vector.reduce.fadd.nxv2f32(float %phi,
//                                                    <vscale x 2 x float> %vec)
//
// ->
//
// loop:
// %phi = phi <vscale x 2 x float> [ ..., %entry ], [ %acc.vec, %loop ]
// %phi.scalar = extractelement <vscale x 2 x float> %phi, i64 0
// %acc = call float @llvm.vector.reduce.fadd.nxv2f32(float %phi.scalar,
//                                                    <vscale x 2 x float> %vec)
// %acc.vec = insertelement <vscale x 2 x float> poison, float %acc, i64 0
//
// This eliminates the scalar -> vector -> scalar crossing during instruction
// selection.
bool RISCVCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) {
  if (expandVPStrideLoad(I))
    return true;

  if (I.getIntrinsicID() != Intrinsic::vector_reduce_fadd)
    return false;

  auto *PHI = dyn_cast<PHINode>(I.getOperand(0));
  if (!PHI || !PHI->hasOneUse() ||
      !llvm::is_contained(PHI->incoming_values(), &I))
    return false;

  Type *VecTy = I.getOperand(1)->getType();
  IRBuilder<> Builder(PHI);
  auto *VecPHI = Builder.CreatePHI(VecTy, PHI->getNumIncomingValues());

  for (auto *BB : PHI->blocks()) {
    Builder.SetInsertPoint(BB->getTerminator());
    Value *InsertElt = Builder.CreateInsertElement(
        VecTy, PHI->getIncomingValueForBlock(BB), (uint64_t)0);
    VecPHI->addIncoming(InsertElt, BB);
  }

  Builder.SetInsertPoint(&I);
  I.setOperand(0, Builder.CreateExtractElement(VecPHI, (uint64_t)0));

  PHI->eraseFromParent();

  return true;
}

// Always expand zero strided loads so we match more .vx splat patterns, even
// if we have +optimized-zero-stride-loads. RISCVDAGToDAGISel::Select will
// convert it back to a strided load if zero-stride loads are optimized on the
// subtarget.
bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) {
  Value *BasePtr, *VL;

  using namespace PatternMatch;
  if (!match(&II, m_Intrinsic<Intrinsic::experimental_vp_strided_load>(
                      m_Value(BasePtr), m_Zero(), m_AllOnes(), m_Value(VL))))
    return false;

  // If SEW > XLEN then a splat will get lowered as a zero strided load anyway,
  // so avoid expanding here.
  if (II.getType()->getScalarSizeInBits() > ST->getXLen())
    return false;

  if (!isKnownNonZero(VL, {*DL, DT, nullptr, &II}))
    return false;

  auto *VTy = cast<VectorType>(II.getType());

  IRBuilder<> Builder(&II);
  Type *STy = VTy->getElementType();
  Value *Val = Builder.CreateLoad(STy, BasePtr);
  Value *Res = Builder.CreateIntrinsic(Intrinsic::experimental_vp_splat, {VTy},
                                       {Val, II.getOperand(2), VL});

  II.replaceAllUsesWith(Res);
  II.eraseFromParent();
  return true;
}
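
// Illustrative example of the expansion above (an editor's sketch, not from
// the upstream source; the intrinsic name mangling is approximate):
//
//   %v = call <vscale x 2 x i32> @llvm.experimental.vp.strided.load.nxv2i32.p0.i64(
//            ptr %p, i64 0, <vscale x 2 x i1> splat (i1 true), i32 %evl)
//
// becomes, once %evl is known to be non-zero:
//
//   %s = load i32, ptr %p
//   %v = call <vscale x 2 x i32> @llvm.experimental.vp.splat.nxv2i32(
//            i32 %s, <vscale x 2 x i1> splat (i1 true), i32 %evl)
//
// The splat then matches the .vx/.vf splat patterns during instruction
// selection.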

bool RISCVCodeGenPrepare::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  auto &TPC = getAnalysis<TargetPassConfig>();
  auto &TM = TPC.getTM<RISCVTargetMachine>();
  ST = &TM.getSubtarget<RISCVSubtarget>(F);

  DL = &F.getDataLayout();
  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();

  bool MadeChange = false;
  for (auto &BB : F)
    for (Instruction &I : llvm::make_early_inc_range(BB))
      MadeChange |= visit(I);

  return MadeChange;
}

INITIALIZE_PASS_BEGIN(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false)

char RISCVCodeGenPrepare::ID = 0;

FunctionPass *llvm::createRISCVCodeGenPreparePass() {
  return new RISCVCodeGenPrepare();
}
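
// A minimal sketch of how this pass is typically scheduled (an editor's note,
// assuming the usual RISCVPassConfig::addIRPasses hook in
// RISCVTargetMachine.cpp; the exact placement upstream may differ):
//
//   void RISCVPassConfig::addIRPasses() {
//     if (getOptLevel() != CodeGenOptLevel::None)
//       addPass(createRISCVCodeGenPreparePass());
//     TargetPassConfig::addIRPasses();
//   }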