Path: blob/main/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
//===----- RISCVCodeGenPrepare.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is a RISC-V specific version of CodeGenPrepare.
// It munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-codegenprepare"
#define PASS_NAME "RISC-V CodeGenPrepare"

namespace {

class RISCVCodeGenPrepare : public FunctionPass,
                            public InstVisitor<RISCVCodeGenPrepare, bool> {
  const DataLayout *DL;
  const DominatorTree *DT;
  const RISCVSubtarget *ST;

public:
  static char ID;

  RISCVCodeGenPrepare() : FunctionPass(ID) {}

  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override { return PASS_NAME; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetPassConfig>();
  }

  bool visitInstruction(Instruction &I) { return false; }
  bool visitAnd(BinaryOperator &BO);
  bool visitIntrinsicInst(IntrinsicInst &I);
  bool expandVPStrideLoad(IntrinsicInst &I);
};

} // end anonymous namespace

// Try to optimize (i64 (and (zext nneg (i32 X)), C1)) if C1 has bit 31 set,
// but bits 63:32 are zero. If we know that bit 31 of X is 0, we can fill
// the upper 32 bits with ones.
bool RISCVCodeGenPrepare::visitAnd(BinaryOperator &BO) {
  if (!ST->is64Bit())
    return false;

  if (!BO.getType()->isIntegerTy(64))
    return false;

  using namespace PatternMatch;

  // Left hand side should be a zext nneg.
  Value *LHSSrc;
  if (!match(BO.getOperand(0), m_NNegZExt(m_Value(LHSSrc))))
    return false;

  if (!LHSSrc->getType()->isIntegerTy(32))
    return false;

  // Right hand side should be a constant.
  Value *RHS = BO.getOperand(1);

  auto *CI = dyn_cast<ConstantInt>(RHS);
  if (!CI)
    return false;
  uint64_t C = CI->getZExtValue();

  // Look for constants that fit in 32 bits but not simm12, and can be made
  // into simm12 by sign extending bit 31. This will allow use of ANDI.
  // TODO: Is it worth making simm32?
  if (!isUInt<32>(C) || isInt<12>(C) || !isInt<12>(SignExtend64<32>(C)))
    return false;

  // Sign extend the constant and replace the And operand.
  C = SignExtend64<32>(C);
  BO.setOperand(1, ConstantInt::get(RHS->getType(), C));

  return true;
}
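
// Illustrative example of the rewrite above (an editor's sketch, not from the
// upstream source; value names are made up):
//
//   %e = zext nneg i32 %x to i64
//   %r = and i64 %e, 4294967280   ; 0x00000000FFFFFFF0, does not fit simm12
//
// becomes
//
//   %e = zext nneg i32 %x to i64
//   %r = and i64 %e, -16          ; 0xFFFFFFFFFFFFFFF0, fits simm12 -> ANDI
//
// This is sound because the nneg flag guarantees bit 31 of %x is zero, so
// bits 63:32 of %e are already zero and the extra mask bits cannot change
// the result.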

// LLVM vector reduction intrinsics return a scalar result, but on RISC-V
// vector reduction instructions write the result in the first element of a
// vector register. So when a reduction in a loop uses a scalar phi, we end up
// with unnecessary scalar moves:
//
// loop:
// vfmv.s.f v10, fa0
// vfredosum.vs v8, v8, v10
// vfmv.f.s fa0, v8
//
// This mainly affects ordered fadd reductions, since other types of reduction
// typically use element-wise vectorization in the loop body. This tries to
// vectorize any scalar phis that feed into a fadd reduction:
//
// loop:
// %phi = phi float [ ..., %entry ], [ %acc, %loop ]
// %acc = call float @llvm.vector.reduce.fadd.nxv2f32(float %phi,
//                                                    <vscale x 2 x float> %vec)
//
// ->
//
// loop:
// %phi = phi <vscale x 2 x float> [ ..., %entry ], [ %acc.vec, %loop ]
// %phi.scalar = extractelement <vscale x 2 x float> %phi, i64 0
// %acc = call float @llvm.vector.reduce.fadd.nxv2f32(float %phi.scalar,
//                                                    <vscale x 2 x float> %vec)
// %acc.vec = insertelement <vscale x 2 x float> poison, float %acc, i64 0
//
// This eliminates the scalar -> vector -> scalar crossing during instruction
// selection.
bool RISCVCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) {
  if (expandVPStrideLoad(I))
    return true;

  if (I.getIntrinsicID() != Intrinsic::vector_reduce_fadd)
    return false;

  auto *PHI = dyn_cast<PHINode>(I.getOperand(0));
  if (!PHI || !PHI->hasOneUse() ||
      !llvm::is_contained(PHI->incoming_values(), &I))
    return false;

  Type *VecTy = I.getOperand(1)->getType();
  IRBuilder<> Builder(PHI);
  auto *VecPHI = Builder.CreatePHI(VecTy, PHI->getNumIncomingValues());

  for (auto *BB : PHI->blocks()) {
    Builder.SetInsertPoint(BB->getTerminator());
    Value *InsertElt = Builder.CreateInsertElement(
        VecTy, PHI->getIncomingValueForBlock(BB), (uint64_t)0);
    VecPHI->addIncoming(InsertElt, BB);
  }

  Builder.SetInsertPoint(&I);
  I.setOperand(0, Builder.CreateExtractElement(VecPHI, (uint64_t)0));

  PHI->eraseFromParent();

  return true;
}

// Always expand zero strided loads so we match more .vx splat patterns, even
// if we have +optimized-zero-stride-loads. RISCVDAGToDAGISel::Select will
// convert it back to a strided load if zero-stride loads are optimized on the
// subtarget.
bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) {
  Value *BasePtr, *VL;

  using namespace PatternMatch;
  if (!match(&II, m_Intrinsic<Intrinsic::experimental_vp_strided_load>(
                      m_Value(BasePtr), m_Zero(), m_AllOnes(), m_Value(VL))))
    return false;

  // If SEW > XLEN then a splat will get lowered as a zero strided load anyway,
  // so avoid expanding here.
  if (II.getType()->getScalarSizeInBits() > ST->getXLen())
    return false;

  if (!isKnownNonZero(VL, {*DL, DT, nullptr, &II}))
    return false;

  auto *VTy = cast<VectorType>(II.getType());

  IRBuilder<> Builder(&II);
  Type *STy = VTy->getElementType();
  Value *Val = Builder.CreateLoad(STy, BasePtr);
  Value *Res = Builder.CreateIntrinsic(Intrinsic::experimental_vp_splat, {VTy},
                                       {Val, II.getOperand(2), VL});

  II.replaceAllUsesWith(Res);
  II.eraseFromParent();
  return true;
}
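
// Illustrative example of the expansion above (an editor's sketch, not from
// the upstream source; the intrinsic name mangling is approximate):
//
//   %v = call <vscale x 2 x i32> @llvm.experimental.vp.strided.load.nxv2i32.p0.i64(
//            ptr %p, i64 0, <vscale x 2 x i1> splat (i1 true), i32 %evl)
//
// becomes, once %evl is known to be non-zero:
//
//   %s = load i32, ptr %p
//   %v = call <vscale x 2 x i32> @llvm.experimental.vp.splat.nxv2i32(
//            i32 %s, <vscale x 2 x i1> splat (i1 true), i32 %evl)
//
// The splat then matches the .vx/.vf splat patterns during instruction
// selection.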

bool RISCVCodeGenPrepare::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  auto &TPC = getAnalysis<TargetPassConfig>();
  auto &TM = TPC.getTM<RISCVTargetMachine>();
  ST = &TM.getSubtarget<RISCVSubtarget>(F);

  DL = &F.getDataLayout();
  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();

  bool MadeChange = false;
  for (auto &BB : F)
    for (Instruction &I : llvm::make_early_inc_range(BB))
      MadeChange |= visit(I);

  return MadeChange;
}

INITIALIZE_PASS_BEGIN(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false)

char RISCVCodeGenPrepare::ID = 0;

FunctionPass *llvm::createRISCVCodeGenPreparePass() {
  return new RISCVCodeGenPrepare();
}
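
// A minimal sketch of how this pass is typically scheduled (an editor's note,
// assuming the usual RISCVPassConfig::addIRPasses hook in
// RISCVTargetMachine.cpp; the exact placement upstream may differ):
//
//   void RISCVPassConfig::addIRPasses() {
//     if (getOptLevel() != CodeGenOptLevel::None)
//       addPass(createRISCVCodeGenPreparePass());
//     TargetPassConfig::addIRPasses();
//   }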