Path: blob/main/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp
35233 views
//===- ExpandReductions.cpp - Expand reduction intrinsics -----------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This pass implements IR expansion for reduction intrinsics, allowing targets9// to enable the intrinsics until just before codegen.10//11//===----------------------------------------------------------------------===//1213#include "llvm/CodeGen/ExpandReductions.h"14#include "llvm/Analysis/TargetTransformInfo.h"15#include "llvm/CodeGen/Passes.h"16#include "llvm/IR/IRBuilder.h"17#include "llvm/IR/InstIterator.h"18#include "llvm/IR/IntrinsicInst.h"19#include "llvm/IR/Intrinsics.h"20#include "llvm/InitializePasses.h"21#include "llvm/Pass.h"22#include "llvm/Transforms/Utils/LoopUtils.h"2324using namespace llvm;2526namespace {2728bool expandReductions(Function &F, const TargetTransformInfo *TTI) {29bool Changed = false;30SmallVector<IntrinsicInst *, 4> Worklist;31for (auto &I : instructions(F)) {32if (auto *II = dyn_cast<IntrinsicInst>(&I)) {33switch (II->getIntrinsicID()) {34default: break;35case Intrinsic::vector_reduce_fadd:36case Intrinsic::vector_reduce_fmul:37case Intrinsic::vector_reduce_add:38case Intrinsic::vector_reduce_mul:39case Intrinsic::vector_reduce_and:40case Intrinsic::vector_reduce_or:41case Intrinsic::vector_reduce_xor:42case Intrinsic::vector_reduce_smax:43case Intrinsic::vector_reduce_smin:44case Intrinsic::vector_reduce_umax:45case Intrinsic::vector_reduce_umin:46case Intrinsic::vector_reduce_fmax:47case Intrinsic::vector_reduce_fmin:48if (TTI->shouldExpandReduction(II))49Worklist.push_back(II);5051break;52}53}54}5556for (auto *II : Worklist) {57FastMathFlags FMF =58isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};59Intrinsic::ID ID = II->getIntrinsicID();60RecurKind RK = getMinMaxReductionRecurKind(ID);61TargetTransformInfo::ReductionShuffle RS =62TTI->getPreferredExpandedReductionShuffle(II);6364Value *Rdx = nullptr;65IRBuilder<> Builder(II);66IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);67Builder.setFastMathFlags(FMF);68switch (ID) {69default: llvm_unreachable("Unexpected intrinsic!");70case Intrinsic::vector_reduce_fadd:71case Intrinsic::vector_reduce_fmul: {72// FMFs must be attached to the call, otherwise it's an ordered reduction73// and it can't be handled by generating a shuffle sequence.74Value *Acc = II->getArgOperand(0);75Value *Vec = II->getArgOperand(1);76unsigned RdxOpcode = getArithmeticReductionInstruction(ID);77if (!FMF.allowReassoc())78Rdx = getOrderedReduction(Builder, Acc, Vec, RdxOpcode, RK);79else {80if (!isPowerOf2_32(81cast<FixedVectorType>(Vec->getType())->getNumElements()))82continue;83Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK);84Rdx = Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, Acc, Rdx,85"bin.rdx");86}87break;88}89case Intrinsic::vector_reduce_and:90case Intrinsic::vector_reduce_or: {91// Canonicalize logical or/and reductions:92// Or reduction for i1 is represented as:93// %val = bitcast <ReduxWidth x i1> to iReduxWidth94// %res = cmp ne iReduxWidth %val, 095// And reduction for i1 is represented as:96// %val = bitcast <ReduxWidth x i1> to iReduxWidth97// %res = cmp eq iReduxWidth %val, 1111198Value *Vec = II->getArgOperand(0);99auto *FTy = cast<FixedVectorType>(Vec->getType());100unsigned NumElts = FTy->getNumElements();101if (!isPowerOf2_32(NumElts))102continue;103104if (FTy->getElementType() == Builder.getInt1Ty()) {105Rdx = Builder.CreateBitCast(Vec, Builder.getIntNTy(NumElts));106if (ID == Intrinsic::vector_reduce_and) {107Rdx = Builder.CreateICmpEQ(108Rdx, ConstantInt::getAllOnesValue(Rdx->getType()));109} else {110assert(ID == Intrinsic::vector_reduce_or && "Expected or reduction.");111Rdx = Builder.CreateIsNotNull(Rdx);112}113break;114}115unsigned RdxOpcode = getArithmeticReductionInstruction(ID);116Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK);117break;118}119case Intrinsic::vector_reduce_add:120case Intrinsic::vector_reduce_mul:121case Intrinsic::vector_reduce_xor:122case Intrinsic::vector_reduce_smax:123case Intrinsic::vector_reduce_smin:124case Intrinsic::vector_reduce_umax:125case Intrinsic::vector_reduce_umin: {126Value *Vec = II->getArgOperand(0);127if (!isPowerOf2_32(128cast<FixedVectorType>(Vec->getType())->getNumElements()))129continue;130unsigned RdxOpcode = getArithmeticReductionInstruction(ID);131Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK);132break;133}134case Intrinsic::vector_reduce_fmax:135case Intrinsic::vector_reduce_fmin: {136// We require "nnan" to use a shuffle reduction; "nsz" is implied by the137// semantics of the reduction.138Value *Vec = II->getArgOperand(0);139if (!isPowerOf2_32(140cast<FixedVectorType>(Vec->getType())->getNumElements()) ||141!FMF.noNaNs())142continue;143unsigned RdxOpcode = getArithmeticReductionInstruction(ID);144Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK);145break;146}147}148II->replaceAllUsesWith(Rdx);149II->eraseFromParent();150Changed = true;151}152return Changed;153}154155class ExpandReductions : public FunctionPass {156public:157static char ID;158ExpandReductions() : FunctionPass(ID) {159initializeExpandReductionsPass(*PassRegistry::getPassRegistry());160}161162bool runOnFunction(Function &F) override {163const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);164return expandReductions(F, TTI);165}166167void getAnalysisUsage(AnalysisUsage &AU) const override {168AU.addRequired<TargetTransformInfoWrapperPass>();169AU.setPreservesCFG();170}171};172}173174char ExpandReductions::ID;175INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions",176"Expand reduction intrinsics", false, false)177INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)178INITIALIZE_PASS_END(ExpandReductions, "expand-reductions",179"Expand reduction intrinsics", false, false)180181FunctionPass *llvm::createExpandReductionsPass() {182return new ExpandReductions();183}184185PreservedAnalyses ExpandReductionsPass::run(Function &F,186FunctionAnalysisManager &AM) {187const auto &TTI = AM.getResult<TargetIRAnalysis>(F);188if (!expandReductions(F, &TTI))189return PreservedAnalyses::all();190PreservedAnalyses PA;191PA.preserveSet<CFGAnalyses>();192return PA;193}194195196