Path: contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISC-V uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelLowering.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "riscv-lower"

STATISTIC(NumTailCalls, "Number of tail calls");

static cl::opt<unsigned> ExtensionMaxWebSize(
    DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
    cl::desc("Give the maximum size (in number of nodes) of the web of "
             "instructions that we will consider for VW expansion"),
    cl::init(18));

static cl::opt<bool>
    AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
                     cl::desc("Allow the formation of VW_W operations (e.g., "
                              "VWADD_W) with splat constants"),
                     cl::init(false));

static cl::opt<unsigned> NumRepeatedDivisors(
    DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
    cl::desc("Set the minimum number of repetitions of a divisor to allow "
             "transformation to multiplications by the reciprocal"),
    cl::init(2));

static cl::opt<int>
    FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
              cl::desc("Give the maximum number of instructions that we will "
                       "use for creating a floating-point immediate value"),
              cl::init(2));

static cl::opt<bool>
    RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden,
                 cl::desc("Make i32 a legal type for SelectionDAG on RV64."));

RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                         const RISCVSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");

  if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
      !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
              "doesn't support the F instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
             !Subtarget.hasStdExtD()) {
    errs() << "Hard-float 'd' ABI can't be used for a target that "
              "doesn't support the D instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  }

  switch (ABI) {
  default:
    report_fatal_error("Don't know how to lower this ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_ILP32E:
  case RISCVABI::ABI_LP64E:
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64:
  case RISCVABI::ABI_LP64F:
  case RISCVABI::ABI_LP64D:
    break;
  }

  MVT XLenVT = Subtarget.getXLenVT();

  // Set up the register classes.
  addRegisterClass(XLenVT, &RISCV::GPRRegClass);
  if (Subtarget.is64Bit() && RV64LegalI32)
    addRegisterClass(MVT::i32, &RISCV::GPRRegClass);

  if (Subtarget.hasStdExtZfhmin())
    addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtZfbfmin())
    addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtF())
    addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
  if (Subtarget.hasStdExtD())
    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
  if (Subtarget.hasStdExtZhinxmin())
    addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
  if (Subtarget.hasStdExtZfinx())
    addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
  if (Subtarget.hasStdExtZdinx()) {
    if (Subtarget.is64Bit())
      addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
    else
      addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
  }

  static const MVT::SimpleValueType BoolVecVTs[] = {
      MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1, MVT::nxv8i1,
      MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
  static const MVT::SimpleValueType IntVecVTs[] = {
      MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
      MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
      MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
      MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
      MVT::nxv4i64, MVT::nxv8i64};
  static const MVT::SimpleValueType F16VecVTs[] = {
      MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
      MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
  static const MVT::SimpleValueType BF16VecVTs[] = {
      MVT::nxv1bf16, MVT::nxv2bf16,  MVT::nxv4bf16,
      MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
  static const MVT::SimpleValueType F32VecVTs[] = {
      MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
  static const MVT::SimpleValueType F64VecVTs[] = {
      MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};

  if (Subtarget.hasVInstructions()) {
    auto addRegClassForRVV = [this](MVT VT) {
      // Disable the smallest fractional LMUL types if ELEN is less than
      // RVVBitsPerBlock.
      unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
      if (VT.getVectorMinNumElements() < MinElts)
        return;

      unsigned Size = VT.getSizeInBits().getKnownMinValue();
      const TargetRegisterClass *RC;
      if (Size <= RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRRegClass;
      else if (Size == 2 * RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRM2RegClass;
      else if (Size == 4 * RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRM4RegClass;
      else if (Size == 8 * RISCV::RVVBitsPerBlock)
        RC = &RISCV::VRM8RegClass;
      else
        llvm_unreachable("Unexpected size");

      addRegisterClass(VT, RC);
    };

    for (MVT VT : BoolVecVTs)
      addRegClassForRVV(VT);
    for (MVT VT : IntVecVTs) {
      if (VT.getVectorElementType() == MVT::i64 &&
          !Subtarget.hasVInstructionsI64())
        continue;
      addRegClassForRVV(VT);
    }

    if (Subtarget.hasVInstructionsF16Minimal())
      for (MVT VT : F16VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasVInstructionsBF16())
      for (MVT VT : BF16VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasVInstructionsF32())
      for (MVT VT : F32VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasVInstructionsF64())
      for (MVT VT : F64VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.useRVVForFixedLengthVectors()) {
      auto addRegClassForFixedVectors = [this](MVT VT) {
        MVT ContainerVT = getContainerForFixedLengthVector(VT);
        unsigned RCID = getRegClassIDForVecVT(ContainerVT);
        const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
        addRegisterClass(VT, TRI.getRegClass(RCID));
      };
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);
    }
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(RISCV::X2);

  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, XLenVT,
                   MVT::i1, Promote);
  // DAGCombiner can call isLoadExtLegal for types that aren't legal.
  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32,
                   MVT::i1, Promote);

  // TODO: add all necessary setOperationAction calls.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, XLenVT, Expand);
  if (RV64LegalI32 && Subtarget.is64Bit())
    setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
  if (RV64LegalI32 && Subtarget.is64Bit())
    setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);

  if (!Subtarget.hasVendorXCValu())
    setCondCodeAction(ISD::SETLE, XLenVT, Expand);
  setCondCodeAction(ISD::SETGT, XLenVT, Custom);
  setCondCodeAction(ISD::SETGE, XLenVT, Expand);
  if (!Subtarget.hasVendorXCValu())
    setCondCodeAction(ISD::SETULE, XLenVT, Expand);
  setCondCodeAction(ISD::SETUGT, XLenVT, Custom);
  setCondCodeAction(ISD::SETUGE, XLenVT, Expand);

  if (RV64LegalI32 && Subtarget.is64Bit())
    setOperationAction(ISD::SETCC, MVT::i32, Promote);

  setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);

  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
  if (RV64LegalI32 && Subtarget.is64Bit())
    setOperationAction(ISD::VAARG, MVT::i32, Promote);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);

  if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
    setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);

    if (!RV64LegalI32) {
      setOperationAction(ISD::LOAD, MVT::i32, Custom);
      setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},
                         MVT::i32, Custom);
      setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT},
                         MVT::i32, Custom);
      if
(!Subtarget.hasStdExtZbb())288setOperationAction({ISD::SADDSAT, ISD::SSUBSAT}, MVT::i32, Custom);289} else {290setOperationAction(ISD::SSUBO, MVT::i32, Custom);291if (Subtarget.hasStdExtZbb()) {292setOperationAction({ISD::SADDSAT, ISD::SSUBSAT}, MVT::i32, Custom);293setOperationAction({ISD::UADDSAT, ISD::USUBSAT}, MVT::i32, Custom);294}295}296setOperationAction(ISD::SADDO, MVT::i32, Custom);297}298if (!Subtarget.hasStdExtZmmul()) {299setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU}, XLenVT, Expand);300if (RV64LegalI32 && Subtarget.is64Bit())301setOperationAction(ISD::MUL, MVT::i32, Promote);302} else if (Subtarget.is64Bit()) {303setOperationAction(ISD::MUL, MVT::i128, Custom);304if (!RV64LegalI32)305setOperationAction(ISD::MUL, MVT::i32, Custom);306else307setOperationAction(ISD::SMULO, MVT::i32, Custom);308} else {309setOperationAction(ISD::MUL, MVT::i64, Custom);310}311312if (!Subtarget.hasStdExtM()) {313setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM},314XLenVT, Expand);315if (RV64LegalI32 && Subtarget.is64Bit())316setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, MVT::i32,317Promote);318} else if (Subtarget.is64Bit()) {319if (!RV64LegalI32)320setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM},321{MVT::i8, MVT::i16, MVT::i32}, Custom);322}323324if (RV64LegalI32 && Subtarget.is64Bit()) {325setOperationAction({ISD::MULHS, ISD::MULHU}, MVT::i32, Expand);326setOperationAction(327{ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, MVT::i32,328Expand);329}330331setOperationAction(332{ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT,333Expand);334335setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, XLenVT,336Custom);337338if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {339if (!RV64LegalI32 && Subtarget.is64Bit())340setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);341} else if (Subtarget.hasVendorXTHeadBb()) {342if (Subtarget.is64Bit())343setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);344setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Custom);345} else if (Subtarget.hasVendorXCVbitmanip()) {346setOperationAction(ISD::ROTL, XLenVT, Expand);347} else {348setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand);349if (RV64LegalI32 && Subtarget.is64Bit())350setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Expand);351}352353// With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll354// pattern match it directly in isel.355setOperationAction(ISD::BSWAP, XLenVT,356(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||357Subtarget.hasVendorXTHeadBb())358? Legal359: Expand);360if (RV64LegalI32 && Subtarget.is64Bit())361setOperationAction(ISD::BSWAP, MVT::i32,362(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||363Subtarget.hasVendorXTHeadBb())364? Promote365: Expand);366367368if (Subtarget.hasVendorXCVbitmanip()) {369setOperationAction(ISD::BITREVERSE, XLenVT, Legal);370} else {371// Zbkb can use rev8+brev8 to implement bitreverse.372setOperationAction(ISD::BITREVERSE, XLenVT,373Subtarget.hasStdExtZbkb() ? 
Custom : Expand);374}375376if (Subtarget.hasStdExtZbb()) {377setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT,378Legal);379if (RV64LegalI32 && Subtarget.is64Bit())380setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, MVT::i32,381Promote);382383if (Subtarget.is64Bit()) {384if (RV64LegalI32)385setOperationAction(ISD::CTTZ, MVT::i32, Legal);386else387setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);388}389} else if (!Subtarget.hasVendorXCVbitmanip()) {390setOperationAction({ISD::CTTZ, ISD::CTPOP}, XLenVT, Expand);391if (RV64LegalI32 && Subtarget.is64Bit())392setOperationAction({ISD::CTTZ, ISD::CTPOP}, MVT::i32, Expand);393}394395if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||396Subtarget.hasVendorXCVbitmanip()) {397// We need the custom lowering to make sure that the resulting sequence398// for the 32bit case is efficient on 64bit targets.399if (Subtarget.is64Bit()) {400if (RV64LegalI32) {401setOperationAction(ISD::CTLZ, MVT::i32,402Subtarget.hasStdExtZbb() ? Legal : Promote);403if (!Subtarget.hasStdExtZbb())404setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);405} else406setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);407}408} else {409setOperationAction(ISD::CTLZ, XLenVT, Expand);410if (RV64LegalI32 && Subtarget.is64Bit())411setOperationAction(ISD::CTLZ, MVT::i32, Expand);412}413414if (!RV64LegalI32 && Subtarget.is64Bit() &&415!Subtarget.hasShortForwardBranchOpt())416setOperationAction(ISD::ABS, MVT::i32, Custom);417418// We can use PseudoCCSUB to implement ABS.419if (Subtarget.hasShortForwardBranchOpt())420setOperationAction(ISD::ABS, XLenVT, Legal);421422if (!Subtarget.hasVendorXTHeadCondMov()) {423setOperationAction(ISD::SELECT, XLenVT, Custom);424if (RV64LegalI32 && Subtarget.is64Bit())425setOperationAction(ISD::SELECT, MVT::i32, Promote);426}427428static const unsigned FPLegalNodeTypes[] = {429ISD::FMINNUM, ISD::FMAXNUM, ISD::LRINT,430ISD::LLRINT, ISD::LROUND, ISD::LLROUND,431ISD::STRICT_LRINT, ISD::STRICT_LLRINT, ISD::STRICT_LROUND,432ISD::STRICT_LLROUND, ISD::STRICT_FMA, ISD::STRICT_FADD,433ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,434ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS};435436static const ISD::CondCode FPCCToExpand[] = {437ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,438ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,439ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO};440441static const unsigned FPOpToExpand[] = {442ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW,443ISD::FREM};444445static const unsigned FPRndMode[] = {446ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND,447ISD::FROUNDEVEN};448449if (Subtarget.hasStdExtZfhminOrZhinxmin())450setOperationAction(ISD::BITCAST, MVT::i16, Custom);451452static const unsigned ZfhminZfbfminPromoteOps[] = {453ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD,454ISD::FSUB, ISD::FMUL, ISD::FMA,455ISD::FDIV, ISD::FSQRT, ISD::FABS,456ISD::FNEG, ISD::STRICT_FMA, ISD::STRICT_FADD,457ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,458ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,459ISD::SETCC, ISD::FCEIL, ISD::FFLOOR,460ISD::FTRUNC, ISD::FRINT, ISD::FROUND,461ISD::FROUNDEVEN, ISD::SELECT};462463if (Subtarget.hasStdExtZfbfmin()) {464setOperationAction(ISD::BITCAST, MVT::i16, Custom);465setOperationAction(ISD::BITCAST, MVT::bf16, Custom);466setOperationAction(ISD::FP_ROUND, MVT::bf16, Custom);467setOperationAction(ISD::FP_EXTEND, MVT::f32, 
Custom);468setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);469setOperationAction(ISD::ConstantFP, MVT::bf16, Expand);470setOperationAction(ISD::SELECT_CC, MVT::bf16, Expand);471setOperationAction(ISD::BR_CC, MVT::bf16, Expand);472setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);473setOperationAction(ISD::FREM, MVT::bf16, Promote);474// FIXME: Need to promote bf16 FCOPYSIGN to f32, but the475// DAGCombiner::visitFP_ROUND probably needs improvements first.476setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand);477}478479if (Subtarget.hasStdExtZfhminOrZhinxmin()) {480if (Subtarget.hasStdExtZfhOrZhinx()) {481setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);482setOperationAction(FPRndMode, MVT::f16,483Subtarget.hasStdExtZfa() ? Legal : Custom);484setOperationAction(ISD::SELECT, MVT::f16, Custom);485setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom);486} else {487setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);488setOperationAction({ISD::STRICT_LRINT, ISD::STRICT_LLRINT,489ISD::STRICT_LROUND, ISD::STRICT_LLROUND},490MVT::f16, Legal);491// FIXME: Need to promote f16 FCOPYSIGN to f32, but the492// DAGCombiner::visitFP_ROUND probably needs improvements first.493setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);494}495496setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);497setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);498setCondCodeAction(FPCCToExpand, MVT::f16, Expand);499setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);500setOperationAction(ISD::BR_CC, MVT::f16, Expand);501502setOperationAction(ISD::FNEARBYINT, MVT::f16,503Subtarget.hasStdExtZfa() ? Legal : Promote);504setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI,505ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,506ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2,507ISD::FLOG10},508MVT::f16, Promote);509510// FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have511// complete support for all operations in LegalizeDAG.512setOperationAction({ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR,513ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT,514ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN,515ISD::STRICT_FTRUNC},516MVT::f16, Promote);517518// We need to custom promote this.519if (Subtarget.is64Bit())520setOperationAction(ISD::FPOWI, MVT::i32, Custom);521522setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16,523Subtarget.hasStdExtZfa() ? Legal : Custom);524}525526if (Subtarget.hasStdExtFOrZfinx()) {527setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);528setOperationAction(FPRndMode, MVT::f32,529Subtarget.hasStdExtZfa() ? Legal : Custom);530setCondCodeAction(FPCCToExpand, MVT::f32, Expand);531setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);532setOperationAction(ISD::SELECT, MVT::f32, Custom);533setOperationAction(ISD::BR_CC, MVT::f32, Expand);534setOperationAction(FPOpToExpand, MVT::f32, Expand);535setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);536setTruncStoreAction(MVT::f32, MVT::f16, Expand);537setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);538setTruncStoreAction(MVT::f32, MVT::bf16, Expand);539setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom);540setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);541setOperationAction(ISD::FP_TO_BF16, MVT::f32,542Subtarget.isSoftFPABI() ? 
LibCall : Custom);543setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);544setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);545546if (Subtarget.hasStdExtZfa()) {547setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);548setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Legal);549} else {550setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom);551}552}553554if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())555setOperationAction(ISD::BITCAST, MVT::i32, Custom);556557if (Subtarget.hasStdExtDOrZdinx()) {558setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);559560if (!Subtarget.is64Bit())561setOperationAction(ISD::BITCAST, MVT::i64, Custom);562563if (Subtarget.hasStdExtZfa()) {564setOperationAction(FPRndMode, MVT::f64, Legal);565setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);566setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Legal);567} else {568if (Subtarget.is64Bit())569setOperationAction(FPRndMode, MVT::f64, Custom);570571setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Custom);572}573574setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);575setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);576setCondCodeAction(FPCCToExpand, MVT::f64, Expand);577setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);578setOperationAction(ISD::SELECT, MVT::f64, Custom);579setOperationAction(ISD::BR_CC, MVT::f64, Expand);580setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);581setTruncStoreAction(MVT::f64, MVT::f32, Expand);582setOperationAction(FPOpToExpand, MVT::f64, Expand);583setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);584setTruncStoreAction(MVT::f64, MVT::f16, Expand);585setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);586setTruncStoreAction(MVT::f64, MVT::bf16, Expand);587setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom);588setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);589setOperationAction(ISD::FP_TO_BF16, MVT::f64,590Subtarget.isSoftFPABI() ? LibCall : Custom);591setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);592setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);593}594595if (Subtarget.is64Bit()) {596setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT,597ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT},598MVT::i32, Custom);599setOperationAction(ISD::LROUND, MVT::i32, Custom);600}601602if (Subtarget.hasStdExtFOrZfinx()) {603setOperationAction({ISD::FP_TO_UINT_SAT, ISD::FP_TO_SINT_SAT}, XLenVT,604Custom);605606setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT,607ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},608XLenVT, Legal);609610if (RV64LegalI32 && Subtarget.is64Bit())611setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT,612ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},613MVT::i32, Legal);614615setOperationAction(ISD::GET_ROUNDING, XLenVT, Custom);616setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);617}618619setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,620ISD::JumpTable},621XLenVT, Custom);622623setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);624625if (Subtarget.is64Bit())626setOperationAction(ISD::Constant, MVT::i64, Custom);627628// TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.629// Unfortunately this can't be determined just from the ISA naming string.630setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,631Subtarget.is64Bit() ? Legal : Custom);632setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64,633Subtarget.is64Bit() ? 
Legal : Custom);634635setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal);636setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);637if (Subtarget.is64Bit())638setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);639640if (Subtarget.hasStdExtZicbop()) {641setOperationAction(ISD::PREFETCH, MVT::Other, Legal);642}643644if (Subtarget.hasStdExtA()) {645setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());646if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())647setMinCmpXchgSizeInBits(8);648else649setMinCmpXchgSizeInBits(32);650} else if (Subtarget.hasForcedAtomics()) {651setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());652} else {653setMaxAtomicSizeInBitsSupported(0);654}655656setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);657658setBooleanContents(ZeroOrOneBooleanContent);659660if (getTargetMachine().getTargetTriple().isOSLinux()) {661// Custom lowering of llvm.clear_cache.662setOperationAction(ISD::CLEAR_CACHE, MVT::Other, Custom);663}664665if (Subtarget.hasVInstructions()) {666setBooleanVectorContents(ZeroOrOneBooleanContent);667668setOperationAction(ISD::VSCALE, XLenVT, Custom);669if (RV64LegalI32 && Subtarget.is64Bit())670setOperationAction(ISD::VSCALE, MVT::i32, Custom);671672// RVV intrinsics may have illegal operands.673// We also need to custom legalize vmv.x.s.674setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN,675ISD::INTRINSIC_VOID},676{MVT::i8, MVT::i16}, Custom);677if (Subtarget.is64Bit())678setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},679MVT::i32, Custom);680else681setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN},682MVT::i64, Custom);683684setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},685MVT::Other, Custom);686687static const unsigned IntegerVPOps[] = {688ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,689ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,690ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,691ISD::VP_XOR, ISD::VP_SRA, ISD::VP_SRL,692ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,693ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,694ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,695ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,696ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,697ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,698ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,699ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,700ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,701ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF,702ISD::EXPERIMENTAL_VP_SPLAT};703704static const unsigned FloatingPointVPOps[] = {705ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,706ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,707ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,708ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,709ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,710ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,711ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,712ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,713ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,714ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,715ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,716ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,717ISD::EXPERIMENTAL_VP_SPLICE, ISD::VP_REDUCE_FMINIMUM,718ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};719720static const unsigned IntegerVecReduceOps[] = {721ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,722ISD::VECREDUCE_XOR, 
ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,723ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN};724725static const unsigned FloatingPointVecReduceOps[] = {726ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN,727ISD::VECREDUCE_FMAX, ISD::VECREDUCE_FMINIMUM, ISD::VECREDUCE_FMAXIMUM};728729if (!Subtarget.is64Bit()) {730// We must custom-lower certain vXi64 operations on RV32 due to the vector731// element type being illegal.732setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},733MVT::i64, Custom);734735setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);736737setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,738ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,739ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,740ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},741MVT::i64, Custom);742}743744for (MVT VT : BoolVecVTs) {745if (!isTypeLegal(VT))746continue;747748setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);749750// Mask VTs are custom-expanded into a series of standard nodes751setOperationAction({ISD::TRUNCATE, ISD::CONCAT_VECTORS,752ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,753ISD::SCALAR_TO_VECTOR},754VT, Custom);755756setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,757Custom);758759setOperationAction(ISD::SELECT, VT, Custom);760setOperationAction(761{ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,762Expand);763764setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,765Custom);766767setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);768769setOperationAction(770{ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,771Custom);772773setOperationAction(774{ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,775Custom);776777// RVV has native int->float & float->int conversions where the778// element type sizes are within one power-of-two of each other. 
Any779// wider distances between type sizes have to be lowered as sequences780// which progressively narrow the gap in stages.781setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,782ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,783ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,784ISD::STRICT_FP_TO_UINT},785VT, Custom);786setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,787Custom);788789// Expand all extending loads to types larger than this, and truncating790// stores from types larger than this.791for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {792setTruncStoreAction(VT, OtherVT, Expand);793setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,794OtherVT, Expand);795}796797setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,798ISD::VP_TRUNCATE, ISD::VP_SETCC},799VT, Custom);800801setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);802setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);803804setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);805806setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);807setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);808809setOperationPromotedToType(810ISD::VECTOR_SPLICE, VT,811MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));812}813814for (MVT VT : IntVecVTs) {815if (!isTypeLegal(VT))816continue;817818setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);819setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);820821// Vectors implement MULHS/MULHU.822setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT, Expand);823824// nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.825if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())826setOperationAction({ISD::MULHU, ISD::MULHS}, VT, Expand);827828setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT,829Legal);830831setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Custom);832833// Custom-lower extensions and truncations from/to mask types.834setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND},835VT, Custom);836837// RVV has native int->float & float->int conversions where the838// element type sizes are within one power-of-two of each other. 
Any839// wider distances between type sizes have to be lowered as sequences840// which progressively narrow the gap in stages.841setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,842ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,843ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,844ISD::STRICT_FP_TO_UINT},845VT, Custom);846setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,847Custom);848setOperationAction({ISD::AVGFLOORS, ISD::AVGFLOORU, ISD::AVGCEILS,849ISD::AVGCEILU, ISD::SADDSAT, ISD::UADDSAT,850ISD::SSUBSAT, ISD::USUBSAT},851VT, Legal);852853// Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"854// nodes which truncate by one power of two at a time.855setOperationAction(ISD::TRUNCATE, VT, Custom);856857// Custom-lower insert/extract operations to simplify patterns.858setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,859Custom);860861// Custom-lower reduction operations to set up the corresponding custom862// nodes' operands.863setOperationAction(IntegerVecReduceOps, VT, Custom);864865setOperationAction(IntegerVPOps, VT, Custom);866867setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);868869setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},870VT, Custom);871872setOperationAction(873{ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,874ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},875VT, Custom);876877setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,878ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},879VT, Custom);880881setOperationAction(ISD::SELECT, VT, Custom);882setOperationAction(ISD::SELECT_CC, VT, Expand);883884setOperationAction({ISD::STEP_VECTOR, ISD::VECTOR_REVERSE}, VT, Custom);885886for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {887setTruncStoreAction(VT, OtherVT, Expand);888setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,889OtherVT, Expand);890}891892setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);893setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);894895// Splice896setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);897898if (Subtarget.hasStdExtZvkb()) {899setOperationAction(ISD::BSWAP, VT, Legal);900setOperationAction(ISD::VP_BSWAP, VT, Custom);901} else {902setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);903setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand);904}905906if (Subtarget.hasStdExtZvbb()) {907setOperationAction(ISD::BITREVERSE, VT, Legal);908setOperationAction(ISD::VP_BITREVERSE, VT, Custom);909setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,910ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},911VT, Custom);912} else {913setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);914setOperationAction({ISD::CTLZ, ISD::CTTZ, ISD::CTPOP}, VT, Expand);915setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,916ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},917VT, Expand);918919// Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the920// range of f32.921EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());922if (isTypeLegal(FloatVT)) {923setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,924ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,925ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},926VT, Custom);927}928}929}930931// Expand various CCs to best match the RVV ISA, which natively supports UNE932// but no other unordered comparisons, and supports all ordered comparisons933// except ONE. 
Additionally, we expand GT,OGT,GE,OGE for optimization934// purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),935// and we pattern-match those back to the "original", swapping operands once936// more. This way we catch both operations and both "vf" and "fv" forms with937// fewer patterns.938static const ISD::CondCode VFPCCToExpand[] = {939ISD::SETO, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,940ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,941ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE,942};943944// TODO: support more ops.945static const unsigned ZvfhminPromoteOps[] = {946ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,947ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,948ISD::FABS, ISD::FNEG, ISD::FCOPYSIGN, ISD::FCEIL,949ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN, ISD::FRINT,950ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SETCC, ISD::FMAXIMUM,951ISD::FMINIMUM, ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,952ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA};953954// TODO: support more vp ops.955static const unsigned ZvfhminPromoteVPOps[] = {956ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,957ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,958ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,959ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT,960ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL,961ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,962ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,963ISD::VP_FNEARBYINT, ISD::VP_SETCC, ISD::VP_FMINIMUM,964ISD::VP_FMAXIMUM, ISD::VP_REDUCE_FMINIMUM, ISD::VP_REDUCE_FMAXIMUM};965966// Sets common operation actions on RVV floating-point vector types.967const auto SetCommonVFPActions = [&](MVT VT) {968setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);969// RVV has native FP_ROUND & FP_EXTEND conversions where the element type970// sizes are within one power-of-two of each other. 
Therefore conversions971// between vXf16 and vXf64 must be lowered as sequences which convert via972// vXf32.973setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);974setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);975// Custom-lower insert/extract operations to simplify patterns.976setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,977Custom);978// Expand various condition codes (explained above).979setCondCodeAction(VFPCCToExpand, VT, Expand);980981setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal);982setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom);983984setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,985ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT,986ISD::IS_FPCLASS},987VT, Custom);988989setOperationAction(FloatingPointVecReduceOps, VT, Custom);990991// Expand FP operations that need libcalls.992setOperationAction(ISD::FREM, VT, Expand);993setOperationAction(ISD::FPOW, VT, Expand);994setOperationAction(ISD::FCOS, VT, Expand);995setOperationAction(ISD::FSIN, VT, Expand);996setOperationAction(ISD::FSINCOS, VT, Expand);997setOperationAction(ISD::FEXP, VT, Expand);998setOperationAction(ISD::FEXP2, VT, Expand);999setOperationAction(ISD::FEXP10, VT, Expand);1000setOperationAction(ISD::FLOG, VT, Expand);1001setOperationAction(ISD::FLOG2, VT, Expand);1002setOperationAction(ISD::FLOG10, VT, Expand);10031004setOperationAction(ISD::FCOPYSIGN, VT, Legal);10051006setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);10071008setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},1009VT, Custom);10101011setOperationAction(1012{ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,1013ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},1014VT, Custom);10151016setOperationAction(ISD::SELECT, VT, Custom);1017setOperationAction(ISD::SELECT_CC, VT, Expand);10181019setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,1020ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},1021VT, Custom);10221023setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);1024setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);10251026setOperationAction({ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE}, VT, Custom);10271028setOperationAction(FloatingPointVPOps, VT, Custom);10291030setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,1031Custom);1032setOperationAction({ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,1033ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA},1034VT, Legal);1035setOperationAction({ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,1036ISD::STRICT_FTRUNC, ISD::STRICT_FCEIL,1037ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,1038ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},1039VT, Custom);1040};10411042// Sets common extload/truncstore actions on RVV floating-point vector1043// types.1044const auto SetCommonVFPExtLoadTruncStoreActions =1045[&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {1046for (auto SmallVT : SmallerVTs) {1047setTruncStoreAction(VT, SmallVT, Expand);1048setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);1049}1050};10511052if (Subtarget.hasVInstructionsF16()) {1053for (MVT VT : F16VecVTs) {1054if (!isTypeLegal(VT))1055continue;1056SetCommonVFPActions(VT);1057}1058} else if (Subtarget.hasVInstructionsF16Minimal()) {1059for (MVT VT : F16VecVTs) {1060if (!isTypeLegal(VT))1061continue;1062setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);1063setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, 
VT,1064Custom);1065setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);1066setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,1067Custom);1068setOperationAction(ISD::SELECT_CC, VT, Expand);1069setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP,1070ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},1071VT, Custom);1072setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,1073ISD::EXTRACT_SUBVECTOR},1074VT, Custom);1075if (Subtarget.hasStdExtZfhmin())1076setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);1077// load/store1078setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);10791080// Custom split nxv32f16 since nxv32f32 if not legal.1081if (VT == MVT::nxv32f16) {1082setOperationAction(ZvfhminPromoteOps, VT, Custom);1083setOperationAction(ZvfhminPromoteVPOps, VT, Custom);1084continue;1085}1086// Add more promote ops.1087MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());1088setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);1089setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);1090}1091}10921093// TODO: Could we merge some code with zvfhmin?1094if (Subtarget.hasVInstructionsBF16()) {1095for (MVT VT : BF16VecVTs) {1096if (!isTypeLegal(VT))1097continue;1098setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);1099setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);1100setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,1101Custom);1102setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,1103ISD::EXTRACT_SUBVECTOR},1104VT, Custom);1105setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);1106if (Subtarget.hasStdExtZfbfmin())1107setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);1108setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,1109Custom);1110setOperationAction(ISD::SELECT_CC, VT, Expand);1111// TODO: Promote to fp32.1112}1113}11141115if (Subtarget.hasVInstructionsF32()) {1116for (MVT VT : F32VecVTs) {1117if (!isTypeLegal(VT))1118continue;1119SetCommonVFPActions(VT);1120SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);1121}1122}11231124if (Subtarget.hasVInstructionsF64()) {1125for (MVT VT : F64VecVTs) {1126if (!isTypeLegal(VT))1127continue;1128SetCommonVFPActions(VT);1129SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);1130SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);1131}1132}11331134if (Subtarget.useRVVForFixedLengthVectors()) {1135for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {1136if (!useRVVForFixedLengthVectorVT(VT))1137continue;11381139// By default everything must be expanded.1140for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)1141setOperationAction(Op, VT, Expand);1142for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {1143setTruncStoreAction(VT, OtherVT, Expand);1144setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, VT,1145OtherVT, Expand);1146}11471148// Custom lower fixed vector undefs to scalable vector undefs to avoid1149// expansion to a build_vector of 0s.1150setOperationAction(ISD::UNDEF, VT, Custom);11511152// We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.1153setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,1154Custom);11551156setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS}, VT,1157Custom);11581159setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},1160VT, Custom);11611162setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);11631164setOperationAction({ISD::LOAD, ISD::STORE}, VT, 
Custom);11651166setOperationAction(ISD::SETCC, VT, Custom);11671168setOperationAction(ISD::SELECT, VT, Custom);11691170setOperationAction(ISD::TRUNCATE, VT, Custom);11711172setOperationAction(ISD::BITCAST, VT, Custom);11731174setOperationAction(1175{ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,1176Custom);11771178setOperationAction(1179{ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,1180Custom);11811182setOperationAction(1183{1184ISD::SINT_TO_FP,1185ISD::UINT_TO_FP,1186ISD::FP_TO_SINT,1187ISD::FP_TO_UINT,1188ISD::STRICT_SINT_TO_FP,1189ISD::STRICT_UINT_TO_FP,1190ISD::STRICT_FP_TO_SINT,1191ISD::STRICT_FP_TO_UINT,1192},1193VT, Custom);1194setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,1195Custom);11961197setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);11981199// Operations below are different for between masks and other vectors.1200if (VT.getVectorElementType() == MVT::i1) {1201setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,1202ISD::OR, ISD::XOR},1203VT, Custom);12041205setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,1206ISD::VP_SETCC, ISD::VP_TRUNCATE},1207VT, Custom);12081209setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);1210setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);1211continue;1212}12131214// Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to1215// it before type legalization for i64 vectors on RV32. It will then be1216// type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.1217// FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs1218// improvements first.1219if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {1220setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);1221setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);1222}12231224setOperationAction(1225{ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);12261227setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,1228ISD::EXPERIMENTAL_VP_STRIDED_LOAD,1229ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,1230ISD::VP_SCATTER},1231VT, Custom);12321233setOperationAction({ISD::ADD, ISD::MUL, ISD::SUB, ISD::AND, ISD::OR,1234ISD::XOR, ISD::SDIV, ISD::SREM, ISD::UDIV,1235ISD::UREM, ISD::SHL, ISD::SRA, ISD::SRL},1236VT, Custom);12371238setOperationAction(1239{ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::ABS}, VT, Custom);12401241setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Custom);12421243// vXi64 MULHS/MULHU requires the V extension instead of Zve64*.1244if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())1245setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom);12461247setOperationAction({ISD::AVGFLOORS, ISD::AVGFLOORU, ISD::AVGCEILS,1248ISD::AVGCEILU, ISD::SADDSAT, ISD::UADDSAT,1249ISD::SSUBSAT, ISD::USUBSAT},1250VT, Custom);12511252setOperationAction(ISD::VSELECT, VT, Custom);12531254setOperationAction(1255{ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, VT, Custom);12561257// Custom-lower reduction operations to set up the corresponding custom1258// nodes' operands.1259setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX,1260ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX,1261ISD::VECREDUCE_UMIN},1262VT, Custom);12631264setOperationAction(IntegerVPOps, VT, Custom);12651266if (Subtarget.hasStdExtZvkb())1267setOperationAction({ISD::BSWAP, ISD::ROTL, ISD::ROTR}, VT, Custom);12681269if (Subtarget.hasStdExtZvbb()) {1270setOperationAction({ISD::BITREVERSE, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,1271ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, 
ISD::CTPOP},1272VT, Custom);1273} else {1274// Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the1275// range of f32.1276EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());1277if (isTypeLegal(FloatVT))1278setOperationAction(1279{ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,1280Custom);1281}1282}12831284for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {1285// There are no extending loads or truncating stores.1286for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {1287setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);1288setTruncStoreAction(VT, InnerVT, Expand);1289}12901291if (!useRVVForFixedLengthVectorVT(VT))1292continue;12931294// By default everything must be expanded.1295for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)1296setOperationAction(Op, VT, Expand);12971298// Custom lower fixed vector undefs to scalable vector undefs to avoid1299// expansion to a build_vector of 0s.1300setOperationAction(ISD::UNDEF, VT, Custom);13011302setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,1303ISD::EXTRACT_SUBVECTOR},1304VT, Custom);13051306// FIXME: mload, mstore, mgather, mscatter, vp_load/store,1307// vp_stride_load/store, vp_gather/scatter can be hoisted to here.1308setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);13091310setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);1311setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,1312Custom);13131314if (VT.getVectorElementType() == MVT::f16 &&1315!Subtarget.hasVInstructionsF16()) {1316setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);1317setOperationAction(1318{ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,1319Custom);1320setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP,1321ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},1322VT, Custom);1323setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);1324if (Subtarget.hasStdExtZfhmin()) {1325// FIXME: We should prefer BUILD_VECTOR over SPLAT_VECTOR.1326setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);1327} else {1328// We need to custom legalize f16 build vectors if Zfhmin isn't1329// available.1330setOperationAction(ISD::BUILD_VECTOR, MVT::f16, Custom);1331}1332MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());1333// Don't promote f16 vector operations to f32 if f32 vector type is1334// not legal.1335// TODO: could split the f16 vector into two vectors and do promotion.1336if (!isTypeLegal(F32VecVT))1337continue;1338setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);1339setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);1340continue;1341}13421343if (VT.getVectorElementType() == MVT::bf16) {1344setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);1345// FIXME: We should prefer BUILD_VECTOR over SPLAT_VECTOR.1346setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);1347setOperationAction(1348{ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,1349Custom);1350// TODO: Promote to fp32.1351continue;1352}13531354setOperationAction({ISD::BUILD_VECTOR, ISD::VECTOR_SHUFFLE,1355ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},1356VT, Custom);13571358setOperationAction(1359{ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);13601361setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,1362ISD::EXPERIMENTAL_VP_STRIDED_LOAD,1363ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,1364ISD::VP_SCATTER},1365VT, Custom);13661367setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV,1368ISD::FNEG, 
ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,1369ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM,1370ISD::IS_FPCLASS, ISD::FMAXIMUM, ISD::FMINIMUM},1371VT, Custom);13721373setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,1374ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT},1375VT, Custom);13761377setCondCodeAction(VFPCCToExpand, VT, Expand);13781379setOperationAction(ISD::SETCC, VT, Custom);1380setOperationAction({ISD::VSELECT, ISD::SELECT}, VT, Custom);13811382setOperationAction(ISD::BITCAST, VT, Custom);13831384setOperationAction(FloatingPointVecReduceOps, VT, Custom);13851386setOperationAction(FloatingPointVPOps, VT, Custom);13871388setOperationAction(1389{ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,1390ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA,1391ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, ISD::STRICT_FTRUNC,1392ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,1393ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},1394VT, Custom);1395}13961397// Custom-legalize bitcasts from fixed-length vectors to scalar types.1398setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32}, Custom);1399if (Subtarget.is64Bit())1400setOperationAction(ISD::BITCAST, MVT::i64, Custom);1401if (Subtarget.hasStdExtZfhminOrZhinxmin())1402setOperationAction(ISD::BITCAST, MVT::f16, Custom);1403if (Subtarget.hasStdExtFOrZfinx())1404setOperationAction(ISD::BITCAST, MVT::f32, Custom);1405if (Subtarget.hasStdExtDOrZdinx())1406setOperationAction(ISD::BITCAST, MVT::f64, Custom);1407}1408}14091410if (Subtarget.hasStdExtA()) {1411setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand);1412if (RV64LegalI32 && Subtarget.is64Bit())1413setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);1414}14151416if (Subtarget.hasForcedAtomics()) {1417// Force __sync libcalls to be emitted for atomic rmw/cas operations.1418setOperationAction(1419{ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD,1420ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,1421ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN,1422ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},1423XLenVT, LibCall);1424}14251426if (Subtarget.hasVendorXTHeadMemIdx()) {1427for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {1428setIndexedLoadAction(im, MVT::i8, Legal);1429setIndexedStoreAction(im, MVT::i8, Legal);1430setIndexedLoadAction(im, MVT::i16, Legal);1431setIndexedStoreAction(im, MVT::i16, Legal);1432setIndexedLoadAction(im, MVT::i32, Legal);1433setIndexedStoreAction(im, MVT::i32, Legal);14341435if (Subtarget.is64Bit()) {1436setIndexedLoadAction(im, MVT::i64, Legal);1437setIndexedStoreAction(im, MVT::i64, Legal);1438}1439}1440}14411442if (Subtarget.hasVendorXCVmem()) {1443setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);1444setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal);1445setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);14461447setIndexedStoreAction(ISD::POST_INC, MVT::i8, Legal);1448setIndexedStoreAction(ISD::POST_INC, MVT::i16, Legal);1449setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);1450}14511452if (Subtarget.hasVendorXCValu()) {1453setOperationAction(ISD::ABS, XLenVT, Legal);1454setOperationAction(ISD::SMIN, XLenVT, Legal);1455setOperationAction(ISD::UMIN, XLenVT, Legal);1456setOperationAction(ISD::SMAX, XLenVT, Legal);1457setOperationAction(ISD::UMAX, XLenVT, Legal);1458setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal);1459setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);1460}14611462// Function alignments.1463const Align 
FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);1464setMinFunctionAlignment(FunctionAlignment);1465// Set preferred alignments.1466setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());1467setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());14681469setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN,1470ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::MUL,1471ISD::AND, ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT});1472if (Subtarget.is64Bit())1473setTargetDAGCombine(ISD::SRA);14741475if (Subtarget.hasStdExtFOrZfinx())1476setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM});14771478if (Subtarget.hasStdExtZbb())1479setTargetDAGCombine({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN});14801481if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||1482Subtarget.hasStdExtV())1483setTargetDAGCombine(ISD::TRUNCATE);14841485if (Subtarget.hasStdExtZbkb())1486setTargetDAGCombine(ISD::BITREVERSE);1487if (Subtarget.hasStdExtZfhminOrZhinxmin())1488setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);1489if (Subtarget.hasStdExtFOrZfinx())1490setTargetDAGCombine({ISD::ZERO_EXTEND, ISD::FP_TO_SINT, ISD::FP_TO_UINT,1491ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT});1492if (Subtarget.hasVInstructions())1493setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,1494ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,1495ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR,1496ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,1497ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,1498ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM,1499ISD::INSERT_VECTOR_ELT, ISD::ABS});1500if (Subtarget.hasVendorXTHeadMemPair())1501setTargetDAGCombine({ISD::LOAD, ISD::STORE});1502if (Subtarget.useRVVForFixedLengthVectors())1503setTargetDAGCombine(ISD::BITCAST);15041505setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");1506setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");15071508// Disable strict node mutation.1509IsStrictFPEnabled = true;15101511// Let the subtarget decide if a predictable select is more expensive than the1512// corresponding branch. 
This information is used in CGP/SelectOpt to decide1513// when to convert selects into branches.1514PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();1515}15161517EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,1518LLVMContext &Context,1519EVT VT) const {1520if (!VT.isVector())1521return getPointerTy(DL);1522if (Subtarget.hasVInstructions() &&1523(VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))1524return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());1525return VT.changeVectorElementTypeToInteger();1526}15271528MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {1529return Subtarget.getXLenVT();1530}15311532// Return false if we can lower get_vector_length to a vsetvli intrinsic.1533bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,1534unsigned VF,1535bool IsScalable) const {1536if (!Subtarget.hasVInstructions())1537return true;15381539if (!IsScalable)1540return true;15411542if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())1543return true;15441545// Don't allow VF=1 if those types are't legal.1546if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())1547return true;15481549// VLEN=32 support is incomplete.1550if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)1551return true;15521553// The maximum VF is for the smallest element width with LMUL=8.1554// VF must be a power of 2.1555unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;1556return VF > MaxVF || !isPowerOf2_32(VF);1557}15581559bool RISCVTargetLowering::shouldExpandCttzElements(EVT VT) const {1560return !Subtarget.hasVInstructions() ||1561VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);1562}15631564bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,1565const CallInst &I,1566MachineFunction &MF,1567unsigned Intrinsic) const {1568auto &DL = I.getDataLayout();15691570auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,1571bool IsUnitStrided, bool UsePtrVal = false) {1572Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN;1573// We can't use ptrVal if the intrinsic can access memory before the1574// pointer. This means we can't use it for strided or indexed intrinsics.1575if (UsePtrVal)1576Info.ptrVal = I.getArgOperand(PtrOp);1577else1578Info.fallbackAddressSpace =1579I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();1580Type *MemTy;1581if (IsStore) {1582// Store value is the first operand.1583MemTy = I.getArgOperand(0)->getType();1584} else {1585// Use return type. If it's segment load, return type is a struct.1586MemTy = I.getType();1587if (MemTy->isStructTy())1588MemTy = MemTy->getStructElementType(0);1589}1590if (!IsUnitStrided)1591MemTy = MemTy->getScalarType();15921593Info.memVT = getValueType(DL, MemTy);1594Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);1595Info.size = MemoryLocation::UnknownSize;1596Info.flags |=1597IsStore ? 
MachineMemOperand::MOStore : MachineMemOperand::MOLoad;1598return true;1599};16001601if (I.hasMetadata(LLVMContext::MD_nontemporal))1602Info.flags |= MachineMemOperand::MONonTemporal;16031604Info.flags |= RISCVTargetLowering::getTargetMMOFlags(I);1605switch (Intrinsic) {1606default:1607return false;1608case Intrinsic::riscv_masked_atomicrmw_xchg_i32:1609case Intrinsic::riscv_masked_atomicrmw_add_i32:1610case Intrinsic::riscv_masked_atomicrmw_sub_i32:1611case Intrinsic::riscv_masked_atomicrmw_nand_i32:1612case Intrinsic::riscv_masked_atomicrmw_max_i32:1613case Intrinsic::riscv_masked_atomicrmw_min_i32:1614case Intrinsic::riscv_masked_atomicrmw_umax_i32:1615case Intrinsic::riscv_masked_atomicrmw_umin_i32:1616case Intrinsic::riscv_masked_cmpxchg_i32:1617Info.opc = ISD::INTRINSIC_W_CHAIN;1618Info.memVT = MVT::i32;1619Info.ptrVal = I.getArgOperand(0);1620Info.offset = 0;1621Info.align = Align(4);1622Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |1623MachineMemOperand::MOVolatile;1624return true;1625case Intrinsic::riscv_masked_strided_load:1626return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,1627/*IsUnitStrided*/ false);1628case Intrinsic::riscv_masked_strided_store:1629return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true,1630/*IsUnitStrided*/ false);1631case Intrinsic::riscv_seg2_load:1632case Intrinsic::riscv_seg3_load:1633case Intrinsic::riscv_seg4_load:1634case Intrinsic::riscv_seg5_load:1635case Intrinsic::riscv_seg6_load:1636case Intrinsic::riscv_seg7_load:1637case Intrinsic::riscv_seg8_load:1638return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,1639/*IsUnitStrided*/ false, /*UsePtrVal*/ true);1640case Intrinsic::riscv_seg2_store:1641case Intrinsic::riscv_seg3_store:1642case Intrinsic::riscv_seg4_store:1643case Intrinsic::riscv_seg5_store:1644case Intrinsic::riscv_seg6_store:1645case Intrinsic::riscv_seg7_store:1646case Intrinsic::riscv_seg8_store:1647// Operands are (vec, ..., vec, ptr, vl)1648return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,1649/*IsStore*/ true,1650/*IsUnitStrided*/ false, /*UsePtrVal*/ true);1651case Intrinsic::riscv_vle:1652case Intrinsic::riscv_vle_mask:1653case Intrinsic::riscv_vleff:1654case Intrinsic::riscv_vleff_mask:1655return SetRVVLoadStoreInfo(/*PtrOp*/ 1,1656/*IsStore*/ false,1657/*IsUnitStrided*/ true,1658/*UsePtrVal*/ true);1659case Intrinsic::riscv_vse:1660case Intrinsic::riscv_vse_mask:1661return SetRVVLoadStoreInfo(/*PtrOp*/ 1,1662/*IsStore*/ true,1663/*IsUnitStrided*/ true,1664/*UsePtrVal*/ true);1665case Intrinsic::riscv_vlse:1666case Intrinsic::riscv_vlse_mask:1667case Intrinsic::riscv_vloxei:1668case Intrinsic::riscv_vloxei_mask:1669case Intrinsic::riscv_vluxei:1670case Intrinsic::riscv_vluxei_mask:1671return SetRVVLoadStoreInfo(/*PtrOp*/ 1,1672/*IsStore*/ false,1673/*IsUnitStrided*/ false);1674case Intrinsic::riscv_vsse:1675case Intrinsic::riscv_vsse_mask:1676case Intrinsic::riscv_vsoxei:1677case Intrinsic::riscv_vsoxei_mask:1678case Intrinsic::riscv_vsuxei:1679case Intrinsic::riscv_vsuxei_mask:1680return SetRVVLoadStoreInfo(/*PtrOp*/ 1,1681/*IsStore*/ true,1682/*IsUnitStrided*/ false);1683case Intrinsic::riscv_vlseg2:1684case Intrinsic::riscv_vlseg3:1685case Intrinsic::riscv_vlseg4:1686case Intrinsic::riscv_vlseg5:1687case Intrinsic::riscv_vlseg6:1688case Intrinsic::riscv_vlseg7:1689case Intrinsic::riscv_vlseg8:1690case Intrinsic::riscv_vlseg2ff:1691case Intrinsic::riscv_vlseg3ff:1692case Intrinsic::riscv_vlseg4ff:1693case Intrinsic::riscv_vlseg5ff:1694case Intrinsic::riscv_vlseg6ff:1695case 
Intrinsic::riscv_vlseg7ff:1696case Intrinsic::riscv_vlseg8ff:1697return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,1698/*IsStore*/ false,1699/*IsUnitStrided*/ false, /*UsePtrVal*/ true);1700case Intrinsic::riscv_vlseg2_mask:1701case Intrinsic::riscv_vlseg3_mask:1702case Intrinsic::riscv_vlseg4_mask:1703case Intrinsic::riscv_vlseg5_mask:1704case Intrinsic::riscv_vlseg6_mask:1705case Intrinsic::riscv_vlseg7_mask:1706case Intrinsic::riscv_vlseg8_mask:1707case Intrinsic::riscv_vlseg2ff_mask:1708case Intrinsic::riscv_vlseg3ff_mask:1709case Intrinsic::riscv_vlseg4ff_mask:1710case Intrinsic::riscv_vlseg5ff_mask:1711case Intrinsic::riscv_vlseg6ff_mask:1712case Intrinsic::riscv_vlseg7ff_mask:1713case Intrinsic::riscv_vlseg8ff_mask:1714return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,1715/*IsStore*/ false,1716/*IsUnitStrided*/ false, /*UsePtrVal*/ true);1717case Intrinsic::riscv_vlsseg2:1718case Intrinsic::riscv_vlsseg3:1719case Intrinsic::riscv_vlsseg4:1720case Intrinsic::riscv_vlsseg5:1721case Intrinsic::riscv_vlsseg6:1722case Intrinsic::riscv_vlsseg7:1723case Intrinsic::riscv_vlsseg8:1724case Intrinsic::riscv_vloxseg2:1725case Intrinsic::riscv_vloxseg3:1726case Intrinsic::riscv_vloxseg4:1727case Intrinsic::riscv_vloxseg5:1728case Intrinsic::riscv_vloxseg6:1729case Intrinsic::riscv_vloxseg7:1730case Intrinsic::riscv_vloxseg8:1731case Intrinsic::riscv_vluxseg2:1732case Intrinsic::riscv_vluxseg3:1733case Intrinsic::riscv_vluxseg4:1734case Intrinsic::riscv_vluxseg5:1735case Intrinsic::riscv_vluxseg6:1736case Intrinsic::riscv_vluxseg7:1737case Intrinsic::riscv_vluxseg8:1738return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,1739/*IsStore*/ false,1740/*IsUnitStrided*/ false);1741case Intrinsic::riscv_vlsseg2_mask:1742case Intrinsic::riscv_vlsseg3_mask:1743case Intrinsic::riscv_vlsseg4_mask:1744case Intrinsic::riscv_vlsseg5_mask:1745case Intrinsic::riscv_vlsseg6_mask:1746case Intrinsic::riscv_vlsseg7_mask:1747case Intrinsic::riscv_vlsseg8_mask:1748case Intrinsic::riscv_vloxseg2_mask:1749case Intrinsic::riscv_vloxseg3_mask:1750case Intrinsic::riscv_vloxseg4_mask:1751case Intrinsic::riscv_vloxseg5_mask:1752case Intrinsic::riscv_vloxseg6_mask:1753case Intrinsic::riscv_vloxseg7_mask:1754case Intrinsic::riscv_vloxseg8_mask:1755case Intrinsic::riscv_vluxseg2_mask:1756case Intrinsic::riscv_vluxseg3_mask:1757case Intrinsic::riscv_vluxseg4_mask:1758case Intrinsic::riscv_vluxseg5_mask:1759case Intrinsic::riscv_vluxseg6_mask:1760case Intrinsic::riscv_vluxseg7_mask:1761case Intrinsic::riscv_vluxseg8_mask:1762return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,1763/*IsStore*/ false,1764/*IsUnitStrided*/ false);1765case Intrinsic::riscv_vsseg2:1766case Intrinsic::riscv_vsseg3:1767case Intrinsic::riscv_vsseg4:1768case Intrinsic::riscv_vsseg5:1769case Intrinsic::riscv_vsseg6:1770case Intrinsic::riscv_vsseg7:1771case Intrinsic::riscv_vsseg8:1772return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,1773/*IsStore*/ true,1774/*IsUnitStrided*/ false);1775case Intrinsic::riscv_vsseg2_mask:1776case Intrinsic::riscv_vsseg3_mask:1777case Intrinsic::riscv_vsseg4_mask:1778case Intrinsic::riscv_vsseg5_mask:1779case Intrinsic::riscv_vsseg6_mask:1780case Intrinsic::riscv_vsseg7_mask:1781case Intrinsic::riscv_vsseg8_mask:1782return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,1783/*IsStore*/ true,1784/*IsUnitStrided*/ false);1785case Intrinsic::riscv_vssseg2:1786case Intrinsic::riscv_vssseg3:1787case Intrinsic::riscv_vssseg4:1788case Intrinsic::riscv_vssseg5:1789case Intrinsic::riscv_vssseg6:1790case 
Intrinsic::riscv_vssseg7:1791case Intrinsic::riscv_vssseg8:1792case Intrinsic::riscv_vsoxseg2:1793case Intrinsic::riscv_vsoxseg3:1794case Intrinsic::riscv_vsoxseg4:1795case Intrinsic::riscv_vsoxseg5:1796case Intrinsic::riscv_vsoxseg6:1797case Intrinsic::riscv_vsoxseg7:1798case Intrinsic::riscv_vsoxseg8:1799case Intrinsic::riscv_vsuxseg2:1800case Intrinsic::riscv_vsuxseg3:1801case Intrinsic::riscv_vsuxseg4:1802case Intrinsic::riscv_vsuxseg5:1803case Intrinsic::riscv_vsuxseg6:1804case Intrinsic::riscv_vsuxseg7:1805case Intrinsic::riscv_vsuxseg8:1806return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,1807/*IsStore*/ true,1808/*IsUnitStrided*/ false);1809case Intrinsic::riscv_vssseg2_mask:1810case Intrinsic::riscv_vssseg3_mask:1811case Intrinsic::riscv_vssseg4_mask:1812case Intrinsic::riscv_vssseg5_mask:1813case Intrinsic::riscv_vssseg6_mask:1814case Intrinsic::riscv_vssseg7_mask:1815case Intrinsic::riscv_vssseg8_mask:1816case Intrinsic::riscv_vsoxseg2_mask:1817case Intrinsic::riscv_vsoxseg3_mask:1818case Intrinsic::riscv_vsoxseg4_mask:1819case Intrinsic::riscv_vsoxseg5_mask:1820case Intrinsic::riscv_vsoxseg6_mask:1821case Intrinsic::riscv_vsoxseg7_mask:1822case Intrinsic::riscv_vsoxseg8_mask:1823case Intrinsic::riscv_vsuxseg2_mask:1824case Intrinsic::riscv_vsuxseg3_mask:1825case Intrinsic::riscv_vsuxseg4_mask:1826case Intrinsic::riscv_vsuxseg5_mask:1827case Intrinsic::riscv_vsuxseg6_mask:1828case Intrinsic::riscv_vsuxseg7_mask:1829case Intrinsic::riscv_vsuxseg8_mask:1830return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,1831/*IsStore*/ true,1832/*IsUnitStrided*/ false);1833}1834}18351836bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,1837const AddrMode &AM, Type *Ty,1838unsigned AS,1839Instruction *I) const {1840// No global is ever allowed as a base.1841if (AM.BaseGV)1842return false;18431844// RVV instructions only support register addressing.1845if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))1846return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;18471848// Require a 12-bit signed offset.1849if (!isInt<12>(AM.BaseOffs))1850return false;18511852switch (AM.Scale) {1853case 0: // "r+i" or just "i", depending on HasBaseReg.1854break;1855case 1:1856if (!AM.HasBaseReg) // allow "r+i".1857break;1858return false; // disallow "r+r" or "r+r+i".1859default:1860return false;1861}18621863return true;1864}18651866bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {1867return isInt<12>(Imm);1868}18691870bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {1871return isInt<12>(Imm);1872}18731874// On RV32, 64-bit integers are split into their high and low parts and held1875// in two different registers, so the trunc is free since the low register can1876// just be used.1877// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of1878// isTruncateFree?1879bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {1880if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())1881return false;1882unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();1883unsigned DestBits = DstTy->getPrimitiveSizeInBits();1884return (SrcBits == 64 && DestBits == 32);1885}18861887bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {1888// We consider i64->i32 free on RV64 since we have good selection of W1889// instructions that make promoting operations back to i64 free in many cases.1890if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||1891!DstVT.isInteger())1892return 
false;1893unsigned SrcBits = SrcVT.getSizeInBits();1894unsigned DestBits = DstVT.getSizeInBits();1895return (SrcBits == 64 && DestBits == 32);1896}18971898bool RISCVTargetLowering::isTruncateFree(SDValue Val, EVT VT2) const {1899EVT SrcVT = Val.getValueType();1900// free truncate from vnsrl and vnsra1901if (Subtarget.hasStdExtV() &&1902(Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) &&1903SrcVT.isVector() && VT2.isVector()) {1904unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();1905unsigned DestBits = VT2.getVectorElementType().getSizeInBits();1906if (SrcBits == DestBits * 2) {1907return true;1908}1909}1910return TargetLowering::isTruncateFree(Val, VT2);1911}19121913bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {1914// Zexts are free if they can be combined with a load.1915// Don't advertise i32->i64 zextload as being free for RV64. It interacts1916// poorly with type legalization of compares preferring sext.1917if (auto *LD = dyn_cast<LoadSDNode>(Val)) {1918EVT MemVT = LD->getMemoryVT();1919if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&1920(LD->getExtensionType() == ISD::NON_EXTLOAD ||1921LD->getExtensionType() == ISD::ZEXTLOAD))1922return true;1923}19241925return TargetLowering::isZExtFree(Val, VT2);1926}19271928bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {1929return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;1930}19311932bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const {1933return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);1934}19351936bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {1937return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXCVbitmanip();1938}19391940bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {1941return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||1942Subtarget.hasVendorXCVbitmanip();1943}19441945bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(1946const Instruction &AndI) const {1947// We expect to be able to match a bit extraction instruction if the Zbs1948// extension is supported and the mask is a power of two. However, we1949// conservatively return false if the mask would fit in an ANDI instruction,1950// on the basis that it's possible the sinking+duplication of the AND in1951// CodeGenPrepare triggered by this hook wouldn't decrease the instruction1952// count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).1953if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())1954return false;1955ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));1956if (!Mask)1957return false;1958return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();1959}19601961bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {1962EVT VT = Y.getValueType();19631964// FIXME: Support vectors once we have tests.1965if (VT.isVector())1966return false;19671968return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&1969(!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());1970}19711972bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const {1973// Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.1974if (Subtarget.hasStdExtZbs())1975return X.getValueType().isScalarInteger();1976auto *C = dyn_cast<ConstantSDNode>(Y);1977// XTheadBs provides th.tst (similar to bexti), if Y is a constant1978if (Subtarget.hasVendorXTHeadBs())1979return C != nullptr;1980// We can use ANDI+SEQZ/SNEZ as a bit test. 
Y contains the bit position.1981return C && C->getAPIntValue().ule(10);1982}19831984bool RISCVTargetLowering::shouldFoldSelectWithIdentityConstant(unsigned Opcode,1985EVT VT) const {1986// Only enable for rvv.1987if (!VT.isVector() || !Subtarget.hasVInstructions())1988return false;19891990if (VT.isFixedLengthVector() && !isTypeLegal(VT))1991return false;19921993return true;1994}19951996bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,1997Type *Ty) const {1998assert(Ty->isIntegerTy());19992000unsigned BitSize = Ty->getIntegerBitWidth();2001if (BitSize > Subtarget.getXLen())2002return false;20032004// Fast path, assume 32-bit immediates are cheap.2005int64_t Val = Imm.getSExtValue();2006if (isInt<32>(Val))2007return true;20082009// A constant pool entry may be more aligned than the load we're trying to2010// replace. If we don't support unaligned scalar mem, prefer the constant2011// pool.2012// TODO: Can the caller pass down the alignment?2013if (!Subtarget.enableUnalignedScalarMem())2014return true;20152016// Prefer to keep the load if it would require many instructions.2017// This uses the same threshold we use for constant pools but doesn't2018// check useConstantPoolForLargeInts.2019// TODO: Should we keep the load only when we're definitely going to emit a2020// constant pool?20212022RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, Subtarget);2023return Seq.size() <= Subtarget.getMaxBuildIntsCost();2024}20252026bool RISCVTargetLowering::2027shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(2028SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,2029unsigned OldShiftOpcode, unsigned NewShiftOpcode,2030SelectionDAG &DAG) const {2031// One interesting pattern that we'd want to form is 'bit extract':2032// ((1 >> Y) & 1) ==/!= 02033// But we also need to be careful not to try to reverse that fold.20342035// Is this '((1 >> Y) & 1)'?2036if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())2037return false; // Keep the 'bit extract' pattern.20382039// Will this be '((1 >> Y) & 1)' after the transform?2040if (NewShiftOpcode == ISD::SRL && CC->isOne())2041return true; // Do form the 'bit extract' pattern.20422043// If 'X' is a constant, and we transform, then we will immediately2044// try to undo the fold, thus causing an endless combine loop.2045// So only do the transform if X is not a constant. 
This matches the default2046// implementation of this function.2047return !XC;2048}20492050bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {2051switch (Opcode) {2052case Instruction::Add:2053case Instruction::Sub:2054case Instruction::Mul:2055case Instruction::And:2056case Instruction::Or:2057case Instruction::Xor:2058case Instruction::FAdd:2059case Instruction::FSub:2060case Instruction::FMul:2061case Instruction::FDiv:2062case Instruction::ICmp:2063case Instruction::FCmp:2064return true;2065case Instruction::Shl:2066case Instruction::LShr:2067case Instruction::AShr:2068case Instruction::UDiv:2069case Instruction::SDiv:2070case Instruction::URem:2071case Instruction::SRem:2072case Instruction::Select:2073return Operand == 1;2074default:2075return false;2076}2077}207820792080bool RISCVTargetLowering::canSplatOperand(Instruction *I, int Operand) const {2081if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())2082return false;20832084if (canSplatOperand(I->getOpcode(), Operand))2085return true;20862087auto *II = dyn_cast<IntrinsicInst>(I);2088if (!II)2089return false;20902091switch (II->getIntrinsicID()) {2092case Intrinsic::fma:2093case Intrinsic::vp_fma:2094return Operand == 0 || Operand == 1;2095case Intrinsic::vp_shl:2096case Intrinsic::vp_lshr:2097case Intrinsic::vp_ashr:2098case Intrinsic::vp_udiv:2099case Intrinsic::vp_sdiv:2100case Intrinsic::vp_urem:2101case Intrinsic::vp_srem:2102case Intrinsic::ssub_sat:2103case Intrinsic::vp_ssub_sat:2104case Intrinsic::usub_sat:2105case Intrinsic::vp_usub_sat:2106return Operand == 1;2107// These intrinsics are commutative.2108case Intrinsic::vp_add:2109case Intrinsic::vp_mul:2110case Intrinsic::vp_and:2111case Intrinsic::vp_or:2112case Intrinsic::vp_xor:2113case Intrinsic::vp_fadd:2114case Intrinsic::vp_fmul:2115case Intrinsic::vp_icmp:2116case Intrinsic::vp_fcmp:2117case Intrinsic::smin:2118case Intrinsic::vp_smin:2119case Intrinsic::umin:2120case Intrinsic::vp_umin:2121case Intrinsic::smax:2122case Intrinsic::vp_smax:2123case Intrinsic::umax:2124case Intrinsic::vp_umax:2125case Intrinsic::sadd_sat:2126case Intrinsic::vp_sadd_sat:2127case Intrinsic::uadd_sat:2128case Intrinsic::vp_uadd_sat:2129// These intrinsics have 'vr' versions.2130case Intrinsic::vp_sub:2131case Intrinsic::vp_fsub:2132case Intrinsic::vp_fdiv:2133return Operand == 0 || Operand == 1;2134default:2135return false;2136}2137}21382139/// Check if sinking \p I's operands to I's basic block is profitable, because2140/// the operands can be folded into a target instruction, e.g.2141/// splats of scalars can fold into vector instructions.2142bool RISCVTargetLowering::shouldSinkOperands(2143Instruction *I, SmallVectorImpl<Use *> &Ops) const {2144using namespace llvm::PatternMatch;21452146if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())2147return false;21482149// Don't sink splat operands if the target prefers it. 
Some targets require2150// S2V transfer buffers and we can run out of them copying the same value2151// repeatedly.2152// FIXME: It could still be worth doing if it would improve vector register2153// pressure and prevent a vector spill.2154if (!Subtarget.sinkSplatOperands())2155return false;21562157for (auto OpIdx : enumerate(I->operands())) {2158if (!canSplatOperand(I, OpIdx.index()))2159continue;21602161Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());2162// Make sure we are not already sinking this operand.2163if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))2164continue;21652166// We are looking for a splat that can be sunk.2167if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),2168m_Undef(), m_ZeroMask())))2169continue;21702171// Don't sink i1 splats.2172if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))2173continue;21742175// All uses of the shuffle should be sunk to avoid duplicating it across gpr2176// and vector registers.2177for (Use &U : Op->uses()) {2178Instruction *Insn = cast<Instruction>(U.getUser());2179if (!canSplatOperand(Insn, U.getOperandNo()))2180return false;2181}21822183Ops.push_back(&Op->getOperandUse(0));2184Ops.push_back(&OpIdx.value());2185}2186return true;2187}21882189bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {2190unsigned Opc = VecOp.getOpcode();21912192// Assume target opcodes can't be scalarized.2193// TODO - do we have any exceptions?2194if (Opc >= ISD::BUILTIN_OP_END)2195return false;21962197// If the vector op is not supported, try to convert to scalar.2198EVT VecVT = VecOp.getValueType();2199if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))2200return true;22012202// If the vector op is supported, but the scalar op is not, the transform may2203// not be worthwhile.2204// Permit a vector binary operation to be converted to a scalar binary2205// operation which is custom lowered with an illegal type.2206EVT ScalarVT = VecVT.getScalarType();2207return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||2208isOperationCustom(Opc, ScalarVT);2209}22102211bool RISCVTargetLowering::isOffsetFoldingLegal(2212const GlobalAddressSDNode *GA) const {2213// In order to maximise the opportunity for common subexpression elimination,2214// keep a separate ADD node for the global address offset instead of folding2215// it in the global address node. Later peephole optimisations may choose to2216// fold it back in when profitable.2217return false;2218}22192220// Return one of the following:2221// (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value.2222// (2) `{0-31 value, true}` if Imm is negative and FLI is available for its2223// positive counterpart, which will be materialized from the first returned2224// element. 
The second returned element indicated that there should be a FNEG2225// followed.2226// (3) `{-1, _}` if there is no way FLI can be used to materialize Imm.2227std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm,2228EVT VT) const {2229if (!Subtarget.hasStdExtZfa())2230return std::make_pair(-1, false);22312232bool IsSupportedVT = false;2233if (VT == MVT::f16) {2234IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();2235} else if (VT == MVT::f32) {2236IsSupportedVT = true;2237} else if (VT == MVT::f64) {2238assert(Subtarget.hasStdExtD() && "Expect D extension");2239IsSupportedVT = true;2240}22412242if (!IsSupportedVT)2243return std::make_pair(-1, false);22442245int Index = RISCVLoadFPImm::getLoadFPImm(Imm);2246if (Index < 0 && Imm.isNegative())2247// Try the combination of its positive counterpart + FNEG.2248return std::make_pair(RISCVLoadFPImm::getLoadFPImm(-Imm), true);2249else2250return std::make_pair(Index, false);2251}22522253bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,2254bool ForCodeSize) const {2255bool IsLegalVT = false;2256if (VT == MVT::f16)2257IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();2258else if (VT == MVT::f32)2259IsLegalVT = Subtarget.hasStdExtFOrZfinx();2260else if (VT == MVT::f64)2261IsLegalVT = Subtarget.hasStdExtDOrZdinx();2262else if (VT == MVT::bf16)2263IsLegalVT = Subtarget.hasStdExtZfbfmin();22642265if (!IsLegalVT)2266return false;22672268if (getLegalZfaFPImm(Imm, VT).first >= 0)2269return true;22702271// Cannot create a 64 bit floating-point immediate value for rv32.2272if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {2273// td can handle +0.0 or -0.0 already.2274// -0.0 can be created by fmv + fneg.2275return Imm.isZero();2276}22772278// Special case: fmv + fneg2279if (Imm.isNegZero())2280return true;22812282// Building an integer and then converting requires a fmv at the end of2283// the integer sequence.2284const int Cost =22851 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(),2286Subtarget);2287return Cost <= FPImmCost;2288}22892290// TODO: This is very conservative.2291bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,2292unsigned Index) const {2293if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))2294return false;22952296// Only support extracting a fixed from a fixed vector for now.2297if (ResVT.isScalableVector() || SrcVT.isScalableVector())2298return false;22992300EVT EltVT = ResVT.getVectorElementType();2301assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");23022303// The smallest type we can slide is i8.2304// TODO: We can extract index 0 from a mask vector without a slide.2305if (EltVT == MVT::i1)2306return false;23072308unsigned ResElts = ResVT.getVectorNumElements();2309unsigned SrcElts = SrcVT.getVectorNumElements();23102311unsigned MinVLen = Subtarget.getRealMinVLen();2312unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();23132314// If we're extracting only data from the first VLEN bits of the source2315// then we can always do this with an m1 vslidedown.vx. Restricting the2316// Index ensures we can use a vslidedown.vi.2317// TODO: We can generalize this when the exact VLEN is known.2318if (Index + ResElts <= MinVLMAX && Index < 31)2319return true;23202321// Convervatively only handle extracting half of a vector.2322// TODO: For sizes which aren't multiples of VLEN sizes, this may not be2323// a cheap extract. However, this case is important in practice for2324// shuffled extracts of longer vectors. 
How resolve?2325if ((ResElts * 2) != SrcElts)2326return false;23272328// Slide can support arbitrary index, but we only treat vslidedown.vi as2329// cheap.2330if (Index >= 32)2331return false;23322333// TODO: We can do arbitrary slidedowns, but for now only support extracting2334// the upper half of a vector until we have more test coverage.2335return Index == 0 || Index == ResElts;2336}23372338MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,2339CallingConv::ID CC,2340EVT VT) const {2341// Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.2342// We might still end up using a GPR but that will be decided based on ABI.2343if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&2344!Subtarget.hasStdExtZfhminOrZhinxmin())2345return MVT::f32;23462347MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);23482349if (RV64LegalI32 && Subtarget.is64Bit() && PartVT == MVT::i32)2350return MVT::i64;23512352return PartVT;2353}23542355unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,2356CallingConv::ID CC,2357EVT VT) const {2358// Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.2359// We might still end up using a GPR but that will be decided based on ABI.2360if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&2361!Subtarget.hasStdExtZfhminOrZhinxmin())2362return 1;23632364return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);2365}23662367unsigned RISCVTargetLowering::getVectorTypeBreakdownForCallingConv(2368LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,2369unsigned &NumIntermediates, MVT &RegisterVT) const {2370unsigned NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv(2371Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);23722373if (RV64LegalI32 && Subtarget.is64Bit() && IntermediateVT == MVT::i32)2374IntermediateVT = MVT::i64;23752376if (RV64LegalI32 && Subtarget.is64Bit() && RegisterVT == MVT::i32)2377RegisterVT = MVT::i64;23782379return NumRegs;2380}23812382// Changes the condition code and swaps operands if necessary, so the SetCC2383// operation matches one of the comparisons supported directly by branches2384// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare2385// with 1/-1.2386static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,2387ISD::CondCode &CC, SelectionDAG &DAG) {2388// If this is a single bit test that can't be handled by ANDI, shift the2389// bit to be tested to the MSB and perform a signed compare with 0.2390if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&2391LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&2392isa<ConstantSDNode>(LHS.getOperand(1))) {2393uint64_t Mask = LHS.getConstantOperandVal(1);2394if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {2395unsigned ShAmt = 0;2396if (isPowerOf2_64(Mask)) {2397CC = CC == ISD::SETEQ ? 
ISD::SETGE : ISD::SETLT;2398ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);2399} else {2400ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);2401}24022403LHS = LHS.getOperand(0);2404if (ShAmt != 0)2405LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,2406DAG.getConstant(ShAmt, DL, LHS.getValueType()));2407return;2408}2409}24102411if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {2412int64_t C = RHSC->getSExtValue();2413switch (CC) {2414default: break;2415case ISD::SETGT:2416// Convert X > -1 to X >= 0.2417if (C == -1) {2418RHS = DAG.getConstant(0, DL, RHS.getValueType());2419CC = ISD::SETGE;2420return;2421}2422break;2423case ISD::SETLT:2424// Convert X < 1 to 0 >= X.2425if (C == 1) {2426RHS = LHS;2427LHS = DAG.getConstant(0, DL, RHS.getValueType());2428CC = ISD::SETGE;2429return;2430}2431break;2432}2433}24342435switch (CC) {2436default:2437break;2438case ISD::SETGT:2439case ISD::SETLE:2440case ISD::SETUGT:2441case ISD::SETULE:2442CC = ISD::getSetCCSwappedOperands(CC);2443std::swap(LHS, RHS);2444break;2445}2446}24472448RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {2449assert(VT.isScalableVector() && "Expecting a scalable vector type");2450unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();2451if (VT.getVectorElementType() == MVT::i1)2452KnownSize *= 8;24532454switch (KnownSize) {2455default:2456llvm_unreachable("Invalid LMUL.");2457case 8:2458return RISCVII::VLMUL::LMUL_F8;2459case 16:2460return RISCVII::VLMUL::LMUL_F4;2461case 32:2462return RISCVII::VLMUL::LMUL_F2;2463case 64:2464return RISCVII::VLMUL::LMUL_1;2465case 128:2466return RISCVII::VLMUL::LMUL_2;2467case 256:2468return RISCVII::VLMUL::LMUL_4;2469case 512:2470return RISCVII::VLMUL::LMUL_8;2471}2472}24732474unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {2475switch (LMul) {2476default:2477llvm_unreachable("Invalid LMUL.");2478case RISCVII::VLMUL::LMUL_F8:2479case RISCVII::VLMUL::LMUL_F4:2480case RISCVII::VLMUL::LMUL_F2:2481case RISCVII::VLMUL::LMUL_1:2482return RISCV::VRRegClassID;2483case RISCVII::VLMUL::LMUL_2:2484return RISCV::VRM2RegClassID;2485case RISCVII::VLMUL::LMUL_4:2486return RISCV::VRM4RegClassID;2487case RISCVII::VLMUL::LMUL_8:2488return RISCV::VRM8RegClassID;2489}2490}24912492unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {2493RISCVII::VLMUL LMUL = getLMUL(VT);2494if (LMUL == RISCVII::VLMUL::LMUL_F8 ||2495LMUL == RISCVII::VLMUL::LMUL_F4 ||2496LMUL == RISCVII::VLMUL::LMUL_F2 ||2497LMUL == RISCVII::VLMUL::LMUL_1) {2498static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,2499"Unexpected subreg numbering");2500return RISCV::sub_vrm1_0 + Index;2501}2502if (LMUL == RISCVII::VLMUL::LMUL_2) {2503static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,2504"Unexpected subreg numbering");2505return RISCV::sub_vrm2_0 + Index;2506}2507if (LMUL == RISCVII::VLMUL::LMUL_4) {2508static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,2509"Unexpected subreg numbering");2510return RISCV::sub_vrm4_0 + Index;2511}2512llvm_unreachable("Invalid vector type.");2513}25142515unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {2516if (VT.getVectorElementType() == MVT::i1)2517return RISCV::VRRegClassID;2518return getRegClassIDForLMUL(getLMUL(VT));2519}25202521// Attempt to decompose a subvector insert/extract between VecVT and2522// SubVecVT via subregister indices. 
Returns the subregister index that2523// can perform the subvector insert/extract with the given element index, as2524// well as the index corresponding to any leftover subvectors that must be2525// further inserted/extracted within the register class for SubVecVT.2526std::pair<unsigned, unsigned>2527RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(2528MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,2529const RISCVRegisterInfo *TRI) {2530static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&2531RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&2532RISCV::VRM2RegClassID > RISCV::VRRegClassID),2533"Register classes not ordered");2534unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);2535unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);2536// Try to compose a subregister index that takes us from the incoming2537// LMUL>1 register class down to the outgoing one. At each step we half2538// the LMUL:2539// nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_02540// Note that this is not guaranteed to find a subregister index, such as2541// when we are extracting from one VR type to another.2542unsigned SubRegIdx = RISCV::NoSubRegister;2543for (const unsigned RCID :2544{RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})2545if (VecRegClassID > RCID && SubRegClassID <= RCID) {2546VecVT = VecVT.getHalfNumVectorElementsVT();2547bool IsHi =2548InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();2549SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,2550getSubregIndexByMVT(VecVT, IsHi));2551if (IsHi)2552InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();2553}2554return {SubRegIdx, InsertExtractIdx};2555}25562557// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar2558// stores for those types.2559bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {2560return !Subtarget.useRVVForFixedLengthVectors() ||2561(VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);2562}25632564bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {2565if (!ScalarTy.isSimple())2566return false;2567switch (ScalarTy.getSimpleVT().SimpleTy) {2568case MVT::iPTR:2569return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;2570case MVT::i8:2571case MVT::i16:2572case MVT::i32:2573return true;2574case MVT::i64:2575return Subtarget.hasVInstructionsI64();2576case MVT::f16:2577return Subtarget.hasVInstructionsF16();2578case MVT::f32:2579return Subtarget.hasVInstructionsF32();2580case MVT::f64:2581return Subtarget.hasVInstructionsF64();2582default:2583return false;2584}2585}258625872588unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {2589return NumRepeatedDivisors;2590}25912592static SDValue getVLOperand(SDValue Op) {2593assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||2594Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&2595"Unexpected opcode");2596bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;2597unsigned IntNo = Op.getConstantOperandVal(HasChain ? 
1 : 0);2598const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =2599RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);2600if (!II)2601return SDValue();2602return Op.getOperand(II->VLOperand + 1 + HasChain);2603}26042605static bool useRVVForFixedLengthVectorVT(MVT VT,2606const RISCVSubtarget &Subtarget) {2607assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");2608if (!Subtarget.useRVVForFixedLengthVectors())2609return false;26102611// We only support a set of vector types with a consistent maximum fixed size2612// across all supported vector element types to avoid legalization issues.2613// Therefore -- since the largest is v1024i8/v512i16/etc -- the largest2614// fixed-length vector type we support is 1024 bytes.2615if (VT.getFixedSizeInBits() > 1024 * 8)2616return false;26172618unsigned MinVLen = Subtarget.getRealMinVLen();26192620MVT EltVT = VT.getVectorElementType();26212622// Don't use RVV for vectors we cannot scalarize if required.2623switch (EltVT.SimpleTy) {2624// i1 is supported but has different rules.2625default:2626return false;2627case MVT::i1:2628// Masks can only use a single register.2629if (VT.getVectorNumElements() > MinVLen)2630return false;2631MinVLen /= 8;2632break;2633case MVT::i8:2634case MVT::i16:2635case MVT::i32:2636break;2637case MVT::i64:2638if (!Subtarget.hasVInstructionsI64())2639return false;2640break;2641case MVT::f16:2642if (!Subtarget.hasVInstructionsF16Minimal())2643return false;2644break;2645case MVT::bf16:2646if (!Subtarget.hasVInstructionsBF16())2647return false;2648break;2649case MVT::f32:2650if (!Subtarget.hasVInstructionsF32())2651return false;2652break;2653case MVT::f64:2654if (!Subtarget.hasVInstructionsF64())2655return false;2656break;2657}26582659// Reject elements larger than ELEN.2660if (EltVT.getSizeInBits() > Subtarget.getELen())2661return false;26622663unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);2664// Don't use RVV for types that don't fit.2665if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())2666return false;26672668// TODO: Perhaps an artificial restriction, but worth having whilst getting2669// the base fixed length RVV support in place.2670if (!VT.isPow2VectorType())2671return false;26722673return true;2674}26752676bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {2677return ::useRVVForFixedLengthVectorVT(VT, Subtarget);2678}26792680// Return the largest legal scalable vector type that matches VT's element type.2681static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,2682const RISCVSubtarget &Subtarget) {2683// This may be called before legal types are setup.2684assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||2685useRVVForFixedLengthVectorVT(VT, Subtarget)) &&2686"Expected legal fixed length vector!");26872688unsigned MinVLen = Subtarget.getRealMinVLen();2689unsigned MaxELen = Subtarget.getELen();26902691MVT EltVT = VT.getVectorElementType();2692switch (EltVT.SimpleTy) {2693default:2694llvm_unreachable("unexpected element type for RVV container");2695case MVT::i1:2696case MVT::i8:2697case MVT::i16:2698case MVT::i32:2699case MVT::i64:2700case MVT::bf16:2701case MVT::f16:2702case MVT::f32:2703case MVT::f64: {2704// We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for2705// narrower types. The smallest fractional LMUL we support is 8/ELEN. 
Within2706// each fractional LMUL we support SEW between 8 and LMUL*ELEN.2707unsigned NumElts =2708(VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;2709NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);2710assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");2711return MVT::getScalableVectorVT(EltVT, NumElts);2712}2713}2714}27152716static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,2717const RISCVSubtarget &Subtarget) {2718return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,2719Subtarget);2720}27212722MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {2723return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());2724}27252726// Grow V to consume an entire RVV register.2727static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,2728const RISCVSubtarget &Subtarget) {2729assert(VT.isScalableVector() &&2730"Expected to convert into a scalable vector!");2731assert(V.getValueType().isFixedLengthVector() &&2732"Expected a fixed length vector operand!");2733SDLoc DL(V);2734SDValue Zero = DAG.getVectorIdxConstant(0, DL);2735return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);2736}27372738// Shrink V so it's just big enough to maintain a VT's worth of data.2739static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,2740const RISCVSubtarget &Subtarget) {2741assert(VT.isFixedLengthVector() &&2742"Expected to convert into a fixed length vector!");2743assert(V.getValueType().isScalableVector() &&2744"Expected a scalable vector operand!");2745SDLoc DL(V);2746SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());2747return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);2748}27492750/// Return the type of the mask type suitable for masking the provided2751/// vector type. This is simply an i1 element type vector of the same2752/// (possibly scalable) length.2753static MVT getMaskTypeFor(MVT VecVT) {2754assert(VecVT.isVector());2755ElementCount EC = VecVT.getVectorElementCount();2756return MVT::getVectorVT(MVT::i1, EC);2757}27582759/// Creates an all ones mask suitable for masking a vector of type VecTy with2760/// vector length VL. .2761static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,2762SelectionDAG &DAG) {2763MVT MaskVT = getMaskTypeFor(VecVT);2764return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);2765}27662767static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,2768SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {2769// If we know the exact VLEN, and our VL is exactly equal to VLMAX,2770// canonicalize the representation. 
InsertVSETVLI will pick the immediate2771// encoding later if profitable.2772const auto [MinVLMAX, MaxVLMAX] =2773RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);2774if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX)2775return DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());27762777return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());2778}27792780static std::pair<SDValue, SDValue>2781getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG,2782const RISCVSubtarget &Subtarget) {2783assert(VecVT.isScalableVector() && "Expecting a scalable vector");2784SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());2785SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);2786return {Mask, VL};2787}27882789static std::pair<SDValue, SDValue>2790getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,2791SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {2792assert(ContainerVT.isScalableVector() && "Expecting scalable container type");2793SDValue VL = getVLOp(NumElts, ContainerVT, DL, DAG, Subtarget);2794SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);2795return {Mask, VL};2796}27972798// Gets the two common "VL" operands: an all-ones mask and the vector length.2799// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is2800// the vector type that the fixed-length vector is contained in. Otherwise if2801// VecVT is scalable, then ContainerVT should be the same as VecVT.2802static std::pair<SDValue, SDValue>2803getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,2804const RISCVSubtarget &Subtarget) {2805if (VecVT.isFixedLengthVector())2806return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,2807Subtarget);2808assert(ContainerVT.isScalableVector() && "Expecting scalable container type");2809return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);2810}28112812SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL,2813SelectionDAG &DAG) const {2814assert(VecVT.isScalableVector() && "Expected scalable vector");2815return DAG.getElementCount(DL, Subtarget.getXLenVT(),2816VecVT.getVectorElementCount());2817}28182819std::pair<unsigned, unsigned>2820RISCVTargetLowering::computeVLMAXBounds(MVT VecVT,2821const RISCVSubtarget &Subtarget) {2822assert(VecVT.isScalableVector() && "Expected scalable vector");28232824unsigned EltSize = VecVT.getScalarSizeInBits();2825unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();28262827unsigned VectorBitsMax = Subtarget.getRealMaxVLen();2828unsigned MaxVLMAX =2829RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);28302831unsigned VectorBitsMin = Subtarget.getRealMinVLen();2832unsigned MinVLMAX =2833RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);28342835return std::make_pair(MinVLMAX, MaxVLMAX);2836}28372838// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few2839// of either is (currently) supported. This can get us into an infinite loop2840// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR2841// as a ..., etc.2842// Until either (or both) of these can reliably lower any node, reporting that2843// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks2844// the infinite loop. 
Note that this lowers BUILD_VECTOR through the stack,2844// which is not desirable.2845bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(2846EVT VT, unsigned DefinedValues) const {2847return false;2848}28492850InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const {2851// TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is2852// implementation-defined.2853if (!VT.isVector())2854return InstructionCost::getInvalid();2855unsigned DLenFactor = Subtarget.getDLenFactor();2856unsigned Cost;2857if (VT.isScalableVector()) {2858unsigned LMul;2859bool Fractional;2860std::tie(LMul, Fractional) =2861RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(VT));2862if (Fractional)2863Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;2864else2865Cost = (LMul * DLenFactor);2866} else {2867Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);2868}2869return Cost;2870}287128722873/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv2874/// is generally quadratic in the number of vregs implied by LMUL. Note that2875/// the operands (index and possibly mask) are handled separately.2876InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const {2877return getLMULCost(VT) * getLMULCost(VT);2878}28792880/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.2881/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,2882/// or may track the vrgather.vv cost. It is implementation-dependent.2883InstructionCost RISCVTargetLowering::getVRGatherVICost(MVT VT) const {2884return getLMULCost(VT);2885}28862887/// Return the cost of a vslidedown.vx or vslideup.vx instruction2888/// for the type VT. (This does not cover the vslide1up or vslide1down2889/// variants.) Slides may be linear in the number of vregs implied by LMUL,2890/// or may track the vrgather.vv cost. It is implementation-dependent.2891InstructionCost RISCVTargetLowering::getVSlideVXCost(MVT VT) const {2892return getLMULCost(VT);2893}28942895/// Return the cost of a vslidedown.vi or vslideup.vi instruction2896/// for the type VT. (This does not cover the vslide1up or vslide1down2897/// variants.) Slides may be linear in the number of vregs implied by LMUL,2898/// or may track the vrgather.vv cost. It is implementation-dependent.2899InstructionCost RISCVTargetLowering::getVSlideVICost(MVT VT) const {2900return getLMULCost(VT);2901}29022903static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,2904const RISCVSubtarget &Subtarget) {2905// RISC-V FP-to-int conversions saturate to the destination register size, but2906// don't produce 0 for nan. We can use a conversion instruction and fix the2907// nan case with a compare and a select.2908SDValue Src = Op.getOperand(0);29092910MVT DstVT = Op.getSimpleValueType();2911EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();29122913bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;29142915if (!DstVT.isVector()) {2916// For bf16 or for f16 in the absence of Zfh, promote to f32, then saturate2917// the result.2918if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||2919Src.getValueType() == MVT::bf16) {2920Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);2921}29222923unsigned Opc;2924if (SatVT == DstVT)2925Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;2926else if (DstVT == MVT::i64 && SatVT == MVT::i32)2927Opc = IsSigned ? 
RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;2929else2930return SDValue();2931// FIXME: Support other SatVTs by clamping before or after the conversion.29322933SDLoc DL(Op);2934SDValue FpToInt = DAG.getNode(2935Opc, DL, DstVT, Src,2936DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()));29372938if (Opc == RISCVISD::FCVT_WU_RV64)2939FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);29402941SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);2942return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,2943ISD::CondCode::SETUO);2944}29452946// Vectors.29472948MVT DstEltVT = DstVT.getVectorElementType();2949MVT SrcVT = Src.getSimpleValueType();2950MVT SrcEltVT = SrcVT.getVectorElementType();2951unsigned SrcEltSize = SrcEltVT.getSizeInBits();2952unsigned DstEltSize = DstEltVT.getSizeInBits();29532954// Only handle saturating to the destination type.2955if (SatVT != DstEltVT)2956return SDValue();29572958// FIXME: Don't support narrowing by more than 1 steps for now.2959if (SrcEltSize > (2 * DstEltSize))2960return SDValue();29612962MVT DstContainerVT = DstVT;2963MVT SrcContainerVT = SrcVT;2964if (DstVT.isFixedLengthVector()) {2965DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);2966SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);2967assert(DstContainerVT.getVectorElementCount() ==2968SrcContainerVT.getVectorElementCount() &&2969"Expected same element count");2970Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);2971}29722973SDLoc DL(Op);29742975auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);29762977SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),2978{Src, Src, DAG.getCondCode(ISD::SETNE),2979DAG.getUNDEF(Mask.getValueType()), Mask, VL});29802981// Need to widen by more than 1 step, promote the FP type, then do a widening2982// convert.2983if (DstEltSize > (2 * SrcEltSize)) {2984assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");2985MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);2986Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);2987}29882989unsigned RVVOpc =2990IsSigned ? 
RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;2991SDValue Res = DAG.getNode(RVVOpc, DL, DstContainerVT, Src, Mask, VL);29922993SDValue SplatZero = DAG.getNode(2994RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),2995DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);2996Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,2997Res, DAG.getUNDEF(DstContainerVT), VL);29982999if (DstVT.isFixedLengthVector())3000Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);30013002return Res;3003}30043005static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {3006switch (Opc) {3007case ISD::FROUNDEVEN:3008case ISD::STRICT_FROUNDEVEN:3009case ISD::VP_FROUNDEVEN:3010return RISCVFPRndMode::RNE;3011case ISD::FTRUNC:3012case ISD::STRICT_FTRUNC:3013case ISD::VP_FROUNDTOZERO:3014return RISCVFPRndMode::RTZ;3015case ISD::FFLOOR:3016case ISD::STRICT_FFLOOR:3017case ISD::VP_FFLOOR:3018return RISCVFPRndMode::RDN;3019case ISD::FCEIL:3020case ISD::STRICT_FCEIL:3021case ISD::VP_FCEIL:3022return RISCVFPRndMode::RUP;3023case ISD::FROUND:3024case ISD::STRICT_FROUND:3025case ISD::VP_FROUND:3026return RISCVFPRndMode::RMM;3027case ISD::FRINT:3028return RISCVFPRndMode::DYN;3029}30303031return RISCVFPRndMode::Invalid;3032}30333034// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND3035// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to3036// the integer domain and back. Taking care to avoid converting values that are3037// nan or already correct.3038static SDValue3039lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,3040const RISCVSubtarget &Subtarget) {3041MVT VT = Op.getSimpleValueType();3042assert(VT.isVector() && "Unexpected type");30433044SDLoc DL(Op);30453046SDValue Src = Op.getOperand(0);30473048MVT ContainerVT = VT;3049if (VT.isFixedLengthVector()) {3050ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);3051Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);3052}30533054SDValue Mask, VL;3055if (Op->isVPOpcode()) {3056Mask = Op.getOperand(1);3057if (VT.isFixedLengthVector())3058Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,3059Subtarget);3060VL = Op.getOperand(2);3061} else {3062std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);3063}30643065// Freeze the source since we are increasing the number of uses.3066Src = DAG.getFreeze(Src);30673068// We do the conversion on the absolute value and fix the sign at the end.3069SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);30703071// Determine the largest integer that can be represented exactly. 
This and3072// values larger than it don't have any fractional bits so don't need to3073// be converted.3074const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);3075unsigned Precision = APFloat::semanticsPrecision(FltSem);3076APFloat MaxVal = APFloat(FltSem);3077MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),3078/*IsSigned*/ false, APFloat::rmNearestTiesToEven);3079SDValue MaxValNode =3080DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());3081SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,3082DAG.getUNDEF(ContainerVT), MaxValNode, VL);30833084// If abs(Src) was larger than MaxVal or nan, keep it.3085MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());3086Mask =3087DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,3088{Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),3089Mask, Mask, VL});30903091// Truncate to integer and convert back to FP.3092MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();3093MVT XLenVT = Subtarget.getXLenVT();3094SDValue Truncated;30953096switch (Op.getOpcode()) {3097default:3098llvm_unreachable("Unexpected opcode");3099case ISD::FCEIL:3100case ISD::VP_FCEIL:3101case ISD::FFLOOR:3102case ISD::VP_FFLOOR:3103case ISD::FROUND:3104case ISD::FROUNDEVEN:3105case ISD::VP_FROUND:3106case ISD::VP_FROUNDEVEN:3107case ISD::VP_FROUNDTOZERO: {3108RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());3109assert(FRM != RISCVFPRndMode::Invalid);3110Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,3111DAG.getTargetConstant(FRM, DL, XLenVT), VL);3112break;3113}3114case ISD::FTRUNC:3115Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,3116Mask, VL);3117break;3118case ISD::FRINT:3119case ISD::VP_FRINT:3120Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL);3121break;3122case ISD::FNEARBYINT:3123case ISD::VP_FNEARBYINT:3124Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,3125Mask, VL);3126break;3127}31283129// VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.3130if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)3131Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,3132Mask, VL);31333134// Restore the original sign so that -0.0 is preserved.3135Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,3136Src, Src, Mask, VL);31373138if (!VT.isFixedLengthVector())3139return Truncated;31403141return convertFromScalableVector(VT, Truncated, DAG, Subtarget);3142}31433144// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND,3145// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaN of the source to3146// qNaN and converting the new source to integer and back to FP.3147static SDValue3148lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,3149const RISCVSubtarget &Subtarget) {3150SDLoc DL(Op);3151MVT VT = Op.getSimpleValueType();3152SDValue Chain = Op.getOperand(0);3153SDValue Src = Op.getOperand(1);31543155MVT ContainerVT = VT;3156if (VT.isFixedLengthVector()) {3157ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);3158Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);3159}31603161auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);31623163// Freeze the source since we are increasing the number of uses.3164Src = DAG.getFreeze(Src);31653166// Convert sNaN to qNaN by executing x + x for each unordered element x in Src.3167MVT MaskVT = 
Mask.getSimpleValueType();3168SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,3169DAG.getVTList(MaskVT, MVT::Other),3170{Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),3171DAG.getUNDEF(MaskVT), Mask, VL});3172Chain = Unorder.getValue(1);3173Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,3174DAG.getVTList(ContainerVT, MVT::Other),3175{Chain, Src, Src, Src, Unorder, VL});3176Chain = Src.getValue(1);31773178// We do the conversion on the absolute value and fix the sign at the end.3179SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);31803181// Determine the largest integer that can be represented exactly. This and3182// values larger than it don't have any fractional bits so don't need to3183// be converted.3184const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);3185unsigned Precision = APFloat::semanticsPrecision(FltSem);3186APFloat MaxVal = APFloat(FltSem);3187MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),3188/*IsSigned*/ false, APFloat::rmNearestTiesToEven);3189SDValue MaxValNode =3190DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());3191SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,3192DAG.getUNDEF(ContainerVT), MaxValNode, VL);31933194// If abs(Src) was larger than MaxVal or nan, keep it.3195Mask = DAG.getNode(3196RISCVISD::SETCC_VL, DL, MaskVT,3197{Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});31983199// Truncate to integer and convert back to FP.3200MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();3201MVT XLenVT = Subtarget.getXLenVT();3202SDValue Truncated;32033204switch (Op.getOpcode()) {3205default:3206llvm_unreachable("Unexpected opcode");3207case ISD::STRICT_FCEIL:3208case ISD::STRICT_FFLOOR:3209case ISD::STRICT_FROUND:3210case ISD::STRICT_FROUNDEVEN: {3211RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());3212assert(FRM != RISCVFPRndMode::Invalid);3213Truncated = DAG.getNode(3214RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),3215{Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});3216break;3217}3218case ISD::STRICT_FTRUNC:3219Truncated =3220DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,3221DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);3222break;3223case ISD::STRICT_FNEARBYINT:3224Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,3225DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,3226Mask, VL);3227break;3228}3229Chain = Truncated.getValue(1);32303231// VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.3232if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {3233Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,3234DAG.getVTList(ContainerVT, MVT::Other), Chain,3235Truncated, Mask, VL);3236Chain = Truncated.getValue(1);3237}32383239// Restore the original sign so that -0.0 is preserved.3240Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,3241Src, Src, Mask, VL);32423243if (VT.isFixedLengthVector())3244Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);3245return DAG.getMergeValues({Truncated, Chain}, DL);3246}32473248static SDValue3249lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,3250const RISCVSubtarget &Subtarget) {3251MVT VT = Op.getSimpleValueType();3252if (VT.isVector())3253return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);32543255if (DAG.shouldOptForSize())3256return SDValue();32573258SDLoc DL(Op);3259SDValue Src = Op.getOperand(0);32603261// Create an integer the size of the 
mantissa with the MSB set. This and all3262// values larger than it don't have any fractional bits so don't need to be3263// converted.3264const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);3265unsigned Precision = APFloat::semanticsPrecision(FltSem);3266APFloat MaxVal = APFloat(FltSem);3267MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),3268/*IsSigned*/ false, APFloat::rmNearestTiesToEven);3269SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);32703271RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());3272return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,3273DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));3274}32753276// Expand vector LRINT and LLRINT by converting to the integer domain.3277static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG,3278const RISCVSubtarget &Subtarget) {3279MVT VT = Op.getSimpleValueType();3280assert(VT.isVector() && "Unexpected type");32813282SDLoc DL(Op);3283SDValue Src = Op.getOperand(0);3284MVT ContainerVT = VT;32853286if (VT.isFixedLengthVector()) {3287ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);3288Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);3289}32903291auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);3292SDValue Truncated =3293DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, ContainerVT, Src, Mask, VL);32943295if (!VT.isFixedLengthVector())3296return Truncated;32973298return convertFromScalableVector(VT, Truncated, DAG, Subtarget);3299}33003301static SDValue3302getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget,3303const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op,3304SDValue Offset, SDValue Mask, SDValue VL,3305unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {3306if (Merge.isUndef())3307Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;3308SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());3309SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};3310return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);3311}33123313static SDValue3314getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,3315EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask,3316SDValue VL,3317unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {3318if (Merge.isUndef())3319Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;3320SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());3321SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};3322return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);3323}33243325static MVT getLMUL1VT(MVT VT) {3326assert(VT.getVectorElementType().getSizeInBits() <= 64 &&3327"Unexpected vector MVT");3328return MVT::getScalableVectorVT(3329VT.getVectorElementType(),3330RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());3331}33323333struct VIDSequence {3334int64_t StepNumerator;3335unsigned StepDenominator;3336int64_t Addend;3337};33383339static std::optional<uint64_t> getExactInteger(const APFloat &APF,3340uint32_t BitWidth) {3341// We will use a SINT_TO_FP to materialize this constant so we should use a3342// signed APSInt here.3343APSInt ValInt(BitWidth, /*IsUnsigned*/ false);3344// We use an arbitrary rounding mode here. If a floating-point is an exact3345// integer (e.g., 1.0), the rounding mode does not affect the output value. 
If3346// the rounding mode changes the output value, then it is not an exact3347// integer.3348RoundingMode ArbitraryRM = RoundingMode::TowardZero;3349bool IsExact;3350// If it is out of signed integer range, it will return an invalid operation.3351// If it is not an exact integer, IsExact is false.3352if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==3353APFloatBase::opInvalidOp) ||3354!IsExact)3355return std::nullopt;3356return ValInt.extractBitsAsZExtValue(BitWidth, 0);3357}33583359// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]3360// to the (non-zero) step S and start value X. This can be then lowered as the3361// RVV sequence (VID * S) + X, for example.3362// The step S is represented as an integer numerator divided by a positive3363// denominator. Note that the implementation currently only identifies3364// sequences in which either the numerator is +/- 1 or the denominator is 1. It3365// cannot detect 2/3, for example.3366// Note that this method will also match potentially unappealing index3367// sequences, like <i32 0, i32 50939494>, however it is left to the caller to3368// determine whether this is worth generating code for.3369static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,3370unsigned EltSizeInBits) {3371assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");3372if (!cast<BuildVectorSDNode>(Op)->isConstant())3373return std::nullopt;3374bool IsInteger = Op.getValueType().isInteger();33753376std::optional<unsigned> SeqStepDenom;3377std::optional<int64_t> SeqStepNum, SeqAddend;3378std::optional<std::pair<uint64_t, unsigned>> PrevElt;3379assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());33803381// First extract the ops into a list of constant integer values. This may not3382// be possible for floats if they're not all representable as integers.3383SmallVector<std::optional<uint64_t>> Elts(Op.getNumOperands());3384const unsigned OpSize = Op.getScalarValueSizeInBits();3385for (auto [Idx, Elt] : enumerate(Op->op_values())) {3386if (Elt.isUndef()) {3387Elts[Idx] = std::nullopt;3388continue;3389}3390if (IsInteger) {3391Elts[Idx] = Elt->getAsZExtVal() & maskTrailingOnes<uint64_t>(OpSize);3392} else {3393auto ExactInteger =3394getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);3395if (!ExactInteger)3396return std::nullopt;3397Elts[Idx] = *ExactInteger;3398}3399}34003401for (auto [Idx, Elt] : enumerate(Elts)) {3402// Assume undef elements match the sequence; we just have to be careful3403// when interpolating across them.3404if (!Elt)3405continue;34063407if (PrevElt) {3408// Calculate the step since the last non-undef element, and ensure3409// it's consistent across the entire sequence.3410unsigned IdxDiff = Idx - PrevElt->second;3411int64_t ValDiff = SignExtend64(*Elt - PrevElt->first, EltSizeInBits);34123413// A zero-value value difference means that we're somewhere in the middle3414// of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. 
Wait until we notice a3415// step change before evaluating the sequence.3416if (ValDiff == 0)3417continue;34183419int64_t Remainder = ValDiff % IdxDiff;3420// Normalize the step if it's greater than 1.3421if (Remainder != ValDiff) {3422// The difference must cleanly divide the element span.3423if (Remainder != 0)3424return std::nullopt;3425ValDiff /= IdxDiff;3426IdxDiff = 1;3427}34283429if (!SeqStepNum)3430SeqStepNum = ValDiff;3431else if (ValDiff != SeqStepNum)3432return std::nullopt;34333434if (!SeqStepDenom)3435SeqStepDenom = IdxDiff;3436else if (IdxDiff != *SeqStepDenom)3437return std::nullopt;3438}34393440// Record this non-undef element for later.3441if (!PrevElt || PrevElt->first != *Elt)3442PrevElt = std::make_pair(*Elt, Idx);3443}34443445// We need to have logged a step for this to count as a legal index sequence.3446if (!SeqStepNum || !SeqStepDenom)3447return std::nullopt;34483449// Loop back through the sequence and validate elements we might have skipped3450// while waiting for a valid step. While doing this, log any sequence addend.3451for (auto [Idx, Elt] : enumerate(Elts)) {3452if (!Elt)3453continue;3454uint64_t ExpectedVal =3455(int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;3456int64_t Addend = SignExtend64(*Elt - ExpectedVal, EltSizeInBits);3457if (!SeqAddend)3458SeqAddend = Addend;3459else if (Addend != SeqAddend)3460return std::nullopt;3461}34623463assert(SeqAddend && "Must have an addend if we have a step");34643465return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};3466}34673468// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT3469// and lower it as a VRGATHER_VX_VL from the source vector.3470static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,3471SelectionDAG &DAG,3472const RISCVSubtarget &Subtarget) {3473if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)3474return SDValue();3475SDValue Vec = SplatVal.getOperand(0);3476// Only perform this optimization on vectors of the same size for simplicity.3477// Don't perform this optimization for i1 vectors.3478// FIXME: Support i1 vectors, maybe by promoting to i8?3479if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)3480return SDValue();3481SDValue Idx = SplatVal.getOperand(1);3482// The index must be a legal type.3483if (Idx.getValueType() != Subtarget.getXLenVT())3484return SDValue();34853486MVT ContainerVT = VT;3487if (VT.isFixedLengthVector()) {3488ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);3489Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);3490}34913492auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);34933494SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,3495Idx, DAG.getUNDEF(ContainerVT), Mask, VL);34963497if (!VT.isFixedLengthVector())3498return Gather;34993500return convertFromScalableVector(VT, Gather, DAG, Subtarget);3501}350235033504/// Try and optimize BUILD_VECTORs with "dominant values" - these are values3505/// which constitute a large proportion of the elements. In such cases we can3506/// splat a vector with the dominant element and make up the shortfall with3507/// INSERT_VECTOR_ELTs. Returns SDValue if not profitable.3508/// Note that this includes vectors of 2 elements by association. 
The3509/// upper-most element is the "dominant" one, allowing us to use a splat to3510/// "insert" the upper element, and an insert of the lower element at position3511/// 0, which improves codegen.3512static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG,3513const RISCVSubtarget &Subtarget) {3514MVT VT = Op.getSimpleValueType();3515assert(VT.isFixedLengthVector() && "Unexpected vector!");35163517MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);35183519SDLoc DL(Op);3520auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);35213522MVT XLenVT = Subtarget.getXLenVT();3523unsigned NumElts = Op.getNumOperands();35243525SDValue DominantValue;3526unsigned MostCommonCount = 0;3527DenseMap<SDValue, unsigned> ValueCounts;3528unsigned NumUndefElts =3529count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });35303531// Track the number of scalar loads we know we'd be inserting, estimated as3532// any non-zero floating-point constant. Other kinds of element are either3533// already in registers or are materialized on demand. The threshold at which3534// a vector load is more desirable than several scalar materializion and3535// vector-insertion instructions is not known.3536unsigned NumScalarLoads = 0;35373538for (SDValue V : Op->op_values()) {3539if (V.isUndef())3540continue;35413542ValueCounts.insert(std::make_pair(V, 0));3543unsigned &Count = ValueCounts[V];3544if (0 == Count)3545if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))3546NumScalarLoads += !CFP->isExactlyValue(+0.0);35473548// Is this value dominant? In case of a tie, prefer the highest element as3549// it's cheaper to insert near the beginning of a vector than it is at the3550// end.3551if (++Count >= MostCommonCount) {3552DominantValue = V;3553MostCommonCount = Count;3554}3555}35563557assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");3558unsigned NumDefElts = NumElts - NumUndefElts;3559unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;35603561// Don't perform this optimization when optimizing for size, since3562// materializing elements and inserting them tends to cause code bloat.3563if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&3564(NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&3565((MostCommonCount > DominantValueCountThreshold) ||3566(ValueCounts.size() <= Log2_32(NumDefElts)))) {3567// Start by splatting the most common element.3568SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);35693570DenseSet<SDValue> Processed{DominantValue};35713572// We can handle an insert into the last element (of a splat) via3573// v(f)slide1down. This is slightly better than the vslideup insert3574// lowering as it avoids the need for a vector group temporary. It3575// is also better than using vmerge.vx as it avoids the need to3576// materialize the mask in a vector register.3577if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);3578!LastOp.isUndef() && ValueCounts[LastOp] == 1 &&3579LastOp != DominantValue) {3580Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);3581auto OpCode =3582VT.isFloatingPoint() ? 
RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;3583if (!VT.isFloatingPoint())3584LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);3585Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,3586LastOp, Mask, VL);3587Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);3588Processed.insert(LastOp);3589}35903591MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);3592for (const auto &OpIdx : enumerate(Op->ops())) {3593const SDValue &V = OpIdx.value();3594if (V.isUndef() || !Processed.insert(V).second)3595continue;3596if (ValueCounts[V] == 1) {3597Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,3598DAG.getVectorIdxConstant(OpIdx.index(), DL));3599} else {3600// Blend in all instances of this value using a VSELECT, using a3601// mask where each bit signals whether that element is the one3602// we're after.3603SmallVector<SDValue> Ops;3604transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {3605return DAG.getConstant(V == V1, DL, XLenVT);3606});3607Vec = DAG.getNode(ISD::VSELECT, DL, VT,3608DAG.getBuildVector(SelMaskTy, DL, Ops),3609DAG.getSplatBuildVector(VT, DL, V), Vec);3610}3611}36123613return Vec;3614}36153616return SDValue();3617}36183619static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,3620const RISCVSubtarget &Subtarget) {3621MVT VT = Op.getSimpleValueType();3622assert(VT.isFixedLengthVector() && "Unexpected vector!");36233624MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);36253626SDLoc DL(Op);3627auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);36283629MVT XLenVT = Subtarget.getXLenVT();3630unsigned NumElts = Op.getNumOperands();36313632if (VT.getVectorElementType() == MVT::i1) {3633if (ISD::isBuildVectorAllZeros(Op.getNode())) {3634SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);3635return convertFromScalableVector(VT, VMClr, DAG, Subtarget);3636}36373638if (ISD::isBuildVectorAllOnes(Op.getNode())) {3639SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);3640return convertFromScalableVector(VT, VMSet, DAG, Subtarget);3641}36423643// Lower constant mask BUILD_VECTORs via an integer vector type, in3644// scalar integer chunks whose bit-width depends on the number of mask3645// bits and XLEN.3646// First, determine the most appropriate scalar integer type to use. This3647// is at most XLenVT, but may be shrunk to a smaller vector element type3648// according to the size of the final vector - use i8 chunks rather than3649// XLenVT if we're producing a v8i1. This results in more consistent3650// codegen across RV32 and RV64.3651unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());3652NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());3653// If we have to use more than one INSERT_VECTOR_ELT then this3654// optimization is likely to increase code size; avoid peforming it in3655// such a case. We can use a load from a constant pool in this case.3656if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)3657return SDValue();3658// Now we can create our integer vector type. 
Note that it may be larger
    // than the resulting mask type: v4i1 would use v1i8 as its integer type.
    unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
    MVT IntegerViaVecVT =
        MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
                         IntegerViaVecElts);

    uint64_t Bits = 0;
    unsigned BitPos = 0, IntegerEltIdx = 0;
    SmallVector<SDValue, 8> Elts(IntegerViaVecElts);

    for (unsigned I = 0; I < NumElts;) {
      SDValue V = Op.getOperand(I);
      bool BitValue = !V.isUndef() && V->getAsZExtVal();
      Bits |= ((uint64_t)BitValue << BitPos);
      ++BitPos;
      ++I;

      // Once we accumulate enough bits to fill our scalar type or process the
      // last element, insert into our vector and clear our accumulated data.
      if (I % NumViaIntegerBits == 0 || I == NumElts) {
        if (NumViaIntegerBits <= 32)
          Bits = SignExtend64<32>(Bits);
        SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
        Elts[IntegerEltIdx] = Elt;
        Bits = 0;
        BitPos = 0;
        IntegerEltIdx++;
      }
    }

    SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);

    if (NumElts < NumViaIntegerBits) {
      // If we're producing a smaller vector than our minimum legal integer
      // type, bitcast to the equivalent (known-legal) mask type, and extract
      // our final mask.
      assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
      Vec = DAG.getBitcast(MVT::v8i1, Vec);
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
                        DAG.getConstant(0, DL, XLenVT));
    } else {
      // Else we must have produced an integer type with the same size as the
      // mask type; bitcast for the final result.
      assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
      Vec = DAG.getBitcast(VT, Vec);
    }

    return Vec;
  }

  if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
    unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
                                        : RISCVISD::VMV_V_X_VL;
    if (!VT.isFloatingPoint())
      Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
    Splat =
        DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
    return convertFromScalableVector(VT, Splat, DAG, Subtarget);
  }

  // Try and match index sequences, which we can lower to the vid instruction
  // with optional modifications. An all-undef vector is matched by
  // getSplatValue, above.
  if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
    int64_t StepNumerator = SimpleVID->StepNumerator;
    unsigned StepDenominator = SimpleVID->StepDenominator;
    int64_t Addend = SimpleVID->Addend;

    assert(StepNumerator != 0 && "Invalid step");
    bool Negate = false;
    int64_t SplatStepVal = StepNumerator;
    unsigned StepOpcode = ISD::MUL;
    // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
    // anyway as the shift of 63 won't fit in uimm5.
    if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
        isPowerOf2_64(std::abs(StepNumerator))) {
      Negate = StepNumerator < 0;
      StepOpcode = ISD::SHL;
      SplatStepVal = Log2_64(std::abs(StepNumerator));
    }

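    // A few illustrative decompositions and the (VID * S) + X sequences they
    // lower to, provided the immediate-size checks below pass:
    //   <0, 2, 4, 6>  step  2/1, addend 0  ->  vid << 1
    //   <1, 3, 5, 7>  step  2/1, addend 1  ->  (vid << 1) + 1
    //   <3, 2, 1, 0>  step -1/1, addend 3  ->  3 - vid
    //   <0, 0, 1, 1>  step  1/2, addend 0  ->  vid >> 1
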
    // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
    // threshold since it's the immediate value many RVV instructions accept.
    // There is no vmul.vi instruction so ensure multiply constant can fit in
    // a single addi instruction.
    if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
         (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
        isPowerOf2_32(StepDenominator) &&
        (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
      MVT VIDVT =
          VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
      MVT VIDContainerVT =
          getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
      SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
      // Convert right out of the scalable type so we can use standard ISD
      // nodes for the rest of the computation. If we used scalable types with
      // these, we'd lose the fixed-length vector info and generate worse
      // vsetvli code.
      VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
      if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
          (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
        SDValue SplatStep = DAG.getConstant(SplatStepVal, DL, VIDVT);
        VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
      }
      if (StepDenominator != 1) {
        SDValue SplatStep =
            DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
        VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
      }
      if (Addend != 0 || Negate) {
        SDValue SplatAddend = DAG.getConstant(Addend, DL, VIDVT);
        VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
                          VID);
      }
      if (VT.isFloatingPoint()) {
        // TODO: Use vfwcvt to reduce register pressure.
        VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
      }
      return VID;
    }
  }

  // For very small build_vectors, use a single scalar insert of a constant.
  // TODO: Base this on constant rematerialization cost, not size.
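  // For example, a constant v4i8 <1, 2, 3, 4> can be built on this path as
  // the single i32 constant 0x04030201, inserted into element 0 of an i32
  // vector and bitcast back to v4i8.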
  const unsigned EltBitSize = VT.getScalarSizeInBits();
  if (VT.getSizeInBits() <= 32 &&
      ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
    MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
    assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
           "Unexpected sequence type");
    // If we can use the original VL with the modified element type, this
    // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
    // be moved into InsertVSETVLI?
    unsigned ViaVecLen =
        (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts
                                                                     : 1;
    MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);

    uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
    uint64_t SplatValue = 0;
    // Construct the amalgamated value at this larger vector type.
    for (const auto &OpIdx : enumerate(Op->op_values())) {
      const auto &SeqV = OpIdx.value();
      if (!SeqV.isUndef())
        SplatValue |=
            ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
    }

    // On RV64, sign-extend from 32 to 64 bits where possible in order to
    // achieve better constant materialization.
    if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
      SplatValue = SignExtend64<32>(SplatValue);

    SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
                              DAG.getUNDEF(ViaVecVT),
                              DAG.getConstant(SplatValue, DL, XLenVT),
                              DAG.getVectorIdxConstant(0, DL));
    if (ViaVecLen != 1)
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
                        MVT::getVectorVT(ViaIntVT, 1), Vec,
                        DAG.getConstant(0, DL, XLenVT));
    return DAG.getBitcast(VT, Vec);
  }

  // Attempt to detect "hidden" splats, which only reveal themselves as splats
  // when re-interpreted as a vector with a larger element type. For example,
  // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
  // could instead be splat as
  // v2i32 = build_vector i32 0x00010000, i32 0x00010000
  // TODO: This optimization could also work on non-constant splats, but it
  // would require bit-manipulation instructions to construct the splat value.
  SmallVector<SDValue> Sequence;
  const auto *BV = cast<BuildVectorSDNode>(Op);
  if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
      ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
      BV->getRepeatedSequence(Sequence) &&
      (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
    unsigned SeqLen = Sequence.size();
    MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
    assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
            ViaIntVT == MVT::i64) &&
           "Unexpected sequence type");

    // If we can use the original VL with the modified element type, this
    // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
    // be moved into InsertVSETVLI?
    const unsigned RequiredVL = NumElts / SeqLen;
    const unsigned ViaVecLen =
        (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts)
            ? NumElts
            : RequiredVL;
    MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);

    unsigned EltIdx = 0;
    uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
    uint64_t SplatValue = 0;
    // Construct the amalgamated value which can be splatted as this larger
    // vector type.
    for (const auto &SeqV : Sequence) {
      if (!SeqV.isUndef())
        SplatValue |=
            ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
      EltIdx++;
    }

    // On RV64, sign-extend from 32 to 64 bits where possible in order to
    // achieve better constant materialization.
    if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
      SplatValue = SignExtend64<32>(SplatValue);

    // Since we can't introduce illegal i64 types at this stage, we can only
    // perform an i64 splat on RV32 if it is its own sign-extended value.
That3870// way we can use RVV instructions to splat.3871assert((ViaIntVT.bitsLE(XLenVT) ||3872(!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&3873"Unexpected bitcast sequence");3874if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {3875SDValue ViaVL =3876DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);3877MVT ViaContainerVT =3878getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);3879SDValue Splat =3880DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,3881DAG.getUNDEF(ViaContainerVT),3882DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);3883Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);3884if (ViaVecLen != RequiredVL)3885Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,3886MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,3887DAG.getConstant(0, DL, XLenVT));3888return DAG.getBitcast(VT, Splat);3889}3890}38913892// If the number of signbits allows, see if we can lower as a <N x i8>.3893// Our main goal here is to reduce LMUL (and thus work) required to3894// build the constant, but we will also narrow if the resulting3895// narrow vector is known to materialize cheaply.3896// TODO: We really should be costing the smaller vector. There are3897// profitable cases this misses.3898if (EltBitSize > 8 && VT.isInteger() &&3899(NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen())) {3900unsigned SignBits = DAG.ComputeNumSignBits(Op);3901if (EltBitSize - SignBits < 8) {3902SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),3903DL, Op->ops());3904Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),3905Source, DAG, Subtarget);3906SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);3907return convertFromScalableVector(VT, Res, DAG, Subtarget);3908}3909}39103911if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))3912return Res;39133914// For constant vectors, use generic constant pool lowering. Otherwise,3915// we'd have to materialize constants in GPRs just to move them into the3916// vector.3917return SDValue();3918}39193920static unsigned getPACKOpcode(unsigned DestBW,3921const RISCVSubtarget &Subtarget) {3922switch (DestBW) {3923default:3924llvm_unreachable("Unsupported pack size");3925case 16:3926return RISCV::PACKH;3927case 32:3928return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK;3929case 64:3930assert(Subtarget.is64Bit());3931return RISCV::PACK;3932}3933}39343935/// Double the element size of the build vector to reduce the number3936/// of vslide1down in the build vector chain. In the worst case, this3937/// trades three scalar operations for 1 vector operation. 
Scalar3938/// operations are generally lower latency, and for out-of-order cores3939/// we also benefit from additional parallelism.3940static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG,3941const RISCVSubtarget &Subtarget) {3942SDLoc DL(Op);3943MVT VT = Op.getSimpleValueType();3944assert(VT.isFixedLengthVector() && "Unexpected vector!");3945MVT ElemVT = VT.getVectorElementType();3946if (!ElemVT.isInteger())3947return SDValue();39483949// TODO: Relax these architectural restrictions, possibly with costing3950// of the actual instructions required.3951if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())3952return SDValue();39533954unsigned NumElts = VT.getVectorNumElements();3955unsigned ElemSizeInBits = ElemVT.getSizeInBits();3956if (ElemSizeInBits >= std::min(Subtarget.getELen(), Subtarget.getXLen()) ||3957NumElts % 2 != 0)3958return SDValue();39593960// Produce [B,A] packed into a type twice as wide. Note that all3961// scalars are XLenVT, possibly masked (see below).3962MVT XLenVT = Subtarget.getXLenVT();3963SDValue Mask = DAG.getConstant(3964APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits), DL, XLenVT);3965auto pack = [&](SDValue A, SDValue B) {3966// Bias the scheduling of the inserted operations to near the3967// definition of the element - this tends to reduce register3968// pressure overall.3969SDLoc ElemDL(B);3970if (Subtarget.hasStdExtZbkb())3971// Note that we're relying on the high bits of the result being3972// don't care. For PACKW, the result is *sign* extended.3973return SDValue(3974DAG.getMachineNode(getPACKOpcode(ElemSizeInBits * 2, Subtarget),3975ElemDL, XLenVT, A, B),39760);39773978A = DAG.getNode(ISD::AND, SDLoc(A), XLenVT, A, Mask);3979B = DAG.getNode(ISD::AND, SDLoc(B), XLenVT, B, Mask);3980SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT);3981SDNodeFlags Flags;3982Flags.setDisjoint(true);3983return DAG.getNode(ISD::OR, ElemDL, XLenVT, A,3984DAG.getNode(ISD::SHL, ElemDL, XLenVT, B, ShtAmt), Flags);3985};39863987SmallVector<SDValue> NewOperands;3988NewOperands.reserve(NumElts / 2);3989for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2)3990NewOperands.push_back(pack(Op.getOperand(i), Op.getOperand(i + 1)));3991assert(NumElts == NewOperands.size() * 2);3992MVT WideVT = MVT::getIntegerVT(ElemSizeInBits * 2);3993MVT WideVecVT = MVT::getVectorVT(WideVT, NumElts / 2);3994return DAG.getNode(ISD::BITCAST, DL, VT,3995DAG.getBuildVector(WideVecVT, DL, NewOperands));3996}39973998// Convert to an vXf16 build_vector to vXi16 with bitcasts.3999static SDValue lowerBUILD_VECTORvXf16(SDValue Op, SelectionDAG &DAG) {4000MVT VT = Op.getSimpleValueType();4001MVT IVT = VT.changeVectorElementType(MVT::i16);4002SmallVector<SDValue, 16> NewOps(Op.getNumOperands());4003for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I)4004NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));4005SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), IVT, NewOps);4006return DAG.getBitcast(VT, Res);4007}40084009static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,4010const RISCVSubtarget &Subtarget) {4011MVT VT = Op.getSimpleValueType();4012assert(VT.isFixedLengthVector() && "Unexpected vector!");40134014// If we don't have scalar f16, we need to bitcast to an i16 vector.4015if (VT.getVectorElementType() == MVT::f16 &&4016!Subtarget.hasStdExtZfhmin())4017return lowerBUILD_VECTORvXf16(Op, DAG);40184019if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||4020ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))4021return 
lowerBuildVectorOfConstants(Op, DAG, Subtarget);40224023MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);40244025SDLoc DL(Op);4026auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);40274028MVT XLenVT = Subtarget.getXLenVT();40294030if (VT.getVectorElementType() == MVT::i1) {4031// A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask4032// vector type, we have a legal equivalently-sized i8 type, so we can use4033// that.4034MVT WideVecVT = VT.changeVectorElementType(MVT::i8);4035SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);40364037SDValue WideVec;4038if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {4039// For a splat, perform a scalar truncate before creating the wider4040// vector.4041Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,4042DAG.getConstant(1, DL, Splat.getValueType()));4043WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);4044} else {4045SmallVector<SDValue, 8> Ops(Op->op_values());4046WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);4047SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);4048WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);4049}40504051return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);4052}40534054if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {4055if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))4056return Gather;4057unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL4058: RISCVISD::VMV_V_X_VL;4059if (!VT.isFloatingPoint())4060Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);4061Splat =4062DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);4063return convertFromScalableVector(VT, Splat, DAG, Subtarget);4064}40654066if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))4067return Res;40684069// If we're compiling for an exact VLEN value, we can split our work per4070// register in the register group.4071if (const auto VLen = Subtarget.getRealVLen();4072VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {4073MVT ElemVT = VT.getVectorElementType();4074unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();4075EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);4076MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);4077MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);4078assert(M1VT == getLMUL1VT(M1VT));40794080// The following semantically builds up a fixed length concat_vector4081// of the component build_vectors. We eagerly lower to scalable and4082// insert_subvector here to avoid DAG combining it back to a large4083// build_vector.4084SmallVector<SDValue> BuildVectorOps(Op->op_begin(), Op->op_end());4085unsigned NumOpElts = M1VT.getVectorMinNumElements();4086SDValue Vec = DAG.getUNDEF(ContainerVT);4087for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {4088auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);4089SDValue SubBV =4090DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);4091SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);4092unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;4093Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,4094DAG.getVectorIdxConstant(InsertIdx, DL));4095}4096return convertFromScalableVector(VT, Vec, DAG, Subtarget);4097}40984099// If we're about to resort to vslide1down (or stack usage), pack our4100// elements into the widest scalar type we can. 
This will force a VL/VTYPE4101// toggle, but reduces the critical path, the number of vslide1down ops4102// required, and possibly enables scalar folds of the values.4103if (SDValue Res = lowerBuildVectorViaPacking(Op, DAG, Subtarget))4104return Res;41054106// For m1 vectors, if we have non-undef values in both halves of our vector,4107// split the vector into low and high halves, build them separately, then4108// use a vselect to combine them. For long vectors, this cuts the critical4109// path of the vslide1down sequence in half, and gives us an opportunity4110// to special case each half independently. Note that we don't change the4111// length of the sub-vectors here, so if both fallback to the generic4112// vslide1down path, we should be able to fold the vselect into the final4113// vslidedown (for the undef tail) for the first half w/ masking.4114unsigned NumElts = VT.getVectorNumElements();4115unsigned NumUndefElts =4116count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });4117unsigned NumDefElts = NumElts - NumUndefElts;4118if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&4119ContainerVT.bitsLE(getLMUL1VT(ContainerVT))) {4120SmallVector<SDValue> SubVecAOps, SubVecBOps;4121SmallVector<SDValue> MaskVals;4122SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));4123SubVecAOps.reserve(NumElts);4124SubVecBOps.reserve(NumElts);4125for (unsigned i = 0; i < NumElts; i++) {4126SDValue Elem = Op->getOperand(i);4127if (i < NumElts / 2) {4128SubVecAOps.push_back(Elem);4129SubVecBOps.push_back(UndefElem);4130} else {4131SubVecAOps.push_back(UndefElem);4132SubVecBOps.push_back(Elem);4133}4134bool SelectMaskVal = (i < NumElts / 2);4135MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));4136}4137assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&4138MaskVals.size() == NumElts);41394140SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);4141SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);4142MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);4143SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);4144return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);4145}41464147// Cap the cost at a value linear to the number of elements in the vector.4148// The default lowering is to use the stack. The vector store + scalar loads4149// is linear in VL. However, at high lmuls vslide1down and vslidedown end up4150// being (at least) linear in LMUL. As a result, using the vslidedown4151// lowering for every element ends up being VL*LMUL..4152// TODO: Should we be directly costing the stack alternative? 
Doing so might4153// give us a more accurate upper bound.4154InstructionCost LinearBudget = VT.getVectorNumElements() * 2;41554156// TODO: unify with TTI getSlideCost.4157InstructionCost PerSlideCost = 1;4158switch (RISCVTargetLowering::getLMUL(ContainerVT)) {4159default: break;4160case RISCVII::VLMUL::LMUL_2:4161PerSlideCost = 2;4162break;4163case RISCVII::VLMUL::LMUL_4:4164PerSlideCost = 4;4165break;4166case RISCVII::VLMUL::LMUL_8:4167PerSlideCost = 8;4168break;4169}41704171// TODO: Should we be using the build instseq then cost + evaluate scheme4172// we use for integer constants here?4173unsigned UndefCount = 0;4174for (const SDValue &V : Op->ops()) {4175if (V.isUndef()) {4176UndefCount++;4177continue;4178}4179if (UndefCount) {4180LinearBudget -= PerSlideCost;4181UndefCount = 0;4182}4183LinearBudget -= PerSlideCost;4184}4185if (UndefCount) {4186LinearBudget -= PerSlideCost;4187}41884189if (LinearBudget < 0)4190return SDValue();41914192assert((!VT.isFloatingPoint() ||4193VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&4194"Illegal type which will result in reserved encoding");41954196const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;41974198SDValue Vec;4199UndefCount = 0;4200for (SDValue V : Op->ops()) {4201if (V.isUndef()) {4202UndefCount++;4203continue;4204}42054206// Start our sequence with a TA splat in the hopes that hardware is able to4207// recognize there's no dependency on the prior value of our temporary4208// register.4209if (!Vec) {4210Vec = DAG.getSplatVector(VT, DL, V);4211Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);4212UndefCount = 0;4213continue;4214}42154216if (UndefCount) {4217const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());4218Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),4219Vec, Offset, Mask, VL, Policy);4220UndefCount = 0;4221}4222auto OpCode =4223VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;4224if (!VT.isFloatingPoint())4225V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);4226Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,4227V, Mask, VL);4228}4229if (UndefCount) {4230const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());4231Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),4232Vec, Offset, Mask, VL, Policy);4233}4234return convertFromScalableVector(VT, Vec, DAG, Subtarget);4235}42364237static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,4238SDValue Lo, SDValue Hi, SDValue VL,4239SelectionDAG &DAG) {4240if (!Passthru)4241Passthru = DAG.getUNDEF(VT);4242if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {4243int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();4244int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();4245// If Hi constant is all the same sign bit as Lo, lower this as a custom4246// node in order to try and match RVV vector/scalar instructions.4247if ((LoC >> 31) == HiC)4248return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);42494250// If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,4251// we could use vmv.v.x whose EEW = 32 to lower it. 
This allows us to use4252// vlmax vsetvli or vsetivli to change the VL.4253// FIXME: Support larger constants?4254// FIXME: Support non-constant VLs by saturating?4255if (LoC == HiC) {4256SDValue NewVL;4257if (isAllOnesConstant(VL) ||4258(isa<RegisterSDNode>(VL) &&4259cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))4260NewVL = DAG.getRegister(RISCV::X0, MVT::i32);4261else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))4262NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);42634264if (NewVL) {4265MVT InterVT =4266MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);4267auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,4268DAG.getUNDEF(InterVT), Lo, NewVL);4269return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);4270}4271}4272}42734274// Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.4275if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&4276isa<ConstantSDNode>(Hi.getOperand(1)) &&4277Hi.getConstantOperandVal(1) == 31)4278return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);42794280// If the hi bits of the splat are undefined, then it's fine to just splat Lo4281// even if it might be sign extended.4282if (Hi.isUndef())4283return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);42844285// Fall back to a stack store and stride x0 vector load.4286return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,4287Hi, VL);4288}42894290// Called by type legalization to handle splat of i64 on RV32.4291// FIXME: We can optimize this when the type has sign or zero bits in one4292// of the halves.4293static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,4294SDValue Scalar, SDValue VL,4295SelectionDAG &DAG) {4296assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");4297SDValue Lo, Hi;4298std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);4299return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);4300}43014302// This function lowers a splat of a scalar operand Splat with the vector4303// length VL. It ensures the final sequence is type legal, which is useful when4304// lowering a splat after type legalization.4305static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,4306MVT VT, const SDLoc &DL, SelectionDAG &DAG,4307const RISCVSubtarget &Subtarget) {4308bool HasPassthru = Passthru && !Passthru.isUndef();4309if (!HasPassthru && !Passthru)4310Passthru = DAG.getUNDEF(VT);4311if (VT.isFloatingPoint())4312return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);43134314MVT XLenVT = Subtarget.getXLenVT();43154316// Simplest case is that the operand needs to be promoted to XLenVT.4317if (Scalar.getValueType().bitsLE(XLenVT)) {4318// If the operand is a constant, sign extend to increase our chances4319// of being able to use a .vi instruction. ANY_EXTEND would become a4320// a zero extend and the simm5 check in isel would fail.4321// FIXME: Should we ignore the upper bits in isel instead?4322unsigned ExtOpc =4323isa<ConstantSDNode>(Scalar) ? 
ISD::SIGN_EXTEND : ISD::ANY_EXTEND;4324Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);4325return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);4326}43274328assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&4329"Unexpected scalar for splat lowering!");43304331if (isOneConstant(VL) && isNullConstant(Scalar))4332return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,4333DAG.getConstant(0, DL, XLenVT), VL);43344335// Otherwise use the more complicated splatting algorithm.4336return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);4337}43384339// This function lowers an insert of a scalar operand Scalar into lane4340// 0 of the vector regardless of the value of VL. The contents of the4341// remaining lanes of the result vector are unspecified. VL is assumed4342// to be non-zero.4343static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,4344const SDLoc &DL, SelectionDAG &DAG,4345const RISCVSubtarget &Subtarget) {4346assert(VT.isScalableVector() && "Expect VT is scalable vector type.");43474348const MVT XLenVT = Subtarget.getXLenVT();4349SDValue Passthru = DAG.getUNDEF(VT);43504351if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&4352isNullConstant(Scalar.getOperand(1))) {4353SDValue ExtractedVal = Scalar.getOperand(0);4354// The element types must be the same.4355if (ExtractedVal.getValueType().getVectorElementType() ==4356VT.getVectorElementType()) {4357MVT ExtractedVT = ExtractedVal.getSimpleValueType();4358MVT ExtractedContainerVT = ExtractedVT;4359if (ExtractedContainerVT.isFixedLengthVector()) {4360ExtractedContainerVT = getContainerForFixedLengthVector(4361DAG, ExtractedContainerVT, Subtarget);4362ExtractedVal = convertToScalableVector(ExtractedContainerVT,4363ExtractedVal, DAG, Subtarget);4364}4365if (ExtractedContainerVT.bitsLE(VT))4366return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru,4367ExtractedVal, DAG.getVectorIdxConstant(0, DL));4368return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,4369DAG.getVectorIdxConstant(0, DL));4370}4371}437243734374if (VT.isFloatingPoint())4375return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,4376DAG.getUNDEF(VT), Scalar, VL);43774378// Avoid the tricky legalization cases by falling back to using the4379// splat code which already handles it gracefully.4380if (!Scalar.getValueType().bitsLE(XLenVT))4381return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,4382DAG.getConstant(1, DL, XLenVT),4383VT, DL, DAG, Subtarget);43844385// If the operand is a constant, sign extend to increase our chances4386// of being able to use a .vi instruction. ANY_EXTEND would become a4387// a zero extend and the simm5 check in isel would fail.4388// FIXME: Should we ignore the upper bits in isel instead?4389unsigned ExtOpc =4390isa<ConstantSDNode>(Scalar) ? 
      ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
  Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
  return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT,
                     DAG.getUNDEF(VT), Scalar, VL);
}

// Is this a shuffle that extracts either the even or odd elements of a
// vector? That is, specifically, either (a) or (b) below.
// t34: v8i8 = extract_subvector t11, Constant:i64<0>
// t33: v8i8 = extract_subvector t11, Constant:i64<8>
// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
// Returns {Src Vector, Even Elements} on success
static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
                                  SDValue V2, ArrayRef<int> Mask,
                                  const RISCVSubtarget &Subtarget) {
  // Need to be able to widen the vector.
  if (VT.getScalarSizeInBits() >= Subtarget.getELen())
    return false;

  // Both inputs must be extracts.
  if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
      V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
    return false;

  // Extracting from the same source.
  SDValue Src = V1.getOperand(0);
  if (Src != V2.getOperand(0))
    return false;

  // Src needs to have twice the number of elements.
  if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
    return false;

  // The extracts must extract the two halves of the source.
  if (V1.getConstantOperandVal(1) != 0 ||
      V2.getConstantOperandVal(1) != Mask.size())
    return false;

  // First index must be the first even or odd element from V1.
  if (Mask[0] != 0 && Mask[0] != 1)
    return false;

  // The others must increase by 2 each time.
  // TODO: Support undef elements?
  for (unsigned i = 1; i != Mask.size(); ++i)
    if (Mask[i] != Mask[i - 1] + 2)
      return false;

  return true;
}

/// Is this shuffle interleaving contiguous elements from one vector into the
/// even elements and contiguous elements from another vector into the odd
/// elements? \p EvenSrc will contain the element that should be in the first
/// even element. \p OddSrc will contain the element that should be in the first
/// odd element.
These can be the first element in a source or the element half4447/// way through the source.4448static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,4449int &OddSrc, const RISCVSubtarget &Subtarget) {4450// We need to be able to widen elements to the next larger integer type.4451if (VT.getScalarSizeInBits() >= Subtarget.getELen())4452return false;44534454int Size = Mask.size();4455int NumElts = VT.getVectorNumElements();4456assert(Size == (int)NumElts && "Unexpected mask size");44574458SmallVector<unsigned, 2> StartIndexes;4459if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))4460return false;44614462EvenSrc = StartIndexes[0];4463OddSrc = StartIndexes[1];44644465// One source should be low half of first vector.4466if (EvenSrc != 0 && OddSrc != 0)4467return false;44684469// Subvectors will be subtracted from either at the start of the two input4470// vectors, or at the start and middle of the first vector if it's an unary4471// interleave.4472// In both cases, HalfNumElts will be extracted.4473// We need to ensure that the extract indices are 0 or HalfNumElts otherwise4474// we'll create an illegal extract_subvector.4475// FIXME: We could support other values using a slidedown first.4476int HalfNumElts = NumElts / 2;4477return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);4478}44794480/// Match shuffles that concatenate two vectors, rotate the concatenation,4481/// and then extract the original number of elements from the rotated result.4482/// This is equivalent to vector.splice or X86's PALIGNR instruction. The4483/// returned rotation amount is for a rotate right, where elements move from4484/// higher elements to lower elements. \p LoSrc indicates the first source4485/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector4486/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be4487/// 0 or 1 if a rotation is found.4488///4489/// NOTE: We talk about rotate to the right which matches how bit shift and4490/// rotate instructions are described where LSBs are on the right, but LLVM IR4491/// and the table below write vectors with the lowest elements on the left.4492static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {4493int Size = Mask.size();44944495// We need to detect various ways of spelling a rotation:4496// [11, 12, 13, 14, 15, 0, 1, 2]4497// [-1, 12, 13, 14, -1, -1, 1, -1]4498// [-1, -1, -1, -1, -1, -1, 1, 2]4499// [ 3, 4, 5, 6, 7, 8, 9, 10]4500// [-1, 4, 5, 6, -1, -1, 9, -1]4501// [-1, 4, 5, 6, -1, -1, -1, -1]4502int Rotation = 0;4503LoSrc = -1;4504HiSrc = -1;4505for (int i = 0; i != Size; ++i) {4506int M = Mask[i];4507if (M < 0)4508continue;45094510// Determine where a rotate vector would have started.4511int StartIdx = i - (M % Size);4512// The identity rotation isn't interesting, stop.4513if (StartIdx == 0)4514return -1;45154516// If we found the tail of a vector the rotation must be the missing4517// front. If we found the head of a vector, it must be how much of the4518// head.4519int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;45204521if (Rotation == 0)4522Rotation = CandidateRotation;4523else if (Rotation != CandidateRotation)4524// The rotations don't match, so we can't match this mask.4525return -1;45264527// Compute which value this mask is pointing at.4528int MaskSrc = M < Size ? 
0 : 1;45294530// Compute which of the two target values this index should be assigned to.4531// This reflects whether the high elements are remaining or the low elemnts4532// are remaining.4533int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;45344535// Either set up this value if we've not encountered it before, or check4536// that it remains consistent.4537if (TargetSrc < 0)4538TargetSrc = MaskSrc;4539else if (TargetSrc != MaskSrc)4540// This may be a rotation, but it pulls from the inputs in some4541// unsupported interleaving.4542return -1;4543}45444545// Check that we successfully analyzed the mask, and normalize the results.4546assert(Rotation != 0 && "Failed to locate a viable rotation!");4547assert((LoSrc >= 0 || HiSrc >= 0) &&4548"Failed to find a rotated input vector!");45494550return Rotation;4551}45524553// Lower a deinterleave shuffle to vnsrl.4554// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true)4555// -> [p, q, r, s] (EvenElts == false)4556// VT is the type of the vector to return, <[vscale x ]n x ty>4557// Src is the vector to deinterleave of type <[vscale x ]n*2 x ty>4558static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src,4559bool EvenElts,4560const RISCVSubtarget &Subtarget,4561SelectionDAG &DAG) {4562// The result is a vector of type <m x n x ty>4563MVT ContainerVT = VT;4564// Convert fixed vectors to scalable if needed4565if (ContainerVT.isFixedLengthVector()) {4566assert(Src.getSimpleValueType().isFixedLengthVector());4567ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);45684569// The source is a vector of type <m x n*2 x ty>4570MVT SrcContainerVT =4571MVT::getVectorVT(ContainerVT.getVectorElementType(),4572ContainerVT.getVectorElementCount() * 2);4573Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);4574}45754576auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);45774578// Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2>4579// This also converts FP to int.4580unsigned EltBits = ContainerVT.getScalarSizeInBits();4581MVT WideSrcContainerVT = MVT::getVectorVT(4582MVT::getIntegerVT(EltBits * 2), ContainerVT.getVectorElementCount());4583Src = DAG.getBitcast(WideSrcContainerVT, Src);45844585// The integer version of the container type.4586MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger();45874588// If we want even elements, then the shift amount is 0. Otherwise, shift by4589// the original element size.4590unsigned Shift = EvenElts ? 
0 : EltBits;4591SDValue SplatShift = DAG.getNode(4592RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT),4593DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL);4594SDValue Res =4595DAG.getNode(RISCVISD::VNSRL_VL, DL, IntContainerVT, Src, SplatShift,4596DAG.getUNDEF(IntContainerVT), TrueMask, VL);4597// Cast back to FP if needed.4598Res = DAG.getBitcast(ContainerVT, Res);45994600if (VT.isFixedLengthVector())4601Res = convertFromScalableVector(VT, Res, DAG, Subtarget);4602return Res;4603}46044605// Lower the following shuffle to vslidedown.4606// a)4607// t49: v8i8 = extract_subvector t13, Constant:i64<0>4608// t109: v8i8 = extract_subvector t13, Constant:i64<8>4609// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t1064610// b)4611// t69: v16i16 = extract_subvector t68, Constant:i64<0>4612// t23: v8i16 = extract_subvector t69, Constant:i64<0>4613// t29: v4i16 = extract_subvector t23, Constant:i64<4>4614// t26: v8i16 = extract_subvector t69, Constant:i64<8>4615// t30: v4i16 = extract_subvector t26, Constant:i64<0>4616// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t304617static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,4618SDValue V1, SDValue V2,4619ArrayRef<int> Mask,4620const RISCVSubtarget &Subtarget,4621SelectionDAG &DAG) {4622auto findNonEXTRACT_SUBVECTORParent =4623[](SDValue Parent) -> std::pair<SDValue, uint64_t> {4624uint64_t Offset = 0;4625while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&4626// EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from4627// a scalable vector. But we don't want to match the case.4628Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {4629Offset += Parent.getConstantOperandVal(1);4630Parent = Parent.getOperand(0);4631}4632return std::make_pair(Parent, Offset);4633};46344635auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);4636auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);46374638// Extracting from the same source.4639SDValue Src = V1Src;4640if (Src != V2Src)4641return SDValue();46424643// Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.4644SmallVector<int, 16> NewMask(Mask);4645for (size_t i = 0; i != NewMask.size(); ++i) {4646if (NewMask[i] == -1)4647continue;46484649if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {4650NewMask[i] = NewMask[i] + V1IndexOffset;4651} else {4652// Minus NewMask.size() is needed. Otherwise, the b case would be4653// <5,6,7,12> instead of <5,6,7,8>.4654NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;4655}4656}46574658// First index must be known and non-zero. 
It will be used as the slidedown amount.
  if (NewMask[0] <= 0)
    return SDValue();

  // The rest of NewMask must also be contiguous.
  for (unsigned i = 1; i != NewMask.size(); ++i)
    if (NewMask[i - 1] + 1 != NewMask[i])
      return SDValue();

  MVT XLenVT = Subtarget.getXLenVT();
  MVT SrcVT = Src.getSimpleValueType();
  MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
  auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
  SDValue Slidedown =
      getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
                    convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
                    DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
  return DAG.getNode(
      ISD::EXTRACT_SUBVECTOR, DL, VT,
      convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
      DAG.getConstant(0, DL, XLenVT));
}

// Because vslideup leaves the destination elements at the start intact, we can
// use it to perform shuffles that insert subvectors:
//
// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
// ->
// vsetvli zero, 8, e8, mf2, ta, ma
// vslideup.vi v8, v9, 4
//
// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 8, 9, 10, 5, 6, 7>
// ->
// vsetvli zero, 5, e8, mf2, tu, ma
// vslideup.vi v8, v9, 2
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
                                             SDValue V1, SDValue V2,
                                             ArrayRef<int> Mask,
                                             const RISCVSubtarget &Subtarget,
                                             SelectionDAG &DAG) {
  unsigned NumElts = VT.getVectorNumElements();
  int NumSubElts, Index;
  if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
                                                Index))
    return SDValue();

  bool OpsSwapped = Mask[Index] < (int)NumElts;
  SDValue InPlace = OpsSwapped ? V2 : V1;
  SDValue ToInsert = OpsSwapped ? V1 : V2;

  MVT XLenVT = Subtarget.getXLenVT();
  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
  auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
  // We slide up by the index that the subvector is being inserted at, and set
  // VL to the index + the number of elements being inserted.
  unsigned Policy =
      RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED | RISCVII::MASK_AGNOSTIC;
  // If we're adding a suffix to the in-place vector, i.e. inserting right up
  // to the very end of it, then we don't actually care about the tail.
  if (NumSubElts + Index >= (int)NumElts)
    Policy |= RISCVII::TAIL_AGNOSTIC;

  InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
  ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
  SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);

  SDValue Res;
  // If we're inserting into the lowest elements, use a tail undisturbed
  // vmv.v.v.
  if (Index == 0)
    Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
                      VL);
  else
    Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
                      DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
  return convertFromScalableVector(VT, Res, DAG, Subtarget);
}

/// Match v(f)slide1up/down idioms. These operations involve sliding N-1
/// elements to make room for an inserted scalar at one end.
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
                                            SDValue V1, SDValue V2,
                                            ArrayRef<int> Mask,
                                            const RISCVSubtarget &Subtarget,
                                            SelectionDAG &DAG) {
  bool OpsSwapped = false;
  if (!isa<BuildVectorSDNode>(V1)) {
    if (!isa<BuildVectorSDNode>(V2))
      return SDValue();
    std::swap(V1, V2);
    OpsSwapped = true;
  }
  SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
  if (!Splat)
    return SDValue();

  // Return true if the mask could describe a slide of Mask.size() - 1
  // elements from concat_vector(V1, V2)[Base:] to [Offset:].
  auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
    const unsigned S = (Offset > 0) ? 0 : -Offset;
    const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
    for (unsigned i = S; i != E; ++i)
      if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
        return false;
    return true;
  };

  const unsigned NumElts = VT.getVectorNumElements();
  bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
  if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
    return SDValue();

  const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
  // The inserted lane must come from the splat; an undef scalar is legal but
  // not profitable.
  if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
    return SDValue();

  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
  auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
  auto OpCode = IsVSlidedown ?
                    (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL
                                          : RISCVISD::VSLIDE1DOWN_VL) :
                    (VT.isFloatingPoint() ?
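                    // Note: the vfslide1up/vfslide1down forms take the
                    // inserted scalar in an FP register, so FP element types
                    // select them; integer element types use
                    // vslide1up/vslide1down and the scalar is any-extended to
                    // XLen just below.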
RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);4780if (!VT.isFloatingPoint())4781Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);4782auto Vec = DAG.getNode(OpCode, DL, ContainerVT,4783DAG.getUNDEF(ContainerVT),4784convertToScalableVector(ContainerVT, V2, DAG, Subtarget),4785Splat, TrueMask, VL);4786return convertFromScalableVector(VT, Vec, DAG, Subtarget);4787}47884789// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx4790// to create an interleaved vector of <[vscale x] n*2 x ty>.4791// This requires that the size of ty is less than the subtarget's maximum ELEN.4792static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,4793const SDLoc &DL, SelectionDAG &DAG,4794const RISCVSubtarget &Subtarget) {4795MVT VecVT = EvenV.getSimpleValueType();4796MVT VecContainerVT = VecVT; // <vscale x n x ty>4797// Convert fixed vectors to scalable if needed4798if (VecContainerVT.isFixedLengthVector()) {4799VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);4800EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);4801OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);4802}48034804assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());48054806// We're working with a vector of the same size as the resulting4807// interleaved vector, but with half the number of elements and4808// twice the SEW (Hence the restriction on not using the maximum4809// ELEN)4810MVT WideVT =4811MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),4812VecVT.getVectorElementCount());4813MVT WideContainerVT = WideVT; // <vscale x n x ty*2>4814if (WideContainerVT.isFixedLengthVector())4815WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);48164817// Bitcast the input vectors to integers in case they are FP4818VecContainerVT = VecContainerVT.changeTypeToInteger();4819EvenV = DAG.getBitcast(VecContainerVT, EvenV);4820OddV = DAG.getBitcast(VecContainerVT, OddV);48214822auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);4823SDValue Passthru = DAG.getUNDEF(WideContainerVT);48244825SDValue Interleaved;4826if (OddV.isUndef()) {4827// If OddV is undef, this is a zero extend.4828// FIXME: Not only does this optimize the code, it fixes some correctness4829// issues because MIR does not have freeze.4830Interleaved =4831DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, EvenV, Mask, VL);4832} else if (Subtarget.hasStdExtZvbb()) {4833// Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.4834SDValue OffsetVec =4835DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);4836Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,4837OffsetVec, Passthru, Mask, VL);4838if (!EvenV.isUndef())4839Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,4840Interleaved, EvenV, Passthru, Mask, VL);4841} else if (EvenV.isUndef()) {4842Interleaved =4843DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, OddV, Mask, VL);48444845SDValue OffsetVec =4846DAG.getConstant(VecVT.getScalarSizeInBits(), DL, WideContainerVT);4847Interleaved = DAG.getNode(RISCVISD::SHL_VL, DL, WideContainerVT,4848Interleaved, OffsetVec, Passthru, Mask, VL);4849} else {4850// FIXME: We should freeze the odd vector here. 
We already handled the case
    // of provably undef/poison above.

    // Widen EvenV and OddV with zeros and add one copy of OddV to EvenV with
    // vwaddu.vv.
    Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
                              OddV, Passthru, Mask, VL);

    // Then compute OddV * (2^VecVT.getScalarSizeInBits() - 1), i.e. OddV
    // multiplied by an all-ones value.
    SDValue AllOnesVec = DAG.getSplatVector(
        VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
    SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
                                  OddV, AllOnesVec, Passthru, Mask, VL);

    // Add the two together so we get
    //   (OddV * 0xff...ff) + (OddV + EvenV)
    //   = (OddV * 0x100...00) + EvenV
    //   = (OddV << VecVT.getScalarSizeInBits()) + EvenV
    // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx.
    Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
                              Interleaved, OddsMul, Passthru, Mask, VL);
  }

  // Bitcast from <vscale x n x ty*2> to <vscale x 2*n x ty>.
  MVT ResultContainerVT = MVT::getVectorVT(
      VecVT.getVectorElementType(), // Make sure to use the original type.
      VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
  Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);

  // Convert back to a fixed vector if needed.
  MVT ResultVT =
      MVT::getVectorVT(VecVT.getVectorElementType(),
                       VecVT.getVectorElementCount().multiplyCoefficientBy(2));
  if (ResultVT.isFixedLengthVector())
    Interleaved =
        convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);

  return Interleaved;
}

// If we have a vector of bits that we want to reverse, we can use a vbrev on a
// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
                                      SelectionDAG &DAG,
                                      const RISCVSubtarget &Subtarget) {
  SDLoc DL(SVN);
  MVT VT = SVN->getSimpleValueType(0);
  SDValue V = SVN->getOperand(0);
  unsigned NumElts = VT.getVectorNumElements();

  assert(VT.getVectorElementType() == MVT::i1);

  if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
                                        SVN->getMask().size()) ||
      !SVN->getOperand(1).isUndef())
    return SDValue();

  unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
  EVT ViaVT = EVT::getVectorVT(
      *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
  EVT ViaBitVT =
      EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());

  // If we don't have Zvbb, or the larger element type is > ELEN, the operation
  // will be illegal.
  if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
                                                               ViaVT) ||
      !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
    return SDValue();

  // If the bit vector doesn't fit exactly into the larger element type, we
  // need to insert it into the larger vector and then shift up the reversed
  // bits afterwards to get rid of the gap introduced.
  if (ViaEltSize > NumElts)
    V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
                    V, DAG.getVectorIdxConstant(0, DL));

  SDValue Res =
      DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));

  // Shift up the reversed bits if the vector didn't exactly fit into the
  // larger element type.
  if (ViaEltSize > NumElts)
    Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
                      DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));

  Res = DAG.getBitcast(ViaBitVT, Res);

  if (ViaEltSize > NumElts)
    Res =
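        // Extract the original NumElts x i1 subvector back out of the wider
        // reversed vector.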
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,4940DAG.getVectorIdxConstant(0, DL));4941return Res;4942}49434944static bool isLegalBitRotate(ShuffleVectorSDNode *SVN,4945SelectionDAG &DAG,4946const RISCVSubtarget &Subtarget,4947MVT &RotateVT, unsigned &RotateAmt) {4948SDLoc DL(SVN);49494950EVT VT = SVN->getValueType(0);4951unsigned NumElts = VT.getVectorNumElements();4952unsigned EltSizeInBits = VT.getScalarSizeInBits();4953unsigned NumSubElts;4954if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,4955NumElts, NumSubElts, RotateAmt))4956return false;4957RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),4958NumElts / NumSubElts);49594960// We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.4961return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);4962}49634964// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can4965// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this4966// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.4967static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,4968SelectionDAG &DAG,4969const RISCVSubtarget &Subtarget) {4970SDLoc DL(SVN);49714972EVT VT = SVN->getValueType(0);4973unsigned RotateAmt;4974MVT RotateVT;4975if (!isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))4976return SDValue();49774978SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));49794980SDValue Rotate;4981// A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,4982// so canonicalize to vrev8.4983if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)4984Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);4985else4986Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,4987DAG.getConstant(RotateAmt, DL, RotateVT));49884989return DAG.getBitcast(VT, Rotate);4990}49914992// If compiling with an exactly known VLEN, see if we can split a4993// shuffle on m2 or larger into a small number of m1 sized shuffles4994// which write each destination registers exactly once.4995static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,4996SelectionDAG &DAG,4997const RISCVSubtarget &Subtarget) {4998SDLoc DL(SVN);4999MVT VT = SVN->getSimpleValueType(0);5000SDValue V1 = SVN->getOperand(0);5001SDValue V2 = SVN->getOperand(1);5002ArrayRef<int> Mask = SVN->getMask();5003unsigned NumElts = VT.getVectorNumElements();50045005// If we don't know exact data layout, not much we can do. 
If this5006// is already m1 or smaller, no point in splitting further.5007const auto VLen = Subtarget.getRealVLen();5008if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)5009return SDValue();50105011// Avoid picking up bitrotate patterns which we have a linear-in-lmul5012// expansion for.5013unsigned RotateAmt;5014MVT RotateVT;5015if (isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))5016return SDValue();50175018MVT ElemVT = VT.getVectorElementType();5019unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();5020unsigned VRegsPerSrc = NumElts / ElemsPerVReg;50215022SmallVector<std::pair<int, SmallVector<int>>>5023OutMasks(VRegsPerSrc, {-1, {}});50245025// Check if our mask can be done as a 1-to-1 mapping from source5026// to destination registers in the group without needing to5027// write each destination more than once.5028for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx++) {5029int DstVecIdx = DstIdx / ElemsPerVReg;5030int DstSubIdx = DstIdx % ElemsPerVReg;5031int SrcIdx = Mask[DstIdx];5032if (SrcIdx < 0 || (unsigned)SrcIdx >= 2 * NumElts)5033continue;5034int SrcVecIdx = SrcIdx / ElemsPerVReg;5035int SrcSubIdx = SrcIdx % ElemsPerVReg;5036if (OutMasks[DstVecIdx].first == -1)5037OutMasks[DstVecIdx].first = SrcVecIdx;5038if (OutMasks[DstVecIdx].first != SrcVecIdx)5039// Note: This case could easily be handled by keeping track of a chain5040// of source values and generating two element shuffles below. This is5041// less an implementation question, and more a profitability one.5042return SDValue();50435044OutMasks[DstVecIdx].second.resize(ElemsPerVReg, -1);5045OutMasks[DstVecIdx].second[DstSubIdx] = SrcSubIdx;5046}50475048EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);5049MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);5050MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);5051assert(M1VT == getLMUL1VT(M1VT));5052unsigned NumOpElts = M1VT.getVectorMinNumElements();5053SDValue Vec = DAG.getUNDEF(ContainerVT);5054// The following semantically builds up a fixed length concat_vector5055// of the component shuffle_vectors. We eagerly lower to scalable here5056// to avoid DAG combining it back to a large shuffle_vector again.5057V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);5058V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);5059for (unsigned DstVecIdx = 0 ; DstVecIdx < OutMasks.size(); DstVecIdx++) {5060auto &[SrcVecIdx, SrcSubMask] = OutMasks[DstVecIdx];5061if (SrcVecIdx == -1)5062continue;5063unsigned ExtractIdx = (SrcVecIdx % VRegsPerSrc) * NumOpElts;5064SDValue SrcVec = (unsigned)SrcVecIdx >= VRegsPerSrc ? 
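    // Source register indices in [0, VRegsPerSrc) refer to registers of V1;
    // indices >= VRegsPerSrc refer to the corresponding register of V2.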
V2 : V1;5065SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,5066DAG.getVectorIdxConstant(ExtractIdx, DL));5067SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);5068SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec, SubVec, SrcSubMask);5069SubVec = convertToScalableVector(M1VT, SubVec, DAG, Subtarget);5070unsigned InsertIdx = DstVecIdx * NumOpElts;5071Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubVec,5072DAG.getVectorIdxConstant(InsertIdx, DL));5073}5074return convertFromScalableVector(VT, Vec, DAG, Subtarget);5075}50765077static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,5078const RISCVSubtarget &Subtarget) {5079SDValue V1 = Op.getOperand(0);5080SDValue V2 = Op.getOperand(1);5081SDLoc DL(Op);5082MVT XLenVT = Subtarget.getXLenVT();5083MVT VT = Op.getSimpleValueType();5084unsigned NumElts = VT.getVectorNumElements();5085ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());50865087if (VT.getVectorElementType() == MVT::i1) {5088// Lower to a vror.vi of a larger element type if possible before we promote5089// i1s to i8s.5090if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))5091return V;5092if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))5093return V;50945095// Promote i1 shuffle to i8 shuffle.5096MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());5097V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);5098V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)5099: DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);5100SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());5101return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),5102ISD::SETNE);5103}51045105MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);51065107auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);51085109if (SVN->isSplat()) {5110const int Lane = SVN->getSplatIndex();5111if (Lane >= 0) {5112MVT SVT = VT.getVectorElementType();51135114// Turn splatted vector load into a strided load with an X0 stride.5115SDValue V = V1;5116// Peek through CONCAT_VECTORS as VectorCombine can concat a vector5117// with undef.5118// FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?5119int Offset = Lane;5120if (V.getOpcode() == ISD::CONCAT_VECTORS) {5121int OpElements =5122V.getOperand(0).getSimpleValueType().getVectorNumElements();5123V = V.getOperand(Offset / OpElements);5124Offset %= OpElements;5125}51265127// We need to ensure the load isn't atomic or volatile.5128if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {5129auto *Ld = cast<LoadSDNode>(V);5130Offset *= SVT.getStoreSize();5131SDValue NewAddr = DAG.getMemBasePlusOffset(5132Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);51335134// If this is SEW=64 on RV32, use a strided load with a stride of x0.5135if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {5136SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});5137SDValue IntID =5138DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);5139SDValue Ops[] = {Ld->getChain(),5140IntID,5141DAG.getUNDEF(ContainerVT),5142NewAddr,5143DAG.getRegister(RISCV::X0, XLenVT),5144VL};5145SDValue NewLoad = DAG.getMemIntrinsicNode(5146ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,5147DAG.getMachineFunction().getMachineMemOperand(5148Ld->getMemOperand(), Offset, SVT.getStoreSize()));5149DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);5150return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);5151}51525153MVT SplatVT = 
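        // By default, splat in the container type; this is overridden below
        // for f16 without Zfh, where an integer scalar load and vmv.v.x are
        // used instead.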
ContainerVT;51545155// If we don't have Zfh, we need to use an integer scalar load.5156if (SVT == MVT::f16 && !Subtarget.hasStdExtZfh()) {5157SVT = MVT::i16;5158SplatVT = ContainerVT.changeVectorElementType(SVT);5159}51605161// Otherwise use a scalar load and splat. This will give the best5162// opportunity to fold a splat into the operation. ISel can turn it into5163// the x0 strided load if we aren't able to fold away the select.5164if (SVT.isFloatingPoint())5165V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,5166Ld->getPointerInfo().getWithOffset(Offset),5167Ld->getOriginalAlign(),5168Ld->getMemOperand()->getFlags());5169else5170V = DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,5171Ld->getPointerInfo().getWithOffset(Offset), SVT,5172Ld->getOriginalAlign(),5173Ld->getMemOperand()->getFlags());5174DAG.makeEquivalentMemoryOrdering(Ld, V);51755176unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL5177: RISCVISD::VMV_V_X_VL;5178SDValue Splat =5179DAG.getNode(Opc, DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL);5180Splat = DAG.getBitcast(ContainerVT, Splat);5181return convertFromScalableVector(VT, Splat, DAG, Subtarget);5182}51835184V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);5185assert(Lane < (int)NumElts && "Unexpected lane!");5186SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,5187V1, DAG.getConstant(Lane, DL, XLenVT),5188DAG.getUNDEF(ContainerVT), TrueMask, VL);5189return convertFromScalableVector(VT, Gather, DAG, Subtarget);5190}5191}51925193// For exact VLEN m2 or greater, try to split to m1 operations if we5194// can split cleanly.5195if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))5196return V;51975198ArrayRef<int> Mask = SVN->getMask();51995200if (SDValue V =5201lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))5202return V;52035204if (SDValue V =5205lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))5206return V;52075208// A bitrotate will be one instruction on Zvkb, so try to lower to it first if5209// available.5210if (Subtarget.hasStdExtZvkb())5211if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))5212return V;52135214// Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may5215// be undef which can be handled with a single SLIDEDOWN/UP.5216int LoSrc, HiSrc;5217int Rotation = isElementRotate(LoSrc, HiSrc, Mask);5218if (Rotation > 0) {5219SDValue LoV, HiV;5220if (LoSrc >= 0) {5221LoV = LoSrc == 0 ? V1 : V2;5222LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);5223}5224if (HiSrc >= 0) {5225HiV = HiSrc == 0 ? V1 : V2;5226HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);5227}52285229// We found a rotation. We need to slide HiV down by Rotation. 
Then we need5230// to slide LoV up by (NumElts - Rotation).5231unsigned InvRotate = NumElts - Rotation;52325233SDValue Res = DAG.getUNDEF(ContainerVT);5234if (HiV) {5235// Even though we could use a smaller VL, don't to avoid a vsetivli5236// toggle.5237Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,5238DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);5239}5240if (LoV)5241Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,5242DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,5243RISCVII::TAIL_AGNOSTIC);52445245return convertFromScalableVector(VT, Res, DAG, Subtarget);5246}52475248// If this is a deinterleave and we can widen the vector, then we can use5249// vnsrl to deinterleave.5250if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) {5251return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0,5252Subtarget, DAG);5253}52545255if (SDValue V =5256lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))5257return V;52585259// Detect an interleave shuffle and lower to5260// (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))5261int EvenSrc, OddSrc;5262if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {5263// Extract the halves of the vectors.5264MVT HalfVT = VT.getHalfNumVectorElementsVT();52655266int Size = Mask.size();5267SDValue EvenV, OddV;5268assert(EvenSrc >= 0 && "Undef source?");5269EvenV = (EvenSrc / Size) == 0 ? V1 : V2;5270EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,5271DAG.getVectorIdxConstant(EvenSrc % Size, DL));52725273assert(OddSrc >= 0 && "Undef source?");5274OddV = (OddSrc / Size) == 0 ? V1 : V2;5275OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,5276DAG.getVectorIdxConstant(OddSrc % Size, DL));52775278return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);5279}528052815282// Handle any remaining single source shuffles5283assert(!V1.isUndef() && "Unexpected shuffle canonicalization");5284if (V2.isUndef()) {5285// We might be able to express the shuffle as a bitrotate. But even if we5286// don't have Zvkb and have to expand, the expanded sequence of approx. 25287// shifts and a vor will have a higher throughput than a vrgather.5288if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))5289return V;52905291if (VT.getScalarSizeInBits() == 8 &&5292any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {5293// On such a vector we're unable to use i8 as the index type.5294// FIXME: We could promote the index to i16 and use vrgatherei16, but that5295// may involve vector splitting if we're already at LMUL=8, or our5296// user-supplied maximum fixed-length LMUL.5297return SDValue();5298}52995300// Base case for the two operand recursion below - handle the worst case5301// single source shuffle.5302unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;5303MVT IndexVT = VT.changeTypeToInteger();5304// Since we can't introduce illegal index types at this stage, use i16 and5305// vrgatherei16 if the corresponding index type for plain vrgather is greater5306// than XLenVT.5307if (IndexVT.getScalarType().bitsGT(XLenVT)) {5308GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;5309IndexVT = IndexVT.changeVectorElementType(MVT::i16);5310}53115312// If the mask allows, we can do all the index computation in 16 bits. 
This5313// requires less work and less register pressure at high LMUL, and creates5314// smaller constants which may be cheaper to materialize.5315if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&5316(IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {5317GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;5318IndexVT = IndexVT.changeVectorElementType(MVT::i16);5319}53205321MVT IndexContainerVT =5322ContainerVT.changeVectorElementType(IndexVT.getScalarType());53235324V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);5325SmallVector<SDValue> GatherIndicesLHS;5326for (int MaskIndex : Mask) {5327bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;5328GatherIndicesLHS.push_back(IsLHSIndex5329? DAG.getConstant(MaskIndex, DL, XLenVT)5330: DAG.getUNDEF(XLenVT));5331}5332SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);5333LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG,5334Subtarget);5335SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,5336DAG.getUNDEF(ContainerVT), TrueMask, VL);5337return convertFromScalableVector(VT, Gather, DAG, Subtarget);5338}53395340// As a backup, shuffles can be lowered via a vrgather instruction, possibly5341// merged with a second vrgather.5342SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;53435344// Now construct the mask that will be used by the blended vrgather operation.5345// Construct the appropriate indices into each vector.5346for (int MaskIndex : Mask) {5347bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;5348ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 05349? MaskIndex : -1);5350ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));5351}53525353// Try to pick a profitable operand order.5354bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);5355SwapOps = SwapOps ^ ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts);53565357// Recursively invoke lowering for each operand if we had two5358// independent single source shuffles, and then combine the result via a5359// vselect. Note that the vselect will likely be folded back into the5360// second permute (vrgather, or other) by the post-isel combine.5361V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);5362V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);53635364SmallVector<SDValue> MaskVals;5365for (int MaskIndex : Mask) {5366bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;5367MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));5368}53695370assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");5371MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);5372SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);53735374if (SwapOps)5375return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);5376return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);5377}53785379bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {5380// Support splats for any type. 
These should type legalize well.5381if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))5382return true;53835384// Only support legal VTs for other shuffles for now.5385if (!isTypeLegal(VT))5386return false;53875388MVT SVT = VT.getSimpleVT();53895390// Not for i1 vectors.5391if (SVT.getScalarType() == MVT::i1)5392return false;53935394int Dummy1, Dummy2;5395return (isElementRotate(Dummy1, Dummy2, M) > 0) ||5396isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);5397}53985399// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting5400// the exponent.5401SDValue5402RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,5403SelectionDAG &DAG) const {5404MVT VT = Op.getSimpleValueType();5405unsigned EltSize = VT.getScalarSizeInBits();5406SDValue Src = Op.getOperand(0);5407SDLoc DL(Op);5408MVT ContainerVT = VT;54095410SDValue Mask, VL;5411if (Op->isVPOpcode()) {5412Mask = Op.getOperand(1);5413if (VT.isFixedLengthVector())5414Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,5415Subtarget);5416VL = Op.getOperand(2);5417}54185419// We choose FP type that can represent the value if possible. Otherwise, we5420// use rounding to zero conversion for correct exponent of the result.5421// TODO: Use f16 for i8 when possible?5422MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;5423if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))5424FloatEltVT = MVT::f32;5425MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());54265427// Legal types should have been checked in the RISCVTargetLowering5428// constructor.5429// TODO: Splitting may make sense in some cases.5430assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&5431"Expected legal float type!");54325433// For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.5434// The trailing zero count is equal to log2 of this single bit value.5435if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {5436SDValue Neg = DAG.getNegative(Src, DL, VT);5437Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);5438} else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {5439SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),5440Src, Mask, VL);5441Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);5442}54435444// We have a legal FP type, convert to it.5445SDValue FloatVal;5446if (FloatVT.bitsGT(VT)) {5447if (Op->isVPOpcode())5448FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);5449else5450FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);5451} else {5452// Use RTZ to avoid rounding influencing exponent of FloatVal.5453if (VT.isFixedLengthVector()) {5454ContainerVT = getContainerForFixedLengthVector(VT);5455Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);5456}5457if (!Op->isVPOpcode())5458std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);5459SDValue RTZRM =5460DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());5461MVT ContainerFloatVT =5462MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());5463FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,5464Src, Mask, RTZRM, VL);5465if (VT.isFixedLengthVector())5466FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);5467}5468// Bitcast to integer and shift the exponent to the LSB.5469EVT IntVT = FloatVT.changeVectorElementTypeToInteger();5470SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);5471unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 
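  // The IEEE-754 mantissa is 52 bits wide for f64 and 23 bits for f32, so
  // shifting right by the mantissa width leaves the biased exponent in the
  // low bits. Editor's example (not from the original source): for an i32
  // input of 16, the f32 exponent field holds 127 + 4 = 131, and the CTLZ
  // path below computes (127 + 31) - 131 = 27 == ctlz(16).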
52 : 23;54725473SDValue Exp;5474// Restore back to original type. Truncation after SRL is to generate vnsrl.5475if (Op->isVPOpcode()) {5476Exp = DAG.getNode(ISD::VP_SRL, DL, IntVT, Bitcast,5477DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);5478Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);5479} else {5480Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,5481DAG.getConstant(ShiftAmt, DL, IntVT));5482if (IntVT.bitsLT(VT))5483Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);5484else if (IntVT.bitsGT(VT))5485Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);5486}54875488// The exponent contains log2 of the value in biased form.5489unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;5490// For trailing zeros, we just need to subtract the bias.5491if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)5492return DAG.getNode(ISD::SUB, DL, VT, Exp,5493DAG.getConstant(ExponentBias, DL, VT));5494if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)5495return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,5496DAG.getConstant(ExponentBias, DL, VT), Mask, VL);54975498// For leading zeros, we need to remove the bias and convert from log2 to5499// leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).5500unsigned Adjust = ExponentBias + (EltSize - 1);5501SDValue Res;5502if (Op->isVPOpcode())5503Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,5504Mask, VL);5505else5506Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);55075508// The above result with zero input equals to Adjust which is greater than5509// EltSize. Hence, we can do min(Res, EltSize) for CTLZ.5510if (Op.getOpcode() == ISD::CTLZ)5511Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));5512else if (Op.getOpcode() == ISD::VP_CTLZ)5513Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,5514DAG.getConstant(EltSize, DL, VT), Mask, VL);5515return Res;5516}55175518SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,5519SelectionDAG &DAG) const {5520SDLoc DL(Op);5521MVT XLenVT = Subtarget.getXLenVT();5522SDValue Source = Op->getOperand(0);5523MVT SrcVT = Source.getSimpleValueType();5524SDValue Mask = Op->getOperand(1);5525SDValue EVL = Op->getOperand(2);55265527if (SrcVT.isFixedLengthVector()) {5528MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);5529Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);5530Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,5531Subtarget);5532SrcVT = ContainerVT;5533}55345535// Convert to boolean vector.5536if (SrcVT.getScalarType() != MVT::i1) {5537SDValue AllZero = DAG.getConstant(0, DL, SrcVT);5538SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());5539Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,5540{Source, AllZero, DAG.getCondCode(ISD::SETNE),5541DAG.getUNDEF(SrcVT), Mask, EVL});5542}55435544SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);5545if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)5546// In this case, we can interpret poison as -1, so nothing to do further.5547return Res;55485549// Convert -1 to VL.5550SDValue SetCC =5551DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);5552Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);5553return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);5554}55555556// While RVV has alignment restrictions, we should always be able to load as a5557// legal equivalently-sized byte-typed vector instead. 
This method is5558// responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If5559// the load is already correctly-aligned, it returns SDValue().5560SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,5561SelectionDAG &DAG) const {5562auto *Load = cast<LoadSDNode>(Op);5563assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");55645565if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),5566Load->getMemoryVT(),5567*Load->getMemOperand()))5568return SDValue();55695570SDLoc DL(Op);5571MVT VT = Op.getSimpleValueType();5572unsigned EltSizeBits = VT.getScalarSizeInBits();5573assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&5574"Unexpected unaligned RVV load type");5575MVT NewVT =5576MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));5577assert(NewVT.isValid() &&5578"Expecting equally-sized RVV vector types to be legal");5579SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),5580Load->getPointerInfo(), Load->getOriginalAlign(),5581Load->getMemOperand()->getFlags());5582return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);5583}55845585// While RVV has alignment restrictions, we should always be able to store as a5586// legal equivalently-sized byte-typed vector instead. This method is5587// responsible for re-expressing a ISD::STORE via a correctly-aligned type. It5588// returns SDValue() if the store is already correctly aligned.5589SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,5590SelectionDAG &DAG) const {5591auto *Store = cast<StoreSDNode>(Op);5592assert(Store && Store->getValue().getValueType().isVector() &&5593"Expected vector store");55945595if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),5596Store->getMemoryVT(),5597*Store->getMemOperand()))5598return SDValue();55995600SDLoc DL(Op);5601SDValue StoredVal = Store->getValue();5602MVT VT = StoredVal.getSimpleValueType();5603unsigned EltSizeBits = VT.getScalarSizeInBits();5604assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&5605"Unexpected unaligned RVV store type");5606MVT NewVT =5607MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));5608assert(NewVT.isValid() &&5609"Expecting equally-sized RVV vector types to be legal");5610StoredVal = DAG.getBitcast(NewVT, StoredVal);5611return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),5612Store->getPointerInfo(), Store->getOriginalAlign(),5613Store->getMemOperand()->getFlags());5614}56155616static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,5617const RISCVSubtarget &Subtarget) {5618assert(Op.getValueType() == MVT::i64 && "Unexpected VT");56195620int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();56215622// All simm32 constants should be handled by isel.5623// NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making5624// this check redundant, but small immediates are common so this check5625// should have better compile time.5626if (isInt<32>(Imm))5627return Op;56285629// We only need to cost the immediate, if constant pool lowering is enabled.5630if (!Subtarget.useConstantPoolForLargeInts())5631return Op;56325633RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);5634if (Seq.size() <= Subtarget.getMaxBuildIntsCost())5635return Op;56365637// Optimizations below are disabled for opt size. 
If we're optimizing for5638// size, use a constant pool.5639if (DAG.shouldOptForSize())5640return SDValue();56415642// Special case. See if we can build the constant as (ADD (SLLI X, C), X) do5643// that if it will avoid a constant pool.5644// It will require an extra temporary register though.5645// If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where5646// low and high 32 bits are the same and bit 31 and 63 are set.5647unsigned ShiftAmt, AddOpc;5648RISCVMatInt::InstSeq SeqLo =5649RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);5650if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())5651return Op;56525653return SDValue();5654}56555656static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,5657const RISCVSubtarget &Subtarget) {5658SDLoc dl(Op);5659AtomicOrdering FenceOrdering =5660static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));5661SyncScope::ID FenceSSID =5662static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));56635664if (Subtarget.hasStdExtZtso()) {5665// The only fence that needs an instruction is a sequentially-consistent5666// cross-thread fence.5667if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&5668FenceSSID == SyncScope::System)5669return Op;56705671// MEMBARRIER is a compiler barrier; it codegens to a no-op.5672return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));5673}56745675// singlethread fences only synchronize with signal handlers on the same5676// thread and thus only need to preserve instruction order, not actually5677// enforce memory ordering.5678if (FenceSSID == SyncScope::SingleThread)5679// MEMBARRIER is a compiler barrier; it codegens to a no-op.5680return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));56815682return Op;5683}56845685static SDValue lowerSADDSAT_SSUBSAT(SDValue Op, SelectionDAG &DAG) {5686assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&5687"Unexpected custom legalisation");56885689// With Zbb, we can widen to i64 and smin/smax with INT32_MAX/MIN.5690bool IsAdd = Op.getOpcode() == ISD::SADDSAT;5691SDLoc DL(Op);5692SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));5693SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));5694SDValue Result =5695DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);56965697APInt MinVal = APInt::getSignedMinValue(32).sext(64);5698APInt MaxVal = APInt::getSignedMaxValue(32).sext(64);5699SDValue SatMin = DAG.getConstant(MinVal, DL, MVT::i64);5700SDValue SatMax = DAG.getConstant(MaxVal, DL, MVT::i64);5701Result = DAG.getNode(ISD::SMIN, DL, MVT::i64, Result, SatMax);5702Result = DAG.getNode(ISD::SMAX, DL, MVT::i64, Result, SatMin);5703return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);5704}57055706static SDValue lowerUADDSAT_USUBSAT(SDValue Op, SelectionDAG &DAG) {5707assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&5708"Unexpected custom legalisation");57095710// With Zbb we can sign extend and let LegalizeDAG use minu/maxu. 
Using5711// sign extend allows overflow of the lower 32 bits to be detected on5712// the promoted size.5713SDLoc DL(Op);5714SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));5715SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));5716SDValue WideOp = DAG.getNode(Op.getOpcode(), DL, MVT::i64, LHS, RHS);5717return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, WideOp);5718}57195720// Custom lower i32 SADDO/SSUBO with RV64LegalI32 so we take advantage of addw.5721static SDValue lowerSADDO_SSUBO(SDValue Op, SelectionDAG &DAG) {5722assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&5723"Unexpected custom legalisation");5724if (isa<ConstantSDNode>(Op.getOperand(1)))5725return SDValue();57265727bool IsAdd = Op.getOpcode() == ISD::SADDO;5728SDLoc DL(Op);5729SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));5730SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));5731SDValue WideOp =5732DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);5733SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, WideOp);5734SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, WideOp,5735DAG.getValueType(MVT::i32));5736SDValue Ovf = DAG.getSetCC(DL, Op.getValue(1).getValueType(), WideOp, SExt,5737ISD::SETNE);5738return DAG.getMergeValues({Res, Ovf}, DL);5739}57405741// Custom lower i32 SMULO with RV64LegalI32 so we take advantage of mulw.5742static SDValue lowerSMULO(SDValue Op, SelectionDAG &DAG) {5743assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&5744"Unexpected custom legalisation");5745SDLoc DL(Op);5746SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));5747SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));5748SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);5749SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);5750SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Mul,5751DAG.getValueType(MVT::i32));5752SDValue Ovf = DAG.getSetCC(DL, Op.getValue(1).getValueType(), Mul, SExt,5753ISD::SETNE);5754return DAG.getMergeValues({Res, Ovf}, DL);5755}57565757SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,5758SelectionDAG &DAG) const {5759SDLoc DL(Op);5760MVT VT = Op.getSimpleValueType();5761MVT XLenVT = Subtarget.getXLenVT();5762unsigned Check = Op.getConstantOperandVal(1);5763unsigned TDCMask = 0;5764if (Check & fcSNan)5765TDCMask |= RISCV::FPMASK_Signaling_NaN;5766if (Check & fcQNan)5767TDCMask |= RISCV::FPMASK_Quiet_NaN;5768if (Check & fcPosInf)5769TDCMask |= RISCV::FPMASK_Positive_Infinity;5770if (Check & fcNegInf)5771TDCMask |= RISCV::FPMASK_Negative_Infinity;5772if (Check & fcPosNormal)5773TDCMask |= RISCV::FPMASK_Positive_Normal;5774if (Check & fcNegNormal)5775TDCMask |= RISCV::FPMASK_Negative_Normal;5776if (Check & fcPosSubnormal)5777TDCMask |= RISCV::FPMASK_Positive_Subnormal;5778if (Check & fcNegSubnormal)5779TDCMask |= RISCV::FPMASK_Negative_Subnormal;5780if (Check & fcPosZero)5781TDCMask |= RISCV::FPMASK_Positive_Zero;5782if (Check & fcNegZero)5783TDCMask |= RISCV::FPMASK_Negative_Zero;57845785bool IsOneBitMask = isPowerOf2_32(TDCMask);57865787SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);57885789if (VT.isVector()) {5790SDValue Op0 = Op.getOperand(0);5791MVT VT0 = Op.getOperand(0).getSimpleValueType();57925793if (VT.isScalableVector()) {5794MVT DstVT = VT0.changeVectorElementTypeToInteger();5795auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);5796if 
(Op.getOpcode() == ISD::VP_IS_FPCLASS) {5797Mask = Op.getOperand(2);5798VL = Op.getOperand(3);5799}5800SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,5801VL, Op->getFlags());5802if (IsOneBitMask)5803return DAG.getSetCC(DL, VT, FPCLASS,5804DAG.getConstant(TDCMask, DL, DstVT),5805ISD::CondCode::SETEQ);5806SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,5807DAG.getConstant(TDCMask, DL, DstVT));5808return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),5809ISD::SETNE);5810}58115812MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);5813MVT ContainerVT = getContainerForFixedLengthVector(VT);5814MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();5815auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);5816if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {5817Mask = Op.getOperand(2);5818MVT MaskContainerVT =5819getContainerForFixedLengthVector(Mask.getSimpleValueType());5820Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);5821VL = Op.getOperand(3);5822}5823Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);58245825SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,5826Mask, VL, Op->getFlags());58275828TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,5829DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);5830if (IsOneBitMask) {5831SDValue VMSEQ =5832DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,5833{FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),5834DAG.getUNDEF(ContainerVT), Mask, VL});5835return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);5836}5837SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,5838TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);58395840SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);5841SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,5842DAG.getUNDEF(ContainerDstVT), SplatZero, VL);58435844SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,5845{AND, SplatZero, DAG.getCondCode(ISD::SETNE),5846DAG.getUNDEF(ContainerVT), Mask, VL});5847return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);5848}58495850SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));5851SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);5852SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),5853ISD::CondCode::SETNE);5854return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);5855}58565857// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these5858// operations propagate nans.5859static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,5860const RISCVSubtarget &Subtarget) {5861SDLoc DL(Op);5862MVT VT = Op.getSimpleValueType();58635864SDValue X = Op.getOperand(0);5865SDValue Y = Op.getOperand(1);58665867if (!VT.isVector()) {5868MVT XLenVT = Subtarget.getXLenVT();58695870// If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This5871// ensures that when one input is a nan, the other will also be a nan5872// allowing the nan to propagate. 
If both inputs are nan, this will swap the5873// inputs which is harmless.58745875SDValue NewY = Y;5876if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {5877SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);5878NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);5879}58805881SDValue NewX = X;5882if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {5883SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);5884NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);5885}58865887unsigned Opc =5888Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;5889return DAG.getNode(Opc, DL, VT, NewX, NewY);5890}58915892// Check no NaNs before converting to fixed vector scalable.5893bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);5894bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);58955896MVT ContainerVT = VT;5897if (VT.isFixedLengthVector()) {5898ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);5899X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);5900Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);5901}59025903SDValue Mask, VL;5904if (Op->isVPOpcode()) {5905Mask = Op.getOperand(2);5906if (VT.isFixedLengthVector())5907Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,5908Subtarget);5909VL = Op.getOperand(3);5910} else {5911std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);5912}59135914SDValue NewY = Y;5915if (!XIsNeverNan) {5916SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),5917{X, X, DAG.getCondCode(ISD::SETOEQ),5918DAG.getUNDEF(ContainerVT), Mask, VL});5919NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,5920DAG.getUNDEF(ContainerVT), VL);5921}59225923SDValue NewX = X;5924if (!YIsNeverNan) {5925SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),5926{Y, Y, DAG.getCondCode(ISD::SETOEQ),5927DAG.getUNDEF(ContainerVT), Mask, VL});5928NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,5929DAG.getUNDEF(ContainerVT), VL);5930}59315932unsigned Opc =5933Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM5934? 
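      // vfmin/vfmax follow IEEE-754 minimumNumber/maximumNumber and return
      // the non-NaN operand; the merges above restore the NaN-propagating
      // semantics that fminimum/fmaximum require.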
RISCVISD::VFMAX_VL5935: RISCVISD::VFMIN_VL;5936SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,5937DAG.getUNDEF(ContainerVT), Mask, VL);5938if (VT.isFixedLengthVector())5939Res = convertFromScalableVector(VT, Res, DAG, Subtarget);5940return Res;5941}59425943/// Get a RISC-V target specified VL op for a given SDNode.5944static unsigned getRISCVVLOp(SDValue Op) {5945#define OP_CASE(NODE) \5946case ISD::NODE: \5947return RISCVISD::NODE##_VL;5948#define VP_CASE(NODE) \5949case ISD::VP_##NODE: \5950return RISCVISD::NODE##_VL;5951// clang-format off5952switch (Op.getOpcode()) {5953default:5954llvm_unreachable("don't have RISC-V specified VL op for this SDNode");5955OP_CASE(ADD)5956OP_CASE(SUB)5957OP_CASE(MUL)5958OP_CASE(MULHS)5959OP_CASE(MULHU)5960OP_CASE(SDIV)5961OP_CASE(SREM)5962OP_CASE(UDIV)5963OP_CASE(UREM)5964OP_CASE(SHL)5965OP_CASE(SRA)5966OP_CASE(SRL)5967OP_CASE(ROTL)5968OP_CASE(ROTR)5969OP_CASE(BSWAP)5970OP_CASE(CTTZ)5971OP_CASE(CTLZ)5972OP_CASE(CTPOP)5973OP_CASE(BITREVERSE)5974OP_CASE(SADDSAT)5975OP_CASE(UADDSAT)5976OP_CASE(SSUBSAT)5977OP_CASE(USUBSAT)5978OP_CASE(AVGFLOORS)5979OP_CASE(AVGFLOORU)5980OP_CASE(AVGCEILS)5981OP_CASE(AVGCEILU)5982OP_CASE(FADD)5983OP_CASE(FSUB)5984OP_CASE(FMUL)5985OP_CASE(FDIV)5986OP_CASE(FNEG)5987OP_CASE(FABS)5988OP_CASE(FSQRT)5989OP_CASE(SMIN)5990OP_CASE(SMAX)5991OP_CASE(UMIN)5992OP_CASE(UMAX)5993OP_CASE(STRICT_FADD)5994OP_CASE(STRICT_FSUB)5995OP_CASE(STRICT_FMUL)5996OP_CASE(STRICT_FDIV)5997OP_CASE(STRICT_FSQRT)5998VP_CASE(ADD) // VP_ADD5999VP_CASE(SUB) // VP_SUB6000VP_CASE(MUL) // VP_MUL6001VP_CASE(SDIV) // VP_SDIV6002VP_CASE(SREM) // VP_SREM6003VP_CASE(UDIV) // VP_UDIV6004VP_CASE(UREM) // VP_UREM6005VP_CASE(SHL) // VP_SHL6006VP_CASE(FADD) // VP_FADD6007VP_CASE(FSUB) // VP_FSUB6008VP_CASE(FMUL) // VP_FMUL6009VP_CASE(FDIV) // VP_FDIV6010VP_CASE(FNEG) // VP_FNEG6011VP_CASE(FABS) // VP_FABS6012VP_CASE(SMIN) // VP_SMIN6013VP_CASE(SMAX) // VP_SMAX6014VP_CASE(UMIN) // VP_UMIN6015VP_CASE(UMAX) // VP_UMAX6016VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN6017VP_CASE(SETCC) // VP_SETCC6018VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP6019VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP6020VP_CASE(BITREVERSE) // VP_BITREVERSE6021VP_CASE(SADDSAT) // VP_SADDSAT6022VP_CASE(UADDSAT) // VP_UADDSAT6023VP_CASE(SSUBSAT) // VP_SSUBSAT6024VP_CASE(USUBSAT) // VP_USUBSAT6025VP_CASE(BSWAP) // VP_BSWAP6026VP_CASE(CTLZ) // VP_CTLZ6027VP_CASE(CTTZ) // VP_CTTZ6028VP_CASE(CTPOP) // VP_CTPOP6029case ISD::CTLZ_ZERO_UNDEF:6030case ISD::VP_CTLZ_ZERO_UNDEF:6031return RISCVISD::CTLZ_VL;6032case ISD::CTTZ_ZERO_UNDEF:6033case ISD::VP_CTTZ_ZERO_UNDEF:6034return RISCVISD::CTTZ_VL;6035case ISD::FMA:6036case ISD::VP_FMA:6037return RISCVISD::VFMADD_VL;6038case ISD::STRICT_FMA:6039return RISCVISD::STRICT_VFMADD_VL;6040case ISD::AND:6041case ISD::VP_AND:6042if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)6043return RISCVISD::VMAND_VL;6044return RISCVISD::AND_VL;6045case ISD::OR:6046case ISD::VP_OR:6047if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)6048return RISCVISD::VMOR_VL;6049return RISCVISD::OR_VL;6050case ISD::XOR:6051case ISD::VP_XOR:6052if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)6053return RISCVISD::VMXOR_VL;6054return RISCVISD::XOR_VL;6055case ISD::VP_SELECT:6056case ISD::VP_MERGE:6057return RISCVISD::VMERGE_VL;6058case ISD::VP_SRA:6059return RISCVISD::SRA_VL;6060case ISD::VP_SRL:6061return RISCVISD::SRL_VL;6062case ISD::VP_SQRT:6063return RISCVISD::FSQRT_VL;6064case ISD::VP_SIGN_EXTEND:6065return RISCVISD::VSEXT_VL;6066case ISD::VP_ZERO_EXTEND:6067return 
RISCVISD::VZEXT_VL;6068case ISD::VP_FP_TO_SINT:6069return RISCVISD::VFCVT_RTZ_X_F_VL;6070case ISD::VP_FP_TO_UINT:6071return RISCVISD::VFCVT_RTZ_XU_F_VL;6072case ISD::FMINNUM:6073case ISD::VP_FMINNUM:6074return RISCVISD::VFMIN_VL;6075case ISD::FMAXNUM:6076case ISD::VP_FMAXNUM:6077return RISCVISD::VFMAX_VL;6078case ISD::LRINT:6079case ISD::VP_LRINT:6080case ISD::LLRINT:6081case ISD::VP_LLRINT:6082return RISCVISD::VFCVT_X_F_VL;6083}6084// clang-format on6085#undef OP_CASE6086#undef VP_CASE6087}60886089/// Return true if a RISC-V target specified op has a merge operand.6090static bool hasMergeOp(unsigned Opcode) {6091assert(Opcode > RISCVISD::FIRST_NUMBER &&6092Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&6093"not a RISC-V target specific op");6094static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==6095130 &&6096RISCVISD::LAST_RISCV_STRICTFP_OPCODE -6097ISD::FIRST_TARGET_STRICTFP_OPCODE ==609821 &&6099"adding target specific op should update this function");6100if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)6101return true;6102if (Opcode == RISCVISD::FCOPYSIGN_VL)6103return true;6104if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)6105return true;6106if (Opcode == RISCVISD::SETCC_VL)6107return true;6108if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)6109return true;6110if (Opcode == RISCVISD::VMERGE_VL)6111return true;6112return false;6113}61146115/// Return true if a RISC-V target specified op has a mask operand.6116static bool hasMaskOp(unsigned Opcode) {6117assert(Opcode > RISCVISD::FIRST_NUMBER &&6118Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&6119"not a RISC-V target specific op");6120static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==6121130 &&6122RISCVISD::LAST_RISCV_STRICTFP_OPCODE -6123ISD::FIRST_TARGET_STRICTFP_OPCODE ==612421 &&6125"adding target specific op should update this function");6126if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)6127return true;6128if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)6129return true;6130if (Opcode >= RISCVISD::STRICT_FADD_VL &&6131Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL)6132return true;6133return false;6134}61356136static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {6137auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());6138SDLoc DL(Op);61396140SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());6141SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());61426143for (unsigned j = 0; j != Op.getNumOperands(); ++j) {6144if (!Op.getOperand(j).getValueType().isVector()) {6145LoOperands[j] = Op.getOperand(j);6146HiOperands[j] = Op.getOperand(j);6147continue;6148}6149std::tie(LoOperands[j], HiOperands[j]) =6150DAG.SplitVector(Op.getOperand(j), DL);6151}61526153SDValue LoRes =6154DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());6155SDValue HiRes =6156DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());61576158return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);6159}61606161static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {6162assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");6163auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());6164SDLoc DL(Op);61656166SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());6167SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());61686169for (unsigned j = 0; j != Op.getNumOperands(); ++j) {6170if 
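// Illustrative example (for exposition): splitting an operation on
// <vscale x 32 x half> with SplitVectorOp/SplitVPOp yields two halves of type
// <vscale x 16 x half>; scalar operands are reused as-is, vector operands are
// split with SplitVector, the EVL operand of a VP op is split with SplitEVL,
// and the two partial results are rejoined with CONCAT_VECTORS.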
(ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {6171std::tie(LoOperands[j], HiOperands[j]) =6172DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);6173continue;6174}6175if (!Op.getOperand(j).getValueType().isVector()) {6176LoOperands[j] = Op.getOperand(j);6177HiOperands[j] = Op.getOperand(j);6178continue;6179}6180std::tie(LoOperands[j], HiOperands[j]) =6181DAG.SplitVector(Op.getOperand(j), DL);6182}61836184SDValue LoRes =6185DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());6186SDValue HiRes =6187DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());61886189return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);6190}61916192static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {6193SDLoc DL(Op);61946195auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);6196auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);6197auto [EVLLo, EVLHi] =6198DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);61996200SDValue ResLo =6201DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),6202{Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());6203return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),6204{ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());6205}62066207static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {62086209assert(Op->isStrictFPOpcode());62106211auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));62126213SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));6214SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));62156216SDLoc DL(Op);62176218SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());6219SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());62206221for (unsigned j = 0; j != Op.getNumOperands(); ++j) {6222if (!Op.getOperand(j).getValueType().isVector()) {6223LoOperands[j] = Op.getOperand(j);6224HiOperands[j] = Op.getOperand(j);6225continue;6226}6227std::tie(LoOperands[j], HiOperands[j]) =6228DAG.SplitVector(Op.getOperand(j), DL);6229}62306231SDValue LoRes =6232DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());6233HiOperands[0] = LoRes.getValue(1);6234SDValue HiRes =6235DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());62366237SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),6238LoRes.getValue(0), HiRes.getValue(0));6239return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);6240}62416242SDValue RISCVTargetLowering::LowerOperation(SDValue Op,6243SelectionDAG &DAG) const {6244switch (Op.getOpcode()) {6245default:6246report_fatal_error("unimplemented operand");6247case ISD::ATOMIC_FENCE:6248return LowerATOMIC_FENCE(Op, DAG, Subtarget);6249case ISD::GlobalAddress:6250return lowerGlobalAddress(Op, DAG);6251case ISD::BlockAddress:6252return lowerBlockAddress(Op, DAG);6253case ISD::ConstantPool:6254return lowerConstantPool(Op, DAG);6255case ISD::JumpTable:6256return lowerJumpTable(Op, DAG);6257case ISD::GlobalTLSAddress:6258return lowerGlobalTLSAddress(Op, DAG);6259case ISD::Constant:6260return lowerConstant(Op, DAG, Subtarget);6261case ISD::SELECT:6262return lowerSELECT(Op, DAG);6263case ISD::BRCOND:6264return lowerBRCOND(Op, DAG);6265case ISD::VASTART:6266return lowerVASTART(Op, DAG);6267case ISD::FRAMEADDR:6268return lowerFRAMEADDR(Op, DAG);6269case ISD::RETURNADDR:6270return lowerRETURNADDR(Op, DAG);6271case ISD::SADDO:6272case ISD::SSUBO:6273return lowerSADDO_SSUBO(Op, DAG);6274case ISD::SMULO:6275return lowerSMULO(Op, DAG);6276case ISD::SHL_PARTS:6277return lowerShiftLeftParts(Op, DAG);6278case 
ISD::SRA_PARTS:6279return lowerShiftRightParts(Op, DAG, true);6280case ISD::SRL_PARTS:6281return lowerShiftRightParts(Op, DAG, false);6282case ISD::ROTL:6283case ISD::ROTR:6284if (Op.getValueType().isFixedLengthVector()) {6285assert(Subtarget.hasStdExtZvkb());6286return lowerToScalableOp(Op, DAG);6287}6288assert(Subtarget.hasVendorXTHeadBb() &&6289!(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&6290"Unexpected custom legalization");6291// XTHeadBb only supports rotate by constant.6292if (!isa<ConstantSDNode>(Op.getOperand(1)))6293return SDValue();6294return Op;6295case ISD::BITCAST: {6296SDLoc DL(Op);6297EVT VT = Op.getValueType();6298SDValue Op0 = Op.getOperand(0);6299EVT Op0VT = Op0.getValueType();6300MVT XLenVT = Subtarget.getXLenVT();6301if (VT == MVT::f16 && Op0VT == MVT::i16 &&6302Subtarget.hasStdExtZfhminOrZhinxmin()) {6303SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);6304SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);6305return FPConv;6306}6307if (VT == MVT::bf16 && Op0VT == MVT::i16 &&6308Subtarget.hasStdExtZfbfmin()) {6309SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);6310SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::bf16, NewOp0);6311return FPConv;6312}6313if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&6314Subtarget.hasStdExtFOrZfinx()) {6315SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);6316SDValue FPConv =6317DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);6318return FPConv;6319}6320if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit() &&6321Subtarget.hasStdExtDOrZdinx()) {6322SDValue Lo, Hi;6323std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);6324SDValue RetReg =6325DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);6326return RetReg;6327}63286329// Consider other scalar<->scalar casts as legal if the types are legal.6330// Otherwise expand them.6331if (!VT.isVector() && !Op0VT.isVector()) {6332if (isTypeLegal(VT) && isTypeLegal(Op0VT))6333return Op;6334return SDValue();6335}63366337assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&6338"Unexpected types");63396340if (VT.isFixedLengthVector()) {6341// We can handle fixed length vector bitcasts with a simple replacement6342// in isel.6343if (Op0VT.isFixedLengthVector())6344return Op;6345// When bitcasting from scalar to fixed-length vector, insert the scalar6346// into a one-element vector of the result type, and perform a vector6347// bitcast.6348if (!Op0VT.isVector()) {6349EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);6350if (!isTypeLegal(BVT))6351return SDValue();6352return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,6353DAG.getUNDEF(BVT), Op0,6354DAG.getVectorIdxConstant(0, DL)));6355}6356return SDValue();6357}6358// Custom-legalize bitcasts from fixed-length vector types to scalar types6359// thus: bitcast the vector to a one-element vector type whose element type6360// is the same as the result type, and extract the first element.6361if (!VT.isVector() && Op0VT.isFixedLengthVector()) {6362EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);6363if (!isTypeLegal(BVT))6364return SDValue();6365SDValue BVec = DAG.getBitcast(BVT, Op0);6366return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,6367DAG.getVectorIdxConstant(0, DL));6368}6369return SDValue();6370}6371case ISD::INTRINSIC_WO_CHAIN:6372return LowerINTRINSIC_WO_CHAIN(Op, DAG);6373case ISD::INTRINSIC_W_CHAIN:6374return LowerINTRINSIC_W_CHAIN(Op, DAG);6375case 
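// Illustrative examples of the scalar BITCAST lowerings handled above
// (assuming the relevant extensions are enabled): on RV64 with F,
// (bitcast f32 x:i32) becomes (FMV_W_X_RV64 (any_extend x)), i.e. an fmv.w.x;
// on RV32 with D, (bitcast f64 x:i64) splits x into two i32 halves and builds
// the double with BuildPairF64.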
ISD::INTRINSIC_VOID:6376return LowerINTRINSIC_VOID(Op, DAG);6377case ISD::IS_FPCLASS:6378return LowerIS_FPCLASS(Op, DAG);6379case ISD::BITREVERSE: {6380MVT VT = Op.getSimpleValueType();6381if (VT.isFixedLengthVector()) {6382assert(Subtarget.hasStdExtZvbb());6383return lowerToScalableOp(Op, DAG);6384}6385SDLoc DL(Op);6386assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");6387assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");6388// Expand bitreverse to a bswap(rev8) followed by brev8.6389SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));6390return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);6391}6392case ISD::TRUNCATE:6393// Only custom-lower vector truncates6394if (!Op.getSimpleValueType().isVector())6395return Op;6396return lowerVectorTruncLike(Op, DAG);6397case ISD::ANY_EXTEND:6398case ISD::ZERO_EXTEND:6399if (Op.getOperand(0).getValueType().isVector() &&6400Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)6401return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);6402return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);6403case ISD::SIGN_EXTEND:6404if (Op.getOperand(0).getValueType().isVector() &&6405Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)6406return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);6407return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);6408case ISD::SPLAT_VECTOR_PARTS:6409return lowerSPLAT_VECTOR_PARTS(Op, DAG);6410case ISD::INSERT_VECTOR_ELT:6411return lowerINSERT_VECTOR_ELT(Op, DAG);6412case ISD::EXTRACT_VECTOR_ELT:6413return lowerEXTRACT_VECTOR_ELT(Op, DAG);6414case ISD::SCALAR_TO_VECTOR: {6415MVT VT = Op.getSimpleValueType();6416SDLoc DL(Op);6417SDValue Scalar = Op.getOperand(0);6418if (VT.getVectorElementType() == MVT::i1) {6419MVT WideVT = VT.changeVectorElementType(MVT::i8);6420SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);6421return DAG.getNode(ISD::TRUNCATE, DL, VT, V);6422}6423MVT ContainerVT = VT;6424if (VT.isFixedLengthVector())6425ContainerVT = getContainerForFixedLengthVector(VT);6426SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;6427Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);6428SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,6429DAG.getUNDEF(ContainerVT), Scalar, VL);6430if (VT.isFixedLengthVector())6431V = convertFromScalableVector(VT, V, DAG, Subtarget);6432return V;6433}6434case ISD::VSCALE: {6435MVT XLenVT = Subtarget.getXLenVT();6436MVT VT = Op.getSimpleValueType();6437SDLoc DL(Op);6438SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);6439// We define our scalable vector types for lmul=1 to use a 64 bit known6440// minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate6441// vscale as VLENB / 8.6442static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");6443if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)6444report_fatal_error("Support for VLEN==32 is incomplete.");6445// We assume VLENB is a multiple of 8. 
We manually choose the best shift6446// here because SimplifyDemandedBits isn't always able to simplify it.6447uint64_t Val = Op.getConstantOperandVal(0);6448if (isPowerOf2_64(Val)) {6449uint64_t Log2 = Log2_64(Val);6450if (Log2 < 3)6451Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,6452DAG.getConstant(3 - Log2, DL, VT));6453else if (Log2 > 3)6454Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,6455DAG.getConstant(Log2 - 3, DL, XLenVT));6456} else if ((Val % 8) == 0) {6457// If the multiplier is a multiple of 8, scale it down to avoid needing6458// to shift the VLENB value.6459Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,6460DAG.getConstant(Val / 8, DL, XLenVT));6461} else {6462SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,6463DAG.getConstant(3, DL, XLenVT));6464Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,6465DAG.getConstant(Val, DL, XLenVT));6466}6467return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);6468}6469case ISD::FPOWI: {6470// Custom promote f16 powi with illegal i32 integer type on RV64. Once6471// promoted this will be legalized into a libcall by LegalizeIntegerTypes.6472if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&6473Op.getOperand(1).getValueType() == MVT::i32) {6474SDLoc DL(Op);6475SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));6476SDValue Powi =6477DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));6478return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,6479DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));6480}6481return SDValue();6482}6483case ISD::FMAXIMUM:6484case ISD::FMINIMUM:6485if (Op.getValueType() == MVT::nxv32f16 &&6486(Subtarget.hasVInstructionsF16Minimal() &&6487!Subtarget.hasVInstructionsF16()))6488return SplitVectorOp(Op, DAG);6489return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);6490case ISD::FP_EXTEND: {6491SDLoc DL(Op);6492EVT VT = Op.getValueType();6493SDValue Op0 = Op.getOperand(0);6494EVT Op0VT = Op0.getValueType();6495if (VT == MVT::f32 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin())6496return DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);6497if (VT == MVT::f64 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) {6498SDValue FloatVal =6499DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);6500return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, FloatVal);6501}65026503if (!Op.getValueType().isVector())6504return Op;6505return lowerVectorFPExtendOrRoundLike(Op, DAG);6506}6507case ISD::FP_ROUND: {6508SDLoc DL(Op);6509EVT VT = Op.getValueType();6510SDValue Op0 = Op.getOperand(0);6511EVT Op0VT = Op0.getValueType();6512if (VT == MVT::bf16 && Op0VT == MVT::f32 && Subtarget.hasStdExtZfbfmin())6513return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, Op0);6514if (VT == MVT::bf16 && Op0VT == MVT::f64 && Subtarget.hasStdExtZfbfmin() &&6515Subtarget.hasStdExtDOrZdinx()) {6516SDValue FloatVal =6517DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Op0,6518DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));6519return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, FloatVal);6520}65216522if (!Op.getValueType().isVector())6523return Op;6524return lowerVectorFPExtendOrRoundLike(Op, DAG);6525}6526case ISD::STRICT_FP_ROUND:6527case ISD::STRICT_FP_EXTEND:6528return lowerStrictFPExtendOrRoundLike(Op, DAG);6529case ISD::SINT_TO_FP:6530case ISD::UINT_TO_FP:6531if (Op.getValueType().isVector() &&6532Op.getValueType().getScalarType() == MVT::f16 &&6533(Subtarget.hasVInstructionsF16Minimal() &&6534!Subtarget.hasVInstructionsF16())) {6535if (Op.getValueType() == MVT::nxv32f16)6536return SplitVectorOp(Op, 
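// Illustrative example of the VSCALE lowering above: vscale equals VLENB / 8,
// so for (vscale * 4) the multiplier is folded into the shift and the result
// is roughly
//   csrr a0, vlenb
//   srli a0, a0, 1        ; shift right by 3 - log2(4)
// while (vscale * 16) instead shifts left by log2(16) - 3 = 1.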
DAG);6537// int -> f326538SDLoc DL(Op);6539MVT NVT =6540MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());6541SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());6542// f32 -> f166543return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,6544DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));6545}6546[[fallthrough]];6547case ISD::FP_TO_SINT:6548case ISD::FP_TO_UINT:6549if (SDValue Op1 = Op.getOperand(0);6550Op1.getValueType().isVector() &&6551Op1.getValueType().getScalarType() == MVT::f16 &&6552(Subtarget.hasVInstructionsF16Minimal() &&6553!Subtarget.hasVInstructionsF16())) {6554if (Op1.getValueType() == MVT::nxv32f16)6555return SplitVectorOp(Op, DAG);6556// f16 -> f326557SDLoc DL(Op);6558MVT NVT = MVT::getVectorVT(MVT::f32,6559Op1.getValueType().getVectorElementCount());6560SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);6561// f32 -> int6562return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);6563}6564[[fallthrough]];6565case ISD::STRICT_FP_TO_SINT:6566case ISD::STRICT_FP_TO_UINT:6567case ISD::STRICT_SINT_TO_FP:6568case ISD::STRICT_UINT_TO_FP: {6569// RVV can only do fp<->int conversions to types half/double the size as6570// the source. We custom-lower any conversions that do two hops into6571// sequences.6572MVT VT = Op.getSimpleValueType();6573if (!VT.isVector())6574return Op;6575SDLoc DL(Op);6576bool IsStrict = Op->isStrictFPOpcode();6577SDValue Src = Op.getOperand(0 + IsStrict);6578MVT EltVT = VT.getVectorElementType();6579MVT SrcVT = Src.getSimpleValueType();6580MVT SrcEltVT = SrcVT.getVectorElementType();6581unsigned EltSize = EltVT.getSizeInBits();6582unsigned SrcEltSize = SrcEltVT.getSizeInBits();6583assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&6584"Unexpected vector element types");65856586bool IsInt2FP = SrcEltVT.isInteger();6587// Widening conversions6588if (EltSize > (2 * SrcEltSize)) {6589if (IsInt2FP) {6590// Do a regular integer sign/zero extension then convert to float.6591MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),6592VT.getVectorElementCount());6593unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||6594Op.getOpcode() == ISD::STRICT_UINT_TO_FP)6595? 
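// Illustrative example of the two-hop widening case handled here (for
// exposition): sitofp <vscale x 2 x i8> to <vscale x 2 x double> first
// sign-extends i8 to i32 (half the destination element width) and then uses a
// single widening convert from i32 to f64, since RVV converts can only halve
// or double the element width per step.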
ISD::ZERO_EXTEND6596: ISD::SIGN_EXTEND;6597SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);6598if (IsStrict)6599return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),6600Op.getOperand(0), Ext);6601return DAG.getNode(Op.getOpcode(), DL, VT, Ext);6602}6603// FP2Int6604assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");6605// Do one doubling fp_extend then complete the operation by converting6606// to int.6607MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());6608if (IsStrict) {6609auto [FExt, Chain] =6610DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);6611return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);6612}6613SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);6614return DAG.getNode(Op.getOpcode(), DL, VT, FExt);6615}66166617// Narrowing conversions6618if (SrcEltSize > (2 * EltSize)) {6619if (IsInt2FP) {6620// One narrowing int_to_fp, then an fp_round.6621assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");6622MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());6623if (IsStrict) {6624SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,6625DAG.getVTList(InterimFVT, MVT::Other),6626Op.getOperand(0), Src);6627SDValue Chain = Int2FP.getValue(1);6628return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;6629}6630SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);6631return DAG.getFPExtendOrRound(Int2FP, DL, VT);6632}6633// FP2Int6634// One narrowing fp_to_int, then truncate the integer. If the float isn't6635// representable by the integer, the result is poison.6636MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),6637VT.getVectorElementCount());6638if (IsStrict) {6639SDValue FP2Int =6640DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),6641Op.getOperand(0), Src);6642SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);6643return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);6644}6645SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);6646return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);6647}66486649// Scalable vectors can exit here. 
Patterns will handle equally-sized6650// conversions halving/doubling ones.6651if (!VT.isFixedLengthVector())6652return Op;66536654// For fixed-length vectors we lower to a custom "VL" node.6655unsigned RVVOpc = 0;6656switch (Op.getOpcode()) {6657default:6658llvm_unreachable("Impossible opcode");6659case ISD::FP_TO_SINT:6660RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;6661break;6662case ISD::FP_TO_UINT:6663RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;6664break;6665case ISD::SINT_TO_FP:6666RVVOpc = RISCVISD::SINT_TO_FP_VL;6667break;6668case ISD::UINT_TO_FP:6669RVVOpc = RISCVISD::UINT_TO_FP_VL;6670break;6671case ISD::STRICT_FP_TO_SINT:6672RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;6673break;6674case ISD::STRICT_FP_TO_UINT:6675RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;6676break;6677case ISD::STRICT_SINT_TO_FP:6678RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;6679break;6680case ISD::STRICT_UINT_TO_FP:6681RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;6682break;6683}66846685MVT ContainerVT = getContainerForFixedLengthVector(VT);6686MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);6687assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&6688"Expected same element count");66896690auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);66916692Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);6693if (IsStrict) {6694Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),6695Op.getOperand(0), Src, Mask, VL);6696SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);6697return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);6698}6699Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);6700return convertFromScalableVector(VT, Src, DAG, Subtarget);6701}6702case ISD::FP_TO_SINT_SAT:6703case ISD::FP_TO_UINT_SAT:6704return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);6705case ISD::FP_TO_BF16: {6706// Custom lower to ensure the libcall return is passed in an FPR on hard6707// float ABIs.6708assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");6709SDLoc DL(Op);6710MakeLibCallOptions CallOptions;6711RTLIB::Libcall LC =6712RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);6713SDValue Res =6714makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;6715if (Subtarget.is64Bit() && !RV64LegalI32)6716return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);6717return DAG.getBitcast(MVT::i32, Res);6718}6719case ISD::BF16_TO_FP: {6720assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");6721MVT VT = Op.getSimpleValueType();6722SDLoc DL(Op);6723Op = DAG.getNode(6724ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),6725DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));6726SDValue Res = Subtarget.is64Bit()6727? 
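// Note on the BF16_TO_FP lowering below: bf16 shares the f32 exponent layout,
// so a bf16 value converts to f32 exactly by placing its 16 bits in the upper
// half of an f32, i.e. shifting the integer representation left by 16 and
// moving it to an FPR (fmv.w.x on RV64, a plain bitcast on RV32).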
DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)6728: DAG.getBitcast(MVT::f32, Op);6729// fp_extend if the target VT is bigger than f32.6730if (VT != MVT::f32)6731return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);6732return Res;6733}6734case ISD::FP_TO_FP16: {6735// Custom lower to ensure the libcall return is passed in an FPR on hard6736// float ABIs.6737assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");6738SDLoc DL(Op);6739MakeLibCallOptions CallOptions;6740RTLIB::Libcall LC =6741RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16);6742SDValue Res =6743makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;6744if (Subtarget.is64Bit() && !RV64LegalI32)6745return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);6746return DAG.getBitcast(MVT::i32, Res);6747}6748case ISD::FP16_TO_FP: {6749// Custom lower to ensure the libcall argument is passed in an FPR on hard6750// float ABIs.6751assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");6752SDLoc DL(Op);6753MakeLibCallOptions CallOptions;6754SDValue Arg = Subtarget.is64Bit()6755? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32,6756Op.getOperand(0))6757: DAG.getBitcast(MVT::f32, Op.getOperand(0));6758SDValue Res =6759makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg, CallOptions, DL)6760.first;6761return Res;6762}6763case ISD::FTRUNC:6764case ISD::FCEIL:6765case ISD::FFLOOR:6766case ISD::FNEARBYINT:6767case ISD::FRINT:6768case ISD::FROUND:6769case ISD::FROUNDEVEN:6770return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);6771case ISD::LRINT:6772case ISD::LLRINT:6773return lowerVectorXRINT(Op, DAG, Subtarget);6774case ISD::VECREDUCE_ADD:6775case ISD::VECREDUCE_UMAX:6776case ISD::VECREDUCE_SMAX:6777case ISD::VECREDUCE_UMIN:6778case ISD::VECREDUCE_SMIN:6779return lowerVECREDUCE(Op, DAG);6780case ISD::VECREDUCE_AND:6781case ISD::VECREDUCE_OR:6782case ISD::VECREDUCE_XOR:6783if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)6784return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);6785return lowerVECREDUCE(Op, DAG);6786case ISD::VECREDUCE_FADD:6787case ISD::VECREDUCE_SEQ_FADD:6788case ISD::VECREDUCE_FMIN:6789case ISD::VECREDUCE_FMAX:6790case ISD::VECREDUCE_FMAXIMUM:6791case ISD::VECREDUCE_FMINIMUM:6792return lowerFPVECREDUCE(Op, DAG);6793case ISD::VP_REDUCE_ADD:6794case ISD::VP_REDUCE_UMAX:6795case ISD::VP_REDUCE_SMAX:6796case ISD::VP_REDUCE_UMIN:6797case ISD::VP_REDUCE_SMIN:6798case ISD::VP_REDUCE_FADD:6799case ISD::VP_REDUCE_SEQ_FADD:6800case ISD::VP_REDUCE_FMIN:6801case ISD::VP_REDUCE_FMAX:6802case ISD::VP_REDUCE_FMINIMUM:6803case ISD::VP_REDUCE_FMAXIMUM:6804if (Op.getOperand(1).getValueType() == MVT::nxv32f16 &&6805(Subtarget.hasVInstructionsF16Minimal() &&6806!Subtarget.hasVInstructionsF16()))6807return SplitVectorReductionOp(Op, DAG);6808return lowerVPREDUCE(Op, DAG);6809case ISD::VP_REDUCE_AND:6810case ISD::VP_REDUCE_OR:6811case ISD::VP_REDUCE_XOR:6812if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)6813return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);6814return lowerVPREDUCE(Op, DAG);6815case ISD::VP_CTTZ_ELTS:6816case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:6817return lowerVPCttzElements(Op, DAG);6818case ISD::UNDEF: {6819MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());6820return convertFromScalableVector(Op.getSimpleValueType(),6821DAG.getUNDEF(ContainerVT), DAG, Subtarget);6822}6823case ISD::INSERT_SUBVECTOR:6824return lowerINSERT_SUBVECTOR(Op, DAG);6825case 
ISD::EXTRACT_SUBVECTOR:6826return lowerEXTRACT_SUBVECTOR(Op, DAG);6827case ISD::VECTOR_DEINTERLEAVE:6828return lowerVECTOR_DEINTERLEAVE(Op, DAG);6829case ISD::VECTOR_INTERLEAVE:6830return lowerVECTOR_INTERLEAVE(Op, DAG);6831case ISD::STEP_VECTOR:6832return lowerSTEP_VECTOR(Op, DAG);6833case ISD::VECTOR_REVERSE:6834return lowerVECTOR_REVERSE(Op, DAG);6835case ISD::VECTOR_SPLICE:6836return lowerVECTOR_SPLICE(Op, DAG);6837case ISD::BUILD_VECTOR:6838return lowerBUILD_VECTOR(Op, DAG, Subtarget);6839case ISD::SPLAT_VECTOR:6840if ((Op.getValueType().getScalarType() == MVT::f16 &&6841(Subtarget.hasVInstructionsF16Minimal() &&6842Subtarget.hasStdExtZfhminOrZhinxmin() &&6843!Subtarget.hasVInstructionsF16())) ||6844(Op.getValueType().getScalarType() == MVT::bf16 &&6845(Subtarget.hasVInstructionsBF16() && Subtarget.hasStdExtZfbfmin()))) {6846if (Op.getValueType() == MVT::nxv32f16 ||6847Op.getValueType() == MVT::nxv32bf16)6848return SplitVectorOp(Op, DAG);6849SDLoc DL(Op);6850SDValue NewScalar =6851DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));6852SDValue NewSplat = DAG.getNode(6853ISD::SPLAT_VECTOR, DL,6854MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()),6855NewScalar);6856return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NewSplat,6857DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));6858}6859if (Op.getValueType().getVectorElementType() == MVT::i1)6860return lowerVectorMaskSplat(Op, DAG);6861return SDValue();6862case ISD::VECTOR_SHUFFLE:6863return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);6864case ISD::CONCAT_VECTORS: {6865// Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is6866// better than going through the stack, as the default expansion does.6867SDLoc DL(Op);6868MVT VT = Op.getSimpleValueType();6869MVT ContainerVT = VT;6870if (VT.isFixedLengthVector())6871ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);68726873// Recursively split concat_vectors with more than 2 operands:6874//6875// concat_vector op1, op2, op3, op46876// ->6877// concat_vector (concat_vector op1, op2), (concat_vector op3, op4)6878//6879// This reduces the length of the chain of vslideups and allows us to6880// perform the vslideups at a smaller LMUL, limited to MF2.6881if (Op.getNumOperands() > 2 &&6882ContainerVT.bitsGE(getLMUL1VT(ContainerVT))) {6883MVT HalfVT = VT.getHalfNumVectorElementsVT();6884assert(isPowerOf2_32(Op.getNumOperands()));6885size_t HalfNumOps = Op.getNumOperands() / 2;6886SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,6887Op->ops().take_front(HalfNumOps));6888SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,6889Op->ops().drop_front(HalfNumOps));6890return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);6891}68926893unsigned NumOpElts =6894Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();6895SDValue Vec = DAG.getUNDEF(VT);6896for (const auto &OpIdx : enumerate(Op->ops())) {6897SDValue SubVec = OpIdx.value();6898// Don't insert undef subvectors.6899if (SubVec.isUndef())6900continue;6901Vec =6902DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,6903DAG.getVectorIdxConstant(OpIdx.index() * NumOpElts, DL));6904}6905return Vec;6906}6907case ISD::LOAD:6908if (auto V = expandUnalignedRVVLoad(Op, DAG))6909return V;6910if (Op.getValueType().isFixedLengthVector())6911return lowerFixedLengthVectorLoadToRVV(Op, DAG);6912return Op;6913case ISD::STORE:6914if (auto V = expandUnalignedRVVStore(Op, DAG))6915return V;6916if (Op.getOperand(1).getValueType().isFixedLengthVector())6917return 
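// Illustrative example of the CONCAT_VECTORS handling above: a four-operand
// concat is first rewritten as
//   concat (concat op1, op2), (concat op3, op4)
// so the inner concats can be materialized at a smaller LMUL, and each
// two-operand concat then becomes a chain of INSERT_SUBVECTOR nodes at
// indices 0 and NumOpElts.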
          lowerFixedLengthVectorStoreToRVV(Op, DAG);
    return Op;
  case ISD::MLOAD:
  case ISD::VP_LOAD:
    return lowerMaskedLoad(Op, DAG);
  case ISD::MSTORE:
  case ISD::VP_STORE:
    return lowerMaskedStore(Op, DAG);
  case ISD::SELECT_CC: {
    // This occurs because we custom legalize SETGT and SETUGT for setcc. That
    // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
    // into separate SETCC+SELECT just like LegalizeDAG.
    SDValue Tmp1 = Op.getOperand(0);
    SDValue Tmp2 = Op.getOperand(1);
    SDValue True = Op.getOperand(2);
    SDValue False = Op.getOperand(3);
    EVT VT = Op.getValueType();
    SDValue CC = Op.getOperand(4);
    EVT CmpVT = Tmp1.getValueType();
    EVT CCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
    SDLoc DL(Op);
    SDValue Cond =
        DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
    return DAG.getSelect(DL, VT, Cond, True, False);
  }
  case ISD::SETCC: {
    MVT OpVT = Op.getOperand(0).getSimpleValueType();
    if (OpVT.isScalarInteger()) {
      MVT VT = Op.getSimpleValueType();
      SDValue LHS = Op.getOperand(0);
      SDValue RHS = Op.getOperand(1);
      ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
      assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
             "Unexpected CondCode");

      SDLoc DL(Op);

      // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
      // convert this to the equivalent of (set(u)ge X, C+1) by using
      // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
      // in a register.
      if (isa<ConstantSDNode>(RHS)) {
        int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
        if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
          // If this is an unsigned compare and the constant is -1, incrementing
          // the constant would change behavior.
The result should be false.6964if (CCVal == ISD::SETUGT && Imm == -1)6965return DAG.getConstant(0, DL, VT);6966// Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.6967CCVal = ISD::getSetCCSwappedOperands(CCVal);6968SDValue SetCC = DAG.getSetCC(6969DL, VT, LHS, DAG.getConstant(Imm + 1, DL, OpVT), CCVal);6970return DAG.getLogicalNOT(DL, SetCC, VT);6971}6972}69736974// Not a constant we could handle, swap the operands and condition code to6975// SETLT/SETULT.6976CCVal = ISD::getSetCCSwappedOperands(CCVal);6977return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);6978}69796980if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&6981(Subtarget.hasVInstructionsF16Minimal() &&6982!Subtarget.hasVInstructionsF16()))6983return SplitVectorOp(Op, DAG);69846985return lowerFixedLengthVectorSetccToRVV(Op, DAG);6986}6987case ISD::ADD:6988case ISD::SUB:6989case ISD::MUL:6990case ISD::MULHS:6991case ISD::MULHU:6992case ISD::AND:6993case ISD::OR:6994case ISD::XOR:6995case ISD::SDIV:6996case ISD::SREM:6997case ISD::UDIV:6998case ISD::UREM:6999case ISD::BSWAP:7000case ISD::CTPOP:7001return lowerToScalableOp(Op, DAG);7002case ISD::SHL:7003case ISD::SRA:7004case ISD::SRL:7005if (Op.getSimpleValueType().isFixedLengthVector())7006return lowerToScalableOp(Op, DAG);7007// This can be called for an i32 shift amount that needs to be promoted.7008assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&7009"Unexpected custom legalisation");7010return SDValue();7011case ISD::FADD:7012case ISD::FSUB:7013case ISD::FMUL:7014case ISD::FDIV:7015case ISD::FNEG:7016case ISD::FABS:7017case ISD::FSQRT:7018case ISD::FMA:7019case ISD::FMINNUM:7020case ISD::FMAXNUM:7021if (Op.getValueType() == MVT::nxv32f16 &&7022(Subtarget.hasVInstructionsF16Minimal() &&7023!Subtarget.hasVInstructionsF16()))7024return SplitVectorOp(Op, DAG);7025[[fallthrough]];7026case ISD::AVGFLOORS:7027case ISD::AVGFLOORU:7028case ISD::AVGCEILS:7029case ISD::AVGCEILU:7030case ISD::SMIN:7031case ISD::SMAX:7032case ISD::UMIN:7033case ISD::UMAX:7034return lowerToScalableOp(Op, DAG);7035case ISD::UADDSAT:7036case ISD::USUBSAT:7037if (!Op.getValueType().isVector())7038return lowerUADDSAT_USUBSAT(Op, DAG);7039return lowerToScalableOp(Op, DAG);7040case ISD::SADDSAT:7041case ISD::SSUBSAT:7042if (!Op.getValueType().isVector())7043return lowerSADDSAT_SSUBSAT(Op, DAG);7044return lowerToScalableOp(Op, DAG);7045case ISD::ABDS:7046case ISD::ABDU: {7047SDLoc dl(Op);7048EVT VT = Op->getValueType(0);7049SDValue LHS = DAG.getFreeze(Op->getOperand(0));7050SDValue RHS = DAG.getFreeze(Op->getOperand(1));7051bool IsSigned = Op->getOpcode() == ISD::ABDS;70527053// abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))7054// abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))7055unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;7056unsigned MinOpc = IsSigned ? 
ISD::SMIN : ISD::UMIN;7057SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);7058SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);7059return DAG.getNode(ISD::SUB, dl, VT, Max, Min);7060}7061case ISD::ABS:7062case ISD::VP_ABS:7063return lowerABS(Op, DAG);7064case ISD::CTLZ:7065case ISD::CTLZ_ZERO_UNDEF:7066case ISD::CTTZ:7067case ISD::CTTZ_ZERO_UNDEF:7068if (Subtarget.hasStdExtZvbb())7069return lowerToScalableOp(Op, DAG);7070assert(Op.getOpcode() != ISD::CTTZ);7071return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);7072case ISD::VSELECT:7073return lowerFixedLengthVectorSelectToRVV(Op, DAG);7074case ISD::FCOPYSIGN:7075if (Op.getValueType() == MVT::nxv32f16 &&7076(Subtarget.hasVInstructionsF16Minimal() &&7077!Subtarget.hasVInstructionsF16()))7078return SplitVectorOp(Op, DAG);7079return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);7080case ISD::STRICT_FADD:7081case ISD::STRICT_FSUB:7082case ISD::STRICT_FMUL:7083case ISD::STRICT_FDIV:7084case ISD::STRICT_FSQRT:7085case ISD::STRICT_FMA:7086if (Op.getValueType() == MVT::nxv32f16 &&7087(Subtarget.hasVInstructionsF16Minimal() &&7088!Subtarget.hasVInstructionsF16()))7089return SplitStrictFPVectorOp(Op, DAG);7090return lowerToScalableOp(Op, DAG);7091case ISD::STRICT_FSETCC:7092case ISD::STRICT_FSETCCS:7093return lowerVectorStrictFSetcc(Op, DAG);7094case ISD::STRICT_FCEIL:7095case ISD::STRICT_FRINT:7096case ISD::STRICT_FFLOOR:7097case ISD::STRICT_FTRUNC:7098case ISD::STRICT_FNEARBYINT:7099case ISD::STRICT_FROUND:7100case ISD::STRICT_FROUNDEVEN:7101return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);7102case ISD::MGATHER:7103case ISD::VP_GATHER:7104return lowerMaskedGather(Op, DAG);7105case ISD::MSCATTER:7106case ISD::VP_SCATTER:7107return lowerMaskedScatter(Op, DAG);7108case ISD::GET_ROUNDING:7109return lowerGET_ROUNDING(Op, DAG);7110case ISD::SET_ROUNDING:7111return lowerSET_ROUNDING(Op, DAG);7112case ISD::EH_DWARF_CFA:7113return lowerEH_DWARF_CFA(Op, DAG);7114case ISD::VP_SELECT:7115case ISD::VP_MERGE:7116case ISD::VP_ADD:7117case ISD::VP_SUB:7118case ISD::VP_MUL:7119case ISD::VP_SDIV:7120case ISD::VP_UDIV:7121case ISD::VP_SREM:7122case ISD::VP_UREM:7123case ISD::VP_UADDSAT:7124case ISD::VP_USUBSAT:7125case ISD::VP_SADDSAT:7126case ISD::VP_SSUBSAT:7127case ISD::VP_LRINT:7128case ISD::VP_LLRINT:7129return lowerVPOp(Op, DAG);7130case ISD::VP_AND:7131case ISD::VP_OR:7132case ISD::VP_XOR:7133return lowerLogicVPOp(Op, DAG);7134case ISD::VP_FADD:7135case ISD::VP_FSUB:7136case ISD::VP_FMUL:7137case ISD::VP_FDIV:7138case ISD::VP_FNEG:7139case ISD::VP_FABS:7140case ISD::VP_SQRT:7141case ISD::VP_FMA:7142case ISD::VP_FMINNUM:7143case ISD::VP_FMAXNUM:7144case ISD::VP_FCOPYSIGN:7145if (Op.getValueType() == MVT::nxv32f16 &&7146(Subtarget.hasVInstructionsF16Minimal() &&7147!Subtarget.hasVInstructionsF16()))7148return SplitVPOp(Op, DAG);7149[[fallthrough]];7150case ISD::VP_SRA:7151case ISD::VP_SRL:7152case ISD::VP_SHL:7153return lowerVPOp(Op, DAG);7154case ISD::VP_IS_FPCLASS:7155return LowerIS_FPCLASS(Op, DAG);7156case ISD::VP_SIGN_EXTEND:7157case ISD::VP_ZERO_EXTEND:7158if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)7159return lowerVPExtMaskOp(Op, DAG);7160return lowerVPOp(Op, DAG);7161case ISD::VP_TRUNCATE:7162return lowerVectorTruncLike(Op, DAG);7163case ISD::VP_FP_EXTEND:7164case ISD::VP_FP_ROUND:7165return lowerVectorFPExtendOrRoundLike(Op, DAG);7166case ISD::VP_SINT_TO_FP:7167case ISD::VP_UINT_TO_FP:7168if (Op.getValueType().isVector() &&7169Op.getValueType().getScalarType() == MVT::f16 
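// Note on the f16 special cases in this switch (assuming a Zvfhmin-style
// target that only has f16 widening/narrowing converts): f16 vector ops are
// promoted to f32 via fp_extend/fp_round pairs, and <vscale x 32 x half> is
// split first because the promoted type <vscale x 32 x float> would need a
// register group larger than the maximum LMUL of 8.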
&&7170(Subtarget.hasVInstructionsF16Minimal() &&7171!Subtarget.hasVInstructionsF16())) {7172if (Op.getValueType() == MVT::nxv32f16)7173return SplitVPOp(Op, DAG);7174// int -> f327175SDLoc DL(Op);7176MVT NVT =7177MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());7178auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());7179// f32 -> f167180return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,7181DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));7182}7183[[fallthrough]];7184case ISD::VP_FP_TO_SINT:7185case ISD::VP_FP_TO_UINT:7186if (SDValue Op1 = Op.getOperand(0);7187Op1.getValueType().isVector() &&7188Op1.getValueType().getScalarType() == MVT::f16 &&7189(Subtarget.hasVInstructionsF16Minimal() &&7190!Subtarget.hasVInstructionsF16())) {7191if (Op1.getValueType() == MVT::nxv32f16)7192return SplitVPOp(Op, DAG);7193// f16 -> f327194SDLoc DL(Op);7195MVT NVT = MVT::getVectorVT(MVT::f32,7196Op1.getValueType().getVectorElementCount());7197SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);7198// f32 -> int7199return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),7200{WidenVec, Op.getOperand(1), Op.getOperand(2)});7201}7202return lowerVPFPIntConvOp(Op, DAG);7203case ISD::VP_SETCC:7204if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&7205(Subtarget.hasVInstructionsF16Minimal() &&7206!Subtarget.hasVInstructionsF16()))7207return SplitVPOp(Op, DAG);7208if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)7209return lowerVPSetCCMaskOp(Op, DAG);7210[[fallthrough]];7211case ISD::VP_SMIN:7212case ISD::VP_SMAX:7213case ISD::VP_UMIN:7214case ISD::VP_UMAX:7215case ISD::VP_BITREVERSE:7216case ISD::VP_BSWAP:7217return lowerVPOp(Op, DAG);7218case ISD::VP_CTLZ:7219case ISD::VP_CTLZ_ZERO_UNDEF:7220if (Subtarget.hasStdExtZvbb())7221return lowerVPOp(Op, DAG);7222return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);7223case ISD::VP_CTTZ:7224case ISD::VP_CTTZ_ZERO_UNDEF:7225if (Subtarget.hasStdExtZvbb())7226return lowerVPOp(Op, DAG);7227return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);7228case ISD::VP_CTPOP:7229return lowerVPOp(Op, DAG);7230case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:7231return lowerVPStridedLoad(Op, DAG);7232case ISD::EXPERIMENTAL_VP_STRIDED_STORE:7233return lowerVPStridedStore(Op, DAG);7234case ISD::VP_FCEIL:7235case ISD::VP_FFLOOR:7236case ISD::VP_FRINT:7237case ISD::VP_FNEARBYINT:7238case ISD::VP_FROUND:7239case ISD::VP_FROUNDEVEN:7240case ISD::VP_FROUNDTOZERO:7241if (Op.getValueType() == MVT::nxv32f16 &&7242(Subtarget.hasVInstructionsF16Minimal() &&7243!Subtarget.hasVInstructionsF16()))7244return SplitVPOp(Op, DAG);7245return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);7246case ISD::VP_FMAXIMUM:7247case ISD::VP_FMINIMUM:7248if (Op.getValueType() == MVT::nxv32f16 &&7249(Subtarget.hasVInstructionsF16Minimal() &&7250!Subtarget.hasVInstructionsF16()))7251return SplitVPOp(Op, DAG);7252return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);7253case ISD::EXPERIMENTAL_VP_SPLICE:7254return lowerVPSpliceExperimental(Op, DAG);7255case ISD::EXPERIMENTAL_VP_REVERSE:7256return lowerVPReverseExperimental(Op, DAG);7257case ISD::EXPERIMENTAL_VP_SPLAT:7258return lowerVPSplatExperimental(Op, DAG);7259case ISD::CLEAR_CACHE: {7260assert(getTargetMachine().getTargetTriple().isOSLinux() &&7261"llvm.clear_cache only needs custom lower on Linux targets");7262SDLoc DL(Op);7263SDValue Flags = DAG.getConstant(0, DL, Subtarget.getXLenVT());7264return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),7265Op.getOperand(2), Flags, 
                           DL);
  }
  }
}

SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
                                             SDValue Start, SDValue End,
                                             SDValue Flags, SDLoc DL) const {
  MakeLibCallOptions CallOptions;
  std::pair<SDValue, SDValue> CallResult =
      makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid,
                  {Start, End, Flags}, CallOptions, DL, InChain);

  // This function returns void so only the out chain matters.
  return CallResult.second;
}

static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}

static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
                                   Flags);
}

static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
                                   N->getOffset(), Flags);
}

static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
}

template <class NodeTy>
SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                     bool IsLocal, bool IsExternWeak) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());

  // When HWASAN is used and tagging of global variables is enabled
  // they should be accessed via the GOT, since the tagged address of a global
  // is incompatible with existing code models. This also applies to non-pic
  // mode.
  if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    if (IsLocal && !Subtarget.allowTaggedGlobals())
      // Use PC-relative addressing to access the symbol. This generates the
      // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
      // %pcrel_lo(auipc)).
      return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);

    // Use PC-relative addressing to access the GOT for this symbol, then load
    // the address from the GOT. This generates the pattern (PseudoLGA sym),
    // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
    SDValue Load =
        SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MemOp = MF.getMachineMemOperand(
        MachinePointerInfo::getGOT(MF),
        MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
            MachineMemOperand::MOInvariant,
        LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
    DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
    return Load;
  }

  switch (getTargetMachine().getCodeModel()) {
  default:
    report_fatal_error("Unsupported code model for lowering");
  case CodeModel::Small: {
    // Generate a sequence for accessing addresses within the first 2 GiB of
    // address space.
This generates the pattern (addi (lui %hi(sym)) %lo(sym)).7343SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);7344SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);7345SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);7346return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);7347}7348case CodeModel::Medium: {7349SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);7350if (IsExternWeak) {7351// An extern weak symbol may be undefined, i.e. have value 0, which may7352// not be within 2GiB of PC, so use GOT-indirect addressing to access the7353// symbol. This generates the pattern (PseudoLGA sym), which expands to7354// (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).7355SDValue Load =7356SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);7357MachineFunction &MF = DAG.getMachineFunction();7358MachineMemOperand *MemOp = MF.getMachineMemOperand(7359MachinePointerInfo::getGOT(MF),7360MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |7361MachineMemOperand::MOInvariant,7362LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));7363DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});7364return Load;7365}73667367// Generate a sequence for accessing addresses within any 2GiB range within7368// the address space. This generates the pattern (PseudoLLA sym), which7369// expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).7370return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);7371}7372}7373}73747375SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,7376SelectionDAG &DAG) const {7377GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);7378assert(N->getOffset() == 0 && "unexpected offset in global node");7379const GlobalValue *GV = N->getGlobal();7380return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());7381}73827383SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,7384SelectionDAG &DAG) const {7385BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);73867387return getAddr(N, DAG);7388}73897390SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,7391SelectionDAG &DAG) const {7392ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);73937394return getAddr(N, DAG);7395}73967397SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,7398SelectionDAG &DAG) const {7399JumpTableSDNode *N = cast<JumpTableSDNode>(Op);74007401return getAddr(N, DAG);7402}74037404SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,7405SelectionDAG &DAG,7406bool UseGOT) const {7407SDLoc DL(N);7408EVT Ty = getPointerTy(DAG.getDataLayout());7409const GlobalValue *GV = N->getGlobal();7410MVT XLenVT = Subtarget.getXLenVT();74117412if (UseGOT) {7413// Use PC-relative addressing to access the GOT for this TLS symbol, then7414// load the address from the GOT and add the thread pointer. 
This generates7415// the pattern (PseudoLA_TLS_IE sym), which expands to7416// (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).7417SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);7418SDValue Load =7419SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);7420MachineFunction &MF = DAG.getMachineFunction();7421MachineMemOperand *MemOp = MF.getMachineMemOperand(7422MachinePointerInfo::getGOT(MF),7423MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |7424MachineMemOperand::MOInvariant,7425LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));7426DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});74277428// Add the thread pointer.7429SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);7430return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);7431}74327433// Generate a sequence for accessing the address relative to the thread7434// pointer, with the appropriate adjustment for the thread pointer offset.7435// This generates the pattern7436// (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))7437SDValue AddrHi =7438DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);7439SDValue AddrAdd =7440DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);7441SDValue AddrLo =7442DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);74437444SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);7445SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);7446SDValue MNAdd =7447DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);7448return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);7449}74507451SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,7452SelectionDAG &DAG) const {7453SDLoc DL(N);7454EVT Ty = getPointerTy(DAG.getDataLayout());7455IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());7456const GlobalValue *GV = N->getGlobal();74577458// Use a PC-relative addressing mode to access the global dynamic GOT address.7459// This generates the pattern (PseudoLA_TLS_GD sym), which expands to7460// (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).7461SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);7462SDValue Load =7463SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);74647465// Prepare argument list to generate call.7466ArgListTy Args;7467ArgListEntry Entry;7468Entry.Node = Load;7469Entry.Ty = CallTy;7470Args.push_back(Entry);74717472// Setup call to __tls_get_addr.7473TargetLowering::CallLoweringInfo CLI(DAG);7474CLI.setDebugLoc(DL)7475.setChain(DAG.getEntryNode())7476.setLibCallee(CallingConv::C, CallTy,7477DAG.getExternalSymbol("__tls_get_addr", Ty),7478std::move(Args));74797480return LowerCallTo(CLI).first;7481}74827483SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,7484SelectionDAG &DAG) const {7485SDLoc DL(N);7486EVT Ty = getPointerTy(DAG.getDataLayout());7487const GlobalValue *GV = N->getGlobal();74887489// Use a PC-relative addressing mode to access the global dynamic GOT address.7490// This generates the pattern (PseudoLA_TLSDESC sym), which expands to7491//7492// auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)7493// lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)7494// addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)7495// jalr t0, tY // R_RISCV_TLSDESC_CALL(label)7496SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);7497return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 
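// For reference (illustrative assembly following the standard psABI TLS
// sequences, RV64 shown; not taken from this file), the accesses built by the
// helpers above expand roughly as:
//   local-exec:      lui   a0, %tprel_hi(sym)
//                    add   a0, a0, tp, %tprel_add(sym)
//                    addi  a0, a0, %tprel_lo(sym)
//   initial-exec:    auipc a0, %tls_ie_pcrel_hi(sym)
//                    ld    a0, %pcrel_lo(...)(a0)
//                    add   a0, a0, tp
//   general-dynamic: auipc a0, %tls_gd_pcrel_hi(sym)
//                    addi  a0, a0, %pcrel_lo(...)
//                    call  __tls_get_addr@plt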
0);7498}74997500SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,7501SelectionDAG &DAG) const {7502GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);7503assert(N->getOffset() == 0 && "unexpected offset in global node");75047505if (DAG.getTarget().useEmulatedTLS())7506return LowerToTLSEmulatedModel(N, DAG);75077508TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());75097510if (DAG.getMachineFunction().getFunction().getCallingConv() ==7511CallingConv::GHC)7512report_fatal_error("In GHC calling convention TLS is not supported");75137514SDValue Addr;7515switch (Model) {7516case TLSModel::LocalExec:7517Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);7518break;7519case TLSModel::InitialExec:7520Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);7521break;7522case TLSModel::LocalDynamic:7523case TLSModel::GeneralDynamic:7524Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)7525: getDynamicTLSAddr(N, DAG);7526break;7527}75287529return Addr;7530}75317532// Return true if Val is equal to (setcc LHS, RHS, CC).7533// Return false if Val is the inverse of (setcc LHS, RHS, CC).7534// Otherwise, return std::nullopt.7535static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,7536ISD::CondCode CC, SDValue Val) {7537assert(Val->getOpcode() == ISD::SETCC);7538SDValue LHS2 = Val.getOperand(0);7539SDValue RHS2 = Val.getOperand(1);7540ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();75417542if (LHS == LHS2 && RHS == RHS2) {7543if (CC == CC2)7544return true;7545if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))7546return false;7547} else if (LHS == RHS2 && RHS == LHS2) {7548CC2 = ISD::getSetCCSwappedOperands(CC2);7549if (CC == CC2)7550return true;7551if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))7552return false;7553}75547555return std::nullopt;7556}75577558static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,7559const RISCVSubtarget &Subtarget) {7560SDValue CondV = N->getOperand(0);7561SDValue TrueV = N->getOperand(1);7562SDValue FalseV = N->getOperand(2);7563MVT VT = N->getSimpleValueType(0);7564SDLoc DL(N);75657566if (!Subtarget.hasConditionalMoveFusion()) {7567// (select c, -1, y) -> -c | y7568if (isAllOnesConstant(TrueV)) {7569SDValue Neg = DAG.getNegative(CondV, DL, VT);7570return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));7571}7572// (select c, y, -1) -> (c-1) | y7573if (isAllOnesConstant(FalseV)) {7574SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,7575DAG.getAllOnesConstant(DL, VT));7576return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));7577}75787579// (select c, 0, y) -> (c-1) & y7580if (isNullConstant(TrueV)) {7581SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,7582DAG.getAllOnesConstant(DL, VT));7583return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));7584}7585// (select c, y, 0) -> -c & y7586if (isNullConstant(FalseV)) {7587SDValue Neg = DAG.getNegative(CondV, DL, VT);7588return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));7589}7590}75917592// select c, ~x, x --> xor -c, x7593if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {7594const APInt &TrueVal = TrueV->getAsAPIntVal();7595const APInt &FalseVal = FalseV->getAsAPIntVal();7596if (~TrueVal == FalseVal) {7597SDValue Neg = DAG.getNegative(CondV, DL, VT);7598return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);7599}7600}76017602// Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops7603// when both truev and falsev are also setcc.7604if (CondV.getOpcode() == 
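// Worked example of the identities above, with the condition known to be 0 or
// 1: for (select c, -1, y) -> (-c) | y, c == 1 gives an all-ones mask so the
// OR yields -1, and c == 0 yields y; for (select c, 0, y) -> (c - 1) & y,
// c == 1 gives a zero mask and c == 0 gives an all-ones mask.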
ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&7605FalseV.getOpcode() == ISD::SETCC) {7606SDValue LHS = CondV.getOperand(0);7607SDValue RHS = CondV.getOperand(1);7608ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();76097610// (select x, x, y) -> x | y7611// (select !x, x, y) -> x & y7612if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {7613return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,7614DAG.getFreeze(FalseV));7615}7616// (select x, y, x) -> x & y7617// (select !x, y, x) -> x | y7618if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {7619return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,7620DAG.getFreeze(TrueV), FalseV);7621}7622}76237624return SDValue();7625}76267627// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants7628// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.7629// For now we only consider transformation profitable if `binOp(c0, c1)` ends up7630// being `0` or `-1`. In such cases we can replace `select` with `and`.7631// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize7632// than `c0`?7633static SDValue7634foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,7635const RISCVSubtarget &Subtarget) {7636if (Subtarget.hasShortForwardBranchOpt())7637return SDValue();76387639unsigned SelOpNo = 0;7640SDValue Sel = BO->getOperand(0);7641if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {7642SelOpNo = 1;7643Sel = BO->getOperand(1);7644}76457646if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())7647return SDValue();76487649unsigned ConstSelOpNo = 1;7650unsigned OtherSelOpNo = 2;7651if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {7652ConstSelOpNo = 2;7653OtherSelOpNo = 1;7654}7655SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);7656ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);7657if (!ConstSelOpNode || ConstSelOpNode->isOpaque())7658return SDValue();76597660SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);7661ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);7662if (!ConstBinOpNode || ConstBinOpNode->isOpaque())7663return SDValue();76647665SDLoc DL(Sel);7666EVT VT = BO->getValueType(0);76677668SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};7669if (SelOpNo == 1)7670std::swap(NewConstOps[0], NewConstOps[1]);76717672SDValue NewConstOp =7673DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);7674if (!NewConstOp)7675return SDValue();76767677const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();7678if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())7679return SDValue();76807681SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);7682SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};7683if (SelOpNo == 1)7684std::swap(NewNonConstOps[0], NewNonConstOps[1]);7685SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);76867687SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;7688SDValue NewF = (ConstSelOpNo == 1) ? 
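// Illustrative example of this fold (hypothetical values): for
//   (add (select cond, x, -3), 3)
// the folded constant is (-3) + 3 == 0, so the result is
//   (select cond, (add x, 3), 0)
// which, per the comment above, can then be replaced by an AND with a mask
// derived from the condition instead of a branchy select.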
  return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
}

SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(0);
  SDValue TrueV = Op.getOperand(1);
  SDValue FalseV = Op.getOperand(2);
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  // Lower vector SELECTs to VSELECTs by splatting the condition.
  if (VT.isVector()) {
    MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
    SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
    return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
  }

  // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
  // nodes to implement the SELECT. Performing the lowering here allows for
  // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
  // sequence or RISCVISD::SELECT_CC node (branch-based select).
  if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
      VT.isScalarInteger()) {
    // (select c, t, 0) -> (czero_eqz t, c)
    if (isNullConstant(FalseV))
      return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
    // (select c, 0, f) -> (czero_nez f, c)
    if (isNullConstant(TrueV))
      return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);

    // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
    if (TrueV.getOpcode() == ISD::AND &&
        (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
      return DAG.getNode(
          ISD::OR, DL, VT, TrueV,
          DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
    // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
    if (FalseV.getOpcode() == ISD::AND &&
        (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
      return DAG.getNode(
          ISD::OR, DL, VT, FalseV,
          DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));

    // Try some other optimizations before falling back to generic lowering.
    if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
      return V;

    // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
    // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
    if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
      const APInt &TrueVal = TrueV->getAsAPIntVal();
      const APInt &FalseVal = FalseV->getAsAPIntVal();
      const int TrueValCost = RISCVMatInt::getIntMatCost(
          TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
      const int FalseValCost = RISCVMatInt::getIntMatCost(
          FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
      bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
      SDValue LHSVal = DAG.getConstant(
          IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
      SDValue RHSVal =
          DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
      SDValue CMOV =
          DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
                      DL, VT, LHSVal, CondV);
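      // E.g. for (select c, 5, 2), both constants cost one instruction, so
      // IsCZERO_NEZ is true with LHSVal = 2 - 5 = -3 and RHSVal = 5: the add
      // below yields 0 + 5 = 5 when c != 0 and -3 + 5 = 2 when c == 0.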
      return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal);
    }

    // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
    // Unless we have the short forward branch optimization.
    if (!Subtarget.hasConditionalMoveFusion())
      return DAG.getNode(
          ISD::OR, DL, VT,
          DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
          DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
  }

  if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
    return V;

  if (Op.hasOneUse()) {
    unsigned UseOpc = Op->use_begin()->getOpcode();
    if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
      SDNode *BinOp = *Op->use_begin();
      if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->use_begin(),
                                                           DAG, Subtarget)) {
        DAG.ReplaceAllUsesWith(BinOp, &NewSel);
        // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
        // may return a constant node and cause crash in lowerSELECT.
        if (NewSel.getOpcode() == ISD::SELECT)
          return lowerSELECT(NewSel, DAG);
        return NewSel;
      }
    }
  }

  // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
  // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
  const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
  const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
  if (FPTV && FPFV) {
    if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
      return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
    if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
      SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
                                DAG.getConstant(1, DL, XLenVT));
      return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
    }
  }

  // If the condition is not an integer SETCC which operates on XLenVT, we need
  // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
  // (select condv, truev, falsev)
  // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
  if (CondV.getOpcode() != ISD::SETCC ||
      CondV.getOperand(0).getSimpleValueType() != XLenVT) {
    SDValue Zero = DAG.getConstant(0, DL, XLenVT);
    SDValue SetNE = DAG.getCondCode(ISD::SETNE);

    SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};

    return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
  }

  // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
  // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
  // advantage of the integer compare+branch instructions. i.e.:
  // (select (setcc lhs, rhs, cc), truev, falsev)
  // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
  SDValue LHS = CondV.getOperand(0);
  SDValue RHS = CondV.getOperand(1);
  ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();

  // Special case for a select of 2 constants that have a difference of 1.
  // Normally this is done by DAGCombine, but if the select is introduced by
  // type legalization or op legalization, we miss it. Restricting to SETLT
  // case for now because that is what signed saturating add/sub need.
  // FIXME: We don't need the condition to be SETLT or even a SETCC,
  // but we would probably want to swap the true/false values if the condition
  // is SETGE/SETLE to avoid an XORI.
  if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
      CCVal == ISD::SETLT) {
    const APInt &TrueVal = TrueV->getAsAPIntVal();
    const APInt &FalseVal = FalseV->getAsAPIntVal();
    if (TrueVal - 1 == FalseVal)
      return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
    if (TrueVal + 1 == FalseVal)
      return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
  }

  translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
  // 1 < x ? x : 1 -> 0 < x ? x : 1
  if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
      RHS == TrueV && LHS == FalseV) {
    LHS = DAG.getConstant(0, DL, VT);
    // 0 <u x is the same as x != 0.
    if (CCVal == ISD::SETULT) {
      std::swap(LHS, RHS);
      CCVal = ISD::SETNE;
    }
  }

  // x <s -1 ? x : -1 -> x <s 0 ? x : -1
  if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
      RHS == FalseV) {
    RHS = DAG.getConstant(0, DL, VT);
  }

  SDValue TargetCC = DAG.getCondCode(CCVal);

  if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
    // (select (setcc lhs, rhs, CC), constant, falsev)
    // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
    std::swap(TrueV, FalseV);
    TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
  }

  SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
  return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
}

SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(1);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();

  if (CondV.getOpcode() == ISD::SETCC &&
      CondV.getOperand(0).getValueType() == XLenVT) {
    SDValue LHS = CondV.getOperand(0);
    SDValue RHS = CondV.getOperand(1);
    ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();

    translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);

    SDValue TargetCC = DAG.getCondCode(CCVal);
    return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
                       LHS, RHS, TargetCC, Op.getOperand(2));
  }

  return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
                     CondV, DAG.getConstant(0, DL, XLenVT),
                     DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
}

SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();

  SDLoc DL(Op);
  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 getPointerTy(MF.getDataLayout()));

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
                                            SelectionDAG &DAG) const {
  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setFrameAddressIsTaken(true);
  Register FrameReg = RI.getFrameRegister(MF);
  int XLenInBytes = Subtarget.getXLen() / 8;
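  // With a frame pointer, the caller's frame pointer is saved in the slot at
  // FP - 2 * XLenInBytes (the return address lives at FP - XLenInBytes, see
  // lowerRETURNADDR), which is what the frame-chain walk below relies on.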

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
  unsigned Depth = Op.getConstantOperandVal(0);
  while (Depth--) {
    int Offset = -(XLenInBytes * 2);
    SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
                              DAG.getIntPtrConstant(Offset, DL));
    FrameAddr =
        DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
  }
  return FrameAddr;
}

SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
                                             SelectionDAG &DAG) const {
  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);
  MVT XLenVT = Subtarget.getXLenVT();
  int XLenInBytes = Subtarget.getXLen() / 8;

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  unsigned Depth = Op.getConstantOperandVal(0);
  if (Depth) {
    int Off = -XLenInBytes;
    SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
    SDValue Offset = DAG.getConstant(Off, DL, VT);
    return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
                       MachinePointerInfo());
  }

  // Return the value of the return address register, marking it an implicit
  // live-in.
  Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
}

SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // if Shamt-XLEN < 0: // Shamt < XLEN
  //   Lo = Lo << Shamt
  //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
  // else:
  //   Lo = 0
  //   Hi = Lo << (Shamt-XLEN)

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);

  SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
  SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
  SDValue ShiftRightLo =
      DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
  SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
  SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
  SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}

SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
                                                  bool IsSRA) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // SRA expansion:
  // if Shamt-XLEN < 0: // Shamt < XLEN
  //   Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
  //   Hi = Hi >>s Shamt
  // else:
  //   Lo = Hi >>s (Shamt-XLEN);
  //   Hi = Hi >>s (XLEN-1)
  //
  // SRL expansion:
  // if Shamt-XLEN < 0: // Shamt < XLEN
  //   Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
  //   Hi = Hi >>u Shamt
  // else:
  //   Lo = Hi >>u (Shamt-XLEN);
  //   Hi = 0;

  unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);

  SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
  SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
  SDValue ShiftLeftHi =
      DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
  SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
  SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
  SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
  SDValue HiFalse =
      IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}

// Lower splats of i1 types to SETCC. For each mask vector type, we have a
// legal equivalently-sized i8 type, so we can use that as a go-between.
SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  SDValue SplatVal = Op.getOperand(0);
  // All-zeros or all-ones splats are handled specially.
  if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
    SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
    return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
  }
  if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
    SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
    return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
  }
  MVT InterVT = VT.changeVectorElementType(MVT::i8);
  SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
                         DAG.getConstant(1, DL, SplatVal.getValueType()));
  SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
  SDValue Zero = DAG.getConstant(0, DL, InterVT);
  return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
}

// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
// illegal (currently only vXi64 RV32).
// FIXME: We could also catch non-constant sign-extended i32 values and lower
// them to VMV_V_X_VL.
SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
         "Unexpected SPLAT_VECTOR_PARTS lowering");

  assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);

  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector())
    ContainerVT = getContainerForFixedLengthVector(VecVT);

  auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;

  SDValue Res =
      splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);

  if (VecVT.isFixedLengthVector())
    Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);

  return Res;
}

// Custom-lower extensions from mask vectors by using a vselect either with 1
// for zero/any-extension or -1 for sign-extension:
// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
// Note that any-extension is lowered identically to zero-extension.
SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
                                                int64_t ExtTrueVal) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  SDValue Src = Op.getOperand(0);
  // Only custom-lower extensions from mask types
  assert(Src.getValueType().isVector() &&
         Src.getValueType().getVectorElementType() == MVT::i1);

  if (VecVT.isScalableVector()) {
    SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
    SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, VecVT);
    return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
  }

  MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
  MVT I1ContainerVT =
      MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());

  SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);

  SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;

  MVT XLenVT = Subtarget.getXLenVT();
  SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
  SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);

  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                          DAG.getUNDEF(ContainerVT), SplatZero, VL);
  SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                             DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
  SDValue Select =
      DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
                  SplatZero, DAG.getUNDEF(ContainerVT), VL);

  return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
}

SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
    SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
  MVT ExtVT = Op.getSimpleValueType();
  // Only custom-lower extensions from fixed-length vector types.
  if (!ExtVT.isFixedLengthVector())
    return Op;
  MVT VT = Op.getOperand(0).getSimpleValueType();
  // Grab the canonical container type for the extended type. Infer the smaller
  // type from that to ensure the same number of vector elements, as we know
  // the LMUL will be sufficient to hold the smaller type.
  MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
  // Get the extended container type manually to ensure the same number of
  // vector elements between source and dest.
  MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
                                     ContainerExtVT.getVectorElementCount());

  SDValue Op1 =
      convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);

  SDLoc DL(Op);
  auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);

  return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
}

// Custom-lower truncations from vectors to mask vectors by using a mask and a
// setcc operation:
// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
                                                      SelectionDAG &DAG) const {
  bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
  SDLoc DL(Op);
  EVT MaskVT = Op.getValueType();
  // Only expect to custom-lower truncations to mask types
  assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
         "Unexpected type for vector mask lowering");
  SDValue Src = Op.getOperand(0);
  MVT VecVT = Src.getSimpleValueType();
  SDValue Mask, VL;
  if (IsVPTrunc) {
    Mask = Op.getOperand(1);
    VL = Op.getOperand(2);
  }
  // If this is a fixed vector, we need to convert it to a scalable vector.
  MVT ContainerVT = VecVT;

  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
    if (IsVPTrunc) {
      MVT MaskContainerVT =
          getContainerForFixedLengthVector(Mask.getSimpleValueType());
      Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
    }
  }

  if (!IsVPTrunc) {
    std::tie(Mask, VL) =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
  }

  SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
  SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());

  SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                         DAG.getUNDEF(ContainerVT), SplatOne, VL);
  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                          DAG.getUNDEF(ContainerVT), SplatZero, VL);

  MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
  SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
                              DAG.getUNDEF(ContainerVT), Mask, VL);
  Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
                      {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
                       DAG.getUNDEF(MaskContainerVT), Mask, VL});
  if (MaskVT.isFixedLengthVector())
    Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
  return Trunc;
}

SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
                                                  SelectionDAG &DAG) const {
  bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
  SDLoc DL(Op);

  MVT VT = Op.getSimpleValueType();
  // Only custom-lower vector truncates
  assert(VT.isVector() && "Unexpected type for vector truncate lowering");

  // Truncates to mask types are handled differently
  if (VT.getVectorElementType() == MVT::i1)
    return lowerVectorMaskTruncLike(Op, DAG);

  // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
  // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
  // truncate by one power of two at a time.
  MVT DstEltVT = VT.getVectorElementType();

  SDValue Src = Op.getOperand(0);
  MVT SrcVT = Src.getSimpleValueType();
  MVT SrcEltVT = SrcVT.getVectorElementType();

  assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
         isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
         "Unexpected vector truncate lowering");

  MVT ContainerVT = SrcVT;
  SDValue Mask, VL;
  if (IsVPTrunc) {
    Mask = Op.getOperand(1);
    VL = Op.getOperand(2);
  }
  if (SrcVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(SrcVT);
    Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
    if (IsVPTrunc) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
  }

  SDValue Result = Src;
  if (!IsVPTrunc) {
    std::tie(Mask, VL) =
        getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
  }

  LLVMContext &Context = *DAG.getContext();
  const ElementCount Count = ContainerVT.getVectorElementCount();
  do {
    SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
    EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
    Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
                         Mask, VL);
  } while (SrcEltVT != DstEltVT);

  if (SrcVT.isFixedLengthVector())
    Result = convertFromScalableVector(VT, Result, DAG, Subtarget);

  return Result;
}

SDValue
RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Src = Op.getOperand(1);
  MVT VT = Op.getSimpleValueType();
  MVT SrcVT = Src.getSimpleValueType();
  MVT ContainerVT = VT;
  if (VT.isFixedLengthVector()) {
    MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
    ContainerVT =
        SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
    Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
  }

  auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);

  // RVV can only widen/truncate fp to types double/half the size as the source.
  if ((VT.getVectorElementType() == MVT::f64 &&
       (SrcVT.getVectorElementType() == MVT::f16 ||
        SrcVT.getVectorElementType() == MVT::bf16)) ||
      ((VT.getVectorElementType() == MVT::f16 ||
        VT.getVectorElementType() == MVT::bf16) &&
       SrcVT.getVectorElementType() == MVT::f64)) {
    // For double rounding, the intermediate rounding should be round-to-odd.
    unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
                                ? RISCVISD::STRICT_FP_EXTEND_VL
                                : RISCVISD::STRICT_VFNCVT_ROD_VL;
    MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
    Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
                      Chain, Src, Mask, VL);
    Chain = Src.getValue(1);
  }

  unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
                         ? RISCVISD::STRICT_FP_EXTEND_VL
                         : RISCVISD::STRICT_FP_ROUND_VL;
  SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
                            Chain, Src, Mask, VL);
  if (VT.isFixedLengthVector()) {
    // StrictFP operations have two result values. Their lowered result should
    // have the same result count.
    SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
    Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
  }
  return Res;
}

SDValue
RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
                                                    SelectionDAG &DAG) const {
  bool IsVP =
      Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
  bool IsExtend =
      Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
  // RVV can only do truncate fp to types half the size as the source. We
  // custom-lower f64->f16 rounds via RVV's round-to-odd float
  // conversion instruction.
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();

  assert(VT.isVector() && "Unexpected type for vector truncate lowering");

  SDValue Src = Op.getOperand(0);
  MVT SrcVT = Src.getSimpleValueType();

  bool IsDirectExtend =
      IsExtend && (VT.getVectorElementType() != MVT::f64 ||
                   (SrcVT.getVectorElementType() != MVT::f16 &&
                    SrcVT.getVectorElementType() != MVT::bf16));
  bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
                                      VT.getVectorElementType() != MVT::bf16) ||
                                     SrcVT.getVectorElementType() != MVT::f64);

  bool IsDirectConv = IsDirectExtend || IsDirectTrunc;

  // Prepare any fixed-length vector operands.
  MVT ContainerVT = VT;
  SDValue Mask, VL;
  if (IsVP) {
    Mask = Op.getOperand(1);
    VL = Op.getOperand(2);
  }
  if (VT.isFixedLengthVector()) {
    MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
    ContainerVT =
        SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
    Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
    if (IsVP) {
      MVT MaskVT = getMaskTypeFor(ContainerVT);
      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
    }
  }

  if (!IsVP)
    std::tie(Mask, VL) =
        getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);

  unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;

  if (IsDirectConv) {
    Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
    if (VT.isFixedLengthVector())
      Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
    return Src;
  }

  unsigned InterConvOpc =
      IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;

  MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
  SDValue IntermediateConv =
      DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
  SDValue Result =
      DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
  if (VT.isFixedLengthVector())
    return convertFromScalableVector(VT, Result, DAG, Subtarget);
  return Result;
}

// Given a scalable vector type and an index into it, returns the type for the
// smallest subvector that the index fits in. This can be used to reduce LMUL
// for operations like vslidedown.
//
// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
static std::optional<MVT>
getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
                      const RISCVSubtarget &Subtarget) {
  assert(VecVT.isScalableVector());
  const unsigned EltSize = VecVT.getScalarSizeInBits();
  const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
  const unsigned MinVLMAX = VectorBitsMin / EltSize;
  MVT SmallerVT;
  if (MaxIdx < MinVLMAX)
    SmallerVT = getLMUL1VT(VecVT);
  else if (MaxIdx < MinVLMAX * 2)
    SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
  else if (MaxIdx < MinVLMAX * 4)
    SmallerVT = getLMUL1VT(VecVT)
                    .getDoubleNumVectorElementsVT()
                    .getDoubleNumVectorElementsVT();
  if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
    return std::nullopt;
  return SmallerVT;
}

// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
// first position of a vector, and that vector is slid up to the insert index.
// By limiting the active vector length to index+1 and merging with the
// original vector (with an undisturbed tail policy for elements >= VL), we
// achieve the desired result of leaving all elements untouched except the one
// at VL-1, which is replaced with the desired value.
SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  SDValue Vec = Op.getOperand(0);
  SDValue Val = Op.getOperand(1);
  SDValue Idx = Op.getOperand(2);

  if (VecVT.getVectorElementType() == MVT::i1) {
    // FIXME: For now we just promote to an i8 vector and insert into that,
    // but this is probably not optimal.
    MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
    Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
    Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
    return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
  }

  MVT ContainerVT = VecVT;
  // If the operand is a fixed-length vector, convert to a scalable one.
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  // If we know the index we're going to insert at, we can shrink Vec so that
  // we're performing the scalar inserts and slideup on a smaller LMUL.
  MVT OrigContainerVT = ContainerVT;
  SDValue OrigVec = Vec;
  SDValue AlignedIdx;
  if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
    const unsigned OrigIdx = IdxC->getZExtValue();
    // Do we know an upper bound on LMUL?
    if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
                                              DL, DAG, Subtarget)) {
      ContainerVT = *ShrunkVT;
      AlignedIdx = DAG.getVectorIdxConstant(0, DL);
    }

    // If we're compiling for an exact VLEN value, we can always perform
    // the insert in m1 as we can determine the register corresponding to
    // the index in the register group.
    const MVT M1VT = getLMUL1VT(ContainerVT);
    if (auto VLEN = Subtarget.getRealVLen();
        VLEN && ContainerVT.bitsGT(M1VT)) {
      EVT ElemVT = VecVT.getVectorElementType();
      unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
      unsigned RemIdx = OrigIdx % ElemsPerVReg;
      unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
      unsigned ExtractIdx =
          SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
      AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL);
      Idx = DAG.getVectorIdxConstant(RemIdx, DL);
      ContainerVT = M1VT;
    }

    if (AlignedIdx)
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
                        AlignedIdx);
  }

  MVT XLenVT = Subtarget.getXLenVT();

  bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
  // Even i64-element vectors on RV32 can be lowered without scalar
  // legalization if the most-significant 32 bits of the value are not affected
  // by the sign-extension of the lower 32 bits.
  // TODO: We could also catch sign extensions of a 32-bit value.
  if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
    const auto *CVal = cast<ConstantSDNode>(Val);
    if (isInt<32>(CVal->getSExtValue())) {
      IsLegalInsert = true;
      Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
    }
  }

  auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  SDValue ValInVec;

  if (IsLegalInsert) {
    unsigned Opc =
        VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
    if (isNullConstant(Idx)) {
      if (!VecVT.isFloatingPoint())
        Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
      Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);

      if (AlignedIdx)
        Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
                          Vec, AlignedIdx);
      if (!VecVT.isFixedLengthVector())
        return Vec;
      return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
    }
    ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
  } else {
    // On RV32, i64-element vectors must be specially handled to place the
    // value at element 0, by using two vslide1down instructions in sequence on
    // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
    // this.
    SDValue ValLo, ValHi;
    std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
    MVT I32ContainerVT =
        MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
    SDValue I32Mask =
        getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
    // Limit the active VL to two.
    SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
    // If the Idx is 0 we can insert directly into the vector.
    if (isNullConstant(Idx)) {
      // First slide in the lo value, then the hi value above it. We use
      // slide1down to avoid the register group overlap constraint of vslide1up.
      ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
                             Vec, Vec, ValLo, I32Mask, InsertI64VL);
      // If the source vector is undef don't pass along the tail elements from
      // the previous slide1down.
      SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
      ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
                             Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
      // Bitcast back to the right container type.
      ValInVec = DAG.getBitcast(ContainerVT, ValInVec);

      if (AlignedIdx)
        ValInVec =
            DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
                        ValInVec, AlignedIdx);
      if (!VecVT.isFixedLengthVector())
        return ValInVec;
      return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
    }

    // First slide in the lo value, then the hi value above it. We use slide1down
    // to avoid the register group overlap constraint of vslide1up.
    ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
                           DAG.getUNDEF(I32ContainerVT),
                           DAG.getUNDEF(I32ContainerVT), ValLo,
                           I32Mask, InsertI64VL);
    ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
                           DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
                           I32Mask, InsertI64VL);
    // Bitcast back to the right container type.
    ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
  }

  // Now that the value is in a vector, slide it into position.
  SDValue InsertVL =
      DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));

  // Use tail agnostic policy if Idx is the last index of Vec.
  unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
  if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
      Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
    Policy = RISCVII::TAIL_AGNOSTIC;
  SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
                                Idx, Mask, InsertVL, Policy);

  if (AlignedIdx)
    Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
                          Slideup, AlignedIdx);
  if (!VecVT.isFixedLengthVector())
    return Slideup;
  return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
}

// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
// types this is done using VMV_X_S to allow us to glean information about the
// sign bits of the result.
SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Idx = Op.getOperand(1);
  SDValue Vec = Op.getOperand(0);
  EVT EltVT = Op.getValueType();
  MVT VecVT = Vec.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  if (VecVT.getVectorElementType() == MVT::i1) {
    // Use vfirst.m to extract the first bit.
    if (isNullConstant(Idx)) {
      MVT ContainerVT = VecVT;
      if (VecVT.isFixedLengthVector()) {
        ContainerVT = getContainerForFixedLengthVector(VecVT);
        Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
      }
      auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
      SDValue Vfirst =
          DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
      SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
                                 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
      return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
    }
    if (VecVT.isFixedLengthVector()) {
      unsigned NumElts = VecVT.getVectorNumElements();
      if (NumElts >= 8) {
        MVT WideEltVT;
        unsigned WidenVecLen;
        SDValue ExtractElementIdx;
        SDValue ExtractBitIdx;
        unsigned MaxEEW = Subtarget.getELen();
        MVT LargestEltVT = MVT::getIntegerVT(
            std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
        if (NumElts <= LargestEltVT.getSizeInBits()) {
          assert(isPowerOf2_32(NumElts) &&
                 "the number of elements should be power of 2");
          WideEltVT = MVT::getIntegerVT(NumElts);
          WidenVecLen = 1;
          ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
          ExtractBitIdx = Idx;
        } else {
          WideEltVT = LargestEltVT;
          WidenVecLen = NumElts / WideEltVT.getSizeInBits();
          // extract element index = index / element width
          ExtractElementIdx = DAG.getNode(
              ISD::SRL, DL, XLenVT, Idx,
              DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
          // mask bit index = index % element width
          ExtractBitIdx = DAG.getNode(
              ISD::AND, DL, XLenVT, Idx,
              DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
        }
        MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
        Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
        SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
                                         Vec, ExtractElementIdx);
        // Extract the bit from GPR.
        SDValue ShiftRight =
            DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
        SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
                                  DAG.getConstant(1, DL, XLenVT));
        return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
      }
    }
    // Otherwise, promote to an i8 vector and extract from that.
    MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
    Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
  }

  // If this is a fixed vector, we need to convert it to a scalable vector.
  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  // If we're compiling for an exact VLEN value and we have a known
  // constant index, we can always perform the extract in m1 (or
  // smaller) as we can determine the register corresponding to
  // the index in the register group.
  const auto VLen = Subtarget.getRealVLen();
  if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
      IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
    MVT M1VT = getLMUL1VT(ContainerVT);
    unsigned OrigIdx = IdxC->getZExtValue();
    EVT ElemVT = VecVT.getVectorElementType();
    unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
    unsigned RemIdx = OrigIdx % ElemsPerVReg;
    unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
    unsigned ExtractIdx =
        SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
    Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
                      DAG.getVectorIdxConstant(ExtractIdx, DL));
    Idx = DAG.getVectorIdxConstant(RemIdx, DL);
    ContainerVT = M1VT;
  }

  // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
  // contains our index.
  std::optional<uint64_t> MaxIdx;
  if (VecVT.isFixedLengthVector())
    MaxIdx = VecVT.getVectorNumElements() - 1;
  if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
    MaxIdx = IdxC->getZExtValue();
  if (MaxIdx) {
    if (auto SmallerVT =
            getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
      ContainerVT = *SmallerVT;
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
                        DAG.getConstant(0, DL, XLenVT));
    }
  }

  // If after narrowing, the required slide is still greater than LMUL2,
  // fall back to generic expansion and go through the stack. This is done
  // for a subtle reason: extracting *all* elements out of a vector is
  // widely expected to be linear in vector size, but because vslidedown
  // is linear in LMUL, performing N extracts using vslidedown becomes
  // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
  // seems to have the same problem (the store is linear in LMUL), but the
  // generic expansion *memoizes* the store, and thus for many extracts of
  // the same vector we end up with one store and a bunch of loads.
  // TODO: We don't have the same code for insert_vector_elt because we
  // have BUILD_VECTOR and handle the degenerate case there. Should we
  // consider adding an inverse BUILD_VECTOR node?
  MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
  if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
    return SDValue();

  // If the index is 0, the vector is already in the right position.
  if (!isNullConstant(Idx)) {
    // Use a VL of 1 to avoid processing more elements than we need.
    auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
    Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
                        DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
  }

  if (!EltVT.isInteger()) {
    // Floating-point extracts are handled in TableGen.
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
                       DAG.getVectorIdxConstant(0, DL));
  }

  SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
  return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
}

// Some RVV intrinsics may claim that they want an integer operand to be
// promoted or expanded.
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
                                           const RISCVSubtarget &Subtarget) {
  assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
         "Unexpected opcode");

  if (!Subtarget.hasVInstructions())
    return SDValue();

  bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
                  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);

  SDLoc DL(Op);

  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
  if (!II || !II->hasScalarOperand())
    return SDValue();

  unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
  assert(SplatOp < Op.getNumOperands());

  SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
  SDValue &ScalarOp = Operands[SplatOp];
  MVT OpVT = ScalarOp.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  // If this isn't a scalar, or its type is XLenVT we're done.
  if (!OpVT.isScalarInteger() || OpVT == XLenVT)
    return SDValue();

  // Simplest case is that the operand needs to be promoted to XLenVT.
  if (OpVT.bitsLT(XLenVT)) {
    // If the operand is a constant, sign extend to increase our chances
    // of being able to use a .vi instruction. ANY_EXTEND would become a
    // zero extend and the simm5 check in isel would fail.
    // FIXME: Should we ignore the upper bits in isel instead?
    unsigned ExtOpc =
        isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
    ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
    return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
  }

  // Use the previous operand to get the vXi64 VT. The result might be a mask
  // VT for compares. Using the previous operand assumes that the previous
  // operand will never have a smaller element size than a scalar operand and
  // that a widening operation never uses SEW=64.
  // NOTE: If this fails the below assert, we can probably just find the
  // element count from any operand or result and use it to construct the VT.
  assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
  MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();

  // The more complex case is when the scalar is larger than XLenVT.
  assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
         VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");

  // If this is a sign-extended 32-bit value, we can truncate it and rely on the
  // instruction to sign-extend since SEW>XLEN.
  if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
    ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
    return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
  }

  switch (IntNo) {
  case Intrinsic::riscv_vslide1up:
  case Intrinsic::riscv_vslide1down:
  case Intrinsic::riscv_vslide1up_mask:
  case Intrinsic::riscv_vslide1down_mask: {
    // We need to special case these when the scalar is larger than XLen.
    unsigned NumOps = Op.getNumOperands();
    bool IsMasked = NumOps == 7;

    // Convert the vector source to the equivalent nxvXi32 vector.
    MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
    SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
    SDValue ScalarLo, ScalarHi;
    std::tie(ScalarLo, ScalarHi) =
        DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);

    // Double the VL since we halved SEW.
    SDValue AVL = getVLOperand(Op);
    SDValue I32VL;

    // Optimize for constant AVL
    if (isa<ConstantSDNode>(AVL)) {
      const auto [MinVLMAX, MaxVLMAX] =
          RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);

      uint64_t AVLInt = AVL->getAsZExtVal();
      if (AVLInt <= MinVLMAX) {
        I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
      } else if (AVLInt >= 2 * MaxVLMAX) {
        // Just set vl to VLMAX in this situation
        RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT);
        SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
        unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits());
        SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
        SDValue SETVLMAX = DAG.getTargetConstant(
            Intrinsic::riscv_vsetvlimax, DL, MVT::i32);
        I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW,
                            LMUL);
      } else {
        // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
        // is related to the hardware implementation.
        // So let the following code handle it.
      }
    }
    if (!I32VL) {
      RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
      SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
      unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
      SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
      SDValue SETVL =
          DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
      // Use a vsetvli instruction to get the actually used length, which
      // depends on the hardware implementation.
      SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
                               SEW, LMUL);
      I32VL =
          DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
    }

    SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);

    // Shift the two scalar parts in using SEW=32 slide1up/slide1down
    // instructions.
    SDValue Passthru;
    if (IsMasked)
      Passthru = DAG.getUNDEF(I32VT);
    else
      Passthru = DAG.getBitcast(I32VT, Operands[1]);

    if (IntNo == Intrinsic::riscv_vslide1up ||
        IntNo == Intrinsic::riscv_vslide1up_mask) {
      Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
                        ScalarHi, I32Mask, I32VL);
      Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
                        ScalarLo, I32Mask, I32VL);
    } else {
      Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
                        ScalarLo, I32Mask, I32VL);
      Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
                        ScalarHi, I32Mask, I32VL);
    }

    // Convert back to nxvXi64.
    Vec = DAG.getBitcast(VT, Vec);

    if (!IsMasked)
      return Vec;
    // Apply mask after the operation.
    SDValue Mask = Operands[NumOps - 3];
    SDValue MaskedOff = Operands[1];
    // Assume Policy operand is the last operand.
    uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
    // We don't need to select maskedoff if it's undef.
    if (MaskedOff.isUndef())
      return Vec;
    // TAMU
    if (Policy == RISCVII::TAIL_AGNOSTIC)
      return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
                         DAG.getUNDEF(VT), AVL);
    // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
    // It's fine because vmerge does not care about mask policy.
    return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
                       MaskedOff, AVL);
  }
  }

  // We need to convert the scalar to a splat vector.
  SDValue VL = getVLOperand(Op);
  assert(VL.getValueType() == XLenVT);
  ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
  return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
}

// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
// scalable vector llvm.get.vector.length for now.
//
// We need to convert from a scalable VF to a vsetvli with VLMax equal to
// (vscale * VF). The vscale and VF are independent of element width. We use
// SEW=8 for the vsetvli because it is the only element width that supports all
// fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
// (vscale * VF). Where vscale is defined as VLEN/RVVBitsPerBlock. The
// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
// SEW and LMUL are better for the surrounding vector instructions.
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
                                    const RISCVSubtarget &Subtarget) {
  MVT XLenVT = Subtarget.getXLenVT();

  // The smallest LMUL is only valid for the smallest element width.
  const unsigned ElementWidth = 8;

  // Determine the VF that corresponds to LMUL 1 for ElementWidth.
  unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
  // We don't support VF==1 with ELEN==32.
  [[maybe_unused]] unsigned MinVF =
      RISCV::RVVBitsPerBlock / Subtarget.getELen();

  [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
  assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
         "Unexpected VF");

  bool Fractional = VF < LMul1VF;
  unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
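  // E.g. VF=2 with 64-bit RVV blocks: LMul1VF = 8, so Fractional is true and
  // LMulVal = 4 (LMUL = 1/4); at SEW=8 this gives VLMax = VLEN/32 = vscale * 2,
  // i.e. vscale * VF as required.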
  unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
  unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);

  SDLoc DL(N);

  SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
  SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);

  SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));

  SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
  SDValue Res =
      DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
  return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
}

static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
                             const RISCVSubtarget &Subtarget) {
  SDValue Op0 = N->getOperand(1);
  MVT OpVT = Op0.getSimpleValueType();
  MVT ContainerVT = OpVT;
  if (OpVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
    Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
  }
  MVT XLenVT = Subtarget.getXLenVT();
  SDLoc DL(N);
  auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
  SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
  if (isOneConstant(N->getOperand(2)))
    return Res;

  // Convert -1 to VL.
  SDValue Setcc =
      DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
  VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
  return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
}

static inline void promoteVCIXScalar(const SDValue &Op,
                                     SmallVectorImpl<SDValue> &Operands,
                                     SelectionDAG &DAG) {
  const RISCVSubtarget &Subtarget =
      DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();

  bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
                  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
  SDLoc DL(Op);

  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
  if (!II || !II->hasScalarOperand())
    return;

  unsigned SplatOp = II->ScalarOperand + 1;
  assert(SplatOp < Op.getNumOperands());

  SDValue &ScalarOp = Operands[SplatOp];
  MVT OpVT = ScalarOp.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  // The code below is partially copied from lowerVectorIntrinsicScalars.
  // If this isn't a scalar, or its type is XLenVT we're done.
  if (!OpVT.isScalarInteger() || OpVT == XLenVT)
    return;

  // Manually emit promote operation for scalar operation.
  if (OpVT.bitsLT(XLenVT)) {
    unsigned ExtOpc =
        isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
    ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
  }

  return;
}

static void processVCIXOperands(SDValue &OrigOp,
                                SmallVectorImpl<SDValue> &Operands,
                                SelectionDAG &DAG) {
  promoteVCIXScalar(OrigOp, Operands, DAG);
  const RISCVSubtarget &Subtarget =
      DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
  for (SDValue &V : Operands) {
    EVT ValType = V.getValueType();
    if (ValType.isVector() && ValType.isFloatingPoint()) {
      MVT InterimIVT =
          MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
                           ValType.getVectorElementCount());
      V = DAG.getBitcast(InterimIVT, V);
    }
    if (ValType.isFixedLengthVector()) {
      MVT OpContainerVT = getContainerForFixedLengthVector(
          DAG, V.getSimpleValueType(), Subtarget);
      V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
    }
  }
}

// LMUL * VLEN should be greater than or equal to EGS * SEW
static inline bool isValidEGW(int EGS, EVT VT,
                              const RISCVSubtarget &Subtarget) {
  return (Subtarget.getRealMinVLen() *
          VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
         EGS * VT.getScalarSizeInBits();
}

SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const {
  unsigned IntNo = Op.getConstantOperandVal(0);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();

  switch (IntNo) {
  default:
    break; // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getRegister(RISCV::X4, PtrVT);
  }
  case Intrinsic::riscv_orc_b:
  case Intrinsic::riscv_brev8:
  case Intrinsic::riscv_sha256sig0:
  case Intrinsic::riscv_sha256sig1:
  case Intrinsic::riscv_sha256sum0:
  case Intrinsic::riscv_sha256sum1:
  case Intrinsic::riscv_sm3p0:
  case Intrinsic::riscv_sm3p1: {
    unsigned Opc;
    switch (IntNo) {
    case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
    case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
    case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
    case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
    case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
    case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
    case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
    case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
    }

    if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
      SDValue NewOp =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
      SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
      return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
    }

    return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
  }
  case Intrinsic::riscv_sm4ks:
  case Intrinsic::riscv_sm4ed: {
    unsigned Opc =
        IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
RISCVISD::SM4KS : RISCVISD::SM4ED;91249125if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {9126SDValue NewOp0 =9127DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));9128SDValue NewOp1 =9129DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));9130SDValue Res =9131DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, Op.getOperand(3));9132return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);9133}91349135return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),9136Op.getOperand(3));9137}9138case Intrinsic::riscv_zip:9139case Intrinsic::riscv_unzip: {9140unsigned Opc =9141IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;9142return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));9143}9144case Intrinsic::riscv_mopr: {9145if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {9146SDValue NewOp =9147DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));9148SDValue Res = DAG.getNode(9149RISCVISD::MOPR, DL, MVT::i64, NewOp,9150DAG.getTargetConstant(Op.getConstantOperandVal(2), DL, MVT::i64));9151return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);9152}9153return DAG.getNode(RISCVISD::MOPR, DL, XLenVT, Op.getOperand(1),9154Op.getOperand(2));9155}91569157case Intrinsic::riscv_moprr: {9158if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {9159SDValue NewOp0 =9160DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));9161SDValue NewOp1 =9162DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));9163SDValue Res = DAG.getNode(9164RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,9165DAG.getTargetConstant(Op.getConstantOperandVal(3), DL, MVT::i64));9166return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);9167}9168return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1),9169Op.getOperand(2), Op.getOperand(3));9170}9171case Intrinsic::riscv_clmul:9172if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {9173SDValue NewOp0 =9174DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));9175SDValue NewOp1 =9176DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));9177SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);9178return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);9179}9180return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),9181Op.getOperand(2));9182case Intrinsic::riscv_clmulh:9183case Intrinsic::riscv_clmulr: {9184unsigned Opc =9185IntNo == Intrinsic::riscv_clmulh ? 
RISCVISD::CLMULH : RISCVISD::CLMULR;9186if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {9187SDValue NewOp0 =9188DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));9189SDValue NewOp1 =9190DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));9191NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,9192DAG.getConstant(32, DL, MVT::i64));9193NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,9194DAG.getConstant(32, DL, MVT::i64));9195SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);9196Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,9197DAG.getConstant(32, DL, MVT::i64));9198return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);9199}92009201return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));9202}9203case Intrinsic::experimental_get_vector_length:9204return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);9205case Intrinsic::experimental_cttz_elts:9206return lowerCttzElts(Op.getNode(), DAG, Subtarget);9207case Intrinsic::riscv_vmv_x_s: {9208SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));9209return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);9210}9211case Intrinsic::riscv_vfmv_f_s:9212return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),9213Op.getOperand(1), DAG.getVectorIdxConstant(0, DL));9214case Intrinsic::riscv_vmv_v_x:9215return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),9216Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,9217Subtarget);9218case Intrinsic::riscv_vfmv_v_f:9219return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),9220Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));9221case Intrinsic::riscv_vmv_s_x: {9222SDValue Scalar = Op.getOperand(2);92239224if (Scalar.getValueType().bitsLE(XLenVT)) {9225Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);9226return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),9227Op.getOperand(1), Scalar, Op.getOperand(3));9228}92299230assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");92319232// This is an i64 value that lives in two scalar registers. We have to9233// insert this in a convoluted way. First we build vXi64 splat containing9234// the two values that we assemble using some bit math. Next we'll use9235// vid.v and vmseq to build a mask with bit 0 set. 
Then we'll use that mask9236// to merge element 0 from our splat into the source vector.9237// FIXME: This is probably not the best way to do this, but it is9238// consistent with INSERT_VECTOR_ELT lowering so it is a good starting9239// point.9240// sw lo, (a0)9241// sw hi, 4(a0)9242// vlse vX, (a0)9243//9244// vid.v vVid9245// vmseq.vx mMask, vVid, 09246// vmerge.vvm vDest, vSrc, vVal, mMask9247MVT VT = Op.getSimpleValueType();9248SDValue Vec = Op.getOperand(1);9249SDValue VL = getVLOperand(Op);92509251SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);9252if (Op.getOperand(1).isUndef())9253return SplattedVal;9254SDValue SplattedIdx =9255DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),9256DAG.getConstant(0, DL, MVT::i32), VL);92579258MVT MaskVT = getMaskTypeFor(VT);9259SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);9260SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);9261SDValue SelectCond =9262DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,9263{VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),9264DAG.getUNDEF(MaskVT), Mask, VL});9265return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,9266Vec, DAG.getUNDEF(VT), VL);9267}9268case Intrinsic::riscv_vfmv_s_f:9269return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),9270Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));9271// EGS * EEW >= 128 bits9272case Intrinsic::riscv_vaesdf_vv:9273case Intrinsic::riscv_vaesdf_vs:9274case Intrinsic::riscv_vaesdm_vv:9275case Intrinsic::riscv_vaesdm_vs:9276case Intrinsic::riscv_vaesef_vv:9277case Intrinsic::riscv_vaesef_vs:9278case Intrinsic::riscv_vaesem_vv:9279case Intrinsic::riscv_vaesem_vs:9280case Intrinsic::riscv_vaeskf1:9281case Intrinsic::riscv_vaeskf2:9282case Intrinsic::riscv_vaesz_vs:9283case Intrinsic::riscv_vsm4k:9284case Intrinsic::riscv_vsm4r_vv:9285case Intrinsic::riscv_vsm4r_vs: {9286if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||9287!isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||9288!isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))9289report_fatal_error("EGW should be greater than or equal to 4 * SEW.");9290return Op;9291}9292// EGS * EEW >= 256 bits9293case Intrinsic::riscv_vsm3c:9294case Intrinsic::riscv_vsm3me: {9295if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||9296!isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))9297report_fatal_error("EGW should be greater than or equal to 8 * SEW.");9298return Op;9299}9300// zvknha(SEW=32)/zvknhb(SEW=[32|64])9301case Intrinsic::riscv_vsha2ch:9302case Intrinsic::riscv_vsha2cl:9303case Intrinsic::riscv_vsha2ms: {9304if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&9305!Subtarget.hasStdExtZvknhb())9306report_fatal_error("SEW=64 needs Zvknhb to be enabled.");9307if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||9308!isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||9309!isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))9310report_fatal_error("EGW should be greater than or equal to 4 * SEW.");9311return Op;9312}9313case Intrinsic::riscv_sf_vc_v_x:9314case Intrinsic::riscv_sf_vc_v_i:9315case Intrinsic::riscv_sf_vc_v_xv:9316case Intrinsic::riscv_sf_vc_v_iv:9317case Intrinsic::riscv_sf_vc_v_vv:9318case Intrinsic::riscv_sf_vc_v_fv:9319case Intrinsic::riscv_sf_vc_v_xvv:9320case Intrinsic::riscv_sf_vc_v_ivv:9321case Intrinsic::riscv_sf_vc_v_vvv:9322case Intrinsic::riscv_sf_vc_v_fvv:9323case Intrinsic::riscv_sf_vc_v_xvw:9324case 
Intrinsic::riscv_sf_vc_v_ivw:9325case Intrinsic::riscv_sf_vc_v_vvw:9326case Intrinsic::riscv_sf_vc_v_fvw: {9327MVT VT = Op.getSimpleValueType();93289329SmallVector<SDValue> Operands{Op->op_values()};9330processVCIXOperands(Op, Operands, DAG);93319332MVT RetVT = VT;9333if (VT.isFixedLengthVector())9334RetVT = getContainerForFixedLengthVector(VT);9335else if (VT.isFloatingPoint())9336RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),9337VT.getVectorElementCount());93389339SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);93409341if (VT.isFixedLengthVector())9342NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);9343else if (VT.isFloatingPoint())9344NewNode = DAG.getBitcast(VT, NewNode);93459346if (Op == NewNode)9347break;93489349return NewNode;9350}9351}93529353return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);9354}93559356static inline SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,9357unsigned Type) {9358SDLoc DL(Op);9359SmallVector<SDValue> Operands{Op->op_values()};9360Operands.erase(Operands.begin() + 1);93619362const RISCVSubtarget &Subtarget =9363DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();9364MVT VT = Op.getSimpleValueType();9365MVT RetVT = VT;9366MVT FloatVT = VT;93679368if (VT.isFloatingPoint()) {9369RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),9370VT.getVectorElementCount());9371FloatVT = RetVT;9372}9373if (VT.isFixedLengthVector())9374RetVT = getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), RetVT,9375Subtarget);93769377processVCIXOperands(Op, Operands, DAG);93789379SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});9380SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);9381SDValue Chain = NewNode.getValue(1);93829383if (VT.isFixedLengthVector())9384NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);9385if (VT.isFloatingPoint())9386NewNode = DAG.getBitcast(VT, NewNode);93879388NewNode = DAG.getMergeValues({NewNode, Chain}, DL);93899390return NewNode;9391}93929393static inline SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,9394unsigned Type) {9395SmallVector<SDValue> Operands{Op->op_values()};9396Operands.erase(Operands.begin() + 1);9397processVCIXOperands(Op, Operands, DAG);93989399return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);9400}94019402SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,9403SelectionDAG &DAG) const {9404unsigned IntNo = Op.getConstantOperandVal(1);9405switch (IntNo) {9406default:9407break;9408case Intrinsic::riscv_masked_strided_load: {9409SDLoc DL(Op);9410MVT XLenVT = Subtarget.getXLenVT();94119412// If the mask is known to be all ones, optimize to an unmasked intrinsic;9413// the selection of the masked intrinsics doesn't do this for us.9414SDValue Mask = Op.getOperand(5);9415bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());94169417MVT VT = Op->getSimpleValueType(0);9418MVT ContainerVT = VT;9419if (VT.isFixedLengthVector())9420ContainerVT = getContainerForFixedLengthVector(VT);94219422SDValue PassThru = Op.getOperand(2);9423if (!IsUnmasked) {9424MVT MaskVT = getMaskTypeFor(ContainerVT);9425if (VT.isFixedLengthVector()) {9426Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);9427PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);9428}9429}94309431auto *Load = cast<MemIntrinsicSDNode>(Op);9432SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;9433SDValue Ptr = Op.getOperand(3);9434SDValue Stride = 
Op.getOperand(4);9435SDValue Result, Chain;94369437// TODO: We restrict this to unmasked loads currently in consideration of9438// the complexity of handling all falses masks.9439MVT ScalarVT = ContainerVT.getVectorElementType();9440if (IsUnmasked && isNullConstant(Stride) && ContainerVT.isInteger()) {9441SDValue ScalarLoad =9442DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Load->getChain(), Ptr,9443ScalarVT, Load->getMemOperand());9444Chain = ScalarLoad.getValue(1);9445Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG,9446Subtarget);9447} else if (IsUnmasked && isNullConstant(Stride) && isTypeLegal(ScalarVT)) {9448SDValue ScalarLoad = DAG.getLoad(ScalarVT, DL, Load->getChain(), Ptr,9449Load->getMemOperand());9450Chain = ScalarLoad.getValue(1);9451Result = DAG.getSplat(ContainerVT, DL, ScalarLoad);9452} else {9453SDValue IntID = DAG.getTargetConstant(9454IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL,9455XLenVT);94569457SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};9458if (IsUnmasked)9459Ops.push_back(DAG.getUNDEF(ContainerVT));9460else9461Ops.push_back(PassThru);9462Ops.push_back(Ptr);9463Ops.push_back(Stride);9464if (!IsUnmasked)9465Ops.push_back(Mask);9466Ops.push_back(VL);9467if (!IsUnmasked) {9468SDValue Policy =9469DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);9470Ops.push_back(Policy);9471}94729473SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});9474Result =9475DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,9476Load->getMemoryVT(), Load->getMemOperand());9477Chain = Result.getValue(1);9478}9479if (VT.isFixedLengthVector())9480Result = convertFromScalableVector(VT, Result, DAG, Subtarget);9481return DAG.getMergeValues({Result, Chain}, DL);9482}9483case Intrinsic::riscv_seg2_load:9484case Intrinsic::riscv_seg3_load:9485case Intrinsic::riscv_seg4_load:9486case Intrinsic::riscv_seg5_load:9487case Intrinsic::riscv_seg6_load:9488case Intrinsic::riscv_seg7_load:9489case Intrinsic::riscv_seg8_load: {9490SDLoc DL(Op);9491static const Intrinsic::ID VlsegInts[7] = {9492Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,9493Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,9494Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,9495Intrinsic::riscv_vlseg8};9496unsigned NF = Op->getNumValues() - 1;9497assert(NF >= 2 && NF <= 8 && "Unexpected seg number");9498MVT XLenVT = Subtarget.getXLenVT();9499MVT VT = Op->getSimpleValueType(0);9500MVT ContainerVT = getContainerForFixedLengthVector(VT);95019502SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,9503Subtarget);9504SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);9505auto *Load = cast<MemIntrinsicSDNode>(Op);9506SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT);9507ContainerVTs.push_back(MVT::Other);9508SDVTList VTs = DAG.getVTList(ContainerVTs);9509SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID};9510Ops.insert(Ops.end(), NF, DAG.getUNDEF(ContainerVT));9511Ops.push_back(Op.getOperand(2));9512Ops.push_back(VL);9513SDValue Result =9514DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,9515Load->getMemoryVT(), Load->getMemOperand());9516SmallVector<SDValue, 9> Results;9517for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++)9518Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx),9519DAG, Subtarget));9520Results.push_back(Result.getValue(NF));9521return DAG.getMergeValues(Results, DL);9522}9523case Intrinsic::riscv_sf_vc_v_x_se:9524return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);9525case 
Intrinsic::riscv_sf_vc_v_i_se:9526return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);9527case Intrinsic::riscv_sf_vc_v_xv_se:9528return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);9529case Intrinsic::riscv_sf_vc_v_iv_se:9530return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);9531case Intrinsic::riscv_sf_vc_v_vv_se:9532return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);9533case Intrinsic::riscv_sf_vc_v_fv_se:9534return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);9535case Intrinsic::riscv_sf_vc_v_xvv_se:9536return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);9537case Intrinsic::riscv_sf_vc_v_ivv_se:9538return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);9539case Intrinsic::riscv_sf_vc_v_vvv_se:9540return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);9541case Intrinsic::riscv_sf_vc_v_fvv_se:9542return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);9543case Intrinsic::riscv_sf_vc_v_xvw_se:9544return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);9545case Intrinsic::riscv_sf_vc_v_ivw_se:9546return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);9547case Intrinsic::riscv_sf_vc_v_vvw_se:9548return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);9549case Intrinsic::riscv_sf_vc_v_fvw_se:9550return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);9551}95529553return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);9554}95559556SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,9557SelectionDAG &DAG) const {9558unsigned IntNo = Op.getConstantOperandVal(1);9559switch (IntNo) {9560default:9561break;9562case Intrinsic::riscv_masked_strided_store: {9563SDLoc DL(Op);9564MVT XLenVT = Subtarget.getXLenVT();95659566// If the mask is known to be all ones, optimize to an unmasked intrinsic;9567// the selection of the masked intrinsics doesn't do this for us.9568SDValue Mask = Op.getOperand(5);9569bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());95709571SDValue Val = Op.getOperand(2);9572MVT VT = Val.getSimpleValueType();9573MVT ContainerVT = VT;9574if (VT.isFixedLengthVector()) {9575ContainerVT = getContainerForFixedLengthVector(VT);9576Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);9577}9578if (!IsUnmasked) {9579MVT MaskVT = getMaskTypeFor(ContainerVT);9580if (VT.isFixedLengthVector())9581Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);9582}95839584SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;95859586SDValue IntID = DAG.getTargetConstant(9587IsUnmasked ? 
Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL,9588XLenVT);95899590auto *Store = cast<MemIntrinsicSDNode>(Op);9591SmallVector<SDValue, 8> Ops{Store->getChain(), IntID};9592Ops.push_back(Val);9593Ops.push_back(Op.getOperand(3)); // Ptr9594Ops.push_back(Op.getOperand(4)); // Stride9595if (!IsUnmasked)9596Ops.push_back(Mask);9597Ops.push_back(VL);95989599return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(),9600Ops, Store->getMemoryVT(),9601Store->getMemOperand());9602}9603case Intrinsic::riscv_seg2_store:9604case Intrinsic::riscv_seg3_store:9605case Intrinsic::riscv_seg4_store:9606case Intrinsic::riscv_seg5_store:9607case Intrinsic::riscv_seg6_store:9608case Intrinsic::riscv_seg7_store:9609case Intrinsic::riscv_seg8_store: {9610SDLoc DL(Op);9611static const Intrinsic::ID VssegInts[] = {9612Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,9613Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,9614Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,9615Intrinsic::riscv_vsseg8};9616// Operands are (chain, int_id, vec*, ptr, vl)9617unsigned NF = Op->getNumOperands() - 4;9618assert(NF >= 2 && NF <= 8 && "Unexpected seg number");9619MVT XLenVT = Subtarget.getXLenVT();9620MVT VT = Op->getOperand(2).getSimpleValueType();9621MVT ContainerVT = getContainerForFixedLengthVector(VT);96229623SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,9624Subtarget);9625SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);9626SDValue Ptr = Op->getOperand(NF + 2);96279628auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);9629SmallVector<SDValue, 12> Ops = {FixedIntrinsic->getChain(), IntID};9630for (unsigned i = 0; i < NF; i++)9631Ops.push_back(convertToScalableVector(9632ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget));9633Ops.append({Ptr, VL});96349635return DAG.getMemIntrinsicNode(9636ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,9637FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());9638}9639case Intrinsic::riscv_sf_vc_xv_se:9640return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);9641case Intrinsic::riscv_sf_vc_iv_se:9642return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);9643case Intrinsic::riscv_sf_vc_vv_se:9644return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);9645case Intrinsic::riscv_sf_vc_fv_se:9646return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);9647case Intrinsic::riscv_sf_vc_xvv_se:9648return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);9649case Intrinsic::riscv_sf_vc_ivv_se:9650return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);9651case Intrinsic::riscv_sf_vc_vvv_se:9652return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);9653case Intrinsic::riscv_sf_vc_fvv_se:9654return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);9655case Intrinsic::riscv_sf_vc_xvw_se:9656return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);9657case Intrinsic::riscv_sf_vc_ivw_se:9658return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);9659case Intrinsic::riscv_sf_vc_vvw_se:9660return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);9661case Intrinsic::riscv_sf_vc_fvw_se:9662return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);9663}96649665return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);9666}96679668static unsigned getRVVReductionOp(unsigned ISDOpcode) {9669switch (ISDOpcode) {9670default:9671llvm_unreachable("Unhandled reduction");9672case ISD::VP_REDUCE_ADD:9673case ISD::VECREDUCE_ADD:9674return RISCVISD::VECREDUCE_ADD_VL;9675case 
ISD::VP_REDUCE_UMAX:9676case ISD::VECREDUCE_UMAX:9677return RISCVISD::VECREDUCE_UMAX_VL;9678case ISD::VP_REDUCE_SMAX:9679case ISD::VECREDUCE_SMAX:9680return RISCVISD::VECREDUCE_SMAX_VL;9681case ISD::VP_REDUCE_UMIN:9682case ISD::VECREDUCE_UMIN:9683return RISCVISD::VECREDUCE_UMIN_VL;9684case ISD::VP_REDUCE_SMIN:9685case ISD::VECREDUCE_SMIN:9686return RISCVISD::VECREDUCE_SMIN_VL;9687case ISD::VP_REDUCE_AND:9688case ISD::VECREDUCE_AND:9689return RISCVISD::VECREDUCE_AND_VL;9690case ISD::VP_REDUCE_OR:9691case ISD::VECREDUCE_OR:9692return RISCVISD::VECREDUCE_OR_VL;9693case ISD::VP_REDUCE_XOR:9694case ISD::VECREDUCE_XOR:9695return RISCVISD::VECREDUCE_XOR_VL;9696case ISD::VP_REDUCE_FADD:9697return RISCVISD::VECREDUCE_FADD_VL;9698case ISD::VP_REDUCE_SEQ_FADD:9699return RISCVISD::VECREDUCE_SEQ_FADD_VL;9700case ISD::VP_REDUCE_FMAX:9701case ISD::VP_REDUCE_FMAXIMUM:9702return RISCVISD::VECREDUCE_FMAX_VL;9703case ISD::VP_REDUCE_FMIN:9704case ISD::VP_REDUCE_FMINIMUM:9705return RISCVISD::VECREDUCE_FMIN_VL;9706}97079708}97099710SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,9711SelectionDAG &DAG,9712bool IsVP) const {9713SDLoc DL(Op);9714SDValue Vec = Op.getOperand(IsVP ? 1 : 0);9715MVT VecVT = Vec.getSimpleValueType();9716assert((Op.getOpcode() == ISD::VECREDUCE_AND ||9717Op.getOpcode() == ISD::VECREDUCE_OR ||9718Op.getOpcode() == ISD::VECREDUCE_XOR ||9719Op.getOpcode() == ISD::VP_REDUCE_AND ||9720Op.getOpcode() == ISD::VP_REDUCE_OR ||9721Op.getOpcode() == ISD::VP_REDUCE_XOR) &&9722"Unexpected reduction lowering");97239724MVT XLenVT = Subtarget.getXLenVT();97259726MVT ContainerVT = VecVT;9727if (VecVT.isFixedLengthVector()) {9728ContainerVT = getContainerForFixedLengthVector(VecVT);9729Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);9730}97319732SDValue Mask, VL;9733if (IsVP) {9734Mask = Op.getOperand(2);9735VL = Op.getOperand(3);9736} else {9737std::tie(Mask, VL) =9738getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);9739}97409741unsigned BaseOpc;9742ISD::CondCode CC;9743SDValue Zero = DAG.getConstant(0, DL, XLenVT);97449745switch (Op.getOpcode()) {9746default:9747llvm_unreachable("Unhandled reduction");9748case ISD::VECREDUCE_AND:9749case ISD::VP_REDUCE_AND: {9750// vcpop ~x == 09751SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);9752Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);9753Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);9754CC = ISD::SETEQ;9755BaseOpc = ISD::AND;9756break;9757}9758case ISD::VECREDUCE_OR:9759case ISD::VP_REDUCE_OR:9760// vcpop x != 09761Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);9762CC = ISD::SETNE;9763BaseOpc = ISD::OR;9764break;9765case ISD::VECREDUCE_XOR:9766case ISD::VP_REDUCE_XOR: {9767// ((vcpop x) & 1) != 09768SDValue One = DAG.getConstant(1, DL, XLenVT);9769Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);9770Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);9771CC = ISD::SETNE;9772BaseOpc = ISD::XOR;9773break;9774}9775}97769777SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);9778SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);97799780if (!IsVP)9781return SetCC;97829783// Now include the start value in the operation.9784// Note that we must return the start value when no elements are operated9785// upon. 
  // The vcpop instructions we've emitted in each case above will return 0 for
  // an inactive vector, and so we've already received the neutral value: AND
  // gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we can
  // simply include the start value.
  return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
}

static bool isNonZeroAVL(SDValue AVL) {
  auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
  auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
  return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
         (ImmAVL && ImmAVL->getZExtValue() >= 1);
}

/// Helper to lower a reduction sequence of the form:
/// scalar = reduce_op vec, scalar_start
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
                                 SDValue StartValue, SDValue Vec, SDValue Mask,
                                 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  const MVT VecVT = Vec.getSimpleValueType();
  const MVT M1VT = getLMUL1VT(VecVT);
  const MVT XLenVT = Subtarget.getXLenVT();
  const bool NonZeroAVL = isNonZeroAVL(VL);

  // The reduction needs an LMUL1 input; do the splat at either LMUL1
  // or the original VT if fractional.
  auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
  // We reuse the VL of the reduction to reduce vsetvli toggles if we can
  // prove it is non-zero. For the AVL=0 case, we need the scalar to
  // be the result of the reduction operation.
  auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
  SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
                                           DAG, Subtarget);
  if (M1VT != InnerVT)
    InitialValue =
        DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT, DAG.getUNDEF(M1VT),
                    InitialValue, DAG.getVectorIdxConstant(0, DL));
  SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
  SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
  SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
  SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
                     DAG.getVectorIdxConstant(0, DL));
}
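// lowerVECREDUCE below, as well as lowerFPVECREDUCE and lowerVPREDUCE further
// down, all funnel into lowerReductionSeq above. As a rough illustration (the
// register choices are arbitrary), an i32 add reduction over four elements
// with its start value in a0 is expected to come out as something like:
//   vsetivli   zero, 4, e32, m1, ta, ma
//   vmv.s.x    v9, a0           // move the scalar start value into v9[0]
//   vredsum.vs v9, v8, v9       // v9[0] = sum(v8[0..3]) + v9[0]
//   vmv.x.s    a0, v9           // read the scalar result back out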
SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vec = Op.getOperand(0);
  EVT VecEVT = Vec.getValueType();

  unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());

  // Due to ordering in legalize types we may have a vector type that needs to
  // be split. Do that manually so we can get down to a legal type.
  while (getTypeAction(*DAG.getContext(), VecEVT) ==
         TargetLowering::TypeSplitVector) {
    auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
    VecEVT = Lo.getValueType();
    Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
  }

  // TODO: The type may need to be widened rather than split. Or widened before
  // it can be split.
  if (!isTypeLegal(VecEVT))
    return SDValue();

  MVT VecVT = VecEVT.getSimpleVT();
  MVT VecEltVT = VecVT.getVectorElementType();
  unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());

  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
  switch (BaseOpc) {
  case ISD::AND:
  case ISD::OR:
  case ISD::UMAX:
  case ISD::UMIN:
  case ISD::SMAX:
  case ISD::SMIN:
    StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
                         DAG.getVectorIdxConstant(0, DL));
  }
  return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
                           Mask, VL, DL, DAG, Subtarget);
}

// Given a reduction op, this function returns the matching reduction opcode,
// the vector SDValue and the scalar SDValue required to lower this to a
// RISCVISD node.
static std::tuple<unsigned, SDValue, SDValue>
getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT,
                               const RISCVSubtarget &Subtarget) {
  SDLoc DL(Op);
  auto Flags = Op->getFlags();
  unsigned Opcode = Op.getOpcode();
  switch (Opcode) {
  default:
    llvm_unreachable("Unhandled reduction");
  case ISD::VECREDUCE_FADD: {
    // Use positive zero if we can. It is cheaper to materialize.
    SDValue Zero =
        DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
    return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
  }
  case ISD::VECREDUCE_SEQ_FADD:
    return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
                           Op.getOperand(0));
  case ISD::VECREDUCE_FMINIMUM:
  case ISD::VECREDUCE_FMAXIMUM:
  case ISD::VECREDUCE_FMIN:
  case ISD::VECREDUCE_FMAX: {
    SDValue Front =
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
                    DAG.getVectorIdxConstant(0, DL));
    unsigned RVVOpc =
        (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
            ?
RISCVISD::VECREDUCE_FMIN_VL9911: RISCVISD::VECREDUCE_FMAX_VL;9912return std::make_tuple(RVVOpc, Op.getOperand(0), Front);9913}9914}9915}99169917SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,9918SelectionDAG &DAG) const {9919SDLoc DL(Op);9920MVT VecEltVT = Op.getSimpleValueType();99219922unsigned RVVOpcode;9923SDValue VectorVal, ScalarVal;9924std::tie(RVVOpcode, VectorVal, ScalarVal) =9925getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);9926MVT VecVT = VectorVal.getSimpleValueType();99279928MVT ContainerVT = VecVT;9929if (VecVT.isFixedLengthVector()) {9930ContainerVT = getContainerForFixedLengthVector(VecVT);9931VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);9932}99339934MVT ResVT = Op.getSimpleValueType();9935auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);9936SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,9937VL, DL, DAG, Subtarget);9938if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&9939Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)9940return Res;99419942if (Op->getFlags().hasNoNaNs())9943return Res;99449945// Force output to NaN if any element is Nan.9946SDValue IsNan =9947DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),9948{VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),9949DAG.getUNDEF(Mask.getValueType()), Mask, VL});9950MVT XLenVT = Subtarget.getXLenVT();9951SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);9952SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,9953DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);9954return DAG.getSelect(9955DL, ResVT, NoNaNs, Res,9956DAG.getConstantFP(APFloat::getNaN(DAG.EVTToAPFloatSemantics(ResVT)), DL,9957ResVT));9958}99599960SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,9961SelectionDAG &DAG) const {9962SDLoc DL(Op);9963unsigned Opc = Op.getOpcode();9964SDValue Start = Op.getOperand(0);9965SDValue Vec = Op.getOperand(1);9966EVT VecEVT = Vec.getValueType();9967MVT XLenVT = Subtarget.getXLenVT();99689969// TODO: The type may need to be widened rather than split. 
Or widened before9970// it can be split.9971if (!isTypeLegal(VecEVT))9972return SDValue();99739974MVT VecVT = VecEVT.getSimpleVT();9975unsigned RVVOpcode = getRVVReductionOp(Opc);99769977if (VecVT.isFixedLengthVector()) {9978auto ContainerVT = getContainerForFixedLengthVector(VecVT);9979Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);9980}99819982SDValue VL = Op.getOperand(3);9983SDValue Mask = Op.getOperand(2);9984SDValue Res =9985lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),9986Vec, Mask, VL, DL, DAG, Subtarget);9987if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||9988Op->getFlags().hasNoNaNs())9989return Res;99909991// Propagate NaNs.9992MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());9993// Check if any of the elements in Vec is NaN.9994SDValue IsNaN = DAG.getNode(9995RISCVISD::SETCC_VL, DL, PredVT,9996{Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});9997SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);9998// Check if the start value is NaN.9999SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);10000VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);10001SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,10002DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);10003MVT ResVT = Res.getSimpleValueType();10004return DAG.getSelect(10005DL, ResVT, NoNaNs, Res,10006DAG.getConstantFP(APFloat::getNaN(DAG.EVTToAPFloatSemantics(ResVT)), DL,10007ResVT));10008}1000910010SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,10011SelectionDAG &DAG) const {10012SDValue Vec = Op.getOperand(0);10013SDValue SubVec = Op.getOperand(1);10014MVT VecVT = Vec.getSimpleValueType();10015MVT SubVecVT = SubVec.getSimpleValueType();1001610017SDLoc DL(Op);10018MVT XLenVT = Subtarget.getXLenVT();10019unsigned OrigIdx = Op.getConstantOperandVal(2);10020const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();1002110022// We don't have the ability to slide mask vectors up indexed by their i110023// elements; the smallest we can do is i8. Often we are able to bitcast to10024// equivalent i8 vectors. Note that when inserting a fixed-length vector10025// into a scalable one, we might not necessarily have enough scalable10026// elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.10027if (SubVecVT.getVectorElementType() == MVT::i1 &&10028(OrigIdx != 0 || !Vec.isUndef())) {10029if (VecVT.getVectorMinNumElements() >= 8 &&10030SubVecVT.getVectorMinNumElements() >= 8) {10031assert(OrigIdx % 8 == 0 && "Invalid index");10032assert(VecVT.getVectorMinNumElements() % 8 == 0 &&10033SubVecVT.getVectorMinNumElements() % 8 == 0 &&10034"Unexpected mask vector lowering");10035OrigIdx /= 8;10036SubVecVT =10037MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,10038SubVecVT.isScalableVector());10039VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,10040VecVT.isScalableVector());10041Vec = DAG.getBitcast(VecVT, Vec);10042SubVec = DAG.getBitcast(SubVecVT, SubVec);10043} else {10044// We can't slide this mask vector up indexed by its i1 elements.10045// This poses a problem when we wish to insert a scalable vector which10046// can't be re-expressed as a larger type. 
      // Just choose the slow path and extend to a larger type, then truncate
      // back down.
      MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
      MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
      Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
      SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
      Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
                        Op.getOperand(2));
      SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
      return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
    }
  }

  // If the subvector is a fixed-length type and we don't know VLEN exactly, we
  // cannot use subregister manipulation to simplify the codegen; we don't know
  // which register of a LMUL group contains the specific subvector as we only
  // know the minimum register size. Therefore we must slide the vector group
  // up the full amount.
  const auto VLen = Subtarget.getRealVLen();
  if (SubVecVT.isFixedLengthVector() && !VLen) {
    if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
      return Op;
    MVT ContainerVT = VecVT;
    if (VecVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VecVT);
      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
    }

    if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
      SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
                           DAG.getUNDEF(ContainerVT), SubVec,
                           DAG.getVectorIdxConstant(0, DL));
      SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
      return DAG.getBitcast(Op.getValueType(), SubVec);
    }

    SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
                         DAG.getUNDEF(ContainerVT), SubVec,
                         DAG.getVectorIdxConstant(0, DL));
    SDValue Mask =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
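    // For example (illustrative): inserting a 4-element fixed-length subvector
    // at element index 4 slides the subvector up by 4 and uses VL = 8, so
    // elements [0,4) of Vec are left untouched, elements [4,8) receive the
    // subvector, and the tail beyond element 8 is governed by the policy
    // chosen below.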
    // Set the vector length to only the number of elements we care about. Note
    // that for slideup this includes the offset.
    unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
    SDValue VL = getVLOp(EndIndex, ContainerVT, DL, DAG, Subtarget);

    // Use tail agnostic policy if we're inserting over Vec's tail.
    unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
    if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
      Policy = RISCVII::TAIL_AGNOSTIC;

    // If we're inserting into the lowest elements, use a tail undisturbed
    // vmv.v.v.
    if (OrigIdx == 0) {
      SubVec =
          DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
    } else {
      SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
      SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
                           SlideupAmt, Mask, VL, Policy);
    }

    if (VecVT.isFixedLengthVector())
      SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
    return DAG.getBitcast(Op.getValueType(), SubVec);
  }

  MVT ContainerVecVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVecVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
  }

  MVT ContainerSubVecVT = SubVecVT;
  if (SubVecVT.isFixedLengthVector()) {
    ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
    SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
  }

  unsigned SubRegIdx;
  ElementCount RemIdx;
  // insert_subvector scales the index by vscale if the subvector is scalable,
  // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
  // we have a fixed length subvector, we need to adjust the index by 1/vscale.
  if (SubVecVT.isFixedLengthVector()) {
    assert(VLen);
    unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
    auto Decompose =
        RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
            ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
    SubRegIdx = Decompose.first;
    RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
                                    (OrigIdx % Vscale));
  } else {
    auto Decompose =
        RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
            ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
    SubRegIdx = Decompose.first;
    RemIdx = ElementCount::getScalable(Decompose.second);
  }

  TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
  assert(isPowerOf2_64(
      Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
  bool ExactlyVecRegSized =
      Subtarget.expandVScale(SubVecVT.getSizeInBits())
          .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
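  // At this point SubRegIdx names the register of the LMUL group in which the
  // subvector starts, and RemIdx is the remaining element offset of the
  // subvector within that register (zero when the insert lines up exactly with
  // a subregister boundary).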
  // 1. If the Idx has been completely eliminated and this subvector's size is
  // a vector register or a multiple thereof, or the surrounding elements are
  // undef, then this is a subvector insert which naturally aligns to a vector
  // register. These can easily be handled using subregister manipulation.
  // 2. If the subvector isn't an exact multiple of a valid register group size,
  // then the insertion must preserve the undisturbed elements of the register.
  // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
  // vector type (which resolves to a subregister copy), performing a VSLIDEUP
  // to place the subvector within the vector register, and an INSERT_SUBVECTOR
  // of that LMUL=1 type back into the larger vector (resolving to another
  // subregister operation). See below for how our VSLIDEUP works. We go via a
  // LMUL=1 type to avoid allocating a large register group to hold our
  // subvector.
  if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
    if (SubVecVT.isFixedLengthVector()) {
      // We may get NoSubRegister if inserting at index 0 and the subvec
      // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
      if (SubRegIdx == RISCV::NoSubRegister) {
        assert(OrigIdx == 0);
        return Op;
      }

      SDValue Insert =
          DAG.getTargetInsertSubreg(SubRegIdx, DL, ContainerVecVT, Vec, SubVec);
      if (VecVT.isFixedLengthVector())
        Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
      return Insert;
    }
    return Op;
  }

  // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
  // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
  // (in our case undisturbed). This means we can set up a subvector insertion
  // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
  // size of the subvector.
  MVT InterSubVT = ContainerVecVT;
  SDValue AlignedExtract = Vec;
  unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
  if (SubVecVT.isFixedLengthVector())
    AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
  if (ContainerVecVT.bitsGT(getLMUL1VT(ContainerVecVT))) {
    InterSubVT = getLMUL1VT(ContainerVecVT);
    // Extract a subvector equal to the nearest full vector register type.
This10198// should resolve to a EXTRACT_SUBREG instruction.10199AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,10200DAG.getVectorIdxConstant(AlignedIdx, DL));10201}1020210203SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,10204DAG.getUNDEF(InterSubVT), SubVec,10205DAG.getVectorIdxConstant(0, DL));1020610207auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);1020810209ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();10210VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());1021110212// Use tail agnostic policy if we're inserting over InterSubVT's tail.10213unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;10214if (Subtarget.expandVScale(EndIndex) ==10215Subtarget.expandVScale(InterSubVT.getVectorElementCount()))10216Policy = RISCVII::TAIL_AGNOSTIC;1021710218// If we're inserting into the lowest elements, use a tail undisturbed10219// vmv.v.v.10220if (RemIdx.isZero()) {10221SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,10222SubVec, VL);10223} else {10224SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);1022510226// Construct the vector length corresponding to RemIdx + length(SubVecVT).10227VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);1022810229SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,10230SlideupAmt, Mask, VL, Policy);10231}1023210233// If required, insert this subvector back into the correct vector register.10234// This should resolve to an INSERT_SUBREG instruction.10235if (ContainerVecVT.bitsGT(InterSubVT))10236SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,10237DAG.getVectorIdxConstant(AlignedIdx, DL));1023810239if (VecVT.isFixedLengthVector())10240SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);1024110242// We might have bitcast from a mask type: cast back to the original type if10243// required.10244return DAG.getBitcast(Op.getSimpleValueType(), SubVec);10245}1024610247SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,10248SelectionDAG &DAG) const {10249SDValue Vec = Op.getOperand(0);10250MVT SubVecVT = Op.getSimpleValueType();10251MVT VecVT = Vec.getSimpleValueType();1025210253SDLoc DL(Op);10254MVT XLenVT = Subtarget.getXLenVT();10255unsigned OrigIdx = Op.getConstantOperandVal(1);10256const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();1025710258// We don't have the ability to slide mask vectors down indexed by their i110259// elements; the smallest we can do is i8. Often we are able to bitcast to10260// equivalent i8 vectors. 
Note that when extracting a fixed-length vector10261// from a scalable one, we might not necessarily have enough scalable10262// elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.10263if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {10264if (VecVT.getVectorMinNumElements() >= 8 &&10265SubVecVT.getVectorMinNumElements() >= 8) {10266assert(OrigIdx % 8 == 0 && "Invalid index");10267assert(VecVT.getVectorMinNumElements() % 8 == 0 &&10268SubVecVT.getVectorMinNumElements() % 8 == 0 &&10269"Unexpected mask vector lowering");10270OrigIdx /= 8;10271SubVecVT =10272MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,10273SubVecVT.isScalableVector());10274VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,10275VecVT.isScalableVector());10276Vec = DAG.getBitcast(VecVT, Vec);10277} else {10278// We can't slide this mask vector down, indexed by its i1 elements.10279// This poses a problem when we wish to extract a scalable vector which10280// can't be re-expressed as a larger type. Just choose the slow path and10281// extend to a larger type, then truncate back down.10282// TODO: We could probably improve this when extracting certain fixed10283// from fixed, where we can extract as i8 and shift the correct element10284// right to reach the desired subvector?10285MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);10286MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);10287Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);10288Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,10289Op.getOperand(1));10290SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);10291return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);10292}10293}1029410295// With an index of 0 this is a cast-like subvector, which can be performed10296// with subregister operations.10297if (OrigIdx == 0)10298return Op;1029910300const auto VLen = Subtarget.getRealVLen();1030110302// If the subvector vector is a fixed-length type and we don't know VLEN10303// exactly, we cannot use subregister manipulation to simplify the codegen; we10304// don't know which register of a LMUL group contains the specific subvector10305// as we only know the minimum register size. Therefore we must slide the10306// vector group down the full amount.10307if (SubVecVT.isFixedLengthVector() && !VLen) {10308MVT ContainerVT = VecVT;10309if (VecVT.isFixedLengthVector()) {10310ContainerVT = getContainerForFixedLengthVector(VecVT);10311Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);10312}1031310314// Shrink down Vec so we're performing the slidedown on a smaller LMUL.10315unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;10316if (auto ShrunkVT =10317getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {10318ContainerVT = *ShrunkVT;10319Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,10320DAG.getVectorIdxConstant(0, DL));10321}1032210323SDValue Mask =10324getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;10325// Set the vector length to only the number of elements we care about. 
    // This avoids sliding down elements we're going to discard straight away.
    SDValue VL = getVLOp(SubVecVT.getVectorNumElements(), ContainerVT, DL, DAG,
                         Subtarget);
    SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
    SDValue Slidedown =
        getVSlidedown(DAG, Subtarget, DL, ContainerVT,
                      DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
    // Now we can use a cast-like subvector extract to get the result.
    Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
                            DAG.getVectorIdxConstant(0, DL));
    return DAG.getBitcast(Op.getValueType(), Slidedown);
  }

  if (VecVT.isFixedLengthVector()) {
    VecVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
  }

  MVT ContainerSubVecVT = SubVecVT;
  if (SubVecVT.isFixedLengthVector())
    ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);

  unsigned SubRegIdx;
  ElementCount RemIdx;
  // extract_subvector scales the index by vscale if the subvector is scalable,
  // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
  // we have a fixed length subvector, we need to adjust the index by 1/vscale.
  if (SubVecVT.isFixedLengthVector()) {
    assert(VLen);
    unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
    auto Decompose =
        RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
            VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
    SubRegIdx = Decompose.first;
    RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
                                    (OrigIdx % Vscale));
  } else {
    auto Decompose =
        RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
            VecVT, ContainerSubVecVT, OrigIdx, TRI);
    SubRegIdx = Decompose.first;
    RemIdx = ElementCount::getScalable(Decompose.second);
  }
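  // Illustrative example (assuming VLEN = 128, i.e. vscale = 2): extracting a
  // v2i32 at index 6 from a v8i32 (container nxv4i32, an m2 group) decomposes
  // to the second register of the group, which holds elements [4,8), with a
  // remainder of 2; that register is slid down by 2 below and its low two
  // elements form the result.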
  // If the Idx has been completely eliminated then this is a subvector extract
  // which naturally aligns to a vector register. These can easily be handled
  // using subregister manipulation.
  if (RemIdx.isZero()) {
    if (SubVecVT.isFixedLengthVector()) {
      Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, ContainerSubVecVT, Vec);
      return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
    }
    return Op;
  }

  // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
  // was > M1 then the index would need to be a multiple of VLMAX, and so would
  // divide exactly.
  assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
         getLMUL(ContainerSubVecVT) == RISCVII::VLMUL::LMUL_1);

  // If the vector type is an LMUL-group type, extract a subvector equal to the
  // nearest full vector register type.
  MVT InterSubVT = VecVT;
  if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
    // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
    // we should have successfully decomposed the extract into a subregister.
    assert(SubRegIdx != RISCV::NoSubRegister);
    InterSubVT = getLMUL1VT(VecVT);
    Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, InterSubVT, Vec);
  }

  // Slide this vector register down by the desired number of elements in order
  // to place the desired subvector starting at element 0.
  SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
  auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
  if (SubVecVT.isFixedLengthVector())
    VL = getVLOp(SubVecVT.getVectorNumElements(), InterSubVT, DL, DAG,
                 Subtarget);
  SDValue Slidedown =
      getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
                    Vec, SlidedownAmt, Mask, VL);

  // Now the vector is in the right position, extract our final subvector. This
  // should resolve to a COPY.
  Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
                          DAG.getVectorIdxConstant(0, DL));

  // We might have bitcast from a mask type: cast back to the original type if
  // required.
  return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
}

// Widen a vector's operands to i8, then truncate its results back to the
// original type, typically i1.
All operand and result types must be the same.10421static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,10422SelectionDAG &DAG) {10423MVT VT = N.getSimpleValueType();10424MVT WideVT = VT.changeVectorElementType(MVT::i8);10425SmallVector<SDValue, 4> WideOps;10426for (SDValue Op : N->ops()) {10427assert(Op.getSimpleValueType() == VT &&10428"Operands and result must be same type");10429WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));10430}1043110432unsigned NumVals = N->getNumValues();1043310434SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(10435NumVals, N.getValueType().changeVectorElementType(MVT::i8)));10436SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);10437SmallVector<SDValue, 4> TruncVals;10438for (unsigned I = 0; I < NumVals; I++) {10439TruncVals.push_back(10440DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),10441DAG.getConstant(0, DL, WideVT), ISD::SETNE));10442}1044310444if (TruncVals.size() > 1)10445return DAG.getMergeValues(TruncVals, DL);10446return TruncVals.front();10447}1044810449SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,10450SelectionDAG &DAG) const {10451SDLoc DL(Op);10452MVT VecVT = Op.getSimpleValueType();1045310454assert(VecVT.isScalableVector() &&10455"vector_interleave on non-scalable vector!");1045610457// 1 bit element vectors need to be widened to e810458if (VecVT.getVectorElementType() == MVT::i1)10459return widenVectorOpsToi8(Op, DL, DAG);1046010461// If the VT is LMUL=8, we need to split and reassemble.10462if (VecVT.getSizeInBits().getKnownMinValue() ==10463(8 * RISCV::RVVBitsPerBlock)) {10464auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);10465auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);10466EVT SplitVT = Op0Lo.getValueType();1046710468SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,10469DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);10470SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,10471DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);1047210473SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,10474ResLo.getValue(0), ResHi.getValue(0));10475SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),10476ResHi.getValue(1));10477return DAG.getMergeValues({Even, Odd}, DL);10478}1047910480// Concatenate the two vectors as one vector to deinterleave10481MVT ConcatVT =10482MVT::getVectorVT(VecVT.getVectorElementType(),10483VecVT.getVectorElementCount().multiplyCoefficientBy(2));10484SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,10485Op.getOperand(0), Op.getOperand(1));1048610487// We want to operate on all lanes, so get the mask and VL and mask for it10488auto [Mask, VL] = getDefaultScalableVLOps(ConcatVT, DL, DAG, Subtarget);10489SDValue Passthru = DAG.getUNDEF(ConcatVT);1049010491// We can deinterleave through vnsrl.wi if the element type is smaller than10492// ELEN10493if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {10494SDValue Even =10495getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, Subtarget, DAG);10496SDValue Odd =10497getDeinterleaveViaVNSRL(DL, VecVT, Concat, false, Subtarget, DAG);10498return DAG.getMergeValues({Even, Odd}, DL);10499}1050010501// For the indices, use the same SEW to avoid an extra vsetvli10502MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger();10503// Create a vector of even indices {0, 2, 4, ...}10504SDValue EvenIdx =10505DAG.getStepVector(DL, IdxVT, APInt(IdxVT.getScalarSizeInBits(), 2));10506// Create a vector of odd indices {1, 3, 5, ... 
}10507SDValue OddIdx =10508DAG.getNode(ISD::ADD, DL, IdxVT, EvenIdx, DAG.getConstant(1, DL, IdxVT));1050910510// Gather the even and odd elements into two separate vectors10511SDValue EvenWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,10512Concat, EvenIdx, Passthru, Mask, VL);10513SDValue OddWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,10514Concat, OddIdx, Passthru, Mask, VL);1051510516// Extract the result half of the gather for even and odd10517SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,10518DAG.getVectorIdxConstant(0, DL));10519SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,10520DAG.getVectorIdxConstant(0, DL));1052110522return DAG.getMergeValues({Even, Odd}, DL);10523}1052410525SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,10526SelectionDAG &DAG) const {10527SDLoc DL(Op);10528MVT VecVT = Op.getSimpleValueType();1052910530assert(VecVT.isScalableVector() &&10531"vector_interleave on non-scalable vector!");1053210533// i1 vectors need to be widened to i810534if (VecVT.getVectorElementType() == MVT::i1)10535return widenVectorOpsToi8(Op, DL, DAG);1053610537MVT XLenVT = Subtarget.getXLenVT();10538SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);1053910540// If the VT is LMUL=8, we need to split and reassemble.10541if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {10542auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);10543auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);10544EVT SplitVT = Op0Lo.getValueType();1054510546SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,10547DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);10548SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,10549DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);1055010551SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,10552ResLo.getValue(0), ResLo.getValue(1));10553SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,10554ResHi.getValue(0), ResHi.getValue(1));10555return DAG.getMergeValues({Lo, Hi}, DL);10556}1055710558SDValue Interleaved;1055910560// If the element type is smaller than ELEN, then we can interleave with10561// vwaddu.vv and vwmaccu.vx10562if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {10563Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,10564DAG, Subtarget);10565} else {10566// Otherwise, fallback to using vrgathere16.vv10567MVT ConcatVT =10568MVT::getVectorVT(VecVT.getVectorElementType(),10569VecVT.getVectorElementCount().multiplyCoefficientBy(2));10570SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,10571Op.getOperand(0), Op.getOperand(1));1057210573MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);1057410575// 0 1 2 3 4 5 6 7 ...10576SDValue StepVec = DAG.getStepVector(DL, IdxVT);1057710578// 1 1 1 1 1 1 1 1 ...10579SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));1058010581// 1 0 1 0 1 0 1 0 ...10582SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);10583OddMask = DAG.getSetCC(10584DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,10585DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),10586ISD::CondCode::SETNE);1058710588SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));1058910590// Build up the index vector for interleaving the concatenated vector10591// 0 0 1 1 2 2 3 3 ...10592SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);10593// 0 n 1 n+1 2 n+2 3 n+3 ...10594Idx 
= DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);

    // Then perform the interleave
    // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
    SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
    Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
                              Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
  }

  // Extract the two halves from the interleaved result
  SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
                           DAG.getVectorIdxConstant(0, DL));
  SDValue Hi = DAG.getNode(
      ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
      DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));

  return DAG.getMergeValues({Lo, Hi}, DL);
}

// Lower step_vector to the vid instruction. Any non-identity step value must
// be accounted for by manual expansion.
SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  assert(VT.isScalableVector() && "Expected scalable vector");
  MVT XLenVT = Subtarget.getXLenVT();
  auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
  SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
  uint64_t StepValImm = Op.getConstantOperandVal(0);
  if (StepValImm != 1) {
    if (isPowerOf2_64(StepValImm)) {
      SDValue StepVal =
          DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
                      DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
      StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
    } else {
      SDValue StepVal = lowerScalarSplat(
          SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
          VL, VT, DL, DAG, Subtarget);
      StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
    }
  }
  return StepVec;
}

// Implement vector_reverse using vrgather.vv with indices determined by
// subtracting the id of each element from (VLMAX-1).
This will convert10643// the indices like so:10644// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).10645// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.10646SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,10647SelectionDAG &DAG) const {10648SDLoc DL(Op);10649MVT VecVT = Op.getSimpleValueType();10650if (VecVT.getVectorElementType() == MVT::i1) {10651MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());10652SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));10653SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);10654return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2);10655}10656unsigned EltSize = VecVT.getScalarSizeInBits();10657unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();10658unsigned VectorBitsMax = Subtarget.getRealMaxVLen();10659unsigned MaxVLMAX =10660RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);1066110662unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;10663MVT IntVT = VecVT.changeVectorElementTypeToInteger();1066410665// If this is SEW=8 and VLMAX is potentially more than 256, we need10666// to use vrgatherei16.vv.10667// TODO: It's also possible to use vrgatherei16.vv for other types to10668// decrease register width for the index calculation.10669if (MaxVLMAX > 256 && EltSize == 8) {10670// If this is LMUL=8, we have to split before can use vrgatherei16.vv.10671// Reverse each half, then reassemble them in reverse order.10672// NOTE: It's also possible that after splitting that VLMAX no longer10673// requires vrgatherei16.vv.10674if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {10675auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);10676auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);10677Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);10678Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);10679// Reassemble the low and high pieces reversed.10680// FIXME: This is a CONCAT_VECTORS.10681SDValue Res =10682DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,10683DAG.getVectorIdxConstant(0, DL));10684return DAG.getNode(10685ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,10686DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));10687}1068810689// Just promote the int type to i16 which will double the LMUL.10690IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());10691GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;10692}1069310694MVT XLenVT = Subtarget.getXLenVT();10695auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);1069610697// Calculate VLMAX-1 for the desired SEW.10698SDValue VLMinus1 = DAG.getNode(ISD::SUB, DL, XLenVT,10699computeVLMax(VecVT, DL, DAG),10700DAG.getConstant(1, DL, XLenVT));1070110702// Splat VLMAX-1 taking care to handle SEW==64 on RV32.10703bool IsRV32E64 =10704!Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;10705SDValue SplatVL;10706if (!IsRV32E64)10707SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);10708else10709SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),10710VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));1071110712SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);10713SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,10714DAG.getUNDEF(IntVT), Mask, VL);1071510716return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices,10717DAG.getUNDEF(VecVT), Mask, VL);10718}1071910720SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,10721SelectionDAG &DAG) const {10722SDLoc DL(Op);10723SDValue 
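// Worked example for the splice lowering below (a sketch assuming VLMAX = 8
// and an immediate offset of 2): DownOffset = 2 and UpOffset = 6, so the
// vslidedown leaves <v2 .. v7> in lanes 0..5 and the tail-agnostic vslideup
// then writes <w0 w1> into lanes 6..7, producing <v2 .. v7 w0 w1>.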
V1 = Op.getOperand(0);10724SDValue V2 = Op.getOperand(1);10725MVT XLenVT = Subtarget.getXLenVT();10726MVT VecVT = Op.getSimpleValueType();1072710728SDValue VLMax = computeVLMax(VecVT, DL, DAG);1072910730int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();10731SDValue DownOffset, UpOffset;10732if (ImmValue >= 0) {10733// The operand is a TargetConstant, we need to rebuild it as a regular10734// constant.10735DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);10736UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);10737} else {10738// The operand is a TargetConstant, we need to rebuild it as a regular10739// constant rather than negating the original operand.10740UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);10741DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);10742}1074310744SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);1074510746SDValue SlideDown =10747getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,10748DownOffset, TrueMask, UpOffset);10749return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,10750TrueMask, DAG.getRegister(RISCV::X0, XLenVT),10751RISCVII::TAIL_AGNOSTIC);10752}1075310754SDValue10755RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,10756SelectionDAG &DAG) const {10757SDLoc DL(Op);10758auto *Load = cast<LoadSDNode>(Op);1075910760assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),10761Load->getMemoryVT(),10762*Load->getMemOperand()) &&10763"Expecting a correctly-aligned load");1076410765MVT VT = Op.getSimpleValueType();10766MVT XLenVT = Subtarget.getXLenVT();10767MVT ContainerVT = getContainerForFixedLengthVector(VT);1076810769// If we know the exact VLEN and our fixed length vector completely fills10770// the container, use a whole register load instead.10771const auto [MinVLMAX, MaxVLMAX] =10772RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);10773if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&10774getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {10775MachineMemOperand *MMO = Load->getMemOperand();10776SDValue NewLoad =10777DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),10778MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),10779MMO->getAAInfo(), MMO->getRanges());10780SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);10781return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);10782}1078310784SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, Subtarget);1078510786bool IsMaskOp = VT.getVectorElementType() == MVT::i1;10787SDValue IntID = DAG.getTargetConstant(10788IsMaskOp ? 
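// Note: i1 mask vectors are loaded with the vlm intrinsic, which takes no
// passthru operand (hence no UNDEF merge value is pushed for it below); all
// other element types use the unit-stride vle intrinsic. In both cases VL is
// the fixed-length vector's element count computed above.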
Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);10789SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};10790if (!IsMaskOp)10791Ops.push_back(DAG.getUNDEF(ContainerVT));10792Ops.push_back(Load->getBasePtr());10793Ops.push_back(VL);10794SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});10795SDValue NewLoad =10796DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,10797Load->getMemoryVT(), Load->getMemOperand());1079810799SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);10800return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);10801}1080210803SDValue10804RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,10805SelectionDAG &DAG) const {10806SDLoc DL(Op);10807auto *Store = cast<StoreSDNode>(Op);1080810809assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),10810Store->getMemoryVT(),10811*Store->getMemOperand()) &&10812"Expecting a correctly-aligned store");1081310814SDValue StoreVal = Store->getValue();10815MVT VT = StoreVal.getSimpleValueType();10816MVT XLenVT = Subtarget.getXLenVT();1081710818// If the size less than a byte, we need to pad with zeros to make a byte.10819if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {10820VT = MVT::v8i1;10821StoreVal =10822DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getConstant(0, DL, VT),10823StoreVal, DAG.getVectorIdxConstant(0, DL));10824}1082510826MVT ContainerVT = getContainerForFixedLengthVector(VT);1082710828SDValue NewValue =10829convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);108301083110832// If we know the exact VLEN and our fixed length vector completely fills10833// the container, use a whole register store instead.10834const auto [MinVLMAX, MaxVLMAX] =10835RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);10836if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&10837getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {10838MachineMemOperand *MMO = Store->getMemOperand();10839return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),10840MMO->getPointerInfo(), MMO->getBaseAlign(),10841MMO->getFlags(), MMO->getAAInfo());10842}1084310844SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,10845Subtarget);1084610847bool IsMaskOp = VT.getVectorElementType() == MVT::i1;10848SDValue IntID = DAG.getTargetConstant(10849IsMaskOp ? 
Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);10850return DAG.getMemIntrinsicNode(10851ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),10852{Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},10853Store->getMemoryVT(), Store->getMemOperand());10854}1085510856SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,10857SelectionDAG &DAG) const {10858SDLoc DL(Op);10859MVT VT = Op.getSimpleValueType();1086010861const auto *MemSD = cast<MemSDNode>(Op);10862EVT MemVT = MemSD->getMemoryVT();10863MachineMemOperand *MMO = MemSD->getMemOperand();10864SDValue Chain = MemSD->getChain();10865SDValue BasePtr = MemSD->getBasePtr();1086610867SDValue Mask, PassThru, VL;10868if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {10869Mask = VPLoad->getMask();10870PassThru = DAG.getUNDEF(VT);10871VL = VPLoad->getVectorLength();10872} else {10873const auto *MLoad = cast<MaskedLoadSDNode>(Op);10874Mask = MLoad->getMask();10875PassThru = MLoad->getPassThru();10876}1087710878bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());1087910880MVT XLenVT = Subtarget.getXLenVT();1088110882MVT ContainerVT = VT;10883if (VT.isFixedLengthVector()) {10884ContainerVT = getContainerForFixedLengthVector(VT);10885PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);10886if (!IsUnmasked) {10887MVT MaskVT = getMaskTypeFor(ContainerVT);10888Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);10889}10890}1089110892if (!VL)10893VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;1089410895unsigned IntID =10896IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;10897SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};10898if (IsUnmasked)10899Ops.push_back(DAG.getUNDEF(ContainerVT));10900else10901Ops.push_back(PassThru);10902Ops.push_back(BasePtr);10903if (!IsUnmasked)10904Ops.push_back(Mask);10905Ops.push_back(VL);10906if (!IsUnmasked)10907Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));1090810909SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});1091010911SDValue Result =10912DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);10913Chain = Result.getValue(1);1091410915if (VT.isFixedLengthVector())10916Result = convertFromScalableVector(VT, Result, DAG, Subtarget);1091710918return DAG.getMergeValues({Result, Chain}, DL);10919}1092010921SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,10922SelectionDAG &DAG) const {10923SDLoc DL(Op);1092410925const auto *MemSD = cast<MemSDNode>(Op);10926EVT MemVT = MemSD->getMemoryVT();10927MachineMemOperand *MMO = MemSD->getMemOperand();10928SDValue Chain = MemSD->getChain();10929SDValue BasePtr = MemSD->getBasePtr();10930SDValue Val, Mask, VL;1093110932bool IsCompressingStore = false;10933if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {10934Val = VPStore->getValue();10935Mask = VPStore->getMask();10936VL = VPStore->getVectorLength();10937} else {10938const auto *MStore = cast<MaskedStoreSDNode>(Op);10939Val = MStore->getValue();10940Mask = MStore->getMask();10941IsCompressingStore = MStore->isCompressingStore();10942}1094310944bool IsUnmasked =10945ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;1094610947MVT VT = Val.getSimpleValueType();10948MVT XLenVT = Subtarget.getXLenVT();1094910950MVT ContainerVT = VT;10951if (VT.isFixedLengthVector()) {10952ContainerVT = getContainerForFixedLengthVector(VT);1095310954Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);10955if (!IsUnmasked || 
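// For compressing stores the code below first runs vcompress over the data
// with the store mask and then performs an unmasked store of the first
// vcpop(mask) elements. Sketch with four elements: Val = <a b c d> and
// Mask = <1 0 1 1> compress to <a c d ?> and are stored with VL = 3.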
IsCompressingStore) {10956MVT MaskVT = getMaskTypeFor(ContainerVT);10957Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);10958}10959}1096010961if (!VL)10962VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;1096310964if (IsCompressingStore) {10965Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,10966DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),10967DAG.getUNDEF(ContainerVT), Val, Mask, VL);10968VL =10969DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,10970getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);10971}1097210973unsigned IntID =10974IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;10975SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};10976Ops.push_back(Val);10977Ops.push_back(BasePtr);10978if (!IsUnmasked)10979Ops.push_back(Mask);10980Ops.push_back(VL);1098110982return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,10983DAG.getVTList(MVT::Other), Ops, MemVT, MMO);10984}1098510986SDValue10987RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,10988SelectionDAG &DAG) const {10989MVT InVT = Op.getOperand(0).getSimpleValueType();10990MVT ContainerVT = getContainerForFixedLengthVector(InVT);1099110992MVT VT = Op.getSimpleValueType();1099310994SDValue Op1 =10995convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);10996SDValue Op2 =10997convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);1099810999SDLoc DL(Op);11000auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,11001DAG, Subtarget);11002MVT MaskVT = getMaskTypeFor(ContainerVT);1100311004SDValue Cmp =11005DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,11006{Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});1100711008return convertFromScalableVector(VT, Cmp, DAG, Subtarget);11009}1101011011SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,11012SelectionDAG &DAG) const {11013unsigned Opc = Op.getOpcode();11014SDLoc DL(Op);11015SDValue Chain = Op.getOperand(0);11016SDValue Op1 = Op.getOperand(1);11017SDValue Op2 = Op.getOperand(2);11018SDValue CC = Op.getOperand(3);11019ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();11020MVT VT = Op.getSimpleValueType();11021MVT InVT = Op1.getSimpleValueType();1102211023// RVV VMFEQ/VMFNE ignores qNan, so we expand strict_fsetccs with OEQ/UNE11024// condition code.11025if (Opc == ISD::STRICT_FSETCCS) {11026// Expand strict_fsetccs(x, oeq) to11027// (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))11028SDVTList VTList = Op->getVTList();11029if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {11030SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);11031SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,11032Op2, OLECCVal);11033SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,11034Op1, OLECCVal);11035SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,11036Tmp1.getValue(1), Tmp2.getValue(1));11037// Tmp1 and Tmp2 might be the same node.11038if (Tmp1 != Tmp2)11039Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);11040return DAG.getMergeValues({Tmp1, OutChain}, DL);11041}1104211043// Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))11044if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {11045SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);11046SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,11047Op2, OEQCCVal);11048SDValue Res = DAG.getNOT(DL, OEQ, VT);11049return DAG.getMergeValues({Res, OEQ.getValue(1)}, 
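// Rationale for the expansions above: the ordered-less-equal compares are
// signaling (they raise the invalid flag for any NaN operand), which is what
// STRICT_FSETCCS requires, whereas vmfeq/vmfne would stay quiet on a qNaN;
// e.g. for x = qNaN both OLE results are false and the AND still produces
// false after the exception has been signalled.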
DL);11050}11051}1105211053MVT ContainerInVT = InVT;11054if (InVT.isFixedLengthVector()) {11055ContainerInVT = getContainerForFixedLengthVector(InVT);11056Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);11057Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);11058}11059MVT MaskVT = getMaskTypeFor(ContainerInVT);1106011061auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);1106211063SDValue Res;11064if (Opc == ISD::STRICT_FSETCC &&11065(CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||11066CCVal == ISD::SETOLE)) {11067// VMFLT/VMFLE/VMFGT/VMFGE raise exception for qNan. Generate a mask to only11068// active when both input elements are ordered.11069SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);11070SDValue OrderMask1 = DAG.getNode(11071RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),11072{Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),11073True, VL});11074SDValue OrderMask2 = DAG.getNode(11075RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),11076{Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),11077True, VL});11078Mask =11079DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);11080// Use Mask as the merge operand to let the result be 0 if either of the11081// inputs is unordered.11082Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,11083DAG.getVTList(MaskVT, MVT::Other),11084{Chain, Op1, Op2, CC, Mask, Mask, VL});11085} else {11086unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL11087: RISCVISD::STRICT_FSETCCS_VL;11088Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),11089{Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});11090}1109111092if (VT.isFixedLengthVector()) {11093SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);11094return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);11095}11096return Res;11097}1109811099// Lower vector ABS to smax(X, sub(0, X)).11100SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {11101SDLoc DL(Op);11102MVT VT = Op.getSimpleValueType();11103SDValue X = Op.getOperand(0);1110411105assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&11106"Unexpected type for ISD::ABS");1110711108MVT ContainerVT = VT;11109if (VT.isFixedLengthVector()) {11110ContainerVT = getContainerForFixedLengthVector(VT);11111X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);11112}1111311114SDValue Mask, VL;11115if (Op->getOpcode() == ISD::VP_ABS) {11116Mask = Op->getOperand(1);11117if (VT.isFixedLengthVector())11118Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,11119Subtarget);11120VL = Op->getOperand(2);11121} else11122std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);1112311124SDValue SplatZero = DAG.getNode(11125RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),11126DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);11127SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,11128DAG.getUNDEF(ContainerVT), Mask, VL);11129SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,11130DAG.getUNDEF(ContainerVT), Mask, VL);1113111132if (VT.isFixedLengthVector())11133Max = convertFromScalableVector(VT, Max, DAG, Subtarget);11134return Max;11135}1113611137SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(11138SDValue Op, SelectionDAG &DAG) const {11139SDLoc DL(Op);11140MVT VT = 
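// FCOPYSIGN_VL takes the magnitude from the first operand and only the sign
// bit from the second, e.g. copysign(3.0, -0.5) == -3.0 and
// copysign(-3.0, 0.5) == 3.0.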
Op.getSimpleValueType();11141SDValue Mag = Op.getOperand(0);11142SDValue Sign = Op.getOperand(1);11143assert(Mag.getValueType() == Sign.getValueType() &&11144"Can only handle COPYSIGN with matching types.");1114511146MVT ContainerVT = getContainerForFixedLengthVector(VT);11147Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);11148Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);1114911150auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);1115111152SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,11153Sign, DAG.getUNDEF(ContainerVT), Mask, VL);1115411155return convertFromScalableVector(VT, CopySign, DAG, Subtarget);11156}1115711158SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(11159SDValue Op, SelectionDAG &DAG) const {11160MVT VT = Op.getSimpleValueType();11161MVT ContainerVT = getContainerForFixedLengthVector(VT);1116211163MVT I1ContainerVT =11164MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());1116511166SDValue CC =11167convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);11168SDValue Op1 =11169convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);11170SDValue Op2 =11171convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);1117211173SDLoc DL(Op);11174SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;1117511176SDValue Select = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, Op1,11177Op2, DAG.getUNDEF(ContainerVT), VL);1117811179return convertFromScalableVector(VT, Select, DAG, Subtarget);11180}1118111182SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,11183SelectionDAG &DAG) const {11184unsigned NewOpc = getRISCVVLOp(Op);11185bool HasMergeOp = hasMergeOp(NewOpc);11186bool HasMask = hasMaskOp(NewOpc);1118711188MVT VT = Op.getSimpleValueType();11189MVT ContainerVT = getContainerForFixedLengthVector(VT);1119011191// Create list of operands by converting existing ones to scalable types.11192SmallVector<SDValue, 6> Ops;11193for (const SDValue &V : Op->op_values()) {11194assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");1119511196// Pass through non-vector operands.11197if (!V.getValueType().isVector()) {11198Ops.push_back(V);11199continue;11200}1120111202// "cast" fixed length vector to a scalable vector.11203assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&11204"Only fixed length vectors are supported!");11205Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));11206}1120711208SDLoc DL(Op);11209auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);11210if (HasMergeOp)11211Ops.push_back(DAG.getUNDEF(ContainerVT));11212if (HasMask)11213Ops.push_back(Mask);11214Ops.push_back(VL);1121511216// StrictFP operations have two result values. 
Their lowered result should11217// have same result count.11218if (Op->isStrictFPOpcode()) {11219SDValue ScalableRes =11220DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,11221Op->getFlags());11222SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);11223return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);11224}1122511226SDValue ScalableRes =11227DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());11228return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);11229}1123011231// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:11232// * Operands of each node are assumed to be in the same order.11233// * The EVL operand is promoted from i32 to i64 on RV64.11234// * Fixed-length vectors are converted to their scalable-vector container11235// types.11236SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {11237unsigned RISCVISDOpc = getRISCVVLOp(Op);11238bool HasMergeOp = hasMergeOp(RISCVISDOpc);1123911240SDLoc DL(Op);11241MVT VT = Op.getSimpleValueType();11242SmallVector<SDValue, 4> Ops;1124311244MVT ContainerVT = VT;11245if (VT.isFixedLengthVector())11246ContainerVT = getContainerForFixedLengthVector(VT);1124711248for (const auto &OpIdx : enumerate(Op->ops())) {11249SDValue V = OpIdx.value();11250assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");11251// Add dummy merge value before the mask. Or if there isn't a mask, before11252// EVL.11253if (HasMergeOp) {11254auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());11255if (MaskIdx) {11256if (*MaskIdx == OpIdx.index())11257Ops.push_back(DAG.getUNDEF(ContainerVT));11258} else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==11259OpIdx.index()) {11260if (Op.getOpcode() == ISD::VP_MERGE) {11261// For VP_MERGE, copy the false operand instead of an undef value.11262Ops.push_back(Ops.back());11263} else {11264assert(Op.getOpcode() == ISD::VP_SELECT);11265// For VP_SELECT, add an undef value.11266Ops.push_back(DAG.getUNDEF(ContainerVT));11267}11268}11269}11270// Pass through operands which aren't fixed-length vectors.11271if (!V.getValueType().isFixedLengthVector()) {11272Ops.push_back(V);11273continue;11274}11275// "cast" fixed length vector to a scalable vector.11276MVT OpVT = V.getSimpleValueType();11277MVT ContainerVT = getContainerForFixedLengthVector(OpVT);11278assert(useRVVForFixedLengthVectorVT(OpVT) &&11279"Only fixed length vectors are supported!");11280Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));11281}1128211283if (!VT.isFixedLengthVector())11284return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());1128511286SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());1128711288return convertFromScalableVector(VT, VPOp, DAG, Subtarget);11289}1129011291SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,11292SelectionDAG &DAG) const {11293SDLoc DL(Op);11294MVT VT = Op.getSimpleValueType();1129511296SDValue Src = Op.getOperand(0);11297// NOTE: Mask is dropped.11298SDValue VL = Op.getOperand(2);1129911300MVT ContainerVT = VT;11301if (VT.isFixedLengthVector()) {11302ContainerVT = getContainerForFixedLengthVector(VT);11303MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());11304Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);11305}1130611307MVT XLenVT = Subtarget.getXLenVT();11308SDValue Zero = DAG.getConstant(0, DL, XLenVT);11309SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,11310DAG.getUNDEF(ContainerVT), Zero, 
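// The i1 source is widened by selecting between two splats. Sketch with a
// <4 x i32> result and Src = <1 0 0 1>:
//   vp.zext -> vmerge(Src, splat(1),  splat(0)) = < 1 0 0  1>
//   vp.sext -> vmerge(Src, splat(-1), splat(0)) = <-1 0 0 -1>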
VL);1131111312SDValue SplatValue = DAG.getConstant(11313Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);11314SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,11315DAG.getUNDEF(ContainerVT), SplatValue, VL);1131611317SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,11318ZeroSplat, DAG.getUNDEF(ContainerVT), VL);11319if (!VT.isFixedLengthVector())11320return Result;11321return convertFromScalableVector(VT, Result, DAG, Subtarget);11322}1132311324SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,11325SelectionDAG &DAG) const {11326SDLoc DL(Op);11327MVT VT = Op.getSimpleValueType();1132811329SDValue Op1 = Op.getOperand(0);11330SDValue Op2 = Op.getOperand(1);11331ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();11332// NOTE: Mask is dropped.11333SDValue VL = Op.getOperand(4);1133411335MVT ContainerVT = VT;11336if (VT.isFixedLengthVector()) {11337ContainerVT = getContainerForFixedLengthVector(VT);11338Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);11339Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);11340}1134111342SDValue Result;11343SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);1134411345switch (Condition) {11346default:11347break;11348// X != Y --> (X^Y)11349case ISD::SETNE:11350Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);11351break;11352// X == Y --> ~(X^Y)11353case ISD::SETEQ: {11354SDValue Temp =11355DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);11356Result =11357DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);11358break;11359}11360// X >s Y --> X == 0 & Y == 1 --> ~X & Y11361// X <u Y --> X == 0 & Y == 1 --> ~X & Y11362case ISD::SETGT:11363case ISD::SETULT: {11364SDValue Temp =11365DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);11366Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);11367break;11368}11369// X <s Y --> X == 1 & Y == 0 --> ~Y & X11370// X >u Y --> X == 1 & Y == 0 --> ~Y & X11371case ISD::SETLT:11372case ISD::SETUGT: {11373SDValue Temp =11374DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);11375Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);11376break;11377}11378// X >=s Y --> X == 0 | Y == 1 --> ~X | Y11379// X <=u Y --> X == 0 | Y == 1 --> ~X | Y11380case ISD::SETGE:11381case ISD::SETULE: {11382SDValue Temp =11383DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);11384Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);11385break;11386}11387// X <=s Y --> X == 1 | Y == 0 --> ~Y | X11388// X >=u Y --> X == 1 | Y == 0 --> ~Y | X11389case ISD::SETLE:11390case ISD::SETUGE: {11391SDValue Temp =11392DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);11393Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);11394break;11395}11396}1139711398if (!VT.isFixedLengthVector())11399return Result;11400return convertFromScalableVector(VT, Result, DAG, Subtarget);11401}1140211403// Lower Floating-Point/Integer Type-Convert VP SDNodes11404SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,11405SelectionDAG &DAG) const {11406SDLoc DL(Op);1140711408SDValue Src = Op.getOperand(0);11409SDValue Mask = Op.getOperand(1);11410SDValue VL = Op.getOperand(2);11411unsigned RISCVISDOpc = getRISCVVLOp(Op);1141211413MVT DstVT = Op.getSimpleValueType();11414MVT SrcVT = Src.getSimpleValueType();11415if 
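// The conversions below work in power-of-two steps. As a sketch of the
// widening path: vp.sitofp from nxv2i8 to nxv2f64 first sign-extends the
// source to nxv2i32 (half the destination element width) and then emits a
// single widening int-to-float convert, while an i1 source is first expanded
// to 0/+-1 integers with a vmerge.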
(DstVT.isFixedLengthVector()) {11416DstVT = getContainerForFixedLengthVector(DstVT);11417SrcVT = getContainerForFixedLengthVector(SrcVT);11418Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);11419MVT MaskVT = getMaskTypeFor(DstVT);11420Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);11421}1142211423unsigned DstEltSize = DstVT.getScalarSizeInBits();11424unsigned SrcEltSize = SrcVT.getScalarSizeInBits();1142511426SDValue Result;11427if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.11428if (SrcVT.isInteger()) {11429assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");1143011431unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL11432? RISCVISD::VSEXT_VL11433: RISCVISD::VZEXT_VL;1143411435// Do we need to do any pre-widening before converting?11436if (SrcEltSize == 1) {11437MVT IntVT = DstVT.changeVectorElementTypeToInteger();11438MVT XLenVT = Subtarget.getXLenVT();11439SDValue Zero = DAG.getConstant(0, DL, XLenVT);11440SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,11441DAG.getUNDEF(IntVT), Zero, VL);11442SDValue One = DAG.getConstant(11443RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);11444SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,11445DAG.getUNDEF(IntVT), One, VL);11446Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,11447ZeroSplat, DAG.getUNDEF(IntVT), VL);11448} else if (DstEltSize > (2 * SrcEltSize)) {11449// Widen before converting.11450MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),11451DstVT.getVectorElementCount());11452Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);11453}1145411455Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);11456} else {11457assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&11458"Wrong input/output vector types");1145911460// Convert f16 to f32 then convert f32 to i64.11461if (DstEltSize > (2 * SrcEltSize)) {11462assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");11463MVT InterimFVT =11464MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());11465Src =11466DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);11467}1146811469Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);11470}11471} else { // Narrowing + Conversion11472if (SrcVT.isInteger()) {11473assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");11474// First do a narrowing convert to an FP type half the size, then round11475// the FP type to a small FP type if needed.1147611477MVT InterimFVT = DstVT;11478if (SrcEltSize > (2 * DstEltSize)) {11479assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");11480assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");11481InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());11482}1148311484Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);1148511486if (InterimFVT != DstVT) {11487Src = Result;11488Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);11489}11490} else {11491assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&11492"Wrong input/output vector types");11493// First do a narrowing conversion to an integer half the size, then11494// truncate if needed.1149511496if (DstEltSize == 1) {11497// First convert to the same size integer, then convert to mask using11498// setcc.11499assert(SrcEltSize >= 16 && "Unexpected FP type!");11500MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),11501DstVT.getVectorElementCount());11502Result = 
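// Sketch of the FP-to-mask path at this point: for vp.fptosi from nxv2f32 to
// nxv2i1, the value is first converted to nxv2i32 (the same element width as
// the source) and the mask is then produced by the compare-against-zero
// below.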
DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);1150311504// Compare the integer result to 0. The integer should be 0 or 1/-1,11505// otherwise the conversion was undefined.11506MVT XLenVT = Subtarget.getXLenVT();11507SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);11508SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,11509DAG.getUNDEF(InterimIVT), SplatZero, VL);11510Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,11511{Result, SplatZero, DAG.getCondCode(ISD::SETNE),11512DAG.getUNDEF(DstVT), Mask, VL});11513} else {11514MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),11515DstVT.getVectorElementCount());1151611517Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);1151811519while (InterimIVT != DstVT) {11520SrcEltSize /= 2;11521Src = Result;11522InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),11523DstVT.getVectorElementCount());11524Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,11525Src, Mask, VL);11526}11527}11528}11529}1153011531MVT VT = Op.getSimpleValueType();11532if (!VT.isFixedLengthVector())11533return Result;11534return convertFromScalableVector(VT, Result, DAG, Subtarget);11535}1153611537SDValue11538RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,11539SelectionDAG &DAG) const {11540SDLoc DL(Op);1154111542SDValue Op1 = Op.getOperand(0);11543SDValue Op2 = Op.getOperand(1);11544SDValue Offset = Op.getOperand(2);11545SDValue Mask = Op.getOperand(3);11546SDValue EVL1 = Op.getOperand(4);11547SDValue EVL2 = Op.getOperand(5);1154811549const MVT XLenVT = Subtarget.getXLenVT();11550MVT VT = Op.getSimpleValueType();11551MVT ContainerVT = VT;11552if (VT.isFixedLengthVector()) {11553ContainerVT = getContainerForFixedLengthVector(VT);11554Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);11555Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);11556MVT MaskVT = getMaskTypeFor(ContainerVT);11557Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);11558}1155911560// EVL1 may need to be extended to XLenVT with RV64LegalI32.11561EVL1 = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EVL1);1156211563bool IsMaskVector = VT.getVectorElementType() == MVT::i1;11564if (IsMaskVector) {11565ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);1156611567// Expand input operands11568SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,11569DAG.getUNDEF(ContainerVT),11570DAG.getConstant(1, DL, XLenVT), EVL1);11571SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,11572DAG.getUNDEF(ContainerVT),11573DAG.getConstant(0, DL, XLenVT), EVL1);11574Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,11575SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);1157611577SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,11578DAG.getUNDEF(ContainerVT),11579DAG.getConstant(1, DL, XLenVT), EVL2);11580SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,11581DAG.getUNDEF(ContainerVT),11582DAG.getConstant(0, DL, XLenVT), EVL2);11583Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,11584SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);11585}1158611587int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();11588SDValue DownOffset, UpOffset;11589if (ImmValue >= 0) {11590// The operand is a TargetConstant, we need to rebuild it as a regular11591// constant.11592DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);11593UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, 
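// Unlike the VECTOR_SPLICE lowering above, the VP form computes the up/down
// offsets relative to EVL1 (the explicit vector length of the first operand)
// rather than VLMAX, so only the first EVL1 elements of Op1 take part in the
// splice.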
DownOffset);11594} else {11595// The operand is a TargetConstant, we need to rebuild it as a regular11596// constant rather than negating the original operand.11597UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);11598DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);11599}1160011601SDValue SlideDown =11602getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),11603Op1, DownOffset, Mask, UpOffset);11604SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2,11605UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC);1160611607if (IsMaskVector) {11608// Truncate Result back to a mask vector (Result has same EVL as Op2)11609Result = DAG.getNode(11610RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),11611{Result, DAG.getConstant(0, DL, ContainerVT),11612DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),11613Mask, EVL2});11614}1161511616if (!VT.isFixedLengthVector())11617return Result;11618return convertFromScalableVector(VT, Result, DAG, Subtarget);11619}1162011621SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,11622SelectionDAG &DAG) const {11623SDLoc DL(Op);11624SDValue Val = Op.getOperand(0);11625SDValue Mask = Op.getOperand(1);11626SDValue VL = Op.getOperand(2);11627MVT VT = Op.getSimpleValueType();1162811629MVT ContainerVT = VT;11630if (VT.isFixedLengthVector()) {11631ContainerVT = getContainerForFixedLengthVector(VT);11632MVT MaskVT = getMaskTypeFor(ContainerVT);11633Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);11634}1163511636SDValue Result =11637lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);1163811639if (!VT.isFixedLengthVector())11640return Result;11641return convertFromScalableVector(VT, Result, DAG, Subtarget);11642}1164311644SDValue11645RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,11646SelectionDAG &DAG) const {11647SDLoc DL(Op);11648MVT VT = Op.getSimpleValueType();11649MVT XLenVT = Subtarget.getXLenVT();1165011651SDValue Op1 = Op.getOperand(0);11652SDValue Mask = Op.getOperand(1);11653SDValue EVL = Op.getOperand(2);1165411655MVT ContainerVT = VT;11656if (VT.isFixedLengthVector()) {11657ContainerVT = getContainerForFixedLengthVector(VT);11658Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);11659MVT MaskVT = getMaskTypeFor(ContainerVT);11660Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);11661}1166211663MVT GatherVT = ContainerVT;11664MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();11665// Check if we are working with mask vectors11666bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;11667if (IsMaskVector) {11668GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);1166911670// Expand input operand11671SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,11672DAG.getUNDEF(IndicesVT),11673DAG.getConstant(1, DL, XLenVT), EVL);11674SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,11675DAG.getUNDEF(IndicesVT),11676DAG.getConstant(0, DL, XLenVT), EVL);11677Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,11678SplatZero, DAG.getUNDEF(IndicesVT), EVL);11679}1168011681unsigned EltSize = GatherVT.getScalarSizeInBits();11682unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();11683unsigned VectorBitsMax = Subtarget.getRealMaxVLen();11684unsigned MaxVLMAX =11685RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);1168611687unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;11688// If this is SEW=8 
and VLMAX is unknown or more than 256, we need11689// to use vrgatherei16.vv.11690// TODO: It's also possible to use vrgatherei16.vv for other types to11691// decrease register width for the index calculation.11692// NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.11693if (MaxVLMAX > 256 && EltSize == 8) {11694// If this is LMUL=8, we have to split before using vrgatherei16.vv.11695// Split the vector in half and reverse each half using a full register11696// reverse.11697// Swap the halves and concatenate them.11698// Slide the concatenated result by (VLMax - VL).11699if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {11700auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);11701auto [Lo, Hi] = DAG.SplitVector(Op1, DL);1170211703SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);11704SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);1170511706// Reassemble the low and high pieces reversed.11707// NOTE: this Result is unmasked (because we do not need masks for11708// shuffles). If in the future this has to change, we can use a SELECT_VL11709// between Result and UNDEF using the mask originally passed to VP_REVERSE11710SDValue Result =11711DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);1171211713// Slide off any elements from past EVL that were reversed into the low11714// elements.11715unsigned MinElts = GatherVT.getVectorMinNumElements();11716SDValue VLMax =11717DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));11718SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);1171911720Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,11721DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);1172211723if (IsMaskVector) {11724// Truncate Result back to a mask vector11725Result =11726DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,11727{Result, DAG.getConstant(0, DL, GatherVT),11728DAG.getCondCode(ISD::SETNE),11729DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});11730}1173111732if (!VT.isFixedLengthVector())11733return Result;11734return convertFromScalableVector(VT, Result, DAG, Subtarget);11735}1173611737// Just promote the int type to i16 which will double the LMUL.11738IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());11739GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;11740}1174111742SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);11743SDValue VecLen =11744DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));11745SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,11746DAG.getUNDEF(IndicesVT), VecLen, EVL);11747SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,11748DAG.getUNDEF(IndicesVT), Mask, EVL);11749SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,11750DAG.getUNDEF(GatherVT), Mask, EVL);1175111752if (IsMaskVector) {11753// Truncate Result back to a mask vector11754Result = DAG.getNode(11755RISCVISD::SETCC_VL, DL, ContainerVT,11756{Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),11757DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});11758}1175911760if (!VT.isFixedLengthVector())11761return Result;11762return convertFromScalableVector(VT, Result, DAG, Subtarget);11763}1176411765SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,11766SelectionDAG &DAG) const {11767MVT VT = Op.getSimpleValueType();11768if (VT.getVectorElementType() != MVT::i1)11769return lowerVPOp(Op, DAG);1177011771// It is safe to drop mask parameter as masked-off elements are undef.11772SDValue Op1 = 
Op->getOperand(0);11773SDValue Op2 = Op->getOperand(1);11774SDValue VL = Op->getOperand(3);1177511776MVT ContainerVT = VT;11777const bool IsFixed = VT.isFixedLengthVector();11778if (IsFixed) {11779ContainerVT = getContainerForFixedLengthVector(VT);11780Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);11781Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);11782}1178311784SDLoc DL(Op);11785SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);11786if (!IsFixed)11787return Val;11788return convertFromScalableVector(VT, Val, DAG, Subtarget);11789}1179011791SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,11792SelectionDAG &DAG) const {11793SDLoc DL(Op);11794MVT XLenVT = Subtarget.getXLenVT();11795MVT VT = Op.getSimpleValueType();11796MVT ContainerVT = VT;11797if (VT.isFixedLengthVector())11798ContainerVT = getContainerForFixedLengthVector(VT);1179911800SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});1180111802auto *VPNode = cast<VPStridedLoadSDNode>(Op);11803// Check if the mask is known to be all ones11804SDValue Mask = VPNode->getMask();11805bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());1180611807SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse11808: Intrinsic::riscv_vlse_mask,11809DL, XLenVT);11810SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,11811DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),11812VPNode->getStride()};11813if (!IsUnmasked) {11814if (VT.isFixedLengthVector()) {11815MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);11816Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);11817}11818Ops.push_back(Mask);11819}11820Ops.push_back(VPNode->getVectorLength());11821if (!IsUnmasked) {11822SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);11823Ops.push_back(Policy);11824}1182511826SDValue Result =11827DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,11828VPNode->getMemoryVT(), VPNode->getMemOperand());11829SDValue Chain = Result.getValue(1);1183011831if (VT.isFixedLengthVector())11832Result = convertFromScalableVector(VT, Result, DAG, Subtarget);1183311834return DAG.getMergeValues({Result, Chain}, DL);11835}1183611837SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,11838SelectionDAG &DAG) const {11839SDLoc DL(Op);11840MVT XLenVT = Subtarget.getXLenVT();1184111842auto *VPNode = cast<VPStridedStoreSDNode>(Op);11843SDValue StoreVal = VPNode->getValue();11844MVT VT = StoreVal.getSimpleValueType();11845MVT ContainerVT = VT;11846if (VT.isFixedLengthVector()) {11847ContainerVT = getContainerForFixedLengthVector(VT);11848StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);11849}1185011851// Check if the mask is known to be all ones11852SDValue Mask = VPNode->getMask();11853bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());1185411855SDValue IntID = DAG.getTargetConstant(IsUnmasked ? 
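// The stride operand of the vsse/vsse_mask intrinsics selected below is a
// byte stride held in a scalar register and is taken directly from the VP
// node; e.g. storing every other i32 element of an array corresponds to a
// stride of 8 bytes.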
Intrinsic::riscv_vsse11856: Intrinsic::riscv_vsse_mask,11857DL, XLenVT);11858SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,11859VPNode->getBasePtr(), VPNode->getStride()};11860if (!IsUnmasked) {11861if (VT.isFixedLengthVector()) {11862MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);11863Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);11864}11865Ops.push_back(Mask);11866}11867Ops.push_back(VPNode->getVectorLength());1186811869return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),11870Ops, VPNode->getMemoryVT(),11871VPNode->getMemOperand());11872}1187311874// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be11875// matched to a RVV indexed load. The RVV indexed load instructions only11876// support the "unsigned unscaled" addressing mode; indices are implicitly11877// zero-extended or truncated to XLEN and are treated as byte offsets. Any11878// signed or scaled indexing is extended to the XLEN value type and scaled11879// accordingly.11880SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,11881SelectionDAG &DAG) const {11882SDLoc DL(Op);11883MVT VT = Op.getSimpleValueType();1188411885const auto *MemSD = cast<MemSDNode>(Op.getNode());11886EVT MemVT = MemSD->getMemoryVT();11887MachineMemOperand *MMO = MemSD->getMemOperand();11888SDValue Chain = MemSD->getChain();11889SDValue BasePtr = MemSD->getBasePtr();1189011891[[maybe_unused]] ISD::LoadExtType LoadExtType;11892SDValue Index, Mask, PassThru, VL;1189311894if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {11895Index = VPGN->getIndex();11896Mask = VPGN->getMask();11897PassThru = DAG.getUNDEF(VT);11898VL = VPGN->getVectorLength();11899// VP doesn't support extending loads.11900LoadExtType = ISD::NON_EXTLOAD;11901} else {11902// Else it must be a MGATHER.11903auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());11904Index = MGN->getIndex();11905Mask = MGN->getMask();11906PassThru = MGN->getPassThru();11907LoadExtType = MGN->getExtensionType();11908}1190911910MVT IndexVT = Index.getSimpleValueType();11911MVT XLenVT = Subtarget.getXLenVT();1191211913assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&11914"Unexpected VTs!");11915assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");11916// Targets have to explicitly opt-in for extending vector loads.11917assert(LoadExtType == ISD::NON_EXTLOAD &&11918"Unexpected extending MGATHER/VP_GATHER");1191911920// If the mask is known to be all ones, optimize to an unmasked intrinsic;11921// the selection of the masked intrinsics doesn't do this for us.11922bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());1192311924MVT ContainerVT = VT;11925if (VT.isFixedLengthVector()) {11926ContainerVT = getContainerForFixedLengthVector(VT);11927IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),11928ContainerVT.getVectorElementCount());1192911930Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);1193111932if (!IsUnmasked) {11933MVT MaskVT = getMaskTypeFor(ContainerVT);11934Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);11935PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);11936}11937}1193811939if (!VL)11940VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;1194111942if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {11943IndexVT = IndexVT.changeVectorElementType(XLenVT);11944Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);11945}1194611947unsigned IntID 
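// As noted in the comment above this function, vluxei treats each index as
// an unscaled byte offset from the base pointer, so a gather of the i32
// elements at positions 0, 1 and 2 of an array is expected to arrive here
// with indices {0, 4, 8}; signed or scaled index forms are legalized to this
// shape elsewhere.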
=11948IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;11949SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};11950if (IsUnmasked)11951Ops.push_back(DAG.getUNDEF(ContainerVT));11952else11953Ops.push_back(PassThru);11954Ops.push_back(BasePtr);11955Ops.push_back(Index);11956if (!IsUnmasked)11957Ops.push_back(Mask);11958Ops.push_back(VL);11959if (!IsUnmasked)11960Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));1196111962SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});11963SDValue Result =11964DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);11965Chain = Result.getValue(1);1196611967if (VT.isFixedLengthVector())11968Result = convertFromScalableVector(VT, Result, DAG, Subtarget);1196911970return DAG.getMergeValues({Result, Chain}, DL);11971}1197211973// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be11974// matched to a RVV indexed store. The RVV indexed store instructions only11975// support the "unsigned unscaled" addressing mode; indices are implicitly11976// zero-extended or truncated to XLEN and are treated as byte offsets. Any11977// signed or scaled indexing is extended to the XLEN value type and scaled11978// accordingly.11979SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,11980SelectionDAG &DAG) const {11981SDLoc DL(Op);11982const auto *MemSD = cast<MemSDNode>(Op.getNode());11983EVT MemVT = MemSD->getMemoryVT();11984MachineMemOperand *MMO = MemSD->getMemOperand();11985SDValue Chain = MemSD->getChain();11986SDValue BasePtr = MemSD->getBasePtr();1198711988[[maybe_unused]] bool IsTruncatingStore = false;11989SDValue Index, Mask, Val, VL;1199011991if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {11992Index = VPSN->getIndex();11993Mask = VPSN->getMask();11994Val = VPSN->getValue();11995VL = VPSN->getVectorLength();11996// VP doesn't support truncating stores.11997IsTruncatingStore = false;11998} else {11999// Else it must be a MSCATTER.12000auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());12001Index = MSN->getIndex();12002Mask = MSN->getMask();12003Val = MSN->getValue();12004IsTruncatingStore = MSN->isTruncatingStore();12005}1200612007MVT VT = Val.getSimpleValueType();12008MVT IndexVT = Index.getSimpleValueType();12009MVT XLenVT = Subtarget.getXLenVT();1201012011assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&12012"Unexpected VTs!");12013assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");12014// Targets have to explicitly opt-in for extending vector loads and12015// truncating vector stores.12016assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");1201712018// If the mask is known to be all ones, optimize to an unmasked intrinsic;12019// the selection of the masked intrinsics doesn't do this for us.12020bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());1202112022MVT ContainerVT = VT;12023if (VT.isFixedLengthVector()) {12024ContainerVT = getContainerForFixedLengthVector(VT);12025IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),12026ContainerVT.getVectorElementCount());1202712028Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);12029Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);1203012031if (!IsUnmasked) {12032MVT MaskVT = getMaskTypeFor(ContainerVT);12033Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);12034}12035}1203612037if (!VL)12038VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, 
Subtarget).second;1203912040if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {12041IndexVT = IndexVT.changeVectorElementType(XLenVT);12042Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);12043}1204412045unsigned IntID =12046IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;12047SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};12048Ops.push_back(Val);12049Ops.push_back(BasePtr);12050Ops.push_back(Index);12051if (!IsUnmasked)12052Ops.push_back(Mask);12053Ops.push_back(VL);1205412055return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,12056DAG.getVTList(MVT::Other), Ops, MemVT, MMO);12057}1205812059SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,12060SelectionDAG &DAG) const {12061const MVT XLenVT = Subtarget.getXLenVT();12062SDLoc DL(Op);12063SDValue Chain = Op->getOperand(0);12064SDValue SysRegNo = DAG.getTargetConstant(12065RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);12066SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);12067SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);1206812069// Encoding used for rounding mode in RISC-V differs from that used in12070// FLT_ROUNDS. To convert it the RISC-V rounding mode is used as an index in a12071// table, which consists of a sequence of 4-bit fields, each representing12072// corresponding FLT_ROUNDS mode.12073static const int Table =12074(int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |12075(int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |12076(int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |12077(int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |12078(int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);1207912080SDValue Shift =12081DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));12082SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,12083DAG.getConstant(Table, DL, XLenVT), Shift);12084SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,12085DAG.getConstant(7, DL, XLenVT));1208612087return DAG.getMergeValues({Masked, Chain}, DL);12088}1208912090SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,12091SelectionDAG &DAG) const {12092const MVT XLenVT = Subtarget.getXLenVT();12093SDLoc DL(Op);12094SDValue Chain = Op->getOperand(0);12095SDValue RMValue = Op->getOperand(1);12096SDValue SysRegNo = DAG.getTargetConstant(12097RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);1209812099// Encoding used for rounding mode in RISC-V differs from that used in12100// FLT_ROUNDS. 
To convert it the C rounding mode is used as an index in12101// a table, which consists of a sequence of 4-bit fields, each representing12102// corresponding RISC-V mode.12103static const unsigned Table =12104(RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |12105(RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |12106(RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |12107(RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |12108(RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));1210912110RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);1211112112SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,12113DAG.getConstant(2, DL, XLenVT));12114SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,12115DAG.getConstant(Table, DL, XLenVT), Shift);12116RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,12117DAG.getConstant(0x7, DL, XLenVT));12118return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,12119RMValue);12120}1212112122SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,12123SelectionDAG &DAG) const {12124MachineFunction &MF = DAG.getMachineFunction();1212512126bool isRISCV64 = Subtarget.is64Bit();12127EVT PtrVT = getPointerTy(DAG.getDataLayout());1212812129int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);12130return DAG.getFrameIndex(FI, PtrVT);12131}1213212133// Returns the opcode of the target-specific SDNode that implements the 32-bit12134// form of the given Opcode.12135static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {12136switch (Opcode) {12137default:12138llvm_unreachable("Unexpected opcode");12139case ISD::SHL:12140return RISCVISD::SLLW;12141case ISD::SRA:12142return RISCVISD::SRAW;12143case ISD::SRL:12144return RISCVISD::SRLW;12145case ISD::SDIV:12146return RISCVISD::DIVW;12147case ISD::UDIV:12148return RISCVISD::DIVUW;12149case ISD::UREM:12150return RISCVISD::REMUW;12151case ISD::ROTL:12152return RISCVISD::ROLW;12153case ISD::ROTR:12154return RISCVISD::RORW;12155}12156}1215712158// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG12159// node. 
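// (As a rough scalar model of the *W semantics being targeted here, assuming
// the usual RV64 behaviour, SLLW acts like
//   int64_t sllw(int64_t rs1, int64_t rs2) {
//     return (int64_t)(int32_t)((uint32_t)rs1 << (rs2 & 31));
//   }
// i.e. only the low 32 bits of the inputs matter and the 32-bit result is
// sign-extended, which is why any-extending the operands below is safe.)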
Because i8/i16/i32 aren't legal types for RV64, these operations would12160// otherwise be promoted to i64, making it difficult to select the12161// SLLW/DIVUW/.../*W later on because the fact that the operation was originally of12162// type i8/i16/i32 is lost.12163static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,12164unsigned ExtOpc = ISD::ANY_EXTEND) {12165SDLoc DL(N);12166RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());12167SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));12168SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));12169SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);12170// ReplaceNodeResults requires we maintain the same type for the return value.12171return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);12172}1217312174// Converts the given 32-bit operation to an i64 operation with sign-extension12175// semantics to reduce the number of sign-extension instructions.12176static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {12177SDLoc DL(N);12178SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));12179SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));12180SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);12181SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,12182DAG.getValueType(MVT::i32));12183return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);12184}1218512186void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,12187SmallVectorImpl<SDValue> &Results,12188SelectionDAG &DAG) const {12189SDLoc DL(N);12190switch (N->getOpcode()) {12191default:12192llvm_unreachable("Don't know how to custom type legalize this operation!");12193case ISD::STRICT_FP_TO_SINT:12194case ISD::STRICT_FP_TO_UINT:12195case ISD::FP_TO_SINT:12196case ISD::FP_TO_UINT: {12197assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&12198"Unexpected custom legalisation");12199bool IsStrict = N->isStrictFPOpcode();12200bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||12201N->getOpcode() == ISD::STRICT_FP_TO_SINT;12202SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);12203if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=12204TargetLowering::TypeSoftenFloat) {12205if (!isTypeLegal(Op0.getValueType()))12206return;12207if (IsStrict) {12208SDValue Chain = N->getOperand(0);12209// In the absence of Zfh, promote f16 to f32, then convert.12210if (Op0.getValueType() == MVT::f16 &&12211!Subtarget.hasStdExtZfhOrZhinx()) {12212Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},12213{Chain, Op0});12214Chain = Op0.getValue(1);12215}12216unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV6412217: RISCVISD::STRICT_FCVT_WU_RV64;12218SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);12219SDValue Res = DAG.getNode(12220Opc, DL, VTs, Chain, Op0,12221DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));12222Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));12223Results.push_back(Res.getValue(1));12224return;12225}12226// For bf16, or f16 in the absence of Zfh, promote [b]f16 to f32 and then12227// convert.12228if ((Op0.getValueType() == MVT::f16 &&12229!Subtarget.hasStdExtZfhOrZhinx()) ||12230Op0.getValueType() == MVT::bf16)12231Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);1223212233unsigned Opc = IsSigned ? 
RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;12234SDValue Res =12235DAG.getNode(Opc, DL, MVT::i64, Op0,12236DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));12237Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));12238return;12239}12240// If the FP type needs to be softened, emit a library call using the 'si'12241// version. If we left it to default legalization we'd end up with 'di'. If12242// the FP type doesn't need to be softened just let generic type12243// legalization promote the result type.12244RTLIB::Libcall LC;12245if (IsSigned)12246LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));12247else12248LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));12249MakeLibCallOptions CallOptions;12250EVT OpVT = Op0.getValueType();12251CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);12252SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();12253SDValue Result;12254std::tie(Result, Chain) =12255makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);12256Results.push_back(Result);12257if (IsStrict)12258Results.push_back(Chain);12259break;12260}12261case ISD::LROUND: {12262SDValue Op0 = N->getOperand(0);12263EVT Op0VT = Op0.getValueType();12264if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=12265TargetLowering::TypeSoftenFloat) {12266if (!isTypeLegal(Op0VT))12267return;1226812269// In absense of Zfh, promote f16 to f32, then convert.12270if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())12271Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);1227212273SDValue Res =12274DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,12275DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));12276Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));12277return;12278}12279// If the FP type needs to be softened, emit a library call to lround. We'll12280// need to truncate the result. We assume any value that doesn't fit in i3212281// is allowed to return an unspecified value.12282RTLIB::Libcall LC =12283Op0.getValueType() == MVT::f64 ? 
RTLIB::LROUND_F64 : RTLIB::LROUND_F32;12284MakeLibCallOptions CallOptions;12285EVT OpVT = Op0.getValueType();12286CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);12287SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;12288Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);12289Results.push_back(Result);12290break;12291}12292case ISD::READCYCLECOUNTER:12293case ISD::READSTEADYCOUNTER: {12294assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "12295"has custom type legalization on riscv32");1229612297SDValue LoCounter, HiCounter;12298MVT XLenVT = Subtarget.getXLenVT();12299if (N->getOpcode() == ISD::READCYCLECOUNTER) {12300LoCounter = DAG.getTargetConstant(12301RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding, DL, XLenVT);12302HiCounter = DAG.getTargetConstant(12303RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding, DL, XLenVT);12304} else {12305LoCounter = DAG.getTargetConstant(12306RISCVSysReg::lookupSysRegByName("TIME")->Encoding, DL, XLenVT);12307HiCounter = DAG.getTargetConstant(12308RISCVSysReg::lookupSysRegByName("TIMEH")->Encoding, DL, XLenVT);12309}12310SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);12311SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,12312N->getOperand(0), LoCounter, HiCounter);1231312314Results.push_back(12315DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));12316Results.push_back(RCW.getValue(2));12317break;12318}12319case ISD::LOAD: {12320if (!ISD::isNON_EXTLoad(N))12321return;1232212323// Use a SEXTLOAD instead of the default EXTLOAD. Similar to the12324// sext_inreg we emit for ADD/SUB/MUL/SLLI.12325LoadSDNode *Ld = cast<LoadSDNode>(N);1232612327SDLoc dl(N);12328SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),12329Ld->getBasePtr(), Ld->getMemoryVT(),12330Ld->getMemOperand());12331Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));12332Results.push_back(Res.getValue(1));12333return;12334}12335case ISD::MUL: {12336unsigned Size = N->getSimpleValueType(0).getSizeInBits();12337unsigned XLen = Subtarget.getXLen();12338// This multiply needs to be expanded, try to use MULHSU+MUL if possible.12339if (Size > XLen) {12340assert(Size == (XLen * 2) && "Unexpected custom legalisation");12341SDValue LHS = N->getOperand(0);12342SDValue RHS = N->getOperand(1);12343APInt HighMask = APInt::getHighBitsSet(Size, XLen);1234412345bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);12346bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);12347// We need exactly one side to be unsigned.12348if (LHSIsU == RHSIsU)12349return;1235012351auto MakeMULPair = [&](SDValue S, SDValue U) {12352MVT XLenVT = Subtarget.getXLenVT();12353S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);12354U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);12355SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);12356SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);12357return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);12358};1235912360bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;12361bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;1236212363// The other operand should be signed, but still prefer MULH when12364// possible.12365if (RHSIsU && LHSIsS && !RHSIsS)12366Results.push_back(MakeMULPair(LHS, RHS));12367else if (LHSIsU && RHSIsS && !LHSIsS)12368Results.push_back(MakeMULPair(RHS, LHS));1236912370return;12371}12372[[fallthrough]];12373}12374case ISD::ADD:12375case ISD::SUB:12376assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() 
&&12377"Unexpected custom legalisation");12378Results.push_back(customLegalizeToWOpWithSExt(N, DAG));12379break;12380case ISD::SHL:12381case ISD::SRA:12382case ISD::SRL:12383assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&12384"Unexpected custom legalisation");12385if (N->getOperand(1).getOpcode() != ISD::Constant) {12386// If we can use a BSET instruction, allow default promotion to apply.12387if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&12388isOneConstant(N->getOperand(0)))12389break;12390Results.push_back(customLegalizeToWOp(N, DAG));12391break;12392}1239312394// Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is12395// similar to customLegalizeToWOpWithSExt, but we must zero_extend the12396// shift amount.12397if (N->getOpcode() == ISD::SHL) {12398SDLoc DL(N);12399SDValue NewOp0 =12400DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));12401SDValue NewOp1 =12402DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));12403SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);12404SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,12405DAG.getValueType(MVT::i32));12406Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));12407}1240812409break;12410case ISD::ROTL:12411case ISD::ROTR:12412assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&12413"Unexpected custom legalisation");12414assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||12415Subtarget.hasVendorXTHeadBb()) &&12416"Unexpected custom legalization");12417if (!isa<ConstantSDNode>(N->getOperand(1)) &&12418!(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))12419return;12420Results.push_back(customLegalizeToWOp(N, DAG));12421break;12422case ISD::CTTZ:12423case ISD::CTTZ_ZERO_UNDEF:12424case ISD::CTLZ:12425case ISD::CTLZ_ZERO_UNDEF: {12426assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&12427"Unexpected custom legalisation");1242812429SDValue NewOp0 =12430DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));12431bool IsCTZ =12432N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;12433unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;12434SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);12435Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));12436return;12437}12438case ISD::SDIV:12439case ISD::UDIV:12440case ISD::UREM: {12441MVT VT = N->getSimpleValueType(0);12442assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&12443Subtarget.is64Bit() && Subtarget.hasStdExtM() &&12444"Unexpected custom legalisation");12445// Don't promote division/remainder by constant since we should expand those12446// to multiply by magic constant.12447AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();12448if (N->getOperand(1).getOpcode() == ISD::Constant &&12449!isIntDivCheap(N->getValueType(0), Attr))12450return;1245112452// If the input is i32, use ANY_EXTEND since the W instructions don't read12453// the upper 32 bits. For other types we need to sign or zero extend12454// based on the opcode.12455unsigned ExtOpc = ISD::ANY_EXTEND;12456if (VT != MVT::i32)12457ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND12458: ISD::ZERO_EXTEND;1245912460Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));12461break;12462}12463case ISD::SADDO: {12464assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&12465"Unexpected custom legalisation");1246612467// If the RHS is a constant, we can simplify ConditionRHS below. 
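// (Scalar sketch of the check built below, with both operands sign-extended
// to 64 bits:
//   int64_t Wide = (int64_t)LHS + (int64_t)RHS;
//   int32_t Res  = (int32_t)Wide;            // the sext_inreg of the ADD
//   bool Overflow = (Res < LHS) != (RHS < 0);
// With a constant RHS, the (RHS < 0) term is known at compile time.)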
Otherwise12468// use the default legalization.12469if (!isa<ConstantSDNode>(N->getOperand(1)))12470return;1247112472SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));12473SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));12474SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);12475Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,12476DAG.getValueType(MVT::i32));1247712478SDValue Zero = DAG.getConstant(0, DL, MVT::i64);1247912480// For an addition, the result should be less than one of the operands (LHS)12481// if and only if the other operand (RHS) is negative, otherwise there will12482// be overflow.12483// For a subtraction, the result should be less than one of the operands12484// (LHS) if and only if the other operand (RHS) is (non-zero) positive,12485// otherwise there will be overflow.12486EVT OType = N->getValueType(1);12487SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);12488SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);1248912490SDValue Overflow =12491DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);12492Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));12493Results.push_back(Overflow);12494return;12495}12496case ISD::UADDO:12497case ISD::USUBO: {12498assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&12499"Unexpected custom legalisation");12500bool IsAdd = N->getOpcode() == ISD::UADDO;12501// Create an ADDW or SUBW.12502SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));12503SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));12504SDValue Res =12505DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);12506Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,12507DAG.getValueType(MVT::i32));1250812509SDValue Overflow;12510if (IsAdd && isOneConstant(RHS)) {12511// Special case uaddo X, 1 overflowed if the addition result is 0.12512// The general case (X + C) < C is not necessarily beneficial. Although we12513// reduce the live range of X, we may introduce the materialization of12514// constant C, especially when the setcc result is used by branch. We have12515// no compare with constant and branch instructions.12516Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,12517DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);12518} else if (IsAdd && isAllOnesConstant(RHS)) {12519// Special case uaddo X, -1 overflowed if X != 0.12520Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),12521DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);12522} else {12523// Sign extend the LHS and perform an unsigned compare with the ADDW12524// result. Since the inputs are sign extended from i32, this is equivalent12525// to comparing the lower 32 bits.12526LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));12527Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,12528IsAdd ? ISD::SETULT : ISD::SETUGT);12529}1253012531Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));12532Results.push_back(Overflow);12533return;12534}12535case ISD::UADDSAT:12536case ISD::USUBSAT: {12537assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&12538"Unexpected custom legalisation");12539if (Subtarget.hasStdExtZbb()) {12540// With Zbb we can sign extend and let LegalizeDAG use minu/maxu. 
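// (For example, for i32 uaddsat with X = 0xFFFFFFF0 and Y = 0x20: after sign
// extension the i64 unsigned add of 0xFFFFFFFFFFFFFFF0 and 0x20 already
// saturates, so the truncated result is the required 0xFFFFFFFF, whereas a
// zero- or any-extended add would give 0x100000010 and miss the saturation.)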
Using12541// sign extend allows overflow of the lower 32 bits to be detected on12542// the promoted size.12543SDValue LHS =12544DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));12545SDValue RHS =12546DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));12547SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);12548Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));12549return;12550}1255112552// Without Zbb, expand to UADDO/USUBO+select which will trigger our custom12553// promotion for UADDO/USUBO.12554Results.push_back(expandAddSubSat(N, DAG));12555return;12556}12557case ISD::SADDSAT:12558case ISD::SSUBSAT: {12559assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&12560"Unexpected custom legalisation");12561Results.push_back(expandAddSubSat(N, DAG));12562return;12563}12564case ISD::ABS: {12565assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&12566"Unexpected custom legalisation");1256712568if (Subtarget.hasStdExtZbb()) {12569// Emit a special ABSW node that will be expanded to NEGW+MAX at isel.12570// This allows us to remember that the result is sign extended. Expanding12571// to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.12572SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,12573N->getOperand(0));12574SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);12575Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));12576return;12577}1257812579// Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)12580SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));1258112582// Freeze the source so we can increase it's use count.12583Src = DAG.getFreeze(Src);1258412585// Copy sign bit to all bits using the sraiw pattern.12586SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,12587DAG.getValueType(MVT::i32));12588SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,12589DAG.getConstant(31, DL, MVT::i64));1259012591SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);12592NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);1259312594// NOTE: The result is only required to be anyextended, but sext is12595// consistent with type legalization of sub.12596NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,12597DAG.getValueType(MVT::i32));12598Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));12599return;12600}12601case ISD::BITCAST: {12602EVT VT = N->getValueType(0);12603assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");12604SDValue Op0 = N->getOperand(0);12605EVT Op0VT = Op0.getValueType();12606MVT XLenVT = Subtarget.getXLenVT();12607if (VT == MVT::i16 && Op0VT == MVT::f16 &&12608Subtarget.hasStdExtZfhminOrZhinxmin()) {12609SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);12610Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));12611} else if (VT == MVT::i16 && Op0VT == MVT::bf16 &&12612Subtarget.hasStdExtZfbfmin()) {12613SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);12614Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));12615} else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&12616Subtarget.hasStdExtFOrZfinx()) {12617SDValue FPConv =12618DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);12619Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));12620} else if (VT == MVT::i64 && Op0VT == MVT::f64 && !Subtarget.is64Bit() 
&&12621Subtarget.hasStdExtDOrZdinx()) {12622SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,12623DAG.getVTList(MVT::i32, MVT::i32), Op0);12624SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,12625NewReg.getValue(0), NewReg.getValue(1));12626Results.push_back(RetReg);12627} else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&12628isTypeLegal(Op0VT)) {12629// Custom-legalize bitcasts from fixed-length vector types to illegal12630// scalar types in order to improve codegen. Bitcast the vector to a12631// one-element vector type whose element type is the same as the result12632// type, and extract the first element.12633EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);12634if (isTypeLegal(BVT)) {12635SDValue BVec = DAG.getBitcast(BVT, Op0);12636Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,12637DAG.getVectorIdxConstant(0, DL)));12638}12639}12640break;12641}12642case RISCVISD::BREV8:12643case RISCVISD::ORC_B: {12644MVT VT = N->getSimpleValueType(0);12645MVT XLenVT = Subtarget.getXLenVT();12646assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&12647"Unexpected custom legalisation");12648assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||12649(N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&12650"Unexpected extension");12651SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));12652SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);12653// ReplaceNodeResults requires we maintain the same type for the return12654// value.12655Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));12656break;12657}12658case ISD::EXTRACT_VECTOR_ELT: {12659// Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element12660// type is illegal (currently only vXi64 RV32).12661// With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are12662// transferred to the destination register. 
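// (Conceptually, on RV32 for a 64-bit element E this yields
//   Lo = (uint32_t)E and Hi = (uint32_t)(E >> 32),
// and the legalized result is BUILD_PAIR(Lo, Hi) == E.)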
We issue two of these from the12663// upper- and lower- halves of the SEW-bit vector element, slid down to the12664// first element.12665SDValue Vec = N->getOperand(0);12666SDValue Idx = N->getOperand(1);1266712668// The vector type hasn't been legalized yet so we can't issue target12669// specific nodes if it needs legalization.12670// FIXME: We would manually legalize if it's important.12671if (!isTypeLegal(Vec.getValueType()))12672return;1267312674MVT VecVT = Vec.getSimpleValueType();1267512676assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&12677VecVT.getVectorElementType() == MVT::i64 &&12678"Unexpected EXTRACT_VECTOR_ELT legalization");1267912680// If this is a fixed vector, we need to convert it to a scalable vector.12681MVT ContainerVT = VecVT;12682if (VecVT.isFixedLengthVector()) {12683ContainerVT = getContainerForFixedLengthVector(VecVT);12684Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);12685}1268612687MVT XLenVT = Subtarget.getXLenVT();1268812689// Use a VL of 1 to avoid processing more elements than we need.12690auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);1269112692// Unless the index is known to be 0, we must slide the vector down to get12693// the desired element into index 0.12694if (!isNullConstant(Idx)) {12695Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,12696DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);12697}1269812699// Extract the lower XLEN bits of the correct vector element.12700SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);1270112702// To extract the upper XLEN bits of the vector element, shift the first12703// element right by 32 bits and re-extract the lower XLEN bits.12704SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,12705DAG.getUNDEF(ContainerVT),12706DAG.getConstant(32, DL, XLenVT), VL);12707SDValue LShr32 =12708DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,12709DAG.getUNDEF(ContainerVT), Mask, VL);1271012711SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);1271212713Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));12714break;12715}12716case ISD::INTRINSIC_WO_CHAIN: {12717unsigned IntNo = N->getConstantOperandVal(0);12718switch (IntNo) {12719default:12720llvm_unreachable(12721"Don't know how to custom type legalize this intrinsic!");12722case Intrinsic::experimental_get_vector_length: {12723SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);12724Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));12725return;12726}12727case Intrinsic::experimental_cttz_elts: {12728SDValue Res = lowerCttzElts(N, DAG, Subtarget);12729Results.push_back(12730DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));12731return;12732}12733case Intrinsic::riscv_orc_b:12734case Intrinsic::riscv_brev8:12735case Intrinsic::riscv_sha256sig0:12736case Intrinsic::riscv_sha256sig1:12737case Intrinsic::riscv_sha256sum0:12738case Intrinsic::riscv_sha256sum1:12739case Intrinsic::riscv_sm3p0:12740case Intrinsic::riscv_sm3p1: {12741if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)12742return;12743unsigned Opc;12744switch (IntNo) {12745case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;12746case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;12747case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;12748case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;12749case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;12750case 
Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;12751case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;12752case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;12753}1275412755SDValue NewOp =12756DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));12757SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);12758Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));12759return;12760}12761case Intrinsic::riscv_sm4ks:12762case Intrinsic::riscv_sm4ed: {12763unsigned Opc =12764IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;12765SDValue NewOp0 =12766DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));12767SDValue NewOp1 =12768DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));12769SDValue Res =12770DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));12771Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));12772return;12773}12774case Intrinsic::riscv_mopr: {12775if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)12776return;12777SDValue NewOp =12778DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));12779SDValue Res = DAG.getNode(12780RISCVISD::MOPR, DL, MVT::i64, NewOp,12781DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));12782Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));12783return;12784}12785case Intrinsic::riscv_moprr: {12786if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)12787return;12788SDValue NewOp0 =12789DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));12790SDValue NewOp1 =12791DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));12792SDValue Res = DAG.getNode(12793RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,12794DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));12795Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));12796return;12797}12798case Intrinsic::riscv_clmul: {12799if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)12800return;1280112802SDValue NewOp0 =12803DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));12804SDValue NewOp1 =12805DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));12806SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);12807Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));12808return;12809}12810case Intrinsic::riscv_clmulh:12811case Intrinsic::riscv_clmulr: {12812if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)12813return;1281412815// Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros12816// to the full 128-bit clmul result of multiplying two xlen values.12817// Perform clmulr or clmulh on the shifted values. Finally, extract the12818// upper 32 bits.12819//12820// The alternative is to mask the inputs to 32 bits and use clmul, but12821// that requires two shifts to mask each input without zext.w.12822// FIXME: If the inputs are known zero extended or could be freely12823// zero extended, the mask form would be better.12824SDValue NewOp0 =12825DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));12826SDValue NewOp1 =12827DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));12828NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,12829DAG.getConstant(32, DL, MVT::i64));12830NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,12831DAG.getConstant(32, DL, MVT::i64));12832unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? 
RISCVISD::CLMULH12833: RISCVISD::CLMULR;12834SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);12835Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,12836DAG.getConstant(32, DL, MVT::i64));12837Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));12838return;12839}12840case Intrinsic::riscv_vmv_x_s: {12841EVT VT = N->getValueType(0);12842MVT XLenVT = Subtarget.getXLenVT();12843if (VT.bitsLT(XLenVT)) {12844// Simple case just extract using vmv.x.s and truncate.12845SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,12846Subtarget.getXLenVT(), N->getOperand(1));12847Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));12848return;12849}1285012851assert(VT == MVT::i64 && !Subtarget.is64Bit() &&12852"Unexpected custom legalization");1285312854// We need to do the move in two steps.12855SDValue Vec = N->getOperand(1);12856MVT VecVT = Vec.getSimpleValueType();1285712858// First extract the lower XLEN bits of the element.12859SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);1286012861// To extract the upper XLEN bits of the vector element, shift the first12862// element right by 32 bits and re-extract the lower XLEN bits.12863auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);1286412865SDValue ThirtyTwoV =12866DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),12867DAG.getConstant(32, DL, XLenVT), VL);12868SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,12869DAG.getUNDEF(VecVT), Mask, VL);12870SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);1287112872Results.push_back(12873DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));12874break;12875}12876}12877break;12878}12879case ISD::VECREDUCE_ADD:12880case ISD::VECREDUCE_AND:12881case ISD::VECREDUCE_OR:12882case ISD::VECREDUCE_XOR:12883case ISD::VECREDUCE_SMAX:12884case ISD::VECREDUCE_UMAX:12885case ISD::VECREDUCE_SMIN:12886case ISD::VECREDUCE_UMIN:12887if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))12888Results.push_back(V);12889break;12890case ISD::VP_REDUCE_ADD:12891case ISD::VP_REDUCE_AND:12892case ISD::VP_REDUCE_OR:12893case ISD::VP_REDUCE_XOR:12894case ISD::VP_REDUCE_SMAX:12895case ISD::VP_REDUCE_UMAX:12896case ISD::VP_REDUCE_SMIN:12897case ISD::VP_REDUCE_UMIN:12898if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))12899Results.push_back(V);12900break;12901case ISD::GET_ROUNDING: {12902SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);12903SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));12904Results.push_back(Res.getValue(0));12905Results.push_back(Res.getValue(1));12906break;12907}12908}12909}1291012911/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP12912/// which corresponds to it.12913static unsigned getVecReduceOpcode(unsigned Opc) {12914switch (Opc) {12915default:12916llvm_unreachable("Unhandled binary to transfrom reduction");12917case ISD::ADD:12918return ISD::VECREDUCE_ADD;12919case ISD::UMAX:12920return ISD::VECREDUCE_UMAX;12921case ISD::SMAX:12922return ISD::VECREDUCE_SMAX;12923case ISD::UMIN:12924return ISD::VECREDUCE_UMIN;12925case ISD::SMIN:12926return ISD::VECREDUCE_SMIN;12927case ISD::AND:12928return ISD::VECREDUCE_AND;12929case ISD::OR:12930return ISD::VECREDUCE_OR;12931case ISD::XOR:12932return ISD::VECREDUCE_XOR;12933case ISD::FADD:12934// Note: This is the associative form of the generic reduction opcode.12935return ISD::VECREDUCE_FADD;12936}12937}1293812939/// Perform two related transforms whose purpose is to incrementally 
recognize12940/// an explode_vector followed by scalar reduction as a vector reduction node.12941/// This exists to recover from a deficiency in SLP which can't handle12942/// forests with multiple roots sharing common nodes. In some cases, one12943/// of the trees will be vectorized, and the other will remain (unprofitably)12944/// scalarized.12945static SDValue12946combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,12947const RISCVSubtarget &Subtarget) {1294812949// These transforms need to run before all integer types have been legalized12950// to i64 (so that the vector element type matches the add type), and while12951// it's safe to introduce odd-sized vector types.12952if (DAG.NewNodesMustHaveLegalTypes)12953return SDValue();1295412955// Without V, this transform isn't useful. We could form the (illegal)12956// operations and let them be scalarized again, but there's really no point.12957if (!Subtarget.hasVInstructions())12958return SDValue();1295912960const SDLoc DL(N);12961const EVT VT = N->getValueType(0);12962const unsigned Opc = N->getOpcode();1296312964// For FADD, we only handle the case with reassociation allowed. We12965// could handle strict reduction order, but at the moment, there's no12966// known reason to, and the complexity isn't worth it.12967// TODO: Handle fminnum and fmaxnum here12968if (!VT.isInteger() &&12969(Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))12970return SDValue();1297112972const unsigned ReduceOpc = getVecReduceOpcode(Opc);12973assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&12974"Inconsistent mappings");12975SDValue LHS = N->getOperand(0);12976SDValue RHS = N->getOperand(1);1297712978if (!LHS.hasOneUse() || !RHS.hasOneUse())12979return SDValue();1298012981if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)12982std::swap(LHS, RHS);1298312984if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||12985!isa<ConstantSDNode>(RHS.getOperand(1)))12986return SDValue();1298712988uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();12989SDValue SrcVec = RHS.getOperand(0);12990EVT SrcVecVT = SrcVec.getValueType();12991assert(SrcVecVT.getVectorElementType() == VT);12992if (SrcVecVT.isScalableVector())12993return SDValue();1299412995if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())12996return SDValue();1299712998// match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to12999// reduce_op (extract_subvector [2 x VT] from V). This will form the13000// root of our reduction tree. 
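// (For example, a fully scalarized sum such as ((v[0] + v[1]) + v[2]) + v[3]
// is recognized bottom-up: the innermost add becomes a two-element
// vecreduce_add via the pattern below, and each later add of the next
// extracted element grows the reduction by one element via the second
// pattern.)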
TODO: We could extend this to any two13001// adjacent aligned constant indices if desired.13002if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&13003LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {13004uint64_t LHSIdx =13005cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();13006if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {13007EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);13008SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,13009DAG.getVectorIdxConstant(0, DL));13010return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());13011}13012}1301313014// Match (binop (reduce (extract_subvector V, 0),13015// (extract_vector_elt V, sizeof(SubVec))))13016// into a reduction of one more element from the original vector V.13017if (LHS.getOpcode() != ReduceOpc)13018return SDValue();1301913020SDValue ReduceVec = LHS.getOperand(0);13021if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&13022ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&13023isNullConstant(ReduceVec.getOperand(1)) &&13024ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {13025// For illegal types (e.g. 3xi32), most will be combined again into a13026// wider (hopefully legal) type. If this is a terminal state, we are13027// relying on type legalization here to produce something reasonable13028// and this lowering quality could probably be improved. (TODO)13029EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);13030SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,13031DAG.getVectorIdxConstant(0, DL));13032auto Flags = ReduceVec->getFlags();13033Flags.intersectWith(N->getFlags());13034return DAG.getNode(ReduceOpc, DL, VT, Vec, Flags);13035}1303613037return SDValue();13038}130391304013041// Try to fold (<bop> x, (reduction.<bop> vec, start))13042static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,13043const RISCVSubtarget &Subtarget) {13044auto BinOpToRVVReduce = [](unsigned Opc) {13045switch (Opc) {13046default:13047llvm_unreachable("Unhandled binary to transfrom reduction");13048case ISD::ADD:13049return RISCVISD::VECREDUCE_ADD_VL;13050case ISD::UMAX:13051return RISCVISD::VECREDUCE_UMAX_VL;13052case ISD::SMAX:13053return RISCVISD::VECREDUCE_SMAX_VL;13054case ISD::UMIN:13055return RISCVISD::VECREDUCE_UMIN_VL;13056case ISD::SMIN:13057return RISCVISD::VECREDUCE_SMIN_VL;13058case ISD::AND:13059return RISCVISD::VECREDUCE_AND_VL;13060case ISD::OR:13061return RISCVISD::VECREDUCE_OR_VL;13062case ISD::XOR:13063return RISCVISD::VECREDUCE_XOR_VL;13064case ISD::FADD:13065return RISCVISD::VECREDUCE_FADD_VL;13066case ISD::FMAXNUM:13067return RISCVISD::VECREDUCE_FMAX_VL;13068case ISD::FMINNUM:13069return RISCVISD::VECREDUCE_FMIN_VL;13070}13071};1307213073auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {13074return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&13075isNullConstant(V.getOperand(1)) &&13076V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);13077};1307813079unsigned Opc = N->getOpcode();13080unsigned ReduceIdx;13081if (IsReduction(N->getOperand(0), Opc))13082ReduceIdx = 0;13083else if (IsReduction(N->getOperand(1), Opc))13084ReduceIdx = 1;13085else13086return SDValue();1308713088// Skip if FADD disallows reassociation but the combiner needs.13089if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())13090return SDValue();1309113092SDValue Extract = N->getOperand(ReduceIdx);13093SDValue Reduce = Extract.getOperand(0);13094if 
(!Extract.hasOneUse() || !Reduce.hasOneUse())13095return SDValue();1309613097SDValue ScalarV = Reduce.getOperand(2);13098EVT ScalarVT = ScalarV.getValueType();13099if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&13100ScalarV.getOperand(0)->isUndef() &&13101isNullConstant(ScalarV.getOperand(2)))13102ScalarV = ScalarV.getOperand(1);1310313104// Make sure that ScalarV is a splat with VL=1.13105if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&13106ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&13107ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)13108return SDValue();1310913110if (!isNonZeroAVL(ScalarV.getOperand(2)))13111return SDValue();1311213113// Check the scalar of ScalarV is neutral element13114// TODO: Deal with value other than neutral element.13115if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),131160))13117return SDValue();1311813119// If the AVL is zero, operand 0 will be returned. So it's not safe to fold.13120// FIXME: We might be able to improve this if operand 0 is undef.13121if (!isNonZeroAVL(Reduce.getOperand(5)))13122return SDValue();1312313124SDValue NewStart = N->getOperand(1 - ReduceIdx);1312513126SDLoc DL(N);13127SDValue NewScalarV =13128lowerScalarInsert(NewStart, ScalarV.getOperand(2),13129ScalarV.getSimpleValueType(), DL, DAG, Subtarget);1313013131// If we looked through an INSERT_SUBVECTOR we need to restore it.13132if (ScalarVT != ScalarV.getValueType())13133NewScalarV =13134DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),13135NewScalarV, DAG.getVectorIdxConstant(0, DL));1313613137SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),13138NewScalarV, Reduce.getOperand(3),13139Reduce.getOperand(4), Reduce.getOperand(5)};13140SDValue NewReduce =13141DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);13142return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,13143Extract.getOperand(1));13144}1314513146// Optimize (add (shl x, c0), (shl y, c1)) ->13147// (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3].13148static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,13149const RISCVSubtarget &Subtarget) {13150// Perform this optimization only in the zba extension.13151if (!Subtarget.hasStdExtZba())13152return SDValue();1315313154// Skip for vector types and larger types.13155EVT VT = N->getValueType(0);13156if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())13157return SDValue();1315813159// The two operand nodes must be SHL and have no other use.13160SDValue N0 = N->getOperand(0);13161SDValue N1 = N->getOperand(1);13162if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||13163!N0->hasOneUse() || !N1->hasOneUse())13164return SDValue();1316513166// Check c0 and c1.13167auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));13168auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));13169if (!N0C || !N1C)13170return SDValue();13171int64_t C0 = N0C->getSExtValue();13172int64_t C1 = N1C->getSExtValue();13173if (C0 <= 0 || C1 <= 0)13174return SDValue();1317513176// Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.13177int64_t Bits = std::min(C0, C1);13178int64_t Diff = std::abs(C0 - C1);13179if (Diff != 1 && Diff != 2 && Diff != 3)13180return SDValue();1318113182// Build nodes.13183SDLoc DL(N);13184SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);13185SDValue NL = (C0 > C1) ? 
N0->getOperand(0) : N1->getOperand(0);13186SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, NL,13187DAG.getConstant(Diff, DL, VT), NS);13188return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));13189}1319013191// Combine a constant select operand into its use:13192//13193// (and (select cond, -1, c), x)13194// -> (select cond, x, (and x, c)) [AllOnes=1]13195// (or (select cond, 0, c), x)13196// -> (select cond, x, (or x, c)) [AllOnes=0]13197// (xor (select cond, 0, c), x)13198// -> (select cond, x, (xor x, c)) [AllOnes=0]13199// (add (select cond, 0, c), x)13200// -> (select cond, x, (add x, c)) [AllOnes=0]13201// (sub x, (select cond, 0, c))13202// -> (select cond, x, (sub x, c)) [AllOnes=0]13203static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,13204SelectionDAG &DAG, bool AllOnes,13205const RISCVSubtarget &Subtarget) {13206EVT VT = N->getValueType(0);1320713208// Skip vectors.13209if (VT.isVector())13210return SDValue();1321113212if (!Subtarget.hasConditionalMoveFusion()) {13213// (select cond, x, (and x, c)) has custom lowering with Zicond.13214if ((!Subtarget.hasStdExtZicond() &&13215!Subtarget.hasVendorXVentanaCondOps()) ||13216N->getOpcode() != ISD::AND)13217return SDValue();1321813219// Maybe harmful when condition code has multiple use.13220if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())13221return SDValue();1322213223// Maybe harmful when VT is wider than XLen.13224if (VT.getSizeInBits() > Subtarget.getXLen())13225return SDValue();13226}1322713228if ((Slct.getOpcode() != ISD::SELECT &&13229Slct.getOpcode() != RISCVISD::SELECT_CC) ||13230!Slct.hasOneUse())13231return SDValue();1323213233auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {13234return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);13235};1323613237bool SwapSelectOps;13238unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 
2 : 0;13239SDValue TrueVal = Slct.getOperand(1 + OpOffset);13240SDValue FalseVal = Slct.getOperand(2 + OpOffset);13241SDValue NonConstantVal;13242if (isZeroOrAllOnes(TrueVal, AllOnes)) {13243SwapSelectOps = false;13244NonConstantVal = FalseVal;13245} else if (isZeroOrAllOnes(FalseVal, AllOnes)) {13246SwapSelectOps = true;13247NonConstantVal = TrueVal;13248} else13249return SDValue();1325013251// Slct is now known to be the desired identity constant when CC is true.13252TrueVal = OtherOp;13253FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);13254// Unless SwapSelectOps says the condition should be false.13255if (SwapSelectOps)13256std::swap(TrueVal, FalseVal);1325713258if (Slct.getOpcode() == RISCVISD::SELECT_CC)13259return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,13260{Slct.getOperand(0), Slct.getOperand(1),13261Slct.getOperand(2), TrueVal, FalseVal});1326213263return DAG.getNode(ISD::SELECT, SDLoc(N), VT,13264{Slct.getOperand(0), TrueVal, FalseVal});13265}1326613267// Attempt combineSelectAndUse on each operand of a commutative operator N.13268static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,13269bool AllOnes,13270const RISCVSubtarget &Subtarget) {13271SDValue N0 = N->getOperand(0);13272SDValue N1 = N->getOperand(1);13273if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))13274return Result;13275if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))13276return Result;13277return SDValue();13278}1327913280// Transform (add (mul x, c0), c1) ->13281// (add (mul (add x, c1/c0), c0), c1%c0).13282// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case13283// that should be excluded is when c0*(c1/c0) is simm12, which will lead13284// to an infinite loop in DAGCombine if transformed.13285// Or transform (add (mul x, c0), c1) ->13286// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),13287// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner13288// case that should be excluded is when c0*(c1/c0+1) is simm12, which will13289// lead to an infinite loop in DAGCombine if transformed.13290// Or transform (add (mul x, c0), c1) ->13291// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),13292// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. 
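// (Worked example of the first form, picking illustrative constants: with
// c0 = 100 and c1 = 4099 (not simm12), c1/c0 = 40 and c1%c0 = 99 are both
// simm12 while c0*(c1/c0) = 4000 is not, so (add (mul x, 100), 4099) becomes
// (add (mul (add x, 40), 100), 99).)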
A special corner13293// case that should be excluded is when c0*(c1/c0-1) is simm12, which will13294// lead to an infinite loop in DAGCombine if transformed.13295// Or transform (add (mul x, c0), c1) ->13296// (mul (add x, c1/c0), c0).13297// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.13298static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,13299const RISCVSubtarget &Subtarget) {13300// Skip for vector types and larger types.13301EVT VT = N->getValueType(0);13302if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())13303return SDValue();13304// The first operand node must be a MUL and has no other use.13305SDValue N0 = N->getOperand(0);13306if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)13307return SDValue();13308// Check if c0 and c1 match above conditions.13309auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));13310auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));13311if (!N0C || !N1C)13312return SDValue();13313// If N0C has multiple uses it's possible one of the cases in13314// DAGCombiner::isMulAddWithConstProfitable will be true, which would result13315// in an infinite loop.13316if (!N0C->hasOneUse())13317return SDValue();13318int64_t C0 = N0C->getSExtValue();13319int64_t C1 = N1C->getSExtValue();13320int64_t CA, CB;13321if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))13322return SDValue();13323// Search for proper CA (non-zero) and CB that both are simm12.13324if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&13325!isInt<12>(C0 * (C1 / C0))) {13326CA = C1 / C0;13327CB = C1 % C0;13328} else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&13329isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {13330CA = C1 / C0 + 1;13331CB = C1 % C0 - C0;13332} else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&13333isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {13334CA = C1 / C0 - 1;13335CB = C1 % C0 + C0;13336} else13337return SDValue();13338// Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).13339SDLoc DL(N);13340SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),13341DAG.getConstant(CA, DL, VT));13342SDValue New1 =13343DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT));13344return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT));13345}1334613347// add (zext, zext) -> zext (add (zext, zext))13348// sub (zext, zext) -> sext (sub (zext, zext))13349// mul (zext, zext) -> zext (mul (zext, zext))13350// sdiv (zext, zext) -> zext (sdiv (zext, zext))13351// udiv (zext, zext) -> zext (udiv (zext, zext))13352// srem (zext, zext) -> zext (srem (zext, zext))13353// urem (zext, zext) -> zext (urem (zext, zext))13354//13355// where the sum of the extend widths match, and the the range of the bin op13356// fits inside the width of the narrower bin op. 
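// (For instance, an i32-element add of two values zero-extended from i8
// elements becomes an i16-element add of the (still zero-extended) i8 values,
// with the sum zero-extended back out to i32; an i8 + i8 sum always fits in
// 16 bits.)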
(For profitability on rvv, we13357// use a power of two for both inner and outer extend.)13358static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {1335913360EVT VT = N->getValueType(0);13361if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))13362return SDValue();1336313364SDValue N0 = N->getOperand(0);13365SDValue N1 = N->getOperand(1);13366if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)13367return SDValue();13368if (!N0.hasOneUse() || !N1.hasOneUse())13369return SDValue();1337013371SDValue Src0 = N0.getOperand(0);13372SDValue Src1 = N1.getOperand(0);13373EVT SrcVT = Src0.getValueType();13374if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||13375SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||13376SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)13377return SDValue();1337813379LLVMContext &C = *DAG.getContext();13380EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(C);13381EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());1338213383Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);13384Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);1338513386// Src0 and Src1 are zero extended, so they're always positive if signed.13387//13388// sub can produce a negative from two positive operands, so it needs sign13389// extended. Other nodes produce a positive from two positive operands, so13390// zero extend instead.13391unsigned OuterExtend =13392N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;1339313394return DAG.getNode(13395OuterExtend, SDLoc(N), VT,13396DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));13397}1339813399// Try to turn (add (xor bool, 1) -1) into (neg bool).13400static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {13401SDValue N0 = N->getOperand(0);13402SDValue N1 = N->getOperand(1);13403EVT VT = N->getValueType(0);13404SDLoc DL(N);1340513406// RHS should be -1.13407if (!isAllOnesConstant(N1))13408return SDValue();1340913410// Look for (xor X, 1).13411if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))13412return SDValue();1341313414// First xor input should be 0 or 1.13415APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);13416if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))13417return SDValue();1341813419// Emit a negate of the setcc.13420return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),13421N0.getOperand(0));13422}1342313424static SDValue performADDCombine(SDNode *N,13425TargetLowering::DAGCombinerInfo &DCI,13426const RISCVSubtarget &Subtarget) {13427SelectionDAG &DAG = DCI.DAG;13428if (SDValue V = combineAddOfBooleanXor(N, DAG))13429return V;13430if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))13431return V;13432if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer())13433if (SDValue V = transformAddShlImm(N, DAG, Subtarget))13434return V;13435if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))13436return V;13437if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))13438return V;13439if (SDValue V = combineBinOpOfZExt(N, DAG))13440return V;1344113442// fold (add (select lhs, rhs, cc, 0, y), x) ->13443// (select lhs, rhs, cc, x, (add x, y))13444return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);13445}1344613447// Try to turn a sub boolean RHS and constant LHS into an addi.13448static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {13449SDValue N0 = N->getOperand(0);13450SDValue 
N1 = N->getOperand(1);13451EVT VT = N->getValueType(0);13452SDLoc DL(N);1345313454// Require a constant LHS.13455auto *N0C = dyn_cast<ConstantSDNode>(N0);13456if (!N0C)13457return SDValue();1345813459// All our optimizations involve subtracting 1 from the immediate and forming13460// an ADDI. Make sure the new immediate is valid for an ADDI.13461APInt ImmValMinus1 = N0C->getAPIntValue() - 1;13462if (!ImmValMinus1.isSignedIntN(12))13463return SDValue();1346413465SDValue NewLHS;13466if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {13467// (sub constant, (setcc x, y, eq/neq)) ->13468// (add (setcc x, y, neq/eq), constant - 1)13469ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();13470EVT SetCCOpVT = N1.getOperand(0).getValueType();13471if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())13472return SDValue();13473CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);13474NewLHS =13475DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);13476} else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&13477N1.getOperand(0).getOpcode() == ISD::SETCC) {13478// (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).13479// Since setcc returns a bool the xor is equivalent to 1-setcc.13480NewLHS = N1.getOperand(0);13481} else13482return SDValue();1348313484SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);13485return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);13486}1348713488// Looks for (sub (shl X, 8), X) where only bits 8, 16, 24, 32, etc. of X are13489// non-zero. Replace with orc.b.13490static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,13491const RISCVSubtarget &Subtarget) {13492if (!Subtarget.hasStdExtZbb())13493return SDValue();1349413495EVT VT = N->getValueType(0);1349613497if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)13498return SDValue();1349913500SDValue N0 = N->getOperand(0);13501SDValue N1 = N->getOperand(1);1350213503if (N0.getOpcode() != ISD::SHL || N0.getOperand(0) != N1 || !N0.hasOneUse())13504return SDValue();1350513506auto *ShAmtC = dyn_cast<ConstantSDNode>(N0.getOperand(1));13507if (!ShAmtC || ShAmtC->getZExtValue() != 8)13508return SDValue();1350913510APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0xfe));13511if (!DAG.MaskedValueIsZero(N1, Mask))13512return SDValue();1351313514return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, N1);13515}1351613517static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,13518const RISCVSubtarget &Subtarget) {13519if (SDValue V = combineSubOfBoolean(N, DAG))13520return V;1352113522EVT VT = N->getValueType(0);13523SDValue N0 = N->getOperand(0);13524SDValue N1 = N->getOperand(1);13525// fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)13526if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&13527isNullConstant(N1.getOperand(1))) {13528ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();13529if (CCVal == ISD::SETLT) {13530SDLoc DL(N);13531unsigned ShAmt = N0.getValueSizeInBits() - 1;13532return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),13533DAG.getConstant(ShAmt, DL, VT));13534}13535}1353613537if (SDValue V = combineBinOpOfZExt(N, DAG))13538return V;13539if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))13540return V;1354113542// fold (sub x, (select lhs, rhs, cc, 0, y)) ->13543// (select lhs, rhs, cc, x, (sub x, y))13544return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);13545}1354613547// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 
0/1.13548// Legalizing setcc can introduce xors like this. Doing this transform reduces13549// the number of xors and may allow the xor to fold into a branch condition.13550static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {13551SDValue N0 = N->getOperand(0);13552SDValue N1 = N->getOperand(1);13553bool IsAnd = N->getOpcode() == ISD::AND;1355413555if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)13556return SDValue();1355713558if (!N0.hasOneUse() || !N1.hasOneUse())13559return SDValue();1356013561SDValue N01 = N0.getOperand(1);13562SDValue N11 = N1.getOperand(1);1356313564// For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into13565// (xor X, -1) based on the upper bits of the other operand being 0. If the13566// operation is And, allow one of the Xors to use -1.13567if (isOneConstant(N01)) {13568if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))13569return SDValue();13570} else if (isOneConstant(N11)) {13571// N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.13572if (!(IsAnd && isAllOnesConstant(N01)))13573return SDValue();13574} else13575return SDValue();1357613577EVT VT = N->getValueType(0);1357813579SDValue N00 = N0.getOperand(0);13580SDValue N10 = N1.getOperand(0);1358113582// The LHS of the xors needs to be 0/1.13583APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);13584if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))13585return SDValue();1358613587// Invert the opcode and insert a new xor.13588SDLoc DL(N);13589unsigned Opc = IsAnd ? ISD::OR : ISD::AND;13590SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);13591return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));13592}1359313594// Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to13595// (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed13596// value to an unsigned value. This will be lowered to vmax and series of13597// vnclipu instructions later. 
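// (Editorial illustration, not upstream text; i16 -> i8 case: an element of
// -5 is >= 256 unsigned, so it selects sext(setgt(-5, 0)) == 0; an element of
// 300 selects sext(true) == -1, which truncates to 255; an element of 100 is
// below 256 and passes through. That is exactly trunc(smin(smax(X, 0), 255)).)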
This can be extended to other truncated types13598// other than i8 by replacing 256 and 255 with the equivalent constants for the13599// type.13600static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG) {13601EVT VT = N->getValueType(0);13602SDValue N0 = N->getOperand(0);13603EVT SrcVT = N0.getValueType();1360413605const TargetLowering &TLI = DAG.getTargetLoweringInfo();13606if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))13607return SDValue();1360813609if (N0.getOpcode() != ISD::VSELECT || !N0.hasOneUse())13610return SDValue();1361113612SDValue Cond = N0.getOperand(0);13613SDValue True = N0.getOperand(1);13614SDValue False = N0.getOperand(2);1361513616if (Cond.getOpcode() != ISD::SETCC)13617return SDValue();1361813619// FIXME: Support the version of this pattern with the select operands13620// swapped.13621ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();13622if (CCVal != ISD::SETULT)13623return SDValue();1362413625SDValue CondLHS = Cond.getOperand(0);13626SDValue CondRHS = Cond.getOperand(1);1362713628if (CondLHS != True)13629return SDValue();1363013631unsigned ScalarBits = VT.getScalarSizeInBits();1363213633// FIXME: Support other constants.13634ConstantSDNode *CondRHSC = isConstOrConstSplat(CondRHS);13635if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits))13636return SDValue();1363713638if (False.getOpcode() != ISD::SIGN_EXTEND)13639return SDValue();1364013641False = False.getOperand(0);1364213643if (False.getOpcode() != ISD::SETCC || False.getOperand(0) != True)13644return SDValue();1364513646ConstantSDNode *FalseRHSC = isConstOrConstSplat(False.getOperand(1));13647if (!FalseRHSC || !FalseRHSC->isZero())13648return SDValue();1364913650ISD::CondCode CCVal2 = cast<CondCodeSDNode>(False.getOperand(2))->get();13651if (CCVal2 != ISD::SETGT)13652return SDValue();1365313654// Emit the signed to unsigned saturation pattern.13655SDLoc DL(N);13656SDValue Max =13657DAG.getNode(ISD::SMAX, DL, SrcVT, True, DAG.getConstant(0, DL, SrcVT));13658SDValue Min =13659DAG.getNode(ISD::SMIN, DL, SrcVT, Max,13660DAG.getConstant((1ULL << ScalarBits) - 1, DL, SrcVT));13661return DAG.getNode(ISD::TRUNCATE, DL, VT, Min);13662}1366313664static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,13665const RISCVSubtarget &Subtarget) {13666SDValue N0 = N->getOperand(0);13667EVT VT = N->getValueType(0);1366813669// Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero13670// extending X. This is safe since we only need the LSB after the shift and13671// shift amounts larger than 31 would produce poison. If we wait until13672// type legalization, we'll create RISCVISD::SRLW and we can't recover it13673// to use a BEXT instruction.13674if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&13675N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&13676!isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {13677SDLoc DL(N0);13678SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));13679SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));13680SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);13681return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);13682}1368313684return combineTruncSelectToSMaxUSat(N, DAG);13685}1368613687// Combines two comparison operation and logic operation to one selection13688// operation(min, max) and logic operation. 
Returns new constructed Node if13689// conditions for optimization are satisfied.13690static SDValue performANDCombine(SDNode *N,13691TargetLowering::DAGCombinerInfo &DCI,13692const RISCVSubtarget &Subtarget) {13693SelectionDAG &DAG = DCI.DAG;1369413695SDValue N0 = N->getOperand(0);13696// Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero13697// extending X. This is safe since we only need the LSB after the shift and13698// shift amounts larger than 31 would produce poison. If we wait until13699// type legalization, we'll create RISCVISD::SRLW and we can't recover it13700// to use a BEXT instruction.13701if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&13702N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&13703N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&13704N0.hasOneUse()) {13705SDLoc DL(N);13706SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));13707SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));13708SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);13709SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,13710DAG.getConstant(1, DL, MVT::i64));13711return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);13712}1371313714if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))13715return V;13716if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))13717return V;1371813719if (DCI.isAfterLegalizeDAG())13720if (SDValue V = combineDeMorganOfBoolean(N, DAG))13721return V;1372213723// fold (and (select lhs, rhs, cc, -1, y), x) ->13724// (select lhs, rhs, cc, x, (and x, y))13725return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);13726}1372713728// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.13729// FIXME: Generalize to other binary operators with same operand.13730static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,13731SelectionDAG &DAG) {13732assert(N->getOpcode() == ISD::OR && "Unexpected opcode");1373313734if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||13735N1.getOpcode() != RISCVISD::CZERO_NEZ ||13736!N0.hasOneUse() || !N1.hasOneUse())13737return SDValue();1373813739// Should have the same condition.13740SDValue Cond = N0.getOperand(1);13741if (Cond != N1.getOperand(1))13742return SDValue();1374313744SDValue TrueV = N0.getOperand(0);13745SDValue FalseV = N1.getOperand(0);1374613747if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||13748TrueV.getOperand(1) != FalseV.getOperand(1) ||13749!isOneConstant(TrueV.getOperand(1)) ||13750!TrueV.hasOneUse() || !FalseV.hasOneUse())13751return SDValue();1375213753EVT VT = N->getValueType(0);13754SDLoc DL(N);1375513756SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),13757Cond);13758SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),13759Cond);13760SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);13761return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));13762}1376313764static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,13765const RISCVSubtarget &Subtarget) {13766SelectionDAG &DAG = DCI.DAG;1376713768if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))13769return V;13770if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))13771return V;1377213773if (DCI.isAfterLegalizeDAG())13774if (SDValue V = combineDeMorganOfBoolean(N, DAG))13775return V;1377613777// Look for Or of CZERO_EQZ/NEZ with same 
  // condition which is the select idiom.
  // We may be able to pull a common operation out of the true and false value.
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
    return V;
  if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
    return V;

  // fold (or (select cond, 0, y), x) ->
  //      (select cond, x, (or x, y))
  return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
}

static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
                                 const RISCVSubtarget &Subtarget) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
  // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
  // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
  if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
      N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
      N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
      !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
    SDLoc DL(N);
    SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
    SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
    SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
    SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
    return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
  }

  // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
  // NOTE: Assumes ROL being legal means ROLW is legal.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (N0.getOpcode() == RISCVISD::SLLW &&
      isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
      TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
    SDLoc DL(N);
    return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
                       DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
  }

  // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
  if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
    auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    if (ConstN00 && CC == ISD::SETLT) {
      EVT VT = N0.getValueType();
      SDLoc DL(N0);
      const APInt &Imm = ConstN00->getAPIntValue();
      if ((Imm + 1).isSignedIntN(12))
        return DAG.getSetCC(DL, VT, N0.getOperand(1),
                            DAG.getConstant(Imm + 1, DL, VT), CC);
    }
  }

  // Combine (xor (trunc (X cc Y)) 1) -> (trunc (X !cc Y)). This is needed with
  // RV64LegalI32 when the setcc is created after type legalization. An i1 xor
  // would have been promoted to i32, but the setcc would have i64 result.
  if (N->getValueType(0) == MVT::i32 && N0.getOpcode() == ISD::TRUNCATE &&
      isOneConstant(N1) && N0.getOperand(0).getOpcode() == ISD::SETCC) {
    SDValue N00 = N0.getOperand(0);
    SDLoc DL(N);
    SDValue LHS = N00.getOperand(0);
    SDValue RHS = N00.getOperand(1);
    SDValue CC = N00.getOperand(2);
    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                                               LHS.getValueType());
    SDValue Setcc = DAG.getSetCC(SDLoc(N00), N0.getOperand(0).getValueType(),
                                 LHS, RHS, NotCC);
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N->getValueType(0), Setcc);
  }

  if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
    return V;
  if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
    return V;

  // fold (xor (select cond, 0, y), x) ->
  //      (select cond, x, (xor x, y))
  return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
}
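// Editorial note (illustrative sketch, not upstream text): assuming the Zba
// semantics shNadd rd, rs1, rs2 == (rs1 << N) + rs2, the shl/shadd
// decompositions tried by expandMul below include, for example:
//   X * 20 == (X * 5) << 2        ->  sh2add t, X, X ; slli   d, t, 2
//   X * 11 == ((X * 5) << 1) + X  ->  sh2add t, X, X ; sh1add d, t, X
//   X * 45 == (X * 5) * 9         ->  sh2add t, X, X ; sh3add d, t, t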
// Try to expand a scalar multiply to a faster sequence.
static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
                         TargetLowering::DAGCombinerInfo &DCI,
                         const RISCVSubtarget &Subtarget) {

  EVT VT = N->getValueType(0);

  // LI + MUL is usually smaller than the alternative sequence.
  if (DAG.getMachineFunction().getFunction().hasMinSize())
    return SDValue();

  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
    return SDValue();

  if (VT != Subtarget.getXLenVT())
    return SDValue();

  const bool HasShlAdd =
      Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa();

  ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!CNode)
    return SDValue();
  uint64_t MulAmt = CNode->getZExtValue();

  // WARNING: The code below is knowingly incorrect with regard to undef
  // semantics. We're adding additional uses of X here, and in principle, we
  // should be freezing X before doing so. However, adding freeze here causes
  // real regressions, and no other target properly freezes X in these cases
  // either.
  SDValue X = N->getOperand(0);

  if (HasShlAdd) {
    for (uint64_t Divisor : {3, 5, 9}) {
      if (MulAmt % Divisor != 0)
        continue;
      uint64_t MulAmt2 = MulAmt / Divisor;
      // 3/5/9 * 2^N -> shl (shXadd X, X), N
      if (isPowerOf2_64(MulAmt2)) {
        SDLoc DL(N);
        SDValue X = N->getOperand(0);
        // Put the shift first if we can fold a zext into the
        // shift forming a slli.uw.
        if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
            X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
          SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
                                    DAG.getConstant(Log2_64(MulAmt2), DL, VT));
          return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
                             DAG.getConstant(Log2_64(Divisor - 1), DL, VT),
                             Shl);
        }
        // Otherwise, put the shl second so that it can fold with following
        // instructions (e.g.
sext or add).13915SDValue Mul359 =13916DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,13917DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);13918return DAG.getNode(ISD::SHL, DL, VT, Mul359,13919DAG.getConstant(Log2_64(MulAmt2), DL, VT));13920}1392113922// 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)13923if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {13924SDLoc DL(N);13925SDValue Mul359 =13926DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,13927DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);13928return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,13929DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT),13930Mul359);13931}13932}1393313934// If this is a power 2 + 2/4/8, we can use a shift followed by a single13935// shXadd. First check if this a sum of two power of 2s because that's13936// easy. Then count how many zeros are up to the first bit.13937if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {13938unsigned ScaleShift = llvm::countr_zero(MulAmt);13939if (ScaleShift >= 1 && ScaleShift < 4) {13940unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));13941SDLoc DL(N);13942SDValue Shift1 =13943DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));13944return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,13945DAG.getConstant(ScaleShift, DL, VT), Shift1);13946}13947}1394813949// 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)13950// This is the two instruction form, there are also three instruction13951// variants we could implement. e.g.13952// (2^(1,2,3) * 3,5,9 + 1) << C213953// 2^(C1>3) * 3,5,9 +/- 113954for (uint64_t Divisor : {3, 5, 9}) {13955uint64_t C = MulAmt - 1;13956if (C <= Divisor)13957continue;13958unsigned TZ = llvm::countr_zero(C);13959if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {13960SDLoc DL(N);13961SDValue Mul359 =13962DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,13963DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);13964return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,13965DAG.getConstant(TZ, DL, VT), X);13966}13967}1396813969// 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))13970if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {13971unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);13972if (ScaleShift >= 1 && ScaleShift < 4) {13973unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));13974SDLoc DL(N);13975SDValue Shift1 =13976DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));13977return DAG.getNode(ISD::ADD, DL, VT, Shift1,13978DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,13979DAG.getConstant(ScaleShift, DL, VT), X));13980}13981}1398213983// 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x))13984for (uint64_t Offset : {3, 5, 9}) {13985if (isPowerOf2_64(MulAmt + Offset)) {13986SDLoc DL(N);13987SDValue Shift1 =13988DAG.getNode(ISD::SHL, DL, VT, X,13989DAG.getConstant(Log2_64(MulAmt + Offset), DL, VT));13990SDValue Mul359 =13991DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,13992DAG.getConstant(Log2_64(Offset - 1), DL, VT), X);13993return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);13994}13995}13996}1399713998// 2^N - 2^M -> (sub (shl X, C1), (shl X, C2))13999uint64_t MulAmtLowBit = MulAmt & (-MulAmt);14000if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {14001uint64_t ShiftAmt1 = MulAmt + MulAmtLowBit;14002SDLoc DL(N);14003SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),14004DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));14005SDValue Shift2 =14006DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),14007DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));14008return DAG.getNode(ISD::SUB, DL, VT, Shift1, 
Shift2);14009}1401014011return SDValue();14012}1401314014// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) ->14015// (bitcast (sra (v2Xi16 (bitcast X)), 15))14016// Same for other equivalent types with other equivalent constants.14017static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG) {14018EVT VT = N->getValueType(0);14019const TargetLowering &TLI = DAG.getTargetLoweringInfo();1402014021// Do this for legal vectors unless they are i1 or i8 vectors.14022if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16)14023return SDValue();1402414025if (N->getOperand(0).getOpcode() != ISD::AND ||14026N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)14027return SDValue();1402814029SDValue And = N->getOperand(0);14030SDValue Srl = And.getOperand(0);1403114032APInt V1, V2, V3;14033if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||14034!ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) ||14035!ISD::isConstantSplatVector(Srl.getOperand(1).getNode(), V3))14036return SDValue();1403714038unsigned HalfSize = VT.getScalarSizeInBits() / 2;14039if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||14040V3 != (HalfSize - 1))14041return SDValue();1404214043EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),14044EVT::getIntegerVT(*DAG.getContext(), HalfSize),14045VT.getVectorElementCount() * 2);14046SDLoc DL(N);14047SDValue Cast = DAG.getNode(ISD::BITCAST, DL, HalfVT, Srl.getOperand(0));14048SDValue Sra = DAG.getNode(ISD::SRA, DL, HalfVT, Cast,14049DAG.getConstant(HalfSize - 1, DL, HalfVT));14050return DAG.getNode(ISD::BITCAST, DL, VT, Sra);14051}1405214053static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,14054TargetLowering::DAGCombinerInfo &DCI,14055const RISCVSubtarget &Subtarget) {14056EVT VT = N->getValueType(0);14057if (!VT.isVector())14058return expandMul(N, DAG, DCI, Subtarget);1405914060SDLoc DL(N);14061SDValue N0 = N->getOperand(0);14062SDValue N1 = N->getOperand(1);14063SDValue MulOper;14064unsigned AddSubOpc;1406514066// vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)14067// (mul x, add (y, 1)) -> (add x, (mul x, y))14068// vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))14069// (mul x, (sub 1, y)) -> (sub x, (mul x, y))14070auto IsAddSubWith1 = [&](SDValue V) -> bool {14071AddSubOpc = V->getOpcode();14072if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {14073SDValue Opnd = V->getOperand(1);14074MulOper = V->getOperand(0);14075if (AddSubOpc == ISD::SUB)14076std::swap(Opnd, MulOper);14077if (isOneOrOneSplat(Opnd))14078return true;14079}14080return false;14081};1408214083if (IsAddSubWith1(N0)) {14084SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);14085return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);14086}1408714088if (IsAddSubWith1(N1)) {14089SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);14090return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);14091}1409214093if (SDValue V = combineBinOpOfZExt(N, DAG))14094return V;1409514096if (SDValue V = combineVectorMulToSraBitcast(N, DAG))14097return V;1409814099return SDValue();14100}1410114102/// According to the property that indexed load/store instructions zero-extend14103/// their indices, try to narrow the type of index operand.14104static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {14105if (isIndexTypeSigned(IndexType))14106return false;1410714108if (!N->hasOneUse())14109return false;1411014111EVT VT = N.getValueType();14112SDLoc DL(N);1411314114// In general, what we're 
doing here is seeing if we can sink a truncate to14115// a smaller element type into the expression tree building our index.14116// TODO: We can generalize this and handle a bunch more cases if useful.1411714118// Narrow a buildvector to the narrowest element type. This requires less14119// work and less register pressure at high LMUL, and creates smaller constants14120// which may be cheaper to materialize.14121if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {14122KnownBits Known = DAG.computeKnownBits(N);14123unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());14124LLVMContext &C = *DAG.getContext();14125EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);14126if (ResultVT.bitsLT(VT.getVectorElementType())) {14127N = DAG.getNode(ISD::TRUNCATE, DL,14128VT.changeVectorElementType(ResultVT), N);14129return true;14130}14131}1413214133// Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).14134if (N.getOpcode() != ISD::SHL)14135return false;1413614137SDValue N0 = N.getOperand(0);14138if (N0.getOpcode() != ISD::ZERO_EXTEND &&14139N0.getOpcode() != RISCVISD::VZEXT_VL)14140return false;14141if (!N0->hasOneUse())14142return false;1414314144APInt ShAmt;14145SDValue N1 = N.getOperand(1);14146if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))14147return false;1414814149SDValue Src = N0.getOperand(0);14150EVT SrcVT = Src.getValueType();14151unsigned SrcElen = SrcVT.getScalarSizeInBits();14152unsigned ShAmtV = ShAmt.getZExtValue();14153unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);14154NewElen = std::max(NewElen, 8U);1415514156// Skip if NewElen is not narrower than the original extended type.14157if (NewElen >= N0.getValueType().getScalarSizeInBits())14158return false;1415914160EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);14161EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);1416214163SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());14164SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);14165N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);14166return true;14167}1416814169// Replace (seteq (i64 (and X, 0xffffffff)), C1) with14170// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from14171// bit 31. Same for setne. 
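// (Editorial example, added for illustration: (seteq (and X, 0xffffffff),
// 0xfffffff0) becomes (seteq (sext_inreg X, i32), -16), and -16 fits in a
// 12-bit immediate.)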
C1' may be cheaper to materialize and the sext_inreg14172// can become a sext.w instead of a shift pair.14173static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,14174const RISCVSubtarget &Subtarget) {14175SDValue N0 = N->getOperand(0);14176SDValue N1 = N->getOperand(1);14177EVT VT = N->getValueType(0);14178EVT OpVT = N0.getValueType();1417914180if (OpVT != MVT::i64 || !Subtarget.is64Bit())14181return SDValue();1418214183// RHS needs to be a constant.14184auto *N1C = dyn_cast<ConstantSDNode>(N1);14185if (!N1C)14186return SDValue();1418714188// LHS needs to be (and X, 0xffffffff).14189if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||14190!isa<ConstantSDNode>(N0.getOperand(1)) ||14191N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))14192return SDValue();1419314194// Looking for an equality compare.14195ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();14196if (!isIntEqualitySetCC(Cond))14197return SDValue();1419814199// Don't do this if the sign bit is provably zero, it will be turned back into14200// an AND.14201APInt SignMask = APInt::getOneBitSet(64, 31);14202if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))14203return SDValue();1420414205const APInt &C1 = N1C->getAPIntValue();1420614207SDLoc dl(N);14208// If the constant is larger than 2^32 - 1 it is impossible for both sides14209// to be equal.14210if (C1.getActiveBits() > 32)14211return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);1421214213SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,14214N0.getOperand(0), DAG.getValueType(MVT::i32));14215return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),14216dl, OpVT), Cond);14217}1421814219static SDValue14220performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,14221const RISCVSubtarget &Subtarget) {14222SDValue Src = N->getOperand(0);14223EVT VT = N->getValueType(0);1422414225// Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)14226if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH &&14227cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16))14228return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,14229Src.getOperand(0));1423014231return SDValue();14232}1423314234namespace {14235// Forward declaration of the structure holding the necessary information to14236// apply a combine.14237struct CombineResult;1423814239enum ExtKind : uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 };14240/// Helper class for folding sign/zero extensions.14241/// In particular, this class is used for the following combines:14242/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w14243/// sub | sub_vl -> vwsub(u) | vwsub(u)_w14244/// mul | mul_vl -> vwmul(u) | vwmul_su14245/// shl | shl_vl -> vwsll14246/// fadd -> vfwadd | vfwadd_w14247/// fsub -> vfwsub | vfwsub_w14248/// fmul -> vfwmul14249/// An object of this class represents an operand of the operation we want to14250/// combine.14251/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of14252/// NodeExtensionHelper for `a` and one for `b`.14253///14254/// This class abstracts away how the extension is materialized and14255/// how its number of users affect the combines.14256///14257/// In particular:14258/// - VWADD_W is conceptually == add(op0, sext(op1))14259/// - VWADDU_W == add(op0, zext(op1))14260/// - VWSUB_W == sub(op0, sext(op1))14261/// - VWSUBU_W == sub(op0, zext(op1))14262/// - VFWADD_W == fadd(op0, fpext(op1))14263/// - VFWSUB_W == fsub(op0, fpext(op1))14264/// And VMV_V_X_VL, depending on the value, is 
conceptually equivalent to14265/// zext|sext(smaller_value).14266struct NodeExtensionHelper {14267/// Records if this operand is like being zero extended.14268bool SupportsZExt;14269/// Records if this operand is like being sign extended.14270/// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For14271/// instance, a splat constant (e.g., 3), would support being both sign and14272/// zero extended.14273bool SupportsSExt;14274/// Records if this operand is like being floating-Point extended.14275bool SupportsFPExt;14276/// This boolean captures whether we care if this operand would still be14277/// around after the folding happens.14278bool EnforceOneUse;14279/// Original value that this NodeExtensionHelper represents.14280SDValue OrigOperand;1428114282/// Get the value feeding the extension or the value itself.14283/// E.g., for zext(a), this would return a.14284SDValue getSource() const {14285switch (OrigOperand.getOpcode()) {14286case ISD::ZERO_EXTEND:14287case ISD::SIGN_EXTEND:14288case RISCVISD::VSEXT_VL:14289case RISCVISD::VZEXT_VL:14290case RISCVISD::FP_EXTEND_VL:14291return OrigOperand.getOperand(0);14292default:14293return OrigOperand;14294}14295}1429614297/// Check if this instance represents a splat.14298bool isSplat() const {14299return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||14300OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;14301}1430214303/// Get the extended opcode.14304unsigned getExtOpc(ExtKind SupportsExt) const {14305switch (SupportsExt) {14306case ExtKind::SExt:14307return RISCVISD::VSEXT_VL;14308case ExtKind::ZExt:14309return RISCVISD::VZEXT_VL;14310case ExtKind::FPExt:14311return RISCVISD::FP_EXTEND_VL;14312}14313llvm_unreachable("Unknown ExtKind enum");14314}1431514316/// Get or create a value that can feed \p Root with the given extension \p14317/// SupportsExt. If \p SExt is std::nullopt, this returns the source of this14318/// operand. 
\see ::getSource().14319SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,14320const RISCVSubtarget &Subtarget,14321std::optional<ExtKind> SupportsExt) const {14322if (!SupportsExt.has_value())14323return OrigOperand;1432414325MVT NarrowVT = getNarrowType(Root, *SupportsExt);1432614327SDValue Source = getSource();14328assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));14329if (Source.getValueType() == NarrowVT)14330return Source;1433114332unsigned ExtOpc = getExtOpc(*SupportsExt);1433314334// If we need an extension, we should be changing the type.14335SDLoc DL(OrigOperand);14336auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);14337switch (OrigOperand.getOpcode()) {14338case ISD::ZERO_EXTEND:14339case ISD::SIGN_EXTEND:14340case RISCVISD::VSEXT_VL:14341case RISCVISD::VZEXT_VL:14342case RISCVISD::FP_EXTEND_VL:14343return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);14344case ISD::SPLAT_VECTOR:14345return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));14346case RISCVISD::VMV_V_X_VL:14347return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,14348DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);14349case RISCVISD::VFMV_V_F_VL:14350Source = Source.getOperand(1);14351assert(Source.getOpcode() == ISD::FP_EXTEND && "Unexpected source");14352Source = Source.getOperand(0);14353assert(Source.getValueType() == NarrowVT.getVectorElementType());14354return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, NarrowVT,14355DAG.getUNDEF(NarrowVT), Source, VL);14356default:14357// Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL14358// and that operand should already have the right NarrowVT so no14359// extension should be required at this point.14360llvm_unreachable("Unsupported opcode");14361}14362}1436314364/// Helper function to get the narrow type for \p Root.14365/// The narrow type is the type of \p Root where we divided the size of each14366/// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.14367/// \pre Both the narrow type and the original type should be legal.14368static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {14369MVT VT = Root->getSimpleValueType(0);1437014371// Determine the narrow size.14372unsigned NarrowSize = VT.getScalarSizeInBits() / 2;1437314374MVT EltVT = SupportsExt == ExtKind::FPExt14375? MVT::getFloatingPointVT(NarrowSize)14376: MVT::getIntegerVT(NarrowSize);1437714378assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 
16 : 8) &&14379"Trying to extend something we can't represent");14380MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());14381return NarrowVT;14382}1438314384/// Get the opcode to materialize:14385/// Opcode(sext(a), sext(b)) -> newOpcode(a, b)14386static unsigned getSExtOpcode(unsigned Opcode) {14387switch (Opcode) {14388case ISD::ADD:14389case RISCVISD::ADD_VL:14390case RISCVISD::VWADD_W_VL:14391case RISCVISD::VWADDU_W_VL:14392case ISD::OR:14393return RISCVISD::VWADD_VL;14394case ISD::SUB:14395case RISCVISD::SUB_VL:14396case RISCVISD::VWSUB_W_VL:14397case RISCVISD::VWSUBU_W_VL:14398return RISCVISD::VWSUB_VL;14399case ISD::MUL:14400case RISCVISD::MUL_VL:14401return RISCVISD::VWMUL_VL;14402default:14403llvm_unreachable("Unexpected opcode");14404}14405}1440614407/// Get the opcode to materialize:14408/// Opcode(zext(a), zext(b)) -> newOpcode(a, b)14409static unsigned getZExtOpcode(unsigned Opcode) {14410switch (Opcode) {14411case ISD::ADD:14412case RISCVISD::ADD_VL:14413case RISCVISD::VWADD_W_VL:14414case RISCVISD::VWADDU_W_VL:14415case ISD::OR:14416return RISCVISD::VWADDU_VL;14417case ISD::SUB:14418case RISCVISD::SUB_VL:14419case RISCVISD::VWSUB_W_VL:14420case RISCVISD::VWSUBU_W_VL:14421return RISCVISD::VWSUBU_VL;14422case ISD::MUL:14423case RISCVISD::MUL_VL:14424return RISCVISD::VWMULU_VL;14425case ISD::SHL:14426case RISCVISD::SHL_VL:14427return RISCVISD::VWSLL_VL;14428default:14429llvm_unreachable("Unexpected opcode");14430}14431}1443214433/// Get the opcode to materialize:14434/// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)14435static unsigned getFPExtOpcode(unsigned Opcode) {14436switch (Opcode) {14437case RISCVISD::FADD_VL:14438case RISCVISD::VFWADD_W_VL:14439return RISCVISD::VFWADD_VL;14440case RISCVISD::FSUB_VL:14441case RISCVISD::VFWSUB_W_VL:14442return RISCVISD::VFWSUB_VL;14443case RISCVISD::FMUL_VL:14444return RISCVISD::VFWMUL_VL;14445default:14446llvm_unreachable("Unexpected opcode");14447}14448}1444914450/// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->14451/// newOpcode(a, b).14452static unsigned getSUOpcode(unsigned Opcode) {14453assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&14454"SU is only supported for MUL");14455return RISCVISD::VWMULSU_VL;14456}1445714458/// Get the opcode to materialize14459/// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).14460static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {14461switch (Opcode) {14462case ISD::ADD:14463case RISCVISD::ADD_VL:14464case ISD::OR:14465return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL14466: RISCVISD::VWADDU_W_VL;14467case ISD::SUB:14468case RISCVISD::SUB_VL:14469return SupportsExt == ExtKind::SExt ? 
RISCVISD::VWSUB_W_VL14470: RISCVISD::VWSUBU_W_VL;14471case RISCVISD::FADD_VL:14472return RISCVISD::VFWADD_W_VL;14473case RISCVISD::FSUB_VL:14474return RISCVISD::VFWSUB_W_VL;14475default:14476llvm_unreachable("Unexpected opcode");14477}14478}1447914480using CombineToTry = std::function<std::optional<CombineResult>(14481SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,14482const NodeExtensionHelper & /*RHS*/, SelectionDAG &,14483const RISCVSubtarget &)>;1448414485/// Check if this node needs to be fully folded or extended for all users.14486bool needToPromoteOtherUsers() const { return EnforceOneUse; }1448714488void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,14489const RISCVSubtarget &Subtarget) {14490unsigned Opc = OrigOperand.getOpcode();14491MVT VT = OrigOperand.getSimpleValueType();1449214493assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&14494"Unexpected Opcode");1449514496// The pasthru must be undef for tail agnostic.14497if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())14498return;1449914500// Get the scalar value.14501SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)14502: OrigOperand.getOperand(1);1450314504// See if we have enough sign bits or zero bits in the scalar to use a14505// widening opcode by splatting to smaller element size.14506unsigned EltBits = VT.getScalarSizeInBits();14507unsigned ScalarBits = Op.getValueSizeInBits();14508// If we're not getting all bits from the element, we need special handling.14509if (ScalarBits < EltBits) {14510// This should only occur on RV32.14511assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&14512!Subtarget.is64Bit() && "Unexpected splat");14513// vmv.v.x sign extends narrow inputs.14514SupportsSExt = true;1451514516// If the input is positive, then sign extend is also zero extend.14517if (DAG.SignBitIsZero(Op))14518SupportsZExt = true;1451914520EnforceOneUse = false;14521return;14522}1452314524unsigned NarrowSize = EltBits / 2;14525// If the narrow type cannot be expressed with a legal VMV,14526// this is not a valid candidate.14527if (NarrowSize < 8)14528return;1452914530if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)14531SupportsSExt = true;1453214533if (DAG.MaskedValueIsZero(Op,14534APInt::getBitsSetFrom(ScalarBits, NarrowSize)))14535SupportsZExt = true;1453614537EnforceOneUse = false;14538}1453914540/// Helper method to set the various fields of this struct based on the14541/// type of \p Root.14542void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,14543const RISCVSubtarget &Subtarget) {14544SupportsZExt = false;14545SupportsSExt = false;14546SupportsFPExt = false;14547EnforceOneUse = true;14548unsigned Opc = OrigOperand.getOpcode();14549// For the nodes we handle below, we end up using their inputs directly: see14550// getSource(). 
However since they either don't have a passthru or we check14551// that their passthru is undef, we can safely ignore their mask and VL.14552switch (Opc) {14553case ISD::ZERO_EXTEND:14554case ISD::SIGN_EXTEND: {14555MVT VT = OrigOperand.getSimpleValueType();14556if (!VT.isVector())14557break;1455814559SDValue NarrowElt = OrigOperand.getOperand(0);14560MVT NarrowVT = NarrowElt.getSimpleValueType();14561// i1 types are legal but we can't select V{S,Z}EXT_VLs with them.14562if (NarrowVT.getVectorElementType() == MVT::i1)14563break;1456414565SupportsZExt = Opc == ISD::ZERO_EXTEND;14566SupportsSExt = Opc == ISD::SIGN_EXTEND;14567break;14568}14569case RISCVISD::VZEXT_VL:14570SupportsZExt = true;14571break;14572case RISCVISD::VSEXT_VL:14573SupportsSExt = true;14574break;14575case RISCVISD::FP_EXTEND_VL:14576SupportsFPExt = true;14577break;14578case ISD::SPLAT_VECTOR:14579case RISCVISD::VMV_V_X_VL:14580fillUpExtensionSupportForSplat(Root, DAG, Subtarget);14581break;14582case RISCVISD::VFMV_V_F_VL: {14583MVT VT = OrigOperand.getSimpleValueType();1458414585if (!OrigOperand.getOperand(0).isUndef())14586break;1458714588SDValue Op = OrigOperand.getOperand(1);14589if (Op.getOpcode() != ISD::FP_EXTEND)14590break;1459114592unsigned NarrowSize = VT.getScalarSizeInBits() / 2;14593unsigned ScalarBits = Op.getOperand(0).getValueSizeInBits();14594if (NarrowSize != ScalarBits)14595break;1459614597SupportsFPExt = true;14598break;14599}14600default:14601break;14602}14603}1460414605/// Check if \p Root supports any extension folding combines.14606static bool isSupportedRoot(const SDNode *Root,14607const RISCVSubtarget &Subtarget) {14608switch (Root->getOpcode()) {14609case ISD::ADD:14610case ISD::SUB:14611case ISD::MUL: {14612return Root->getValueType(0).isScalableVector();14613}14614case ISD::OR: {14615return Root->getValueType(0).isScalableVector() &&14616Root->getFlags().hasDisjoint();14617}14618// Vector Widening Integer Add/Sub/Mul Instructions14619case RISCVISD::ADD_VL:14620case RISCVISD::MUL_VL:14621case RISCVISD::VWADD_W_VL:14622case RISCVISD::VWADDU_W_VL:14623case RISCVISD::SUB_VL:14624case RISCVISD::VWSUB_W_VL:14625case RISCVISD::VWSUBU_W_VL:14626// Vector Widening Floating-Point Add/Sub/Mul Instructions14627case RISCVISD::FADD_VL:14628case RISCVISD::FSUB_VL:14629case RISCVISD::FMUL_VL:14630case RISCVISD::VFWADD_W_VL:14631case RISCVISD::VFWSUB_W_VL:14632return true;14633case ISD::SHL:14634return Root->getValueType(0).isScalableVector() &&14635Subtarget.hasStdExtZvbb();14636case RISCVISD::SHL_VL:14637return Subtarget.hasStdExtZvbb();14638default:14639return false;14640}14641}1464214643/// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).14644NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,14645const RISCVSubtarget &Subtarget) {14646assert(isSupportedRoot(Root, Subtarget) &&14647"Trying to build an helper with an "14648"unsupported root");14649assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");14650assert(DAG.getTargetLoweringInfo().isTypeLegal(Root->getValueType(0)));14651OrigOperand = Root->getOperand(OperandIdx);1465214653unsigned Opc = Root->getOpcode();14654switch (Opc) {14655// We consider14656// VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))14657// VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))14658// VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))14659case RISCVISD::VWADD_W_VL:14660case RISCVISD::VWADDU_W_VL:14661case RISCVISD::VWSUB_W_VL:14662case RISCVISD::VWSUBU_W_VL:14663case RISCVISD::VFWADD_W_VL:14664case 
RISCVISD::VFWSUB_W_VL:14665if (OperandIdx == 1) {14666SupportsZExt =14667Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;14668SupportsSExt =14669Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWSUB_W_VL;14670SupportsFPExt =14671Opc == RISCVISD::VFWADD_W_VL || Opc == RISCVISD::VFWSUB_W_VL;14672// There's no existing extension here, so we don't have to worry about14673// making sure it gets removed.14674EnforceOneUse = false;14675break;14676}14677[[fallthrough]];14678default:14679fillUpExtensionSupport(Root, DAG, Subtarget);14680break;14681}14682}1468314684/// Helper function to get the Mask and VL from \p Root.14685static std::pair<SDValue, SDValue>14686getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,14687const RISCVSubtarget &Subtarget) {14688assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");14689switch (Root->getOpcode()) {14690case ISD::ADD:14691case ISD::SUB:14692case ISD::MUL:14693case ISD::OR:14694case ISD::SHL: {14695SDLoc DL(Root);14696MVT VT = Root->getSimpleValueType(0);14697return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);14698}14699default:14700return std::make_pair(Root->getOperand(3), Root->getOperand(4));14701}14702}1470314704/// Helper function to check if \p N is commutative with respect to the14705/// foldings that are supported by this class.14706static bool isCommutative(const SDNode *N) {14707switch (N->getOpcode()) {14708case ISD::ADD:14709case ISD::MUL:14710case ISD::OR:14711case RISCVISD::ADD_VL:14712case RISCVISD::MUL_VL:14713case RISCVISD::VWADD_W_VL:14714case RISCVISD::VWADDU_W_VL:14715case RISCVISD::FADD_VL:14716case RISCVISD::FMUL_VL:14717case RISCVISD::VFWADD_W_VL:14718return true;14719case ISD::SUB:14720case RISCVISD::SUB_VL:14721case RISCVISD::VWSUB_W_VL:14722case RISCVISD::VWSUBU_W_VL:14723case RISCVISD::FSUB_VL:14724case RISCVISD::VFWSUB_W_VL:14725case ISD::SHL:14726case RISCVISD::SHL_VL:14727return false;14728default:14729llvm_unreachable("Unexpected opcode");14730}14731}1473214733/// Get a list of combine to try for folding extensions in \p Root.14734/// Note that each returned CombineToTry function doesn't actually modify14735/// anything. 
Instead they produce an optional CombineResult that if not None,14736/// need to be materialized for the combine to be applied.14737/// \see CombineResult::materialize.14738/// If the related CombineToTry function returns std::nullopt, that means the14739/// combine didn't match.14740static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);14741};1474214743/// Helper structure that holds all the necessary information to materialize a14744/// combine that does some extension folding.14745struct CombineResult {14746/// Opcode to be generated when materializing the combine.14747unsigned TargetOpcode;14748// No value means no extension is needed.14749std::optional<ExtKind> LHSExt;14750std::optional<ExtKind> RHSExt;14751/// Root of the combine.14752SDNode *Root;14753/// LHS of the TargetOpcode.14754NodeExtensionHelper LHS;14755/// RHS of the TargetOpcode.14756NodeExtensionHelper RHS;1475714758CombineResult(unsigned TargetOpcode, SDNode *Root,14759const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,14760const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)14761: TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),14762LHS(LHS), RHS(RHS) {}1476314764/// Return a value that uses TargetOpcode and that can be used to replace14765/// Root.14766/// The actual replacement is *not* done in that method.14767SDValue materialize(SelectionDAG &DAG,14768const RISCVSubtarget &Subtarget) const {14769SDValue Mask, VL, Merge;14770std::tie(Mask, VL) =14771NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);14772switch (Root->getOpcode()) {14773default:14774Merge = Root->getOperand(2);14775break;14776case ISD::ADD:14777case ISD::SUB:14778case ISD::MUL:14779case ISD::OR:14780case ISD::SHL:14781Merge = DAG.getUNDEF(Root->getValueType(0));14782break;14783}14784return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),14785LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),14786RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),14787Merge, Mask, VL);14788}14789};1479014791/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))14792/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both14793/// are zext) and LHS and RHS can be folded into Root.14794/// AllowExtMask define which form `ext` can take in this pattern.14795///14796/// \note If the pattern can match with both zext and sext, the returned14797/// CombineResult will feature the zext result.14798///14799/// \returns std::nullopt if the pattern doesn't match or a CombineResult that14800/// can be used to apply the pattern.14801static std::optional<CombineResult>14802canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,14803const NodeExtensionHelper &RHS,14804uint8_t AllowExtMask, SelectionDAG &DAG,14805const RISCVSubtarget &Subtarget) {14806if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)14807return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),14808Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,14809/*RHSExt=*/{ExtKind::ZExt});14810if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)14811return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),14812Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,14813/*RHSExt=*/{ExtKind::SExt});14814if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)14815return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),14816Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, 
RHS,14817/*RHSExt=*/{ExtKind::FPExt});14818return std::nullopt;14819}1482014821/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))14822/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both14823/// are zext) and LHS and RHS can be folded into Root.14824///14825/// \returns std::nullopt if the pattern doesn't match or a CombineResult that14826/// can be used to apply the pattern.14827static std::optional<CombineResult>14828canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,14829const NodeExtensionHelper &RHS, SelectionDAG &DAG,14830const RISCVSubtarget &Subtarget) {14831return canFoldToVWWithSameExtensionImpl(14832Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,14833Subtarget);14834}1483514836/// Check if \p Root follows a pattern Root(LHS, ext(RHS))14837///14838/// \returns std::nullopt if the pattern doesn't match or a CombineResult that14839/// can be used to apply the pattern.14840static std::optional<CombineResult>14841canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,14842const NodeExtensionHelper &RHS, SelectionDAG &DAG,14843const RISCVSubtarget &Subtarget) {14844if (RHS.SupportsFPExt)14845return CombineResult(14846NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),14847Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});1484814849// FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar14850// sext/zext?14851// Control this behavior behind an option (AllowSplatInVW_W) for testing14852// purposes.14853if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))14854return CombineResult(14855NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,14856LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});14857if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))14858return CombineResult(14859NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,14860LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});14861return std::nullopt;14862}1486314864/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))14865///14866/// \returns std::nullopt if the pattern doesn't match or a CombineResult that14867/// can be used to apply the pattern.14868static std::optional<CombineResult>14869canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,14870const NodeExtensionHelper &RHS, SelectionDAG &DAG,14871const RISCVSubtarget &Subtarget) {14872return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::SExt, DAG,14873Subtarget);14874}1487514876/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))14877///14878/// \returns std::nullopt if the pattern doesn't match or a CombineResult that14879/// can be used to apply the pattern.14880static std::optional<CombineResult>14881canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,14882const NodeExtensionHelper &RHS, SelectionDAG &DAG,14883const RISCVSubtarget &Subtarget) {14884return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,14885Subtarget);14886}1488714888/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS))14889///14890/// \returns std::nullopt if the pattern doesn't match or a CombineResult that14891/// can be used to apply the pattern.14892static std::optional<CombineResult>14893canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,14894const NodeExtensionHelper &RHS, SelectionDAG &DAG,14895const RISCVSubtarget &Subtarget) {14896return 
canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::FPExt, DAG,14897Subtarget);14898}1489914900/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))14901///14902/// \returns std::nullopt if the pattern doesn't match or a CombineResult that14903/// can be used to apply the pattern.14904static std::optional<CombineResult>14905canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,14906const NodeExtensionHelper &RHS, SelectionDAG &DAG,14907const RISCVSubtarget &Subtarget) {1490814909if (!LHS.SupportsSExt || !RHS.SupportsZExt)14910return std::nullopt;14911return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),14912Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,14913/*RHSExt=*/{ExtKind::ZExt});14914}1491514916SmallVector<NodeExtensionHelper::CombineToTry>14917NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {14918SmallVector<CombineToTry> Strategies;14919switch (Root->getOpcode()) {14920case ISD::ADD:14921case ISD::SUB:14922case ISD::OR:14923case RISCVISD::ADD_VL:14924case RISCVISD::SUB_VL:14925case RISCVISD::FADD_VL:14926case RISCVISD::FSUB_VL:14927// add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub14928Strategies.push_back(canFoldToVWWithSameExtension);14929// add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w}|vfwadd_w|vfwsub_w14930Strategies.push_back(canFoldToVW_W);14931break;14932case RISCVISD::FMUL_VL:14933Strategies.push_back(canFoldToVWWithSameExtension);14934break;14935case ISD::MUL:14936case RISCVISD::MUL_VL:14937// mul -> vwmul(u)14938Strategies.push_back(canFoldToVWWithSameExtension);14939// mul -> vwmulsu14940Strategies.push_back(canFoldToVW_SU);14941break;14942case ISD::SHL:14943case RISCVISD::SHL_VL:14944// shl -> vwsll14945Strategies.push_back(canFoldToVWWithZEXT);14946break;14947case RISCVISD::VWADD_W_VL:14948case RISCVISD::VWSUB_W_VL:14949// vwadd_w|vwsub_w -> vwadd|vwsub14950Strategies.push_back(canFoldToVWWithSEXT);14951break;14952case RISCVISD::VWADDU_W_VL:14953case RISCVISD::VWSUBU_W_VL:14954// vwaddu_w|vwsubu_w -> vwaddu|vwsubu14955Strategies.push_back(canFoldToVWWithZEXT);14956break;14957case RISCVISD::VFWADD_W_VL:14958case RISCVISD::VFWSUB_W_VL:14959// vfwadd_w|vfwsub_w -> vfwadd|vfwsub14960Strategies.push_back(canFoldToVWWithFPEXT);14961break;14962default:14963llvm_unreachable("Unexpected opcode");14964}14965return Strategies;14966}14967} // End anonymous namespace.1496814969/// Combine a binary operation to its equivalent VW or VW_W form.14970/// The supported combines are:14971/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w14972/// sub | sub_vl -> vwsub(u) | vwsub(u)_w14973/// mul | mul_vl -> vwmul(u) | vwmul_su14974/// shl | shl_vl -> vwsll14975/// fadd_vl -> vfwadd | vfwadd_w14976/// fsub_vl -> vfwsub | vfwsub_w14977/// fmul_vl -> vfwmul14978/// vwadd_w(u) -> vwadd(u)14979/// vwsub_w(u) -> vwsub(u)14980/// vfwadd_w -> vfwadd14981/// vfwsub_w -> vfwsub14982static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,14983TargetLowering::DAGCombinerInfo &DCI,14984const RISCVSubtarget &Subtarget) {14985SelectionDAG &DAG = DCI.DAG;14986if (DCI.isBeforeLegalize())14987return SDValue();1498814989if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))14990return SDValue();1499114992SmallVector<SDNode *> Worklist;14993SmallSet<SDNode *, 8> Inserted;14994Worklist.push_back(N);14995Inserted.insert(N);14996SmallVector<CombineResult> CombinesToApply;1499714998while (!Worklist.empty()) {14999SDNode *Root = Worklist.pop_back_val();15000if (!NodeExtensionHelper::isSupportedRoot(Root, Subtarget))15001return 
SDValue();1500215003NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);15004NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);15005auto AppendUsersIfNeeded = [&Worklist,15006&Inserted](const NodeExtensionHelper &Op) {15007if (Op.needToPromoteOtherUsers()) {15008for (SDNode *TheUse : Op.OrigOperand->uses()) {15009if (Inserted.insert(TheUse).second)15010Worklist.push_back(TheUse);15011}15012}15013};1501415015// Control the compile time by limiting the number of node we look at in15016// total.15017if (Inserted.size() > ExtensionMaxWebSize)15018return SDValue();1501915020SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =15021NodeExtensionHelper::getSupportedFoldings(Root);1502215023assert(!FoldingStrategies.empty() && "Nothing to be folded");15024bool Matched = false;15025for (int Attempt = 0;15026(Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;15027++Attempt) {1502815029for (NodeExtensionHelper::CombineToTry FoldingStrategy :15030FoldingStrategies) {15031std::optional<CombineResult> Res =15032FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);15033if (Res) {15034Matched = true;15035CombinesToApply.push_back(*Res);15036// All the inputs that are extended need to be folded, otherwise15037// we would be leaving the old input (since it is may still be used),15038// and the new one.15039if (Res->LHSExt.has_value())15040AppendUsersIfNeeded(LHS);15041if (Res->RHSExt.has_value())15042AppendUsersIfNeeded(RHS);15043break;15044}15045}15046std::swap(LHS, RHS);15047}15048// Right now we do an all or nothing approach.15049if (!Matched)15050return SDValue();15051}15052// Store the value for the replacement of the input node separately.15053SDValue InputRootReplacement;15054// We do the RAUW after we materialize all the combines, because some replaced15055// nodes may be feeding some of the yet-to-be-replaced nodes. 
Put differently,15056// some of these nodes may appear in the NodeExtensionHelpers of some of the15057// yet-to-be-visited CombinesToApply roots.15058SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;15059ValuesToReplace.reserve(CombinesToApply.size());15060for (CombineResult Res : CombinesToApply) {15061SDValue NewValue = Res.materialize(DAG, Subtarget);15062if (!InputRootReplacement) {15063assert(Res.Root == N &&15064"First element is expected to be the current node");15065InputRootReplacement = NewValue;15066} else {15067ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);15068}15069}15070for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {15071DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);15072DCI.AddToWorklist(OldNewValues.second.getNode());15073}15074return InputRootReplacement;15075}1507615077// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond15078// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond15079// y will be the Passthru and cond will be the Mask.15080static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {15081unsigned Opc = N->getOpcode();15082assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||15083Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);1508415085SDValue Y = N->getOperand(0);15086SDValue MergeOp = N->getOperand(1);15087unsigned MergeOpc = MergeOp.getOpcode();1508815089if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)15090return SDValue();1509115092SDValue X = MergeOp->getOperand(1);1509315094if (!MergeOp.hasOneUse())15095return SDValue();1509615097// Passthru should be undef15098SDValue Passthru = N->getOperand(2);15099if (!Passthru.isUndef())15100return SDValue();1510115102// Mask should be all ones15103SDValue Mask = N->getOperand(3);15104if (Mask.getOpcode() != RISCVISD::VMSET_VL)15105return SDValue();1510615107// False value of MergeOp should be all zeros15108SDValue Z = MergeOp->getOperand(2);1510915110if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&15111(isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))15112Z = Z.getOperand(1);1511315114if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))15115return SDValue();1511615117return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),15118{Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},15119N->getFlags());15120}1512115122static SDValue performVWADDSUBW_VLCombine(SDNode *N,15123TargetLowering::DAGCombinerInfo &DCI,15124const RISCVSubtarget &Subtarget) {15125[[maybe_unused]] unsigned Opc = N->getOpcode();15126assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||15127Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);1512815129if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))15130return V;1513115132return combineVWADDSUBWSelect(N, DCI.DAG);15133}1513415135// Helper function for performMemPairCombine.15136// Try to combine the memory loads/stores LSNode1 and LSNode215137// into a single memory pair operation.15138static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,15139LSBaseSDNode *LSNode2, SDValue BasePtr,15140uint64_t Imm) {15141SmallPtrSet<const SDNode *, 32> Visited;15142SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};1514315144if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||15145SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))15146return SDValue();1514715148MachineFunction &MF = DAG.getMachineFunction();15149const RISCVSubtarget &Subtarget = 
MF.getSubtarget<RISCVSubtarget>();1515015151// The new operation has twice the width.15152MVT XLenVT = Subtarget.getXLenVT();15153EVT MemVT = LSNode1->getMemoryVT();15154EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;15155MachineMemOperand *MMO = LSNode1->getMemOperand();15156MachineMemOperand *NewMMO = MF.getMachineMemOperand(15157MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);1515815159if (LSNode1->getOpcode() == ISD::LOAD) {15160auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();15161unsigned Opcode;15162if (MemVT == MVT::i32)15163Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;15164else15165Opcode = RISCVISD::TH_LDD;1516615167SDValue Res = DAG.getMemIntrinsicNode(15168Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),15169{LSNode1->getChain(), BasePtr,15170DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},15171NewMemVT, NewMMO);1517215173SDValue Node1 =15174DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));15175SDValue Node2 =15176DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));1517715178DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());15179return Node1;15180} else {15181unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;1518215183SDValue Res = DAG.getMemIntrinsicNode(15184Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),15185{LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),15186BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},15187NewMemVT, NewMMO);1518815189DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());15190return Res;15191}15192}1519315194// Try to combine two adjacent loads/stores to a single pair instruction from15195// the XTHeadMemPair vendor extension.15196static SDValue performMemPairCombine(SDNode *N,15197TargetLowering::DAGCombinerInfo &DCI) {15198SelectionDAG &DAG = DCI.DAG;15199MachineFunction &MF = DAG.getMachineFunction();15200const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();1520115202// Target does not support load/store pair.15203if (!Subtarget.hasVendorXTHeadMemPair())15204return SDValue();1520515206LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);15207EVT MemVT = LSNode1->getMemoryVT();15208unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 
1 : 2;

  // No volatile, indexed or atomic loads/stores.
  if (!LSNode1->isSimple() || LSNode1->isIndexed())
    return SDValue();

  // Function to get a base + constant representation from a memory value.
  auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
    if (Ptr->getOpcode() == ISD::ADD)
      if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
        return {Ptr->getOperand(0), C1->getZExtValue()};
    return {Ptr, 0};
  };

  auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));

  SDValue Chain = N->getOperand(0);
  for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end();
       UI != UE; ++UI) {
    SDUse &Use = UI.getUse();
    if (Use.getUser() != N && Use.getResNo() == 0 &&
        Use.getUser()->getOpcode() == N->getOpcode()) {
      LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());

      // No volatile, indexed or atomic loads/stores.
      if (!LSNode2->isSimple() || LSNode2->isIndexed())
        continue;

      // Check if LSNode1 and LSNode2 have the same type and extension.
      if (LSNode1->getOpcode() == ISD::LOAD)
        if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
            cast<LoadSDNode>(LSNode1)->getExtensionType())
          continue;

      if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
        continue;

      auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));

      // Check if the base pointer is the same for both instructions.
      if (Base1 != Base2)
        continue;

      // Check if the offsets match the XTHeadMemPair encoding constraints.
      bool Valid = false;
      if (MemVT == MVT::i32) {
        // Check for adjacent i32 values and a 2-bit index.
        if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
          Valid = true;
      } else if (MemVT == MVT::i64) {
        // Check for adjacent i64 values and a 2-bit index.
        if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
          Valid = true;
      }

      if (!Valid)
        continue;

      // Try to combine.
      if (SDValue Res =
              tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
        return Res;
    }
  }

  return SDValue();
}

// Fold
// (fp_to_int (froundeven X)) -> fcvt X, rne
// (fp_to_int (ftrunc X)) -> fcvt X, rtz
// (fp_to_int (ffloor X)) -> fcvt X, rdn
// (fp_to_int (fceil X)) -> fcvt X, rup
// (fp_to_int (fround X)) -> fcvt X, rmm
// (fp_to_int (frint X)) -> fcvt X
static SDValue performFP_TO_INTCombine(SDNode *N,
                                       TargetLowering::DAGCombinerInfo &DCI,
                                       const RISCVSubtarget &Subtarget) {
  SelectionDAG &DAG = DCI.DAG;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  MVT XLenVT = Subtarget.getXLenVT();

  SDValue Src = N->getOperand(0);

  // Don't do this for strict-fp Src.
  if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
    return SDValue();

  // Ensure the FP type is legal.
  if (!TLI.isTypeLegal(Src.getValueType()))
    return SDValue();

  // Don't do this for f16 with Zfhmin and not Zfh.
  if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
    return SDValue();

  RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
  // If the result is invalid, we didn't find a foldable instruction.
  if (FRM == RISCVFPRndMode::Invalid)
    return SDValue();

  SDLoc DL(N);
  bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
  EVT VT = N->getValueType(0);

  if (VT.isVector() &&
TLI.isTypeLegal(VT)) {15314MVT SrcVT = Src.getSimpleValueType();15315MVT SrcContainerVT = SrcVT;15316MVT ContainerVT = VT.getSimpleVT();15317SDValue XVal = Src.getOperand(0);1531815319// For widening and narrowing conversions we just combine it into a15320// VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They15321// end up getting lowered to their appropriate pseudo instructions based on15322// their operand types15323if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||15324VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())15325return SDValue();1532615327// Make fixed-length vectors scalable first15328if (SrcVT.isFixedLengthVector()) {15329SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);15330XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);15331ContainerVT =15332getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);15333}1533415335auto [Mask, VL] =15336getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);1533715338SDValue FpToInt;15339if (FRM == RISCVFPRndMode::RTZ) {15340// Use the dedicated trunc static rounding mode if we're truncating so we15341// don't need to generate calls to fsrmi/fsrm15342unsigned Opc =15343IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;15344FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);15345} else if (FRM == RISCVFPRndMode::DYN) {15346unsigned Opc =15347IsSigned ? RISCVISD::VFCVT_X_F_VL : RISCVISD::VFCVT_XU_F_VL;15348FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);15349} else {15350unsigned Opc =15351IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;15352FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,15353DAG.getTargetConstant(FRM, DL, XLenVT), VL);15354}1535515356// If converted from fixed-length to scalable, convert back15357if (VT.isFixedLengthVector())15358FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);1535915360return FpToInt;15361}1536215363// Only handle XLen or i32 types. Other types narrower than XLen will15364// eventually be legalized to XLenVT.15365if (VT != MVT::i32 && VT != XLenVT)15366return SDValue();1536715368unsigned Opc;15369if (VT == XLenVT)15370Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;15371else15372Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;1537315374SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),15375DAG.getTargetConstant(FRM, DL, XLenVT));15376return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);15377}1537815379// Fold15380// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))15381// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))15382// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))15383// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))15384// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))15385// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))15386static SDValue performFP_TO_INT_SATCombine(SDNode *N,15387TargetLowering::DAGCombinerInfo &DCI,15388const RISCVSubtarget &Subtarget) {15389SelectionDAG &DAG = DCI.DAG;15390const TargetLowering &TLI = DAG.getTargetLoweringInfo();15391MVT XLenVT = Subtarget.getXLenVT();1539215393// Only handle XLen types. 
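  // (Illustrative sketch: on RV64 an i32 fp_to_sint_sat result is promoted to
  //  an i64 node that keeps its i32 saturation VT, which the FCVT_W_RV64 case
  //  below then handles.)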
Other types narrower than XLen will eventually be15394// legalized to XLenVT.15395EVT DstVT = N->getValueType(0);15396if (DstVT != XLenVT)15397return SDValue();1539815399SDValue Src = N->getOperand(0);1540015401// Don't do this for strict-fp Src.15402if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())15403return SDValue();1540415405// Ensure the FP type is also legal.15406if (!TLI.isTypeLegal(Src.getValueType()))15407return SDValue();1540815409// Don't do this for f16 with Zfhmin and not Zfh.15410if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())15411return SDValue();1541215413EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();1541415415RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());15416if (FRM == RISCVFPRndMode::Invalid)15417return SDValue();1541815419bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;1542015421unsigned Opc;15422if (SatVT == DstVT)15423Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;15424else if (DstVT == MVT::i64 && SatVT == MVT::i32)15425Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;15426else15427return SDValue();15428// FIXME: Support other SatVTs by clamping before or after the conversion.1542915430Src = Src.getOperand(0);1543115432SDLoc DL(N);15433SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,15434DAG.getTargetConstant(FRM, DL, XLenVT));1543515436// fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero15437// extend.15438if (Opc == RISCVISD::FCVT_WU_RV64)15439FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);1544015441// RISC-V FP-to-int conversions saturate to the destination register size, but15442// don't produce 0 for nan.15443SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);15444return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);15445}1544615447// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is15448// smaller than XLenVT.15449static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,15450const RISCVSubtarget &Subtarget) {15451assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");1545215453SDValue Src = N->getOperand(0);15454if (Src.getOpcode() != ISD::BSWAP)15455return SDValue();1545615457EVT VT = N->getValueType(0);15458if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||15459!llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))15460return SDValue();1546115462SDLoc DL(N);15463return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));15464}1546515466// Convert from one FMA opcode to another based on whether we are negating the15467// multiply result and/or the accumulator.15468// NOTE: Only supports RVV operations with VL.15469static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {15470// Negating the multiply result changes ADD<->SUB and toggles 'N'.15471if (NegMul) {15472// clang-format off15473switch (Opcode) {15474default: llvm_unreachable("Unexpected opcode");15475case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;15476case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;15477case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;15478case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;15479case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;15480case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;15481case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;15482case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; 
break;15483}15484// clang-format on15485}1548615487// Negating the accumulator changes ADD<->SUB.15488if (NegAcc) {15489// clang-format off15490switch (Opcode) {15491default: llvm_unreachable("Unexpected opcode");15492case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;15493case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;15494case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;15495case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;15496case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;15497case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;15498case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;15499case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;15500}15501// clang-format on15502}1550315504return Opcode;15505}1550615507static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {15508// Fold FNEG_VL into FMA opcodes.15509// The first operand of strict-fp is chain.15510unsigned Offset = N->isTargetStrictFPOpcode();15511SDValue A = N->getOperand(0 + Offset);15512SDValue B = N->getOperand(1 + Offset);15513SDValue C = N->getOperand(2 + Offset);15514SDValue Mask = N->getOperand(3 + Offset);15515SDValue VL = N->getOperand(4 + Offset);1551615517auto invertIfNegative = [&Mask, &VL](SDValue &V) {15518if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&15519V.getOperand(2) == VL) {15520// Return the negated input.15521V = V.getOperand(0);15522return true;15523}1552415525return false;15526};1552715528bool NegA = invertIfNegative(A);15529bool NegB = invertIfNegative(B);15530bool NegC = invertIfNegative(C);1553115532// If no operands are negated, we're done.15533if (!NegA && !NegB && !NegC)15534return SDValue();1553515536unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);15537if (N->isTargetStrictFPOpcode())15538return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),15539{N->getOperand(0), A, B, C, Mask, VL});15540return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,15541VL);15542}1554315544static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG,15545const RISCVSubtarget &Subtarget) {15546if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))15547return V;1554815549if (N->getValueType(0).getVectorElementType() == MVT::f32 &&15550!Subtarget.hasVInstructionsF16())15551return SDValue();1555215553// FIXME: Ignore strict opcodes for now.15554if (N->isTargetStrictFPOpcode())15555return SDValue();1555615557// Try to form widening FMA.15558SDValue Op0 = N->getOperand(0);15559SDValue Op1 = N->getOperand(1);15560SDValue Mask = N->getOperand(3);15561SDValue VL = N->getOperand(4);1556215563if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL ||15564Op1.getOpcode() != RISCVISD::FP_EXTEND_VL)15565return SDValue();1556615567// TODO: Refactor to handle more complex cases similar to15568// combineBinOp_VLToVWBinOp_VL.15569if ((!Op0.hasOneUse() || !Op1.hasOneUse()) &&15570(Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0)))15571return SDValue();1557215573// Check the mask and VL are the same.15574if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL ||15575Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)15576return SDValue();1557715578unsigned NewOpc;15579switch (N->getOpcode()) {15580default:15581llvm_unreachable("Unexpected opcode");15582case RISCVISD::VFMADD_VL:15583NewOpc = RISCVISD::VFWMADD_VL;15584break;15585case RISCVISD::VFNMSUB_VL:15586NewOpc = 
RISCVISD::VFWNMSUB_VL;15587break;15588case RISCVISD::VFNMADD_VL:15589NewOpc = RISCVISD::VFWNMADD_VL;15590break;15591case RISCVISD::VFMSUB_VL:15592NewOpc = RISCVISD::VFWMSUB_VL;15593break;15594}1559515596Op0 = Op0.getOperand(0);15597Op1 = Op1.getOperand(0);1559815599return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0), Op0, Op1,15600N->getOperand(2), Mask, VL);15601}1560215603static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,15604const RISCVSubtarget &Subtarget) {15605assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");1560615607if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit())15608return SDValue();1560915610if (!isa<ConstantSDNode>(N->getOperand(1)))15611return SDValue();15612uint64_t ShAmt = N->getConstantOperandVal(1);15613if (ShAmt > 32)15614return SDValue();1561515616SDValue N0 = N->getOperand(0);1561715618// Combine (sra (sext_inreg (shl X, C1), i32), C2) ->15619// (sra (shl X, C1+32), C2+32) so it gets selected as SLLI+SRAI instead of15620// SLLIW+SRAIW. SLLI+SRAI have compressed forms.15621if (ShAmt < 32 &&15622N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse() &&15623cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32 &&15624N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).hasOneUse() &&15625isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {15626uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);15627if (LShAmt < 32) {15628SDLoc ShlDL(N0.getOperand(0));15629SDValue Shl = DAG.getNode(ISD::SHL, ShlDL, MVT::i64,15630N0.getOperand(0).getOperand(0),15631DAG.getConstant(LShAmt + 32, ShlDL, MVT::i64));15632SDLoc DL(N);15633return DAG.getNode(ISD::SRA, DL, MVT::i64, Shl,15634DAG.getConstant(ShAmt + 32, DL, MVT::i64));15635}15636}1563715638// Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)15639// FIXME: Should this be a generic combine? There's a similar combine on X86.15640//15641// Also try these folds where an add or sub is in the middle.15642// (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C)15643// (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C)15644SDValue Shl;15645ConstantSDNode *AddC = nullptr;1564615647// We might have an ADD or SUB between the SRA and SHL.15648bool IsAdd = N0.getOpcode() == ISD::ADD;15649if ((IsAdd || N0.getOpcode() == ISD::SUB)) {15650// Other operand needs to be a constant we can modify.15651AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));15652if (!AddC)15653return SDValue();1565415655// AddC needs to have at least 32 trailing zeros.15656if (AddC->getAPIntValue().countr_zero() < 32)15657return SDValue();1565815659// All users should be a shift by constant less than or equal to 32. This15660// ensures we'll do this optimization for each of them to produce an15661// add/sub+sext_inreg they can all share.15662for (SDNode *U : N0->uses()) {15663if (U->getOpcode() != ISD::SRA ||15664!isa<ConstantSDNode>(U->getOperand(1)) ||15665U->getConstantOperandVal(1) > 32)15666return SDValue();15667}1566815669Shl = N0.getOperand(IsAdd ? 0 : 1);15670} else {15671// Not an ADD or SUB.15672Shl = N0;15673}1567415675// Look for a shift left by 32.15676if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||15677Shl.getConstantOperandVal(1) != 32)15678return SDValue();1567915680// We if we didn't look through an add/sub, then the shl should have one use.15681// If we did look through an add/sub, the sext_inreg we create is free so15682// we're only creating 2 new instructions. 
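  // (Illustrative, hypothetical constants: (sra (add (shl X, 32), 5 << 32), 32)
  //  is rebuilt below as (sext_inreg (add X, 5), i32).)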
It's enough to only remove the15683// original sra+add/sub.15684if (!AddC && !Shl.hasOneUse())15685return SDValue();1568615687SDLoc DL(N);15688SDValue In = Shl.getOperand(0);1568915690// If we looked through an ADD or SUB, we need to rebuild it with the shifted15691// constant.15692if (AddC) {15693SDValue ShiftedAddC =15694DAG.getConstant(AddC->getAPIntValue().lshr(32), DL, MVT::i64);15695if (IsAdd)15696In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);15697else15698In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);15699}1570015701SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,15702DAG.getValueType(MVT::i32));15703if (ShAmt == 32)15704return SExt;1570515706return DAG.getNode(15707ISD::SHL, DL, MVT::i64, SExt,15708DAG.getConstant(32 - ShAmt, DL, MVT::i64));15709}1571015711// Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y)), Z) if15712// the result is used as the conditon of a br_cc or select_cc we can invert,15713// inverting the setcc is free, and Z is 0/1. Caller will invert the15714// br_cc/select_cc.15715static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {15716bool IsAnd = Cond.getOpcode() == ISD::AND;15717if (!IsAnd && Cond.getOpcode() != ISD::OR)15718return SDValue();1571915720if (!Cond.hasOneUse())15721return SDValue();1572215723SDValue Setcc = Cond.getOperand(0);15724SDValue Xor = Cond.getOperand(1);15725// Canonicalize setcc to LHS.15726if (Setcc.getOpcode() != ISD::SETCC)15727std::swap(Setcc, Xor);15728// LHS should be a setcc and RHS should be an xor.15729if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||15730Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())15731return SDValue();1573215733// If the condition is an And, SimplifyDemandedBits may have changed15734// (xor Z, 1) to (not Z).15735SDValue Xor1 = Xor.getOperand(1);15736if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))15737return SDValue();1573815739EVT VT = Cond.getValueType();15740SDValue Xor0 = Xor.getOperand(0);1574115742// The LHS of the xor needs to be 0/1.15743APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);15744if (!DAG.MaskedValueIsZero(Xor0, Mask))15745return SDValue();1574615747// We can only invert integer setccs.15748EVT SetCCOpVT = Setcc.getOperand(0).getValueType();15749if (!SetCCOpVT.isScalarInteger())15750return SDValue();1575115752ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();15753if (ISD::isIntEqualitySetCC(CCVal)) {15754CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);15755Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),15756Setcc.getOperand(1), CCVal);15757} else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {15758// Invert (setlt 0, X) by converting to (setlt X, 1).15759Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),15760DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);15761} else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {15762// (setlt X, 1) by converting to (setlt 0, X).15763Setcc = DAG.getSetCC(SDLoc(Setcc), VT,15764DAG.getConstant(0, SDLoc(Setcc), VT),15765Setcc.getOperand(0), CCVal);15766} else15767return SDValue();1576815769unsigned Opc = IsAnd ? 
ISD::OR : ISD::AND;
  return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
}

// Perform common combines for BR_CC and SELECT_CC conditions.
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
                       SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
  ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();

  // Since an arithmetic right shift always preserves the sign bit,
  // the shift can be omitted.
  // Fold setlt (sra X, N), 0 -> setlt X, 0 and
  // setge (sra X, N), 0 -> setge X, 0
  if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
      LHS.getOpcode() == ISD::SRA) {
    LHS = LHS.getOperand(0);
    return true;
  }

  if (!ISD::isIntEqualitySetCC(CCVal))
    return false;

  // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
  // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
  if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
      LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
    // If we're looking for eq 0 instead of ne 0, we need to invert the
    // condition.
    bool Invert = CCVal == ISD::SETEQ;
    CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
    if (Invert)
      CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());

    RHS = LHS.getOperand(1);
    LHS = LHS.getOperand(0);
    translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);

    CC = DAG.getCondCode(CCVal);
    return true;
  }

  // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
  if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
    RHS = LHS.getOperand(1);
    LHS = LHS.getOperand(0);
    return true;
  }

  // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
  if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
      LHS.getOperand(1).getOpcode() == ISD::Constant) {
    SDValue LHS0 = LHS.getOperand(0);
    if (LHS0.getOpcode() == ISD::AND &&
        LHS0.getOperand(1).getOpcode() == ISD::Constant) {
      uint64_t Mask = LHS0.getConstantOperandVal(1);
      uint64_t ShAmt = LHS.getConstantOperandVal(1);
      if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
        CCVal = CCVal == ISD::SETEQ ?
ISD::SETGE : ISD::SETLT;15827CC = DAG.getCondCode(CCVal);1582815829ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;15830LHS = LHS0.getOperand(0);15831if (ShAmt != 0)15832LHS =15833DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),15834DAG.getConstant(ShAmt, DL, LHS.getValueType()));15835return true;15836}15837}15838}1583915840// (X, 1, setne) -> // (X, 0, seteq) if we can prove X is 0/1.15841// This can occur when legalizing some floating point comparisons.15842APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);15843if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {15844CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());15845CC = DAG.getCondCode(CCVal);15846RHS = DAG.getConstant(0, DL, LHS.getValueType());15847return true;15848}1584915850if (isNullConstant(RHS)) {15851if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {15852CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());15853CC = DAG.getCondCode(CCVal);15854LHS = NewCond;15855return true;15856}15857}1585815859return false;15860}1586115862// Fold15863// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).15864// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).15865// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).15866// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).15867static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,15868SDValue TrueVal, SDValue FalseVal,15869bool Swapped) {15870bool Commutative = true;15871unsigned Opc = TrueVal.getOpcode();15872switch (Opc) {15873default:15874return SDValue();15875case ISD::SHL:15876case ISD::SRA:15877case ISD::SRL:15878case ISD::SUB:15879Commutative = false;15880break;15881case ISD::ADD:15882case ISD::OR:15883case ISD::XOR:15884break;15885}1588615887if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))15888return SDValue();1588915890unsigned OpToFold;15891if (FalseVal == TrueVal.getOperand(0))15892OpToFold = 0;15893else if (Commutative && FalseVal == TrueVal.getOperand(1))15894OpToFold = 1;15895else15896return SDValue();1589715898EVT VT = N->getValueType(0);15899SDLoc DL(N);15900SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);15901EVT OtherOpVT = OtherOp.getValueType();15902SDValue IdentityOperand =15903DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());15904if (!Commutative)15905IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);15906assert(IdentityOperand && "No identity operand!");1590715908if (Swapped)15909std::swap(OtherOp, IdentityOperand);15910SDValue NewSel =15911DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);15912return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);15913}1591415915// This tries to get rid of `select` and `icmp` that are being used to handle15916// `Targets` that do not support `cttz(0)`/`ctlz(0)`.15917static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {15918SDValue Cond = N->getOperand(0);1591915920// This represents either CTTZ or CTLZ instruction.15921SDValue CountZeroes;1592215923SDValue ValOnZero;1592415925if (Cond.getOpcode() != ISD::SETCC)15926return SDValue();1592715928if (!isNullConstant(Cond->getOperand(1)))15929return SDValue();1593015931ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();15932if (CCVal == ISD::CondCode::SETEQ) {15933CountZeroes = N->getOperand(2);15934ValOnZero = N->getOperand(1);15935} else if (CCVal == ISD::CondCode::SETNE) {15936CountZeroes = N->getOperand(1);15937ValOnZero = N->getOperand(2);15938} else {15939return SDValue();15940}1594115942if 
(CountZeroes.getOpcode() == ISD::TRUNCATE ||15943CountZeroes.getOpcode() == ISD::ZERO_EXTEND)15944CountZeroes = CountZeroes.getOperand(0);1594515946if (CountZeroes.getOpcode() != ISD::CTTZ &&15947CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&15948CountZeroes.getOpcode() != ISD::CTLZ &&15949CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)15950return SDValue();1595115952if (!isNullConstant(ValOnZero))15953return SDValue();1595415955SDValue CountZeroesArgument = CountZeroes->getOperand(0);15956if (Cond->getOperand(0) != CountZeroesArgument)15957return SDValue();1595815959if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {15960CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),15961CountZeroes.getValueType(), CountZeroesArgument);15962} else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {15963CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),15964CountZeroes.getValueType(), CountZeroesArgument);15965}1596615967unsigned BitWidth = CountZeroes.getValueSizeInBits();15968SDValue BitWidthMinusOne =15969DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());1597015971auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),15972CountZeroes, BitWidthMinusOne);15973return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));15974}1597515976static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,15977const RISCVSubtarget &Subtarget) {15978SDValue Cond = N->getOperand(0);15979SDValue True = N->getOperand(1);15980SDValue False = N->getOperand(2);15981SDLoc DL(N);15982EVT VT = N->getValueType(0);15983EVT CondVT = Cond.getValueType();1598415985if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())15986return SDValue();1598715988// Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate15989// BEXTI, where C is power of 2.15990if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&15991(Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {15992SDValue LHS = Cond.getOperand(0);15993SDValue RHS = Cond.getOperand(1);15994ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();15995if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&15996isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {15997const APInt &MaskVal = LHS.getConstantOperandAPInt(1);15998if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))15999return DAG.getSelect(DL, VT,16000DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),16001False, True);16002}16003}16004return SDValue();16005}1600616007static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,16008const RISCVSubtarget &Subtarget) {16009if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))16010return Folded;1601116012if (SDValue V = useInversedSetcc(N, DAG, Subtarget))16013return V;1601416015if (Subtarget.hasConditionalMoveFusion())16016return SDValue();1601716018SDValue TrueVal = N->getOperand(1);16019SDValue FalseVal = N->getOperand(2);16020if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))16021return V;16022return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);16023}1602416025/// If we have a build_vector where each lane is binop X, C, where C16026/// is a constant (but not necessarily the same constant on all lanes),16027/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).16028/// We assume that materializing a constant build vector will be no more16029/// expensive that performing O(n) binops.16030static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,16031const RISCVSubtarget 
&Subtarget,16032const RISCVTargetLowering &TLI) {16033SDLoc DL(N);16034EVT VT = N->getValueType(0);1603516036assert(!VT.isScalableVector() && "unexpected build vector");1603716038if (VT.getVectorNumElements() == 1)16039return SDValue();1604016041const unsigned Opcode = N->op_begin()->getNode()->getOpcode();16042if (!TLI.isBinOp(Opcode))16043return SDValue();1604416045if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))16046return SDValue();1604716048// This BUILD_VECTOR involves an implicit truncation, and sinking16049// truncates through binops is non-trivial.16050if (N->op_begin()->getValueType() != VT.getVectorElementType())16051return SDValue();1605216053SmallVector<SDValue> LHSOps;16054SmallVector<SDValue> RHSOps;16055for (SDValue Op : N->ops()) {16056if (Op.isUndef()) {16057// We can't form a divide or remainder from undef.16058if (!DAG.isSafeToSpeculativelyExecute(Opcode))16059return SDValue();1606016061LHSOps.push_back(Op);16062RHSOps.push_back(Op);16063continue;16064}1606516066// TODO: We can handle operations which have an neutral rhs value16067// (e.g. x + 0, a * 1 or a << 0), but we then have to keep track16068// of profit in a more explicit manner.16069if (Op.getOpcode() != Opcode || !Op.hasOneUse())16070return SDValue();1607116072LHSOps.push_back(Op.getOperand(0));16073if (!isa<ConstantSDNode>(Op.getOperand(1)) &&16074!isa<ConstantFPSDNode>(Op.getOperand(1)))16075return SDValue();16076// FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may16077// have different LHS and RHS types.16078if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())16079return SDValue();1608016081RHSOps.push_back(Op.getOperand(1));16082}1608316084return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),16085DAG.getBuildVector(VT, DL, RHSOps));16086}1608716088static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,16089const RISCVSubtarget &Subtarget,16090const RISCVTargetLowering &TLI) {16091SDValue InVec = N->getOperand(0);16092SDValue InVal = N->getOperand(1);16093SDValue EltNo = N->getOperand(2);16094SDLoc DL(N);1609516096EVT VT = InVec.getValueType();16097if (VT.isScalableVector())16098return SDValue();1609916100if (!InVec.hasOneUse())16101return SDValue();1610216103// Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt16104// move the insert_vector_elts into the arms of the binop. Note that16105// the new RHS must be a constant.16106const unsigned InVecOpcode = InVec->getOpcode();16107if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&16108InVal.hasOneUse()) {16109SDValue InVecLHS = InVec->getOperand(0);16110SDValue InVecRHS = InVec->getOperand(1);16111SDValue InValLHS = InVal->getOperand(0);16112SDValue InValRHS = InVal->getOperand(1);1611316114if (!ISD::isBuildVectorOfConstantSDNodes(InVecRHS.getNode()))16115return SDValue();16116if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))16117return SDValue();16118// FIXME: Return failure if the RHS type doesn't match the LHS. 
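    // (Hypothetical illustration of the concern: a vector shl's shift-amount
    //  operand may be built with a different element type than its value
    //  operand, so distributing the insert_vector_elt blindly could create a
    //  binop with mismatched operand types.)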
Shifts may16119// have different LHS and RHS types.16120if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())16121return SDValue();16122SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,16123InVecLHS, InValLHS, EltNo);16124SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,16125InVecRHS, InValRHS, EltNo);16126return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);16127}1612816129// Given insert_vector_elt (concat_vectors ...), InVal, Elt16130// move the insert_vector_elt to the source operand of the concat_vector.16131if (InVec.getOpcode() != ISD::CONCAT_VECTORS)16132return SDValue();1613316134auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);16135if (!IndexC)16136return SDValue();16137unsigned Elt = IndexC->getZExtValue();1613816139EVT ConcatVT = InVec.getOperand(0).getValueType();16140if (ConcatVT.getVectorElementType() != InVal.getValueType())16141return SDValue();16142unsigned ConcatNumElts = ConcatVT.getVectorNumElements();16143SDValue NewIdx = DAG.getVectorIdxConstant(Elt % ConcatNumElts, DL);1614416145unsigned ConcatOpIdx = Elt / ConcatNumElts;16146SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);16147ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT,16148ConcatOp, InVal, NewIdx);1614916150SmallVector<SDValue> ConcatOps;16151ConcatOps.append(InVec->op_begin(), InVec->op_end());16152ConcatOps[ConcatOpIdx] = ConcatOp;16153return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);16154}1615516156// If we're concatenating a series of vector loads like16157// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...16158// Then we can turn this into a strided load by widening the vector elements16159// vlse32 p, stride=n16160static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,16161const RISCVSubtarget &Subtarget,16162const RISCVTargetLowering &TLI) {16163SDLoc DL(N);16164EVT VT = N->getValueType(0);1616516166// Only perform this combine on legal MVTs.16167if (!TLI.isTypeLegal(VT))16168return SDValue();1616916170// TODO: Potentially extend this to scalable vectors16171if (VT.isScalableVector())16172return SDValue();1617316174auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));16175if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||16176!SDValue(BaseLd, 0).hasOneUse())16177return SDValue();1617816179EVT BaseLdVT = BaseLd->getValueType(0);1618016181// Go through the loads and check that they're strided16182SmallVector<LoadSDNode *> Lds;16183Lds.push_back(BaseLd);16184Align Align = BaseLd->getAlign();16185for (SDValue Op : N->ops().drop_front()) {16186auto *Ld = dyn_cast<LoadSDNode>(Op);16187if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||16188Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||16189Ld->getValueType(0) != BaseLdVT)16190return SDValue();1619116192Lds.push_back(Ld);1619316194// The common alignment is the most restrictive (smallest) of all the loads16195Align = std::min(Align, Ld->getAlign());16196}1619716198using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;16199auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,16200LoadSDNode *Ld2) -> std::optional<PtrDiff> {16201// If the load ptrs can be decomposed into a common (Base + Index) with a16202// common constant stride, then return the constant stride.16203BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);16204BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);16205if (BIO1.equalBaseIndex(BIO2, DAG))16206return {{BIO2.getOffset() - BIO1.getOffset(), false}};1620716208// Otherwise try to match (add LastPtr, 
Stride) or (add NextPtr, Stride)16209SDValue P1 = Ld1->getBasePtr();16210SDValue P2 = Ld2->getBasePtr();16211if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)16212return {{P2.getOperand(1), false}};16213if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)16214return {{P1.getOperand(1), true}};1621516216return std::nullopt;16217};1621816219// Get the distance between the first and second loads16220auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);16221if (!BaseDiff)16222return SDValue();1622316224// Check all the loads are the same distance apart16225for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)16226if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)16227return SDValue();1622816229// TODO: At this point, we've successfully matched a generalized gather16230// load. Maybe we should emit that, and then move the specialized16231// matchers above and below into a DAG combine?1623216233// Get the widened scalar type, e.g. v4i8 -> i6416234unsigned WideScalarBitWidth =16235BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();16236MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);1623716238// Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i6416239MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());16240if (!TLI.isTypeLegal(WideVecVT))16241return SDValue();1624216243// Check that the operation is legal16244if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))16245return SDValue();1624616247auto [StrideVariant, MustNegateStride] = *BaseDiff;16248SDValue Stride = std::holds_alternative<SDValue>(StrideVariant)16249? std::get<SDValue>(StrideVariant)16250: DAG.getConstant(std::get<int64_t>(StrideVariant), DL,16251Lds[0]->getOffset().getValueType());16252if (MustNegateStride)16253Stride = DAG.getNegative(Stride, DL, Stride.getValueType());1625416255SDValue AllOneMask =16256DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,16257DAG.getConstant(1, DL, MVT::i1));1625816259uint64_t MemSize;16260if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);16261ConstStride && ConstStride->getSExtValue() >= 0)16262// total size = (elsize * n) + (stride - elsize) * (n-1)16263// = elsize + stride * (n-1)16264MemSize = WideScalarVT.getSizeInBits() +16265ConstStride->getSExtValue() * (N->getNumOperands() - 1);16266else16267// If Stride isn't constant, then we can't know how much it will load16268MemSize = MemoryLocation::UnknownSize;1626916270MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(16271BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,16272Align);1627316274SDValue StridedLoad = DAG.getStridedLoadVP(16275WideVecVT, DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,16276AllOneMask,16277DAG.getConstant(N->getNumOperands(), DL, Subtarget.getXLenVT()), MMO);1627816279for (SDValue Ld : N->ops())16280DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);1628116282return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);16283}1628416285static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,16286const RISCVSubtarget &Subtarget) {1628716288assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);1628916290if (N->getValueType(0).isFixedLengthVector())16291return SDValue();1629216293SDValue Addend = N->getOperand(0);16294SDValue MulOp = N->getOperand(1);1629516296if (N->getOpcode() == RISCVISD::ADD_VL) {16297SDValue AddMergeOp = N->getOperand(2);16298if (!AddMergeOp.isUndef())16299return SDValue();16300}1630116302auto IsVWMulOpc = [](unsigned Opc) {16303switch (Opc) 
{16304case RISCVISD::VWMUL_VL:16305case RISCVISD::VWMULU_VL:16306case RISCVISD::VWMULSU_VL:16307return true;16308default:16309return false;16310}16311};1631216313if (!IsVWMulOpc(MulOp.getOpcode()))16314std::swap(Addend, MulOp);1631516316if (!IsVWMulOpc(MulOp.getOpcode()))16317return SDValue();1631816319SDValue MulMergeOp = MulOp.getOperand(2);1632016321if (!MulMergeOp.isUndef())16322return SDValue();1632316324auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,16325const RISCVSubtarget &Subtarget) {16326if (N->getOpcode() == ISD::ADD) {16327SDLoc DL(N);16328return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,16329Subtarget);16330}16331return std::make_pair(N->getOperand(3), N->getOperand(4));16332}(N, DAG, Subtarget);1633316334SDValue MulMask = MulOp.getOperand(3);16335SDValue MulVL = MulOp.getOperand(4);1633616337if (AddMask != MulMask || AddVL != MulVL)16338return SDValue();1633916340unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;16341static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,16342"Unexpected opcode after VWMACC_VL");16343static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,16344"Unexpected opcode after VWMACC_VL!");16345static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,16346"Unexpected opcode after VWMUL_VL!");16347static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,16348"Unexpected opcode after VWMUL_VL!");1634916350SDLoc DL(N);16351EVT VT = N->getValueType(0);16352SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,16353AddVL};16354return DAG.getNode(Opc, DL, VT, Ops);16355}1635616357static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,16358ISD::MemIndexType &IndexType,16359RISCVTargetLowering::DAGCombinerInfo &DCI) {16360if (!DCI.isBeforeLegalize())16361return false;1636216363SelectionDAG &DAG = DCI.DAG;16364const MVT XLenVT =16365DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();1636616367const EVT IndexVT = Index.getValueType();1636816369// RISC-V indexed loads only support the "unsigned unscaled" addressing16370// mode, so anything else must be manually legalized.16371if (!isIndexTypeSigned(IndexType))16372return false;1637316374if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {16375// Any index legalization should first promote to XLenVT, so we don't lose16376// bits when scaling. This may create an illegal index type so we let16377// LLVM's legalization take care of the splitting.16378// FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.16379Index = DAG.getNode(ISD::SIGN_EXTEND, DL,16380IndexVT.changeVectorElementType(XLenVT), Index);16381}16382IndexType = ISD::UNSIGNED_SCALED;16383return true;16384}1638516386/// Match the index vector of a scatter or gather node as the shuffle mask16387/// which performs the rearrangement if possible. 
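/// (Illustrative, hypothetical i32 gather: a constant index vector of
/// <4, 0, 12, 8> bytes with 4-byte elements corresponds to the shuffle mask
/// <1, 0, 3, 2>, touching every lane exactly once.)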
/// Will only match if
/// all lanes are touched, and thus replacing the scatter or gather with
/// a unit strided access and shuffle is legal.
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
                                SmallVector<int> &ShuffleMask) {
  if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
    return false;
  if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
    return false;

  const unsigned ElementSize = VT.getScalarStoreSize();
  const unsigned NumElems = VT.getVectorNumElements();

  // Create the shuffle mask and check all bits active
  assert(ShuffleMask.empty());
  BitVector ActiveLanes(NumElems);
  for (unsigned i = 0; i < Index->getNumOperands(); i++) {
    // TODO: We've found an active bit of UB, and could be
    // more aggressive here if desired.
    if (Index->getOperand(i)->isUndef())
      return false;
    uint64_t C = Index->getConstantOperandVal(i);
    if (C % ElementSize != 0)
      return false;
    C = C / ElementSize;
    if (C >= NumElems)
      return false;
    ShuffleMask.push_back(C);
    ActiveLanes.set(C);
  }
  return ActiveLanes.all();
}

/// Match the index of a gather or scatter operation as an operation
/// with twice the element width and half the number of elements. This is
/// generally profitable (if legal) because these operations are linear
/// in VL, so even if we cause some extra VTYPE/VL toggles, we still
/// come out ahead.
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
                                Align BaseAlign, const RISCVSubtarget &ST) {
  if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
    return false;
  if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
    return false;

  // Attempt a doubling. If we can use an element type 4x or 8x in
  // size, this will happen via multiple iterations of the transform.
  const unsigned NumElems = VT.getVectorNumElements();
  if (NumElems % 2 != 0)
    return false;

  const unsigned ElementSize = VT.getScalarStoreSize();
  const unsigned WiderElementSize = ElementSize * 2;
  if (WiderElementSize > ST.getELen()/8)
    return false;

  if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
    return false;

  for (unsigned i = 0; i < Index->getNumOperands(); i++) {
    // TODO: We've found an active bit of UB, and could be
    // more aggressive here if desired.
    if (Index->getOperand(i)->isUndef())
      return false;
    // TODO: This offset check is too strict if we support fully
    // misaligned memory operations.
    uint64_t C = Index->getConstantOperandVal(i);
    if (i % 2 == 0) {
      if (C % WiderElementSize != 0)
        return false;
      continue;
    }
    uint64_t Last = Index->getConstantOperandVal(i-1);
    if (C != Last + ElementSize)
      return false;
  }
  return true;
}

// trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
// This is beneficial when X and Y are both the same low-precision vector
// value type.
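// (Illustrative, hypothetical types: with X and Y of type nxv4i8, the pattern
//  trunc (sra (sext X to nxv4i32), (zext Y to nxv4i32)) can be emitted as
//  sra (X, smin (Y, 7)) instead.)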
Since the truncate would be lowered into16469// n-levels TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate16470// restriction, such pattern would be expanded into a series of "vsetvli"16471// and "vnsrl" instructions later to reach this point.16472static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG) {16473SDValue Mask = N->getOperand(1);16474SDValue VL = N->getOperand(2);1647516476bool IsVLMAX = isAllOnesConstant(VL) ||16477(isa<RegisterSDNode>(VL) &&16478cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);16479if (!IsVLMAX || Mask.getOpcode() != RISCVISD::VMSET_VL ||16480Mask.getOperand(0) != VL)16481return SDValue();1648216483auto IsTruncNode = [&](SDValue V) {16484return V.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&16485V.getOperand(1) == Mask && V.getOperand(2) == VL;16486};1648716488SDValue Op = N->getOperand(0);1648916490// We need to first find the inner level of TRUNCATE_VECTOR_VL node16491// to distinguish such pattern.16492while (IsTruncNode(Op)) {16493if (!Op.hasOneUse())16494return SDValue();16495Op = Op.getOperand(0);16496}1649716498if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse())16499return SDValue();1650016501SDValue N0 = Op.getOperand(0);16502SDValue N1 = Op.getOperand(1);16503if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() ||16504N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse())16505return SDValue();1650616507SDValue N00 = N0.getOperand(0);16508SDValue N10 = N1.getOperand(0);16509if (!N00.getValueType().isVector() ||16510N00.getValueType() != N10.getValueType() ||16511N->getValueType(0) != N10.getValueType())16512return SDValue();1651316514unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;16515SDValue SMin =16516DAG.getNode(ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,16517DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));16518return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);16519}1652016521// Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the16522// maximum value for the truncated type.16523// Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C116524// is the signed maximum value for the truncated type and C2 is the signed16525// minimum value.16526static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,16527const RISCVSubtarget &Subtarget) {16528assert(N->getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL);1652916530MVT VT = N->getSimpleValueType(0);1653116532SDValue Mask = N->getOperand(1);16533SDValue VL = N->getOperand(2);1653416535auto MatchMinMax = [&VL, &Mask](SDValue V, unsigned Opc, unsigned OpcVL,16536APInt &SplatVal) {16537if (V.getOpcode() != Opc &&16538!(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() &&16539V.getOperand(3) == Mask && V.getOperand(4) == VL))16540return SDValue();1654116542SDValue Op = V.getOperand(1);1654316544// Peek through conversion between fixed and scalable vectors.16545if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(0).isUndef() &&16546isNullConstant(Op.getOperand(2)) &&16547Op.getOperand(1).getValueType().isFixedLengthVector() &&16548Op.getOperand(1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&16549Op.getOperand(1).getOperand(0).getValueType() == Op.getValueType() &&16550isNullConstant(Op.getOperand(1).getOperand(1)))16551Op = Op.getOperand(1).getOperand(0);1655216553if (ISD::isConstantSplatVector(Op.getNode(), SplatVal))16554return V.getOperand(0);1655516556if (Op.getOpcode() == RISCVISD::VMV_V_X_VL && Op.getOperand(0).isUndef() &&16557Op.getOperand(2) == VL) {16558if (auto *Op1 = 
dyn_cast<ConstantSDNode>(Op.getOperand(1))) {16559SplatVal =16560Op1->getAPIntValue().sextOrTrunc(Op.getScalarValueSizeInBits());16561return V.getOperand(0);16562}16563}1656416565return SDValue();16566};1656716568SDLoc DL(N);1656916570auto DetectUSatPattern = [&](SDValue V) {16571APInt LoC, HiC;1657216573// Simple case, V is a UMIN.16574if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))16575if (HiC.isMask(VT.getScalarSizeInBits()))16576return UMinOp;1657716578// If we have an SMAX that removes negative numbers first, then we can match16579// SMIN instead of UMIN.16580if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))16581if (SDValue SMaxOp =16582MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))16583if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))16584return SMinOp;1658516586// If we have an SMIN before an SMAX and the SMAX constant is less than or16587// equal to the SMIN constant, we can use vnclipu if we insert a new SMAX16588// first.16589if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))16590if (SDValue SMinOp =16591MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))16592if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&16593HiC.uge(LoC))16594return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMinOp,16595V.getOperand(1), DAG.getUNDEF(V.getValueType()),16596Mask, VL);1659716598return SDValue();16599};1660016601auto DetectSSatPattern = [&](SDValue V) {16602unsigned NumDstBits = VT.getScalarSizeInBits();16603unsigned NumSrcBits = V.getScalarValueSizeInBits();16604APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);16605APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);1660616607APInt HiC, LoC;16608if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))16609if (SDValue SMaxOp =16610MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))16611if (HiC == SignedMax && LoC == SignedMin)16612return SMaxOp;1661316614if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))16615if (SDValue SMinOp =16616MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))16617if (HiC == SignedMax && LoC == SignedMin)16618return SMinOp;1661916620return SDValue();16621};1662216623SDValue Src = N->getOperand(0);1662416625// Look through multiple layers of truncates.16626while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&16627Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&16628Src.hasOneUse())16629Src = Src.getOperand(0);1663016631SDValue Val;16632unsigned ClipOpc;16633if ((Val = DetectUSatPattern(Src)))16634ClipOpc = RISCVISD::VNCLIPU_VL;16635else if ((Val = DetectSSatPattern(Src)))16636ClipOpc = RISCVISD::VNCLIP_VL;16637else16638return SDValue();1663916640MVT ValVT = Val.getSimpleValueType();1664116642do {16643MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);16644ValVT = ValVT.changeVectorElementType(ValEltVT);16645// Rounding mode here is arbitrary since we aren't shifting out any bits.16646Val = DAG.getNode(16647ClipOpc, DL, ValVT,16648{Val, DAG.getConstant(0, DL, ValVT), DAG.getUNDEF(VT), Mask,16649DAG.getTargetConstant(RISCVVXRndMode::RNU, DL, Subtarget.getXLenVT()),16650VL});16651} while (ValVT != VT);1665216653return Val;16654}1665516656SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,16657DAGCombinerInfo &DCI) const {16658SelectionDAG &DAG = DCI.DAG;16659const MVT XLenVT = Subtarget.getXLenVT();16660SDLoc DL(N);1666116662// Helper to call SimplifyDemandedBits on an operand of N where 
only some low16663// bits are demanded. N will be added to the Worklist if it was not deleted.16664// Caller should return SDValue(N, 0) if this returns true.16665auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {16666SDValue Op = N->getOperand(OpNo);16667APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);16668if (!SimplifyDemandedBits(Op, Mask, DCI))16669return false;1667016671if (N->getOpcode() != ISD::DELETED_NODE)16672DCI.AddToWorklist(N);16673return true;16674};1667516676switch (N->getOpcode()) {16677default:16678break;16679case RISCVISD::SplitF64: {16680SDValue Op0 = N->getOperand(0);16681// If the input to SplitF64 is just BuildPairF64 then the operation is16682// redundant. Instead, use BuildPairF64's operands directly.16683if (Op0->getOpcode() == RISCVISD::BuildPairF64)16684return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));1668516686if (Op0->isUndef()) {16687SDValue Lo = DAG.getUNDEF(MVT::i32);16688SDValue Hi = DAG.getUNDEF(MVT::i32);16689return DCI.CombineTo(N, Lo, Hi);16690}1669116692// It's cheaper to materialise two 32-bit integers than to load a double16693// from the constant pool and transfer it to integer registers through the16694// stack.16695if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {16696APInt V = C->getValueAPF().bitcastToAPInt();16697SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);16698SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);16699return DCI.CombineTo(N, Lo, Hi);16700}1670116702// This is a target-specific version of a DAGCombine performed in16703// DAGCombiner::visitBITCAST. It performs the equivalent of:16704// fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)16705// fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))16706if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||16707!Op0.getNode()->hasOneUse())16708break;16709SDValue NewSplitF64 =16710DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),16711Op0.getOperand(0));16712SDValue Lo = NewSplitF64.getValue(0);16713SDValue Hi = NewSplitF64.getValue(1);16714APInt SignBit = APInt::getSignMask(32);16715if (Op0.getOpcode() == ISD::FNEG) {16716SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,16717DAG.getConstant(SignBit, DL, MVT::i32));16718return DCI.CombineTo(N, Lo, NewHi);16719}16720assert(Op0.getOpcode() == ISD::FABS);16721SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,16722DAG.getConstant(~SignBit, DL, MVT::i32));16723return DCI.CombineTo(N, Lo, NewHi);16724}16725case RISCVISD::SLLW:16726case RISCVISD::SRAW:16727case RISCVISD::SRLW:16728case RISCVISD::RORW:16729case RISCVISD::ROLW: {16730// Only the lower 32 bits of LHS and lower 5 bits of RHS are read.16731if (SimplifyDemandedLowBitsHelper(0, 32) ||16732SimplifyDemandedLowBitsHelper(1, 5))16733return SDValue(N, 0);1673416735break;16736}16737case RISCVISD::CLZW:16738case RISCVISD::CTZW: {16739// Only the lower 32 bits of the first operand are read16740if (SimplifyDemandedLowBitsHelper(0, 32))16741return SDValue(N, 0);16742break;16743}16744case RISCVISD::FMV_W_X_RV64: {16745// If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 the the16746// conversion is unnecessary and can be replaced with the16747// FMV_X_ANYEXTW_RV64 operand.16748SDValue Op0 = N->getOperand(0);16749if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)16750return Op0.getOperand(0);16751break;16752}16753case RISCVISD::FMV_X_ANYEXTH:16754case RISCVISD::FMV_X_ANYEXTW_RV64: {16755SDLoc DL(N);16756SDValue Op0 = 
  case RISCVISD::FMV_X_ANYEXTH:
  case RISCVISD::FMV_X_ANYEXTW_RV64: {
    SDLoc DL(N);
    SDValue Op0 = N->getOperand(0);
    MVT VT = N->getSimpleValueType(0);
    // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
    // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
    // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
    if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
         Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
        (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
         Op0->getOpcode() == RISCVISD::FMV_H_X)) {
      assert(Op0.getOperand(0).getValueType() == VT &&
             "Unexpected value type!");
      return Op0.getOperand(0);
    }

    // This is a target-specific version of a DAGCombine performed in
    // DAGCombiner::visitBITCAST. It performs the equivalent of:
    // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
    // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
    if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
        !Op0.getNode()->hasOneUse())
      break;
    SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
    unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
    APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
    if (Op0.getOpcode() == ISD::FNEG)
      return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
                         DAG.getConstant(SignBit, DL, VT));

    assert(Op0.getOpcode() == ISD::FABS);
    return DAG.getNode(ISD::AND, DL, VT, NewFMV,
                       DAG.getConstant(~SignBit, DL, VT));
  }
  case ISD::ABS: {
    EVT VT = N->getValueType(0);
    SDValue N0 = N->getOperand(0);
    // abs (sext) -> zext (abs)
    // abs (zext) -> zext (handled elsewhere)
    if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
      SDValue Src = N0.getOperand(0);
      SDLoc DL(N);
      return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
                         DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
    }
    break;
  }
  case ISD::ADD: {
    if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
      return V;
    if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
      return V;
    return performADDCombine(N, DCI, Subtarget);
  }
  case ISD::SUB: {
    if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
      return V;
    return performSUBCombine(N, DAG, Subtarget);
  }
  case ISD::AND:
    return performANDCombine(N, DCI, Subtarget);
  case ISD::OR: {
    if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
      return V;
    return performORCombine(N, DCI, Subtarget);
  }
  case ISD::XOR:
    return performXORCombine(N, DAG, Subtarget);
  case ISD::MUL:
    if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
      return V;
    return performMULCombine(N, DAG, DCI, Subtarget);
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::SREM:
  case ISD::UREM:
    if (SDValue V = combineBinOpOfZExt(N, DAG))
      return V;
    break;
  case ISD::FADD:
  case ISD::UMAX:
  case ISD::UMIN:
  case ISD::SMAX:
  case ISD::SMIN:
  case ISD::FMAXNUM:
  case ISD::FMINNUM: {
    if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
      return V;
    if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
      return V;
    return SDValue();
  }
  case ISD::SETCC:
    return performSETCCCombine(N, DAG, Subtarget);
  case ISD::SIGN_EXTEND_INREG:
    return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
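  // Editorial example for the fold below (illustration only; value names are
  // invented):
  //   (zero_extend i64 (fp_to_uint i32 X)) -> (fp_to_uint i64 X)
  // If X fits in an unsigned 32-bit result the two forms agree (the i32 value
  // zero-extends to the same i64); if it does not fit, the original fp_to_uint
  // is already poison, so widening the conversion cannot introduce a new wrong
  // result.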
  case ISD::ZERO_EXTEND:
    // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
    // type legalization. This is safe because fp_to_uint produces poison if
    // it overflows.
    if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
      SDValue Src = N->getOperand(0);
      if (Src.getOpcode() == ISD::FP_TO_UINT &&
          isTypeLegal(Src.getOperand(0).getValueType()))
        return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
                           Src.getOperand(0));
      if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
          isTypeLegal(Src.getOperand(1).getValueType())) {
        SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
        SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
                                  Src.getOperand(0), Src.getOperand(1));
        DCI.CombineTo(N, Res);
        DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
        DCI.recursivelyDeleteUnusedNodes(Src.getNode());
        return SDValue(N, 0); // Return N so it doesn't get rechecked.
      }
    }
    return SDValue();
  case RISCVISD::TRUNCATE_VECTOR_VL:
    if (SDValue V = combineTruncOfSraSext(N, DAG))
      return V;
    return combineTruncToVnclip(N, DAG, Subtarget);
  case ISD::TRUNCATE:
    return performTRUNCATECombine(N, DAG, Subtarget);
  case ISD::SELECT:
    return performSELECTCombine(N, DAG, Subtarget);
  case RISCVISD::CZERO_EQZ:
  case RISCVISD::CZERO_NEZ: {
    SDValue Val = N->getOperand(0);
    SDValue Cond = N->getOperand(1);

    unsigned Opc = N->getOpcode();

    // czero_eqz x, x -> x
    if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
      return Val;

    unsigned InvOpc =
        Opc == RISCVISD::CZERO_EQZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ;

    // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
    // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
    if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) {
      SDValue NewCond = Cond.getOperand(0);
      APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
      if (DAG.MaskedValueIsZero(NewCond, Mask))
        return DAG.getNode(InvOpc, SDLoc(N), N->getValueType(0), Val, NewCond);
    }
    // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
    // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
    // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
    // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
    if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
      ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
      if (ISD::isIntEqualitySetCC(CCVal))
        return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N),
                           N->getValueType(0), Val, Cond.getOperand(0));
    }
    return SDValue();
  }
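  // Editorial example (illustration only, assuming the Zicond semantics
  // rd = (rs2 == 0) ? 0 : rs1 for czero.eqz): the setcc folds above hold
  // because (setcc y, 0, ne) is zero exactly when y itself is zero, so y can
  // serve directly as the condition; the eq form flips the sense, which is
  // why the opposite CZERO opcode is used in that case.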
  case RISCVISD::SELECT_CC: {
    // Transform RISCVISD::SELECT_CC into cheaper forms where possible.
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);
    SDValue CC = N->getOperand(2);
    ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
    SDValue TrueV = N->getOperand(3);
    SDValue FalseV = N->getOperand(4);
    SDLoc DL(N);
    EVT VT = N->getValueType(0);

    // If the True and False values are the same, we don't need a select_cc.
    if (TrueV == FalseV)
      return TrueV;

    // (select (x < 0), y, z)  -> x >> (XLEN - 1) & (y - z) + z
    // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
    if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
        isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
        (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
      if (CCVal == ISD::CondCode::SETGE)
        std::swap(TrueV, FalseV);

      int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
      int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
      // Only handle simm12; a constant outside that range is better treated
      // as a register operand.
      if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
          isInt<12>(TrueSImm - FalseSImm)) {
        SDValue SRA =
            DAG.getNode(ISD::SRA, DL, VT, LHS,
                        DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
        SDValue AND =
            DAG.getNode(ISD::AND, DL, VT, SRA,
                        DAG.getConstant(TrueSImm - FalseSImm, DL, VT));
        return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
      }

      if (CCVal == ISD::CondCode::SETGE)
        std::swap(TrueV, FalseV);
    }

    if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
      return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
                         {LHS, RHS, CC, TrueV, FalseV});

    if (!Subtarget.hasConditionalMoveFusion()) {
      // (select c, -1, y) -> -c | y
      if (isAllOnesConstant(TrueV)) {
        SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
        SDValue Neg = DAG.getNegative(C, DL, VT);
        return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
      }
      // (select c, y, -1) -> -!c | y
      if (isAllOnesConstant(FalseV)) {
        SDValue C =
            DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
        SDValue Neg = DAG.getNegative(C, DL, VT);
        return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
      }

      // (select c, 0, y) -> -!c & y
      if (isNullConstant(TrueV)) {
        SDValue C =
            DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
        SDValue Neg = DAG.getNegative(C, DL, VT);
        return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
      }
      // (select c, y, 0) -> -c & y
      if (isNullConstant(FalseV)) {
        SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
        SDValue Neg = DAG.getNegative(C, DL, VT);
        return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
      }
      // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
      // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
      if (((isOneConstant(FalseV) && LHS == TrueV &&
            CCVal == ISD::CondCode::SETNE) ||
           (isOneConstant(TrueV) && LHS == FalseV &&
            CCVal == ISD::CondCode::SETEQ)) &&
          isNullConstant(RHS)) {
        // Freeze LHS so its two uses (setcc and add) observe the same value.
        LHS = DAG.getFreeze(LHS);
        SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
        return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
      }
    }

    // If both true/false are an xor with 1, pull through the select.
    // This can occur after op legalization if both
operands are setccs that17004// require an xor to invert.17005// FIXME: Generalize to other binary ops with identical operand?17006if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&17007TrueV.getOperand(1) == FalseV.getOperand(1) &&17008isOneConstant(TrueV.getOperand(1)) &&17009TrueV.hasOneUse() && FalseV.hasOneUse()) {17010SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,17011TrueV.getOperand(0), FalseV.getOperand(0));17012return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));17013}1701417015return SDValue();17016}17017case RISCVISD::BR_CC: {17018SDValue LHS = N->getOperand(1);17019SDValue RHS = N->getOperand(2);17020SDValue CC = N->getOperand(3);17021SDLoc DL(N);1702217023if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))17024return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),17025N->getOperand(0), LHS, RHS, CC, N->getOperand(4));1702617027return SDValue();17028}17029case ISD::BITREVERSE:17030return performBITREVERSECombine(N, DAG, Subtarget);17031case ISD::FP_TO_SINT:17032case ISD::FP_TO_UINT:17033return performFP_TO_INTCombine(N, DCI, Subtarget);17034case ISD::FP_TO_SINT_SAT:17035case ISD::FP_TO_UINT_SAT:17036return performFP_TO_INT_SATCombine(N, DCI, Subtarget);17037case ISD::FCOPYSIGN: {17038EVT VT = N->getValueType(0);17039if (!VT.isVector())17040break;17041// There is a form of VFSGNJ which injects the negated sign of its second17042// operand. Try and bubble any FNEG up after the extend/round to produce17043// this optimized pattern. Avoid modifying cases where FP_ROUND and17044// TRUNC=1.17045SDValue In2 = N->getOperand(1);17046// Avoid cases where the extend/round has multiple uses, as duplicating17047// those is typically more expensive than removing a fneg.17048if (!In2.hasOneUse())17049break;17050if (In2.getOpcode() != ISD::FP_EXTEND &&17051(In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))17052break;17053In2 = In2.getOperand(0);17054if (In2.getOpcode() != ISD::FNEG)17055break;17056SDLoc DL(N);17057SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);17058return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),17059DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));17060}17061case ISD::MGATHER: {17062const auto *MGN = cast<MaskedGatherSDNode>(N);17063const EVT VT = N->getValueType(0);17064SDValue Index = MGN->getIndex();17065SDValue ScaleOp = MGN->getScale();17066ISD::MemIndexType IndexType = MGN->getIndexType();17067assert(!MGN->isIndexScaled() &&17068"Scaled gather/scatter should not be formed");1706917070SDLoc DL(N);17071if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))17072return DAG.getMaskedGather(17073N->getVTList(), MGN->getMemoryVT(), DL,17074{MGN->getChain(), MGN->getPassThru(), MGN->getMask(),17075MGN->getBasePtr(), Index, ScaleOp},17076MGN->getMemOperand(), IndexType, MGN->getExtensionType());1707717078if (narrowIndex(Index, IndexType, DAG))17079return DAG.getMaskedGather(17080N->getVTList(), MGN->getMemoryVT(), DL,17081{MGN->getChain(), MGN->getPassThru(), MGN->getMask(),17082MGN->getBasePtr(), Index, ScaleOp},17083MGN->getMemOperand(), IndexType, MGN->getExtensionType());1708417085if (Index.getOpcode() == ISD::BUILD_VECTOR &&17086MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {17087// The sequence will be XLenVT, not the type of Index. 
Tell17088// isSimpleVIDSequence this so we avoid overflow.17089if (std::optional<VIDSequence> SimpleVID =17090isSimpleVIDSequence(Index, Subtarget.getXLen());17091SimpleVID && SimpleVID->StepDenominator == 1) {17092const int64_t StepNumerator = SimpleVID->StepNumerator;17093const int64_t Addend = SimpleVID->Addend;1709417095// Note: We don't need to check alignment here since (by assumption17096// from the existance of the gather), our offsets must be sufficiently17097// aligned.1709817099const EVT PtrVT = getPointerTy(DAG.getDataLayout());17100assert(MGN->getBasePtr()->getValueType(0) == PtrVT);17101assert(IndexType == ISD::UNSIGNED_SCALED);17102SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),17103DAG.getConstant(Addend, DL, PtrVT));1710417105SDValue EVL = DAG.getElementCount(DL, Subtarget.getXLenVT(),17106VT.getVectorElementCount());17107SDValue StridedLoad =17108DAG.getStridedLoadVP(VT, DL, MGN->getChain(), BasePtr,17109DAG.getConstant(StepNumerator, DL, XLenVT),17110MGN->getMask(), EVL, MGN->getMemOperand());17111SDValue VPSelect = DAG.getNode(ISD::VP_SELECT, DL, VT, MGN->getMask(),17112StridedLoad, MGN->getPassThru(), EVL);17113return DAG.getMergeValues({VPSelect, SDValue(StridedLoad.getNode(), 1)},17114DL);17115}17116}1711717118SmallVector<int> ShuffleMask;17119if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&17120matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {17121SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),17122MGN->getBasePtr(), DAG.getUNDEF(XLenVT),17123MGN->getMask(), DAG.getUNDEF(VT),17124MGN->getMemoryVT(), MGN->getMemOperand(),17125ISD::UNINDEXED, ISD::NON_EXTLOAD);17126SDValue Shuffle =17127DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);17128return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);17129}1713017131if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&17132matchIndexAsWiderOp(VT, Index, MGN->getMask(),17133MGN->getMemOperand()->getBaseAlign(), Subtarget)) {17134SmallVector<SDValue> NewIndices;17135for (unsigned i = 0; i < Index->getNumOperands(); i += 2)17136NewIndices.push_back(Index.getOperand(i));17137EVT IndexVT = Index.getValueType()17138.getHalfNumVectorElementsVT(*DAG.getContext());17139Index = DAG.getBuildVector(IndexVT, DL, NewIndices);1714017141unsigned ElementSize = VT.getScalarStoreSize();17142EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);17143auto EltCnt = VT.getVectorElementCount();17144assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");17145EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,17146EltCnt.divideCoefficientBy(2));17147SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());17148EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,17149EltCnt.divideCoefficientBy(2));17150SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));1715117152SDValue Gather =17153DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,17154{MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),17155Index, ScaleOp},17156MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);17157SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));17158return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);17159}17160break;17161}17162case ISD::MSCATTER:{17163const auto *MSN = cast<MaskedScatterSDNode>(N);17164SDValue Index = MSN->getIndex();17165SDValue ScaleOp = MSN->getScale();17166ISD::MemIndexType IndexType = MSN->getIndexType();17167assert(!MSN->isIndexScaled() &&17168"Scaled gather/scatter should not be 
formed");1716917170SDLoc DL(N);17171if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))17172return DAG.getMaskedScatter(17173N->getVTList(), MSN->getMemoryVT(), DL,17174{MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),17175Index, ScaleOp},17176MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());1717717178if (narrowIndex(Index, IndexType, DAG))17179return DAG.getMaskedScatter(17180N->getVTList(), MSN->getMemoryVT(), DL,17181{MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),17182Index, ScaleOp},17183MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());1718417185EVT VT = MSN->getValue()->getValueType(0);17186SmallVector<int> ShuffleMask;17187if (!MSN->isTruncatingStore() &&17188matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {17189SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),17190DAG.getUNDEF(VT), ShuffleMask);17191return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),17192DAG.getUNDEF(XLenVT), MSN->getMask(),17193MSN->getMemoryVT(), MSN->getMemOperand(),17194ISD::UNINDEXED, false);17195}17196break;17197}17198case ISD::VP_GATHER: {17199const auto *VPGN = cast<VPGatherSDNode>(N);17200SDValue Index = VPGN->getIndex();17201SDValue ScaleOp = VPGN->getScale();17202ISD::MemIndexType IndexType = VPGN->getIndexType();17203assert(!VPGN->isIndexScaled() &&17204"Scaled gather/scatter should not be formed");1720517206SDLoc DL(N);17207if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))17208return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,17209{VPGN->getChain(), VPGN->getBasePtr(), Index,17210ScaleOp, VPGN->getMask(),17211VPGN->getVectorLength()},17212VPGN->getMemOperand(), IndexType);1721317214if (narrowIndex(Index, IndexType, DAG))17215return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,17216{VPGN->getChain(), VPGN->getBasePtr(), Index,17217ScaleOp, VPGN->getMask(),17218VPGN->getVectorLength()},17219VPGN->getMemOperand(), IndexType);1722017221break;17222}17223case ISD::VP_SCATTER: {17224const auto *VPSN = cast<VPScatterSDNode>(N);17225SDValue Index = VPSN->getIndex();17226SDValue ScaleOp = VPSN->getScale();17227ISD::MemIndexType IndexType = VPSN->getIndexType();17228assert(!VPSN->isIndexScaled() &&17229"Scaled gather/scatter should not be formed");1723017231SDLoc DL(N);17232if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))17233return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,17234{VPSN->getChain(), VPSN->getValue(),17235VPSN->getBasePtr(), Index, ScaleOp,17236VPSN->getMask(), VPSN->getVectorLength()},17237VPSN->getMemOperand(), IndexType);1723817239if (narrowIndex(Index, IndexType, DAG))17240return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,17241{VPSN->getChain(), VPSN->getValue(),17242VPSN->getBasePtr(), Index, ScaleOp,17243VPSN->getMask(), VPSN->getVectorLength()},17244VPSN->getMemOperand(), IndexType);17245break;17246}17247case RISCVISD::SHL_VL:17248if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))17249return V;17250[[fallthrough]];17251case RISCVISD::SRA_VL:17252case RISCVISD::SRL_VL: {17253SDValue ShAmt = N->getOperand(1);17254if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {17255// We don't need the upper 32 bits of a 64-bit element for a shift amount.17256SDLoc DL(N);17257SDValue VL = N->getOperand(4);17258EVT VT = N->getValueType(0);17259ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),17260ShAmt.getOperand(1), VL);17261return DAG.getNode(N->getOpcode(), DL, VT, 
N->getOperand(0), ShAmt,17262N->getOperand(2), N->getOperand(3), N->getOperand(4));17263}17264break;17265}17266case ISD::SRA:17267if (SDValue V = performSRACombine(N, DAG, Subtarget))17268return V;17269[[fallthrough]];17270case ISD::SRL:17271case ISD::SHL: {17272if (N->getOpcode() == ISD::SHL) {17273if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))17274return V;17275}17276SDValue ShAmt = N->getOperand(1);17277if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {17278// We don't need the upper 32 bits of a 64-bit element for a shift amount.17279SDLoc DL(N);17280EVT VT = N->getValueType(0);17281ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),17282ShAmt.getOperand(1),17283DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));17284return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);17285}17286break;17287}17288case RISCVISD::ADD_VL:17289if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))17290return V;17291return combineToVWMACC(N, DAG, Subtarget);17292case RISCVISD::VWADD_W_VL:17293case RISCVISD::VWADDU_W_VL:17294case RISCVISD::VWSUB_W_VL:17295case RISCVISD::VWSUBU_W_VL:17296return performVWADDSUBW_VLCombine(N, DCI, Subtarget);17297case RISCVISD::SUB_VL:17298case RISCVISD::MUL_VL:17299return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);17300case RISCVISD::VFMADD_VL:17301case RISCVISD::VFNMADD_VL:17302case RISCVISD::VFMSUB_VL:17303case RISCVISD::VFNMSUB_VL:17304case RISCVISD::STRICT_VFMADD_VL:17305case RISCVISD::STRICT_VFNMADD_VL:17306case RISCVISD::STRICT_VFMSUB_VL:17307case RISCVISD::STRICT_VFNMSUB_VL:17308return performVFMADD_VLCombine(N, DAG, Subtarget);17309case RISCVISD::FADD_VL:17310case RISCVISD::FSUB_VL:17311case RISCVISD::FMUL_VL:17312case RISCVISD::VFWADD_W_VL:17313case RISCVISD::VFWSUB_W_VL: {17314if (N->getValueType(0).getVectorElementType() == MVT::f32 &&17315!Subtarget.hasVInstructionsF16())17316return SDValue();17317return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);17318}17319case ISD::LOAD:17320case ISD::STORE: {17321if (DCI.isAfterLegalizeDAG())17322if (SDValue V = performMemPairCombine(N, DCI))17323return V;1732417325if (N->getOpcode() != ISD::STORE)17326break;1732717328auto *Store = cast<StoreSDNode>(N);17329SDValue Chain = Store->getChain();17330EVT MemVT = Store->getMemoryVT();17331SDValue Val = Store->getValue();17332SDLoc DL(N);1733317334bool IsScalarizable =17335MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&17336Store->isSimple() &&17337MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&17338isPowerOf2_64(MemVT.getSizeInBits()) &&17339MemVT.getSizeInBits() <= Subtarget.getXLen();1734017341// If sufficiently aligned we can scalarize stores of constant vectors of17342// any power-of-two size up to XLen bits, provided that they aren't too17343// expensive to materialize.17344// vsetivli zero, 2, e8, m1, ta, ma17345// vmv.v.i v8, 417346// vse64.v v8, (a0)17347// ->17348// li a1, 102817349// sh a1, 0(a0)17350if (DCI.isBeforeLegalize() && IsScalarizable &&17351ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {17352// Get the constant vector bits17353APInt NewC(Val.getValueSizeInBits(), 0);17354uint64_t EltSize = Val.getScalarValueSizeInBits();17355for (unsigned i = 0; i < Val.getNumOperands(); i++) {17356if (Val.getOperand(i).isUndef())17357continue;17358NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),17359i * EltSize);17360}17361MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());1736217363if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), 
Subtarget,17364true) <= 2 &&17365allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),17366NewVT, *Store->getMemOperand())) {17367SDValue NewV = DAG.getConstant(NewC, DL, NewVT);17368return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),17369Store->getPointerInfo(), Store->getOriginalAlign(),17370Store->getMemOperand()->getFlags());17371}17372}1737317374// Similarly, if sufficiently aligned we can scalarize vector copies, e.g.17375// vsetivli zero, 2, e16, m1, ta, ma17376// vle16.v v8, (a0)17377// vse16.v v8, (a1)17378if (auto *L = dyn_cast<LoadSDNode>(Val);17379L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&17380L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&17381Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&17382L->getMemoryVT() == MemVT) {17383MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());17384if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),17385NewVT, *Store->getMemOperand()) &&17386allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),17387NewVT, *L->getMemOperand())) {17388SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),17389L->getPointerInfo(), L->getOriginalAlign(),17390L->getMemOperand()->getFlags());17391return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),17392Store->getPointerInfo(), Store->getOriginalAlign(),17393Store->getMemOperand()->getFlags());17394}17395}1739617397// Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.17398// vfmv.f.s is represented as extract element from 0. Match it late to avoid17399// any illegal types.17400if (Val.getOpcode() == RISCVISD::VMV_X_S ||17401(DCI.isAfterLegalizeDAG() &&17402Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&17403isNullConstant(Val.getOperand(1)))) {17404SDValue Src = Val.getOperand(0);17405MVT VecVT = Src.getSimpleValueType();17406// VecVT should be scalable and memory VT should match the element type.17407if (!Store->isIndexed() && VecVT.isScalableVector() &&17408MemVT == VecVT.getVectorElementType()) {17409SDLoc DL(N);17410MVT MaskVT = getMaskTypeFor(VecVT);17411return DAG.getStoreVP(17412Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),17413DAG.getConstant(1, DL, MaskVT),17414DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,17415Store->getMemOperand(), Store->getAddressingMode(),17416Store->isTruncatingStore(), /*IsCompress*/ false);17417}17418}1741917420break;17421}17422case ISD::SPLAT_VECTOR: {17423EVT VT = N->getValueType(0);17424// Only perform this combine on legal MVT types.17425if (!isTypeLegal(VT))17426break;17427if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,17428DAG, Subtarget))17429return Gather;17430break;17431}17432case ISD::BUILD_VECTOR:17433if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))17434return V;17435break;17436case ISD::CONCAT_VECTORS:17437if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))17438return V;17439break;17440case ISD::INSERT_VECTOR_ELT:17441if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))17442return V;17443break;17444case RISCVISD::VFMV_V_F_VL: {17445const MVT VT = N->getSimpleValueType(0);17446SDValue Passthru = N->getOperand(0);17447SDValue Scalar = N->getOperand(1);17448SDValue VL = N->getOperand(2);1744917450// If VL is 1, we can use vfmv.s.f.17451if (isOneConstant(VL))17452return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);17453break;17454}17455case RISCVISD::VMV_V_X_VL: {17456const MVT VT = 
N->getSimpleValueType(0);17457SDValue Passthru = N->getOperand(0);17458SDValue Scalar = N->getOperand(1);17459SDValue VL = N->getOperand(2);1746017461// Tail agnostic VMV.V.X only demands the vector element bitwidth from the17462// scalar input.17463unsigned ScalarSize = Scalar.getValueSizeInBits();17464unsigned EltWidth = VT.getScalarSizeInBits();17465if (ScalarSize > EltWidth && Passthru.isUndef())17466if (SimplifyDemandedLowBitsHelper(1, EltWidth))17467return SDValue(N, 0);1746817469// If VL is 1 and the scalar value won't benefit from immediate, we can17470// use vmv.s.x.17471ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);17472if (isOneConstant(VL) &&17473(!Const || Const->isZero() ||17474!Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))17475return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);1747617477break;17478}17479case RISCVISD::VFMV_S_F_VL: {17480SDValue Src = N->getOperand(1);17481// Try to remove vector->scalar->vector if the scalar->vector is inserting17482// into an undef vector.17483// TODO: Could use a vslide or vmv.v.v for non-undef.17484if (N->getOperand(0).isUndef() &&17485Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&17486isNullConstant(Src.getOperand(1)) &&17487Src.getOperand(0).getValueType().isScalableVector()) {17488EVT VT = N->getValueType(0);17489EVT SrcVT = Src.getOperand(0).getValueType();17490assert(SrcVT.getVectorElementType() == VT.getVectorElementType());17491// Widths match, just return the original vector.17492if (SrcVT == VT)17493return Src.getOperand(0);17494// TODO: Use insert_subvector/extract_subvector to change widen/narrow?17495}17496[[fallthrough]];17497}17498case RISCVISD::VMV_S_X_VL: {17499const MVT VT = N->getSimpleValueType(0);17500SDValue Passthru = N->getOperand(0);17501SDValue Scalar = N->getOperand(1);17502SDValue VL = N->getOperand(2);1750317504if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&17505Scalar.getOperand(0).getValueType() == N->getValueType(0))17506return Scalar.getOperand(0);1750717508// Use M1 or smaller to avoid over constraining register allocation17509const MVT M1VT = getLMUL1VT(VT);17510if (M1VT.bitsLT(VT)) {17511SDValue M1Passthru =17512DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru,17513DAG.getVectorIdxConstant(0, DL));17514SDValue Result =17515DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);17516Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result,17517DAG.getVectorIdxConstant(0, DL));17518return Result;17519}1752017521// We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or17522// higher would involve overly constraining the register allocator for17523// no purpose.17524if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);17525Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&17526VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())17527return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);1752817529break;17530}17531case RISCVISD::VMV_X_S: {17532SDValue Vec = N->getOperand(0);17533MVT VecVT = N->getOperand(0).getSimpleValueType();17534const MVT M1VT = getLMUL1VT(VecVT);17535if (M1VT.bitsLT(VecVT)) {17536Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,17537DAG.getVectorIdxConstant(0, DL));17538return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);17539}17540break;17541}17542case ISD::INTRINSIC_VOID:17543case ISD::INTRINSIC_W_CHAIN:17544case ISD::INTRINSIC_WO_CHAIN: {17545unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 
0 : 1;17546unsigned IntNo = N->getConstantOperandVal(IntOpNo);17547switch (IntNo) {17548// By default we do not combine any intrinsic.17549default:17550return SDValue();17551case Intrinsic::riscv_masked_strided_load: {17552MVT VT = N->getSimpleValueType(0);17553auto *Load = cast<MemIntrinsicSDNode>(N);17554SDValue PassThru = N->getOperand(2);17555SDValue Base = N->getOperand(3);17556SDValue Stride = N->getOperand(4);17557SDValue Mask = N->getOperand(5);1755817559// If the stride is equal to the element size in bytes, we can use17560// a masked.load.17561const unsigned ElementSize = VT.getScalarStoreSize();17562if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);17563StrideC && StrideC->getZExtValue() == ElementSize)17564return DAG.getMaskedLoad(VT, DL, Load->getChain(), Base,17565DAG.getUNDEF(XLenVT), Mask, PassThru,17566Load->getMemoryVT(), Load->getMemOperand(),17567ISD::UNINDEXED, ISD::NON_EXTLOAD);17568return SDValue();17569}17570case Intrinsic::riscv_masked_strided_store: {17571auto *Store = cast<MemIntrinsicSDNode>(N);17572SDValue Value = N->getOperand(2);17573SDValue Base = N->getOperand(3);17574SDValue Stride = N->getOperand(4);17575SDValue Mask = N->getOperand(5);1757617577// If the stride is equal to the element size in bytes, we can use17578// a masked.store.17579const unsigned ElementSize = Value.getValueType().getScalarStoreSize();17580if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);17581StrideC && StrideC->getZExtValue() == ElementSize)17582return DAG.getMaskedStore(Store->getChain(), DL, Value, Base,17583DAG.getUNDEF(XLenVT), Mask,17584Value.getValueType(), Store->getMemOperand(),17585ISD::UNINDEXED, false);17586return SDValue();17587}17588case Intrinsic::riscv_vcpop:17589case Intrinsic::riscv_vcpop_mask:17590case Intrinsic::riscv_vfirst:17591case Intrinsic::riscv_vfirst_mask: {17592SDValue VL = N->getOperand(2);17593if (IntNo == Intrinsic::riscv_vcpop_mask ||17594IntNo == Intrinsic::riscv_vfirst_mask)17595VL = N->getOperand(3);17596if (!isNullConstant(VL))17597return SDValue();17598// If VL is 0, vcpop -> li 0, vfirst -> li -1.17599SDLoc DL(N);17600EVT VT = N->getValueType(0);17601if (IntNo == Intrinsic::riscv_vfirst ||17602IntNo == Intrinsic::riscv_vfirst_mask)17603return DAG.getConstant(-1, DL, VT);17604return DAG.getConstant(0, DL, VT);17605}17606}17607}17608case ISD::BITCAST: {17609assert(Subtarget.useRVVForFixedLengthVectors());17610SDValue N0 = N->getOperand(0);17611EVT VT = N->getValueType(0);17612EVT SrcVT = N0.getValueType();17613// If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer17614// type, widen both sides to avoid a trip through memory.17615if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&17616VT.isScalarInteger()) {17617unsigned NumConcats = 8 / SrcVT.getVectorNumElements();17618SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));17619Ops[0] = N0;17620SDLoc DL(N);17621N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);17622N0 = DAG.getBitcast(MVT::i8, N0);17623return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);17624}1762517626return SDValue();17627}17628}1762917630return SDValue();17631}1763217633bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(17634EVT XVT, unsigned KeptBits) const {17635// For vectors, we don't have a preference..17636if (XVT.isVector())17637return false;1763817639if (XVT != MVT::i32 && XVT != MVT::i64)17640return false;1764117642// We can use sext.w for RV64 or an srai 31 on RV32.17643if (KeptBits == 32 || KeptBits == 64)17644return true;1764517646// With Zbb we 
can use sext.h/sext.b.17647return Subtarget.hasStdExtZbb() &&17648((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||17649KeptBits == 16);17650}1765117652bool RISCVTargetLowering::isDesirableToCommuteWithShift(17653const SDNode *N, CombineLevel Level) const {17654assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||17655N->getOpcode() == ISD::SRL) &&17656"Expected shift op");1765717658// The following folds are only desirable if `(OP _, c1 << c2)` can be17659// materialised in fewer instructions than `(OP _, c1)`:17660//17661// (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)17662// (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)17663SDValue N0 = N->getOperand(0);17664EVT Ty = N0.getValueType();17665if (Ty.isScalarInteger() &&17666(N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {17667auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));17668auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));17669if (C1 && C2) {17670const APInt &C1Int = C1->getAPIntValue();17671APInt ShiftedC1Int = C1Int << C2->getAPIntValue();1767217673// We can materialise `c1 << c2` into an add immediate, so it's "free",17674// and the combine should happen, to potentially allow further combines17675// later.17676if (ShiftedC1Int.getSignificantBits() <= 64 &&17677isLegalAddImmediate(ShiftedC1Int.getSExtValue()))17678return true;1767917680// We can materialise `c1` in an add immediate, so it's "free", and the17681// combine should be prevented.17682if (C1Int.getSignificantBits() <= 64 &&17683isLegalAddImmediate(C1Int.getSExtValue()))17684return false;1768517686// Neither constant will fit into an immediate, so find materialisation17687// costs.17688int C1Cost =17689RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,17690/*CompressionCost*/ true);17691int ShiftedC1Cost = RISCVMatInt::getIntMatCost(17692ShiftedC1Int, Ty.getSizeInBits(), Subtarget,17693/*CompressionCost*/ true);1769417695// Materialising `c1` is cheaper than materialising `c1 << c2`, so the17696// combine should be prevented.17697if (C1Cost < ShiftedC1Cost)17698return false;17699}17700}17701return true;17702}1770317704bool RISCVTargetLowering::targetShrinkDemandedConstant(17705SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,17706TargetLoweringOpt &TLO) const {17707// Delay this optimization as late as possible.17708if (!TLO.LegalOps)17709return false;1771017711EVT VT = Op.getValueType();17712if (VT.isVector())17713return false;1771417715unsigned Opcode = Op.getOpcode();17716if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)17717return false;1771817719ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));17720if (!C)17721return false;1772217723const APInt &Mask = C->getAPIntValue();1772417725// Clear all non-demanded bits initially.17726APInt ShrunkMask = Mask & DemandedBits;1772717728// Try to make a smaller immediate by setting undemanded bits.1772917730APInt ExpandedMask = Mask | ~DemandedBits;1773117732auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {17733return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);17734};17735auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {17736if (NewMask == Mask)17737return true;17738SDLoc DL(Op);17739SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());17740SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),17741Op.getOperand(0), NewC);17742return TLO.CombineTo(Op, NewOp);17743};1774417745// If the shrunk mask fits in sign extended 12 
bits, let the target17746// independent code apply it.17747if (ShrunkMask.isSignedIntN(12))17748return false;1774917750// And has a few special cases for zext.17751if (Opcode == ISD::AND) {17752// Preserve (and X, 0xffff), if zext.h exists use zext.h,17753// otherwise use SLLI + SRLI.17754APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);17755if (IsLegalMask(NewMask))17756return UseMask(NewMask);1775717758// Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.17759if (VT == MVT::i64) {17760APInt NewMask = APInt(64, 0xffffffff);17761if (IsLegalMask(NewMask))17762return UseMask(NewMask);17763}17764}1776517766// For the remaining optimizations, we need to be able to make a negative17767// number through a combination of mask and undemanded bits.17768if (!ExpandedMask.isNegative())17769return false;1777017771// What is the fewest number of bits we need to represent the negative number.17772unsigned MinSignedBits = ExpandedMask.getSignificantBits();1777317774// Try to make a 12 bit negative immediate. If that fails try to make a 3217775// bit negative immediate unless the shrunk immediate already fits in 32 bits.17776// If we can't create a simm12, we shouldn't change opaque constants.17777APInt NewMask = ShrunkMask;17778if (MinSignedBits <= 12)17779NewMask.setBitsFrom(11);17780else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))17781NewMask.setBitsFrom(31);17782else17783return false;1778417785// Check that our new mask is a subset of the demanded mask.17786assert(IsLegalMask(NewMask));17787return UseMask(NewMask);17788}1778917790static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {17791static const uint64_t GREVMasks[] = {177920x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,177930x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};1779417795for (unsigned Stage = 0; Stage != 6; ++Stage) {17796unsigned Shift = 1 << Stage;17797if (ShAmt & Shift) {17798uint64_t Mask = GREVMasks[Stage];17799uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);17800if (IsGORC)17801Res |= x;17802x = Res;17803}17804}1780517806return x;17807}1780817809void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,17810KnownBits &Known,17811const APInt &DemandedElts,17812const SelectionDAG &DAG,17813unsigned Depth) const {17814unsigned BitWidth = Known.getBitWidth();17815unsigned Opc = Op.getOpcode();17816assert((Opc >= ISD::BUILTIN_OP_END ||17817Opc == ISD::INTRINSIC_WO_CHAIN ||17818Opc == ISD::INTRINSIC_W_CHAIN ||17819Opc == ISD::INTRINSIC_VOID) &&17820"Should use MaskedValueIsZero if you don't know whether Op"17821" is a target node!");1782217823Known.resetAll();17824switch (Opc) {17825default: break;17826case RISCVISD::SELECT_CC: {17827Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);17828// If we don't know any bits, early out.17829if (Known.isUnknown())17830break;17831KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);1783217833// Only known if known in both the LHS and RHS.17834Known = Known.intersectWith(Known2);17835break;17836}17837case RISCVISD::CZERO_EQZ:17838case RISCVISD::CZERO_NEZ:17839Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);17840// Result is either all zero or operand 0. 
We can propagate zeros, but not17841// ones.17842Known.One.clearAllBits();17843break;17844case RISCVISD::REMUW: {17845KnownBits Known2;17846Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);17847Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);17848// We only care about the lower 32 bits.17849Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));17850// Restore the original width by sign extending.17851Known = Known.sext(BitWidth);17852break;17853}17854case RISCVISD::DIVUW: {17855KnownBits Known2;17856Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);17857Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);17858// We only care about the lower 32 bits.17859Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));17860// Restore the original width by sign extending.17861Known = Known.sext(BitWidth);17862break;17863}17864case RISCVISD::SLLW: {17865KnownBits Known2;17866Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);17867Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);17868Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));17869// Restore the original width by sign extending.17870Known = Known.sext(BitWidth);17871break;17872}17873case RISCVISD::CTZW: {17874KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);17875unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();17876unsigned LowBits = llvm::bit_width(PossibleTZ);17877Known.Zero.setBitsFrom(LowBits);17878break;17879}17880case RISCVISD::CLZW: {17881KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);17882unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();17883unsigned LowBits = llvm::bit_width(PossibleLZ);17884Known.Zero.setBitsFrom(LowBits);17885break;17886}17887case RISCVISD::BREV8:17888case RISCVISD::ORC_B: {17889// FIXME: This is based on the non-ratified Zbp GREV and GORC where a17890// control value of 7 is equivalent to brev8 and orc.b.17891Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);17892bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;17893// To compute zeros, we need to invert the value and invert it back after.17894Known.Zero =17895~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);17896Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);17897break;17898}17899case RISCVISD::READ_VLENB: {17900// We can use the minimum and maximum VLEN values to bound VLENB. We17901// know VLEN must be a power of two.17902const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;17903const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;17904assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");17905Known.Zero.setLowBits(Log2_32(MinVLenB));17906Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);17907if (MaxVLenB == MinVLenB)17908Known.One.setBit(Log2_32(MinVLenB));17909break;17910}17911case RISCVISD::FCLASS: {17912// fclass will only set one of the low 10 bits.17913Known.Zero.setBitsFrom(10);17914break;17915}17916case ISD::INTRINSIC_W_CHAIN:17917case ISD::INTRINSIC_WO_CHAIN: {17918unsigned IntNo =17919Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 
0 : 1);17920switch (IntNo) {17921default:17922// We can't do anything for most intrinsics.17923break;17924case Intrinsic::riscv_vsetvli:17925case Intrinsic::riscv_vsetvlimax: {17926bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;17927unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);17928RISCVII::VLMUL VLMUL =17929static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));17930unsigned SEW = RISCVVType::decodeVSEW(VSEW);17931auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);17932uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;17933MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;1793417935// Result of vsetvli must be not larger than AVL.17936if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))17937MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));1793817939unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;17940if (BitWidth > KnownZeroFirstBit)17941Known.Zero.setBitsFrom(KnownZeroFirstBit);17942break;17943}17944}17945break;17946}17947}17948}1794917950unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(17951SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,17952unsigned Depth) const {17953switch (Op.getOpcode()) {17954default:17955break;17956case RISCVISD::SELECT_CC: {17957unsigned Tmp =17958DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);17959if (Tmp == 1) return 1; // Early out.17960unsigned Tmp2 =17961DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);17962return std::min(Tmp, Tmp2);17963}17964case RISCVISD::CZERO_EQZ:17965case RISCVISD::CZERO_NEZ:17966// Output is either all zero or operand 0. We can propagate sign bit count17967// from operand 0.17968return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);17969case RISCVISD::ABSW: {17970// We expand this at isel to negw+max. The result will have 33 sign bits17971// if the input has at least 33 sign bits.17972unsigned Tmp =17973DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);17974if (Tmp < 33) return 1;17975return 33;17976}17977case RISCVISD::SLLW:17978case RISCVISD::SRAW:17979case RISCVISD::SRLW:17980case RISCVISD::DIVW:17981case RISCVISD::DIVUW:17982case RISCVISD::REMUW:17983case RISCVISD::ROLW:17984case RISCVISD::RORW:17985case RISCVISD::FCVT_W_RV64:17986case RISCVISD::FCVT_WU_RV64:17987case RISCVISD::STRICT_FCVT_W_RV64:17988case RISCVISD::STRICT_FCVT_WU_RV64:17989// TODO: As the result is sign-extended, this is conservatively correct. A17990// more precise answer could be calculated for SRAW depending on known17991// bits in the shift amount.17992return 33;17993case RISCVISD::VMV_X_S: {17994// The number of sign bits of the scalar result is computed by obtaining the17995// element type of the input vector operand, subtracting its width from the17996// XLEN, and then adding one (sign bit within the element type). 
If the17997// element type is wider than XLen, the least-significant XLEN bits are17998// taken.17999unsigned XLen = Subtarget.getXLen();18000unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();18001if (EltBits <= XLen)18002return XLen - EltBits + 1;18003break;18004}18005case ISD::INTRINSIC_W_CHAIN: {18006unsigned IntNo = Op.getConstantOperandVal(1);18007switch (IntNo) {18008default:18009break;18010case Intrinsic::riscv_masked_atomicrmw_xchg_i64:18011case Intrinsic::riscv_masked_atomicrmw_add_i64:18012case Intrinsic::riscv_masked_atomicrmw_sub_i64:18013case Intrinsic::riscv_masked_atomicrmw_nand_i64:18014case Intrinsic::riscv_masked_atomicrmw_max_i64:18015case Intrinsic::riscv_masked_atomicrmw_min_i64:18016case Intrinsic::riscv_masked_atomicrmw_umax_i64:18017case Intrinsic::riscv_masked_atomicrmw_umin_i64:18018case Intrinsic::riscv_masked_cmpxchg_i64:18019// riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated18020// narrow atomic operation. These are implemented using atomic18021// operations at the minimum supported atomicrmw/cmpxchg width whose18022// result is then sign extended to XLEN. With +A, the minimum width is18023// 32 for both 64 and 32.18024assert(Subtarget.getXLen() == 64);18025assert(getMinCmpXchgSizeInBits() == 32);18026assert(Subtarget.hasStdExtA());18027return 33;18028}18029break;18030}18031}1803218033return 1;18034}1803518036bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(18037SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,18038bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {1803918040// TODO: Add more target nodes.18041switch (Op.getOpcode()) {18042case RISCVISD::SELECT_CC:18043// Integer select_cc cannot create poison.18044// TODO: What are the FP poison semantics?18045// TODO: This instruction blocks poison from the unselected operand, can18046// we do anything with that?18047return !Op.getValueType().isInteger();18048}18049return TargetLowering::canCreateUndefOrPoisonForTargetNode(18050Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);18051}1805218053const Constant *18054RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {18055assert(Ld && "Unexpected null LoadSDNode");18056if (!ISD::isNormalLoad(Ld))18057return nullptr;1805818059SDValue Ptr = Ld->getBasePtr();1806018061// Only constant pools with no offset are supported.18062auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {18063auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);18064if (!CNode || CNode->isMachineConstantPoolEntry() ||18065CNode->getOffset() != 0)18066return nullptr;1806718068return CNode;18069};1807018071// Simple case, LLA.18072if (Ptr.getOpcode() == RISCVISD::LLA) {18073auto *CNode = GetSupportedConstantPool(Ptr);18074if (!CNode || CNode->getTargetFlags() != 0)18075return nullptr;1807618077return CNode->getConstVal();18078}1807918080// Look for a HI and ADD_LO pair.18081if (Ptr.getOpcode() != RISCVISD::ADD_LO ||18082Ptr.getOperand(0).getOpcode() != RISCVISD::HI)18083return nullptr;1808418085auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));18086auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));1808718088if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||18089!CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)18090return nullptr;1809118092if (CNodeLo->getConstVal() != CNodeHi->getConstVal())18093return nullptr;1809418095return CNodeLo->getConstVal();18096}1809718098static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr 
&MI,18099MachineBasicBlock *BB) {18100assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");1810118102// To read a 64-bit counter CSR on a 32-bit target, we read the two halves.18103// Should the count have wrapped while it was being read, we need to try18104// again.18105// For example:18106// ```18107// read:18108// csrrs x3, counterh # load high word of counter18109// csrrs x2, counter # load low word of counter18110// csrrs x4, counterh # load high word of counter18111// bne x3, x4, read # check if high word reads match, otherwise try again18112// ```1811318114MachineFunction &MF = *BB->getParent();18115const BasicBlock *LLVMBB = BB->getBasicBlock();18116MachineFunction::iterator It = ++BB->getIterator();1811718118MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);18119MF.insert(It, LoopMBB);1812018121MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);18122MF.insert(It, DoneMBB);1812318124// Transfer the remainder of BB and its successor edges to DoneMBB.18125DoneMBB->splice(DoneMBB->begin(), BB,18126std::next(MachineBasicBlock::iterator(MI)), BB->end());18127DoneMBB->transferSuccessorsAndUpdatePHIs(BB);1812818129BB->addSuccessor(LoopMBB);1813018131MachineRegisterInfo &RegInfo = MF.getRegInfo();18132Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);18133Register LoReg = MI.getOperand(0).getReg();18134Register HiReg = MI.getOperand(1).getReg();18135int64_t LoCounter = MI.getOperand(2).getImm();18136int64_t HiCounter = MI.getOperand(3).getImm();18137DebugLoc DL = MI.getDebugLoc();1813818139const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();18140BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)18141.addImm(HiCounter)18142.addReg(RISCV::X0);18143BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)18144.addImm(LoCounter)18145.addReg(RISCV::X0);18146BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)18147.addImm(HiCounter)18148.addReg(RISCV::X0);1814918150BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))18151.addReg(HiReg)18152.addReg(ReadAgainReg)18153.addMBB(LoopMBB);1815418155LoopMBB->addSuccessor(LoopMBB);18156LoopMBB->addSuccessor(DoneMBB);1815718158MI.eraseFromParent();1815918160return DoneMBB;18161}1816218163static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,18164MachineBasicBlock *BB,18165const RISCVSubtarget &Subtarget) {18166assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");1816718168MachineFunction &MF = *BB->getParent();18169DebugLoc DL = MI.getDebugLoc();18170const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();18171const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();18172Register LoReg = MI.getOperand(0).getReg();18173Register HiReg = MI.getOperand(1).getReg();18174Register SrcReg = MI.getOperand(2).getReg();1817518176const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;18177int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);1817818179TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,18180RI, Register());18181MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);18182MachineMemOperand *MMOLo =18183MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));18184MachineMemOperand *MMOHi = MF.getMachineMemOperand(18185MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));18186BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)18187.addFrameIndex(FI)18188.addImm(0)18189.addMemOperand(MMOLo);18190BuildMI(*BB, MI, DL, TII.get(RISCV::LW), 
HiReg)18191.addFrameIndex(FI)18192.addImm(4)18193.addMemOperand(MMOHi);18194MI.eraseFromParent(); // The pseudo instruction is gone now.18195return BB;18196}1819718198static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,18199MachineBasicBlock *BB,18200const RISCVSubtarget &Subtarget) {18201assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&18202"Unexpected instruction");1820318204MachineFunction &MF = *BB->getParent();18205DebugLoc DL = MI.getDebugLoc();18206const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();18207const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();18208Register DstReg = MI.getOperand(0).getReg();18209Register LoReg = MI.getOperand(1).getReg();18210Register HiReg = MI.getOperand(2).getReg();1821118212const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;18213int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);1821418215MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);18216MachineMemOperand *MMOLo =18217MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));18218MachineMemOperand *MMOHi = MF.getMachineMemOperand(18219MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));18220BuildMI(*BB, MI, DL, TII.get(RISCV::SW))18221.addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))18222.addFrameIndex(FI)18223.addImm(0)18224.addMemOperand(MMOLo);18225BuildMI(*BB, MI, DL, TII.get(RISCV::SW))18226.addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))18227.addFrameIndex(FI)18228.addImm(4)18229.addMemOperand(MMOHi);18230TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());18231MI.eraseFromParent(); // The pseudo instruction is gone now.18232return BB;18233}1823418235static bool isSelectPseudo(MachineInstr &MI) {18236switch (MI.getOpcode()) {18237default:18238return false;18239case RISCV::Select_GPR_Using_CC_GPR:18240case RISCV::Select_GPR_Using_CC_Imm:18241case RISCV::Select_FPR16_Using_CC_GPR:18242case RISCV::Select_FPR16INX_Using_CC_GPR:18243case RISCV::Select_FPR32_Using_CC_GPR:18244case RISCV::Select_FPR32INX_Using_CC_GPR:18245case RISCV::Select_FPR64_Using_CC_GPR:18246case RISCV::Select_FPR64INX_Using_CC_GPR:18247case RISCV::Select_FPR64IN32X_Using_CC_GPR:18248return true;18249}18250}1825118252static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,18253unsigned RelOpcode, unsigned EqOpcode,18254const RISCVSubtarget &Subtarget) {18255DebugLoc DL = MI.getDebugLoc();18256Register DstReg = MI.getOperand(0).getReg();18257Register Src1Reg = MI.getOperand(1).getReg();18258Register Src2Reg = MI.getOperand(2).getReg();18259MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();18260Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);18261const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();1826218263// Save the current FFLAGS.18264BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);1826518266auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)18267.addReg(Src1Reg)18268.addReg(Src2Reg);18269if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))18270MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);1827118272// Restore the FFLAGS.18273BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))18274.addReg(SavedFFlags, RegState::Kill);1827518276// Issue a dummy FEQ opcode to raise exception for signaling NaNs.18277auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)18278.addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))18279.addReg(Src2Reg, 
static MachineBasicBlock *
EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
                          MachineBasicBlock *ThisMBB,
                          const RISCVSubtarget &Subtarget) {
  // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5)
  // Without this, custom-inserter would have generated:
  //
  //   A
  //   | \
  //   | B
  //   | /
  //   C
  //   | \
  //   | D
  //   | /
  //   E
  //
  // A: X = ...; Y = ...
  // B: empty
  // C: Z = PHI [X, A], [Y, B]
  // D: empty
  // E: PHI [X, C], [Z, D]
  //
  // If we lower both Select_FPRX_ in a single step, we can instead generate:
  //
  //   A
  //   | \
  //   | C
  //   | /|
  //   |/ |
  //   |  |
  //   |  D
  //   | /
  //   E
  //
  // A: X = ...; Y = ...
  // D: empty
  // E: PHI [X, A], [X, C], [Y, D]

  const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
  const DebugLoc &DL = First.getDebugLoc();
  const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
  MachineFunction *F = ThisMBB->getParent();
  MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineFunction::iterator It = ++ThisMBB->getIterator();
  F->insert(It, FirstMBB);
  F->insert(It, SecondMBB);
  F->insert(It, SinkMBB);

  // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
  SinkMBB->splice(SinkMBB->begin(), ThisMBB,
                  std::next(MachineBasicBlock::iterator(First)),
                  ThisMBB->end());
  SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);

  // Fallthrough block for ThisMBB.
  ThisMBB->addSuccessor(FirstMBB);
  // Fallthrough block for FirstMBB.
  FirstMBB->addSuccessor(SecondMBB);
  ThisMBB->addSuccessor(SinkMBB);
  FirstMBB->addSuccessor(SinkMBB);
  // This is fallthrough.
  SecondMBB->addSuccessor(SinkMBB);

  auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
  Register FLHS = First.getOperand(1).getReg();
  Register FRHS = First.getOperand(2).getReg();
  // Insert appropriate branch.
  BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
      .addReg(FLHS)
      .addReg(FRHS)
      .addMBB(SinkMBB);

  Register SLHS = Second.getOperand(1).getReg();
  Register SRHS = Second.getOperand(2).getReg();
  Register Op1Reg4 = First.getOperand(4).getReg();
  Register Op1Reg5 = First.getOperand(5).getReg();

  auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
  // Insert appropriate branch.
  BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
      .addReg(SLHS)
      .addReg(SRHS)
      .addMBB(SinkMBB);

  Register DestReg = Second.getOperand(0).getReg();
  Register Op2Reg4 = Second.getOperand(4).getReg();
  BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
      .addReg(Op2Reg4)
      .addMBB(ThisMBB)
      .addReg(Op1Reg4)
      .addMBB(FirstMBB)
      .addReg(Op1Reg5)
      .addMBB(SecondMBB);

  // Now remove the Select_FPRX_s.
  First.eraseFromParent();
  Second.eraseFromParent();
  return SinkMBB;
}

static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
                                           MachineBasicBlock *BB,
                                           const RISCVSubtarget &Subtarget) {
  // To "insert" Select_* instructions, we actually have to insert the triangle
  // control-flow pattern. The incoming instructions know the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and the condcode to use to select the appropriate branch.
  //
  // We produce the following control flow:
  //     HeadMBB
  //     |  \
  //     |  IfFalseMBB
  //     | /
  //    TailMBB
  //
  // When we find a sequence of selects we attempt to optimize their emission
  // by sharing the control flow. Currently we only handle cases where we have
  // multiple selects with the exact same condition (same LHS, RHS and CC).
  // The selects may be interleaved with other instructions if the other
  // instructions meet some requirements we deem safe:
  // - They are not pseudo instructions.
  // - They are debug instructions. Otherwise,
  // - They do not have side-effects, do not access memory and their inputs do
  //   not depend on the results of the select pseudo-instructions.
  // The TrueV/FalseV operands of the selects cannot depend on the result of
  // previous selects in the sequence.
  // These conditions could be further relaxed. See the X86 target for a
  // related approach and more information.
  //
  // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
  // is checked here and handled by a separate function -
  // EmitLoweredCascadedSelect.

  auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
  if ((MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
       MI.getOpcode() != RISCV::Select_GPR_Using_CC_Imm) &&
      Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
      Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
      Next->getOperand(5).isKill())
    return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);

  Register LHS = MI.getOperand(1).getReg();
  Register RHS;
  if (MI.getOperand(2).isReg())
    RHS = MI.getOperand(2).getReg();
  auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());

  SmallVector<MachineInstr *, 4> SelectDebugValues;
  SmallSet<Register, 4> SelectDests;
  SelectDests.insert(MI.getOperand(0).getReg());

  MachineInstr *LastSelectPseudo = &MI;
  for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
       SequenceMBBI != E; ++SequenceMBBI) {
    if (SequenceMBBI->isDebugInstr())
      continue;
    if (isSelectPseudo(*SequenceMBBI)) {
      if (SequenceMBBI->getOperand(1).getReg() != LHS ||
          !SequenceMBBI->getOperand(2).isReg() ||
          SequenceMBBI->getOperand(2).getReg() != RHS ||
          SequenceMBBI->getOperand(3).getImm() != CC ||
          SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
          SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
        break;
      LastSelectPseudo = &*SequenceMBBI;
      SequenceMBBI->collectDebugValues(SelectDebugValues);
      SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
      continue;
    }
    if (SequenceMBBI->hasUnmodeledSideEffects() ||
        SequenceMBBI->mayLoadOrStore() ||
        SequenceMBBI->usesCustomInsertionHook())
      break;
    if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
          return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
        }))
      break;
  }

  const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction::iterator I = ++BB->getIterator();

  MachineBasicBlock *HeadMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, IfFalseMBB);
  F->insert(I, TailMBB);

  // Set the call frame size on entry to the new basic blocks.
  unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
  IfFalseMBB->setCallFrameSize(CallFrameSize);
  TailMBB->setCallFrameSize(CallFrameSize);

  // Transfer debug instructions associated with the selects to TailMBB.
  for (MachineInstr *DebugInstr : SelectDebugValues) {
    TailMBB->push_back(DebugInstr->removeFromParent());
  }

  // Move all instructions after the sequence to TailMBB.
  TailMBB->splice(TailMBB->end(), HeadMBB,
                  std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi nodes for the selects.
  TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
  // Set the successors for HeadMBB.
  HeadMBB->addSuccessor(IfFalseMBB);
  HeadMBB->addSuccessor(TailMBB);

  // Insert appropriate branch.
  if (MI.getOperand(2).isImm())
    BuildMI(HeadMBB, DL, TII.getBrCond(CC, MI.getOperand(2).isImm()))
        .addReg(LHS)
        .addImm(MI.getOperand(2).getImm())
        .addMBB(TailMBB);
  else
    BuildMI(HeadMBB, DL, TII.getBrCond(CC))
        .addReg(LHS)
        .addReg(RHS)
        .addMBB(TailMBB);

  // IfFalseMBB just falls through to TailMBB.
  IfFalseMBB->addSuccessor(TailMBB);

  // Create PHIs for all of the select pseudo-instructions.
  auto SelectMBBI = MI.getIterator();
  auto SelectEnd = std::next(LastSelectPseudo->getIterator());
  auto InsertionPoint = TailMBB->begin();
  while (SelectMBBI != SelectEnd) {
    auto Next = std::next(SelectMBBI);
    if (isSelectPseudo(*SelectMBBI)) {
      // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
      BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
              TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
          .addReg(SelectMBBI->getOperand(4).getReg())
          .addMBB(HeadMBB)
          .addReg(SelectMBBI->getOperand(5).getReg())
          .addMBB(IfFalseMBB);
      SelectMBBI->eraseFromParent();
    }
    SelectMBBI = Next;
  }

  F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
  return TailMBB;
}

// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
static const RISCV::RISCVMaskedPseudoInfo *
lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW) {
  const RISCVVInversePseudosTable::PseudoInfo *Inverse =
      RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
  assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
  const RISCV::RISCVMaskedPseudoInfo *Masked =
      RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
  assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
  return Masked;
}

static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
                                                    MachineBasicBlock *BB,
                                                    unsigned CVTXOpc) {
  DebugLoc DL = MI.getDebugLoc();

  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();

  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  Register SavedFFLAGS =
MRI.createVirtualRegister(&RISCV::GPRRegClass);1856218563// Save the old value of FFLAGS.18564BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);1856518566assert(MI.getNumOperands() == 7);1856718568// Emit a VFCVT_X_F18569const TargetRegisterInfo *TRI =18570BB->getParent()->getSubtarget().getRegisterInfo();18571const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);18572Register Tmp = MRI.createVirtualRegister(RC);18573BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)18574.add(MI.getOperand(1))18575.add(MI.getOperand(2))18576.add(MI.getOperand(3))18577.add(MachineOperand::CreateImm(7)) // frm = DYN18578.add(MI.getOperand(4))18579.add(MI.getOperand(5))18580.add(MI.getOperand(6))18581.add(MachineOperand::CreateReg(RISCV::FRM,18582/*IsDef*/ false,18583/*IsImp*/ true));1858418585// Emit a VFCVT_F_X18586RISCVII::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);18587unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();18588// There is no E8 variant for VFCVT_F_X.18589assert(Log2SEW >= 4);18590unsigned CVTFOpc =18591lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)18592->MaskedPseudo;1859318594BuildMI(*BB, MI, DL, TII.get(CVTFOpc))18595.add(MI.getOperand(0))18596.add(MI.getOperand(1))18597.addReg(Tmp)18598.add(MI.getOperand(3))18599.add(MachineOperand::CreateImm(7)) // frm = DYN18600.add(MI.getOperand(4))18601.add(MI.getOperand(5))18602.add(MI.getOperand(6))18603.add(MachineOperand::CreateReg(RISCV::FRM,18604/*IsDef*/ false,18605/*IsImp*/ true));1860618607// Restore FFLAGS.18608BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))18609.addReg(SavedFFLAGS, RegState::Kill);1861018611// Erase the pseudoinstruction.18612MI.eraseFromParent();18613return BB;18614}1861518616static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,18617const RISCVSubtarget &Subtarget) {18618unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;18619const TargetRegisterClass *RC;18620switch (MI.getOpcode()) {18621default:18622llvm_unreachable("Unexpected opcode");18623case RISCV::PseudoFROUND_H:18624CmpOpc = RISCV::FLT_H;18625F2IOpc = RISCV::FCVT_W_H;18626I2FOpc = RISCV::FCVT_H_W;18627FSGNJOpc = RISCV::FSGNJ_H;18628FSGNJXOpc = RISCV::FSGNJX_H;18629RC = &RISCV::FPR16RegClass;18630break;18631case RISCV::PseudoFROUND_H_INX:18632CmpOpc = RISCV::FLT_H_INX;18633F2IOpc = RISCV::FCVT_W_H_INX;18634I2FOpc = RISCV::FCVT_H_W_INX;18635FSGNJOpc = RISCV::FSGNJ_H_INX;18636FSGNJXOpc = RISCV::FSGNJX_H_INX;18637RC = &RISCV::GPRF16RegClass;18638break;18639case RISCV::PseudoFROUND_S:18640CmpOpc = RISCV::FLT_S;18641F2IOpc = RISCV::FCVT_W_S;18642I2FOpc = RISCV::FCVT_S_W;18643FSGNJOpc = RISCV::FSGNJ_S;18644FSGNJXOpc = RISCV::FSGNJX_S;18645RC = &RISCV::FPR32RegClass;18646break;18647case RISCV::PseudoFROUND_S_INX:18648CmpOpc = RISCV::FLT_S_INX;18649F2IOpc = RISCV::FCVT_W_S_INX;18650I2FOpc = RISCV::FCVT_S_W_INX;18651FSGNJOpc = RISCV::FSGNJ_S_INX;18652FSGNJXOpc = RISCV::FSGNJX_S_INX;18653RC = &RISCV::GPRF32RegClass;18654break;18655case RISCV::PseudoFROUND_D:18656assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");18657CmpOpc = RISCV::FLT_D;18658F2IOpc = RISCV::FCVT_L_D;18659I2FOpc = RISCV::FCVT_D_L;18660FSGNJOpc = RISCV::FSGNJ_D;18661FSGNJXOpc = RISCV::FSGNJX_D;18662RC = &RISCV::FPR64RegClass;18663break;18664case RISCV::PseudoFROUND_D_INX:18665assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");18666CmpOpc = RISCV::FLT_D_INX;18667F2IOpc = RISCV::FCVT_L_D_INX;18668I2FOpc = RISCV::FCVT_D_L_INX;18669FSGNJOpc = RISCV::FSGNJ_D_INX;18670FSGNJXOpc = RISCV::FSGNJX_D_INX;18671RC 
= &RISCV::GPRRegClass;18672break;18673}1867418675const BasicBlock *BB = MBB->getBasicBlock();18676DebugLoc DL = MI.getDebugLoc();18677MachineFunction::iterator I = ++MBB->getIterator();1867818679MachineFunction *F = MBB->getParent();18680MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);18681MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);1868218683F->insert(I, CvtMBB);18684F->insert(I, DoneMBB);18685// Move all instructions after the sequence to DoneMBB.18686DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),18687MBB->end());18688// Update machine-CFG edges by transferring all successors of the current18689// block to the new block which will contain the Phi nodes for the selects.18690DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);18691// Set the successors for MBB.18692MBB->addSuccessor(CvtMBB);18693MBB->addSuccessor(DoneMBB);1869418695Register DstReg = MI.getOperand(0).getReg();18696Register SrcReg = MI.getOperand(1).getReg();18697Register MaxReg = MI.getOperand(2).getReg();18698int64_t FRM = MI.getOperand(3).getImm();1869918700const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();18701MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();1870218703Register FabsReg = MRI.createVirtualRegister(RC);18704BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);1870518706// Compare the FP value to the max value.18707Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);18708auto MIB =18709BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);18710if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))18711MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);1871218713// Insert branch.18714BuildMI(MBB, DL, TII.get(RISCV::BEQ))18715.addReg(CmpReg)18716.addReg(RISCV::X0)18717.addMBB(DoneMBB);1871818719CvtMBB->addSuccessor(DoneMBB);1872018721// Convert to integer.18722Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);18723MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);18724if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))18725MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);1872618727// Convert back to FP.18728Register I2FReg = MRI.createVirtualRegister(RC);18729MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);18730if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))18731MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);1873218733// Restore the sign bit.18734Register CvtReg = MRI.createVirtualRegister(RC);18735BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);1873618737// Merge the results.18738BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)18739.addReg(SrcReg)18740.addMBB(MBB)18741.addReg(CvtReg)18742.addMBB(CvtMBB);1874318744MI.eraseFromParent();18745return DoneMBB;18746}1874718748MachineBasicBlock *18749RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,18750MachineBasicBlock *BB) const {18751switch (MI.getOpcode()) {18752default:18753llvm_unreachable("Unexpected instr type to insert");18754case RISCV::ReadCounterWide:18755assert(!Subtarget.is64Bit() &&18756"ReadCounterWide is only to be used on riscv32");18757return emitReadCounterWidePseudo(MI, BB);18758case RISCV::Select_GPR_Using_CC_GPR:18759case RISCV::Select_GPR_Using_CC_Imm:18760case RISCV::Select_FPR16_Using_CC_GPR:18761case RISCV::Select_FPR16INX_Using_CC_GPR:18762case RISCV::Select_FPR32_Using_CC_GPR:18763case RISCV::Select_FPR32INX_Using_CC_GPR:18764case RISCV::Select_FPR64_Using_CC_GPR:18765case 
RISCV::Select_FPR64INX_Using_CC_GPR:18766case RISCV::Select_FPR64IN32X_Using_CC_GPR:18767return emitSelectPseudo(MI, BB, Subtarget);18768case RISCV::BuildPairF64Pseudo:18769return emitBuildPairF64Pseudo(MI, BB, Subtarget);18770case RISCV::SplitF64Pseudo:18771return emitSplitF64Pseudo(MI, BB, Subtarget);18772case RISCV::PseudoQuietFLE_H:18773return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);18774case RISCV::PseudoQuietFLE_H_INX:18775return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);18776case RISCV::PseudoQuietFLT_H:18777return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);18778case RISCV::PseudoQuietFLT_H_INX:18779return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);18780case RISCV::PseudoQuietFLE_S:18781return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);18782case RISCV::PseudoQuietFLE_S_INX:18783return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);18784case RISCV::PseudoQuietFLT_S:18785return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);18786case RISCV::PseudoQuietFLT_S_INX:18787return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);18788case RISCV::PseudoQuietFLE_D:18789return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);18790case RISCV::PseudoQuietFLE_D_INX:18791return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);18792case RISCV::PseudoQuietFLE_D_IN32X:18793return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,18794Subtarget);18795case RISCV::PseudoQuietFLT_D:18796return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);18797case RISCV::PseudoQuietFLT_D_INX:18798return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);18799case RISCV::PseudoQuietFLT_D_IN32X:18800return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,18801Subtarget);1880218803case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:18804return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);18805case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:18806return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);18807case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:18808return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);18809case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:18810return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);18811case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:18812return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);18813case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:18814return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);18815case RISCV::PseudoFROUND_H:18816case RISCV::PseudoFROUND_H_INX:18817case RISCV::PseudoFROUND_S:18818case RISCV::PseudoFROUND_S_INX:18819case RISCV::PseudoFROUND_D:18820case RISCV::PseudoFROUND_D_INX:18821case RISCV::PseudoFROUND_D_IN32X:18822return emitFROUND(MI, BB, Subtarget);18823case TargetOpcode::STATEPOINT:18824// STATEPOINT is a pseudo instruction which has no implicit defs/uses18825// while jal call instruction (where statepoint will be lowered at the end)18826// has implicit def. 
This def is early-clobber as it will be set at18827// the moment of the call and earlier than any use is read.18828// Add this implicit dead def here as a workaround.18829MI.addOperand(*MI.getMF(),18830MachineOperand::CreateReg(18831RISCV::X1, /*isDef*/ true,18832/*isImp*/ true, /*isKill*/ false, /*isDead*/ true,18833/*isUndef*/ false, /*isEarlyClobber*/ true));18834[[fallthrough]];18835case TargetOpcode::STACKMAP:18836case TargetOpcode::PATCHPOINT:18837if (!Subtarget.is64Bit())18838report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "18839"supported on 64-bit targets");18840return emitPatchPoint(MI, BB);18841}18842}1884318844void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,18845SDNode *Node) const {18846// Add FRM dependency to any instructions with dynamic rounding mode.18847int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);18848if (Idx < 0) {18849// Vector pseudos have FRM index indicated by TSFlags.18850Idx = RISCVII::getFRMOpNum(MI.getDesc());18851if (Idx < 0)18852return;18853}18854if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)18855return;18856// If the instruction already reads FRM, don't add another read.18857if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))18858return;18859MI.addOperand(18860MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));18861}1886218863// Calling Convention Implementation.18864// The expectations for frontend ABI lowering vary from target to target.18865// Ideally, an LLVM frontend would be able to avoid worrying about many ABI18866// details, but this is a longer term goal. For now, we simply try to keep the18867// role of the frontend as simple and well-defined as possible. The rules can18868// be summarised as:18869// * Never split up large scalar arguments. We handle them here.18870// * If a hardfloat calling convention is being used, and the struct may be18871// passed in a pair of registers (fp+fp, int+fp), and both registers are18872// available, then pass as two separate arguments. If either the GPRs or FPRs18873// are exhausted, then pass according to the rule below.18874// * If a struct could never be passed in registers or directly in a stack18875// slot (as it is larger than 2*XLEN and the floating point rules don't18876// apply), then pass it using a pointer with the byval attribute.18877// * If a struct is less than 2*XLEN, then coerce to either a two-element18878// word-sized array or a 2*XLEN scalar (depending on alignment).18879// * The frontend can determine whether a struct is returned by reference or18880// not based on its size and fields. If it will be returned by reference, the18881// frontend must modify the prototype so a pointer with the sret annotation is18882// passed as the first argument. 
This is not necessary for large scalar18883// returns.18884// * Struct return values and varargs should be coerced to structs containing18885// register-size fields in the same situations they would be for fixed18886// arguments.1888718888static const MCPhysReg ArgFPR16s[] = {18889RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,18890RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H18891};18892static const MCPhysReg ArgFPR32s[] = {18893RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,18894RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F18895};18896static const MCPhysReg ArgFPR64s[] = {18897RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,18898RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D18899};18900// This is an interim calling convention and it may be changed in the future.18901static const MCPhysReg ArgVRs[] = {18902RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,18903RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,18904RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};18905static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2,18906RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,18907RISCV::V20M2, RISCV::V22M2};18908static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,18909RISCV::V20M4};18910static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};1891118912ArrayRef<MCPhysReg> RISCV::getArgGPRs(const RISCVABI::ABI ABI) {18913// The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except18914// the ILP32E ABI.18915static const MCPhysReg ArgIGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,18916RISCV::X13, RISCV::X14, RISCV::X15,18917RISCV::X16, RISCV::X17};18918// The GPRs used for passing arguments in the ILP32E/ILP64E ABI.18919static const MCPhysReg ArgEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,18920RISCV::X13, RISCV::X14, RISCV::X15};1892118922if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)18923return ArrayRef(ArgEGPRs);1892418925return ArrayRef(ArgIGPRs);18926}1892718928static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) {18929// The GPRs used for passing arguments in the FastCC, X5 and X6 might be used18930// for save-restore libcall, so we don't use them.18931// Don't use X7 for fastcc, since Zicfilp uses X7 as the label register.18932static const MCPhysReg FastCCIGPRs[] = {18933RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, RISCV::X15,18934RISCV::X16, RISCV::X17, RISCV::X28, RISCV::X29, RISCV::X30, RISCV::X31};1893518936// The GPRs used for passing arguments in the FastCC when using ILP32E/ILP64E.18937static const MCPhysReg FastCCEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,18938RISCV::X13, RISCV::X14, RISCV::X15};1893918940if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)18941return ArrayRef(FastCCEGPRs);1894218943return ArrayRef(FastCCIGPRs);18944}1894518946// Pass a 2*XLEN argument that has been split into two XLEN values through18947// registers or the stack as necessary.18948static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,18949ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,18950MVT ValVT2, MVT LocVT2,18951ISD::ArgFlagsTy ArgFlags2, bool EABI) {18952unsigned XLenInBytes = XLen / 8;18953const RISCVSubtarget &STI =18954State.getMachineFunction().getSubtarget<RISCVSubtarget>();18955ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(STI.getTargetABI());1895618957if (Register Reg = State.AllocateReg(ArgGPRs)) {18958// At least one half can be passed via 
register.18959State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,18960VA1.getLocVT(), CCValAssign::Full));18961} else {18962// Both halves must be passed on the stack, with proper alignment.18963// TODO: To be compatible with GCC's behaviors, we force them to have 4-byte18964// alignment. This behavior may be changed when RV32E/ILP32E is ratified.18965Align StackAlign(XLenInBytes);18966if (!EABI || XLen != 32)18967StackAlign = std::max(StackAlign, ArgFlags1.getNonZeroOrigAlign());18968State.addLoc(18969CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),18970State.AllocateStack(XLenInBytes, StackAlign),18971VA1.getLocVT(), CCValAssign::Full));18972State.addLoc(CCValAssign::getMem(18973ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),18974LocVT2, CCValAssign::Full));18975return false;18976}1897718978if (Register Reg = State.AllocateReg(ArgGPRs)) {18979// The second half can also be passed via register.18980State.addLoc(18981CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));18982} else {18983// The second half is passed via the stack, without additional alignment.18984State.addLoc(CCValAssign::getMem(18985ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),18986LocVT2, CCValAssign::Full));18987}1898818989return false;18990}1899118992// Implements the RISC-V calling convention. Returns true upon failure.18993bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,18994MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,18995ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,18996bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,18997RVVArgDispatcher &RVVDispatcher) {18998unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();18999assert(XLen == 32 || XLen == 64);19000MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;1900119002// Static chain parameter must not be passed in normal argument registers,19003// so we assign t2 for it as done in GCC's __builtin_call_with_static_chain19004if (ArgFlags.isNest()) {19005if (unsigned Reg = State.AllocateReg(RISCV::X7)) {19006State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));19007return false;19008}19009}1901019011// Any return value split in to more than two values can't be returned19012// directly. 
Vectors are returned via the available vector registers.19013if (!LocVT.isVector() && IsRet && ValNo > 1)19014return true;1901519016// UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a19017// variadic argument, or if no F16/F32 argument registers are available.19018bool UseGPRForF16_F32 = true;19019// UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a19020// variadic argument, or if no F64 argument registers are available.19021bool UseGPRForF64 = true;1902219023switch (ABI) {19024default:19025llvm_unreachable("Unexpected ABI");19026case RISCVABI::ABI_ILP32:19027case RISCVABI::ABI_ILP32E:19028case RISCVABI::ABI_LP64:19029case RISCVABI::ABI_LP64E:19030break;19031case RISCVABI::ABI_ILP32F:19032case RISCVABI::ABI_LP64F:19033UseGPRForF16_F32 = !IsFixed;19034break;19035case RISCVABI::ABI_ILP32D:19036case RISCVABI::ABI_LP64D:19037UseGPRForF16_F32 = !IsFixed;19038UseGPRForF64 = !IsFixed;19039break;19040}1904119042// FPR16, FPR32, and FPR64 alias each other.19043if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) {19044UseGPRForF16_F32 = true;19045UseGPRForF64 = true;19046}1904719048// From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and19049// similar local variables rather than directly checking against the target19050// ABI.1905119052if (UseGPRForF16_F32 &&19053(ValVT == MVT::f16 || ValVT == MVT::bf16 || ValVT == MVT::f32)) {19054LocVT = XLenVT;19055LocInfo = CCValAssign::BCvt;19056} else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {19057LocVT = MVT::i64;19058LocInfo = CCValAssign::BCvt;19059}1906019061ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(ABI);1906219063// If this is a variadic argument, the RISC-V calling convention requires19064// that it is assigned an 'even' or 'aligned' register if it has 8-byte19065// alignment (RV32) or 16-byte alignment (RV64). An aligned register should19066// be used regardless of whether the original argument was split during19067// legalisation or not. The argument will not be passed by registers if the19068// original type is larger than 2*XLEN, so the register alignment rule does19069// not apply.19070// TODO: To be compatible with GCC's behaviors, we don't align registers19071// currently if we are using ILP32E calling convention. This behavior may be19072// changed when RV32E/ILP32E is ratified.19073unsigned TwoXLenInBytes = (2 * XLen) / 8;19074if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&19075DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes &&19076ABI != RISCVABI::ABI_ILP32E) {19077unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);19078// Skip 'odd' register if necessary.19079if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)19080State.AllocateReg(ArgGPRs);19081}1908219083SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();19084SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =19085State.getPendingArgFlags();1908619087assert(PendingLocs.size() == PendingArgFlags.size() &&19088"PendingLocs and PendingArgFlags out of sync");1908919090// Handle passing f64 on RV32D with a soft float ABI or when floating point19091// registers are exhausted.19092if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {19093assert(PendingLocs.empty() && "Can't lower f64 if it is split");19094// Depending on available argument GPRS, f64 may be passed in a pair of19095// GPRs, split between a GPR and the stack, or passed completely on the19096// stack. 
LowerCall/LowerFormalArguments/LowerReturn must recognise these19097// cases.19098Register Reg = State.AllocateReg(ArgGPRs);19099if (!Reg) {19100unsigned StackOffset = State.AllocateStack(8, Align(8));19101State.addLoc(19102CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));19103return false;19104}19105LocVT = MVT::i32;19106State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));19107Register HiReg = State.AllocateReg(ArgGPRs);19108if (HiReg) {19109State.addLoc(19110CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));19111} else {19112unsigned StackOffset = State.AllocateStack(4, Align(4));19113State.addLoc(19114CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));19115}19116return false;19117}1911819119// Fixed-length vectors are located in the corresponding scalable-vector19120// container types.19121if (ValVT.isFixedLengthVector())19122LocVT = TLI.getContainerForFixedLengthVector(LocVT);1912319124// Split arguments might be passed indirectly, so keep track of the pending19125// values. Split vectors are passed via a mix of registers and indirectly, so19126// treat them as we would any other argument.19127if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {19128LocVT = XLenVT;19129LocInfo = CCValAssign::Indirect;19130PendingLocs.push_back(19131CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));19132PendingArgFlags.push_back(ArgFlags);19133if (!ArgFlags.isSplitEnd()) {19134return false;19135}19136}1913719138// If the split argument only had two elements, it should be passed directly19139// in registers or on the stack.19140if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&19141PendingLocs.size() <= 2) {19142assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");19143// Apply the normal calling convention rules to the first half of the19144// split argument.19145CCValAssign VA = PendingLocs[0];19146ISD::ArgFlagsTy AF = PendingArgFlags[0];19147PendingLocs.clear();19148PendingArgFlags.clear();19149return CC_RISCVAssign2XLen(19150XLen, State, VA, AF, ValNo, ValVT, LocVT, ArgFlags,19151ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E);19152}1915319154// Allocate to a register if possible, or else a stack slot.19155Register Reg;19156unsigned StoreSizeBytes = XLen / 8;19157Align StackAlign = Align(XLen / 8);1915819159if ((ValVT == MVT::f16 || ValVT == MVT::bf16) && !UseGPRForF16_F32)19160Reg = State.AllocateReg(ArgFPR16s);19161else if (ValVT == MVT::f32 && !UseGPRForF16_F32)19162Reg = State.AllocateReg(ArgFPR32s);19163else if (ValVT == MVT::f64 && !UseGPRForF64)19164Reg = State.AllocateReg(ArgFPR64s);19165else if (ValVT.isVector()) {19166Reg = RVVDispatcher.getNextPhysReg();19167if (!Reg) {19168// For return values, the vector must be passed fully via registers or19169// via the stack.19170// FIXME: The proposed vector ABI only mandates v8-v15 for return values,19171// but we're using all of them.19172if (IsRet)19173return true;19174// Try using a GPR to pass the address19175if ((Reg = State.AllocateReg(ArgGPRs))) {19176LocVT = XLenVT;19177LocInfo = CCValAssign::Indirect;19178} else if (ValVT.isScalableVector()) {19179LocVT = XLenVT;19180LocInfo = CCValAssign::Indirect;19181} else {19182// Pass fixed-length vectors on the stack.19183LocVT = ValVT;19184StoreSizeBytes = ValVT.getStoreSize();19185// Align vectors to their element sizes, being careful for vXi119186// vectors.19187StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();19188}19189}19190} else {19191Reg = 
State.AllocateReg(ArgGPRs);19192}1919319194unsigned StackOffset =19195Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);1919619197// If we reach this point and PendingLocs is non-empty, we must be at the19198// end of a split argument that must be passed indirectly.19199if (!PendingLocs.empty()) {19200assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");19201assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");1920219203for (auto &It : PendingLocs) {19204if (Reg)19205It.convertToReg(Reg);19206else19207It.convertToMem(StackOffset);19208State.addLoc(It);19209}19210PendingLocs.clear();19211PendingArgFlags.clear();19212return false;19213}1921419215assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||19216(TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) &&19217"Expected an XLenVT or vector types at this stage");1921819219if (Reg) {19220State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));19221return false;19222}1922319224// When a scalar floating-point value is passed on the stack, no19225// bit-conversion is needed.19226if (ValVT.isFloatingPoint() && LocInfo != CCValAssign::Indirect) {19227assert(!ValVT.isVector());19228LocVT = ValVT;19229LocInfo = CCValAssign::Full;19230}19231State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));19232return false;19233}1923419235template <typename ArgTy>19236static std::optional<unsigned> preAssignMask(const ArgTy &Args) {19237for (const auto &ArgIdx : enumerate(Args)) {19238MVT ArgVT = ArgIdx.value().VT;19239if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)19240return ArgIdx.index();19241}19242return std::nullopt;19243}1924419245void RISCVTargetLowering::analyzeInputArgs(19246MachineFunction &MF, CCState &CCInfo,19247const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,19248RISCVCCAssignFn Fn) const {19249unsigned NumArgs = Ins.size();19250FunctionType *FType = MF.getFunction().getFunctionType();1925119252RVVArgDispatcher Dispatcher;19253if (IsRet) {19254Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(Ins)};19255} else {19256SmallVector<Type *, 4> TypeList;19257for (const Argument &Arg : MF.getFunction().args())19258TypeList.push_back(Arg.getType());19259Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(TypeList)};19260}1926119262for (unsigned i = 0; i != NumArgs; ++i) {19263MVT ArgVT = Ins[i].VT;19264ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;1926519266Type *ArgTy = nullptr;19267if (IsRet)19268ArgTy = FType->getReturnType();19269else if (Ins[i].isOrigArg())19270ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());1927119272RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();19273if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,19274ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,19275Dispatcher)) {19276LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "19277<< ArgVT << '\n');19278llvm_unreachable(nullptr);19279}19280}19281}1928219283void RISCVTargetLowering::analyzeOutputArgs(19284MachineFunction &MF, CCState &CCInfo,19285const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,19286CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {19287unsigned NumArgs = Outs.size();1928819289SmallVector<Type *, 4> TypeList;19290if (IsRet)19291TypeList.push_back(MF.getFunction().getReturnType());19292else if (CLI)19293for (const TargetLowering::ArgListEntry &Arg : CLI->getArgs())19294TypeList.push_back(Arg.Ty);19295RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(TypeList)};1929619297for (unsigned i = 0; i != 
NumArgs; i++) {19298MVT ArgVT = Outs[i].VT;19299ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;19300Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;1930119302RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();19303if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,19304ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,19305Dispatcher)) {19306LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "19307<< ArgVT << "\n");19308llvm_unreachable(nullptr);19309}19310}19311}1931219313// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect19314// values.19315static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,19316const CCValAssign &VA, const SDLoc &DL,19317const RISCVSubtarget &Subtarget) {19318switch (VA.getLocInfo()) {19319default:19320llvm_unreachable("Unexpected CCValAssign::LocInfo");19321case CCValAssign::Full:19322if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())19323Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);19324break;19325case CCValAssign::BCvt:19326if (VA.getLocVT().isInteger() &&19327(VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {19328Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);19329} else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {19330if (RV64LegalI32) {19331Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Val);19332Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);19333} else {19334Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);19335}19336} else {19337Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);19338}19339break;19340}19341return Val;19342}1934319344// The caller is responsible for loading the full value if the argument is19345// passed with CCValAssign::Indirect.19346static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,19347const CCValAssign &VA, const SDLoc &DL,19348const ISD::InputArg &In,19349const RISCVTargetLowering &TLI) {19350MachineFunction &MF = DAG.getMachineFunction();19351MachineRegisterInfo &RegInfo = MF.getRegInfo();19352EVT LocVT = VA.getLocVT();19353SDValue Val;19354const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());19355Register VReg = RegInfo.createVirtualRegister(RC);19356RegInfo.addLiveIn(VA.getLocReg(), VReg);19357Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);1935819359// If input is sign extended from 32 bits, note it for the SExtWRemoval pass.19360if (In.isOrigArg()) {19361Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());19362if (OrigArg->getType()->isIntegerTy()) {19363unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();19364// An input zero extended from i31 can also be considered sign extended.19365if ((BitWidth <= 32 && In.Flags.isSExt()) ||19366(BitWidth < 32 && In.Flags.isZExt())) {19367RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();19368RVFI->addSExt32Register(VReg);19369}19370}19371}1937219373if (VA.getLocInfo() == CCValAssign::Indirect)19374return Val;1937519376return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());19377}1937819379static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,19380const CCValAssign &VA, const SDLoc &DL,19381const RISCVSubtarget &Subtarget) {19382EVT LocVT = VA.getLocVT();1938319384switch (VA.getLocInfo()) {19385default:19386llvm_unreachable("Unexpected CCValAssign::LocInfo");19387case CCValAssign::Full:19388if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())19389Val = 
convertToScalableVector(LocVT, Val, DAG, Subtarget);19390break;19391case CCValAssign::BCvt:19392if (LocVT.isInteger() &&19393(VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {19394Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);19395} else if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) {19396if (RV64LegalI32) {19397Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);19398Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val);19399} else {19400Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);19401}19402} else {19403Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);19404}19405break;19406}19407return Val;19408}1940919410// The caller is responsible for loading the full value if the argument is19411// passed with CCValAssign::Indirect.19412static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,19413const CCValAssign &VA, const SDLoc &DL) {19414MachineFunction &MF = DAG.getMachineFunction();19415MachineFrameInfo &MFI = MF.getFrameInfo();19416EVT LocVT = VA.getLocVT();19417EVT ValVT = VA.getValVT();19418EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));19419if (ValVT.isScalableVector()) {19420// When the value is a scalable vector, we save the pointer which points to19421// the scalable vector value in the stack. The ValVT will be the pointer19422// type, instead of the scalable vector type.19423ValVT = LocVT;19424}19425int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),19426/*IsImmutable=*/true);19427SDValue FIN = DAG.getFrameIndex(FI, PtrVT);19428SDValue Val;1942919430ISD::LoadExtType ExtType;19431switch (VA.getLocInfo()) {19432default:19433llvm_unreachable("Unexpected CCValAssign::LocInfo");19434case CCValAssign::Full:19435case CCValAssign::Indirect:19436case CCValAssign::BCvt:19437ExtType = ISD::NON_EXTLOAD;19438break;19439}19440Val = DAG.getExtLoad(19441ExtType, DL, LocVT, Chain, FIN,19442MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);19443return Val;19444}1944519446static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,19447const CCValAssign &VA,19448const CCValAssign &HiVA,19449const SDLoc &DL) {19450assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&19451"Unexpected VA");19452MachineFunction &MF = DAG.getMachineFunction();19453MachineFrameInfo &MFI = MF.getFrameInfo();19454MachineRegisterInfo &RegInfo = MF.getRegInfo();1945519456assert(VA.isRegLoc() && "Expected register VA assignment");1945719458Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);19459RegInfo.addLiveIn(VA.getLocReg(), LoVReg);19460SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);19461SDValue Hi;19462if (HiVA.isMemLoc()) {19463// Second half of f64 is passed on the stack.19464int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),19465/*IsImmutable=*/true);19466SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);19467Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,19468MachinePointerInfo::getFixedStack(MF, FI));19469} else {19470// Second half of f64 is passed in another GPR.19471Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);19472RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);19473Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);19474}19475return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);19476}1947719478// FastCC has less than 1% performance improvement for some particular19479// benchmark. 
But theoretically, it may has benenfit for some cases.19480bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,19481unsigned ValNo, MVT ValVT, MVT LocVT,19482CCValAssign::LocInfo LocInfo,19483ISD::ArgFlagsTy ArgFlags, CCState &State,19484bool IsFixed, bool IsRet, Type *OrigTy,19485const RISCVTargetLowering &TLI,19486RVVArgDispatcher &RVVDispatcher) {19487if (LocVT == MVT::i32 || LocVT == MVT::i64) {19488if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {19489State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));19490return false;19491}19492}1949319494const RISCVSubtarget &Subtarget = TLI.getSubtarget();1949519496if (LocVT == MVT::f16 &&19497(Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) {19498static const MCPhysReg FPR16List[] = {19499RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,19500RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H,19501RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H,19502RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};19503if (unsigned Reg = State.AllocateReg(FPR16List)) {19504State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));19505return false;19506}19507}1950819509if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {19510static const MCPhysReg FPR32List[] = {19511RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,19512RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F,19513RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F,19514RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};19515if (unsigned Reg = State.AllocateReg(FPR32List)) {19516State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));19517return false;19518}19519}1952019521if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {19522static const MCPhysReg FPR64List[] = {19523RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,19524RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D,19525RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D,19526RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};19527if (unsigned Reg = State.AllocateReg(FPR64List)) {19528State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));19529return false;19530}19531}1953219533// Check if there is an available GPR before hitting the stack.19534if ((LocVT == MVT::f16 &&19535(Subtarget.hasStdExtZhinx() || Subtarget.hasStdExtZhinxmin())) ||19536(LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||19537(LocVT == MVT::f64 && Subtarget.is64Bit() &&19538Subtarget.hasStdExtZdinx())) {19539if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {19540State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));19541return false;19542}19543}1954419545if (LocVT == MVT::f16) {19546unsigned Offset2 = State.AllocateStack(2, Align(2));19547State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset2, LocVT, LocInfo));19548return false;19549}1955019551if (LocVT == MVT::i32 || LocVT == MVT::f32) {19552unsigned Offset4 = State.AllocateStack(4, Align(4));19553State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));19554return false;19555}1955619557if (LocVT == MVT::i64 || LocVT == MVT::f64) {19558unsigned Offset5 = State.AllocateStack(8, Align(8));19559State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));19560return false;19561}1956219563if (LocVT.isVector()) {19564MCPhysReg AllocatedVReg = RVVDispatcher.getNextPhysReg();19565if 
(AllocatedVReg) {19566// Fixed-length vectors are located in the corresponding scalable-vector19567// container types.19568if (ValVT.isFixedLengthVector())19569LocVT = TLI.getContainerForFixedLengthVector(LocVT);19570State.addLoc(19571CCValAssign::getReg(ValNo, ValVT, AllocatedVReg, LocVT, LocInfo));19572} else {19573// Try and pass the address via a "fast" GPR.19574if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) {19575LocInfo = CCValAssign::Indirect;19576LocVT = TLI.getSubtarget().getXLenVT();19577State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));19578} else if (ValVT.isFixedLengthVector()) {19579auto StackAlign =19580MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();19581unsigned StackOffset =19582State.AllocateStack(ValVT.getStoreSize(), StackAlign);19583State.addLoc(19584CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));19585} else {19586// Can't pass scalable vectors on the stack.19587return true;19588}19589}1959019591return false;19592}1959319594return true; // CC didn't match.19595}1959619597bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,19598CCValAssign::LocInfo LocInfo,19599ISD::ArgFlagsTy ArgFlags, CCState &State) {19600if (ArgFlags.isNest()) {19601report_fatal_error(19602"Attribute 'nest' is not supported in GHC calling convention");19603}1960419605static const MCPhysReg GPRList[] = {19606RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,19607RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};1960819609if (LocVT == MVT::i32 || LocVT == MVT::i64) {19610// Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim19611// s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s1119612if (unsigned Reg = State.AllocateReg(GPRList)) {19613State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));19614return false;19615}19616}1961719618const RISCVSubtarget &Subtarget =19619State.getMachineFunction().getSubtarget<RISCVSubtarget>();1962019621if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {19622// Pass in STG registers: F1, ..., F619623// fs0 ... fs519624static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,19625RISCV::F18_F, RISCV::F19_F,19626RISCV::F20_F, RISCV::F21_F};19627if (unsigned Reg = State.AllocateReg(FPR32List)) {19628State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));19629return false;19630}19631}1963219633if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {19634// Pass in STG registers: D1, ..., D619635// fs6 ... 
fs1119636static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,19637RISCV::F24_D, RISCV::F25_D,19638RISCV::F26_D, RISCV::F27_D};19639if (unsigned Reg = State.AllocateReg(FPR64List)) {19640State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));19641return false;19642}19643}1964419645if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||19646(LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&19647Subtarget.is64Bit())) {19648if (unsigned Reg = State.AllocateReg(GPRList)) {19649State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));19650return false;19651}19652}1965319654report_fatal_error("No registers left in GHC calling convention");19655return true;19656}1965719658// Transform physical registers into virtual registers.19659SDValue RISCVTargetLowering::LowerFormalArguments(19660SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,19661const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,19662SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {1966319664MachineFunction &MF = DAG.getMachineFunction();1966519666switch (CallConv) {19667default:19668report_fatal_error("Unsupported calling convention");19669case CallingConv::C:19670case CallingConv::Fast:19671case CallingConv::SPIR_KERNEL:19672case CallingConv::GRAAL:19673case CallingConv::RISCV_VectorCall:19674break;19675case CallingConv::GHC:19676if (Subtarget.hasStdExtE())19677report_fatal_error("GHC calling convention is not supported on RVE!");19678if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())19679report_fatal_error("GHC calling convention requires the (Zfinx/F) and "19680"(Zdinx/D) instruction set extensions");19681}1968219683const Function &Func = MF.getFunction();19684if (Func.hasFnAttribute("interrupt")) {19685if (!Func.arg_empty())19686report_fatal_error(19687"Functions with the interrupt attribute cannot have arguments!");1968819689StringRef Kind =19690MF.getFunction().getFnAttribute("interrupt").getValueAsString();1969119692if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))19693report_fatal_error(19694"Function interrupt attribute argument not supported!");19695}1969619697EVT PtrVT = getPointerTy(DAG.getDataLayout());19698MVT XLenVT = Subtarget.getXLenVT();19699unsigned XLenInBytes = Subtarget.getXLen() / 8;19700// Used with vargs to acumulate store chains.19701std::vector<SDValue> OutChains;1970219703// Assign locations to all of the incoming arguments.19704SmallVector<CCValAssign, 16> ArgLocs;19705CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());1970619707if (CallConv == CallingConv::GHC)19708CCInfo.AnalyzeFormalArguments(Ins, RISCV::CC_RISCV_GHC);19709else19710analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,19711CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC19712: RISCV::CC_RISCV);1971319714for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {19715CCValAssign &VA = ArgLocs[i];19716SDValue ArgValue;19717// Passing f64 on RV32D with a soft float ABI must be handled as a special19718// case.19719if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {19720assert(VA.needsCustom());19721ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);19722} else if (VA.isRegLoc())19723ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);19724else19725ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);1972619727if (VA.getLocInfo() == CCValAssign::Indirect) {19728// If the original argument was split and passed by reference (e.g. 
i12819729// on RV32), we need to load all parts of it here (using the same19730// address). Vectors may be partly split to registers and partly to the19731// stack, in which case the base address is partly offset and subsequent19732// stores are relative to that.19733InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,19734MachinePointerInfo()));19735unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;19736unsigned ArgPartOffset = Ins[InsIdx].PartOffset;19737assert(VA.getValVT().isVector() || ArgPartOffset == 0);19738while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {19739CCValAssign &PartVA = ArgLocs[i + 1];19740unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;19741SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);19742if (PartVA.getValVT().isScalableVector())19743Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);19744SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);19745InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,19746MachinePointerInfo()));19747++i;19748++InsIdx;19749}19750continue;19751}19752InVals.push_back(ArgValue);19753}1975419755if (any_of(ArgLocs,19756[](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))19757MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();1975819759if (IsVarArg) {19760ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());19761unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);19762const TargetRegisterClass *RC = &RISCV::GPRRegClass;19763MachineFrameInfo &MFI = MF.getFrameInfo();19764MachineRegisterInfo &RegInfo = MF.getRegInfo();19765RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();1976619767// Size of the vararg save area. For now, the varargs save area is either19768// zero or large enough to hold a0-a7.19769int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);19770int FI;1977119772// If all registers are allocated, then all varargs must be passed on the19773// stack and we don't need to save any argregs.19774if (VarArgsSaveSize == 0) {19775int VaArgOffset = CCInfo.getStackSize();19776FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);19777} else {19778int VaArgOffset = -VarArgsSaveSize;19779FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);1978019781// If saving an odd number of registers then create an extra stack slot to19782// ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures19783// offsets to even-numbered registered remain 2*XLEN-aligned.19784if (Idx % 2) {19785MFI.CreateFixedObject(19786XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);19787VarArgsSaveSize += XLenInBytes;19788}1978919790SDValue FIN = DAG.getFrameIndex(FI, PtrVT);1979119792// Copy the integer registers that may have been used for passing varargs19793// to the vararg save area.19794for (unsigned I = Idx; I < ArgRegs.size(); ++I) {19795const Register Reg = RegInfo.createVirtualRegister(RC);19796RegInfo.addLiveIn(ArgRegs[I], Reg);19797SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);19798SDValue Store = DAG.getStore(19799Chain, DL, ArgValue, FIN,19800MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));19801OutChains.push_back(Store);19802FIN =19803DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);19804}19805}1980619807// Record the frame index of the first variable argument19808// which is a value necessary to VASTART.19809RVFI->setVarArgsFrameIndex(FI);19810RVFI->setVarArgsSaveSize(VarArgsSaveSize);19811}1981219813// All stores are 
  if (IsVarArg) {
    ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
    const TargetRegisterClass *RC = &RISCV::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();

    // Size of the vararg save area. For now, the varargs save area is either
    // zero or large enough to hold a0-a7.
    int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
    int FI;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
    if (VarArgsSaveSize == 0) {
      int VaArgOffset = CCInfo.getStackSize();
      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
    } else {
      int VaArgOffset = -VarArgsSaveSize;
      FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);

      // If saving an odd number of registers then create an extra stack slot
      // to ensure that the frame pointer is 2*XLEN-aligned, which in turn
      // ensures offsets to even-numbered registers remain 2*XLEN-aligned.
      if (Idx % 2) {
        MFI.CreateFixedObject(
            XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
        VarArgsSaveSize += XLenInBytes;
      }

      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

      // Copy the integer registers that may have been used for passing varargs
      // to the vararg save area.
      for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
        const Register Reg = RegInfo.createVirtualRegister(RC);
        RegInfo.addLiveIn(ArgRegs[I], Reg);
        SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
        SDValue Store = DAG.getStore(
            Chain, DL, ArgValue, FIN,
            MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
        OutChains.push_back(Store);
        FIN =
            DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
      }
    }

    // Record the frame index of the first variable argument
    // which is a value necessary to VASTART.
    RVFI->setVarArgsFrameIndex(FI);
    RVFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals. This only happens for vararg functions.
  if (!OutChains.empty()) {
    OutChains.push_back(Chain);
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  }

  return Chain;
}

/// isEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization.
/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
bool RISCVTargetLowering::isEligibleForTailCallOptimization(
    CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
    const SmallVector<CCValAssign, 16> &ArgLocs) const {

  auto CalleeCC = CLI.CallConv;
  auto &Outs = CLI.Outs;
  auto &Caller = MF.getFunction();
  auto CallerCC = Caller.getCallingConv();

  // Exception-handling functions need a special set of instructions to
  // indicate a return to the hardware. Tail-calling another function would
  // probably break this.
  // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
  // should be expanded as new function attributes are introduced.
  if (Caller.hasFnAttribute("interrupt"))
    return false;

  // Do not tail call opt if the stack is used to pass parameters.
  if (CCInfo.getStackSize() != 0)
    return false;

  // Do not tail call opt if any parameters need to be passed indirectly.
  // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
  // passed indirectly. So the address of the value will be passed in a
  // register, or if not available, then the address is put on the stack. In
  // order to pass indirectly, space on the stack often needs to be allocated
  // in order to store the value. In this case the CCInfo.getStackSize() != 0
  // check is not enough and we need to check if any CCValAssign ArgLocs are
  // passed CCValAssign::Indirect.
  for (auto &VA : ArgLocs)
    if (VA.getLocInfo() == CCValAssign::Indirect)
      return false;

  // Do not tail call opt if either caller or callee uses struct return
  // semantics.
  auto IsCallerStructRet = Caller.hasStructRetAttr();
  auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
  if (IsCallerStructRet || IsCalleeStructRet)
    return false;

  // The callee has to preserve all registers the caller needs to preserve.
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  if (CalleeCC != CallerCC) {
    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
      return false;
  }

  // Byval parameters hand the function a pointer directly into the stack area
  // we want to reuse during a tail call.
Working around this *is* possible19877// but less efficient and uglier in LowerCall.19878for (auto &Arg : Outs)19879if (Arg.Flags.isByVal())19880return false;1988119882return true;19883}1988419885static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {19886return DAG.getDataLayout().getPrefTypeAlign(19887VT.getTypeForEVT(*DAG.getContext()));19888}1988919890// Lower a call to a callseq_start + CALL + callseq_end chain, and add input19891// and output parameter nodes.19892SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,19893SmallVectorImpl<SDValue> &InVals) const {19894SelectionDAG &DAG = CLI.DAG;19895SDLoc &DL = CLI.DL;19896SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;19897SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;19898SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;19899SDValue Chain = CLI.Chain;19900SDValue Callee = CLI.Callee;19901bool &IsTailCall = CLI.IsTailCall;19902CallingConv::ID CallConv = CLI.CallConv;19903bool IsVarArg = CLI.IsVarArg;19904EVT PtrVT = getPointerTy(DAG.getDataLayout());19905MVT XLenVT = Subtarget.getXLenVT();1990619907MachineFunction &MF = DAG.getMachineFunction();1990819909// Analyze the operands of the call, assigning locations to each operand.19910SmallVector<CCValAssign, 16> ArgLocs;19911CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());1991219913if (CallConv == CallingConv::GHC) {19914if (Subtarget.hasStdExtE())19915report_fatal_error("GHC calling convention is not supported on RVE!");19916ArgCCInfo.AnalyzeCallOperands(Outs, RISCV::CC_RISCV_GHC);19917} else19918analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,19919CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC19920: RISCV::CC_RISCV);1992119922// Check if it's really possible to do a tail call.19923if (IsTailCall)19924IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);1992519926if (IsTailCall)19927++NumTailCalls;19928else if (CLI.CB && CLI.CB->isMustTailCall())19929report_fatal_error("failed to perform tail call elimination on a call "19930"site marked musttail");1993119932// Get a count of how many bytes are to be pushed on the stack.19933unsigned NumBytes = ArgCCInfo.getStackSize();1993419935// Create local copies for byval args19936SmallVector<SDValue, 8> ByValArgs;19937for (unsigned i = 0, e = Outs.size(); i != e; ++i) {19938ISD::ArgFlagsTy Flags = Outs[i].Flags;19939if (!Flags.isByVal())19940continue;1994119942SDValue Arg = OutVals[i];19943unsigned Size = Flags.getByValSize();19944Align Alignment = Flags.getNonZeroByValAlign();1994519946int FI =19947MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);19948SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));19949SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);1995019951Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,19952/*IsVolatile=*/false,19953/*AlwaysInline=*/false, /*CI*/ nullptr, IsTailCall,19954MachinePointerInfo(), MachinePointerInfo());19955ByValArgs.push_back(FIPtr);19956}1995719958if (!IsTailCall)19959Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);1996019961// Copy argument values to their designated locations.19962SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;19963SmallVector<SDValue, 8> MemOpChains;19964SDValue StackPtr;19965for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;19966++i, ++OutIdx) {19967CCValAssign &VA = ArgLocs[i];19968SDValue ArgValue = OutVals[OutIdx];19969ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;1997019971// Handle passing f64 on RV32D with a 
soft float ABI as a special case.19972if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {19973assert(VA.isRegLoc() && "Expected register VA assignment");19974assert(VA.needsCustom());19975SDValue SplitF64 = DAG.getNode(19976RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);19977SDValue Lo = SplitF64.getValue(0);19978SDValue Hi = SplitF64.getValue(1);1997919980Register RegLo = VA.getLocReg();19981RegsToPass.push_back(std::make_pair(RegLo, Lo));1998219983// Get the CCValAssign for the Hi part.19984CCValAssign &HiVA = ArgLocs[++i];1998519986if (HiVA.isMemLoc()) {19987// Second half of f64 is passed on the stack.19988if (!StackPtr.getNode())19989StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);19990SDValue Address =19991DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,19992DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));19993// Emit the store.19994MemOpChains.push_back(19995DAG.getStore(Chain, DL, Hi, Address, MachinePointerInfo()));19996} else {19997// Second half of f64 is passed in another GPR.19998Register RegHigh = HiVA.getLocReg();19999RegsToPass.push_back(std::make_pair(RegHigh, Hi));20000}20001continue;20002}2000320004// Promote the value if needed.20005// For now, only handle fully promoted and indirect arguments.20006if (VA.getLocInfo() == CCValAssign::Indirect) {20007// Store the argument in a stack slot and pass its address.20008Align StackAlign =20009std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),20010getPrefTypeAlign(ArgValue.getValueType(), DAG));20011TypeSize StoredSize = ArgValue.getValueType().getStoreSize();20012// If the original argument was split (e.g. i128), we need20013// to store the required parts of it here (and pass just one address).20014// Vectors may be partly split to registers and partly to the stack, in20015// which case the base address is partly offset and subsequent stores are20016// relative to that.20017unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;20018unsigned ArgPartOffset = Outs[OutIdx].PartOffset;20019assert(VA.getValVT().isVector() || ArgPartOffset == 0);20020// Calculate the total size to store. 
We don't have access to what we're20021// actually storing other than performing the loop and collecting the20022// info.20023SmallVector<std::pair<SDValue, SDValue>> Parts;20024while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {20025SDValue PartValue = OutVals[OutIdx + 1];20026unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;20027SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);20028EVT PartVT = PartValue.getValueType();20029if (PartVT.isScalableVector())20030Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);20031StoredSize += PartVT.getStoreSize();20032StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));20033Parts.push_back(std::make_pair(PartValue, Offset));20034++i;20035++OutIdx;20036}20037SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);20038int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();20039MemOpChains.push_back(20040DAG.getStore(Chain, DL, ArgValue, SpillSlot,20041MachinePointerInfo::getFixedStack(MF, FI)));20042for (const auto &Part : Parts) {20043SDValue PartValue = Part.first;20044SDValue PartOffset = Part.second;20045SDValue Address =20046DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);20047MemOpChains.push_back(20048DAG.getStore(Chain, DL, PartValue, Address,20049MachinePointerInfo::getFixedStack(MF, FI)));20050}20051ArgValue = SpillSlot;20052} else {20053ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);20054}2005520056// Use local copy if it is a byval arg.20057if (Flags.isByVal())20058ArgValue = ByValArgs[j++];2005920060if (VA.isRegLoc()) {20061// Queue up the argument copies and emit them at the end.20062RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));20063} else {20064assert(VA.isMemLoc() && "Argument not register or memory");20065assert(!IsTailCall && "Tail call not allowed if stack is used "20066"for passing parameters");2006720068// Work out the address of the stack slot.20069if (!StackPtr.getNode())20070StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);20071SDValue Address =20072DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,20073DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));2007420075// Emit the store.20076MemOpChains.push_back(20077DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));20078}20079}2008020081// Join the stores, which are independent of one another.20082if (!MemOpChains.empty())20083Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);2008420085SDValue Glue;2008620087// Build a sequence of copy-to-reg nodes, chained and glued together.20088for (auto &Reg : RegsToPass) {20089Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);20090Glue = Chain.getValue(1);20091}2009220093// Validate that none of the argument registers have been marked as20094// reserved, if so report an error. 
Do the same for the return address if this20095// is not a tailcall.20096validateCCReservedRegs(RegsToPass, MF);20097if (!IsTailCall &&20098MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))20099MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{20100MF.getFunction(),20101"Return address register required, but has been reserved."});2010220103// If the callee is a GlobalAddress/ExternalSymbol node, turn it into a20104// TargetGlobalAddress/TargetExternalSymbol node so that legalize won't20105// split it and then direct call can be matched by PseudoCALL.20106if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {20107const GlobalValue *GV = S->getGlobal();20108Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);20109} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {20110Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);20111}2011220113// The first call operand is the chain and the second is the target address.20114SmallVector<SDValue, 8> Ops;20115Ops.push_back(Chain);20116Ops.push_back(Callee);2011720118// Add argument registers to the end of the list so that they are20119// known live into the call.20120for (auto &Reg : RegsToPass)20121Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));2012220123if (!IsTailCall) {20124// Add a register mask operand representing the call-preserved registers.20125const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();20126const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);20127assert(Mask && "Missing call preserved mask for calling convention");20128Ops.push_back(DAG.getRegisterMask(Mask));20129}2013020131// Glue the call to the argument copies, if any.20132if (Glue.getNode())20133Ops.push_back(Glue);2013420135assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&20136"Unexpected CFI type for a direct call");2013720138// Emit the call.20139SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);2014020141if (IsTailCall) {20142MF.getFrameInfo().setHasTailCall();20143SDValue Ret = DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);20144if (CLI.CFIType)20145Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());20146DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);20147return Ret;20148}2014920150Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);20151if (CLI.CFIType)20152Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());20153DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);20154Glue = Chain.getValue(1);2015520156// Mark the end of the call, which is glued to the call itself.20157Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);20158Glue = Chain.getValue(1);2015920160// Assign locations to each value returned by this call.20161SmallVector<CCValAssign, 16> RVLocs;20162CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());20163analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV);2016420165// Copy all of the result registers out of their specified physreg.20166for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {20167auto &VA = RVLocs[i];20168// Copy the value out20169SDValue RetValue =20170DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);20171// Glue the RetValue to the end of the call sequence20172Chain = RetValue.getValue(1);20173Glue = RetValue.getValue(2);2017420175if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {20176assert(VA.needsCustom());20177SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, 
RVLocs[++i].getLocReg(),20178MVT::i32, Glue);20179Chain = RetValue2.getValue(1);20180Glue = RetValue2.getValue(2);20181RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,20182RetValue2);20183}2018420185RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);2018620187InVals.push_back(RetValue);20188}2018920190return Chain;20191}2019220193bool RISCVTargetLowering::CanLowerReturn(20194CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,20195const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {20196SmallVector<CCValAssign, 16> RVLocs;20197CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);2019820199RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(Outs)};2020020201for (unsigned i = 0, e = Outs.size(); i != e; ++i) {20202MVT VT = Outs[i].VT;20203ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;20204RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();20205if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,20206ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,20207nullptr, *this, Dispatcher))20208return false;20209}20210return true;20211}2021220213SDValue20214RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,20215bool IsVarArg,20216const SmallVectorImpl<ISD::OutputArg> &Outs,20217const SmallVectorImpl<SDValue> &OutVals,20218const SDLoc &DL, SelectionDAG &DAG) const {20219MachineFunction &MF = DAG.getMachineFunction();20220const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();2022120222// Stores the assignment of the return value to a location.20223SmallVector<CCValAssign, 16> RVLocs;2022420225// Info about the registers and stack slot.20226CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,20227*DAG.getContext());2022820229analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,20230nullptr, RISCV::CC_RISCV);2023120232if (CallConv == CallingConv::GHC && !RVLocs.empty())20233report_fatal_error("GHC functions return void only");2023420235SDValue Glue;20236SmallVector<SDValue, 4> RetOps(1, Chain);2023720238// Copy the result values into the output registers.20239for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {20240SDValue Val = OutVals[OutIdx];20241CCValAssign &VA = RVLocs[i];20242assert(VA.isRegLoc() && "Can only return in registers!");2024320244if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {20245// Handle returning f64 on RV32D with a soft float ABI.20246assert(VA.isRegLoc() && "Expected return via registers");20247assert(VA.needsCustom());20248SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,20249DAG.getVTList(MVT::i32, MVT::i32), Val);20250SDValue Lo = SplitF64.getValue(0);20251SDValue Hi = SplitF64.getValue(1);20252Register RegLo = VA.getLocReg();20253Register RegHi = RVLocs[++i].getLocReg();2025420255if (STI.isRegisterReservedByUser(RegLo) ||20256STI.isRegisterReservedByUser(RegHi))20257MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{20258MF.getFunction(),20259"Return value register required, but has been reserved."});2026020261Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);20262Glue = Chain.getValue(1);20263RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));20264Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);20265Glue = Chain.getValue(1);20266RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));20267} else {20268// Handle a 'normal' return.20269Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);20270Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, 
Glue);2027120272if (STI.isRegisterReservedByUser(VA.getLocReg()))20273MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{20274MF.getFunction(),20275"Return value register required, but has been reserved."});2027620277// Guarantee that all emitted copies are stuck together.20278Glue = Chain.getValue(1);20279RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));20280}20281}2028220283RetOps[0] = Chain; // Update chain.2028420285// Add the glue node if we have it.20286if (Glue.getNode()) {20287RetOps.push_back(Glue);20288}2028920290if (any_of(RVLocs,20291[](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))20292MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();2029320294unsigned RetOpc = RISCVISD::RET_GLUE;20295// Interrupt service routines use different return instructions.20296const Function &Func = DAG.getMachineFunction().getFunction();20297if (Func.hasFnAttribute("interrupt")) {20298if (!Func.getReturnType()->isVoidTy())20299report_fatal_error(20300"Functions with the interrupt attribute must have void return type!");2030120302MachineFunction &MF = DAG.getMachineFunction();20303StringRef Kind =20304MF.getFunction().getFnAttribute("interrupt").getValueAsString();2030520306if (Kind == "supervisor")20307RetOpc = RISCVISD::SRET_GLUE;20308else20309RetOpc = RISCVISD::MRET_GLUE;20310}2031120312return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);20313}2031420315void RISCVTargetLowering::validateCCReservedRegs(20316const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,20317MachineFunction &MF) const {20318const Function &F = MF.getFunction();20319const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();2032020321if (llvm::any_of(Regs, [&STI](auto Reg) {20322return STI.isRegisterReservedByUser(Reg.first);20323}))20324F.getContext().diagnose(DiagnosticInfoUnsupported{20325F, "Argument register required, but has been reserved."});20326}2032720328// Check if the result of the node is only used as a return value, as20329// otherwise we can't perform a tail-call.20330bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {20331if (N->getNumValues() != 1)20332return false;20333if (!N->hasNUsesOfValue(1, 0))20334return false;2033520336SDNode *Copy = *N->use_begin();2033720338if (Copy->getOpcode() == ISD::BITCAST) {20339return isUsedByReturnOnly(Copy, Chain);20340}2034120342// TODO: Handle additional opcodes in order to support tail-calling libcalls20343// with soft float ABIs.20344if (Copy->getOpcode() != ISD::CopyToReg) {20345return false;20346}2034720348// If the ISD::CopyToReg has a glue operand, we conservatively assume it20349// isn't safe to perform a tail call.20350if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)20351return false;2035220353// The copy must be used by a RISCVISD::RET_GLUE, and nothing else.20354bool HasRet = false;20355for (SDNode *Node : Copy->uses()) {20356if (Node->getOpcode() != RISCVISD::RET_GLUE)20357return false;20358HasRet = true;20359}20360if (!HasRet)20361return false;2036220363Chain = Copy->getOperand(0);20364return true;20365}2036620367bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {20368return CI->isTailCall();20369}2037020371const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {20372#define NODE_NAME_CASE(NODE) \20373case RISCVISD::NODE: \20374return "RISCVISD::" #NODE;20375// clang-format off20376switch ((RISCVISD::NodeType)Opcode) {20377case 
RISCVISD::FIRST_NUMBER:20378break;20379NODE_NAME_CASE(RET_GLUE)20380NODE_NAME_CASE(SRET_GLUE)20381NODE_NAME_CASE(MRET_GLUE)20382NODE_NAME_CASE(CALL)20383NODE_NAME_CASE(SELECT_CC)20384NODE_NAME_CASE(BR_CC)20385NODE_NAME_CASE(BuildPairF64)20386NODE_NAME_CASE(SplitF64)20387NODE_NAME_CASE(TAIL)20388NODE_NAME_CASE(ADD_LO)20389NODE_NAME_CASE(HI)20390NODE_NAME_CASE(LLA)20391NODE_NAME_CASE(ADD_TPREL)20392NODE_NAME_CASE(MULHSU)20393NODE_NAME_CASE(SHL_ADD)20394NODE_NAME_CASE(SLLW)20395NODE_NAME_CASE(SRAW)20396NODE_NAME_CASE(SRLW)20397NODE_NAME_CASE(DIVW)20398NODE_NAME_CASE(DIVUW)20399NODE_NAME_CASE(REMUW)20400NODE_NAME_CASE(ROLW)20401NODE_NAME_CASE(RORW)20402NODE_NAME_CASE(CLZW)20403NODE_NAME_CASE(CTZW)20404NODE_NAME_CASE(ABSW)20405NODE_NAME_CASE(FMV_H_X)20406NODE_NAME_CASE(FMV_X_ANYEXTH)20407NODE_NAME_CASE(FMV_X_SIGNEXTH)20408NODE_NAME_CASE(FMV_W_X_RV64)20409NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)20410NODE_NAME_CASE(FCVT_X)20411NODE_NAME_CASE(FCVT_XU)20412NODE_NAME_CASE(FCVT_W_RV64)20413NODE_NAME_CASE(FCVT_WU_RV64)20414NODE_NAME_CASE(STRICT_FCVT_W_RV64)20415NODE_NAME_CASE(STRICT_FCVT_WU_RV64)20416NODE_NAME_CASE(FP_ROUND_BF16)20417NODE_NAME_CASE(FP_EXTEND_BF16)20418NODE_NAME_CASE(FROUND)20419NODE_NAME_CASE(FCLASS)20420NODE_NAME_CASE(FMAX)20421NODE_NAME_CASE(FMIN)20422NODE_NAME_CASE(READ_COUNTER_WIDE)20423NODE_NAME_CASE(BREV8)20424NODE_NAME_CASE(ORC_B)20425NODE_NAME_CASE(ZIP)20426NODE_NAME_CASE(UNZIP)20427NODE_NAME_CASE(CLMUL)20428NODE_NAME_CASE(CLMULH)20429NODE_NAME_CASE(CLMULR)20430NODE_NAME_CASE(MOPR)20431NODE_NAME_CASE(MOPRR)20432NODE_NAME_CASE(SHA256SIG0)20433NODE_NAME_CASE(SHA256SIG1)20434NODE_NAME_CASE(SHA256SUM0)20435NODE_NAME_CASE(SHA256SUM1)20436NODE_NAME_CASE(SM4KS)20437NODE_NAME_CASE(SM4ED)20438NODE_NAME_CASE(SM3P0)20439NODE_NAME_CASE(SM3P1)20440NODE_NAME_CASE(TH_LWD)20441NODE_NAME_CASE(TH_LWUD)20442NODE_NAME_CASE(TH_LDD)20443NODE_NAME_CASE(TH_SWD)20444NODE_NAME_CASE(TH_SDD)20445NODE_NAME_CASE(VMV_V_V_VL)20446NODE_NAME_CASE(VMV_V_X_VL)20447NODE_NAME_CASE(VFMV_V_F_VL)20448NODE_NAME_CASE(VMV_X_S)20449NODE_NAME_CASE(VMV_S_X_VL)20450NODE_NAME_CASE(VFMV_S_F_VL)20451NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)20452NODE_NAME_CASE(READ_VLENB)20453NODE_NAME_CASE(TRUNCATE_VECTOR_VL)20454NODE_NAME_CASE(VSLIDEUP_VL)20455NODE_NAME_CASE(VSLIDE1UP_VL)20456NODE_NAME_CASE(VSLIDEDOWN_VL)20457NODE_NAME_CASE(VSLIDE1DOWN_VL)20458NODE_NAME_CASE(VFSLIDE1UP_VL)20459NODE_NAME_CASE(VFSLIDE1DOWN_VL)20460NODE_NAME_CASE(VID_VL)20461NODE_NAME_CASE(VFNCVT_ROD_VL)20462NODE_NAME_CASE(VECREDUCE_ADD_VL)20463NODE_NAME_CASE(VECREDUCE_UMAX_VL)20464NODE_NAME_CASE(VECREDUCE_SMAX_VL)20465NODE_NAME_CASE(VECREDUCE_UMIN_VL)20466NODE_NAME_CASE(VECREDUCE_SMIN_VL)20467NODE_NAME_CASE(VECREDUCE_AND_VL)20468NODE_NAME_CASE(VECREDUCE_OR_VL)20469NODE_NAME_CASE(VECREDUCE_XOR_VL)20470NODE_NAME_CASE(VECREDUCE_FADD_VL)20471NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)20472NODE_NAME_CASE(VECREDUCE_FMIN_VL)20473NODE_NAME_CASE(VECREDUCE_FMAX_VL)20474NODE_NAME_CASE(ADD_VL)20475NODE_NAME_CASE(AND_VL)20476NODE_NAME_CASE(MUL_VL)20477NODE_NAME_CASE(OR_VL)20478NODE_NAME_CASE(SDIV_VL)20479NODE_NAME_CASE(SHL_VL)20480NODE_NAME_CASE(SREM_VL)20481NODE_NAME_CASE(SRA_VL)20482NODE_NAME_CASE(SRL_VL)20483NODE_NAME_CASE(ROTL_VL)20484NODE_NAME_CASE(ROTR_VL)20485NODE_NAME_CASE(SUB_VL)20486NODE_NAME_CASE(UDIV_VL)20487NODE_NAME_CASE(UREM_VL)20488NODE_NAME_CASE(XOR_VL)20489NODE_NAME_CASE(AVGFLOORS_VL)20490NODE_NAME_CASE(AVGFLOORU_VL)20491NODE_NAME_CASE(AVGCEILS_VL)20492NODE_NAME_CASE(AVGCEILU_VL)20493NODE_NAME_CASE(SADDSAT_VL)20494NODE_NAME_CASE(UADDSAT_VL)20495NODE_NAME_CASE(SSUBSAT_V
L)20496NODE_NAME_CASE(USUBSAT_VL)20497NODE_NAME_CASE(VNCLIP_VL)20498NODE_NAME_CASE(VNCLIPU_VL)20499NODE_NAME_CASE(FADD_VL)20500NODE_NAME_CASE(FSUB_VL)20501NODE_NAME_CASE(FMUL_VL)20502NODE_NAME_CASE(FDIV_VL)20503NODE_NAME_CASE(FNEG_VL)20504NODE_NAME_CASE(FABS_VL)20505NODE_NAME_CASE(FSQRT_VL)20506NODE_NAME_CASE(FCLASS_VL)20507NODE_NAME_CASE(VFMADD_VL)20508NODE_NAME_CASE(VFNMADD_VL)20509NODE_NAME_CASE(VFMSUB_VL)20510NODE_NAME_CASE(VFNMSUB_VL)20511NODE_NAME_CASE(VFWMADD_VL)20512NODE_NAME_CASE(VFWNMADD_VL)20513NODE_NAME_CASE(VFWMSUB_VL)20514NODE_NAME_CASE(VFWNMSUB_VL)20515NODE_NAME_CASE(FCOPYSIGN_VL)20516NODE_NAME_CASE(SMIN_VL)20517NODE_NAME_CASE(SMAX_VL)20518NODE_NAME_CASE(UMIN_VL)20519NODE_NAME_CASE(UMAX_VL)20520NODE_NAME_CASE(BITREVERSE_VL)20521NODE_NAME_CASE(BSWAP_VL)20522NODE_NAME_CASE(CTLZ_VL)20523NODE_NAME_CASE(CTTZ_VL)20524NODE_NAME_CASE(CTPOP_VL)20525NODE_NAME_CASE(VFMIN_VL)20526NODE_NAME_CASE(VFMAX_VL)20527NODE_NAME_CASE(MULHS_VL)20528NODE_NAME_CASE(MULHU_VL)20529NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)20530NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)20531NODE_NAME_CASE(VFCVT_RM_X_F_VL)20532NODE_NAME_CASE(VFCVT_RM_XU_F_VL)20533NODE_NAME_CASE(VFCVT_X_F_VL)20534NODE_NAME_CASE(VFCVT_XU_F_VL)20535NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)20536NODE_NAME_CASE(SINT_TO_FP_VL)20537NODE_NAME_CASE(UINT_TO_FP_VL)20538NODE_NAME_CASE(VFCVT_RM_F_XU_VL)20539NODE_NAME_CASE(VFCVT_RM_F_X_VL)20540NODE_NAME_CASE(FP_EXTEND_VL)20541NODE_NAME_CASE(FP_ROUND_VL)20542NODE_NAME_CASE(STRICT_FADD_VL)20543NODE_NAME_CASE(STRICT_FSUB_VL)20544NODE_NAME_CASE(STRICT_FMUL_VL)20545NODE_NAME_CASE(STRICT_FDIV_VL)20546NODE_NAME_CASE(STRICT_FSQRT_VL)20547NODE_NAME_CASE(STRICT_VFMADD_VL)20548NODE_NAME_CASE(STRICT_VFNMADD_VL)20549NODE_NAME_CASE(STRICT_VFMSUB_VL)20550NODE_NAME_CASE(STRICT_VFNMSUB_VL)20551NODE_NAME_CASE(STRICT_FP_ROUND_VL)20552NODE_NAME_CASE(STRICT_FP_EXTEND_VL)20553NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)20554NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)20555NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)20556NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)20557NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)20558NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)20559NODE_NAME_CASE(STRICT_FSETCC_VL)20560NODE_NAME_CASE(STRICT_FSETCCS_VL)20561NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)20562NODE_NAME_CASE(VWMUL_VL)20563NODE_NAME_CASE(VWMULU_VL)20564NODE_NAME_CASE(VWMULSU_VL)20565NODE_NAME_CASE(VWADD_VL)20566NODE_NAME_CASE(VWADDU_VL)20567NODE_NAME_CASE(VWSUB_VL)20568NODE_NAME_CASE(VWSUBU_VL)20569NODE_NAME_CASE(VWADD_W_VL)20570NODE_NAME_CASE(VWADDU_W_VL)20571NODE_NAME_CASE(VWSUB_W_VL)20572NODE_NAME_CASE(VWSUBU_W_VL)20573NODE_NAME_CASE(VWSLL_VL)20574NODE_NAME_CASE(VFWMUL_VL)20575NODE_NAME_CASE(VFWADD_VL)20576NODE_NAME_CASE(VFWSUB_VL)20577NODE_NAME_CASE(VFWADD_W_VL)20578NODE_NAME_CASE(VFWSUB_W_VL)20579NODE_NAME_CASE(VWMACC_VL)20580NODE_NAME_CASE(VWMACCU_VL)20581NODE_NAME_CASE(VWMACCSU_VL)20582NODE_NAME_CASE(VNSRL_VL)20583NODE_NAME_CASE(SETCC_VL)20584NODE_NAME_CASE(VMERGE_VL)20585NODE_NAME_CASE(VMAND_VL)20586NODE_NAME_CASE(VMOR_VL)20587NODE_NAME_CASE(VMXOR_VL)20588NODE_NAME_CASE(VMCLR_VL)20589NODE_NAME_CASE(VMSET_VL)20590NODE_NAME_CASE(VRGATHER_VX_VL)20591NODE_NAME_CASE(VRGATHER_VV_VL)20592NODE_NAME_CASE(VRGATHEREI16_VV_VL)20593NODE_NAME_CASE(VSEXT_VL)20594NODE_NAME_CASE(VZEXT_VL)20595NODE_NAME_CASE(VCPOP_VL)20596NODE_NAME_CASE(VFIRST_VL)20597NODE_NAME_CASE(READ_CSR)20598NODE_NAME_CASE(WRITE_CSR)20599NODE_NAME_CASE(SWAP_CSR)20600NODE_NAME_CASE(CZERO_EQZ)20601NODE_NAME_CASE(CZERO_NEZ)20602NODE_NAME_CASE(SW_GUARDED_BRIND)20603NODE_NAME_CASE(SF_VC_XV_SE)20604NODE_NAME_CASE(SF_VC_IV_SE)20605NODE_NAME_CA
SE(SF_VC_VV_SE)20606NODE_NAME_CASE(SF_VC_FV_SE)20607NODE_NAME_CASE(SF_VC_XVV_SE)20608NODE_NAME_CASE(SF_VC_IVV_SE)20609NODE_NAME_CASE(SF_VC_VVV_SE)20610NODE_NAME_CASE(SF_VC_FVV_SE)20611NODE_NAME_CASE(SF_VC_XVW_SE)20612NODE_NAME_CASE(SF_VC_IVW_SE)20613NODE_NAME_CASE(SF_VC_VVW_SE)20614NODE_NAME_CASE(SF_VC_FVW_SE)20615NODE_NAME_CASE(SF_VC_V_X_SE)20616NODE_NAME_CASE(SF_VC_V_I_SE)20617NODE_NAME_CASE(SF_VC_V_XV_SE)20618NODE_NAME_CASE(SF_VC_V_IV_SE)20619NODE_NAME_CASE(SF_VC_V_VV_SE)20620NODE_NAME_CASE(SF_VC_V_FV_SE)20621NODE_NAME_CASE(SF_VC_V_XVV_SE)20622NODE_NAME_CASE(SF_VC_V_IVV_SE)20623NODE_NAME_CASE(SF_VC_V_VVV_SE)20624NODE_NAME_CASE(SF_VC_V_FVV_SE)20625NODE_NAME_CASE(SF_VC_V_XVW_SE)20626NODE_NAME_CASE(SF_VC_V_IVW_SE)20627NODE_NAME_CASE(SF_VC_V_VVW_SE)20628NODE_NAME_CASE(SF_VC_V_FVW_SE)20629}20630// clang-format on20631return nullptr;20632#undef NODE_NAME_CASE20633}2063420635/// getConstraintType - Given a constraint letter, return the type of20636/// constraint it is for this target.20637RISCVTargetLowering::ConstraintType20638RISCVTargetLowering::getConstraintType(StringRef Constraint) const {20639if (Constraint.size() == 1) {20640switch (Constraint[0]) {20641default:20642break;20643case 'f':20644return C_RegisterClass;20645case 'I':20646case 'J':20647case 'K':20648return C_Immediate;20649case 'A':20650return C_Memory;20651case 's':20652case 'S': // A symbolic address20653return C_Other;20654}20655} else {20656if (Constraint == "vr" || Constraint == "vm")20657return C_RegisterClass;20658}20659return TargetLowering::getConstraintType(Constraint);20660}2066120662std::pair<unsigned, const TargetRegisterClass *>20663RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,20664StringRef Constraint,20665MVT VT) const {20666// First, see if this is a constraint that directly corresponds to a RISC-V20667// register class.20668if (Constraint.size() == 1) {20669switch (Constraint[0]) {20670case 'r':20671// TODO: Support fixed vectors up to XLen for P extension?20672if (VT.isVector())20673break;20674if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())20675return std::make_pair(0U, &RISCV::GPRF16RegClass);20676if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())20677return std::make_pair(0U, &RISCV::GPRF32RegClass);20678if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())20679return std::make_pair(0U, &RISCV::GPRPairRegClass);20680return std::make_pair(0U, &RISCV::GPRNoX0RegClass);20681case 'f':20682if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16)20683return std::make_pair(0U, &RISCV::FPR16RegClass);20684if (Subtarget.hasStdExtF() && VT == MVT::f32)20685return std::make_pair(0U, &RISCV::FPR32RegClass);20686if (Subtarget.hasStdExtD() && VT == MVT::f64)20687return std::make_pair(0U, &RISCV::FPR64RegClass);20688break;20689default:20690break;20691}20692} else if (Constraint == "vr") {20693for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,20694&RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {20695if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))20696return std::make_pair(0U, RC);20697}20698} else if (Constraint == "vm") {20699if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))20700return std::make_pair(0U, &RISCV::VMV0RegClass);20701}2070220703// Clang will correctly decode the usage of register name aliases into their20704// official names. However, other frontends like `rustc` do not. 
This allows20705// users of these frontends to use the ABI names for registers in LLVM-style20706// register constraints.20707unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())20708.Case("{zero}", RISCV::X0)20709.Case("{ra}", RISCV::X1)20710.Case("{sp}", RISCV::X2)20711.Case("{gp}", RISCV::X3)20712.Case("{tp}", RISCV::X4)20713.Case("{t0}", RISCV::X5)20714.Case("{t1}", RISCV::X6)20715.Case("{t2}", RISCV::X7)20716.Cases("{s0}", "{fp}", RISCV::X8)20717.Case("{s1}", RISCV::X9)20718.Case("{a0}", RISCV::X10)20719.Case("{a1}", RISCV::X11)20720.Case("{a2}", RISCV::X12)20721.Case("{a3}", RISCV::X13)20722.Case("{a4}", RISCV::X14)20723.Case("{a5}", RISCV::X15)20724.Case("{a6}", RISCV::X16)20725.Case("{a7}", RISCV::X17)20726.Case("{s2}", RISCV::X18)20727.Case("{s3}", RISCV::X19)20728.Case("{s4}", RISCV::X20)20729.Case("{s5}", RISCV::X21)20730.Case("{s6}", RISCV::X22)20731.Case("{s7}", RISCV::X23)20732.Case("{s8}", RISCV::X24)20733.Case("{s9}", RISCV::X25)20734.Case("{s10}", RISCV::X26)20735.Case("{s11}", RISCV::X27)20736.Case("{t3}", RISCV::X28)20737.Case("{t4}", RISCV::X29)20738.Case("{t5}", RISCV::X30)20739.Case("{t6}", RISCV::X31)20740.Default(RISCV::NoRegister);20741if (XRegFromAlias != RISCV::NoRegister)20742return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);2074320744// Since TargetLowering::getRegForInlineAsmConstraint uses the name of the20745// TableGen record rather than the AsmName to choose registers for InlineAsm20746// constraints, plus we want to match those names to the widest floating point20747// register type available, manually select floating point registers here.20748//20749// The second case is the ABI name of the register, so that frontends can also20750// use the ABI names in register constraint lists.20751if (Subtarget.hasStdExtF()) {20752unsigned FReg = StringSwitch<unsigned>(Constraint.lower())20753.Cases("{f0}", "{ft0}", RISCV::F0_F)20754.Cases("{f1}", "{ft1}", RISCV::F1_F)20755.Cases("{f2}", "{ft2}", RISCV::F2_F)20756.Cases("{f3}", "{ft3}", RISCV::F3_F)20757.Cases("{f4}", "{ft4}", RISCV::F4_F)20758.Cases("{f5}", "{ft5}", RISCV::F5_F)20759.Cases("{f6}", "{ft6}", RISCV::F6_F)20760.Cases("{f7}", "{ft7}", RISCV::F7_F)20761.Cases("{f8}", "{fs0}", RISCV::F8_F)20762.Cases("{f9}", "{fs1}", RISCV::F9_F)20763.Cases("{f10}", "{fa0}", RISCV::F10_F)20764.Cases("{f11}", "{fa1}", RISCV::F11_F)20765.Cases("{f12}", "{fa2}", RISCV::F12_F)20766.Cases("{f13}", "{fa3}", RISCV::F13_F)20767.Cases("{f14}", "{fa4}", RISCV::F14_F)20768.Cases("{f15}", "{fa5}", RISCV::F15_F)20769.Cases("{f16}", "{fa6}", RISCV::F16_F)20770.Cases("{f17}", "{fa7}", RISCV::F17_F)20771.Cases("{f18}", "{fs2}", RISCV::F18_F)20772.Cases("{f19}", "{fs3}", RISCV::F19_F)20773.Cases("{f20}", "{fs4}", RISCV::F20_F)20774.Cases("{f21}", "{fs5}", RISCV::F21_F)20775.Cases("{f22}", "{fs6}", RISCV::F22_F)20776.Cases("{f23}", "{fs7}", RISCV::F23_F)20777.Cases("{f24}", "{fs8}", RISCV::F24_F)20778.Cases("{f25}", "{fs9}", RISCV::F25_F)20779.Cases("{f26}", "{fs10}", RISCV::F26_F)20780.Cases("{f27}", "{fs11}", RISCV::F27_F)20781.Cases("{f28}", "{ft8}", RISCV::F28_F)20782.Cases("{f29}", "{ft9}", RISCV::F29_F)20783.Cases("{f30}", "{ft10}", RISCV::F30_F)20784.Cases("{f31}", "{ft11}", RISCV::F31_F)20785.Default(RISCV::NoRegister);20786if (FReg != RISCV::NoRegister) {20787assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");20788if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {20789unsigned RegNo = FReg - RISCV::F0_F;20790unsigned DReg = RISCV::F0_D + RegNo;20791return 
std::make_pair(DReg, &RISCV::FPR64RegClass);20792}20793if (VT == MVT::f32 || VT == MVT::Other)20794return std::make_pair(FReg, &RISCV::FPR32RegClass);20795if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {20796unsigned RegNo = FReg - RISCV::F0_F;20797unsigned HReg = RISCV::F0_H + RegNo;20798return std::make_pair(HReg, &RISCV::FPR16RegClass);20799}20800}20801}2080220803if (Subtarget.hasVInstructions()) {20804Register VReg = StringSwitch<Register>(Constraint.lower())20805.Case("{v0}", RISCV::V0)20806.Case("{v1}", RISCV::V1)20807.Case("{v2}", RISCV::V2)20808.Case("{v3}", RISCV::V3)20809.Case("{v4}", RISCV::V4)20810.Case("{v5}", RISCV::V5)20811.Case("{v6}", RISCV::V6)20812.Case("{v7}", RISCV::V7)20813.Case("{v8}", RISCV::V8)20814.Case("{v9}", RISCV::V9)20815.Case("{v10}", RISCV::V10)20816.Case("{v11}", RISCV::V11)20817.Case("{v12}", RISCV::V12)20818.Case("{v13}", RISCV::V13)20819.Case("{v14}", RISCV::V14)20820.Case("{v15}", RISCV::V15)20821.Case("{v16}", RISCV::V16)20822.Case("{v17}", RISCV::V17)20823.Case("{v18}", RISCV::V18)20824.Case("{v19}", RISCV::V19)20825.Case("{v20}", RISCV::V20)20826.Case("{v21}", RISCV::V21)20827.Case("{v22}", RISCV::V22)20828.Case("{v23}", RISCV::V23)20829.Case("{v24}", RISCV::V24)20830.Case("{v25}", RISCV::V25)20831.Case("{v26}", RISCV::V26)20832.Case("{v27}", RISCV::V27)20833.Case("{v28}", RISCV::V28)20834.Case("{v29}", RISCV::V29)20835.Case("{v30}", RISCV::V30)20836.Case("{v31}", RISCV::V31)20837.Default(RISCV::NoRegister);20838if (VReg != RISCV::NoRegister) {20839if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))20840return std::make_pair(VReg, &RISCV::VMRegClass);20841if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))20842return std::make_pair(VReg, &RISCV::VRRegClass);20843for (const auto *RC :20844{&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {20845if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {20846VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);20847return std::make_pair(VReg, RC);20848}20849}20850}20851}2085220853std::pair<Register, const TargetRegisterClass *> Res =20854TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);2085520856// If we picked one of the Zfinx register classes, remap it to the GPR class.20857// FIXME: When Zfinx is supported in CodeGen this will need to take the20858// Subtarget into account.20859if (Res.second == &RISCV::GPRF16RegClass ||20860Res.second == &RISCV::GPRF32RegClass ||20861Res.second == &RISCV::GPRPairRegClass)20862return std::make_pair(Res.first, &RISCV::GPRRegClass);2086320864return Res;20865}2086620867InlineAsm::ConstraintCode20868RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {20869// Currently only support length 1 constraints.20870if (ConstraintCode.size() == 1) {20871switch (ConstraintCode[0]) {20872case 'A':20873return InlineAsm::ConstraintCode::A;20874default:20875break;20876}20877}2087820879return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);20880}2088120882void RISCVTargetLowering::LowerAsmOperandForConstraint(20883SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,20884SelectionDAG &DAG) const {20885// Currently only support length 1 constraints.20886if (Constraint.size() == 1) {20887switch (Constraint[0]) {20888case 'I':20889// Validate & create a 12-bit signed immediate operand.20890if (auto *C = dyn_cast<ConstantSDNode>(Op)) {20891uint64_t CVal = C->getSExtValue();20892if (isInt<12>(CVal))20893Ops.push_back(20894DAG.getTargetConstant(CVal, SDLoc(Op), 
Subtarget.getXLenVT()));20895}20896return;20897case 'J':20898// Validate & create an integer zero operand.20899if (isNullConstant(Op))20900Ops.push_back(20901DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));20902return;20903case 'K':20904// Validate & create a 5-bit unsigned immediate operand.20905if (auto *C = dyn_cast<ConstantSDNode>(Op)) {20906uint64_t CVal = C->getZExtValue();20907if (isUInt<5>(CVal))20908Ops.push_back(20909DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));20910}20911return;20912case 'S':20913TargetLowering::LowerAsmOperandForConstraint(Op, "s", Ops, DAG);20914return;20915default:20916break;20917}20918}20919TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);20920}2092120922Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,20923Instruction *Inst,20924AtomicOrdering Ord) const {20925if (Subtarget.hasStdExtZtso()) {20926if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)20927return Builder.CreateFence(Ord);20928return nullptr;20929}2093020931if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)20932return Builder.CreateFence(Ord);20933if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))20934return Builder.CreateFence(AtomicOrdering::Release);20935return nullptr;20936}2093720938Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,20939Instruction *Inst,20940AtomicOrdering Ord) const {20941if (Subtarget.hasStdExtZtso()) {20942if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)20943return Builder.CreateFence(Ord);20944return nullptr;20945}2094620947if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))20948return Builder.CreateFence(AtomicOrdering::Acquire);20949if (Subtarget.enableTrailingSeqCstFence() && isa<StoreInst>(Inst) &&20950Ord == AtomicOrdering::SequentiallyConsistent)20951return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);20952return nullptr;20953}2095420955TargetLowering::AtomicExpansionKind20956RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {20957// atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating20958// point operations can't be used in an lr/sc sequence without breaking the20959// forward-progress guarantee.20960if (AI->isFloatingPointOperation() ||20961AI->getOperation() == AtomicRMWInst::UIncWrap ||20962AI->getOperation() == AtomicRMWInst::UDecWrap)20963return AtomicExpansionKind::CmpXChg;2096420965// Don't expand forced atomics, we want to have __sync libcalls instead.20966if (Subtarget.hasForcedAtomics())20967return AtomicExpansionKind::None;2096820969unsigned Size = AI->getType()->getPrimitiveSizeInBits();20970if (AI->getOperation() == AtomicRMWInst::Nand) {20971if (Subtarget.hasStdExtZacas() &&20972(Size >= 32 || Subtarget.hasStdExtZabha()))20973return AtomicExpansionKind::CmpXChg;20974if (Size < 32)20975return AtomicExpansionKind::MaskedIntrinsic;20976}2097720978if (Size < 32 && !Subtarget.hasStdExtZabha())20979return AtomicExpansionKind::MaskedIntrinsic;2098020981return AtomicExpansionKind::None;20982}2098320984static Intrinsic::ID20985getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {20986if (XLen == 32) {20987switch (BinOp) {20988default:20989llvm_unreachable("Unexpected AtomicRMW BinOp");20990case AtomicRMWInst::Xchg:20991return Intrinsic::riscv_masked_atomicrmw_xchg_i32;20992case AtomicRMWInst::Add:20993return Intrinsic::riscv_masked_atomicrmw_add_i32;20994case AtomicRMWInst::Sub:20995return 
Intrinsic::riscv_masked_atomicrmw_sub_i32;20996case AtomicRMWInst::Nand:20997return Intrinsic::riscv_masked_atomicrmw_nand_i32;20998case AtomicRMWInst::Max:20999return Intrinsic::riscv_masked_atomicrmw_max_i32;21000case AtomicRMWInst::Min:21001return Intrinsic::riscv_masked_atomicrmw_min_i32;21002case AtomicRMWInst::UMax:21003return Intrinsic::riscv_masked_atomicrmw_umax_i32;21004case AtomicRMWInst::UMin:21005return Intrinsic::riscv_masked_atomicrmw_umin_i32;21006}21007}2100821009if (XLen == 64) {21010switch (BinOp) {21011default:21012llvm_unreachable("Unexpected AtomicRMW BinOp");21013case AtomicRMWInst::Xchg:21014return Intrinsic::riscv_masked_atomicrmw_xchg_i64;21015case AtomicRMWInst::Add:21016return Intrinsic::riscv_masked_atomicrmw_add_i64;21017case AtomicRMWInst::Sub:21018return Intrinsic::riscv_masked_atomicrmw_sub_i64;21019case AtomicRMWInst::Nand:21020return Intrinsic::riscv_masked_atomicrmw_nand_i64;21021case AtomicRMWInst::Max:21022return Intrinsic::riscv_masked_atomicrmw_max_i64;21023case AtomicRMWInst::Min:21024return Intrinsic::riscv_masked_atomicrmw_min_i64;21025case AtomicRMWInst::UMax:21026return Intrinsic::riscv_masked_atomicrmw_umax_i64;21027case AtomicRMWInst::UMin:21028return Intrinsic::riscv_masked_atomicrmw_umin_i64;21029}21030}2103121032llvm_unreachable("Unexpected XLen\n");21033}2103421035Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(21036IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,21037Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {21038// In the case of an atomicrmw xchg with a constant 0/-1 operand, replace21039// the atomic instruction with an AtomicRMWInst::And/Or with appropriate21040// mask, as this produces better code than the LR/SC loop emitted by21041// int_riscv_masked_atomicrmw_xchg.21042if (AI->getOperation() == AtomicRMWInst::Xchg &&21043isa<ConstantInt>(AI->getValOperand())) {21044ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());21045if (CVal->isZero())21046return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,21047Builder.CreateNot(Mask, "Inv_Mask"),21048AI->getAlign(), Ord);21049if (CVal->isMinusOne())21050return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,21051AI->getAlign(), Ord);21052}2105321054unsigned XLen = Subtarget.getXLen();21055Value *Ordering =21056Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));21057Type *Tys[] = {AlignedAddr->getType()};21058Function *LrwOpScwLoop = Intrinsic::getDeclaration(21059AI->getModule(),21060getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);2106121062if (XLen == 64) {21063Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());21064Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());21065ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());21066}2106721068Value *Result;2106921070// Must pass the shift amount needed to sign extend the loaded value prior21071// to performing a signed comparison for min/max. ShiftAmt is the number of21072// bits to shift the value into position. 
Pass XLen-ShiftAmt-ValWidth, which21073// is the number of bits to left+right shift the value in order to21074// sign-extend.21075if (AI->getOperation() == AtomicRMWInst::Min ||21076AI->getOperation() == AtomicRMWInst::Max) {21077const DataLayout &DL = AI->getDataLayout();21078unsigned ValWidth =21079DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());21080Value *SextShamt =21081Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);21082Result = Builder.CreateCall(LrwOpScwLoop,21083{AlignedAddr, Incr, Mask, SextShamt, Ordering});21084} else {21085Result =21086Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});21087}2108821089if (XLen == 64)21090Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());21091return Result;21092}2109321094TargetLowering::AtomicExpansionKind21095RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(21096AtomicCmpXchgInst *CI) const {21097// Don't expand forced atomics, we want to have __sync libcalls instead.21098if (Subtarget.hasForcedAtomics())21099return AtomicExpansionKind::None;2110021101unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();21102if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&21103(Size == 8 || Size == 16))21104return AtomicExpansionKind::MaskedIntrinsic;21105return AtomicExpansionKind::None;21106}2110721108Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(21109IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,21110Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {21111unsigned XLen = Subtarget.getXLen();21112Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));21113Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;21114if (XLen == 64) {21115CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());21116NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());21117Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());21118CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;21119}21120Type *Tys[] = {AlignedAddr->getType()};21121Function *MaskedCmpXchg =21122Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);21123Value *Result = Builder.CreateCall(21124MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});21125if (XLen == 64)21126Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());21127return Result;21128}2112921130bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,21131EVT DataVT) const {21132// We have indexed loads for all supported EEW types. 
Indices are always21133// zero extended.21134return Extend.getOpcode() == ISD::ZERO_EXTEND &&21135isTypeLegal(Extend.getValueType()) &&21136isTypeLegal(Extend.getOperand(0).getValueType()) &&21137Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;21138}2113921140bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,21141EVT VT) const {21142if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())21143return false;2114421145switch (FPVT.getSimpleVT().SimpleTy) {21146case MVT::f16:21147return Subtarget.hasStdExtZfhmin();21148case MVT::f32:21149return Subtarget.hasStdExtF();21150case MVT::f64:21151return Subtarget.hasStdExtD();21152default:21153return false;21154}21155}2115621157unsigned RISCVTargetLowering::getJumpTableEncoding() const {21158// If we are using the small code model, we can reduce size of jump table21159// entry to 4 bytes.21160if (Subtarget.is64Bit() && !isPositionIndependent() &&21161getTargetMachine().getCodeModel() == CodeModel::Small) {21162return MachineJumpTableInfo::EK_Custom32;21163}21164return TargetLowering::getJumpTableEncoding();21165}2116621167const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(21168const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,21169unsigned uid, MCContext &Ctx) const {21170assert(Subtarget.is64Bit() && !isPositionIndependent() &&21171getTargetMachine().getCodeModel() == CodeModel::Small);21172return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);21173}2117421175bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {21176// We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power21177// of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be21178// a power of two as well.21179// FIXME: This doesn't work for zve32, but that's already broken21180// elsewhere for the same reason.21181assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");21182static_assert(RISCV::RVVBitsPerBlock == 64,21183"RVVBitsPerBlock changed, audit needed");21184return true;21185}2118621187bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,21188SDValue &Offset,21189ISD::MemIndexedMode &AM,21190SelectionDAG &DAG) const {21191// Target does not support indexed loads.21192if (!Subtarget.hasVendorXTHeadMemIdx())21193return false;2119421195if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)21196return false;2119721198Base = Op->getOperand(0);21199if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {21200int64_t RHSC = RHS->getSExtValue();21201if (Op->getOpcode() == ISD::SUB)21202RHSC = -(uint64_t)RHSC;2120321204// The constants that can be encoded in the THeadMemIdx instructions21205// are of the form (sign_extend(imm5) << imm2).21206bool isLegalIndexedOffset = false;21207for (unsigned i = 0; i < 4; i++)21208if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {21209isLegalIndexedOffset = true;21210break;21211}2121221213if (!isLegalIndexedOffset)21214return false;2121521216Offset = Op->getOperand(1);21217return true;21218}2121921220return false;21221}2122221223bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,21224SDValue &Offset,21225ISD::MemIndexedMode &AM,21226SelectionDAG &DAG) const {21227EVT VT;21228SDValue Ptr;21229if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {21230VT = LD->getMemoryVT();21231Ptr = LD->getBasePtr();21232} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {21233VT = ST->getMemoryVT();21234Ptr = ST->getBasePtr();21235} else21236return false;2123721238if 
(!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))21239return false;2124021241AM = ISD::PRE_INC;21242return true;21243}2124421245bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,21246SDValue &Base,21247SDValue &Offset,21248ISD::MemIndexedMode &AM,21249SelectionDAG &DAG) const {21250if (Subtarget.hasVendorXCVmem()) {21251if (Op->getOpcode() != ISD::ADD)21252return false;2125321254if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N))21255Base = LS->getBasePtr();21256else21257return false;2125821259if (Base == Op->getOperand(0))21260Offset = Op->getOperand(1);21261else if (Base == Op->getOperand(1))21262Offset = Op->getOperand(0);21263else21264return false;2126521266AM = ISD::POST_INC;21267return true;21268}2126921270EVT VT;21271SDValue Ptr;21272if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {21273VT = LD->getMemoryVT();21274Ptr = LD->getBasePtr();21275} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {21276VT = ST->getMemoryVT();21277Ptr = ST->getBasePtr();21278} else21279return false;2128021281if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))21282return false;21283// Post-indexing updates the base, so it's not a valid transform21284// if that's not the same as the load's pointer.21285if (Ptr != Base)21286return false;2128721288AM = ISD::POST_INC;21289return true;21290}2129121292bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,21293EVT VT) const {21294EVT SVT = VT.getScalarType();2129521296if (!SVT.isSimple())21297return false;2129821299switch (SVT.getSimpleVT().SimpleTy) {21300case MVT::f16:21301return VT.isVector() ? Subtarget.hasVInstructionsF16()21302: Subtarget.hasStdExtZfhOrZhinx();21303case MVT::f32:21304return Subtarget.hasStdExtFOrZfinx();21305case MVT::f64:21306return Subtarget.hasStdExtDOrZdinx();21307default:21308break;21309}2131021311return false;21312}2131321314ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const {21315// Zacas will use amocas.w which does not require extension.21316return Subtarget.hasStdExtZacas() ? 
ISD::ANY_EXTEND : ISD::SIGN_EXTEND;21317}2131821319Register RISCVTargetLowering::getExceptionPointerRegister(21320const Constant *PersonalityFn) const {21321return RISCV::X10;21322}2132321324Register RISCVTargetLowering::getExceptionSelectorRegister(21325const Constant *PersonalityFn) const {21326return RISCV::X11;21327}2132821329bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {21330// Return false to suppress the unnecessary extensions if the LibCall21331// arguments or return value is a float narrower than XLEN on a soft FP ABI.21332if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&21333Type.getSizeInBits() < Subtarget.getXLen()))21334return false;2133521336return true;21337}2133821339bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {21340if (Subtarget.is64Bit() && Type == MVT::i32)21341return true;2134221343return IsSigned;21344}2134521346bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,21347SDValue C) const {21348// Check integral scalar types.21349const bool HasZmmul = Subtarget.hasStdExtZmmul();21350if (!VT.isScalarInteger())21351return false;2135221353// Omit the optimization if the sub target has the M extension and the data21354// size exceeds XLen.21355if (HasZmmul && VT.getSizeInBits() > Subtarget.getXLen())21356return false;2135721358if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {21359// Break the MUL to a SLLI and an ADD/SUB.21360const APInt &Imm = ConstNode->getAPIntValue();21361if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||21362(1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())21363return true;2136421365// Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.21366if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&21367((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||21368(Imm - 8).isPowerOf2()))21369return true;2137021371// Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs21372// a pair of LUI/ADDI.21373if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&21374ConstNode->hasOneUse()) {21375APInt ImmS = Imm.ashr(Imm.countr_zero());21376if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||21377(1 - ImmS).isPowerOf2())21378return true;21379}21380}2138121382return false;21383}2138421385bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,21386SDValue ConstNode) const {21387// Let the DAGCombiner decide for vectors.21388EVT VT = AddNode.getValueType();21389if (VT.isVector())21390return true;2139121392// Let the DAGCombiner decide for larger types.21393if (VT.getScalarSizeInBits() > Subtarget.getXLen())21394return true;2139521396// It is worse if c1 is simm12 while c1*c2 is not.21397ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));21398ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);21399const APInt &C1 = C1Node->getAPIntValue();21400const APInt &C2 = C2Node->getAPIntValue();21401if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))21402return false;2140321404// Default to true and let the DAGCombiner decide.21405return true;21406}2140721408bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(21409EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,21410unsigned *Fast) const {21411if (!VT.isVector()) {21412if (Fast)21413*Fast = Subtarget.enableUnalignedScalarMem();21414return Subtarget.enableUnalignedScalarMem();21415}2141621417// All vector implementations must support element alignment21418EVT ElemVT = 
bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
                                                      SDValue ConstNode) const {
  // Let the DAGCombiner decide for vectors.
  EVT VT = AddNode.getValueType();
  if (VT.isVector())
    return true;

  // Let the DAGCombiner decide for larger types.
  if (VT.getScalarSizeInBits() > Subtarget.getXLen())
    return true;

  // The transform is not profitable if c1 is a simm12 while c1*c2 is not.
  ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
  ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
  const APInt &C1 = C1Node->getAPIntValue();
  const APInt &C2 = C2Node->getAPIntValue();
  if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
    return false;

  // Default to true and let the DAGCombiner decide.
  return true;
}

bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
    unsigned *Fast) const {
  if (!VT.isVector()) {
    if (Fast)
      *Fast = Subtarget.enableUnalignedScalarMem();
    return Subtarget.enableUnalignedScalarMem();
  }

  // All vector implementations must support element-aligned accesses.
  EVT ElemVT = VT.getVectorElementType();
  if (Alignment >= ElemVT.getStoreSize()) {
    if (Fast)
      *Fast = 1;
    return true;
  }

  // Note: We lower an unmasked unaligned vector access to an equally sized
  // e8 element type access. Given this, we effectively support all unmasked
  // misaligned accesses. TODO: Work through the codegen implications of
  // allowing such accesses to be formed, and of considering them fast.
  if (Fast)
    *Fast = Subtarget.enableUnalignedVectorMem();
  return Subtarget.enableUnalignedVectorMem();
}

EVT RISCVTargetLowering::getOptimalMemOpType(
    const MemOp &Op, const AttributeList &FuncAttributes) const {
  if (!Subtarget.hasVInstructions())
    return MVT::Other;

  if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
    return MVT::Other;

  // We use LMUL1 memory operations here for a non-obvious reason. Our caller
  // has an expansion threshold, and we want the number of hardware memory
  // operations to correspond roughly to that threshold. LMUL>1 operations
  // are typically expanded linearly internally, and thus correspond to more
  // than one actual memory operation. Note that store merging and load
  // combining will typically form larger LMUL operations from the LMUL1
  // operations emitted here, and that's okay because combining isn't
  // introducing new memory operations; it's just merging existing ones.
  const unsigned MinVLenInBytes = Subtarget.getRealMinVLen() / 8;
  if (Op.size() < MinVLenInBytes)
    // TODO: Figure out short memops. For the moment, do the default thing
    // which ends up using scalar sequences.
    return MVT::Other;

  // Prefer i8 for non-zero memset as it allows us to avoid materializing
  // a large scalar constant and instead use vmv.v.x/i to do the
  // broadcast. For everything else, prefer ELenVT to minimize VL and thus
  // maximize the chance we can encode the size in the vsetvli.
  MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
  MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;

  // Do we have sufficient alignment for our preferred VT?
  // If not, revert to the largest size allowed by our alignment criteria.
  if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
    Align RequiredAlign(PreferredVT.getStoreSize());
    if (Op.isFixedDstAlign())
      RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
    if (Op.isMemcpy())
      RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
    PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
  }
  return MVT::getVectorVT(PreferredVT,
                          MinVLenInBytes / PreferredVT.getStoreSize());
}

bool RISCVTargetLowering::splitValueIntoRegisterParts(
    SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
    unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
  bool IsABIRegCopy = CC.has_value();
  EVT ValueVT = Val.getValueType();
  if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
      PartVT == MVT::f32) {
    // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
    // NaN, and cast to f32.
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
    Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
    Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
                      DAG.getConstant(0xFFFF0000, DL, MVT::i32));
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
    Parts[0] = Val;
    return true;
  }

  if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
    LLVMContext &Context = *DAG.getContext();
    EVT ValueEltVT = ValueVT.getVectorElementType();
    EVT PartEltVT = PartVT.getVectorElementType();
    unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
    unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
    if (PartVTBitSize % ValueVTBitSize == 0) {
      assert(PartVTBitSize >= ValueVTBitSize);
      // If the element types are different, bitcast to the same element type
      // as PartVT first.
      // For example, to copy a <vscale x 1 x i8> value into <vscale x 4 x i16>,
      // we first widen <vscale x 1 x i8> to <vscale x 8 x i8> with an
      // insert_subvector, then bitcast to <vscale x 4 x i16>.
      if (ValueEltVT != PartEltVT) {
        if (PartVTBitSize > ValueVTBitSize) {
          unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
          assert(Count != 0 && "The number of elements should not be zero.");
          EVT SameEltTypeVT =
              EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
          Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
                            DAG.getUNDEF(SameEltTypeVT), Val,
                            DAG.getVectorIdxConstant(0, DL));
        }
        Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
      } else {
        Val =
            DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
                        Val, DAG.getVectorIdxConstant(0, DL));
      }
      Parts[0] = Val;
      return true;
    }
  }
  return false;
}
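// For example, an f16 value of 1.0 (bits 0x3C00) passed in an FPR under the ABI
// case above becomes the i32 pattern 0xFFFF3C00 bitcast to f32, i.e. a
// NaN-boxed half; joinRegisterPartsIntoValue below simply truncates the low 16
// bits back out.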
SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
    SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
    MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
  bool IsABIRegCopy = CC.has_value();
  if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
      PartVT == MVT::f32) {
    SDValue Val = Parts[0];

    // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
    Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
    Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
    return Val;
  }

  if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
    LLVMContext &Context = *DAG.getContext();
    SDValue Val = Parts[0];
    EVT ValueEltVT = ValueVT.getVectorElementType();
    EVT PartEltVT = PartVT.getVectorElementType();
    unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
    unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
    if (PartVTBitSize % ValueVTBitSize == 0) {
      assert(PartVTBitSize >= ValueVTBitSize);
      EVT SameEltTypeVT = ValueVT;
      // If the element types are different, convert the parts to the element
      // type of ValueVT first.
      // For example, to copy a <vscale x 1 x i8> value out of
      // <vscale x 4 x i16>, we first bitcast <vscale x 4 x i16> to
      // <vscale x 8 x i8>, then extract <vscale x 1 x i8>.
      if (ValueEltVT != PartEltVT) {
        unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
        assert(Count != 0 && "The number of elements should not be zero.");
        SameEltTypeVT =
            EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
        Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
      }
      Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
                        DAG.getVectorIdxConstant(0, DL));
      return Val;
    }
  }
  return SDValue();
}

bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
  // When aggressively optimizing for code size, we prefer to use a div
  // instruction, as it is usually smaller than the alternative sequence.
  // TODO: Add vector division?
  bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
  return OptSize && !VT.isVector();
}

bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
  // Scalarizing zero_ext and sign_ext might prevent them from matching
  // widening instructions in some situations.
  unsigned Opc = N->getOpcode();
  if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
    return false;
  return true;
}

static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
  Function *ThreadPointerFunc =
      Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
  return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
                                IRB.CreateCall(ThreadPointerFunc), Offset);
}
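// Note that useTpOffset emits a call to the thread-pointer intrinsic
// (Intrinsic::thread_pointer) followed by a byte GEP, so useTpOffset(IRB,
// -0x10) below addresses tp - 16.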
Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
  // Fuchsia provides a fixed TLS slot for the stack cookie.
  // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
  if (Subtarget.isTargetFuchsia())
    return useTpOffset(IRB, -0x10);

  // Android provides a fixed TLS slot for the stack cookie. See the definition
  // of TLS_SLOT_STACK_GUARD in
  // https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/tls_defines.h
  if (Subtarget.isTargetAndroid())
    return useTpOffset(IRB, -0x18);

  return TargetLowering::getIRStackGuard(IRB);
}

bool RISCVTargetLowering::isLegalInterleavedAccessType(
    VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
    const DataLayout &DL) const {
  EVT VT = getValueType(DL, VTy);
  // Don't lower vlseg/vsseg for vector types that can't be split.
  if (!isTypeLegal(VT))
    return false;

  if (!isLegalElementTypeForRVV(VT.getScalarType()) ||
      !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
                                      Alignment))
    return false;

  MVT ContainerVT = VT.getSimpleVT();

  if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
    if (!Subtarget.useRVVForFixedLengthVectors())
      return false;
    // Sometimes the interleaved access pass picks up splats as interleaves of
    // one element. Don't lower these.
    if (FVTy->getNumElements() < 2)
      return false;

    ContainerVT = getContainerForFixedLengthVector(VT.getSimpleVT());
  } else {
    // The intrinsics for scalable vectors are not overloaded on pointer type
    // and can only handle the default address space.
    if (AddrSpace)
      return false;
  }

  // Need to make sure that EMUL * NFIELDS ≤ 8
  auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
  if (Fractional)
    return true;
  return Factor * LMUL <= 8;
}

bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
                                                  Align Alignment) const {
  if (!Subtarget.hasVInstructions())
    return false;

  // Only support fixed vectors if we know the minimum vector size.
  if (DataType.isFixedLengthVector() &&
      !Subtarget.useRVVForFixedLengthVectors())
    return false;

  EVT ScalarType = DataType.getScalarType();
  if (!isLegalElementTypeForRVV(ScalarType))
    return false;

  if (!Subtarget.enableUnalignedVectorMem() &&
      Alignment < ScalarType.getStoreSize())
    return false;

  return true;
}

static const Intrinsic::ID FixedVlsegIntrIds[] = {
    Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
    Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
    Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
    Intrinsic::riscv_seg8_load};
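// The table above is indexed by Factor - 2 (see the FixedVlsegIntrIds[Factor -
// 2] lookups below), so index 0 corresponds to the two-field segment load.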
/// Lower an interleaved load into a vlsegN intrinsic.
///
/// E.g. Lower an interleaved load (Factor = 2):
/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6>  ; Extract even elements
/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7>  ; Extract odd elements
///
/// Into:
/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
///                                        %ptr, i64 4)
/// %vec0 = extractvalue { <4 x i32>, <4 x i32> } %ld2, 0
/// %vec1 = extractvalue { <4 x i32>, <4 x i32> } %ld2, 1
bool RISCVTargetLowering::lowerInterleavedLoad(
    LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
    ArrayRef<unsigned> Indices, unsigned Factor) const {
  IRBuilder<> Builder(LI);

  auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
  if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
                                    LI->getPointerAddressSpace(),
                                    LI->getDataLayout()))
    return false;

  auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());

  Function *VlsegNFunc =
      Intrinsic::getDeclaration(LI->getModule(), FixedVlsegIntrIds[Factor - 2],
                                {VTy, LI->getPointerOperandType(), XLenTy});

  Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());

  CallInst *VlsegN =
      Builder.CreateCall(VlsegNFunc, {LI->getPointerOperand(), VL});

  for (unsigned i = 0; i < Shuffles.size(); i++) {
    Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
    Shuffles[i]->replaceAllUsesWith(SubVec);
  }

  return true;
}

static const Intrinsic::ID FixedVssegIntrIds[] = {
    Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
    Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
    Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
    Intrinsic::riscv_seg8_store};
/// Lower an interleaved store into a vssegN intrinsic.
///
/// E.g. Lower an interleaved store (Factor = 3):
/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
///                  <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
/// store <12 x i32> %i.vec, <12 x i32>* %ptr
///
/// Into:
/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> %v1, <0, 1, 2, 3>
/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> %v1, <4, 5, 6, 7>
/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> %v1, <8, 9, 10, 11>
/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
///                                              %ptr, i64 4)
///
/// Note that the new shufflevectors will be removed and we'll only generate one
/// vsseg3 instruction in CodeGen.
bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
                                                ShuffleVectorInst *SVI,
                                                unsigned Factor) const {
  IRBuilder<> Builder(SI);
  auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
  // Given SVI : <n*factor x ty>, then VTy : <n x ty>
  auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
                                   ShuffleVTy->getNumElements() / Factor);
  if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
                                    SI->getPointerAddressSpace(),
                                    SI->getDataLayout()))
    return false;

  auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());

  Function *VssegNFunc =
      Intrinsic::getDeclaration(SI->getModule(), FixedVssegIntrIds[Factor - 2],
                                {VTy, SI->getPointerOperandType(), XLenTy});

  auto Mask = SVI->getShuffleMask();
  SmallVector<Value *, 10> Ops;

  for (unsigned i = 0; i < Factor; i++) {
    Value *Shuffle = Builder.CreateShuffleVector(
        SVI->getOperand(0), SVI->getOperand(1),
        createSequentialMask(Mask[i], VTy->getNumElements(), 0));
    Ops.push_back(Shuffle);
  }
  // This VL should be OK (it should be executable in one vsseg instruction,
  // potentially under larger LMULs) because we checked that the fixed vector
  // type fits in isLegalInterleavedAccessType.
  Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
  Ops.append({SI->getPointerOperand(), VL});

  Builder.CreateCall(VssegNFunc, Ops);

  return true;
}

bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
                                                           LoadInst *LI) const {
  assert(LI->isSimple());
  IRBuilder<> Builder(LI);

  // Only deinterleave2 is supported at present.
  if (DI->getIntrinsicID() != Intrinsic::vector_deinterleave2)
    return false;

  unsigned Factor = 2;

  VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType());
  VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));

  if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
                                    LI->getPointerAddressSpace(),
                                    LI->getDataLayout()))
    return false;

  Function *VlsegNFunc;
  Value *VL;
  Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
  SmallVector<Value *, 10> Ops;

  if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
    VlsegNFunc = Intrinsic::getDeclaration(
        LI->getModule(), FixedVlsegIntrIds[Factor - 2],
        {ResVTy, LI->getPointerOperandType(), XLenTy});
    VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
  } else {
    static const Intrinsic::ID IntrIds[] = {
        Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
        Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
        Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
        Intrinsic::riscv_vlseg8};
    VlsegNFunc = Intrinsic::getDeclaration(LI->getModule(), IntrIds[Factor - 2],
                                           {ResVTy, XLenTy});
    VL = Constant::getAllOnesValue(XLenTy);
    Ops.append(Factor, PoisonValue::get(ResVTy));
  }

  Ops.append({LI->getPointerOperand(), VL});

  Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops);
  DI->replaceAllUsesWith(Vlseg);

  return true;
}

bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
                                                          StoreInst *SI) const {
  assert(SI->isSimple());
  IRBuilder<> Builder(SI);

  // Only interleave2 is supported at present.
  if (II->getIntrinsicID() != Intrinsic::vector_interleave2)
    return false;

  unsigned Factor = 2;

  VectorType *VTy = cast<VectorType>(II->getType());
  VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType());

  if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
                                    SI->getPointerAddressSpace(),
                                    SI->getDataLayout()))
    return false;

  Function *VssegNFunc;
  Value *VL;
  Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());

  if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
    VssegNFunc = Intrinsic::getDeclaration(
        SI->getModule(), FixedVssegIntrIds[Factor - 2],
        {InVTy, SI->getPointerOperandType(), XLenTy});
    VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
  } else {
    static const Intrinsic::ID IntrIds[] = {
        Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
        Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
        Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
        Intrinsic::riscv_vsseg8};

    VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2],
                                           {InVTy, XLenTy});
    VL = Constant::getAllOnesValue(XLenTy);
  }

  Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1),
                                  SI->getPointerOperand(), VL});

  return true;
}

MachineInstr *
RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
                                   MachineBasicBlock::instr_iterator &MBBI,
                                   const TargetInstrInfo *TII) const {
  assert(MBBI->isCall() && MBBI->getCFIType() &&
         "Invalid call instruction for a KCFI check");
  assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
                      MBBI->getOpcode()));

  MachineOperand &Target = MBBI->getOperand(0);
  Target.setIsRenamable(false);

  return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
      .addReg(Target.getReg())
      .addImm(MBBI->getCFIType())
      .getInstr();
}

#define GET_REGISTER_MATCHER
#include "RISCVGenAsmMatcher.inc"

Register
RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                       const MachineFunction &MF) const {
  Register Reg = MatchRegisterAltName(RegName);
  if (Reg == RISCV::NoRegister)
    Reg = MatchRegisterName(RegName);
  if (Reg == RISCV::NoRegister)
    report_fatal_error(
        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
    report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
                             StringRef(RegName) + "\"."));
  return Reg;
}
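// The domain hint consumed below is attached as metadata on the memory
// instruction, e.g. (illustrative values):
//   %v = load i32, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !1
//   !0 = !{i32 1}
//   !1 = !{i32 3}   ; -> __RISCV_NTLH_ALL_PRIVATE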
MachineMemOperand::Flags
RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
  const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);

  if (NontemporalInfo == nullptr)
    return MachineMemOperand::MONone;

  // 1 -> default, behaves as __RISCV_NTLH_ALL
  // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
  // 3 -> __RISCV_NTLH_ALL_PRIVATE
  // 4 -> __RISCV_NTLH_INNERMOST_SHARED
  // 5 -> __RISCV_NTLH_ALL
  int NontemporalLevel = 5;
  const MDNode *RISCVNontemporalInfo =
      I.getMetadata("riscv-nontemporal-domain");
  if (RISCVNontemporalInfo != nullptr)
    NontemporalLevel =
        cast<ConstantInt>(
            cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
                ->getValue())
            ->getZExtValue();

  assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
         "RISC-V target doesn't support this non-temporal domain.");

  NontemporalLevel -= 2;
  MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
  if (NontemporalLevel & 0b1)
    Flags |= MONontemporalBit0;
  if (NontemporalLevel & 0b10)
    Flags |= MONontemporalBit1;

  return Flags;
}

MachineMemOperand::Flags
RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const {

  MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
  MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
  TargetFlags |= (NodeFlags & MONontemporalBit0);
  TargetFlags |= (NodeFlags & MONontemporalBit1);
  return TargetFlags;
}

bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
    const MemSDNode &NodeX, const MemSDNode &NodeY) const {
  return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
}

bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
  if (VT.isScalableVector())
    return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
  if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
    return true;
  return Subtarget.hasStdExtZbb() &&
         (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
}

unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
                                                 ISD::CondCode Cond) const {
  return isCtpopFast(VT) ? 0 : 1;
}
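// With Zbb, a scalar ctpop of i32/i64 lowers to a single cpop/cpopw, and Zvbb
// provides vcpop.v for per-element vector counts, which is why those
// combinations are reported as fast above.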
bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {

  // GISel support is in progress or complete for these opcodes.
  unsigned Op = Inst.getOpcode();
  if (Op == Instruction::Add || Op == Instruction::Sub ||
      Op == Instruction::And || Op == Instruction::Or ||
      Op == Instruction::Xor || Op == Instruction::InsertElement ||
      Op == Instruction::ShuffleVector || Op == Instruction::Load ||
      Op == Instruction::Freeze || Op == Instruction::Store)
    return false;

  if (Inst.getType()->isScalableTy())
    return true;

  for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
    if (Inst.getOperand(i)->getType()->isScalableTy() &&
        !isa<ReturnInst>(&Inst))
      return true;

  if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
    if (AI->getAllocatedType()->isScalableTy())
      return true;
  }

  return false;
}

SDValue
RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                   SelectionDAG &DAG,
                                   SmallVectorImpl<SDNode *> &Created) const {
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (isIntDivCheap(N->getValueType(0), Attr))
    return SDValue(N, 0); // Lower SDIV as SDIV

  // Only perform this transform if short forward branch opt is supported.
  if (!Subtarget.hasShortForwardBranchOpt())
    return SDValue();
  EVT VT = N->getValueType(0);
  if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
    return SDValue();

  // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
  if (Divisor.sgt(2048) || Divisor.slt(-2048))
    return SDValue();
  return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
}

bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
    EVT VT, const APInt &AndMask) const {
  if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
    return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
  return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
}

unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
  return Subtarget.getMinimumJumpTableEntries();
}

// Handle a single argument, such as a return value.
template <typename Arg>
void RVVArgDispatcher::constructArgInfos(ArrayRef<Arg> ArgList) {
  // This lambda determines whether the argument list consists entirely of
  // homogeneous scalable vector types.
  auto isHomogeneousScalableVectorType = [](ArrayRef<Arg> ArgList) {
    // First, record the register type of the first argument.
    auto It = ArgList.begin();
    MVT FirstArgRegType = It->VT;

    // Bail out if the list is empty or the type needs to be split.
    if (It == ArgList.end() || It->Flags.isSplit())
      return false;

    ++It;

    // Bail out if this argument type contains only one element, or it's not a
    // scalable vector type.
    if (It == ArgList.end() || !FirstArgRegType.isScalableVector())
      return false;

    // Second, check that the remaining elements all have the same type and
    // are not split.
    for (; It != ArgList.end(); ++It)
      if (It->Flags.isSplit() || It->VT != FirstArgRegType)
        return false;

    return true;
  };

  if (isHomogeneousScalableVectorType(ArgList)) {
    // Handle as a tuple type.
    RVVArgInfos.push_back({(unsigned)ArgList.size(), ArgList[0].VT, false});
  } else {
    // Handle as normal vector types.
    bool FirstVMaskAssigned = false;
    for (const auto &OutArg : ArgList) {
      MVT RegisterVT = OutArg.VT;

      // Skip non-RVV register types.
      if (!RegisterVT.isVector())
        continue;

      if (RegisterVT.isFixedLengthVector())
        RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);

      if (!FirstVMaskAssigned && RegisterVT.getVectorElementType() == MVT::i1) {
        RVVArgInfos.push_back({1, RegisterVT, true});
        FirstVMaskAssigned = true;
        continue;
      }

      RVVArgInfos.push_back({1, RegisterVT, false});
    }
  }
}

// Handle multiple arguments.
template <>
void RVVArgDispatcher::constructArgInfos<Type *>(ArrayRef<Type *> TypeList) {
  const DataLayout &DL = MF->getDataLayout();
  const Function &F = MF->getFunction();
  LLVMContext &Context = F.getContext();

  bool FirstVMaskAssigned = false;
  for (Type *Ty : TypeList) {
    StructType *STy = dyn_cast<StructType>(Ty);
    if (STy && STy->containsHomogeneousScalableVectorTypes()) {
      Type *ElemTy = STy->getTypeAtIndex(0U);
      EVT VT = TLI->getValueType(DL, ElemTy);
      MVT RegisterVT =
          TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
      unsigned NumRegs =
          TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);

      RVVArgInfos.push_back(
          {NumRegs * STy->getNumElements(), RegisterVT, false});
    } else {
      SmallVector<EVT, 4> ValueVTs;
      ComputeValueVTs(*TLI, DL, Ty, ValueVTs);

      for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
           ++Value) {
        EVT VT = ValueVTs[Value];
        MVT RegisterVT =
            TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
        unsigned NumRegs =
            TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);

        // Skip non-RVV register types.
        if (!RegisterVT.isVector())
          continue;

        if (RegisterVT.isFixedLengthVector())
          RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);

        if (!FirstVMaskAssigned &&
            RegisterVT.getVectorElementType() == MVT::i1) {
          RVVArgInfos.push_back({1, RegisterVT, true});
          FirstVMaskAssigned = true;
          --NumRegs;
        }

        RVVArgInfos.insert(RVVArgInfos.end(), NumRegs, {1, RegisterVT, false});
      }
    }
  }
}

void RVVArgDispatcher::allocatePhysReg(unsigned NF, unsigned LMul,
                                       unsigned StartReg) {
  assert((StartReg % LMul) == 0 &&
         "Start register number should be a multiple of lmul");
  const MCPhysReg *VRArrays;
  switch (LMul) {
  default:
    report_fatal_error("Invalid lmul");
  case 1:
    VRArrays = ArgVRs;
    break;
  case 2:
    VRArrays = ArgVRM2s;
    break;
  case 4:
    VRArrays = ArgVRM4s;
    break;
  case 8:
    VRArrays = ArgVRM8s;
    break;
  }

  for (unsigned i = 0; i < NF; ++i)
    if (StartReg)
      AllocatedPhysRegs.push_back(VRArrays[(StartReg - 8) / LMul + i]);
    else
      AllocatedPhysRegs.push_back(MCPhysReg());
}
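// For example, allocatePhysReg(/*NF=*/2, /*LMul=*/2, /*StartReg=*/8) hands out
// the first two entries of ArgVRM2s, i.e. one LMUL=2 register group per field
// of the tuple, starting at the beginning of the vector argument registers.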
/// This function determines whether each RVV argument is passed in a register.
/// If the argument can be assigned to a VR group, give it a specific register;
/// otherwise, assign it 0, which is an invalid MCPhysReg.
void RVVArgDispatcher::compute() {
  uint32_t AssignedMap = 0;
  auto allocate = [&](const RVVArgInfo &ArgInfo) {
    // Allocate the first vector mask argument to V0.
    if (ArgInfo.FirstVMask) {
      AllocatedPhysRegs.push_back(RISCV::V0);
      return;
    }

    unsigned RegsNeeded = divideCeil(
        ArgInfo.VT.getSizeInBits().getKnownMinValue(), RISCV::RVVBitsPerBlock);
    unsigned TotalRegsNeeded = ArgInfo.NF * RegsNeeded;
    for (unsigned StartReg = 0; StartReg + TotalRegsNeeded <= NumArgVRs;
         StartReg += RegsNeeded) {
      uint32_t Map = ((1 << TotalRegsNeeded) - 1) << StartReg;
      if ((AssignedMap & Map) == 0) {
        allocatePhysReg(ArgInfo.NF, RegsNeeded, StartReg + 8);
        AssignedMap |= Map;
        return;
      }
    }

    allocatePhysReg(ArgInfo.NF, RegsNeeded, 0);
  };

  for (unsigned i = 0; i < RVVArgInfos.size(); ++i)
    allocate(RVVArgInfos[i]);
}

MCPhysReg RVVArgDispatcher::getNextPhysReg() {
  assert(CurIdx < AllocatedPhysRegs.size() && "Index out of range");
  return AllocatedPhysRegs[CurIdx++];
}

SDValue RISCVTargetLowering::expandIndirectJTBranch(const SDLoc &dl,
                                                    SDValue Value, SDValue Addr,
                                                    int JTI,
                                                    SelectionDAG &DAG) const {
  if (Subtarget.hasStdExtZicfilp()) {
    // When Zicfilp is enabled, we need to use a software-guarded branch for
    // the jump table branch.
    SDValue JTInfo = DAG.getJumpTableDebugInfo(JTI, Value, dl);
    return DAG.getNode(RISCVISD::SW_GUARDED_BRIND, dl, MVT::Other, JTInfo,
                       Addr);
  }
  return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, JTI, DAG);
}

namespace llvm::RISCVVIntrinsicsTable {

#define GET_RISCVVIntrinsicsTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace llvm::RISCVVIntrinsicsTable