Path: blob/main/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that LoongArch uses to lower LLVM code into
// a selection DAG.
//
//===----------------------------------------------------------------------===//

#include "LoongArchISelLowering.h"
#include "LoongArch.h"
#include "LoongArchMachineFunctionInfo.h"
#include "LoongArchRegisterInfo.h"
#include "LoongArchSubtarget.h"
#include "LoongArchTargetMachine.h"
#include "MCTargetDesc/LoongArchBaseInfo.h"
#include "MCTargetDesc/LoongArchMCTargetDesc.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/RuntimeLibcallUtil.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsLoongArch.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"

using namespace llvm;

#define DEBUG_TYPE "loongarch-isel-lowering"

STATISTIC(NumTailCalls, "Number of tail calls");

static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
                                  cl::desc("Trap on integer division by zero."),
                                  cl::init(false));

LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
                                                 const LoongArchSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  MVT GRLenVT = Subtarget.getGRLenVT();

  // Set up the register classes.

  addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
  if (Subtarget.hasBasicF())
    addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
  if (Subtarget.hasBasicD())
    addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);

  static const MVT::SimpleValueType LSXVTs[] = {
      MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
  static const MVT::SimpleValueType LASXVTs[] = {
      MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};

  if (Subtarget.hasExtLSX())
    for (MVT VT : LSXVTs)
      addRegisterClass(VT, &LoongArch::LSX128RegClass);

  if (Subtarget.hasExtLASX())
    for (MVT VT : LASXVTs)
      addRegisterClass(VT, &LoongArch::LASX256RegClass);

  // Set operations for LA32 and LA64.

  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
                   MVT::i1, Promote);

  setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
  setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom);
  setOperationAction(ISD::ROTL, GRLenVT, Expand);
  setOperationAction(ISD::CTPOP, GRLenVT, Expand);

  setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
                      ISD::JumpTable, ISD::GlobalTLSAddress},
                     GRLenVT, Custom);

  setOperationAction(ISD::EH_DWARF_CFA, GRLenVT, Custom);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
  setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);

  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // Expand bitreverse.i16 with native-width bitrev and shift for now, before
  // we get to know which of sll and revb.2h is faster.
  setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
  setOperationAction(ISD::BITREVERSE, GRLenVT, Legal);

  // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
  // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
  // and i32 could still be byte-swapped relatively cheaply.
  setOperationAction(ISD::BSWAP, MVT::i16, Custom);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, GRLenVT, Expand);
  setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand);

  setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom);
  setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand);

  // Set operations for LA64 only.

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::ADD, MVT::i32, Custom);
    setOperationAction(ISD::SUB, MVT::i32, Custom);
    setOperationAction(ISD::SHL, MVT::i32, Custom);
    setOperationAction(ISD::SRA, MVT::i32, Custom);
    setOperationAction(ISD::SRL, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
    setOperationAction(ISD::ROTR, MVT::i32, Custom);
    setOperationAction(ISD::ROTL, MVT::i32, Custom);
    setOperationAction(ISD::CTTZ, MVT::i32, Custom);
    setOperationAction(ISD::CTLZ, MVT::i32, Custom);
    setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
    setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom);
    setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);

    setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
    setOperationAction(ISD::BSWAP, MVT::i32, Custom);
    setOperationAction({ISD::UDIV, ISD::UREM}, MVT::i32, Custom);
  }

  // Set operations for LA32 only.

  if (!Subtarget.is64Bit()) {
    setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
    setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
  }

  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  static const ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
      ISD::SETGE,  ISD::SETNE,  ISD::SETGT};

  // Set operations for 'F' feature.

  if (Subtarget.hasBasicF()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
    setCondCodeAction(FPCCToExpand, MVT::f32, Expand);

    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    setOperationAction(ISD::FMA, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
    setOperationAction(ISD::IS_FPCLASS, MVT::f32, Legal);
    setOperationAction(ISD::FSIN, MVT::f32, Expand);
    setOperationAction(ISD::FCOS, MVT::f32, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
    setOperationAction(ISD::FPOW, MVT::f32, Expand);
    setOperationAction(ISD::FREM, MVT::f32, Expand);
    setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
    setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);

    if (Subtarget.is64Bit())
      setOperationAction(ISD::FRINT, MVT::f32, Legal);

    if (!Subtarget.hasBasicD()) {
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
      if (Subtarget.is64Bit()) {
        setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
        setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
      }
    }
  }

  // Set operations for 'D' feature.

  if (Subtarget.hasBasicD()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    setCondCodeAction(FPCCToExpand, MVT::f64, Expand);

    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
    setOperationAction(ISD::FMA, MVT::f64, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::IS_FPCLASS, MVT::f64, Legal);
    setOperationAction(ISD::FSIN, MVT::f64, Expand);
    setOperationAction(ISD::FCOS, MVT::f64, Expand);
    setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
    setOperationAction(ISD::FPOW, MVT::f64, Expand);
    setOperationAction(ISD::FREM, MVT::f64, Expand);
    setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
    setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);

    if (Subtarget.is64Bit())
      setOperationAction(ISD::FRINT, MVT::f64, Legal);
  }

  // Set operations for 'LSX' feature.

  if (Subtarget.hasExtLSX()) {
    for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
      // Expand all truncating stores and extending loads.
      for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
      // By default everything must be expanded. Then we will selectively turn
      // on ones that can be effectively codegen'd.
      for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
        setOperationAction(Op, VT, Expand);
    }

    for (MVT VT : LSXVTs) {
      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);

      setOperationAction(ISD::SETCC, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
    for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
      setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
      setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
                         Legal);
      setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
                         VT, Legal);
      setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
      setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
      setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
      setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
      setCondCodeAction(
          {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
          Expand);
    }
    for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
      setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
    }
    for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
      setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
      setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
      setOperationAction(ISD::FMA, VT, Legal);
      setOperationAction(ISD::FSQRT, VT, Legal);
      setOperationAction(ISD::FNEG, VT, Legal);
      setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
                         ISD::SETUGE, ISD::SETUGT},
                        VT, Expand);
    }
  }

  // Set operations for 'LASX' feature.

  if (Subtarget.hasExtLASX()) {
    for (MVT VT : LASXVTs) {
      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);

      setOperationAction(ISD::SETCC, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
    for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
      setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
      setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
                         Legal);
      setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
                         VT, Legal);
      setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
      setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
      setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
      setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
      setCondCodeAction(
          {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
          Expand);
    }
    for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
      setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
    }
    for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
      setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
      setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
      setOperationAction(ISD::FMA, VT, Legal);
      setOperationAction(ISD::FSQRT, VT, Legal);
      setOperationAction(ISD::FNEG, VT, Legal);
      setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
                         ISD::SETUGE, ISD::SETUGT},
                        VT, Expand);
    }
  }

  // Set DAG combine for LA32 and LA64.

  setTargetDAGCombine(ISD::AND);
  setTargetDAGCombine(ISD::OR);
  setTargetDAGCombine(ISD::SRL);
  setTargetDAGCombine(ISD::SETCC);

  // Set DAG combine for 'LSX' feature.

  if (Subtarget.hasExtLSX())
    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);

  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(LoongArch::R3);

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());

  setMinCmpXchgSizeInBits(32);

  // Function alignments.
  setMinFunctionAlignment(Align(4));
  // Set preferred alignments.
  setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
  setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
  setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
}

bool LoongArchTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  // In order to maximise the opportunity for common subexpression elimination,
  // keep a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  return false;
}

SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
                                                SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::ATOMIC_FENCE:
    return lowerATOMIC_FENCE(Op, DAG);
  case ISD::EH_DWARF_CFA:
    return lowerEH_DWARF_CFA(Op, DAG);
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN:
    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:
    return lowerINTRINSIC_VOID(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::FP_TO_SINT:
    return lowerFP_TO_SINT(Op, DAG);
  case ISD::BITCAST:
    return lowerBITCAST(Op, DAG);
  case ISD::UINT_TO_FP:
    return lowerUINT_TO_FP(Op, DAG);
  case ISD::SINT_TO_FP:
    return lowerSINT_TO_FP(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::WRITE_REGISTER:
    return lowerWRITE_REGISTER(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return lowerVECTOR_SHUFFLE(Op, DAG);
  }
  return SDValue();
}

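// Editorial note (not part of the upstream sources): the shuffle-lowering
// helpers that follow all rely on fitsRegularPattern() to recognise strided
// index sequences in a shuffle mask.  Starting at Begin, it inspects every
// CheckStride-th element and requires it to equal ExpectedIndex,
// ExpectedIndex + ExpectedIndexStride, ... in turn, with -1 (undef) matching
// anything.  For example, assuming a v4i32 shuffle with Mask.size() == 4, a
// VPACKEV-style mask <0, 4, 2, 6> would be validated by two interleaved
// checks:
//   fitsRegularPattern<int>(Begin,     2, End, 0, 2);  // elements 0, 2
//   fitsRegularPattern<int>(Begin + 1, 2, End, 4, 2);  // elements 4, 6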
/// Determine whether a range fits a regular pattern of values.
/// This function accounts for the possibility of jumping over the End iterator.
template <typename ValType>
static bool
fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
                   unsigned CheckStride,
                   typename SmallVectorImpl<ValType>::const_iterator End,
                   ValType ExpectedIndex, unsigned ExpectedIndexStride) {
  auto &I = Begin;

  while (I != End) {
    if (*I != -1 && *I != ExpectedIndex)
      return false;
    ExpectedIndex += ExpectedIndexStride;

    // Incrementing past End is undefined behaviour so we must increment one
    // step at a time and check for End at each step.
    for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
      ; // Empty loop body.
  }
  return true;
}

/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
///
/// VREPLVEI performs vector broadcast based on an element specified by an
/// integer immediate, with its mask being similar to:
///   <x, x, x, ...>
/// where x is any valid index.
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above form.
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {
  int SplatIndex = -1;
  for (const auto &M : Mask) {
    if (M != -1) {
      SplatIndex = M;
      break;
    }
  }

  if (SplatIndex == -1)
    return DAG.getUNDEF(VT);

  assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
  if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
    APInt Imm(64, SplatIndex);
    return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
                       DAG.getConstant(Imm, DL, MVT::i64));
  }

  return SDValue();
}

/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
///
/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
/// elements according to a <4 x i2> constant (encoded as an integer immediate).
///
/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
///   <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
/// When undef's appear they are treated as if they were whatever value is
/// necessary in order to fit the above forms.
///
/// For example:
///   %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
///                      <8 x i32> <i32 3, i32 2, i32 1, i32 0,
///                                 i32 7, i32 6, i32 5, i32 4>
/// is lowered to:
///   (VSHUF4I_H $v0, $v1, 27)
/// where the 27 comes from:
///   3 + (2 << 2) + (1 << 4) + (0 << 6)
static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG) {

  // When the size is less than 4, lower cost instructions may be used.
  if (Mask.size() < 4)
    return SDValue();

  int SubMask[4] = {-1, -1, -1, -1};
  for (unsigned i = 0; i < 4; ++i) {
    for (unsigned j = i; j < Mask.size(); j += 4) {
      int Idx = Mask[j];

      // Convert from vector index to 4-element subvector index
      // If an index refers to an element outside of the subvector then give up
      if (Idx != -1) {
        Idx -= 4 * (j / 4);
        if (Idx < 0 || Idx >= 4)
          return SDValue();
      }

      // If the mask has an undef, replace it with the current index.
      // Note that it might still be undef if the current index is also undef
      if (SubMask[i] == -1)
        SubMask[i] = Idx;
      // Check that non-undef values are the same as in the mask. If they
      // aren't then give up
      else if (Idx != -1 && Idx != SubMask[i])
        return SDValue();
    }
  }

  // Calculate the immediate. Replace any remaining undefs with zero
  APInt Imm(64, 0);
  for (int i = 3; i >= 0; --i) {
    int Idx = SubMask[i];

    if (Idx == -1)
      Idx = 0;

    Imm <<= 2;
    Imm |= Idx & 0x3;
  }

  return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
                     DAG.getConstant(Imm, DL, MVT::i64));
}

/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
///
/// VPACKEV interleaves the even elements from each vector.
///
/// It is possible to lower into VPACKEV when the mask consists of two of the
/// following forms interleaved:
///   <0, 2, 4, ...>
///   <n, n+2, n+4, ...>
/// where n is the number of elements in the vector.
/// For example:
///   <0, 0, 2, 2, 4, 4, ...>
///   <0, n, 2, n+2, 4, n+4, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
///
/// VPACKOD interleaves the odd elements from each vector.
///
/// It is possible to lower into VPACKOD when the mask consists of two of the
/// following forms interleaved:
///   <1, 3, 5, ...>
///   <n+1, n+3, n+5, ...>
/// where n is the number of elements in the vector.
/// For example:
///   <1, 1, 3, 3, 5, 5, ...>
///   <1, n+1, 3, n+3, 5, n+5, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into VILVH (if possible).
///
/// VILVH interleaves consecutive elements from the left (highest-indexed) half
/// of each vector.
///
/// It is possible to lower into VILVH when the mask consists of two of the
/// following forms interleaved:
///   <x, x+1, x+2, ...>
///   <n+x, n+x+1, n+x+2, ...>
/// where n is the number of elements in the vector and x is half n.
/// For example:
///   <x, x, x+1, x+1, x+2, x+2, ...>
///   <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef<int> Mask,
                                         MVT VT, SDValue V1, SDValue V2,
                                         SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
                                   1))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into VILVL (if possible).
///
/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
/// of each vector.
///
/// It is possible to lower into VILVL when the mask consists of two of the
/// following forms interleaved:
///   <0, 1, 2, ...>
///   <n, n+1, n+2, ...>
/// where n is the number of elements in the vector.
/// For example:
///   <0, 0, 1, 1, 2, 2, ...>
///   <0, n, 1, n+1, 2, n+2, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask,
                                         MVT VT, SDValue V1, SDValue V2,
                                         SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
///
/// VPICKEV copies the even elements of each vector into the result vector.
///
/// It is possible to lower into VPICKEV when the mask consists of two of the
/// following forms concatenated:
///   <0, 2, 4, ...>
///   <n, n+2, n+4, ...>
/// where n is the number of elements in the vector.
/// For example:
///   <0, 2, 4, ..., 0, 2, 4, ...>
///   <0, 2, 4, ..., n, n+2, n+4, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &Mid = Mask.begin() + Mask.size() / 2;
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
    V2 = OriV2;

  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
///
/// VPICKOD copies the odd elements of each vector into the result vector.
///
/// It is possible to lower into VPICKOD when the mask consists of two of the
/// following forms concatenated:
///   <1, 3, 5, ...>
///   <n+1, n+3, n+5, ...>
/// where n is the number of elements in the vector.
/// For example:
///   <1, 3, 5, ..., 1, 3, 5, ...>
///   <1, 3, 5, ..., n+1, n+3, n+5, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &Mid = Mask.begin() + Mask.size() / 2;
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into VSHUF.
///
/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
/// adding it as an operand to the resulting VSHUF.
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
                                         MVT VT, SDValue V1, SDValue V2,
                                         SelectionDAG &DAG) {

  SmallVector<SDValue, 16> Ops;
  for (auto M : Mask)
    Ops.push_back(DAG.getConstant(M, DL, MVT::i64));

  EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
  SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);

  // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
  //   <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
  // VSHF concatenates the vectors in a bitwise fashion:
  //   <0b00, 0b01> + <0b10, 0b11> ->
  //   0b0100       + 0b1110       -> 0b01001110
  //                                  <0b10, 0b11, 0b00, 0b01>
  // We must therefore swap the operands to get the correct result.
  return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
}

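// Editorial note (not part of the upstream sources): lower128BitShuffle()
// below tries the cheap single-instruction patterns (VREPLVEI, VSHUF4I,
// VPACKEV/VPACKOD, VILVH/VILVL, VPICKEV/VPICKOD) before falling back to the
// generic VSHUF, which additionally has to materialise the mask as a vector
// constant.  For instance, a v4i32 mask <0, 4, 1, 5> is already caught by the
// VILVL pattern (<0, n, 1, n+1> with n = 4), so no mask vector is built.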
/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
///
/// This routine breaks down the specific type of 128-bit shuffle and
/// dispatches to the lowering routines accordingly.
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
                                  SDValue V1, SDValue V2, SelectionDAG &DAG) {
  assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
          VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
          VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
         "Vector type is unsupported for lsx!");
  assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
         "Two operands have different types!");
  assert(VT.getVectorNumElements() == Mask.size() &&
         "Unexpected mask size for shuffle!");
  assert(Mask.size() % 2 == 0 && "Expected even mask size.");

  SDValue Result;
  // TODO: Add more comparison patterns.
  if (V2.isUndef()) {
    if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG)))
      return Result;
    if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
      return Result;

    // TODO: This comment may be enabled in the future to better match the
    // pattern for instruction selection.
    /* V2 = V1; */
  }

  // It is recommended not to change the pattern comparison order for better
  // performance.
  if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
    return Result;

  return SDValue();
}

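// Editorial note (not part of the upstream sources): the XV* helpers below
// handle 256-bit (LASX) shuffles.  Most 256-bit LASX instructions operate on
// the two 128-bit lanes independently, which is why these routines check the
// low and high halves of the mask separately (e.g. over Begin..End - HalfSize
// and Begin + HalfSize..End) rather than matching the whole mask at once.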
/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
///
/// It is a XVREPLVEI when the mask is:
///   <x, x, x, ..., x+n, x+n, x+n, ...>
/// where the number of x is equal to n and n is half the length of vector.
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above form.
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL,
                                             ArrayRef<int> Mask, MVT VT,
                                             SDValue V1, SDValue V2,
                                             SelectionDAG &DAG) {
  int SplatIndex = -1;
  for (const auto &M : Mask) {
    if (M != -1) {
      SplatIndex = M;
      break;
    }
  }

  if (SplatIndex == -1)
    return DAG.getUNDEF(VT);

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;

  assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
  if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
      fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
                              0)) {
    APInt Imm(64, SplatIndex);
    return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
                       DAG.getConstant(Imm, DL, MVT::i64));
  }

  return SDValue();
}

/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {
  // When the size is less than or equal to 4, lower cost instructions may be
  // used.
  if (Mask.size() <= 4)
    return SDValue();
  return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
}

/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {
  return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
}

/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {
  return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
}

/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
                                          MVT VT, SDValue V1, SDValue V2,
                                          SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;
  unsigned LeftSize = HalfSize / 2;
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
                              1) &&
      fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
                                   Mask.size() + HalfSize - LeftSize, 1) &&
           fitsRegularPattern<int>(Begin + HalfSize, 2, End,
                                   Mask.size() + HalfSize + LeftSize, 1))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
                              1) &&
      fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
                              1))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
                                   Mask.size() + HalfSize - LeftSize, 1) &&
           fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
                                   Mask.size() + HalfSize + LeftSize, 1))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
                                          MVT VT, SDValue V1, SDValue V2,
                                          SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
      fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
           fitsRegularPattern<int>(Begin + HalfSize, 2, End,
                                   Mask.size() + HalfSize, 1))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
      fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
                                   1) &&
           fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
                                   Mask.size() + HalfSize, 1))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &LeftMid = Mask.begin() + Mask.size() / 4;
  const auto &Mid = Mask.begin() + Mask.size() / 2;
  const auto &RightMid = Mask.end() - Mask.size() / 4;
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
      fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
           fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
      fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
           fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
    V2 = OriV2;

  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &LeftMid = Mask.begin() + Mask.size() / 4;
  const auto &Mid = Mask.begin() + Mask.size() / 2;
  const auto &RightMid = Mask.end() - Mask.size() / 4;
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
      fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
           fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
                                   2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
      fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
           fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
                                   2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
                                          MVT VT, SDValue V1, SDValue V2,
                                          SelectionDAG &DAG) {

  int MaskSize = Mask.size();
  int HalfSize = Mask.size() / 2;
  const auto &Begin = Mask.begin();
  const auto &Mid = Mask.begin() + HalfSize;
  const auto &End = Mask.end();

  // VECTOR_SHUFFLE concatenates the vectors:
  //   <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
  //   shuffling ->
  //   <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
  //
  // XVSHUF concatenates the vectors:
  //   <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
  //   shuffling ->
  //   <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
  SmallVector<SDValue, 8> MaskAlloc;
  for (auto it = Begin; it < Mid; it++) {
    if (*it < 0) // UNDEF
      MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
    else if ((*it >= 0 && *it < HalfSize) ||
             (*it >= MaskSize && *it <= MaskSize + HalfSize)) {
      int M = *it < HalfSize ? *it : *it - HalfSize;
      MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
    } else
      return SDValue();
  }
  assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");

  for (auto it = Mid; it < End; it++) {
    if (*it < 0) // UNDEF
      MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
    else if ((*it >= HalfSize && *it < MaskSize) ||
             (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
      int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
      MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
    } else
      return SDValue();
  }
  assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");

  EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
  SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
  return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
}

/// Shuffle vectors by lane to generate more optimized instructions.
/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
///
/// Therefore, except for the following four cases, other cases are regarded
/// as cross-lane shuffles, where optimization is relatively limited.
///
/// - Shuffle high, low lanes of two inputs vector
///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
/// - Shuffle low, high lanes of two inputs vector
///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
/// - Shuffle low, low lanes of two inputs vector
///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
/// - Shuffle high, high lanes of two inputs vector
///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
///
/// The first case is the closest to LoongArch instructions and the other
/// cases need to be converted to it for processing.
///
/// This function may modify V1, V2 and Mask
static void canonicalizeShuffleVectorByLane(const SDLoc &DL,
                                            MutableArrayRef<int> Mask, MVT VT,
                                            SDValue &V1, SDValue &V2,
                                            SelectionDAG &DAG) {

  enum HalfMaskType { HighLaneTy, LowLaneTy, None };

  int MaskSize = Mask.size();
  int HalfSize = Mask.size() / 2;

  HalfMaskType preMask = None, postMask = None;

  if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
        return M < 0 || (M >= 0 && M < HalfSize) ||
               (M >= MaskSize && M < MaskSize + HalfSize);
      }))
    preMask = HighLaneTy;
  else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
             return M < 0 || (M >= HalfSize && M < MaskSize) ||
                    (M >= MaskSize + HalfSize && M < MaskSize * 2);
           }))
    preMask = LowLaneTy;

  if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
        return M < 0 || (M >= 0 && M < HalfSize) ||
               (M >= MaskSize && M < MaskSize + HalfSize);
      }))
    postMask = HighLaneTy;
  else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
             return M < 0 || (M >= HalfSize && M < MaskSize) ||
                    (M >= MaskSize + HalfSize && M < MaskSize * 2);
           }))
    postMask = LowLaneTy;

  // The pre-half of mask is high lane type, and the post-half of mask
  // is low lane type, which is closest to the LoongArch instructions.
  //
  // Note: In the LoongArch architecture, the high lane of mask corresponds
  // to the lower 128-bit of vector register, and the low lane of mask
  // corresponds to the higher 128-bit of vector register.
  if (preMask == HighLaneTy && postMask == LowLaneTy) {
    return;
  }
  if (preMask == LowLaneTy && postMask == HighLaneTy) {
    V1 = DAG.getBitcast(MVT::v4i64, V1);
    V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
                     DAG.getConstant(0b01001110, DL, MVT::i64));
    V1 = DAG.getBitcast(VT, V1);

    if (!V2.isUndef()) {
      V2 = DAG.getBitcast(MVT::v4i64, V2);
      V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
                       DAG.getConstant(0b01001110, DL, MVT::i64));
      V2 = DAG.getBitcast(VT, V2);
    }

    for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
      *it = *it < 0 ? *it : *it - HalfSize;
    }
    for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
      *it = *it < 0 ? *it : *it + HalfSize;
    }
  } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
    V1 = DAG.getBitcast(MVT::v4i64, V1);
    V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
                     DAG.getConstant(0b11101110, DL, MVT::i64));
    V1 = DAG.getBitcast(VT, V1);

    if (!V2.isUndef()) {
      V2 = DAG.getBitcast(MVT::v4i64, V2);
      V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
                       DAG.getConstant(0b11101110, DL, MVT::i64));
      V2 = DAG.getBitcast(VT, V2);
    }

    for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
      *it = *it < 0 ? *it : *it - HalfSize;
    }
  } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
    V1 = DAG.getBitcast(MVT::v4i64, V1);
    V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
                     DAG.getConstant(0b01000100, DL, MVT::i64));
    V1 = DAG.getBitcast(VT, V1);

    if (!V2.isUndef()) {
      V2 = DAG.getBitcast(MVT::v4i64, V2);
      V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
                       DAG.getConstant(0b01000100, DL, MVT::i64));
      V2 = DAG.getBitcast(VT, V2);
    }

    for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
      *it = *it < 0 ? *it : *it + HalfSize;
    }
  } else { // cross-lane
    return;
  }
}

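// Editorial example (not part of the upstream sources): for a v8i32 shuffle
// with mask <4, 5, 6, 7, 0, 1, 2, 3>, the first half selects only low-lane
// elements and the second half only high-lane elements (preMask == LowLaneTy,
// postMask == HighLaneTy).  canonicalizeShuffleVectorByLane() above then swaps
// the 128-bit halves of each input with XVPERMI (immediate 0b01001110, i.e.
// the i64 element permutation <2, 3, 0, 1>) and rewrites the mask to
// <0, 1, 2, 3, 4, 5, 6, 7>, turning it into the "high, low" form the other
// XV* lowerings expect.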
/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
///
/// This routine breaks down the specific type of 256-bit shuffle and
/// dispatches to the lowering routines accordingly.
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
                                  SDValue V1, SDValue V2, SelectionDAG &DAG) {
  assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
          VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
          VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
         "Vector type is unsupported for lasx!");
  assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
         "Two operands have different types!");
  assert(VT.getVectorNumElements() == Mask.size() &&
         "Unexpected mask size for shuffle!");
  assert(Mask.size() % 2 == 0 && "Expected even mask size.");
  assert(Mask.size() >= 4 && "Mask size is less than 4.");

  // canonicalize non cross-lane shuffle vector
  SmallVector<int> NewMask(Mask);
  canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG);

  SDValue Result;
  // TODO: Add more comparison patterns.
  if (V2.isUndef()) {
    if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG)))
      return Result;
    if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
      return Result;

    // TODO: This comment may be enabled in the future to better match the
    // pattern for instruction selection.
    /* V2 = V1; */
  }

  // It is recommended not to change the pattern comparison order for better
  // performance.
  if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
    return Result;

  return SDValue();
}

SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
                                                     SelectionDAG &DAG) const {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
  ArrayRef<int> OrigMask = SVOp->getMask();
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  MVT VT = Op.getSimpleValueType();
  int NumElements = VT.getVectorNumElements();
  SDLoc DL(Op);

  bool V1IsUndef = V1.isUndef();
  bool V2IsUndef = V2.isUndef();
  if (V1IsUndef && V2IsUndef)
    return DAG.getUNDEF(VT);

  // When we create a shuffle node we put the UNDEF node to second operand,
  // but in some cases the first operand may be transformed to UNDEF.
  // In this case we should just commute the node.
  if (V1IsUndef)
    return DAG.getCommutedVectorShuffle(*SVOp);

  // Check for non-undef masks pointing at an undef vector and make the masks
  // undef as well. This makes it easier to match the shuffle based solely on
  // the mask.
  if (V2IsUndef &&
      any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
    SmallVector<int, 8> NewMask(OrigMask);
    for (int &M : NewMask)
      if (M >= NumElements)
        M = -1;
    return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
  }

  // Check for illegal shuffle mask element index values.
  int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
  (void)MaskUpperLimit;
  assert(llvm::all_of(OrigMask,
                      [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
         "Out of bounds shuffle index");

  // For each vector width, delegate to a specialized lowering routine.
  if (VT.is128BitVector())
    return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG);

  if (VT.is256BitVector())
    return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG);

  return SDValue();
}

static bool isConstantOrUndef(const SDValue Op) {
  if (Op->isUndef())
    return true;
  if (isa<ConstantSDNode>(Op))
    return true;
  if (isa<ConstantFPSDNode>(Op))
    return true;
  return false;
}

static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
  for (unsigned i = 0; i < Op->getNumOperands(); ++i)
    if (isConstantOrUndef(Op->getOperand(i)))
      return true;
  return false;
}

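// Editorial note (not part of the upstream sources): despite its name,
// isConstantOrUndefBUILD_VECTOR() returns true as soon as *any* operand is a
// constant or undef.  The `!isConstantOrUndefBUILD_VECTOR(Node)` guard in
// lowerBUILD_VECTOR() below therefore only takes the INSERT_VECTOR_ELT path
// when every operand is a non-constant, non-undef value; other cases that are
// not constant splats or splat values fall back to the default expansion.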
SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                                   SelectionDAG &DAG) const {
  BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
  EVT ResTy = Op->getValueType(0);
  SDLoc DL(Op);
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  bool Is128Vec = ResTy.is128BitVector();
  bool Is256Vec = ResTy.is256BitVector();

  if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
      (!Subtarget.hasExtLASX() || !Is256Vec))
    return SDValue();

  if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                            /*MinSplatBits=*/8) &&
      SplatBitSize <= 64) {
    // We can only cope with 8, 16, 32, or 64-bit elements.
    if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
        SplatBitSize != 64)
      return SDValue();

    EVT ViaVecTy;

    switch (SplatBitSize) {
    default:
      return SDValue();
    case 8:
      ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
      break;
    case 16:
      ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
      break;
    case 32:
      ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
      break;
    case 64:
      ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
      break;
    }

    // SelectionDAG::getConstant will promote SplatValue appropriately.
    SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);

    // Bitcast to the type we originally wanted.
    if (ViaVecTy != ResTy)
      Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);

    return Result;
  }

  if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
    return Op;

  if (!isConstantOrUndefBUILD_VECTOR(Node)) {
    // Use INSERT_VECTOR_ELT operations rather than expand to stores.
    // The resulting code is the same length as the expansion, but it doesn't
    // use memory operations.
    EVT ResTy = Node->getValueType(0);

    assert(ResTy.isVector());

    unsigned NumElts = ResTy.getVectorNumElements();
    SDValue Vector = DAG.getUNDEF(ResTy);
    for (unsigned i = 0; i < NumElts; ++i) {
      Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
                           Node->getOperand(i),
                           DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
    }
    return Vector;
  }

  return SDValue();
}

SDValue
LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                 SelectionDAG &DAG) const {
  EVT VecTy = Op->getOperand(0)->getValueType(0);
  SDValue Idx = Op->getOperand(1);
  EVT EltTy = VecTy.getVectorElementType();
  unsigned NumElts = VecTy.getVectorNumElements();

  if (isa<ConstantSDNode>(Idx) &&
      (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
       EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2))
    return Op;

  return SDValue();
}

SDValue
LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                SelectionDAG &DAG) const {
  if (isa<ConstantSDNode>(Op->getOperand(2)))
    return Op;
  return SDValue();
}

SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SyncScope::ID FenceSSID =
      static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));

  // singlethread fences only synchronize with signal handlers on the same
  // thread and thus only need to preserve instruction order, not actually
  // enforce memory ordering.
  if (FenceSSID == SyncScope::SingleThread)
    // MEMBARRIER is a compiler barrier; it codegens to a no-op.
    return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));

  return Op;
}

SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
                                                     SelectionDAG &DAG) const {

  if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
    DAG.getContext()->emitError(
        "On LA64, only 64-bit registers can be written.");
    return Op.getOperand(0);
  }

  if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
    DAG.getContext()->emitError(
        "On LA32, only 32-bit registers can be written.");
    return Op.getOperand(0);
  }

  return Op;
}

SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
                                                SelectionDAG &DAG) const {
  if (!isa<ConstantSDNode>(Op.getOperand(0))) {
    DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
                                "be a constant integer");
    return SDValue();
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MF.getFrameInfo().setFrameAddressIsTaken(true);
  Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
  unsigned Depth = Op.getConstantOperandVal(0);
  int GRLenInBytes = Subtarget.getGRLen() / 8;

  while (Depth--) {
    int Offset = -(GRLenInBytes * 2);
    SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
                              DAG.getIntPtrConstant(Offset, DL));
    FrameAddr =
        DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
  }
  return FrameAddr;
}

SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  // Currently only support lowering return address for current frame.
  if (Op.getConstantOperandVal(0) != 0) {
    DAG.getContext()->emitError(
        "return address can only be determined for the current frame");
    return SDValue();
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MF.getFrameInfo().setReturnAddressIsTaken(true);
  MVT GRLenVT = Subtarget.getGRLenVT();

  // Return the value of the return address register, marking it an implicit
  // live-in.
  Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
                              getRegClassFor(GRLenVT));
  return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
}

SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
                                                   SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto Size = Subtarget.getGRLen() / 8;
  auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
  return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
}

SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
                                              SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();

  SDLoc DL(Op);
  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 getPointerTy(MF.getDataLayout()));

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
                                                 SelectionDAG &DAG) const {
  assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
         !Subtarget.hasBasicD() && "unexpected target features");

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);
  if (Op0->getOpcode() == ISD::AND) {
    auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
    if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
      return Op;
  }

  if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
      Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
      Op0.getConstantOperandVal(2) == UINT64_C(0))
    return Op;

  if (Op0.getOpcode() == ISD::AssertZext &&
      dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
    return Op;

  EVT OpVT = Op0.getValueType();
  EVT RetVT = Op.getValueType();
  RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
  MakeLibCallOptions CallOptions;
  CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
  SDValue Chain = SDValue();
  SDValue Result;
  std::tie(Result, Chain) =
      makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
  return Result;
}

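// Editorial note (not part of the upstream sources): the makeLibCall path
// above is only reached on LA64 with the F but not the D extension, where an
// i64 operand that cannot be proven to fit in 32 bits has no direct hardware
// conversion to f32 (the value cannot be moved into an FPR without D).
// RTLIB::getUINTTOFP(MVT::i64, MVT::f32) is then expected to pick the usual
// compiler-rt/libgcc routine (__floatundisf), and the signed variant below
// uses __floatdisf analogously.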
ISD::AssertSext ||1622Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&1623dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))1624return Op;16251626EVT OpVT = Op0.getValueType();1627EVT RetVT = Op.getValueType();1628RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);1629MakeLibCallOptions CallOptions;1630CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);1631SDValue Chain = SDValue();1632SDValue Result;1633std::tie(Result, Chain) =1634makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);1635return Result;1636}16371638SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,1639SelectionDAG &DAG) const {16401641SDLoc DL(Op);1642SDValue Op0 = Op.getOperand(0);16431644if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&1645Subtarget.is64Bit() && Subtarget.hasBasicF()) {1646SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);1647return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);1648}1649return Op;1650}16511652SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,1653SelectionDAG &DAG) const {16541655SDLoc DL(Op);16561657if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&1658!Subtarget.hasBasicD()) {1659SDValue Dst =1660DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op.getOperand(0));1661return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);1662}16631664EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());1665SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op.getOperand(0));1666return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);1667}16681669static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,1670SelectionDAG &DAG, unsigned Flags) {1671return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);1672}16731674static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,1675SelectionDAG &DAG, unsigned Flags) {1676return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),1677Flags);1678}16791680static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,1681SelectionDAG &DAG, unsigned Flags) {1682return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),1683N->getOffset(), Flags);1684}16851686static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,1687SelectionDAG &DAG, unsigned Flags) {1688return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);1689}16901691template <class NodeTy>1692SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,1693CodeModel::Model M,1694bool IsLocal) const {1695SDLoc DL(N);1696EVT Ty = getPointerTy(DAG.getDataLayout());1697SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);1698SDValue Load;16991700switch (M) {1701default:1702report_fatal_error("Unsupported code model");17031704case CodeModel::Large: {1705assert(Subtarget.is64Bit() && "Large code model requires LA64");17061707// This is not actually used, but is necessary for successfully matching1708// the PseudoLA_*_LARGE nodes.1709SDValue Tmp = DAG.getConstant(0, DL, Ty);1710if (IsLocal) {1711// This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that1712// eventually becomes the desired 5-insn code sequence.1713Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,1714Tmp, Addr),17150);1716} else {1717// This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that1718// eventually becomes the desired 5-insn code sequence.1719Load = SDValue(1720DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),17210);1722}1723break;1724}17251726case 
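  // Illustrative only: for a dso-local symbol, PseudoLA_PCREL_LARGE is later
  // expanded by the pseudo-expansion pass into a five-instruction sequence
  // roughly of the form
  //   pcalau12i $dst, %pc_hi20(sym)
  //   addi.d    $tmp, $zero, %pc_lo12(sym)
  //   lu32i.d   $tmp, %pc64_lo20(sym)
  //   lu52i.d   $tmp, $tmp, %pc64_hi12(sym)
  //   add.d     $dst, $dst, $tmp
  // while the GOT flavour ends with a load (ldx.d) instead of the final add.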
CodeModel::Small:1727case CodeModel::Medium:1728if (IsLocal) {1729// This generates the pattern (PseudoLA_PCREL sym), which expands to1730// (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).1731Load = SDValue(1732DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);1733} else {1734// This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d1735// (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).1736Load =1737SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);1738}1739}17401741if (!IsLocal) {1742// Mark the load instruction as invariant to enable hoisting in MachineLICM.1743MachineFunction &MF = DAG.getMachineFunction();1744MachineMemOperand *MemOp = MF.getMachineMemOperand(1745MachinePointerInfo::getGOT(MF),1746MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |1747MachineMemOperand::MOInvariant,1748LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));1749DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});1750}17511752return Load;1753}17541755SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,1756SelectionDAG &DAG) const {1757return getAddr(cast<BlockAddressSDNode>(Op), DAG,1758DAG.getTarget().getCodeModel());1759}17601761SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,1762SelectionDAG &DAG) const {1763return getAddr(cast<JumpTableSDNode>(Op), DAG,1764DAG.getTarget().getCodeModel());1765}17661767SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,1768SelectionDAG &DAG) const {1769return getAddr(cast<ConstantPoolSDNode>(Op), DAG,1770DAG.getTarget().getCodeModel());1771}17721773SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,1774SelectionDAG &DAG) const {1775GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);1776assert(N->getOffset() == 0 && "unexpected offset in global node");1777auto CM = DAG.getTarget().getCodeModel();1778const GlobalValue *GV = N->getGlobal();17791780if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {1781if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())1782CM = *GCM;1783}17841785return getAddr(N, DAG, CM, GV->isDSOLocal());1786}17871788SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,1789SelectionDAG &DAG,1790unsigned Opc, bool UseGOT,1791bool Large) const {1792SDLoc DL(N);1793EVT Ty = getPointerTy(DAG.getDataLayout());1794MVT GRLenVT = Subtarget.getGRLenVT();17951796// This is not actually used, but is necessary for successfully matching the1797// PseudoLA_*_LARGE nodes.1798SDValue Tmp = DAG.getConstant(0, DL, Ty);1799SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);1800SDValue Offset = Large1801? 
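  // The PseudoLA_TLS_* node selected here produces the symbol's offset within
  // the thread-local block: loaded from the GOT for initial-exec (UseGOT) or
  // materialized as a link-time constant for local-exec. LoongArch::R2 is the
  // thread pointer ($tp); it is added at the end to form the final address.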
SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)1802: SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);1803if (UseGOT) {1804// Mark the load instruction as invariant to enable hoisting in MachineLICM.1805MachineFunction &MF = DAG.getMachineFunction();1806MachineMemOperand *MemOp = MF.getMachineMemOperand(1807MachinePointerInfo::getGOT(MF),1808MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |1809MachineMemOperand::MOInvariant,1810LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));1811DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});1812}18131814// Add the thread pointer.1815return DAG.getNode(ISD::ADD, DL, Ty, Offset,1816DAG.getRegister(LoongArch::R2, GRLenVT));1817}18181819SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,1820SelectionDAG &DAG,1821unsigned Opc,1822bool Large) const {1823SDLoc DL(N);1824EVT Ty = getPointerTy(DAG.getDataLayout());1825IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());18261827// This is not actually used, but is necessary for successfully matching the1828// PseudoLA_*_LARGE nodes.1829SDValue Tmp = DAG.getConstant(0, DL, Ty);18301831// Use a PC-relative addressing mode to access the dynamic GOT address.1832SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);1833SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)1834: SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);18351836// Prepare argument list to generate call.1837ArgListTy Args;1838ArgListEntry Entry;1839Entry.Node = Load;1840Entry.Ty = CallTy;1841Args.push_back(Entry);18421843// Setup call to __tls_get_addr.1844TargetLowering::CallLoweringInfo CLI(DAG);1845CLI.setDebugLoc(DL)1846.setChain(DAG.getEntryNode())1847.setLibCallee(CallingConv::C, CallTy,1848DAG.getExternalSymbol("__tls_get_addr", Ty),1849std::move(Args));18501851return LowerCallTo(CLI).first;1852}18531854SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,1855SelectionDAG &DAG, unsigned Opc,1856bool Large) const {1857SDLoc DL(N);1858EVT Ty = getPointerTy(DAG.getDataLayout());1859const GlobalValue *GV = N->getGlobal();18601861// This is not actually used, but is necessary for successfully matching the1862// PseudoLA_*_LARGE nodes.1863SDValue Tmp = DAG.getConstant(0, DL, Ty);18641865// Use a PC-relative addressing mode to access the global dynamic GOT address.1866// This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).1867SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);1868return Large ? 
SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)1869: SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);1870}18711872SDValue1873LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,1874SelectionDAG &DAG) const {1875if (DAG.getMachineFunction().getFunction().getCallingConv() ==1876CallingConv::GHC)1877report_fatal_error("In GHC calling convention TLS is not supported");18781879bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;1880assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");18811882GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);1883assert(N->getOffset() == 0 && "unexpected offset in global node");18841885if (DAG.getTarget().useEmulatedTLS())1886report_fatal_error("the emulated TLS is prohibited",1887/*GenCrashDiag=*/false);18881889bool IsDesc = DAG.getTarget().useTLSDESC();18901891switch (getTargetMachine().getTLSModel(N->getGlobal())) {1892case TLSModel::GeneralDynamic:1893// In this model, application code calls the dynamic linker function1894// __tls_get_addr to locate TLS offsets into the dynamic thread vector at1895// runtime.1896if (!IsDesc)1897return getDynamicTLSAddr(N, DAG,1898Large ? LoongArch::PseudoLA_TLS_GD_LARGE1899: LoongArch::PseudoLA_TLS_GD,1900Large);1901break;1902case TLSModel::LocalDynamic:1903// Same as GeneralDynamic, except for assembly modifiers and relocation1904// records.1905if (!IsDesc)1906return getDynamicTLSAddr(N, DAG,1907Large ? LoongArch::PseudoLA_TLS_LD_LARGE1908: LoongArch::PseudoLA_TLS_LD,1909Large);1910break;1911case TLSModel::InitialExec:1912// This model uses the GOT to resolve TLS offsets.1913return getStaticTLSAddr(N, DAG,1914Large ? LoongArch::PseudoLA_TLS_IE_LARGE1915: LoongArch::PseudoLA_TLS_IE,1916/*UseGOT=*/true, Large);1917case TLSModel::LocalExec:1918// This model is used when static linking as the TLS offsets are resolved1919// during program linking.1920//1921// This node doesn't need an extra argument for the large code model.1922return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,1923/*UseGOT=*/false);1924}19251926return getTLSDescAddr(N, DAG,1927Large ? 
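  // Falling through to this point means a general-/local-dynamic access with
  // TLS descriptors enabled (DAG.getTarget().useTLSDESC()); the descriptor
  // sequence resolves the address without the explicit __tls_get_addr call
  // used by getDynamicTLSAddr above.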
LoongArch::PseudoLA_TLS_DESC_PC_LARGE1928: LoongArch::PseudoLA_TLS_DESC_PC,1929Large);1930}19311932template <unsigned N>1933static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,1934SelectionDAG &DAG, bool IsSigned = false) {1935auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));1936// Check the ImmArg.1937if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||1938(!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {1939DAG.getContext()->emitError(Op->getOperationName(0) +1940": argument out of range.");1941return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());1942}1943return SDValue();1944}19451946SDValue1947LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,1948SelectionDAG &DAG) const {1949SDLoc DL(Op);1950switch (Op.getConstantOperandVal(0)) {1951default:1952return SDValue(); // Don't custom lower most intrinsics.1953case Intrinsic::thread_pointer: {1954EVT PtrVT = getPointerTy(DAG.getDataLayout());1955return DAG.getRegister(LoongArch::R2, PtrVT);1956}1957case Intrinsic::loongarch_lsx_vpickve2gr_d:1958case Intrinsic::loongarch_lsx_vpickve2gr_du:1959case Intrinsic::loongarch_lsx_vreplvei_d:1960case Intrinsic::loongarch_lasx_xvrepl128vei_d:1961return checkIntrinsicImmArg<1>(Op, 2, DAG);1962case Intrinsic::loongarch_lsx_vreplvei_w:1963case Intrinsic::loongarch_lasx_xvrepl128vei_w:1964case Intrinsic::loongarch_lasx_xvpickve2gr_d:1965case Intrinsic::loongarch_lasx_xvpickve2gr_du:1966case Intrinsic::loongarch_lasx_xvpickve_d:1967case Intrinsic::loongarch_lasx_xvpickve_d_f:1968return checkIntrinsicImmArg<2>(Op, 2, DAG);1969case Intrinsic::loongarch_lasx_xvinsve0_d:1970return checkIntrinsicImmArg<2>(Op, 3, DAG);1971case Intrinsic::loongarch_lsx_vsat_b:1972case Intrinsic::loongarch_lsx_vsat_bu:1973case Intrinsic::loongarch_lsx_vrotri_b:1974case Intrinsic::loongarch_lsx_vsllwil_h_b:1975case Intrinsic::loongarch_lsx_vsllwil_hu_bu:1976case Intrinsic::loongarch_lsx_vsrlri_b:1977case Intrinsic::loongarch_lsx_vsrari_b:1978case Intrinsic::loongarch_lsx_vreplvei_h:1979case Intrinsic::loongarch_lasx_xvsat_b:1980case Intrinsic::loongarch_lasx_xvsat_bu:1981case Intrinsic::loongarch_lasx_xvrotri_b:1982case Intrinsic::loongarch_lasx_xvsllwil_h_b:1983case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:1984case Intrinsic::loongarch_lasx_xvsrlri_b:1985case Intrinsic::loongarch_lasx_xvsrari_b:1986case Intrinsic::loongarch_lasx_xvrepl128vei_h:1987case Intrinsic::loongarch_lasx_xvpickve_w:1988case Intrinsic::loongarch_lasx_xvpickve_w_f:1989return checkIntrinsicImmArg<3>(Op, 2, DAG);1990case Intrinsic::loongarch_lasx_xvinsve0_w:1991return checkIntrinsicImmArg<3>(Op, 3, DAG);1992case Intrinsic::loongarch_lsx_vsat_h:1993case Intrinsic::loongarch_lsx_vsat_hu:1994case Intrinsic::loongarch_lsx_vrotri_h:1995case Intrinsic::loongarch_lsx_vsllwil_w_h:1996case Intrinsic::loongarch_lsx_vsllwil_wu_hu:1997case Intrinsic::loongarch_lsx_vsrlri_h:1998case Intrinsic::loongarch_lsx_vsrari_h:1999case Intrinsic::loongarch_lsx_vreplvei_b:2000case Intrinsic::loongarch_lasx_xvsat_h:2001case Intrinsic::loongarch_lasx_xvsat_hu:2002case Intrinsic::loongarch_lasx_xvrotri_h:2003case Intrinsic::loongarch_lasx_xvsllwil_w_h:2004case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:2005case Intrinsic::loongarch_lasx_xvsrlri_h:2006case Intrinsic::loongarch_lasx_xvsrari_h:2007case Intrinsic::loongarch_lasx_xvrepl128vei_b:2008return checkIntrinsicImmArg<4>(Op, 2, DAG);2009case Intrinsic::loongarch_lsx_vsrlni_b_h:2010case Intrinsic::loongarch_lsx_vsrani_b_h:2011case Intrinsic::loongarch_lsx_vsrlrni_b_h:2012case 
Intrinsic::loongarch_lsx_vsrarni_b_h:2013case Intrinsic::loongarch_lsx_vssrlni_b_h:2014case Intrinsic::loongarch_lsx_vssrani_b_h:2015case Intrinsic::loongarch_lsx_vssrlni_bu_h:2016case Intrinsic::loongarch_lsx_vssrani_bu_h:2017case Intrinsic::loongarch_lsx_vssrlrni_b_h:2018case Intrinsic::loongarch_lsx_vssrarni_b_h:2019case Intrinsic::loongarch_lsx_vssrlrni_bu_h:2020case Intrinsic::loongarch_lsx_vssrarni_bu_h:2021case Intrinsic::loongarch_lasx_xvsrlni_b_h:2022case Intrinsic::loongarch_lasx_xvsrani_b_h:2023case Intrinsic::loongarch_lasx_xvsrlrni_b_h:2024case Intrinsic::loongarch_lasx_xvsrarni_b_h:2025case Intrinsic::loongarch_lasx_xvssrlni_b_h:2026case Intrinsic::loongarch_lasx_xvssrani_b_h:2027case Intrinsic::loongarch_lasx_xvssrlni_bu_h:2028case Intrinsic::loongarch_lasx_xvssrani_bu_h:2029case Intrinsic::loongarch_lasx_xvssrlrni_b_h:2030case Intrinsic::loongarch_lasx_xvssrarni_b_h:2031case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:2032case Intrinsic::loongarch_lasx_xvssrarni_bu_h:2033return checkIntrinsicImmArg<4>(Op, 3, DAG);2034case Intrinsic::loongarch_lsx_vsat_w:2035case Intrinsic::loongarch_lsx_vsat_wu:2036case Intrinsic::loongarch_lsx_vrotri_w:2037case Intrinsic::loongarch_lsx_vsllwil_d_w:2038case Intrinsic::loongarch_lsx_vsllwil_du_wu:2039case Intrinsic::loongarch_lsx_vsrlri_w:2040case Intrinsic::loongarch_lsx_vsrari_w:2041case Intrinsic::loongarch_lsx_vslei_bu:2042case Intrinsic::loongarch_lsx_vslei_hu:2043case Intrinsic::loongarch_lsx_vslei_wu:2044case Intrinsic::loongarch_lsx_vslei_du:2045case Intrinsic::loongarch_lsx_vslti_bu:2046case Intrinsic::loongarch_lsx_vslti_hu:2047case Intrinsic::loongarch_lsx_vslti_wu:2048case Intrinsic::loongarch_lsx_vslti_du:2049case Intrinsic::loongarch_lsx_vbsll_v:2050case Intrinsic::loongarch_lsx_vbsrl_v:2051case Intrinsic::loongarch_lasx_xvsat_w:2052case Intrinsic::loongarch_lasx_xvsat_wu:2053case Intrinsic::loongarch_lasx_xvrotri_w:2054case Intrinsic::loongarch_lasx_xvsllwil_d_w:2055case Intrinsic::loongarch_lasx_xvsllwil_du_wu:2056case Intrinsic::loongarch_lasx_xvsrlri_w:2057case Intrinsic::loongarch_lasx_xvsrari_w:2058case Intrinsic::loongarch_lasx_xvslei_bu:2059case Intrinsic::loongarch_lasx_xvslei_hu:2060case Intrinsic::loongarch_lasx_xvslei_wu:2061case Intrinsic::loongarch_lasx_xvslei_du:2062case Intrinsic::loongarch_lasx_xvslti_bu:2063case Intrinsic::loongarch_lasx_xvslti_hu:2064case Intrinsic::loongarch_lasx_xvslti_wu:2065case Intrinsic::loongarch_lasx_xvslti_du:2066case Intrinsic::loongarch_lasx_xvbsll_v:2067case Intrinsic::loongarch_lasx_xvbsrl_v:2068return checkIntrinsicImmArg<5>(Op, 2, DAG);2069case Intrinsic::loongarch_lsx_vseqi_b:2070case Intrinsic::loongarch_lsx_vseqi_h:2071case Intrinsic::loongarch_lsx_vseqi_w:2072case Intrinsic::loongarch_lsx_vseqi_d:2073case Intrinsic::loongarch_lsx_vslei_b:2074case Intrinsic::loongarch_lsx_vslei_h:2075case Intrinsic::loongarch_lsx_vslei_w:2076case Intrinsic::loongarch_lsx_vslei_d:2077case Intrinsic::loongarch_lsx_vslti_b:2078case Intrinsic::loongarch_lsx_vslti_h:2079case Intrinsic::loongarch_lsx_vslti_w:2080case Intrinsic::loongarch_lsx_vslti_d:2081case Intrinsic::loongarch_lasx_xvseqi_b:2082case Intrinsic::loongarch_lasx_xvseqi_h:2083case Intrinsic::loongarch_lasx_xvseqi_w:2084case Intrinsic::loongarch_lasx_xvseqi_d:2085case Intrinsic::loongarch_lasx_xvslei_b:2086case Intrinsic::loongarch_lasx_xvslei_h:2087case Intrinsic::loongarch_lasx_xvslei_w:2088case Intrinsic::loongarch_lasx_xvslei_d:2089case Intrinsic::loongarch_lasx_xvslti_b:2090case Intrinsic::loongarch_lasx_xvslti_h:2091case 
Intrinsic::loongarch_lasx_xvslti_w:2092case Intrinsic::loongarch_lasx_xvslti_d:2093return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);2094case Intrinsic::loongarch_lsx_vsrlni_h_w:2095case Intrinsic::loongarch_lsx_vsrani_h_w:2096case Intrinsic::loongarch_lsx_vsrlrni_h_w:2097case Intrinsic::loongarch_lsx_vsrarni_h_w:2098case Intrinsic::loongarch_lsx_vssrlni_h_w:2099case Intrinsic::loongarch_lsx_vssrani_h_w:2100case Intrinsic::loongarch_lsx_vssrlni_hu_w:2101case Intrinsic::loongarch_lsx_vssrani_hu_w:2102case Intrinsic::loongarch_lsx_vssrlrni_h_w:2103case Intrinsic::loongarch_lsx_vssrarni_h_w:2104case Intrinsic::loongarch_lsx_vssrlrni_hu_w:2105case Intrinsic::loongarch_lsx_vssrarni_hu_w:2106case Intrinsic::loongarch_lsx_vfrstpi_b:2107case Intrinsic::loongarch_lsx_vfrstpi_h:2108case Intrinsic::loongarch_lasx_xvsrlni_h_w:2109case Intrinsic::loongarch_lasx_xvsrani_h_w:2110case Intrinsic::loongarch_lasx_xvsrlrni_h_w:2111case Intrinsic::loongarch_lasx_xvsrarni_h_w:2112case Intrinsic::loongarch_lasx_xvssrlni_h_w:2113case Intrinsic::loongarch_lasx_xvssrani_h_w:2114case Intrinsic::loongarch_lasx_xvssrlni_hu_w:2115case Intrinsic::loongarch_lasx_xvssrani_hu_w:2116case Intrinsic::loongarch_lasx_xvssrlrni_h_w:2117case Intrinsic::loongarch_lasx_xvssrarni_h_w:2118case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:2119case Intrinsic::loongarch_lasx_xvssrarni_hu_w:2120case Intrinsic::loongarch_lasx_xvfrstpi_b:2121case Intrinsic::loongarch_lasx_xvfrstpi_h:2122return checkIntrinsicImmArg<5>(Op, 3, DAG);2123case Intrinsic::loongarch_lsx_vsat_d:2124case Intrinsic::loongarch_lsx_vsat_du:2125case Intrinsic::loongarch_lsx_vrotri_d:2126case Intrinsic::loongarch_lsx_vsrlri_d:2127case Intrinsic::loongarch_lsx_vsrari_d:2128case Intrinsic::loongarch_lasx_xvsat_d:2129case Intrinsic::loongarch_lasx_xvsat_du:2130case Intrinsic::loongarch_lasx_xvrotri_d:2131case Intrinsic::loongarch_lasx_xvsrlri_d:2132case Intrinsic::loongarch_lasx_xvsrari_d:2133return checkIntrinsicImmArg<6>(Op, 2, DAG);2134case Intrinsic::loongarch_lsx_vsrlni_w_d:2135case Intrinsic::loongarch_lsx_vsrani_w_d:2136case Intrinsic::loongarch_lsx_vsrlrni_w_d:2137case Intrinsic::loongarch_lsx_vsrarni_w_d:2138case Intrinsic::loongarch_lsx_vssrlni_w_d:2139case Intrinsic::loongarch_lsx_vssrani_w_d:2140case Intrinsic::loongarch_lsx_vssrlni_wu_d:2141case Intrinsic::loongarch_lsx_vssrani_wu_d:2142case Intrinsic::loongarch_lsx_vssrlrni_w_d:2143case Intrinsic::loongarch_lsx_vssrarni_w_d:2144case Intrinsic::loongarch_lsx_vssrlrni_wu_d:2145case Intrinsic::loongarch_lsx_vssrarni_wu_d:2146case Intrinsic::loongarch_lasx_xvsrlni_w_d:2147case Intrinsic::loongarch_lasx_xvsrani_w_d:2148case Intrinsic::loongarch_lasx_xvsrlrni_w_d:2149case Intrinsic::loongarch_lasx_xvsrarni_w_d:2150case Intrinsic::loongarch_lasx_xvssrlni_w_d:2151case Intrinsic::loongarch_lasx_xvssrani_w_d:2152case Intrinsic::loongarch_lasx_xvssrlni_wu_d:2153case Intrinsic::loongarch_lasx_xvssrani_wu_d:2154case Intrinsic::loongarch_lasx_xvssrlrni_w_d:2155case Intrinsic::loongarch_lasx_xvssrarni_w_d:2156case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:2157case Intrinsic::loongarch_lasx_xvssrarni_wu_d:2158return checkIntrinsicImmArg<6>(Op, 3, DAG);2159case Intrinsic::loongarch_lsx_vsrlni_d_q:2160case Intrinsic::loongarch_lsx_vsrani_d_q:2161case Intrinsic::loongarch_lsx_vsrlrni_d_q:2162case Intrinsic::loongarch_lsx_vsrarni_d_q:2163case Intrinsic::loongarch_lsx_vssrlni_d_q:2164case Intrinsic::loongarch_lsx_vssrani_d_q:2165case Intrinsic::loongarch_lsx_vssrlni_du_q:2166case 
Intrinsic::loongarch_lsx_vssrani_du_q:2167case Intrinsic::loongarch_lsx_vssrlrni_d_q:2168case Intrinsic::loongarch_lsx_vssrarni_d_q:2169case Intrinsic::loongarch_lsx_vssrlrni_du_q:2170case Intrinsic::loongarch_lsx_vssrarni_du_q:2171case Intrinsic::loongarch_lasx_xvsrlni_d_q:2172case Intrinsic::loongarch_lasx_xvsrani_d_q:2173case Intrinsic::loongarch_lasx_xvsrlrni_d_q:2174case Intrinsic::loongarch_lasx_xvsrarni_d_q:2175case Intrinsic::loongarch_lasx_xvssrlni_d_q:2176case Intrinsic::loongarch_lasx_xvssrani_d_q:2177case Intrinsic::loongarch_lasx_xvssrlni_du_q:2178case Intrinsic::loongarch_lasx_xvssrani_du_q:2179case Intrinsic::loongarch_lasx_xvssrlrni_d_q:2180case Intrinsic::loongarch_lasx_xvssrarni_d_q:2181case Intrinsic::loongarch_lasx_xvssrlrni_du_q:2182case Intrinsic::loongarch_lasx_xvssrarni_du_q:2183return checkIntrinsicImmArg<7>(Op, 3, DAG);2184case Intrinsic::loongarch_lsx_vnori_b:2185case Intrinsic::loongarch_lsx_vshuf4i_b:2186case Intrinsic::loongarch_lsx_vshuf4i_h:2187case Intrinsic::loongarch_lsx_vshuf4i_w:2188case Intrinsic::loongarch_lasx_xvnori_b:2189case Intrinsic::loongarch_lasx_xvshuf4i_b:2190case Intrinsic::loongarch_lasx_xvshuf4i_h:2191case Intrinsic::loongarch_lasx_xvshuf4i_w:2192case Intrinsic::loongarch_lasx_xvpermi_d:2193return checkIntrinsicImmArg<8>(Op, 2, DAG);2194case Intrinsic::loongarch_lsx_vshuf4i_d:2195case Intrinsic::loongarch_lsx_vpermi_w:2196case Intrinsic::loongarch_lsx_vbitseli_b:2197case Intrinsic::loongarch_lsx_vextrins_b:2198case Intrinsic::loongarch_lsx_vextrins_h:2199case Intrinsic::loongarch_lsx_vextrins_w:2200case Intrinsic::loongarch_lsx_vextrins_d:2201case Intrinsic::loongarch_lasx_xvshuf4i_d:2202case Intrinsic::loongarch_lasx_xvpermi_w:2203case Intrinsic::loongarch_lasx_xvpermi_q:2204case Intrinsic::loongarch_lasx_xvbitseli_b:2205case Intrinsic::loongarch_lasx_xvextrins_b:2206case Intrinsic::loongarch_lasx_xvextrins_h:2207case Intrinsic::loongarch_lasx_xvextrins_w:2208case Intrinsic::loongarch_lasx_xvextrins_d:2209return checkIntrinsicImmArg<8>(Op, 3, DAG);2210case Intrinsic::loongarch_lsx_vrepli_b:2211case Intrinsic::loongarch_lsx_vrepli_h:2212case Intrinsic::loongarch_lsx_vrepli_w:2213case Intrinsic::loongarch_lsx_vrepli_d:2214case Intrinsic::loongarch_lasx_xvrepli_b:2215case Intrinsic::loongarch_lasx_xvrepli_h:2216case Intrinsic::loongarch_lasx_xvrepli_w:2217case Intrinsic::loongarch_lasx_xvrepli_d:2218return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);2219case Intrinsic::loongarch_lsx_vldi:2220case Intrinsic::loongarch_lasx_xvldi:2221return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);2222}2223}22242225// Helper function that emits error message for intrinsics with chain and return2226// merge values of a UNDEF and the chain.2227static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,2228StringRef ErrorMsg,2229SelectionDAG &DAG) {2230DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");2231return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},2232SDLoc(Op));2233}22342235SDValue2236LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,2237SelectionDAG &DAG) const {2238SDLoc DL(Op);2239MVT GRLenVT = Subtarget.getGRLenVT();2240EVT VT = Op.getValueType();2241SDValue Chain = Op.getOperand(0);2242const StringRef ErrorMsgOOR = "argument out of range";2243const StringRef ErrorMsgReqLA64 = "requires loongarch64";2244const StringRef ErrorMsgReqF = "requires basic 'f' target feature";22452246switch (Op.getConstantOperandVal(1)) {2247default:2248return Op;2249case 
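  // The CRC intrinsics below produce an i32 result. On LA64 that type is not
  // legal, so they are instead handled in ReplaceNodeResults; reaching this
  // hook should therefore mean an LA32 target, where these instructions are
  // unavailable and an error is emitted.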
Intrinsic::loongarch_crc_w_b_w:2250case Intrinsic::loongarch_crc_w_h_w:2251case Intrinsic::loongarch_crc_w_w_w:2252case Intrinsic::loongarch_crc_w_d_w:2253case Intrinsic::loongarch_crcc_w_b_w:2254case Intrinsic::loongarch_crcc_w_h_w:2255case Intrinsic::loongarch_crcc_w_w_w:2256case Intrinsic::loongarch_crcc_w_d_w:2257return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);2258case Intrinsic::loongarch_csrrd_w:2259case Intrinsic::loongarch_csrrd_d: {2260unsigned Imm = Op.getConstantOperandVal(2);2261return !isUInt<14>(Imm)2262? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)2263: DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},2264{Chain, DAG.getConstant(Imm, DL, GRLenVT)});2265}2266case Intrinsic::loongarch_csrwr_w:2267case Intrinsic::loongarch_csrwr_d: {2268unsigned Imm = Op.getConstantOperandVal(3);2269return !isUInt<14>(Imm)2270? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)2271: DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},2272{Chain, Op.getOperand(2),2273DAG.getConstant(Imm, DL, GRLenVT)});2274}2275case Intrinsic::loongarch_csrxchg_w:2276case Intrinsic::loongarch_csrxchg_d: {2277unsigned Imm = Op.getConstantOperandVal(4);2278return !isUInt<14>(Imm)2279? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)2280: DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},2281{Chain, Op.getOperand(2), Op.getOperand(3),2282DAG.getConstant(Imm, DL, GRLenVT)});2283}2284case Intrinsic::loongarch_iocsrrd_d: {2285return DAG.getNode(2286LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},2287{Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});2288}2289#define IOCSRRD_CASE(NAME, NODE) \2290case Intrinsic::loongarch_##NAME: { \2291return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \2292{Chain, Op.getOperand(2)}); \2293}2294IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);2295IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);2296IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);2297#undef IOCSRRD_CASE2298case Intrinsic::loongarch_cpucfg: {2299return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},2300{Chain, Op.getOperand(2)});2301}2302case Intrinsic::loongarch_lddir_d: {2303unsigned Imm = Op.getConstantOperandVal(3);2304return !isUInt<8>(Imm)2305? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)2306: Op;2307}2308case Intrinsic::loongarch_movfcsr2gr: {2309if (!Subtarget.hasBasicF())2310return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);2311unsigned Imm = Op.getConstantOperandVal(2);2312return !isUInt<2>(Imm)2313? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)2314: DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},2315{Chain, DAG.getConstant(Imm, DL, GRLenVT)});2316}2317case Intrinsic::loongarch_lsx_vld:2318case Intrinsic::loongarch_lsx_vldrepl_b:2319case Intrinsic::loongarch_lasx_xvld:2320case Intrinsic::loongarch_lasx_xvldrepl_b:2321return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())2322? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)2323: SDValue();2324case Intrinsic::loongarch_lsx_vldrepl_h:2325case Intrinsic::loongarch_lasx_xvldrepl_h:2326return !isShiftedInt<11, 1>(2327cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())2328? emitIntrinsicWithChainErrorMessage(2329Op, "argument out of range or not a multiple of 2", DAG)2330: SDValue();2331case Intrinsic::loongarch_lsx_vldrepl_w:2332case Intrinsic::loongarch_lasx_xvldrepl_w:2333return !isShiftedInt<10, 2>(2334cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())2335? 
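  // For reference: isShiftedInt<10, 2> accepts offsets that are a multiple of
  // 4 in the range [-2048, 2044]; the other vldrepl variants check the
  // analogous shifted ranges noted in their error messages.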
emitIntrinsicWithChainErrorMessage(2336Op, "argument out of range or not a multiple of 4", DAG)2337: SDValue();2338case Intrinsic::loongarch_lsx_vldrepl_d:2339case Intrinsic::loongarch_lasx_xvldrepl_d:2340return !isShiftedInt<9, 3>(2341cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())2342? emitIntrinsicWithChainErrorMessage(2343Op, "argument out of range or not a multiple of 8", DAG)2344: SDValue();2345}2346}23472348// Helper function that emits error message for intrinsics with void return2349// value and return the chain.2350static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,2351SelectionDAG &DAG) {23522353DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");2354return Op.getOperand(0);2355}23562357SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,2358SelectionDAG &DAG) const {2359SDLoc DL(Op);2360MVT GRLenVT = Subtarget.getGRLenVT();2361SDValue Chain = Op.getOperand(0);2362uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);2363SDValue Op2 = Op.getOperand(2);2364const StringRef ErrorMsgOOR = "argument out of range";2365const StringRef ErrorMsgReqLA64 = "requires loongarch64";2366const StringRef ErrorMsgReqLA32 = "requires loongarch32";2367const StringRef ErrorMsgReqF = "requires basic 'f' target feature";23682369switch (IntrinsicEnum) {2370default:2371// TODO: Add more Intrinsics.2372return SDValue();2373case Intrinsic::loongarch_cacop_d:2374case Intrinsic::loongarch_cacop_w: {2375if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())2376return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);2377if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())2378return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);2379// call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)2380unsigned Imm1 = Op2->getAsZExtVal();2381int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();2382if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))2383return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);2384return Op;2385}2386case Intrinsic::loongarch_dbar: {2387unsigned Imm = Op2->getAsZExtVal();2388return !isUInt<15>(Imm)2389? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)2390: DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,2391DAG.getConstant(Imm, DL, GRLenVT));2392}2393case Intrinsic::loongarch_ibar: {2394unsigned Imm = Op2->getAsZExtVal();2395return !isUInt<15>(Imm)2396? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)2397: DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,2398DAG.getConstant(Imm, DL, GRLenVT));2399}2400case Intrinsic::loongarch_break: {2401unsigned Imm = Op2->getAsZExtVal();2402return !isUInt<15>(Imm)2403? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)2404: DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,2405DAG.getConstant(Imm, DL, GRLenVT));2406}2407case Intrinsic::loongarch_movgr2fcsr: {2408if (!Subtarget.hasBasicF())2409return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);2410unsigned Imm = Op2->getAsZExtVal();2411return !isUInt<2>(Imm)2412? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)2413: DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,2414DAG.getConstant(Imm, DL, GRLenVT),2415DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,2416Op.getOperand(3)));2417}2418case Intrinsic::loongarch_syscall: {2419unsigned Imm = Op2->getAsZExtVal();2420return !isUInt<15>(Imm)2421? 
emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)2422: DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,2423DAG.getConstant(Imm, DL, GRLenVT));2424}2425#define IOCSRWR_CASE(NAME, NODE) \2426case Intrinsic::loongarch_##NAME: { \2427SDValue Op3 = Op.getOperand(3); \2428return Subtarget.is64Bit() \2429? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \2430DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \2431DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \2432: DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \2433Op3); \2434}2435IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);2436IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);2437IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);2438#undef IOCSRWR_CASE2439case Intrinsic::loongarch_iocsrwr_d: {2440return !Subtarget.is64Bit()2441? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)2442: DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,2443Op2,2444DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,2445Op.getOperand(3)));2446}2447#define ASRT_LE_GT_CASE(NAME) \2448case Intrinsic::loongarch_##NAME: { \2449return !Subtarget.is64Bit() \2450? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \2451: Op; \2452}2453ASRT_LE_GT_CASE(asrtle_d)2454ASRT_LE_GT_CASE(asrtgt_d)2455#undef ASRT_LE_GT_CASE2456case Intrinsic::loongarch_ldpte_d: {2457unsigned Imm = Op.getConstantOperandVal(3);2458return !Subtarget.is64Bit()2459? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)2460: !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)2461: Op;2462}2463case Intrinsic::loongarch_lsx_vst:2464case Intrinsic::loongarch_lasx_xvst:2465return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())2466? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)2467: SDValue();2468case Intrinsic::loongarch_lasx_xvstelm_b:2469return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||2470!isUInt<5>(Op.getConstantOperandVal(5)))2471? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)2472: SDValue();2473case Intrinsic::loongarch_lsx_vstelm_b:2474return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||2475!isUInt<4>(Op.getConstantOperandVal(5)))2476? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)2477: SDValue();2478case Intrinsic::loongarch_lasx_xvstelm_h:2479return (!isShiftedInt<8, 1>(2480cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||2481!isUInt<4>(Op.getConstantOperandVal(5)))2482? emitIntrinsicErrorMessage(2483Op, "argument out of range or not a multiple of 2", DAG)2484: SDValue();2485case Intrinsic::loongarch_lsx_vstelm_h:2486return (!isShiftedInt<8, 1>(2487cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||2488!isUInt<3>(Op.getConstantOperandVal(5)))2489? emitIntrinsicErrorMessage(2490Op, "argument out of range or not a multiple of 2", DAG)2491: SDValue();2492case Intrinsic::loongarch_lasx_xvstelm_w:2493return (!isShiftedInt<8, 2>(2494cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||2495!isUInt<3>(Op.getConstantOperandVal(5)))2496? emitIntrinsicErrorMessage(2497Op, "argument out of range or not a multiple of 4", DAG)2498: SDValue();2499case Intrinsic::loongarch_lsx_vstelm_w:2500return (!isShiftedInt<8, 2>(2501cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||2502!isUInt<2>(Op.getConstantOperandVal(5)))2503? emitIntrinsicErrorMessage(2504Op, "argument out of range or not a multiple of 4", DAG)2505: SDValue();2506case Intrinsic::loongarch_lasx_xvstelm_d:2507return (!isShiftedInt<8, 3>(2508cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||2509!isUInt<2>(Op.getConstantOperandVal(5)))2510? 
emitIntrinsicErrorMessage(
                     Op, "argument out of range or not a multiple of 8", DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vstelm_d:
    return (!isShiftedInt<8, 3>(
                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
            !isUInt<1>(Op.getConstantOperandVal(5)))
               ? emitIntrinsicErrorMessage(
                     Op, "argument out of range or not a multiple of 8", DAG)
               : SDValue();
  }
}

SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // if Shamt-GRLen < 0: // Shamt < GRLen
  //   Lo = Lo << Shamt
  //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
  // else:
  //   Lo = 0
  //   Hi = Lo << (Shamt-GRLen)

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
  SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
  SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
  SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);

  SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
  SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
  SDValue ShiftRightLo =
      DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
  SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
  SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
  SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}

SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
                                                      SelectionDAG &DAG,
                                                      bool IsSRA) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // SRA expansion:
  //   if Shamt-GRLen < 0: // Shamt < GRLen
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (Shamt ^ GRLen-1))
  //     Hi = Hi >>s Shamt
  //   else:
  //     Lo = Hi >>s (Shamt-GRLen);
  //     Hi = Hi >>s (GRLen-1)
  //
  // SRL expansion:
  //   if Shamt-GRLen < 0: // Shamt < GRLen
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (Shamt ^ GRLen-1))
  //     Hi = Hi >>u Shamt
  //   else:
  //     Lo = Hi >>u (Shamt-GRLen);
  //     Hi = 0;

  unsigned ShiftRightOp = IsSRA ?
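  // A worked example of the expansion above (added for illustration), for
  // GRLen == 32 and an i64 SRL: with Shamt == 8 the first branch yields
  //   Lo = (Lo >>u 8) | (Hi << 24), Hi = Hi >>u 8
  // and with Shamt == 40 the else branch yields
  //   Lo = Hi >>u 8, Hi = 0.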
ISD::SRA : ISD::SRL;25882589SDValue Zero = DAG.getConstant(0, DL, VT);2590SDValue One = DAG.getConstant(1, DL, VT);2591SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);2592SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);2593SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);2594SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);25952596SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);2597SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);2598SDValue ShiftLeftHi =2599DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);2600SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);2601SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);2602SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);2603SDValue HiFalse =2604IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;26052606SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);26072608Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);2609Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);26102611SDValue Parts[2] = {Lo, Hi};2612return DAG.getMergeValues(Parts, DL);2613}26142615// Returns the opcode of the target-specific SDNode that implements the 32-bit2616// form of the given Opcode.2617static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {2618switch (Opcode) {2619default:2620llvm_unreachable("Unexpected opcode");2621case ISD::UDIV:2622return LoongArchISD::DIV_WU;2623case ISD::UREM:2624return LoongArchISD::MOD_WU;2625case ISD::SHL:2626return LoongArchISD::SLL_W;2627case ISD::SRA:2628return LoongArchISD::SRA_W;2629case ISD::SRL:2630return LoongArchISD::SRL_W;2631case ISD::ROTL:2632case ISD::ROTR:2633return LoongArchISD::ROTR_W;2634case ISD::CTTZ:2635return LoongArchISD::CTZ_W;2636case ISD::CTLZ:2637return LoongArchISD::CLZ_W;2638}2639}26402641// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG2642// node. 
Because i8/i16/i32 isn't a legal type for LA64, these operations would
// otherwise be promoted to i64, making it difficult to select the
// SLL_W/.../*W nodes later on because the fact that the operation was
// originally of type i8/i16/i32 is lost.
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
                                   unsigned ExtOpc = ISD::ANY_EXTEND) {
  SDLoc DL(N);
  LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
  SDValue NewOp0, NewRes;

  switch (NumOp) {
  default:
    llvm_unreachable("Unexpected NumOp");
  case 1: {
    NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
    NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
    break;
  }
  case 2: {
    NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
    SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
    if (N->getOpcode() == ISD::ROTL) {
      SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
      NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
    }
    NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
    break;
  }
  // TODO: Handle more NumOp.
  }

  // ReplaceNodeResults requires we maintain the same type for the return
  // value.
  return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
}

// Converts the given 32-bit operation to an i64 operation with sign-extension
// semantics, reducing the number of sign-extension instructions needed.
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
  SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
  SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
                               DAG.getValueType(MVT::i32));
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
}

// Helper function that emits an error message for intrinsics with or without a
// chain and returns UNDEF (plus the chain, when present) as the results.
static void emitErrorAndReplaceIntrinsicResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
    StringRef ErrorMsg, bool WithChain = true) {
  DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
  Results.push_back(DAG.getUNDEF(N->getValueType(0)));
  if (!WithChain)
    return;
  Results.push_back(N->getOperand(0));
}

template <unsigned N>
static void
replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
                         SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
                         unsigned ResOp) {
  const StringRef ErrorMsgOOR = "argument out of range";
  unsigned Imm = Node->getConstantOperandVal(2);
  if (!isUInt<N>(Imm)) {
    emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
                                        /*WithChain=*/false);
    return;
  }
  SDLoc DL(Node);
  SDValue Vec = Node->getOperand(1);

  SDValue PickElt =
      DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
                  DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
                  DAG.getValueType(Vec.getValueType().getVectorElementType()));
  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
                                PickElt.getValue(0)));
}

static void replaceVecCondBranchResults(SDNode *N,
                                        SmallVectorImpl<SDValue> &Results,
                                        SelectionDAG &DAG,
                                        const LoongArchSubtarget &Subtarget,
                                        unsigned ResOp) {
  SDLoc DL(N);
  SDValue Vec = N->getOperand(1);

  SDValue CB =
DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);2734Results.push_back(2735DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));2736}27372738static void2739replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,2740SelectionDAG &DAG,2741const LoongArchSubtarget &Subtarget) {2742switch (N->getConstantOperandVal(0)) {2743default:2744llvm_unreachable("Unexpected Intrinsic.");2745case Intrinsic::loongarch_lsx_vpickve2gr_b:2746replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,2747LoongArchISD::VPICK_SEXT_ELT);2748break;2749case Intrinsic::loongarch_lsx_vpickve2gr_h:2750case Intrinsic::loongarch_lasx_xvpickve2gr_w:2751replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,2752LoongArchISD::VPICK_SEXT_ELT);2753break;2754case Intrinsic::loongarch_lsx_vpickve2gr_w:2755replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,2756LoongArchISD::VPICK_SEXT_ELT);2757break;2758case Intrinsic::loongarch_lsx_vpickve2gr_bu:2759replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,2760LoongArchISD::VPICK_ZEXT_ELT);2761break;2762case Intrinsic::loongarch_lsx_vpickve2gr_hu:2763case Intrinsic::loongarch_lasx_xvpickve2gr_wu:2764replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,2765LoongArchISD::VPICK_ZEXT_ELT);2766break;2767case Intrinsic::loongarch_lsx_vpickve2gr_wu:2768replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,2769LoongArchISD::VPICK_ZEXT_ELT);2770break;2771case Intrinsic::loongarch_lsx_bz_b:2772case Intrinsic::loongarch_lsx_bz_h:2773case Intrinsic::loongarch_lsx_bz_w:2774case Intrinsic::loongarch_lsx_bz_d:2775case Intrinsic::loongarch_lasx_xbz_b:2776case Intrinsic::loongarch_lasx_xbz_h:2777case Intrinsic::loongarch_lasx_xbz_w:2778case Intrinsic::loongarch_lasx_xbz_d:2779replaceVecCondBranchResults(N, Results, DAG, Subtarget,2780LoongArchISD::VALL_ZERO);2781break;2782case Intrinsic::loongarch_lsx_bz_v:2783case Intrinsic::loongarch_lasx_xbz_v:2784replaceVecCondBranchResults(N, Results, DAG, Subtarget,2785LoongArchISD::VANY_ZERO);2786break;2787case Intrinsic::loongarch_lsx_bnz_b:2788case Intrinsic::loongarch_lsx_bnz_h:2789case Intrinsic::loongarch_lsx_bnz_w:2790case Intrinsic::loongarch_lsx_bnz_d:2791case Intrinsic::loongarch_lasx_xbnz_b:2792case Intrinsic::loongarch_lasx_xbnz_h:2793case Intrinsic::loongarch_lasx_xbnz_w:2794case Intrinsic::loongarch_lasx_xbnz_d:2795replaceVecCondBranchResults(N, Results, DAG, Subtarget,2796LoongArchISD::VALL_NONZERO);2797break;2798case Intrinsic::loongarch_lsx_bnz_v:2799case Intrinsic::loongarch_lasx_xbnz_v:2800replaceVecCondBranchResults(N, Results, DAG, Subtarget,2801LoongArchISD::VANY_NONZERO);2802break;2803}2804}28052806void LoongArchTargetLowering::ReplaceNodeResults(2807SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {2808SDLoc DL(N);2809EVT VT = N->getValueType(0);2810switch (N->getOpcode()) {2811default:2812llvm_unreachable("Don't know how to legalize this operation");2813case ISD::ADD:2814case ISD::SUB:2815assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&2816"Unexpected custom legalisation");2817Results.push_back(customLegalizeToWOpWithSExt(N, DAG));2818break;2819case ISD::UDIV:2820case ISD::UREM:2821assert(VT == MVT::i32 && Subtarget.is64Bit() &&2822"Unexpected custom legalisation");2823Results.push_back(customLegalizeToWOp(N, DAG, 2, ISD::SIGN_EXTEND));2824break;2825case ISD::SHL:2826case ISD::SRA:2827case ISD::SRL:2828assert(VT == MVT::i32 && Subtarget.is64Bit() &&2829"Unexpected custom legalisation");2830if (N->getOperand(1).getOpcode() != ISD::Constant) 
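    // Only shifts by a non-constant amount are rewritten into the
    // target-specific *_W form here; constant shift amounts are left for the
    // default legalization and the existing shift-immediate patterns.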
{2831Results.push_back(customLegalizeToWOp(N, DAG, 2));2832break;2833}2834break;2835case ISD::ROTL:2836case ISD::ROTR:2837assert(VT == MVT::i32 && Subtarget.is64Bit() &&2838"Unexpected custom legalisation");2839Results.push_back(customLegalizeToWOp(N, DAG, 2));2840break;2841case ISD::FP_TO_SINT: {2842assert(VT == MVT::i32 && Subtarget.is64Bit() &&2843"Unexpected custom legalisation");2844SDValue Src = N->getOperand(0);2845EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));2846if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=2847TargetLowering::TypeSoftenFloat) {2848SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);2849Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));2850return;2851}2852// If the FP type needs to be softened, emit a library call using the 'si'2853// version. If we left it to default legalization we'd end up with 'di'.2854RTLIB::Libcall LC;2855LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);2856MakeLibCallOptions CallOptions;2857EVT OpVT = Src.getValueType();2858CallOptions.setTypeListBeforeSoften(OpVT, VT, true);2859SDValue Chain = SDValue();2860SDValue Result;2861std::tie(Result, Chain) =2862makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);2863Results.push_back(Result);2864break;2865}2866case ISD::BITCAST: {2867SDValue Src = N->getOperand(0);2868EVT SrcVT = Src.getValueType();2869if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&2870Subtarget.hasBasicF()) {2871SDValue Dst =2872DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);2873Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));2874}2875break;2876}2877case ISD::FP_TO_UINT: {2878assert(VT == MVT::i32 && Subtarget.is64Bit() &&2879"Unexpected custom legalisation");2880auto &TLI = DAG.getTargetLoweringInfo();2881SDValue Tmp1, Tmp2;2882TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);2883Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));2884break;2885}2886case ISD::BSWAP: {2887SDValue Src = N->getOperand(0);2888assert((VT == MVT::i16 || VT == MVT::i32) &&2889"Unexpected custom legalization");2890MVT GRLenVT = Subtarget.getGRLenVT();2891SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);2892SDValue Tmp;2893switch (VT.getSizeInBits()) {2894default:2895llvm_unreachable("Unexpected operand width");2896case 16:2897Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);2898break;2899case 32:2900// Only LA64 will get to here due to the size mismatch between VT and2901// GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo.2902Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);2903break;2904}2905Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));2906break;2907}2908case ISD::BITREVERSE: {2909SDValue Src = N->getOperand(0);2910assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&2911"Unexpected custom legalization");2912MVT GRLenVT = Subtarget.getGRLenVT();2913SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);2914SDValue Tmp;2915switch (VT.getSizeInBits()) {2916default:2917llvm_unreachable("Unexpected operand width");2918case 8:2919Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);2920break;2921case 32:2922Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);2923break;2924}2925Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));2926break;2927}2928case ISD::CTLZ:2929case ISD::CTTZ: {2930assert(VT == MVT::i32 && Subtarget.is64Bit() &&2931"Unexpected custom legalisation");2932Results.push_back(customLegalizeToWOp(N, DAG, 
1));2933break;2934}2935case ISD::INTRINSIC_W_CHAIN: {2936SDValue Chain = N->getOperand(0);2937SDValue Op2 = N->getOperand(2);2938MVT GRLenVT = Subtarget.getGRLenVT();2939const StringRef ErrorMsgOOR = "argument out of range";2940const StringRef ErrorMsgReqLA64 = "requires loongarch64";2941const StringRef ErrorMsgReqF = "requires basic 'f' target feature";29422943switch (N->getConstantOperandVal(1)) {2944default:2945llvm_unreachable("Unexpected Intrinsic.");2946case Intrinsic::loongarch_movfcsr2gr: {2947if (!Subtarget.hasBasicF()) {2948emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);2949return;2950}2951unsigned Imm = Op2->getAsZExtVal();2952if (!isUInt<2>(Imm)) {2953emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);2954return;2955}2956SDValue MOVFCSR2GRResults = DAG.getNode(2957LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},2958{Chain, DAG.getConstant(Imm, DL, GRLenVT)});2959Results.push_back(2960DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));2961Results.push_back(MOVFCSR2GRResults.getValue(1));2962break;2963}2964#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \2965case Intrinsic::loongarch_##NAME: { \2966SDValue NODE = DAG.getNode( \2967LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \2968{Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \2969DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \2970Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \2971Results.push_back(NODE.getValue(1)); \2972break; \2973}2974CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)2975CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)2976CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)2977CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)2978CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)2979CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)2980#undef CRC_CASE_EXT_BINARYOP29812982#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \2983case Intrinsic::loongarch_##NAME: { \2984SDValue NODE = DAG.getNode( \2985LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \2986{Chain, Op2, \2987DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \2988Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \2989Results.push_back(NODE.getValue(1)); \2990break; \2991}2992CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)2993CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)2994#undef CRC_CASE_EXT_UNARYOP2995#define CSR_CASE(ID) \2996case Intrinsic::loongarch_##ID: { \2997if (!Subtarget.is64Bit()) \2998emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \2999break; \3000}3001CSR_CASE(csrrd_d);3002CSR_CASE(csrwr_d);3003CSR_CASE(csrxchg_d);3004CSR_CASE(iocsrrd_d);3005#undef CSR_CASE3006case Intrinsic::loongarch_csrrd_w: {3007unsigned Imm = Op2->getAsZExtVal();3008if (!isUInt<14>(Imm)) {3009emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);3010return;3011}3012SDValue CSRRDResults =3013DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},3014{Chain, DAG.getConstant(Imm, DL, GRLenVT)});3015Results.push_back(3016DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));3017Results.push_back(CSRRDResults.getValue(1));3018break;3019}3020case Intrinsic::loongarch_csrwr_w: {3021unsigned Imm = N->getConstantOperandVal(3);3022if (!isUInt<14>(Imm)) {3023emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);3024return;3025}3026SDValue CSRWRResults =3027DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},3028{Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),3029DAG.getConstant(Imm, DL, 
GRLenVT)});3030Results.push_back(3031DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));3032Results.push_back(CSRWRResults.getValue(1));3033break;3034}3035case Intrinsic::loongarch_csrxchg_w: {3036unsigned Imm = N->getConstantOperandVal(4);3037if (!isUInt<14>(Imm)) {3038emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);3039return;3040}3041SDValue CSRXCHGResults = DAG.getNode(3042LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},3043{Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),3044DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),3045DAG.getConstant(Imm, DL, GRLenVT)});3046Results.push_back(3047DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));3048Results.push_back(CSRXCHGResults.getValue(1));3049break;3050}3051#define IOCSRRD_CASE(NAME, NODE) \3052case Intrinsic::loongarch_##NAME: { \3053SDValue IOCSRRDResults = \3054DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \3055{Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \3056Results.push_back( \3057DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \3058Results.push_back(IOCSRRDResults.getValue(1)); \3059break; \3060}3061IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);3062IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);3063IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);3064#undef IOCSRRD_CASE3065case Intrinsic::loongarch_cpucfg: {3066SDValue CPUCFGResults =3067DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},3068{Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});3069Results.push_back(3070DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));3071Results.push_back(CPUCFGResults.getValue(1));3072break;3073}3074case Intrinsic::loongarch_lddir_d: {3075if (!Subtarget.is64Bit()) {3076emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);3077return;3078}3079break;3080}3081}3082break;3083}3084case ISD::READ_REGISTER: {3085if (Subtarget.is64Bit())3086DAG.getContext()->emitError(3087"On LA64, only 64-bit registers can be read.");3088else3089DAG.getContext()->emitError(3090"On LA32, only 32-bit registers can be read.");3091Results.push_back(DAG.getUNDEF(VT));3092Results.push_back(N->getOperand(0));3093break;3094}3095case ISD::INTRINSIC_WO_CHAIN: {3096replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);3097break;3098}3099}3100}31013102static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,3103TargetLowering::DAGCombinerInfo &DCI,3104const LoongArchSubtarget &Subtarget) {3105if (DCI.isBeforeLegalizeOps())3106return SDValue();31073108SDValue FirstOperand = N->getOperand(0);3109SDValue SecondOperand = N->getOperand(1);3110unsigned FirstOperandOpc = FirstOperand.getOpcode();3111EVT ValTy = N->getValueType(0);3112SDLoc DL(N);3113uint64_t lsb, msb;3114unsigned SMIdx, SMLen;3115ConstantSDNode *CN;3116SDValue NewOperand;3117MVT GRLenVT = Subtarget.getGRLenVT();31183119// Op's second operand must be a shifted mask.3120if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||3121!isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))3122return SDValue();31233124if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {3125// Pattern match BSTRPICK.3126// $dst = and ((sra or srl) $src , lsb), (2**len - 1)3127// => BSTRPICK $dst, $src, msb, lsb3128// where msb = lsb + len - 131293130// The second operand of the shift must be an immediate.3131if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))3132return SDValue();31333134lsb = CN->getZExtValue();31353136// Return if the shifted mask does not start at bit 0 or the sum of its3137// length 
    // and lsb exceeds the word's size.
    if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
      return SDValue();

    NewOperand = FirstOperand.getOperand(0);
  } else {
    // Pattern match BSTRPICK.
    // $dst = and $src, (2**len - 1), if len > 12
    // => BSTRPICK $dst, $src, msb, lsb
    // where lsb = 0 and msb = len - 1

    // If the mask is <= 0xfff, andi can be used instead.
    if (CN->getZExtValue() <= 0xfff)
      return SDValue();

    // Return if the MSB exceeds the word's size.
    if (SMIdx + SMLen > ValTy.getSizeInBits())
      return SDValue();

    if (SMIdx > 0) {
      // Omit if the constant has more than 2 uses. This is a conservative
      // decision. Whether it is a win depends on the HW microarchitecture.
      // However it should always be better for 1 and 2 uses.
      if (CN->use_size() > 2)
        return SDValue();
      // Return if the constant can be composed by a single LU12I.W.
      if ((CN->getZExtValue() & 0xfff) == 0)
        return SDValue();
      // Return if the constant can be composed by a single ADDI with
      // the zero register.
      if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
        return SDValue();
    }

    lsb = SMIdx;
    NewOperand = FirstOperand;
  }

  msb = lsb + SMLen - 1;
  SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
                            DAG.getConstant(msb, DL, GRLenVT),
                            DAG.getConstant(lsb, DL, GRLenVT));
  if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
    return NR0;
  // Try to optimize to
  //   bstrpick $Rd, $Rs, msb, lsb
  //   slli $Rd, $Rd, lsb
  return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
                     DAG.getConstant(lsb, DL, GRLenVT));
}

static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const LoongArchSubtarget &Subtarget) {
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  // $dst = srl (and $src, Mask), Shamt
  // =>
  // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
  // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
  //

  SDValue FirstOperand = N->getOperand(0);
  ConstantSDNode *CN;
  EVT ValTy = N->getValueType(0);
  SDLoc DL(N);
  MVT GRLenVT = Subtarget.getGRLenVT();
  unsigned MaskIdx, MaskLen;
  uint64_t Shamt;

  // The first operand must be an AND and the second operand of the AND must be
  // a shifted mask.
  if (FirstOperand.getOpcode() != ISD::AND ||
      !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
      !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
    return SDValue();

  // The second operand (shift amount) must be an immediate.
  if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
    return SDValue();

  Shamt = CN->getZExtValue();
  if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
    return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
                       FirstOperand->getOperand(0),
                       DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
                       DAG.getConstant(Shamt, DL, GRLenVT));

  return SDValue();
}

static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
                                TargetLowering::DAGCombinerInfo &DCI,
                                const LoongArchSubtarget &Subtarget) {
  MVT GRLenVT = Subtarget.getGRLenVT();
  EVT ValTy = N->getValueType(0);
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  ConstantSDNode *CN0, *CN1;
  SDLoc DL(N);
  unsigned ValBits = ValTy.getSizeInBits();
  unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
  unsigned Shamt;
  bool SwapAndRetried = false;

  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  if
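  // A worked example for the BSTRPICK combines above (illustration only, not
  // part of the original source). On LA64 a field extract such as
  //   unsigned t = (x >> 16) & 0xff;
  // reaches the combiner as (and (srl x, 16), 255). Here SMIdx == 0,
  // SMLen == 8 and lsb == 16, so msb == lsb + SMLen - 1 == 23 and the node is
  // rewritten to (LoongArchISD::BSTRPICK x, 23, 16), which selects to
  //   bstrpick.d $rd, $rs, 23, 16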
(ValBits != 32 && ValBits != 64)3246return SDValue();32473248Retry:3249// 1st pattern to match BSTRINS:3250// R = or (and X, mask0), (and (shl Y, lsb), mask1)3251// where mask1 = (2**size - 1) << lsb, mask0 = ~mask13252// =>3253// R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)3254if (N0.getOpcode() == ISD::AND &&3255(CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&3256isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&3257N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&3258(CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&3259isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&3260MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&3261(CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&3262(Shamt = CN1->getZExtValue()) == MaskIdx0 &&3263(MaskIdx0 + MaskLen0 <= ValBits)) {3264LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");3265return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),3266N1.getOperand(0).getOperand(0),3267DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),3268DAG.getConstant(MaskIdx0, DL, GRLenVT));3269}32703271// 2nd pattern to match BSTRINS:3272// R = or (and X, mask0), (shl (and Y, mask1), lsb)3273// where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)3274// =>3275// R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)3276if (N0.getOpcode() == ISD::AND &&3277(CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&3278isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&3279N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&3280(CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&3281(Shamt = CN1->getZExtValue()) == MaskIdx0 &&3282(CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&3283isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&3284MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&3285(MaskIdx0 + MaskLen0 <= ValBits)) {3286LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");3287return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),3288N1.getOperand(0).getOperand(0),3289DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),3290DAG.getConstant(MaskIdx0, DL, GRLenVT));3291}32923293// 3rd pattern to match BSTRINS:3294// R = or (and X, mask0), (and Y, mask1)3295// where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 03296// =>3297// R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb3298// where msb = lsb + size - 13299if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&3300(CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&3301isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&3302(MaskIdx0 + MaskLen0 <= 64) &&3303(CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&3304(CN1->getSExtValue() & CN0->getSExtValue()) == 0) {3305LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");3306return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),3307DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,3308DAG.getConstant(MaskIdx0, DL, GRLenVT)),3309DAG.getConstant(ValBits == 323310? 
(MaskIdx0 + (MaskLen0 & 31) - 1)3311: (MaskIdx0 + MaskLen0 - 1),3312DL, GRLenVT),3313DAG.getConstant(MaskIdx0, DL, GRLenVT));3314}33153316// 4th pattern to match BSTRINS:3317// R = or (and X, mask), (shl Y, shamt)3318// where mask = (2**shamt - 1)3319// =>3320// R = BSTRINS X, Y, ValBits - 1, shamt3321// where ValBits = 32 or 643322if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&3323(CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&3324isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&3325MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&3326(Shamt = CN1->getZExtValue()) == MaskLen0 &&3327(MaskIdx0 + MaskLen0 <= ValBits)) {3328LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");3329return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),3330N1.getOperand(0),3331DAG.getConstant((ValBits - 1), DL, GRLenVT),3332DAG.getConstant(Shamt, DL, GRLenVT));3333}33343335// 5th pattern to match BSTRINS:3336// R = or (and X, mask), const3337// where ~mask = (2**size - 1) << lsb, mask & const = 03338// =>3339// R = BSTRINS X, (const >> lsb), msb, lsb3340// where msb = lsb + size - 13341if (N0.getOpcode() == ISD::AND &&3342(CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&3343isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&3344(CN1 = dyn_cast<ConstantSDNode>(N1)) &&3345(CN1->getSExtValue() & CN0->getSExtValue()) == 0) {3346LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");3347return DAG.getNode(3348LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),3349DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),3350DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)3351: (MaskIdx0 + MaskLen0 - 1),3352DL, GRLenVT),3353DAG.getConstant(MaskIdx0, DL, GRLenVT));3354}33553356// 6th pattern.3357// a = b | ((c & mask) << shamt), where all positions in b to be overwritten3358// by the incoming bits are known to be zero.3359// =>3360// a = BSTRINS b, c, shamt + MaskLen - 1, shamt3361//3362// Note that the 1st pattern is a special situation of the 6th, i.e. the 6th3363// pattern is more common than the 1st. So we put the 1st before the 6th in3364// order to match as many nodes as possible.3365ConstantSDNode *CNMask, *CNShamt;3366unsigned MaskIdx, MaskLen;3367if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&3368(CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&3369isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&3370MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&3371CNShamt->getZExtValue() + MaskLen <= ValBits) {3372Shamt = CNShamt->getZExtValue();3373APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);3374if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {3375LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");3376return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,3377N1.getOperand(0).getOperand(0),3378DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),3379DAG.getConstant(Shamt, DL, GRLenVT));3380}3381}33823383// 7th pattern.3384// a = b | ((c << shamt) & shifted_mask), where all positions in b to be3385// overwritten by the incoming bits are known to be zero.3386// =>3387// a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx3388//3389// Similarly, the 7th pattern is more common than the 2nd. 
So we put the 2nd3390// before the 7th in order to match as many nodes as possible.3391if (N1.getOpcode() == ISD::AND &&3392(CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&3393isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&3394N1.getOperand(0).getOpcode() == ISD::SHL &&3395(CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&3396CNShamt->getZExtValue() == MaskIdx) {3397APInt ShMask(ValBits, CNMask->getZExtValue());3398if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {3399LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");3400return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,3401N1.getOperand(0).getOperand(0),3402DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),3403DAG.getConstant(MaskIdx, DL, GRLenVT));3404}3405}34063407// (or a, b) and (or b, a) are equivalent, so swap the operands and retry.3408if (!SwapAndRetried) {3409std::swap(N0, N1);3410SwapAndRetried = true;3411goto Retry;3412}34133414SwapAndRetried = false;3415Retry2:3416// 8th pattern.3417// a = b | (c & shifted_mask), where all positions in b to be overwritten by3418// the incoming bits are known to be zero.3419// =>3420// a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx3421//3422// Similarly, the 8th pattern is more common than the 4th and 5th patterns. So3423// we put it here in order to match as many nodes as possible or generate less3424// instructions.3425if (N1.getOpcode() == ISD::AND &&3426(CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&3427isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {3428APInt ShMask(ValBits, CNMask->getZExtValue());3429if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {3430LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");3431return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,3432DAG.getNode(ISD::SRL, DL, N1->getValueType(0),3433N1->getOperand(0),3434DAG.getConstant(MaskIdx, DL, GRLenVT)),3435DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),3436DAG.getConstant(MaskIdx, DL, GRLenVT));3437}3438}3439// Swap N0/N1 and retry.3440if (!SwapAndRetried) {3441std::swap(N0, N1);3442SwapAndRetried = true;3443goto Retry2;3444}34453446return SDValue();3447}34483449static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {3450ExtType = ISD::NON_EXTLOAD;34513452switch (V.getNode()->getOpcode()) {3453case ISD::LOAD: {3454LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());3455if ((LoadNode->getMemoryVT() == MVT::i8) ||3456(LoadNode->getMemoryVT() == MVT::i16)) {3457ExtType = LoadNode->getExtensionType();3458return true;3459}3460return false;3461}3462case ISD::AssertSext: {3463VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));3464if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {3465ExtType = ISD::SEXTLOAD;3466return true;3467}3468return false;3469}3470case ISD::AssertZext: {3471VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));3472if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {3473ExtType = ISD::ZEXTLOAD;3474return true;3475}3476return false;3477}3478default:3479return false;3480}34813482return false;3483}34843485// Eliminate redundant truncation and zero-extension nodes.3486// * Case 1:3487// +------------+ +------------+ +------------+3488// | Input1 | | Input2 | | CC |3489// +------------+ +------------+ +------------+3490// | | |3491// V V +----+3492// +------------+ +------------+ |3493// | TRUNCATE | | TRUNCATE | |3494// +------------+ +------------+ |3495// | | |3496// V V |3497// +------------+ 
+------------+ |3498// | ZERO_EXT | | ZERO_EXT | |3499// +------------+ +------------+ |3500// | | |3501// | +-------------+ |3502// V V | |3503// +----------------+ | |3504// | AND | | |3505// +----------------+ | |3506// | | |3507// +---------------+ | |3508// | | |3509// V V V3510// +-------------+3511// | CMP |3512// +-------------+3513// * Case 2:3514// +------------+ +------------+ +-------------+ +------------+ +------------+3515// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |3516// +------------+ +------------+ +-------------+ +------------+ +------------+3517// | | | | |3518// V | | | |3519// +------------+ | | | |3520// | XOR |<---------------------+ | |3521// +------------+ | | |3522// | | | |3523// V V +---------------+ |3524// +------------+ +------------+ | |3525// | TRUNCATE | | TRUNCATE | | +-------------------------+3526// +------------+ +------------+ | |3527// | | | |3528// V V | |3529// +------------+ +------------+ | |3530// | ZERO_EXT | | ZERO_EXT | | |3531// +------------+ +------------+ | |3532// | | | |3533// V V | |3534// +----------------+ | |3535// | AND | | |3536// +----------------+ | |3537// | | |3538// +---------------+ | |3539// | | |3540// V V V3541// +-------------+3542// | CMP |3543// +-------------+3544static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,3545TargetLowering::DAGCombinerInfo &DCI,3546const LoongArchSubtarget &Subtarget) {3547ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();35483549SDNode *AndNode = N->getOperand(0).getNode();3550if (AndNode->getOpcode() != ISD::AND)3551return SDValue();35523553SDValue AndInputValue2 = AndNode->getOperand(1);3554if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)3555return SDValue();35563557SDValue CmpInputValue = N->getOperand(1);3558SDValue AndInputValue1 = AndNode->getOperand(0);3559if (AndInputValue1.getOpcode() == ISD::XOR) {3560if (CC != ISD::SETEQ && CC != ISD::SETNE)3561return SDValue();3562ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));3563if (!CN || CN->getSExtValue() != -1)3564return SDValue();3565CN = dyn_cast<ConstantSDNode>(CmpInputValue);3566if (!CN || CN->getSExtValue() != 0)3567return SDValue();3568AndInputValue1 = AndInputValue1.getOperand(0);3569if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)3570return SDValue();3571} else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {3572if (AndInputValue2 != CmpInputValue)3573return SDValue();3574} else {3575return SDValue();3576}35773578SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);3579if (TruncValue1.getOpcode() != ISD::TRUNCATE)3580return SDValue();35813582SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);3583if (TruncValue2.getOpcode() != ISD::TRUNCATE)3584return SDValue();35853586SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);3587SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);3588ISD::LoadExtType ExtType1;3589ISD::LoadExtType ExtType2;35903591if (!checkValueWidth(TruncInputValue1, ExtType1) ||3592!checkValueWidth(TruncInputValue2, ExtType2))3593return SDValue();35943595if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||3596AndNode->getValueType(0) != TruncInputValue1->getValueType(0))3597return SDValue();35983599if ((ExtType2 != ISD::ZEXTLOAD) &&3600((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))3601return SDValue();36023603// These truncation and zero-extension nodes are not necessary, remove them.3604SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), 
AndNode->getValueType(0),3605TruncInputValue1, TruncInputValue2);3606SDValue NewSetCC =3607DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);3608DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());3609return SDValue(N, 0);3610}36113612// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.3613static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,3614TargetLowering::DAGCombinerInfo &DCI,3615const LoongArchSubtarget &Subtarget) {3616if (DCI.isBeforeLegalizeOps())3617return SDValue();36183619SDValue Src = N->getOperand(0);3620if (Src.getOpcode() != LoongArchISD::REVB_2W)3621return SDValue();36223623return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),3624Src.getOperand(0));3625}36263627template <unsigned N>3628static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,3629SelectionDAG &DAG,3630const LoongArchSubtarget &Subtarget,3631bool IsSigned = false) {3632SDLoc DL(Node);3633auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));3634// Check the ImmArg.3635if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||3636(!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {3637DAG.getContext()->emitError(Node->getOperationName(0) +3638": argument out of range.");3639return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());3640}3641return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());3642}36433644template <unsigned N>3645static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,3646SelectionDAG &DAG, bool IsSigned = false) {3647SDLoc DL(Node);3648EVT ResTy = Node->getValueType(0);3649auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));36503651// Check the ImmArg.3652if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||3653(!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {3654DAG.getContext()->emitError(Node->getOperationName(0) +3655": argument out of range.");3656return DAG.getNode(ISD::UNDEF, DL, ResTy);3657}3658return DAG.getConstant(3659APInt(ResTy.getScalarType().getSizeInBits(),3660IsSigned ? 
CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),3661DL, ResTy);3662}36633664static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {3665SDLoc DL(Node);3666EVT ResTy = Node->getValueType(0);3667SDValue Vec = Node->getOperand(2);3668SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);3669return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);3670}36713672static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {3673SDLoc DL(Node);3674EVT ResTy = Node->getValueType(0);3675SDValue One = DAG.getConstant(1, DL, ResTy);3676SDValue Bit =3677DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));36783679return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),3680DAG.getNOT(DL, Bit, ResTy));3681}36823683template <unsigned N>3684static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {3685SDLoc DL(Node);3686EVT ResTy = Node->getValueType(0);3687auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));3688// Check the unsigned ImmArg.3689if (!isUInt<N>(CImm->getZExtValue())) {3690DAG.getContext()->emitError(Node->getOperationName(0) +3691": argument out of range.");3692return DAG.getNode(ISD::UNDEF, DL, ResTy);3693}36943695APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();3696SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);36973698return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);3699}37003701template <unsigned N>3702static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {3703SDLoc DL(Node);3704EVT ResTy = Node->getValueType(0);3705auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));3706// Check the unsigned ImmArg.3707if (!isUInt<N>(CImm->getZExtValue())) {3708DAG.getContext()->emitError(Node->getOperationName(0) +3709": argument out of range.");3710return DAG.getNode(ISD::UNDEF, DL, ResTy);3711}37123713APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();3714SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);3715return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);3716}37173718template <unsigned N>3719static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {3720SDLoc DL(Node);3721EVT ResTy = Node->getValueType(0);3722auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));3723// Check the unsigned ImmArg.3724if (!isUInt<N>(CImm->getZExtValue())) {3725DAG.getContext()->emitError(Node->getOperationName(0) +3726": argument out of range.");3727return DAG.getNode(ISD::UNDEF, DL, ResTy);3728}37293730APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();3731SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);3732return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);3733}37343735static SDValue3736performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,3737TargetLowering::DAGCombinerInfo &DCI,3738const LoongArchSubtarget &Subtarget) {3739SDLoc DL(N);3740switch (N->getConstantOperandVal(0)) {3741default:3742break;3743case Intrinsic::loongarch_lsx_vadd_b:3744case Intrinsic::loongarch_lsx_vadd_h:3745case Intrinsic::loongarch_lsx_vadd_w:3746case Intrinsic::loongarch_lsx_vadd_d:3747case Intrinsic::loongarch_lasx_xvadd_b:3748case Intrinsic::loongarch_lasx_xvadd_h:3749case Intrinsic::loongarch_lasx_xvadd_w:3750case Intrinsic::loongarch_lasx_xvadd_d:3751return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),3752N->getOperand(2));3753case Intrinsic::loongarch_lsx_vaddi_bu:3754case Intrinsic::loongarch_lsx_vaddi_hu:3755case Intrinsic::loongarch_lsx_vaddi_wu:3756case 
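  // Illustration of the splat-immediate helper used by the cases below (not
  // in the original source; values are hypothetical). For
  //   @llvm.loongarch.lsx.vaddi.bu(<16 x i8> %v, i32 3)
  // lowerVectorSplatImm<5> checks isUInt<5>(3) and returns a v16i8 splat of 3,
  // so the intrinsic becomes (add %v, splat(3)). An out-of-range immediate
  // such as 40 instead reports "argument out of range" and yields an UNDEF
  // operand.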
Intrinsic::loongarch_lsx_vaddi_du:3757case Intrinsic::loongarch_lasx_xvaddi_bu:3758case Intrinsic::loongarch_lasx_xvaddi_hu:3759case Intrinsic::loongarch_lasx_xvaddi_wu:3760case Intrinsic::loongarch_lasx_xvaddi_du:3761return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),3762lowerVectorSplatImm<5>(N, 2, DAG));3763case Intrinsic::loongarch_lsx_vsub_b:3764case Intrinsic::loongarch_lsx_vsub_h:3765case Intrinsic::loongarch_lsx_vsub_w:3766case Intrinsic::loongarch_lsx_vsub_d:3767case Intrinsic::loongarch_lasx_xvsub_b:3768case Intrinsic::loongarch_lasx_xvsub_h:3769case Intrinsic::loongarch_lasx_xvsub_w:3770case Intrinsic::loongarch_lasx_xvsub_d:3771return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),3772N->getOperand(2));3773case Intrinsic::loongarch_lsx_vsubi_bu:3774case Intrinsic::loongarch_lsx_vsubi_hu:3775case Intrinsic::loongarch_lsx_vsubi_wu:3776case Intrinsic::loongarch_lsx_vsubi_du:3777case Intrinsic::loongarch_lasx_xvsubi_bu:3778case Intrinsic::loongarch_lasx_xvsubi_hu:3779case Intrinsic::loongarch_lasx_xvsubi_wu:3780case Intrinsic::loongarch_lasx_xvsubi_du:3781return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),3782lowerVectorSplatImm<5>(N, 2, DAG));3783case Intrinsic::loongarch_lsx_vneg_b:3784case Intrinsic::loongarch_lsx_vneg_h:3785case Intrinsic::loongarch_lsx_vneg_w:3786case Intrinsic::loongarch_lsx_vneg_d:3787case Intrinsic::loongarch_lasx_xvneg_b:3788case Intrinsic::loongarch_lasx_xvneg_h:3789case Intrinsic::loongarch_lasx_xvneg_w:3790case Intrinsic::loongarch_lasx_xvneg_d:3791return DAG.getNode(3792ISD::SUB, DL, N->getValueType(0),3793DAG.getConstant(3794APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,3795/*isSigned=*/true),3796SDLoc(N), N->getValueType(0)),3797N->getOperand(1));3798case Intrinsic::loongarch_lsx_vmax_b:3799case Intrinsic::loongarch_lsx_vmax_h:3800case Intrinsic::loongarch_lsx_vmax_w:3801case Intrinsic::loongarch_lsx_vmax_d:3802case Intrinsic::loongarch_lasx_xvmax_b:3803case Intrinsic::loongarch_lasx_xvmax_h:3804case Intrinsic::loongarch_lasx_xvmax_w:3805case Intrinsic::loongarch_lasx_xvmax_d:3806return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),3807N->getOperand(2));3808case Intrinsic::loongarch_lsx_vmax_bu:3809case Intrinsic::loongarch_lsx_vmax_hu:3810case Intrinsic::loongarch_lsx_vmax_wu:3811case Intrinsic::loongarch_lsx_vmax_du:3812case Intrinsic::loongarch_lasx_xvmax_bu:3813case Intrinsic::loongarch_lasx_xvmax_hu:3814case Intrinsic::loongarch_lasx_xvmax_wu:3815case Intrinsic::loongarch_lasx_xvmax_du:3816return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),3817N->getOperand(2));3818case Intrinsic::loongarch_lsx_vmaxi_b:3819case Intrinsic::loongarch_lsx_vmaxi_h:3820case Intrinsic::loongarch_lsx_vmaxi_w:3821case Intrinsic::loongarch_lsx_vmaxi_d:3822case Intrinsic::loongarch_lasx_xvmaxi_b:3823case Intrinsic::loongarch_lasx_xvmaxi_h:3824case Intrinsic::loongarch_lasx_xvmaxi_w:3825case Intrinsic::loongarch_lasx_xvmaxi_d:3826return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),3827lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));3828case Intrinsic::loongarch_lsx_vmaxi_bu:3829case Intrinsic::loongarch_lsx_vmaxi_hu:3830case Intrinsic::loongarch_lsx_vmaxi_wu:3831case Intrinsic::loongarch_lsx_vmaxi_du:3832case Intrinsic::loongarch_lasx_xvmaxi_bu:3833case Intrinsic::loongarch_lasx_xvmaxi_hu:3834case Intrinsic::loongarch_lasx_xvmaxi_wu:3835case Intrinsic::loongarch_lasx_xvmaxi_du:3836return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), 
N->getOperand(1),3837lowerVectorSplatImm<5>(N, 2, DAG));3838case Intrinsic::loongarch_lsx_vmin_b:3839case Intrinsic::loongarch_lsx_vmin_h:3840case Intrinsic::loongarch_lsx_vmin_w:3841case Intrinsic::loongarch_lsx_vmin_d:3842case Intrinsic::loongarch_lasx_xvmin_b:3843case Intrinsic::loongarch_lasx_xvmin_h:3844case Intrinsic::loongarch_lasx_xvmin_w:3845case Intrinsic::loongarch_lasx_xvmin_d:3846return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),3847N->getOperand(2));3848case Intrinsic::loongarch_lsx_vmin_bu:3849case Intrinsic::loongarch_lsx_vmin_hu:3850case Intrinsic::loongarch_lsx_vmin_wu:3851case Intrinsic::loongarch_lsx_vmin_du:3852case Intrinsic::loongarch_lasx_xvmin_bu:3853case Intrinsic::loongarch_lasx_xvmin_hu:3854case Intrinsic::loongarch_lasx_xvmin_wu:3855case Intrinsic::loongarch_lasx_xvmin_du:3856return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),3857N->getOperand(2));3858case Intrinsic::loongarch_lsx_vmini_b:3859case Intrinsic::loongarch_lsx_vmini_h:3860case Intrinsic::loongarch_lsx_vmini_w:3861case Intrinsic::loongarch_lsx_vmini_d:3862case Intrinsic::loongarch_lasx_xvmini_b:3863case Intrinsic::loongarch_lasx_xvmini_h:3864case Intrinsic::loongarch_lasx_xvmini_w:3865case Intrinsic::loongarch_lasx_xvmini_d:3866return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),3867lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));3868case Intrinsic::loongarch_lsx_vmini_bu:3869case Intrinsic::loongarch_lsx_vmini_hu:3870case Intrinsic::loongarch_lsx_vmini_wu:3871case Intrinsic::loongarch_lsx_vmini_du:3872case Intrinsic::loongarch_lasx_xvmini_bu:3873case Intrinsic::loongarch_lasx_xvmini_hu:3874case Intrinsic::loongarch_lasx_xvmini_wu:3875case Intrinsic::loongarch_lasx_xvmini_du:3876return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),3877lowerVectorSplatImm<5>(N, 2, DAG));3878case Intrinsic::loongarch_lsx_vmul_b:3879case Intrinsic::loongarch_lsx_vmul_h:3880case Intrinsic::loongarch_lsx_vmul_w:3881case Intrinsic::loongarch_lsx_vmul_d:3882case Intrinsic::loongarch_lasx_xvmul_b:3883case Intrinsic::loongarch_lasx_xvmul_h:3884case Intrinsic::loongarch_lasx_xvmul_w:3885case Intrinsic::loongarch_lasx_xvmul_d:3886return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),3887N->getOperand(2));3888case Intrinsic::loongarch_lsx_vmadd_b:3889case Intrinsic::loongarch_lsx_vmadd_h:3890case Intrinsic::loongarch_lsx_vmadd_w:3891case Intrinsic::loongarch_lsx_vmadd_d:3892case Intrinsic::loongarch_lasx_xvmadd_b:3893case Intrinsic::loongarch_lasx_xvmadd_h:3894case Intrinsic::loongarch_lasx_xvmadd_w:3895case Intrinsic::loongarch_lasx_xvmadd_d: {3896EVT ResTy = N->getValueType(0);3897return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),3898DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),3899N->getOperand(3)));3900}3901case Intrinsic::loongarch_lsx_vmsub_b:3902case Intrinsic::loongarch_lsx_vmsub_h:3903case Intrinsic::loongarch_lsx_vmsub_w:3904case Intrinsic::loongarch_lsx_vmsub_d:3905case Intrinsic::loongarch_lasx_xvmsub_b:3906case Intrinsic::loongarch_lasx_xvmsub_h:3907case Intrinsic::loongarch_lasx_xvmsub_w:3908case Intrinsic::loongarch_lasx_xvmsub_d: {3909EVT ResTy = N->getValueType(0);3910return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),3911DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),3912N->getOperand(3)));3913}3914case Intrinsic::loongarch_lsx_vdiv_b:3915case Intrinsic::loongarch_lsx_vdiv_h:3916case Intrinsic::loongarch_lsx_vdiv_w:3917case Intrinsic::loongarch_lsx_vdiv_d:3918case 
Intrinsic::loongarch_lasx_xvdiv_b:3919case Intrinsic::loongarch_lasx_xvdiv_h:3920case Intrinsic::loongarch_lasx_xvdiv_w:3921case Intrinsic::loongarch_lasx_xvdiv_d:3922return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),3923N->getOperand(2));3924case Intrinsic::loongarch_lsx_vdiv_bu:3925case Intrinsic::loongarch_lsx_vdiv_hu:3926case Intrinsic::loongarch_lsx_vdiv_wu:3927case Intrinsic::loongarch_lsx_vdiv_du:3928case Intrinsic::loongarch_lasx_xvdiv_bu:3929case Intrinsic::loongarch_lasx_xvdiv_hu:3930case Intrinsic::loongarch_lasx_xvdiv_wu:3931case Intrinsic::loongarch_lasx_xvdiv_du:3932return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),3933N->getOperand(2));3934case Intrinsic::loongarch_lsx_vmod_b:3935case Intrinsic::loongarch_lsx_vmod_h:3936case Intrinsic::loongarch_lsx_vmod_w:3937case Intrinsic::loongarch_lsx_vmod_d:3938case Intrinsic::loongarch_lasx_xvmod_b:3939case Intrinsic::loongarch_lasx_xvmod_h:3940case Intrinsic::loongarch_lasx_xvmod_w:3941case Intrinsic::loongarch_lasx_xvmod_d:3942return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),3943N->getOperand(2));3944case Intrinsic::loongarch_lsx_vmod_bu:3945case Intrinsic::loongarch_lsx_vmod_hu:3946case Intrinsic::loongarch_lsx_vmod_wu:3947case Intrinsic::loongarch_lsx_vmod_du:3948case Intrinsic::loongarch_lasx_xvmod_bu:3949case Intrinsic::loongarch_lasx_xvmod_hu:3950case Intrinsic::loongarch_lasx_xvmod_wu:3951case Intrinsic::loongarch_lasx_xvmod_du:3952return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),3953N->getOperand(2));3954case Intrinsic::loongarch_lsx_vand_v:3955case Intrinsic::loongarch_lasx_xvand_v:3956return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),3957N->getOperand(2));3958case Intrinsic::loongarch_lsx_vor_v:3959case Intrinsic::loongarch_lasx_xvor_v:3960return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),3961N->getOperand(2));3962case Intrinsic::loongarch_lsx_vxor_v:3963case Intrinsic::loongarch_lasx_xvxor_v:3964return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),3965N->getOperand(2));3966case Intrinsic::loongarch_lsx_vnor_v:3967case Intrinsic::loongarch_lasx_xvnor_v: {3968SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),3969N->getOperand(2));3970return DAG.getNOT(DL, Res, Res->getValueType(0));3971}3972case Intrinsic::loongarch_lsx_vandi_b:3973case Intrinsic::loongarch_lasx_xvandi_b:3974return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),3975lowerVectorSplatImm<8>(N, 2, DAG));3976case Intrinsic::loongarch_lsx_vori_b:3977case Intrinsic::loongarch_lasx_xvori_b:3978return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),3979lowerVectorSplatImm<8>(N, 2, DAG));3980case Intrinsic::loongarch_lsx_vxori_b:3981case Intrinsic::loongarch_lasx_xvxori_b:3982return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),3983lowerVectorSplatImm<8>(N, 2, DAG));3984case Intrinsic::loongarch_lsx_vsll_b:3985case Intrinsic::loongarch_lsx_vsll_h:3986case Intrinsic::loongarch_lsx_vsll_w:3987case Intrinsic::loongarch_lsx_vsll_d:3988case Intrinsic::loongarch_lasx_xvsll_b:3989case Intrinsic::loongarch_lasx_xvsll_h:3990case Intrinsic::loongarch_lasx_xvsll_w:3991case Intrinsic::loongarch_lasx_xvsll_d:3992return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),3993truncateVecElts(N, DAG));3994case Intrinsic::loongarch_lsx_vslli_b:3995case Intrinsic::loongarch_lasx_xvslli_b:3996return DAG.getNode(ISD::SHL, DL, N->getValueType(0), 
N->getOperand(1),3997lowerVectorSplatImm<3>(N, 2, DAG));3998case Intrinsic::loongarch_lsx_vslli_h:3999case Intrinsic::loongarch_lasx_xvslli_h:4000return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),4001lowerVectorSplatImm<4>(N, 2, DAG));4002case Intrinsic::loongarch_lsx_vslli_w:4003case Intrinsic::loongarch_lasx_xvslli_w:4004return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),4005lowerVectorSplatImm<5>(N, 2, DAG));4006case Intrinsic::loongarch_lsx_vslli_d:4007case Intrinsic::loongarch_lasx_xvslli_d:4008return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),4009lowerVectorSplatImm<6>(N, 2, DAG));4010case Intrinsic::loongarch_lsx_vsrl_b:4011case Intrinsic::loongarch_lsx_vsrl_h:4012case Intrinsic::loongarch_lsx_vsrl_w:4013case Intrinsic::loongarch_lsx_vsrl_d:4014case Intrinsic::loongarch_lasx_xvsrl_b:4015case Intrinsic::loongarch_lasx_xvsrl_h:4016case Intrinsic::loongarch_lasx_xvsrl_w:4017case Intrinsic::loongarch_lasx_xvsrl_d:4018return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),4019truncateVecElts(N, DAG));4020case Intrinsic::loongarch_lsx_vsrli_b:4021case Intrinsic::loongarch_lasx_xvsrli_b:4022return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),4023lowerVectorSplatImm<3>(N, 2, DAG));4024case Intrinsic::loongarch_lsx_vsrli_h:4025case Intrinsic::loongarch_lasx_xvsrli_h:4026return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),4027lowerVectorSplatImm<4>(N, 2, DAG));4028case Intrinsic::loongarch_lsx_vsrli_w:4029case Intrinsic::loongarch_lasx_xvsrli_w:4030return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),4031lowerVectorSplatImm<5>(N, 2, DAG));4032case Intrinsic::loongarch_lsx_vsrli_d:4033case Intrinsic::loongarch_lasx_xvsrli_d:4034return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),4035lowerVectorSplatImm<6>(N, 2, DAG));4036case Intrinsic::loongarch_lsx_vsra_b:4037case Intrinsic::loongarch_lsx_vsra_h:4038case Intrinsic::loongarch_lsx_vsra_w:4039case Intrinsic::loongarch_lsx_vsra_d:4040case Intrinsic::loongarch_lasx_xvsra_b:4041case Intrinsic::loongarch_lasx_xvsra_h:4042case Intrinsic::loongarch_lasx_xvsra_w:4043case Intrinsic::loongarch_lasx_xvsra_d:4044return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),4045truncateVecElts(N, DAG));4046case Intrinsic::loongarch_lsx_vsrai_b:4047case Intrinsic::loongarch_lasx_xvsrai_b:4048return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),4049lowerVectorSplatImm<3>(N, 2, DAG));4050case Intrinsic::loongarch_lsx_vsrai_h:4051case Intrinsic::loongarch_lasx_xvsrai_h:4052return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),4053lowerVectorSplatImm<4>(N, 2, DAG));4054case Intrinsic::loongarch_lsx_vsrai_w:4055case Intrinsic::loongarch_lasx_xvsrai_w:4056return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),4057lowerVectorSplatImm<5>(N, 2, DAG));4058case Intrinsic::loongarch_lsx_vsrai_d:4059case Intrinsic::loongarch_lasx_xvsrai_d:4060return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),4061lowerVectorSplatImm<6>(N, 2, DAG));4062case Intrinsic::loongarch_lsx_vclz_b:4063case Intrinsic::loongarch_lsx_vclz_h:4064case Intrinsic::loongarch_lsx_vclz_w:4065case Intrinsic::loongarch_lsx_vclz_d:4066case Intrinsic::loongarch_lasx_xvclz_b:4067case Intrinsic::loongarch_lasx_xvclz_h:4068case Intrinsic::loongarch_lasx_xvclz_w:4069case Intrinsic::loongarch_lasx_xvclz_d:4070return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));4071case 
Intrinsic::loongarch_lsx_vpcnt_b:4072case Intrinsic::loongarch_lsx_vpcnt_h:4073case Intrinsic::loongarch_lsx_vpcnt_w:4074case Intrinsic::loongarch_lsx_vpcnt_d:4075case Intrinsic::loongarch_lasx_xvpcnt_b:4076case Intrinsic::loongarch_lasx_xvpcnt_h:4077case Intrinsic::loongarch_lasx_xvpcnt_w:4078case Intrinsic::loongarch_lasx_xvpcnt_d:4079return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));4080case Intrinsic::loongarch_lsx_vbitclr_b:4081case Intrinsic::loongarch_lsx_vbitclr_h:4082case Intrinsic::loongarch_lsx_vbitclr_w:4083case Intrinsic::loongarch_lsx_vbitclr_d:4084case Intrinsic::loongarch_lasx_xvbitclr_b:4085case Intrinsic::loongarch_lasx_xvbitclr_h:4086case Intrinsic::loongarch_lasx_xvbitclr_w:4087case Intrinsic::loongarch_lasx_xvbitclr_d:4088return lowerVectorBitClear(N, DAG);4089case Intrinsic::loongarch_lsx_vbitclri_b:4090case Intrinsic::loongarch_lasx_xvbitclri_b:4091return lowerVectorBitClearImm<3>(N, DAG);4092case Intrinsic::loongarch_lsx_vbitclri_h:4093case Intrinsic::loongarch_lasx_xvbitclri_h:4094return lowerVectorBitClearImm<4>(N, DAG);4095case Intrinsic::loongarch_lsx_vbitclri_w:4096case Intrinsic::loongarch_lasx_xvbitclri_w:4097return lowerVectorBitClearImm<5>(N, DAG);4098case Intrinsic::loongarch_lsx_vbitclri_d:4099case Intrinsic::loongarch_lasx_xvbitclri_d:4100return lowerVectorBitClearImm<6>(N, DAG);4101case Intrinsic::loongarch_lsx_vbitset_b:4102case Intrinsic::loongarch_lsx_vbitset_h:4103case Intrinsic::loongarch_lsx_vbitset_w:4104case Intrinsic::loongarch_lsx_vbitset_d:4105case Intrinsic::loongarch_lasx_xvbitset_b:4106case Intrinsic::loongarch_lasx_xvbitset_h:4107case Intrinsic::loongarch_lasx_xvbitset_w:4108case Intrinsic::loongarch_lasx_xvbitset_d: {4109EVT VecTy = N->getValueType(0);4110SDValue One = DAG.getConstant(1, DL, VecTy);4111return DAG.getNode(4112ISD::OR, DL, VecTy, N->getOperand(1),4113DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));4114}4115case Intrinsic::loongarch_lsx_vbitseti_b:4116case Intrinsic::loongarch_lasx_xvbitseti_b:4117return lowerVectorBitSetImm<3>(N, DAG);4118case Intrinsic::loongarch_lsx_vbitseti_h:4119case Intrinsic::loongarch_lasx_xvbitseti_h:4120return lowerVectorBitSetImm<4>(N, DAG);4121case Intrinsic::loongarch_lsx_vbitseti_w:4122case Intrinsic::loongarch_lasx_xvbitseti_w:4123return lowerVectorBitSetImm<5>(N, DAG);4124case Intrinsic::loongarch_lsx_vbitseti_d:4125case Intrinsic::loongarch_lasx_xvbitseti_d:4126return lowerVectorBitSetImm<6>(N, DAG);4127case Intrinsic::loongarch_lsx_vbitrev_b:4128case Intrinsic::loongarch_lsx_vbitrev_h:4129case Intrinsic::loongarch_lsx_vbitrev_w:4130case Intrinsic::loongarch_lsx_vbitrev_d:4131case Intrinsic::loongarch_lasx_xvbitrev_b:4132case Intrinsic::loongarch_lasx_xvbitrev_h:4133case Intrinsic::loongarch_lasx_xvbitrev_w:4134case Intrinsic::loongarch_lasx_xvbitrev_d: {4135EVT VecTy = N->getValueType(0);4136SDValue One = DAG.getConstant(1, DL, VecTy);4137return DAG.getNode(4138ISD::XOR, DL, VecTy, N->getOperand(1),4139DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));4140}4141case Intrinsic::loongarch_lsx_vbitrevi_b:4142case Intrinsic::loongarch_lasx_xvbitrevi_b:4143return lowerVectorBitRevImm<3>(N, DAG);4144case Intrinsic::loongarch_lsx_vbitrevi_h:4145case Intrinsic::loongarch_lasx_xvbitrevi_h:4146return lowerVectorBitRevImm<4>(N, DAG);4147case Intrinsic::loongarch_lsx_vbitrevi_w:4148case Intrinsic::loongarch_lasx_xvbitrevi_w:4149return lowerVectorBitRevImm<5>(N, DAG);4150case Intrinsic::loongarch_lsx_vbitrevi_d:4151case 
Intrinsic::loongarch_lasx_xvbitrevi_d:4152return lowerVectorBitRevImm<6>(N, DAG);4153case Intrinsic::loongarch_lsx_vfadd_s:4154case Intrinsic::loongarch_lsx_vfadd_d:4155case Intrinsic::loongarch_lasx_xvfadd_s:4156case Intrinsic::loongarch_lasx_xvfadd_d:4157return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),4158N->getOperand(2));4159case Intrinsic::loongarch_lsx_vfsub_s:4160case Intrinsic::loongarch_lsx_vfsub_d:4161case Intrinsic::loongarch_lasx_xvfsub_s:4162case Intrinsic::loongarch_lasx_xvfsub_d:4163return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),4164N->getOperand(2));4165case Intrinsic::loongarch_lsx_vfmul_s:4166case Intrinsic::loongarch_lsx_vfmul_d:4167case Intrinsic::loongarch_lasx_xvfmul_s:4168case Intrinsic::loongarch_lasx_xvfmul_d:4169return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),4170N->getOperand(2));4171case Intrinsic::loongarch_lsx_vfdiv_s:4172case Intrinsic::loongarch_lsx_vfdiv_d:4173case Intrinsic::loongarch_lasx_xvfdiv_s:4174case Intrinsic::loongarch_lasx_xvfdiv_d:4175return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),4176N->getOperand(2));4177case Intrinsic::loongarch_lsx_vfmadd_s:4178case Intrinsic::loongarch_lsx_vfmadd_d:4179case Intrinsic::loongarch_lasx_xvfmadd_s:4180case Intrinsic::loongarch_lasx_xvfmadd_d:4181return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),4182N->getOperand(2), N->getOperand(3));4183case Intrinsic::loongarch_lsx_vinsgr2vr_b:4184return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),4185N->getOperand(1), N->getOperand(2),4186legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));4187case Intrinsic::loongarch_lsx_vinsgr2vr_h:4188case Intrinsic::loongarch_lasx_xvinsgr2vr_w:4189return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),4190N->getOperand(1), N->getOperand(2),4191legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));4192case Intrinsic::loongarch_lsx_vinsgr2vr_w:4193case Intrinsic::loongarch_lasx_xvinsgr2vr_d:4194return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),4195N->getOperand(1), N->getOperand(2),4196legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));4197case Intrinsic::loongarch_lsx_vinsgr2vr_d:4198return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),4199N->getOperand(1), N->getOperand(2),4200legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));4201case Intrinsic::loongarch_lsx_vreplgr2vr_b:4202case Intrinsic::loongarch_lsx_vreplgr2vr_h:4203case Intrinsic::loongarch_lsx_vreplgr2vr_w:4204case Intrinsic::loongarch_lsx_vreplgr2vr_d:4205case Intrinsic::loongarch_lasx_xvreplgr2vr_b:4206case Intrinsic::loongarch_lasx_xvreplgr2vr_h:4207case Intrinsic::loongarch_lasx_xvreplgr2vr_w:4208case Intrinsic::loongarch_lasx_xvreplgr2vr_d: {4209EVT ResTy = N->getValueType(0);4210SmallVector<SDValue> Ops(ResTy.getVectorNumElements(), N->getOperand(1));4211return DAG.getBuildVector(ResTy, DL, Ops);4212}4213case Intrinsic::loongarch_lsx_vreplve_b:4214case Intrinsic::loongarch_lsx_vreplve_h:4215case Intrinsic::loongarch_lsx_vreplve_w:4216case Intrinsic::loongarch_lsx_vreplve_d:4217case Intrinsic::loongarch_lasx_xvreplve_b:4218case Intrinsic::loongarch_lasx_xvreplve_h:4219case Intrinsic::loongarch_lasx_xvreplve_w:4220case Intrinsic::loongarch_lasx_xvreplve_d:4221return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),4222N->getOperand(1),4223DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),4224N->getOperand(2)));4225}4226return SDValue();4227}42284229SDValue 
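// A small worked example for the combine above (illustration only, not part
// of the original source): a call to
//   @llvm.loongarch.lsx.vadd.w(<4 x i32> %a, <4 x i32> %b)
// is replaced by a plain ISD::ADD on v4i32, and
//   @llvm.loongarch.lsx.vslli.w(%a, 2)
// by (shl %a, splat(2)), so the generic DAG combiner and instruction
// selection can treat these intrinsics like ordinary vector adds and shifts.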
LoongArchTargetLowering::PerformDAGCombine(SDNode *N,4230DAGCombinerInfo &DCI) const {4231SelectionDAG &DAG = DCI.DAG;4232switch (N->getOpcode()) {4233default:4234break;4235case ISD::AND:4236return performANDCombine(N, DAG, DCI, Subtarget);4237case ISD::OR:4238return performORCombine(N, DAG, DCI, Subtarget);4239case ISD::SETCC:4240return performSETCCCombine(N, DAG, DCI, Subtarget);4241case ISD::SRL:4242return performSRLCombine(N, DAG, DCI, Subtarget);4243case LoongArchISD::BITREV_W:4244return performBITREV_WCombine(N, DAG, DCI, Subtarget);4245case ISD::INTRINSIC_WO_CHAIN:4246return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);4247}4248return SDValue();4249}42504251static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,4252MachineBasicBlock *MBB) {4253if (!ZeroDivCheck)4254return MBB;42554256// Build instructions:4257// MBB:4258// div(or mod) $dst, $dividend, $divisor4259// bnez $divisor, SinkMBB4260// BreakMBB:4261// break 7 // BRK_DIVZERO4262// SinkMBB:4263// fallthrough4264const BasicBlock *LLVM_BB = MBB->getBasicBlock();4265MachineFunction::iterator It = ++MBB->getIterator();4266MachineFunction *MF = MBB->getParent();4267auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);4268auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);4269MF->insert(It, BreakMBB);4270MF->insert(It, SinkMBB);42714272// Transfer the remainder of MBB and its successor edges to SinkMBB.4273SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());4274SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);42754276const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();4277DebugLoc DL = MI.getDebugLoc();4278MachineOperand &Divisor = MI.getOperand(2);4279Register DivisorReg = Divisor.getReg();42804281// MBB:4282BuildMI(MBB, DL, TII.get(LoongArch::BNEZ))4283.addReg(DivisorReg, getKillRegState(Divisor.isKill()))4284.addMBB(SinkMBB);4285MBB->addSuccessor(BreakMBB);4286MBB->addSuccessor(SinkMBB);42874288// BreakMBB:4289// See linux header file arch/loongarch/include/uapi/asm/break.h for the4290// definition of BRK_DIVZERO.4291BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);4292BreakMBB->addSuccessor(SinkMBB);42934294// Clear Divisor's kill flag.4295Divisor.setIsKill(false);42964297return SinkMBB;4298}42994300static MachineBasicBlock *4301emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,4302const LoongArchSubtarget &Subtarget) {4303unsigned CondOpc;4304switch (MI.getOpcode()) {4305default:4306llvm_unreachable("Unexpected opcode");4307case LoongArch::PseudoVBZ:4308CondOpc = LoongArch::VSETEQZ_V;4309break;4310case LoongArch::PseudoVBZ_B:4311CondOpc = LoongArch::VSETANYEQZ_B;4312break;4313case LoongArch::PseudoVBZ_H:4314CondOpc = LoongArch::VSETANYEQZ_H;4315break;4316case LoongArch::PseudoVBZ_W:4317CondOpc = LoongArch::VSETANYEQZ_W;4318break;4319case LoongArch::PseudoVBZ_D:4320CondOpc = LoongArch::VSETANYEQZ_D;4321break;4322case LoongArch::PseudoVBNZ:4323CondOpc = LoongArch::VSETNEZ_V;4324break;4325case LoongArch::PseudoVBNZ_B:4326CondOpc = LoongArch::VSETALLNEZ_B;4327break;4328case LoongArch::PseudoVBNZ_H:4329CondOpc = LoongArch::VSETALLNEZ_H;4330break;4331case LoongArch::PseudoVBNZ_W:4332CondOpc = LoongArch::VSETALLNEZ_W;4333break;4334case LoongArch::PseudoVBNZ_D:4335CondOpc = LoongArch::VSETALLNEZ_D;4336break;4337case LoongArch::PseudoXVBZ:4338CondOpc = LoongArch::XVSETEQZ_V;4339break;4340case LoongArch::PseudoXVBZ_B:4341CondOpc = LoongArch::XVSETANYEQZ_B;4342break;4343case LoongArch::PseudoXVBZ_H:4344CondOpc = 
LoongArch::XVSETANYEQZ_H;4345break;4346case LoongArch::PseudoXVBZ_W:4347CondOpc = LoongArch::XVSETANYEQZ_W;4348break;4349case LoongArch::PseudoXVBZ_D:4350CondOpc = LoongArch::XVSETANYEQZ_D;4351break;4352case LoongArch::PseudoXVBNZ:4353CondOpc = LoongArch::XVSETNEZ_V;4354break;4355case LoongArch::PseudoXVBNZ_B:4356CondOpc = LoongArch::XVSETALLNEZ_B;4357break;4358case LoongArch::PseudoXVBNZ_H:4359CondOpc = LoongArch::XVSETALLNEZ_H;4360break;4361case LoongArch::PseudoXVBNZ_W:4362CondOpc = LoongArch::XVSETALLNEZ_W;4363break;4364case LoongArch::PseudoXVBNZ_D:4365CondOpc = LoongArch::XVSETALLNEZ_D;4366break;4367}43684369const TargetInstrInfo *TII = Subtarget.getInstrInfo();4370const BasicBlock *LLVM_BB = BB->getBasicBlock();4371DebugLoc DL = MI.getDebugLoc();4372MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();4373MachineFunction::iterator It = ++BB->getIterator();43744375MachineFunction *F = BB->getParent();4376MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);4377MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);4378MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);43794380F->insert(It, FalseBB);4381F->insert(It, TrueBB);4382F->insert(It, SinkBB);43834384// Transfer the remainder of MBB and its successor edges to Sink.4385SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());4386SinkBB->transferSuccessorsAndUpdatePHIs(BB);43874388// Insert the real instruction to BB.4389Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);4390BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());43914392// Insert branch.4393BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);4394BB->addSuccessor(FalseBB);4395BB->addSuccessor(TrueBB);43964397// FalseBB.4398Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);4399BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)4400.addReg(LoongArch::R0)4401.addImm(0);4402BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);4403FalseBB->addSuccessor(SinkBB);44044405// TrueBB.4406Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);4407BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)4408.addReg(LoongArch::R0)4409.addImm(1);4410TrueBB->addSuccessor(SinkBB);44114412// SinkBB: merge the results.4413BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),4414MI.getOperand(0).getReg())4415.addReg(RD1)4416.addMBB(FalseBB)4417.addReg(RD2)4418.addMBB(TrueBB);44194420// The pseudo instruction is gone now.4421MI.eraseFromParent();4422return SinkBB;4423}44244425static MachineBasicBlock *4426emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,4427const LoongArchSubtarget &Subtarget) {4428unsigned InsOp;4429unsigned HalfSize;4430switch (MI.getOpcode()) {4431default:4432llvm_unreachable("Unexpected opcode");4433case LoongArch::PseudoXVINSGR2VR_B:4434HalfSize = 16;4435InsOp = LoongArch::VINSGR2VR_B;4436break;4437case LoongArch::PseudoXVINSGR2VR_H:4438HalfSize = 8;4439InsOp = LoongArch::VINSGR2VR_H;4440break;4441}4442const TargetInstrInfo *TII = Subtarget.getInstrInfo();4443const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;4444const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;4445DebugLoc DL = MI.getDebugLoc();4446MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();4447// XDst = vector_insert XSrc, Elt, Idx4448Register XDst = MI.getOperand(0).getReg();4449Register XSrc = MI.getOperand(1).getReg();4450Register Elt = MI.getOperand(2).getReg();4451unsigned Idx = 
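  // Rough sketch of what the expansion above emits for a PseudoVBNZ_B
  // (illustration only; register and label names are made up, and the PHI is
  // MachineIR rather than real assembly):
  //   vsetallnez.b $fcc0, $vr0
  //   bcnez        $fcc0, .LBB_true
  // .LBB_false:
  //   addi.w       $r_false, $zero, 0
  //   b            .LBB_sink
  // .LBB_true:
  //   addi.w       $r_true, $zero, 1
  // .LBB_sink:
  //   PHI          $rd, $r_false, .LBB_false, $r_true, .LBB_true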
MI.getOperand(3).getImm();44524453Register ScratchReg1 = XSrc;4454if (Idx >= HalfSize) {4455ScratchReg1 = MRI.createVirtualRegister(RC);4456BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1)4457.addReg(XSrc)4458.addReg(XSrc)4459.addImm(1);4460}44614462Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);4463Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);4464BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)4465.addReg(ScratchReg1, 0, LoongArch::sub_128);4466BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2)4467.addReg(ScratchSubReg1)4468.addReg(Elt)4469.addImm(Idx >= HalfSize ? Idx - HalfSize : Idx);44704471Register ScratchReg2 = XDst;4472if (Idx >= HalfSize)4473ScratchReg2 = MRI.createVirtualRegister(RC);44744475BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2)4476.addImm(0)4477.addReg(ScratchSubReg2)4478.addImm(LoongArch::sub_128);44794480if (Idx >= HalfSize)4481BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst)4482.addReg(XSrc)4483.addReg(ScratchReg2)4484.addImm(2);44854486MI.eraseFromParent();4487return BB;4488}44894490MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(4491MachineInstr &MI, MachineBasicBlock *BB) const {4492const TargetInstrInfo *TII = Subtarget.getInstrInfo();4493DebugLoc DL = MI.getDebugLoc();44944495switch (MI.getOpcode()) {4496default:4497llvm_unreachable("Unexpected instr type to insert");4498case LoongArch::DIV_W:4499case LoongArch::DIV_WU:4500case LoongArch::MOD_W:4501case LoongArch::MOD_WU:4502case LoongArch::DIV_D:4503case LoongArch::DIV_DU:4504case LoongArch::MOD_D:4505case LoongArch::MOD_DU:4506return insertDivByZeroTrap(MI, BB);4507break;4508case LoongArch::WRFCSR: {4509BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),4510LoongArch::FCSR0 + MI.getOperand(0).getImm())4511.addReg(MI.getOperand(1).getReg());4512MI.eraseFromParent();4513return BB;4514}4515case LoongArch::RDFCSR: {4516MachineInstr *ReadFCSR =4517BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),4518MI.getOperand(0).getReg())4519.addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());4520ReadFCSR->getOperand(1).setIsUndef();4521MI.eraseFromParent();4522return BB;4523}4524case LoongArch::PseudoVBZ:4525case LoongArch::PseudoVBZ_B:4526case LoongArch::PseudoVBZ_H:4527case LoongArch::PseudoVBZ_W:4528case LoongArch::PseudoVBZ_D:4529case LoongArch::PseudoVBNZ:4530case LoongArch::PseudoVBNZ_B:4531case LoongArch::PseudoVBNZ_H:4532case LoongArch::PseudoVBNZ_W:4533case LoongArch::PseudoVBNZ_D:4534case LoongArch::PseudoXVBZ:4535case LoongArch::PseudoXVBZ_B:4536case LoongArch::PseudoXVBZ_H:4537case LoongArch::PseudoXVBZ_W:4538case LoongArch::PseudoXVBZ_D:4539case LoongArch::PseudoXVBNZ:4540case LoongArch::PseudoXVBNZ_B:4541case LoongArch::PseudoXVBNZ_H:4542case LoongArch::PseudoXVBNZ_W:4543case LoongArch::PseudoXVBNZ_D:4544return emitVecCondBranchPseudo(MI, BB, Subtarget);4545case LoongArch::PseudoXVINSGR2VR_B:4546case LoongArch::PseudoXVINSGR2VR_H:4547return emitPseudoXVINSGR2VR(MI, BB, Subtarget);4548}4549}45504551bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(4552EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,4553unsigned *Fast) const {4554if (!Subtarget.hasUAL())4555return false;45564557// TODO: set reasonable speed number.4558if (Fast)4559*Fast = 1;4560return true;4561}45624563const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {4564switch ((LoongArchISD::NodeType)Opcode) {4565case LoongArchISD::FIRST_NUMBER:4566break;45674568#define 
NODE_NAME_CASE(node) \4569case LoongArchISD::node: \4570return "LoongArchISD::" #node;45714572// TODO: Add more target-dependent nodes later.4573NODE_NAME_CASE(CALL)4574NODE_NAME_CASE(CALL_MEDIUM)4575NODE_NAME_CASE(CALL_LARGE)4576NODE_NAME_CASE(RET)4577NODE_NAME_CASE(TAIL)4578NODE_NAME_CASE(TAIL_MEDIUM)4579NODE_NAME_CASE(TAIL_LARGE)4580NODE_NAME_CASE(SLL_W)4581NODE_NAME_CASE(SRA_W)4582NODE_NAME_CASE(SRL_W)4583NODE_NAME_CASE(BSTRINS)4584NODE_NAME_CASE(BSTRPICK)4585NODE_NAME_CASE(MOVGR2FR_W_LA64)4586NODE_NAME_CASE(MOVFR2GR_S_LA64)4587NODE_NAME_CASE(FTINT)4588NODE_NAME_CASE(REVB_2H)4589NODE_NAME_CASE(REVB_2W)4590NODE_NAME_CASE(BITREV_4B)4591NODE_NAME_CASE(BITREV_W)4592NODE_NAME_CASE(ROTR_W)4593NODE_NAME_CASE(ROTL_W)4594NODE_NAME_CASE(DIV_WU)4595NODE_NAME_CASE(MOD_WU)4596NODE_NAME_CASE(CLZ_W)4597NODE_NAME_CASE(CTZ_W)4598NODE_NAME_CASE(DBAR)4599NODE_NAME_CASE(IBAR)4600NODE_NAME_CASE(BREAK)4601NODE_NAME_CASE(SYSCALL)4602NODE_NAME_CASE(CRC_W_B_W)4603NODE_NAME_CASE(CRC_W_H_W)4604NODE_NAME_CASE(CRC_W_W_W)4605NODE_NAME_CASE(CRC_W_D_W)4606NODE_NAME_CASE(CRCC_W_B_W)4607NODE_NAME_CASE(CRCC_W_H_W)4608NODE_NAME_CASE(CRCC_W_W_W)4609NODE_NAME_CASE(CRCC_W_D_W)4610NODE_NAME_CASE(CSRRD)4611NODE_NAME_CASE(CSRWR)4612NODE_NAME_CASE(CSRXCHG)4613NODE_NAME_CASE(IOCSRRD_B)4614NODE_NAME_CASE(IOCSRRD_H)4615NODE_NAME_CASE(IOCSRRD_W)4616NODE_NAME_CASE(IOCSRRD_D)4617NODE_NAME_CASE(IOCSRWR_B)4618NODE_NAME_CASE(IOCSRWR_H)4619NODE_NAME_CASE(IOCSRWR_W)4620NODE_NAME_CASE(IOCSRWR_D)4621NODE_NAME_CASE(CPUCFG)4622NODE_NAME_CASE(MOVGR2FCSR)4623NODE_NAME_CASE(MOVFCSR2GR)4624NODE_NAME_CASE(CACOP_D)4625NODE_NAME_CASE(CACOP_W)4626NODE_NAME_CASE(VSHUF)4627NODE_NAME_CASE(VPICKEV)4628NODE_NAME_CASE(VPICKOD)4629NODE_NAME_CASE(VPACKEV)4630NODE_NAME_CASE(VPACKOD)4631NODE_NAME_CASE(VILVL)4632NODE_NAME_CASE(VILVH)4633NODE_NAME_CASE(VSHUF4I)4634NODE_NAME_CASE(VREPLVEI)4635NODE_NAME_CASE(XVPERMI)4636NODE_NAME_CASE(VPICK_SEXT_ELT)4637NODE_NAME_CASE(VPICK_ZEXT_ELT)4638NODE_NAME_CASE(VREPLVE)4639NODE_NAME_CASE(VALL_ZERO)4640NODE_NAME_CASE(VANY_ZERO)4641NODE_NAME_CASE(VALL_NONZERO)4642NODE_NAME_CASE(VANY_NONZERO)4643}4644#undef NODE_NAME_CASE4645return nullptr;4646}46474648//===----------------------------------------------------------------------===//4649// Calling Convention Implementation4650//===----------------------------------------------------------------------===//46514652// Eight general-purpose registers a0-a7 used for passing integer arguments,4653// with a0-a1 reused to return values. 
// Generally, the GPRs are used to pass
// fixed-point arguments, and floating-point arguments when no FPR is available
// or with soft float ABI.
const MCPhysReg ArgGPRs[] = {LoongArch::R4,  LoongArch::R5, LoongArch::R6,
                             LoongArch::R7,  LoongArch::R8, LoongArch::R9,
                             LoongArch::R10, LoongArch::R11};
// Eight floating-point registers fa0-fa7 used for passing floating-point
// arguments, and fa0-fa1 are also used to return values.
const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
                               LoongArch::F3, LoongArch::F4, LoongArch::F5,
                               LoongArch::F6, LoongArch::F7};
// FPR32 and FPR64 alias each other.
const MCPhysReg ArgFPR64s[] = {
    LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
    LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};

const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
                            LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
                            LoongArch::VR6, LoongArch::VR7};

const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
                            LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
                            LoongArch::XR6, LoongArch::XR7};

// Pass a 2*GRLen argument that has been split into two GRLen values through
// registers or the stack as necessary.
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
                                     CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
                                     unsigned ValNo2, MVT ValVT2, MVT LocVT2,
                                     ISD::ArgFlagsTy ArgFlags2) {
  unsigned GRLenInBytes = GRLen / 8;
  if (Register Reg = State.AllocateReg(ArgGPRs)) {
    // At least one half can be passed via register.
    State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
                                     VA1.getLocVT(), CCValAssign::Full));
  } else {
    // Both halves must be passed on the stack, with proper alignment.
    Align StackAlign =
        std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
    State.addLoc(
        CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
                            State.AllocateStack(GRLenInBytes, StackAlign),
                            VA1.getLocVT(), CCValAssign::Full));
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
        LocVT2, CCValAssign::Full));
    return false;
  }
  if (Register Reg = State.AllocateReg(ArgGPRs)) {
    // The second half can also be passed via register.
    State.addLoc(
        CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
  } else {
    // The second half is passed via the stack, without additional alignment.
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
        LocVT2, CCValAssign::Full));
  }
  return false;
}

// Implements the LoongArch calling convention. Returns true upon failure.
static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
                         unsigned ValNo, MVT ValVT,
                         CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
                         CCState &State, bool IsFixed, bool IsRet,
                         Type *OrigTy) {
  unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
  assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
  MVT GRLenVT = GRLen == 32 ?
MVT::i32 : MVT::i64;4723MVT LocVT = ValVT;47244725// Any return value split into more than two values can't be returned4726// directly.4727if (IsRet && ValNo > 1)4728return true;47294730// If passing a variadic argument, or if no FPR is available.4731bool UseGPRForFloat = true;47324733switch (ABI) {4734default:4735llvm_unreachable("Unexpected ABI");4736break;4737case LoongArchABI::ABI_ILP32F:4738case LoongArchABI::ABI_LP64F:4739case LoongArchABI::ABI_ILP32D:4740case LoongArchABI::ABI_LP64D:4741UseGPRForFloat = !IsFixed;4742break;4743case LoongArchABI::ABI_ILP32S:4744case LoongArchABI::ABI_LP64S:4745break;4746}47474748// FPR32 and FPR64 alias each other.4749if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))4750UseGPRForFloat = true;47514752if (UseGPRForFloat && ValVT == MVT::f32) {4753LocVT = GRLenVT;4754LocInfo = CCValAssign::BCvt;4755} else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {4756LocVT = MVT::i64;4757LocInfo = CCValAssign::BCvt;4758} else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {4759// TODO: Handle passing f64 on LA32 with D feature.4760report_fatal_error("Passing f64 with GPR on LA32 is undefined");4761}47624763// If this is a variadic argument, the LoongArch calling convention requires4764// that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/84765// byte alignment. An aligned register should be used regardless of whether4766// the original argument was split during legalisation or not. The argument4767// will not be passed by registers if the original type is larger than4768// 2*GRLen, so the register alignment rule does not apply.4769unsigned TwoGRLenInBytes = (2 * GRLen) / 8;4770if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&4771DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {4772unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);4773// Skip 'odd' register if necessary.4774if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)4775State.AllocateReg(ArgGPRs);4776}47774778SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();4779SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =4780State.getPendingArgFlags();47814782assert(PendingLocs.size() == PendingArgFlags.size() &&4783"PendingLocs and PendingArgFlags out of sync");47844785// Split arguments might be passed indirectly, so keep track of the pending4786// values.4787if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {4788LocVT = GRLenVT;4789LocInfo = CCValAssign::Indirect;4790PendingLocs.push_back(4791CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));4792PendingArgFlags.push_back(ArgFlags);4793if (!ArgFlags.isSplitEnd()) {4794return false;4795}4796}47974798// If the split argument only had two elements, it should be passed directly4799// in registers or on the stack.4800if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&4801PendingLocs.size() <= 2) {4802assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");4803// Apply the normal calling convention rules to the first half of the4804// split argument.4805CCValAssign VA = PendingLocs[0];4806ISD::ArgFlagsTy AF = PendingArgFlags[0];4807PendingLocs.clear();4808PendingArgFlags.clear();4809return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,4810ArgFlags);4811}48124813// Allocate to a register if possible, or else a stack slot.4814Register Reg;4815unsigned StoreSizeBytes = GRLen / 8;4816Align StackAlign = Align(GRLen / 8);48174818if (ValVT == MVT::f32 && !UseGPRForFloat)4819Reg = State.AllocateReg(ArgFPR32s);4820else 
if (ValVT == MVT::f64 && !UseGPRForFloat)4821Reg = State.AllocateReg(ArgFPR64s);4822else if (ValVT.is128BitVector())4823Reg = State.AllocateReg(ArgVRs);4824else if (ValVT.is256BitVector())4825Reg = State.AllocateReg(ArgXRs);4826else4827Reg = State.AllocateReg(ArgGPRs);48284829unsigned StackOffset =4830Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);48314832// If we reach this point and PendingLocs is non-empty, we must be at the4833// end of a split argument that must be passed indirectly.4834if (!PendingLocs.empty()) {4835assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");4836assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");4837for (auto &It : PendingLocs) {4838if (Reg)4839It.convertToReg(Reg);4840else4841It.convertToMem(StackOffset);4842State.addLoc(It);4843}4844PendingLocs.clear();4845PendingArgFlags.clear();4846return false;4847}4848assert((!UseGPRForFloat || LocVT == GRLenVT) &&4849"Expected an GRLenVT at this stage");48504851if (Reg) {4852State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));4853return false;4854}48554856// When a floating-point value is passed on the stack, no bit-cast is needed.4857if (ValVT.isFloatingPoint()) {4858LocVT = ValVT;4859LocInfo = CCValAssign::Full;4860}48614862State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));4863return false;4864}48654866void LoongArchTargetLowering::analyzeInputArgs(4867MachineFunction &MF, CCState &CCInfo,4868const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,4869LoongArchCCAssignFn Fn) const {4870FunctionType *FType = MF.getFunction().getFunctionType();4871for (unsigned i = 0, e = Ins.size(); i != e; ++i) {4872MVT ArgVT = Ins[i].VT;4873Type *ArgTy = nullptr;4874if (IsRet)4875ArgTy = FType->getReturnType();4876else if (Ins[i].isOrigArg())4877ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());4878LoongArchABI::ABI ABI =4879MF.getSubtarget<LoongArchSubtarget>().getTargetABI();4880if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,4881CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {4882LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT4883<< '\n');4884llvm_unreachable("");4885}4886}4887}48884889void LoongArchTargetLowering::analyzeOutputArgs(4890MachineFunction &MF, CCState &CCInfo,4891const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,4892CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {4893for (unsigned i = 0, e = Outs.size(); i != e; ++i) {4894MVT ArgVT = Outs[i].VT;4895Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;4896LoongArchABI::ABI ABI =4897MF.getSubtarget<LoongArchSubtarget>().getTargetABI();4898if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,4899CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {4900LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT4901<< "\n");4902llvm_unreachable("");4903}4904}4905}49064907// Convert Val to a ValVT. 
Should not be called for CCValAssign::Indirect4908// values.4909static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,4910const CCValAssign &VA, const SDLoc &DL) {4911switch (VA.getLocInfo()) {4912default:4913llvm_unreachable("Unexpected CCValAssign::LocInfo");4914case CCValAssign::Full:4915case CCValAssign::Indirect:4916break;4917case CCValAssign::BCvt:4918if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)4919Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);4920else4921Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);4922break;4923}4924return Val;4925}49264927static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,4928const CCValAssign &VA, const SDLoc &DL,4929const ISD::InputArg &In,4930const LoongArchTargetLowering &TLI) {4931MachineFunction &MF = DAG.getMachineFunction();4932MachineRegisterInfo &RegInfo = MF.getRegInfo();4933EVT LocVT = VA.getLocVT();4934SDValue Val;4935const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());4936Register VReg = RegInfo.createVirtualRegister(RC);4937RegInfo.addLiveIn(VA.getLocReg(), VReg);4938Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);49394940// If input is sign extended from 32 bits, note it for the OptW pass.4941if (In.isOrigArg()) {4942Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());4943if (OrigArg->getType()->isIntegerTy()) {4944unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();4945// An input zero extended from i31 can also be considered sign extended.4946if ((BitWidth <= 32 && In.Flags.isSExt()) ||4947(BitWidth < 32 && In.Flags.isZExt())) {4948LoongArchMachineFunctionInfo *LAFI =4949MF.getInfo<LoongArchMachineFunctionInfo>();4950LAFI->addSExt32Register(VReg);4951}4952}4953}49544955return convertLocVTToValVT(DAG, Val, VA, DL);4956}49574958// The caller is responsible for loading the full value if the argument is4959// passed with CCValAssign::Indirect.4960static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,4961const CCValAssign &VA, const SDLoc &DL) {4962MachineFunction &MF = DAG.getMachineFunction();4963MachineFrameInfo &MFI = MF.getFrameInfo();4964EVT ValVT = VA.getValVT();4965int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),4966/*IsImmutable=*/true);4967SDValue FIN = DAG.getFrameIndex(4968FI, MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)));49694970ISD::LoadExtType ExtType;4971switch (VA.getLocInfo()) {4972default:4973llvm_unreachable("Unexpected CCValAssign::LocInfo");4974case CCValAssign::Full:4975case CCValAssign::Indirect:4976case CCValAssign::BCvt:4977ExtType = ISD::NON_EXTLOAD;4978break;4979}4980return DAG.getExtLoad(4981ExtType, DL, VA.getLocVT(), Chain, FIN,4982MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);4983}49844985static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,4986const CCValAssign &VA, const SDLoc &DL) {4987EVT LocVT = VA.getLocVT();49884989switch (VA.getLocInfo()) {4990default:4991llvm_unreachable("Unexpected CCValAssign::LocInfo");4992case CCValAssign::Full:4993break;4994case CCValAssign::BCvt:4995if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)4996Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);4997else4998Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);4999break;5000}5001return Val;5002}50035004static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,5005CCValAssign::LocInfo LocInfo,5006ISD::ArgFlagsTy ArgFlags, CCState &State) {5007if (LocVT == MVT::i32 || LocVT == MVT::i64) {5008// 
Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim5009// s0 s1 s2 s3 s4 s5 s6 s7 s85010static const MCPhysReg GPRList[] = {5011LoongArch::R23, LoongArch::R24, LoongArch::R25,5012LoongArch::R26, LoongArch::R27, LoongArch::R28,5013LoongArch::R29, LoongArch::R30, LoongArch::R31};5014if (unsigned Reg = State.AllocateReg(GPRList)) {5015State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));5016return false;5017}5018}50195020if (LocVT == MVT::f32) {5021// Pass in STG registers: F1, F2, F3, F45022// fs0,fs1,fs2,fs35023static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,5024LoongArch::F26, LoongArch::F27};5025if (unsigned Reg = State.AllocateReg(FPR32List)) {5026State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));5027return false;5028}5029}50305031if (LocVT == MVT::f64) {5032// Pass in STG registers: D1, D2, D3, D45033// fs4,fs5,fs6,fs75034static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,5035LoongArch::F30_64, LoongArch::F31_64};5036if (unsigned Reg = State.AllocateReg(FPR64List)) {5037State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));5038return false;5039}5040}50415042report_fatal_error("No registers left in GHC calling convention");5043return true;5044}50455046// Transform physical registers into virtual registers.5047SDValue LoongArchTargetLowering::LowerFormalArguments(5048SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,5049const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,5050SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {50515052MachineFunction &MF = DAG.getMachineFunction();50535054switch (CallConv) {5055default:5056llvm_unreachable("Unsupported calling convention");5057case CallingConv::C:5058case CallingConv::Fast:5059break;5060case CallingConv::GHC:5061if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||5062!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))5063report_fatal_error(5064"GHC calling convention requires the F and D extensions");5065}50665067EVT PtrVT = getPointerTy(DAG.getDataLayout());5068MVT GRLenVT = Subtarget.getGRLenVT();5069unsigned GRLenInBytes = Subtarget.getGRLen() / 8;5070// Used with varargs to acumulate store chains.5071std::vector<SDValue> OutChains;50725073// Assign locations to all of the incoming arguments.5074SmallVector<CCValAssign> ArgLocs;5075CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());50765077if (CallConv == CallingConv::GHC)5078CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);5079else5080analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);50815082for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {5083CCValAssign &VA = ArgLocs[i];5084SDValue ArgValue;5085if (VA.isRegLoc())5086ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[i], *this);5087else5088ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);5089if (VA.getLocInfo() == CCValAssign::Indirect) {5090// If the original argument was split and passed by reference, we need to5091// load all parts of it here (using the same address).5092InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,5093MachinePointerInfo()));5094unsigned ArgIndex = Ins[i].OrigArgIndex;5095unsigned ArgPartOffset = Ins[i].PartOffset;5096assert(ArgPartOffset == 0);5097while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {5098CCValAssign &PartVA = ArgLocs[i + 1];5099unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;5100SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);5101SDValue Address = DAG.getNode(ISD::ADD, DL, 
            PtrVT, ArgValue, Offset);
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++i;
      }
      continue;
    }
    InVals.push_back(ArgValue);
  }

  if (IsVarArg) {
    ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
    const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
    MachineFrameInfo &MFI = MF.getFrameInfo();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();

    // Offset of the first variable argument from stack pointer, and size of
    // the vararg save area. For now, the varargs save area is either zero or
    // large enough to hold a0-a7.
    int VaArgOffset, VarArgsSaveSize;

    // If all registers are allocated, then all varargs must be passed on the
    // stack and we don't need to save any argregs.
    if (ArgRegs.size() == Idx) {
      VaArgOffset = CCInfo.getStackSize();
      VarArgsSaveSize = 0;
    } else {
      VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
      VaArgOffset = -VarArgsSaveSize;
    }

    // Record the frame index of the first variable argument
    // which is a value necessary to VASTART.
    int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
    LoongArchFI->setVarArgsFrameIndex(FI);

    // If saving an odd number of registers then create an extra stack slot to
    // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
    // offsets to even-numbered registers remain 2*GRLen-aligned.
    if (Idx % 2) {
      MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
                            true);
      VarArgsSaveSize += GRLenInBytes;
    }

    // Copy the integer registers that may have been used for passing varargs
    // to the vararg save area.
    for (unsigned I = Idx; I < ArgRegs.size();
         ++I, VaArgOffset += GRLenInBytes) {
      const Register Reg = RegInfo.createVirtualRegister(RC);
      RegInfo.addLiveIn(ArgRegs[I], Reg);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
      FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
      SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
                                   MachinePointerInfo::getFixedStack(MF, FI));
      cast<StoreSDNode>(Store.getNode())
          ->getMemOperand()
          ->setValue((Value *)nullptr);
      OutChains.push_back(Store);
    }
    LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
  }

  // All stores are grouped in one node to allow the matching between
  // the size of Ins and InVals.
This only happens for vararg functions.5169if (!OutChains.empty()) {5170OutChains.push_back(Chain);5171Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);5172}51735174return Chain;5175}51765177bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {5178return CI->isTailCall();5179}51805181// Check if the return value is used as only a return value, as otherwise5182// we can't perform a tail-call.5183bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,5184SDValue &Chain) const {5185if (N->getNumValues() != 1)5186return false;5187if (!N->hasNUsesOfValue(1, 0))5188return false;51895190SDNode *Copy = *N->use_begin();5191if (Copy->getOpcode() != ISD::CopyToReg)5192return false;51935194// If the ISD::CopyToReg has a glue operand, we conservatively assume it5195// isn't safe to perform a tail call.5196if (Copy->getGluedNode())5197return false;51985199// The copy must be used by a LoongArchISD::RET, and nothing else.5200bool HasRet = false;5201for (SDNode *Node : Copy->uses()) {5202if (Node->getOpcode() != LoongArchISD::RET)5203return false;5204HasRet = true;5205}52065207if (!HasRet)5208return false;52095210Chain = Copy->getOperand(0);5211return true;5212}52135214// Check whether the call is eligible for tail call optimization.5215bool LoongArchTargetLowering::isEligibleForTailCallOptimization(5216CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,5217const SmallVectorImpl<CCValAssign> &ArgLocs) const {52185219auto CalleeCC = CLI.CallConv;5220auto &Outs = CLI.Outs;5221auto &Caller = MF.getFunction();5222auto CallerCC = Caller.getCallingConv();52235224// Do not tail call opt if the stack is used to pass parameters.5225if (CCInfo.getStackSize() != 0)5226return false;52275228// Do not tail call opt if any parameters need to be passed indirectly.5229for (auto &VA : ArgLocs)5230if (VA.getLocInfo() == CCValAssign::Indirect)5231return false;52325233// Do not tail call opt if either caller or callee uses struct return5234// semantics.5235auto IsCallerStructRet = Caller.hasStructRetAttr();5236auto IsCalleeStructRet = Outs.empty() ? 
false : Outs[0].Flags.isSRet();5237if (IsCallerStructRet || IsCalleeStructRet)5238return false;52395240// Do not tail call opt if either the callee or caller has a byval argument.5241for (auto &Arg : Outs)5242if (Arg.Flags.isByVal())5243return false;52445245// The callee has to preserve all registers the caller needs to preserve.5246const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();5247const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);5248if (CalleeCC != CallerCC) {5249const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);5250if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))5251return false;5252}5253return true;5254}52555256static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {5257return DAG.getDataLayout().getPrefTypeAlign(5258VT.getTypeForEVT(*DAG.getContext()));5259}52605261// Lower a call to a callseq_start + CALL + callseq_end chain, and add input5262// and output parameter nodes.5263SDValue5264LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,5265SmallVectorImpl<SDValue> &InVals) const {5266SelectionDAG &DAG = CLI.DAG;5267SDLoc &DL = CLI.DL;5268SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;5269SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;5270SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;5271SDValue Chain = CLI.Chain;5272SDValue Callee = CLI.Callee;5273CallingConv::ID CallConv = CLI.CallConv;5274bool IsVarArg = CLI.IsVarArg;5275EVT PtrVT = getPointerTy(DAG.getDataLayout());5276MVT GRLenVT = Subtarget.getGRLenVT();5277bool &IsTailCall = CLI.IsTailCall;52785279MachineFunction &MF = DAG.getMachineFunction();52805281// Analyze the operands of the call, assigning locations to each operand.5282SmallVector<CCValAssign> ArgLocs;5283CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());52845285if (CallConv == CallingConv::GHC)5286ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);5287else5288analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);52895290// Check if it's really possible to do a tail call.5291if (IsTailCall)5292IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);52935294if (IsTailCall)5295++NumTailCalls;5296else if (CLI.CB && CLI.CB->isMustTailCall())5297report_fatal_error("failed to perform tail call elimination on a call "5298"site marked musttail");52995300// Get a count of how many bytes are to be pushed on the stack.5301unsigned NumBytes = ArgCCInfo.getStackSize();53025303// Create local copies for byval args.5304SmallVector<SDValue> ByValArgs;5305for (unsigned i = 0, e = Outs.size(); i != e; ++i) {5306ISD::ArgFlagsTy Flags = Outs[i].Flags;5307if (!Flags.isByVal())5308continue;53095310SDValue Arg = OutVals[i];5311unsigned Size = Flags.getByValSize();5312Align Alignment = Flags.getNonZeroByValAlign();53135314int FI =5315MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);5316SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));5317SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);53185319Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,5320/*IsVolatile=*/false,5321/*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,5322MachinePointerInfo(), MachinePointerInfo());5323ByValArgs.push_back(FIPtr);5324}53255326if (!IsTailCall)5327Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);53285329// Copy argument values to their designated locations.5330SmallVector<std::pair<Register, SDValue>> RegsToPass;5331SmallVector<SDValue> MemOpChains;5332SDValue StackPtr;5333for 
(unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {5334CCValAssign &VA = ArgLocs[i];5335SDValue ArgValue = OutVals[i];5336ISD::ArgFlagsTy Flags = Outs[i].Flags;53375338// Promote the value if needed.5339// For now, only handle fully promoted and indirect arguments.5340if (VA.getLocInfo() == CCValAssign::Indirect) {5341// Store the argument in a stack slot and pass its address.5342Align StackAlign =5343std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),5344getPrefTypeAlign(ArgValue.getValueType(), DAG));5345TypeSize StoredSize = ArgValue.getValueType().getStoreSize();5346// If the original argument was split and passed by reference, we need to5347// store the required parts of it here (and pass just one address).5348unsigned ArgIndex = Outs[i].OrigArgIndex;5349unsigned ArgPartOffset = Outs[i].PartOffset;5350assert(ArgPartOffset == 0);5351// Calculate the total size to store. We don't have access to what we're5352// actually storing other than performing the loop and collecting the5353// info.5354SmallVector<std::pair<SDValue, SDValue>> Parts;5355while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {5356SDValue PartValue = OutVals[i + 1];5357unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;5358SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);5359EVT PartVT = PartValue.getValueType();53605361StoredSize += PartVT.getStoreSize();5362StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));5363Parts.push_back(std::make_pair(PartValue, Offset));5364++i;5365}5366SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);5367int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();5368MemOpChains.push_back(5369DAG.getStore(Chain, DL, ArgValue, SpillSlot,5370MachinePointerInfo::getFixedStack(MF, FI)));5371for (const auto &Part : Parts) {5372SDValue PartValue = Part.first;5373SDValue PartOffset = Part.second;5374SDValue Address =5375DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);5376MemOpChains.push_back(5377DAG.getStore(Chain, DL, PartValue, Address,5378MachinePointerInfo::getFixedStack(MF, FI)));5379}5380ArgValue = SpillSlot;5381} else {5382ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);5383}53845385// Use local copy if it is a byval arg.5386if (Flags.isByVal())5387ArgValue = ByValArgs[j++];53885389if (VA.isRegLoc()) {5390// Queue up the argument copies and emit them at the end.5391RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));5392} else {5393assert(VA.isMemLoc() && "Argument not register or memory");5394assert(!IsTailCall && "Tail call not allowed if stack is used "5395"for passing parameters");53965397// Work out the address of the stack slot.5398if (!StackPtr.getNode())5399StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);5400SDValue Address =5401DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,5402DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));54035404// Emit the store.5405MemOpChains.push_back(5406DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));5407}5408}54095410// Join the stores, which are independent of one another.5411if (!MemOpChains.empty())5412Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);54135414SDValue Glue;54155416// Build a sequence of copy-to-reg nodes, chained and glued together.5417for (auto &Reg : RegsToPass) {5418Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);5419Glue = Chain.getValue(1);5420}54215422// If the callee is a GlobalAddress/ExternalSymbol node, turn it into a5423// TargetGlobalAddress/TargetExternalSymbol node so that legalize 
won't5424// split it and then direct call can be matched by PseudoCALL.5425if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {5426const GlobalValue *GV = S->getGlobal();5427unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)5428? LoongArchII::MO_CALL5429: LoongArchII::MO_CALL_PLT;5430Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);5431} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {5432unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)5433? LoongArchII::MO_CALL5434: LoongArchII::MO_CALL_PLT;5435Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);5436}54375438// The first call operand is the chain and the second is the target address.5439SmallVector<SDValue> Ops;5440Ops.push_back(Chain);5441Ops.push_back(Callee);54425443// Add argument registers to the end of the list so that they are5444// known live into the call.5445for (auto &Reg : RegsToPass)5446Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));54475448if (!IsTailCall) {5449// Add a register mask operand representing the call-preserved registers.5450const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();5451const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);5452assert(Mask && "Missing call preserved mask for calling convention");5453Ops.push_back(DAG.getRegisterMask(Mask));5454}54555456// Glue the call to the argument copies, if any.5457if (Glue.getNode())5458Ops.push_back(Glue);54595460// Emit the call.5461SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);5462unsigned Op;5463switch (DAG.getTarget().getCodeModel()) {5464default:5465report_fatal_error("Unsupported code model");5466case CodeModel::Small:5467Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;5468break;5469case CodeModel::Medium:5470assert(Subtarget.is64Bit() && "Medium code model requires LA64");5471Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;5472break;5473case CodeModel::Large:5474assert(Subtarget.is64Bit() && "Large code model requires LA64");5475Op = IsTailCall ? 
LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;5476break;5477}54785479if (IsTailCall) {5480MF.getFrameInfo().setHasTailCall();5481SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);5482DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);5483return Ret;5484}54855486Chain = DAG.getNode(Op, DL, NodeTys, Ops);5487DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);5488Glue = Chain.getValue(1);54895490// Mark the end of the call, which is glued to the call itself.5491Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);5492Glue = Chain.getValue(1);54935494// Assign locations to each value returned by this call.5495SmallVector<CCValAssign> RVLocs;5496CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());5497analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);54985499// Copy all of the result registers out of their specified physreg.5500for (auto &VA : RVLocs) {5501// Copy the value out.5502SDValue RetValue =5503DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);5504// Glue the RetValue to the end of the call sequence.5505Chain = RetValue.getValue(1);5506Glue = RetValue.getValue(2);55075508RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);55095510InVals.push_back(RetValue);5511}55125513return Chain;5514}55155516bool LoongArchTargetLowering::CanLowerReturn(5517CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,5518const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {5519SmallVector<CCValAssign> RVLocs;5520CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);55215522for (unsigned i = 0, e = Outs.size(); i != e; ++i) {5523LoongArchABI::ABI ABI =5524MF.getSubtarget<LoongArchSubtarget>().getTargetABI();5525if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,5526Outs[i].Flags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,5527nullptr))5528return false;5529}5530return true;5531}55325533SDValue LoongArchTargetLowering::LowerReturn(5534SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,5535const SmallVectorImpl<ISD::OutputArg> &Outs,5536const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,5537SelectionDAG &DAG) const {5538// Stores the assignment of the return value to a location.5539SmallVector<CCValAssign> RVLocs;55405541// Info about the registers and stack slot.5542CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,5543*DAG.getContext());55445545analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,5546nullptr, CC_LoongArch);5547if (CallConv == CallingConv::GHC && !RVLocs.empty())5548report_fatal_error("GHC functions return void only");5549SDValue Glue;5550SmallVector<SDValue, 4> RetOps(1, Chain);55515552// Copy the result values into the output registers.5553for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {5554CCValAssign &VA = RVLocs[i];5555assert(VA.isRegLoc() && "Can only return in registers!");55565557// Handle a 'normal' return.5558SDValue Val = convertValVTToLocVT(DAG, OutVals[i], VA, DL);5559Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);55605561// Guarantee that all emitted copies are stuck together.5562Glue = Chain.getValue(1);5563RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));5564}55655566RetOps[0] = Chain; // Update chain.55675568// Add the glue node if we have it.5569if (Glue.getNode())5570RetOps.push_back(Glue);55715572return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);5573}55745575bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,5576bool ForCodeSize) const {5577// 
// TODO: Maybe need more checks here after vector extension is supported.
  if (VT == MVT::f32 && !Subtarget.hasBasicF())
    return false;
  if (VT == MVT::f64 && !Subtarget.hasBasicD())
    return false;
  return (Imm.isZero() || Imm.isExactlyValue(+1.0));
}

bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const {
  return true;
}

bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const {
  return true;
}

bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
    const Instruction *I) const {
  if (!Subtarget.is64Bit())
    return isa<LoadInst>(I) || isa<StoreInst>(I);

  if (isa<LoadInst>(I))
    return true;

  // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
  // require fences because we can use amswap_db.[w/d].
  Type *Ty = I->getOperand(0)->getType();
  if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
    unsigned Size = Ty->getIntegerBitWidth();
    return (Size == 8 || Size == 16);
  }

  return false;
}

EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
                                                LLVMContext &Context,
                                                EVT VT) const {
  if (!VT.isVector())
    return getPointerTy(DL);
  return VT.changeVectorElementTypeToInteger();
}

bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
  // TODO: Support vectors.
  return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
}

bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                                 const CallInst &I,
                                                 MachineFunction &MF,
                                                 unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;
  case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
  case Intrinsic::loongarch_masked_atomicrmw_add_i32:
  case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
  case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
    // TODO: Add more Intrinsics later.
  }
}

TargetLowering::AtomicExpansionKind
LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // TODO: Add more AtomicRMWInst that need to be extended.

  // Since a floating-point operation requires a non-trivial set of data
  // operations, use CmpXChg to expand.
  if (AI->isFloatingPointOperation() ||
      AI->getOperation() == AtomicRMWInst::UIncWrap ||
      AI->getOperation() == AtomicRMWInst::UDecWrap)
    return AtomicExpansionKind::CmpXChg;

  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
                                    AtomicRMWInst::BinOp BinOp) {
  if (GRLen == 64) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
    case AtomicRMWInst::Add:
      return Intrinsic::loongarch_masked_atomicrmw_add_i64;
    case AtomicRMWInst::Sub:
      return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
    case AtomicRMWInst::Nand:
      return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
    case AtomicRMWInst::UMax:
      return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
    case AtomicRMWInst::UMin:
      return
Intrinsic::loongarch_masked_atomicrmw_umin_i64;5684case AtomicRMWInst::Max:5685return Intrinsic::loongarch_masked_atomicrmw_max_i64;5686case AtomicRMWInst::Min:5687return Intrinsic::loongarch_masked_atomicrmw_min_i64;5688// TODO: support other AtomicRMWInst.5689}5690}56915692if (GRLen == 32) {5693switch (BinOp) {5694default:5695llvm_unreachable("Unexpected AtomicRMW BinOp");5696case AtomicRMWInst::Xchg:5697return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;5698case AtomicRMWInst::Add:5699return Intrinsic::loongarch_masked_atomicrmw_add_i32;5700case AtomicRMWInst::Sub:5701return Intrinsic::loongarch_masked_atomicrmw_sub_i32;5702case AtomicRMWInst::Nand:5703return Intrinsic::loongarch_masked_atomicrmw_nand_i32;5704// TODO: support other AtomicRMWInst.5705}5706}57075708llvm_unreachable("Unexpected GRLen\n");5709}57105711TargetLowering::AtomicExpansionKind5712LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(5713AtomicCmpXchgInst *CI) const {5714unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();5715if (Size == 8 || Size == 16)5716return AtomicExpansionKind::MaskedIntrinsic;5717return AtomicExpansionKind::None;5718}57195720Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(5721IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,5722Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {5723AtomicOrdering FailOrd = CI->getFailureOrdering();5724Value *FailureOrdering =5725Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));57265727// TODO: Support cmpxchg on LA32.5728Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;5729CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());5730NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());5731Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());5732Type *Tys[] = {AlignedAddr->getType()};5733Function *MaskedCmpXchg =5734Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);5735Value *Result = Builder.CreateCall(5736MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});5737Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());5738return Result;5739}57405741Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(5742IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,5743Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {5744// In the case of an atomicrmw xchg with a constant 0/-1 operand, replace5745// the atomic instruction with an AtomicRMWInst::And/Or with appropriate5746// mask, as this produces better code than the LL/SC loop emitted by5747// int_loongarch_masked_atomicrmw_xchg.5748if (AI->getOperation() == AtomicRMWInst::Xchg &&5749isa<ConstantInt>(AI->getValOperand())) {5750ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());5751if (CVal->isZero())5752return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,5753Builder.CreateNot(Mask, "Inv_Mask"),5754AI->getAlign(), Ord);5755if (CVal->isMinusOne())5756return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,5757AI->getAlign(), Ord);5758}57595760unsigned GRLen = Subtarget.getGRLen();5761Value *Ordering =5762Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));5763Type *Tys[] = {AlignedAddr->getType()};5764Function *LlwOpScwLoop = Intrinsic::getDeclaration(5765AI->getModule(),5766getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys);57675768if (GRLen == 64) {5769Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());5770Mask = Builder.CreateSExt(Mask, 
Builder.getInt64Ty());5771ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());5772}57735774Value *Result;57755776// Must pass the shift amount needed to sign extend the loaded value prior5777// to performing a signed comparison for min/max. ShiftAmt is the number of5778// bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which5779// is the number of bits to left+right shift the value in order to5780// sign-extend.5781if (AI->getOperation() == AtomicRMWInst::Min ||5782AI->getOperation() == AtomicRMWInst::Max) {5783const DataLayout &DL = AI->getDataLayout();5784unsigned ValWidth =5785DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());5786Value *SextShamt =5787Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);5788Result = Builder.CreateCall(LlwOpScwLoop,5789{AlignedAddr, Incr, Mask, SextShamt, Ordering});5790} else {5791Result =5792Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});5793}57945795if (GRLen == 64)5796Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());5797return Result;5798}57995800bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(5801const MachineFunction &MF, EVT VT) const {5802VT = VT.getScalarType();58035804if (!VT.isSimple())5805return false;58065807switch (VT.getSimpleVT().SimpleTy) {5808case MVT::f32:5809case MVT::f64:5810return true;5811default:5812break;5813}58145815return false;5816}58175818Register LoongArchTargetLowering::getExceptionPointerRegister(5819const Constant *PersonalityFn) const {5820return LoongArch::R4;5821}58225823Register LoongArchTargetLowering::getExceptionSelectorRegister(5824const Constant *PersonalityFn) const {5825return LoongArch::R5;5826}58275828//===----------------------------------------------------------------------===//5829// LoongArch Inline Assembly Support5830//===----------------------------------------------------------------------===//58315832LoongArchTargetLowering::ConstraintType5833LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {5834// LoongArch specific constraints in GCC: config/loongarch/constraints.md5835//5836// 'f': A floating-point register (if available).5837// 'k': A memory operand whose address is formed by a base register and5838// (optionally scaled) index register.5839// 'l': A signed 16-bit constant.5840// 'm': A memory operand whose address is formed by a base register and5841// offset that is suitable for use in instructions with the same5842// addressing mode as st.w and ld.w.5843// 'I': A signed 12-bit constant (for arithmetic instructions).5844// 'J': Integer zero.5845// 'K': An unsigned 12-bit constant (for logic instructions).5846// "ZB": An address that is held in a general-purpose register. 
The offset is5847// zero.5848// "ZC": A memory operand whose address is formed by a base register and5849// offset that is suitable for use in instructions with the same5850// addressing mode as ll.w and sc.w.5851if (Constraint.size() == 1) {5852switch (Constraint[0]) {5853default:5854break;5855case 'f':5856return C_RegisterClass;5857case 'l':5858case 'I':5859case 'J':5860case 'K':5861return C_Immediate;5862case 'k':5863return C_Memory;5864}5865}58665867if (Constraint == "ZC" || Constraint == "ZB")5868return C_Memory;58695870// 'm' is handled here.5871return TargetLowering::getConstraintType(Constraint);5872}58735874InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(5875StringRef ConstraintCode) const {5876return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)5877.Case("k", InlineAsm::ConstraintCode::k)5878.Case("ZB", InlineAsm::ConstraintCode::ZB)5879.Case("ZC", InlineAsm::ConstraintCode::ZC)5880.Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));5881}58825883std::pair<unsigned, const TargetRegisterClass *>5884LoongArchTargetLowering::getRegForInlineAsmConstraint(5885const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {5886// First, see if this is a constraint that directly corresponds to a LoongArch5887// register class.5888if (Constraint.size() == 1) {5889switch (Constraint[0]) {5890case 'r':5891// TODO: Support fixed vectors up to GRLen?5892if (VT.isVector())5893break;5894return std::make_pair(0U, &LoongArch::GPRRegClass);5895case 'f':5896if (Subtarget.hasBasicF() && VT == MVT::f32)5897return std::make_pair(0U, &LoongArch::FPR32RegClass);5898if (Subtarget.hasBasicD() && VT == MVT::f64)5899return std::make_pair(0U, &LoongArch::FPR64RegClass);5900if (Subtarget.hasExtLSX() &&5901TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))5902return std::make_pair(0U, &LoongArch::LSX128RegClass);5903if (Subtarget.hasExtLASX() &&5904TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))5905return std::make_pair(0U, &LoongArch::LASX256RegClass);5906break;5907default:5908break;5909}5910}59115912// TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen5913// record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm5914// constraints while the official register name is prefixed with a '$'. So we5915// clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)5916// before it being parsed. And TargetLowering::getRegForInlineAsmConstraint is5917// case insensitive, so no need to convert the constraint to upper case here.5918//5919// For now, no need to support ABI names (e.g. `$a0`) as clang will correctly5920// decode the usage of register name aliases into their official names. 
And5921// AFAIK, the not yet upstreamed `rustc` for LoongArch will always use5922// official register names.5923if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||5924Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {5925bool IsFP = Constraint[2] == 'f';5926std::pair<StringRef, StringRef> Temp = Constraint.split('$');5927std::pair<unsigned, const TargetRegisterClass *> R;5928R = TargetLowering::getRegForInlineAsmConstraint(5929TRI, join_items("", Temp.first, Temp.second), VT);5930// Match those names to the widest floating point register type available.5931if (IsFP) {5932unsigned RegNo = R.first;5933if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {5934if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {5935unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;5936return std::make_pair(DReg, &LoongArch::FPR64RegClass);5937}5938}5939}5940return R;5941}59425943return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);5944}59455946void LoongArchTargetLowering::LowerAsmOperandForConstraint(5947SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,5948SelectionDAG &DAG) const {5949// Currently only support length 1 constraints.5950if (Constraint.size() == 1) {5951switch (Constraint[0]) {5952case 'l':5953// Validate & create a 16-bit signed immediate operand.5954if (auto *C = dyn_cast<ConstantSDNode>(Op)) {5955uint64_t CVal = C->getSExtValue();5956if (isInt<16>(CVal))5957Ops.push_back(5958DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));5959}5960return;5961case 'I':5962// Validate & create a 12-bit signed immediate operand.5963if (auto *C = dyn_cast<ConstantSDNode>(Op)) {5964uint64_t CVal = C->getSExtValue();5965if (isInt<12>(CVal))5966Ops.push_back(5967DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));5968}5969return;5970case 'J':5971// Validate & create an integer zero operand.5972if (auto *C = dyn_cast<ConstantSDNode>(Op))5973if (C->getZExtValue() == 0)5974Ops.push_back(5975DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));5976return;5977case 'K':5978// Validate & create a 12-bit unsigned immediate operand.5979if (auto *C = dyn_cast<ConstantSDNode>(Op)) {5980uint64_t CVal = C->getZExtValue();5981if (isUInt<12>(CVal))5982Ops.push_back(5983DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));5984}5985return;5986default:5987break;5988}5989}5990TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);5991}59925993#define GET_REGISTER_MATCHER5994#include "LoongArchGenAsmMatcher.inc"59955996Register5997LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,5998const MachineFunction &MF) const {5999std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');6000std::string NewRegName = Name.second.str();6001Register Reg = MatchRegisterAltName(NewRegName);6002if (Reg == LoongArch::NoRegister)6003Reg = MatchRegisterName(NewRegName);6004if (Reg == LoongArch::NoRegister)6005report_fatal_error(6006Twine("Invalid register name \"" + StringRef(RegName) + "\"."));6007BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);6008if (!ReservedRegs.test(Reg))6009report_fatal_error(Twine("Trying to obtain non-reserved register \"" +6010StringRef(RegName) + "\"."));6011return Reg;6012}60136014bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,6015EVT VT, SDValue C) const {6016// TODO: Support vectors.6017if (!VT.isScalarInteger())6018return false;60196020// Omit the optimization if the data size exceeds 
GRLen.6021if (VT.getSizeInBits() > Subtarget.getGRLen())6022return false;60236024if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {6025const APInt &Imm = ConstNode->getAPIntValue();6026// Break MUL into (SLLI + ADD/SUB) or ALSL.6027if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||6028(1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())6029return true;6030// Break MUL into (ALSL x, (SLLI x, imm0), imm1).6031if (ConstNode->hasOneUse() &&6032((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||6033(Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))6034return true;6035// Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),6036// in which the immediate has two set bits. Or Break (MUL x, imm)6037// into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate6038// equals to (1 << s0) - (1 << s1).6039if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {6040unsigned Shifts = Imm.countr_zero();6041// Reject immediates which can be composed via a single LUI.6042if (Shifts >= 12)6043return false;6044// Reject multiplications can be optimized to6045// (SLLI (ALSL x, x, 1/2/3/4), s).6046APInt ImmPop = Imm.ashr(Shifts);6047if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)6048return false;6049// We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,6050// since it needs one more instruction than other 3 cases.6051APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);6052if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||6053(ImmSmall - Imm).isPowerOf2())6054return true;6055}6056}60576058return false;6059}60606061bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,6062const AddrMode &AM,6063Type *Ty, unsigned AS,6064Instruction *I) const {6065// LoongArch has four basic addressing modes:6066// 1. reg6067// 2. reg + 12-bit signed offset6068// 3. reg + 14-bit signed offset left-shifted by 26069// 4. reg1 + reg26070// TODO: Add more checks after support vector extension.60716072// No global is ever allowed as a base.6073if (AM.BaseGV)6074return false;60756076// Require a 12-bit signed offset or 14-bit signed offset left-shifted by 26077// with `UAL` feature.6078if (!isInt<12>(AM.BaseOffs) &&6079!(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))6080return false;60816082switch (AM.Scale) {6083case 0:6084// "r+i" or just "i", depending on HasBaseReg.6085break;6086case 1:6087// "r+r+i" is not allowed.6088if (AM.HasBaseReg && AM.BaseOffs)6089return false;6090// Otherwise we have "r+r" or "r+i".6091break;6092case 2:6093// "2*r+r" or "2*r+i" is not allowed.6094if (AM.HasBaseReg || AM.BaseOffs)6095return false;6096// Allow "2*r" as "r+r".6097break;6098default:6099return false;6100}61016102return true;6103}61046105bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {6106return isInt<12>(Imm);6107}61086109bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {6110return isInt<12>(Imm);6111}61126113bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {6114// Zexts are free if they can be combined with a load.6115// Don't advertise i32->i64 zextload as being free for LA64. 
It interacts6116// poorly with type legalization of compares preferring sext.6117if (auto *LD = dyn_cast<LoadSDNode>(Val)) {6118EVT MemVT = LD->getMemoryVT();6119if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&6120(LD->getExtensionType() == ISD::NON_EXTLOAD ||6121LD->getExtensionType() == ISD::ZEXTLOAD))6122return true;6123}61246125return TargetLowering::isZExtFree(Val, VT2);6126}61276128bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,6129EVT DstVT) const {6130return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;6131}61326133bool LoongArchTargetLowering::signExtendConstant(const ConstantInt *CI) const {6134return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);6135}61366137bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {6138// TODO: Support vectors.6139if (Y.getValueType().isVector())6140return false;61416142return !isa<ConstantSDNode>(Y);6143}61446145ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {6146// TODO: LAMCAS will use amcas{_DB,}.[bhwd] which does not require extension.6147return ISD::SIGN_EXTEND;6148}61496150bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall(6151EVT Type, bool IsSigned) const {6152if (Subtarget.is64Bit() && Type == MVT::i32)6153return true;61546155return IsSigned;6156}61576158bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {6159// Return false to suppress the unnecessary extensions if the LibCall6160// arguments or return value is a float narrower than GRLEN on a soft FP ABI.6161if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&6162Type.getSizeInBits() < Subtarget.getGRLen()))6163return false;6164return true;6165}616661676168
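
// Worked example for CC_LoongArch above (a sketch, assuming the LP64D ABI on
// LA64): an `i128` passed as a variadic argument is legalised into two i64
// parts. Because its original alignment equals 2*GRLen, the register-alignment
// rule skips the next GPR when an odd number of argument GPRs has already been
// used, so the two halves land in an aligned even/odd GPR pair; once a0-a7 are
// exhausted, CC_LoongArchAssign2GRLen places both halves on the stack, the
// first slot aligned to 16 bytes and the second immediately after it.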
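
// Note on the masked atomic expansion above (an illustrative summary, not the
// exact IR emitted): i8/i16 atomicrmw and cmpxchg are reported as
// MaskedIntrinsic by shouldExpandAtomicRMWInIR/shouldExpandAtomicCmpXchgInIR,
// so AtomicExpandPass rewrites them to operate on the containing aligned
// word, computing an aligned address, a shift amount and a mask. On LA64 this
// file then emits a call such as
//   @llvm.loongarch.masked.atomicrmw.add.i64(ptr %aligned, i64 %incr,
//                                            i64 %mask, i64 %ordering)
// with the operands sign-extended to i64 and the result truncated back to
// i32; Min/Max additionally receive a sign-extension shift amount so the
// loaded field can be compared as a signed value.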
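
// Note on decomposeMulByConstant above: the hook only reports whether a
// multiply by a constant is worth breaking up; the actual rewrite is done by
// the generic DAG combiner. Illustrative examples of immediates it accepts,
// with sketches of the intended decompositions:
//   x * 9    -> (x << 3) + x           ; Imm - 1 is a power of 2 (single ALSL)
//   x * 6    -> (x << 2) + (x << 1)    ; Imm - 2 is a power of 2 (SLLI + ALSL)
//   x * 4160 -> (x << 12) + (x << 6)   ; two set bits, outside [-2048, 4095]
// Conversely, immediates with 12 or more trailing zeros are rejected because
// they can be composed via a single LUI, and immediates such as 5120 (5 << 10)
// are rejected because (SLLI (ALSL x, x, sa), s) already covers them.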