Path: blob/main/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
//=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the WebAssemblyTargetLowering class.
///
//===----------------------------------------------------------------------===//

#include "WebAssemblyISelLowering.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "Utils/WebAssemblyTypeUtilities.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyTargetMachine.h"
#include "WebAssemblyUtilities.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

#define DEBUG_TYPE "wasm-lower"

WebAssemblyTargetLowering::WebAssemblyTargetLowering(
    const TargetMachine &TM, const WebAssemblySubtarget &STI)
    : TargetLowering(TM), Subtarget(&STI) {
  auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;

  // Booleans always contain 0 or 1.
  setBooleanContents(ZeroOrOneBooleanContent);
  // Except in SIMD vectors
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  // We don't know the microarchitecture here, so just reduce register pressure.
  setSchedulingPreference(Sched::RegPressure);
  // Tell ISel that we have a stack pointer.
  setStackPointerRegisterToSaveRestore(
      Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
  // Set up the register classes.
  addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
  addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
  addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
  addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
  if (Subtarget->hasSIMD128()) {
    addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
    addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
  }
  if (Subtarget->hasHalfPrecision()) {
    addRegisterClass(MVT::v8f16, &WebAssembly::V128RegClass);
  }
  if (Subtarget->hasReferenceTypes()) {
    addRegisterClass(MVT::externref, &WebAssembly::EXTERNREFRegClass);
    addRegisterClass(MVT::funcref, &WebAssembly::FUNCREFRegClass);
    if (Subtarget->hasExceptionHandling()) {
      addRegisterClass(MVT::exnref, &WebAssembly::EXNREFRegClass);
    }
  }
  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget->getRegisterInfo());

  // Transform loads and stores to pointers in address space 1 to loads and
  // stores to WebAssembly global variables, outside linear memory.
  for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) {
    setOperationAction(ISD::LOAD, T, Custom);
    setOperationAction(ISD::STORE, T, Custom);
  }
  if (Subtarget->hasSIMD128()) {
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
                   MVT::v2f64}) {
      setOperationAction(ISD::LOAD, T, Custom);
      setOperationAction(ISD::STORE, T, Custom);
    }
  }
  if (Subtarget->hasReferenceTypes()) {
    // We need custom load and store lowering for externref, funcref, and
    // Other. The MVT::Other here represents tables of reference types.
    for (auto T : {MVT::externref, MVT::funcref, MVT::Other}) {
      setOperationAction(ISD::LOAD, T, Custom);
      setOperationAction(ISD::STORE, T, Custom);
    }
  }

  setOperationAction(ISD::GlobalAddress, MVTPtr, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVTPtr, Custom);
  setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom);
  setOperationAction(ISD::JumpTable, MVTPtr, Custom);
  setOperationAction(ISD::BlockAddress, MVTPtr, Custom);
  setOperationAction(ISD::BRIND, MVT::Other, Custom);
  setOperationAction(ISD::CLEAR_CACHE, MVT::Other, Custom);

  // Take the default expansion for va_arg, va_copy, and va_end. There is no
  // default action for va_start, so we do that custom.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
    // Don't expand the floating-point types to constant pools.
    setOperationAction(ISD::ConstantFP, T, Legal);
    // Expand floating-point comparisons.
    for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
                    ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE})
      setCondCodeAction(CC, T, Expand);
    // Expand floating-point library function operators.
    for (auto Op :
         {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FMA})
      setOperationAction(Op, T, Expand);
    // Note supported floating-point library function operators that otherwise
    // default to expand.
    for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT,
                    ISD::FRINT, ISD::FROUNDEVEN})
      setOperationAction(Op, T, Legal);
    // Support minimum and maximum, which otherwise default to expand.
    setOperationAction(ISD::FMINIMUM, T, Legal);
    setOperationAction(ISD::FMAXIMUM, T, Legal);
    // WebAssembly currently has no builtin f16 support.
    setOperationAction(ISD::FP16_TO_FP, T, Expand);
    setOperationAction(ISD::FP_TO_FP16, T, Expand);
    setLoadExtAction(ISD::EXTLOAD, T, MVT::f16, Expand);
    setTruncStoreAction(T, MVT::f16, Expand);
  }

  if (Subtarget->hasHalfPrecision()) {
    setOperationAction(ISD::FMINIMUM, MVT::v8f16, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::v8f16, Legal);
  }

  // Expand unavailable integer operations.
  for (auto Op :
       {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU,
        ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS, ISD::SRA_PARTS,
        ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}) {
    for (auto T : {MVT::i32, MVT::i64})
      setOperationAction(Op, T, Expand);
    if (Subtarget->hasSIMD128())
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
        setOperationAction(Op, T, Expand);
  }

  if (Subtarget->hasNontrappingFPToInt())
    for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT})
      for (auto T : {MVT::i32, MVT::i64})
        setOperationAction(Op, T, Custom);

  // SIMD-specific configuration
  if (Subtarget->hasSIMD128()) {
    // Combine vector mask reductions into alltrue/anytrue
    setTargetDAGCombine(ISD::SETCC);

    // Convert vector to integer bitcasts to bitmask
    setTargetDAGCombine(ISD::BITCAST);

    // Hoist bitcasts out of shuffles
    setTargetDAGCombine(ISD::VECTOR_SHUFFLE);

    // Combine extends of extract_subvectors into widening ops
    setTargetDAGCombine({ISD::SIGN_EXTEND, ISD::ZERO_EXTEND});

    // Combine int_to_fp or fp_extend of extract_vectors and vice versa into
    // conversion ops
    setTargetDAGCombine({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_EXTEND,
                         ISD::EXTRACT_SUBVECTOR});

    // Combine fp_to_{s,u}int_sat or fp_round of concat_vectors or vice versa
    // into conversion ops
    setTargetDAGCombine({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT,
                         ISD::FP_ROUND, ISD::CONCAT_VECTORS});

    setTargetDAGCombine(ISD::TRUNCATE);

    // Support saturating add for i8x16 and i16x8
    for (auto Op : {ISD::SADDSAT, ISD::UADDSAT})
      for (auto T : {MVT::v16i8, MVT::v8i16})
        setOperationAction(Op, T, Legal);

    // Support integer abs
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
      setOperationAction(ISD::ABS, T, Legal);

    // Custom lower BUILD_VECTORs to minimize number of replace_lanes
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
                   MVT::v2f64})
      setOperationAction(ISD::BUILD_VECTOR, T, Custom);

    // We have custom shuffle lowering to expose the shuffle mask
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
                   MVT::v2f64})
      setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);

    // Support splatting
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
                   MVT::v2f64})
      setOperationAction(ISD::SPLAT_VECTOR, T, Legal);

    // Custom lowering since wasm shifts must have a scalar shift amount
    for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
        setOperationAction(Op, T, Custom);

    // Custom lower lane accesses to expand out variable indices
    for (auto Op : {ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT})
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32,
                     MVT::v2i64, MVT::v2f64})
        setOperationAction(Op, T, Custom);

    // There is no i8x16.mul instruction
    setOperationAction(ISD::MUL, MVT::v16i8, Expand);

    // There is no vector conditional select instruction
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
                   MVT::v2f64})
      setOperationAction(ISD::SELECT_CC, T, Expand);

    // Expand integer operations supported for scalars but not SIMD
    for (auto Op :
         {ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR})
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
        setOperationAction(Op, T, Expand);

    // But we do have integer min and max operations
    for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
      for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
        setOperationAction(Op, T, Legal);

    // And we have popcnt for i8x16. It can be used to expand ctlz/cttz.
    setOperationAction(ISD::CTPOP, MVT::v16i8, Legal);
    setOperationAction(ISD::CTLZ, MVT::v16i8, Expand);
    setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);

    // Custom lower bit counting operations for other types to scalarize them.
    for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP})
      for (auto T : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
        setOperationAction(Op, T, Custom);

    // Expand float operations supported for scalars but not SIMD
    for (auto Op : {ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
                    ISD::FEXP, ISD::FEXP2})
      for (auto T : {MVT::v4f32, MVT::v2f64})
        setOperationAction(Op, T, Expand);

    // Unsigned comparison operations are unavailable for i64x2 vectors.
    for (auto CC : {ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE})
      setCondCodeAction(CC, MVT::v2i64, Custom);

    // 64x2 conversions are not in the spec
    for (auto Op :
         {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT})
      for (auto T : {MVT::v2i64, MVT::v2f64})
        setOperationAction(Op, T, Expand);

    // But saturating fp_to_int conversions are
    for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT})
      setOperationAction(Op, MVT::v4i32, Custom);

    // Support vector extending
    for (auto T : MVT::integer_fixedlen_vector_valuetypes()) {
      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Custom);
      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Custom);
    }
  }

  // As a special case, these operators use the type to mean the type to
  // sign-extend from.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  if (!Subtarget->hasSignExt()) {
    // Sign extends are legal only when extending a vector extract
    auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
    for (auto T : {MVT::i8, MVT::i16, MVT::i32})
      setOperationAction(ISD::SIGN_EXTEND_INREG, T, Action);
  }
  for (auto T : MVT::integer_fixedlen_vector_valuetypes())
    setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand);

  // Dynamic stack allocation: use the default expansion.
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand);

  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
  setOperationAction(ISD::FrameIndex, MVT::i64, Custom);
  setOperationAction(ISD::CopyToReg, MVT::Other, Custom);

  // Expand these forms; we pattern-match the forms that we can handle in isel.
  for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
    for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
      setOperationAction(Op, T, Expand);

  // We have custom switch handling.
  setOperationAction(ISD::BR_JT, MVT::Other, Custom);

  // WebAssembly doesn't have:
  //  - Floating-point extending loads.
  //  - Floating-point truncating stores.
  //  - i1 extending loads.
  //  - truncating SIMD stores and most extending loads
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  for (auto T : MVT::integer_valuetypes())
    for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
      setLoadExtAction(Ext, T, MVT::i1, Promote);
  if (Subtarget->hasSIMD128()) {
    for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
                   MVT::v2f64}) {
      for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
        if (MVT(T) != MemT) {
          setTruncStoreAction(T, MemT, Expand);
          for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
            setLoadExtAction(Ext, T, MemT, Expand);
        }
      }
    }
    // But some vector extending loads are legal
    for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
      setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
      setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
      setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
    }
    setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Legal);
  }

  // Don't do anything clever with build_pairs
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // Trap lowers to wasm unreachable
  setOperationAction(ISD::TRAP, MVT::Other, Legal);
  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);

  // Exception handling intrinsics
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

  setMaxAtomicSizeInBitsSupported(64);

  // Override the __gnu_f2h_ieee/__gnu_h2f_ieee names so that the f32 name is
  // consistent with the f64 and f128 names.
  setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
  setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");

  // Define the emscripten name for return address helper.
  // TODO: when implementing other Wasm backends, make this generic or only do
  // this on emscripten depending on what they end up doing.
  setLibcallName(RTLIB::RETURN_ADDRESS, "emscripten_return_address");

  // Always convert switches to br_tables unless there is only one case, which
  // is equivalent to a simple branch. This reduces code size for wasm, and we
  // defer possible jump table optimizations to the VM.
  setMinimumJumpTableEntries(2);
}

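// Pointers into the externref/funcref address spaces are modeled as values of
// MVT::externref/MVT::funcref rather than as integers; all other address
// spaces use the default integer pointer type.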
MVT WebAssemblyTargetLowering::getPointerTy(const DataLayout &DL,
                                            uint32_t AS) const {
  if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF)
    return MVT::externref;
  if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF)
    return MVT::funcref;
  return TargetLowering::getPointerTy(DL, AS);
}

MVT WebAssemblyTargetLowering::getPointerMemTy(const DataLayout &DL,
                                               uint32_t AS) const {
  if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF)
    return MVT::externref;
  if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF)
    return MVT::funcref;
  return TargetLowering::getPointerMemTy(DL, AS);
}

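// Wasm has native atomic rmw add/sub/and/or/xor/xchg instructions; every
// other atomicrmw operation is expanded by LLVM into a cmpxchg loop, which
// lowers onto the wasm cmpxchg instructions.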
TargetLowering::AtomicExpansionKind
WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // We have wasm instructions for these
  switch (AI->getOperation()) {
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::And:
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
  case AtomicRMWInst::Xchg:
    return AtomicExpansionKind::None;
  default:
    break;
  }
  return AtomicExpansionKind::CmpXChg;
}

bool WebAssemblyTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
  // Implementation copied from X86TargetLowering.
  unsigned Opc = VecOp.getOpcode();

  // Assume target opcodes can't be scalarized.
  // TODO - do we have any exceptions?
  if (Opc >= ISD::BUILTIN_OP_END)
    return false;

  // If the vector op is not supported, try to convert to scalar.
  EVT VecVT = VecOp.getValueType();
  if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
    return true;

  // If the vector op is supported, but the scalar op is not, the transform may
  // not be worthwhile.
  EVT ScalarVT = VecVT.getScalarType();
  return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
}

FastISel *WebAssemblyTargetLowering::createFastISel(
    FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
  return WebAssembly::createFastISel(FuncInfo, LibInfo);
}

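// Round the bit width of the shifted type up to the next power of two (with a
// minimum of 8) to pick the shift-amount type, e.g. an i24 shift takes an i32
// amount. Shifts wider than 64 bits become libcalls, and compiler-rt takes an
// i32 count, so those use i32 regardless of the value type.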
MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
                                                      EVT VT) const {
  unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
  if (BitWidth > 1 && BitWidth < 8)
    BitWidth = 8;

  if (BitWidth > 64) {
    // The shift will be lowered to a libcall, and compiler-rt libcalls expect
    // the count to be an i32.
    BitWidth = 32;
    assert(BitWidth >= Log2_32_Ceil(VT.getSizeInBits()) &&
           "32-bit shift counts ought to be enough for anyone");
  }

  MVT Result = MVT::getIntegerVT(BitWidth);
  assert(Result != MVT::INVALID_SIMPLE_VALUE_TYPE &&
         "Unable to represent scalar shift amount type");
  return Result;
}

// Lower an fp-to-int conversion operator from the LLVM opcode, which has an
// undefined result on invalid/overflow, to the WebAssembly opcode, which
// traps on invalid/overflow.
static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
                                       MachineBasicBlock *BB,
                                       const TargetInstrInfo &TII,
                                       bool IsUnsigned, bool Int64,
                                       bool Float64, unsigned LoweredOpcode) {
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();

  Register OutReg = MI.getOperand(0).getReg();
  Register InReg = MI.getOperand(1).getReg();

  unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
  unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
  unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
  unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
  unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
  unsigned Eqz = WebAssembly::EQZ_I32;
  unsigned And = WebAssembly::AND_I32;
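  // Bounds for the range check below: a signed conversion is in range when
  // fabs(x) < 2^(N-1) (written as -(double)INT_MIN, which is exact since it
  // is a power of two), and an unsigned one when 0 <= x < 2^N. Out-of-range
  // and NaN inputs take the Substitute value instead of reaching the trapping
  // conversion instruction.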
  int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
  int64_t Substitute = IsUnsigned ? 0 : Limit;
  double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
  auto &Context = BB->getParent()->getFunction().getContext();
  Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);

  const BasicBlock *LLVMBB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
  MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB);
  MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);

  MachineFunction::iterator It = ++BB->getIterator();
  F->insert(It, FalseMBB);
  F->insert(It, TrueMBB);
  F->insert(It, DoneMBB);

  // Transfer the remainder of BB and its successor edges to DoneMBB.
  DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);

  BB->addSuccessor(TrueMBB);
  BB->addSuccessor(FalseMBB);
  TrueMBB->addSuccessor(DoneMBB);
  FalseMBB->addSuccessor(DoneMBB);

  unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
  Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
  Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
  CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
  TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));

  MI.eraseFromParent();
  // For signed numbers, we can do a single comparison to determine whether
  // fabs(x) is within range.
  if (IsUnsigned) {
    Tmp0 = InReg;
  } else {
    BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg);
  }
  BuildMI(BB, DL, TII.get(FConst), Tmp1)
      .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
  BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1);

  // For unsigned numbers, we have to do a separate comparison with zero.
  if (IsUnsigned) {
    Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
    Register SecondCmpReg =
        MRI.createVirtualRegister(&WebAssembly::I32RegClass);
    Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
    BuildMI(BB, DL, TII.get(FConst), Tmp1)
        .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
    BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
    BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg);
    CmpReg = AndReg;
  }

  BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg);

  // Create the CFG diamond to select between doing the conversion or using
  // the substitute value.
  BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
  BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
  BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
  BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
  BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
      .addReg(FalseReg)
      .addMBB(FalseMBB)
      .addReg(TrueReg)
      .addMBB(TrueMBB);

  return DoneMBB;
}

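// Replace a CALL_PARAMS/CALL_RESULTS pseudo-instruction pair with a single
// CALL, CALL_INDIRECT, RET_CALL, or RET_CALL_INDIRECT machine instruction,
// taking its defs from the results instruction and its uses from the params
// instruction. Indirect calls additionally get a type-index placeholder
// operand and the table they index into.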
static MachineBasicBlock *
LowerCallResults(MachineInstr &CallResults, DebugLoc DL, MachineBasicBlock *BB,
                 const WebAssemblySubtarget *Subtarget,
                 const TargetInstrInfo &TII) {
  MachineInstr &CallParams = *CallResults.getPrevNode();
  assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS);
  assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS ||
         CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS);

  bool IsIndirect =
      CallParams.getOperand(0).isReg() || CallParams.getOperand(0).isFI();
  bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS;

  bool IsFuncrefCall = false;
  if (IsIndirect && CallParams.getOperand(0).isReg()) {
    Register Reg = CallParams.getOperand(0).getReg();
    const MachineFunction *MF = BB->getParent();
    const MachineRegisterInfo &MRI = MF->getRegInfo();
    const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
    IsFuncrefCall = (TRC == &WebAssembly::FUNCREFRegClass);
    assert(!IsFuncrefCall || Subtarget->hasReferenceTypes());
  }

  unsigned CallOp;
  if (IsIndirect && IsRetCall) {
    CallOp = WebAssembly::RET_CALL_INDIRECT;
  } else if (IsIndirect) {
    CallOp = WebAssembly::CALL_INDIRECT;
  } else if (IsRetCall) {
    CallOp = WebAssembly::RET_CALL;
  } else {
    CallOp = WebAssembly::CALL;
  }

  MachineFunction &MF = *BB->getParent();
  const MCInstrDesc &MCID = TII.get(CallOp);
  MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL));

  // Move the function pointer to the end of the arguments for indirect calls
  if (IsIndirect) {
    auto FnPtr = CallParams.getOperand(0);
    CallParams.removeOperand(0);

    // For funcrefs, call_indirect is done through __funcref_call_table and the
    // funcref is always installed in slot 0 of the table, therefore instead of
    // having the function pointer added at the end of the params list, a zero
    // (the index in __funcref_call_table) is added.
    if (IsFuncrefCall) {
      Register RegZero =
          MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
      MachineInstrBuilder MIBC0 =
          BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);

      BB->insert(CallResults.getIterator(), MIBC0);
      MachineInstrBuilder(MF, CallParams).addReg(RegZero);
    } else
      CallParams.addOperand(FnPtr);
  }

  for (auto Def : CallResults.defs())
    MIB.add(Def);

  if (IsIndirect) {
    // Placeholder for the type index.
    MIB.addImm(0);
    // The table into which this call_indirect indexes.
    MCSymbolWasm *Table = IsFuncrefCall
                              ? WebAssembly::getOrCreateFuncrefCallTableSymbol(
                                    MF.getContext(), Subtarget)
                              : WebAssembly::getOrCreateFunctionTableSymbol(
                                    MF.getContext(), Subtarget);
    if (Subtarget->hasReferenceTypes()) {
      MIB.addSym(Table);
    } else {
      // For the MVP there is at most one table whose number is 0, but we can't
      // write a table symbol or issue relocations. Instead we just ensure the
      // table is live and write a zero.
      Table->setNoStrip();
      MIB.addImm(0);
    }
  }

  for (auto Use : CallParams.uses())
    MIB.add(Use);

  BB->insert(CallResults.getIterator(), MIB);
  CallParams.eraseFromParent();
  CallResults.eraseFromParent();

  // If this is a funcref call, to avoid hidden GC roots, we need to clear the
  // table slot with ref.null upon call_indirect return.
  //
  // This generates the following code, which comes right after a call_indirect
  // of a funcref:
  //
  //    i32.const 0
  //    ref.null func
  //    table.set __funcref_call_table
  if (IsIndirect && IsFuncrefCall) {
    MCSymbolWasm *Table = WebAssembly::getOrCreateFuncrefCallTableSymbol(
        MF.getContext(), Subtarget);
    Register RegZero =
        MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
    MachineInstr *Const0 =
        BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
    BB->insertAfter(MIB.getInstr()->getIterator(), Const0);

    Register RegFuncref =
        MF.getRegInfo().createVirtualRegister(&WebAssembly::FUNCREFRegClass);
    MachineInstr *RefNull =
        BuildMI(MF, DL, TII.get(WebAssembly::REF_NULL_FUNCREF), RegFuncref);
    BB->insertAfter(Const0->getIterator(), RefNull);

    MachineInstr *TableSet =
        BuildMI(MF, DL, TII.get(WebAssembly::TABLE_SET_FUNCREF))
            .addSym(Table)
            .addReg(RegZero)
            .addReg(RegFuncref);
    BB->insertAfter(RefNull->getIterator(), TableSet);
  }

  return BB;
}

MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
    MachineInstr &MI, MachineBasicBlock *BB) const {
  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();

  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected instr type to insert");
  case WebAssembly::FP_TO_SINT_I32_F32:
    return LowerFPToInt(MI, DL, BB, TII, false, false, false,
                        WebAssembly::I32_TRUNC_S_F32);
  case WebAssembly::FP_TO_UINT_I32_F32:
    return LowerFPToInt(MI, DL, BB, TII, true, false, false,
                        WebAssembly::I32_TRUNC_U_F32);
  case WebAssembly::FP_TO_SINT_I64_F32:
    return LowerFPToInt(MI, DL, BB, TII, false, true, false,
                        WebAssembly::I64_TRUNC_S_F32);
  case WebAssembly::FP_TO_UINT_I64_F32:
    return LowerFPToInt(MI, DL, BB, TII, true, true, false,
                        WebAssembly::I64_TRUNC_U_F32);
  case WebAssembly::FP_TO_SINT_I32_F64:
    return LowerFPToInt(MI, DL, BB, TII, false, false, true,
                        WebAssembly::I32_TRUNC_S_F64);
  case WebAssembly::FP_TO_UINT_I32_F64:
    return LowerFPToInt(MI, DL, BB, TII, true, false, true,
                        WebAssembly::I32_TRUNC_U_F64);
  case WebAssembly::FP_TO_SINT_I64_F64:
    return LowerFPToInt(MI, DL, BB, TII, false, true, true,
                        WebAssembly::I64_TRUNC_S_F64);
  case WebAssembly::FP_TO_UINT_I64_F64:
    return LowerFPToInt(MI, DL, BB, TII, true, true, true,
                        WebAssembly::I64_TRUNC_U_F64);
  case WebAssembly::CALL_RESULTS:
  case WebAssembly::RET_CALL_RESULTS:
    return LowerCallResults(MI, DL, BB, Subtarget, TII);
  }
}

const char *
WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) {
  case WebAssemblyISD::FIRST_NUMBER:
  case WebAssemblyISD::FIRST_MEM_OPCODE:
    break;
#define HANDLE_NODETYPE(NODE)                                                  \
  case WebAssemblyISD::NODE:                                                   \
    return "WebAssemblyISD::" #NODE;
#define HANDLE_MEM_NODETYPE(NODE) HANDLE_NODETYPE(NODE)
#include "WebAssemblyISD.def"
#undef HANDLE_MEM_NODETYPE
#undef HANDLE_NODETYPE
  }
  return nullptr;
}

std::pair<unsigned, const TargetRegisterClass *>
WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a
  // WebAssembly register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      assert(VT != MVT::iPTR && "Pointer MVT not expected here");
      if (Subtarget->hasSIMD128() && VT.isVector()) {
        if (VT.getSizeInBits() == 128)
          return std::make_pair(0U, &WebAssembly::V128RegClass);
      }
      if (VT.isInteger() && !VT.isVector()) {
        if (VT.getSizeInBits() <= 32)
          return std::make_pair(0U, &WebAssembly::I32RegClass);
        if (VT.getSizeInBits() <= 64)
          return std::make_pair(0U, &WebAssembly::I64RegClass);
      }
      if (VT.isFloatingPoint() && !VT.isVector()) {
        switch (VT.getSizeInBits()) {
        case 32:
          return std::make_pair(0U, &WebAssembly::F32RegClass);
        case 64:
          return std::make_pair(0U, &WebAssembly::F64RegClass);
        default:
          break;
        }
      }
      break;
    default:
      break;
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

bool WebAssemblyTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
  // Assume ctz is a relatively cheap operation.
  return true;
}

bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
  // Assume clz is a relatively cheap operation.
  return true;
}

bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                      const AddrMode &AM,
                                                      Type *Ty, unsigned AS,
                                                      Instruction *I) const {
  // WebAssembly offsets are added as unsigned without wrapping. The
  // isLegalAddressingMode gives us no way to determine if wrapping could be
  // happening, so we approximate this by accepting only non-negative offsets.
  if (AM.BaseOffs < 0)
    return false;

  // WebAssembly has no scale register operands.
  if (AM.Scale != 0)
    return false;

  // Everything else is legal.
  return true;
}

bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
    EVT /*VT*/, unsigned /*AddrSpace*/, Align /*Align*/,
    MachineMemOperand::Flags /*Flags*/, unsigned *Fast) const {
  // WebAssembly supports unaligned accesses, though it should be declared
  // with the p2align attribute on loads and stores which do so, and there
  // may be a performance impact. We tell LLVM they're "fast" because
  // for the kinds of things that LLVM uses this for (merging adjacent stores
  // of constants, etc.), WebAssembly implementations will either want the
  // unaligned access or they'll split anyway.
  if (Fast)
    *Fast = 1;
  return true;
}

bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
                                              AttributeList Attr) const {
  // The current thinking is that wasm engines will perform this optimization,
  // so we can save on code size.
  return true;
}

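// The only extending vector loads that are desirable are the ones with a wasm
// instruction behind them, i.e. those marked Legal in the constructor:
// v128.load8x8_{s,u}, v128.load16x4_{s,u}, and v128.load32x2_{s,u}.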
bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
  EVT ExtT = ExtVal.getValueType();
  EVT MemT = cast<LoadSDNode>(ExtVal->getOperand(0))->getValueType(0);
  return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
         (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
         (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
}

bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  // Wasm doesn't support function addresses with offsets
  const GlobalValue *GV = GA->getGlobal();
  return isa<Function>(GV) ? false : TargetLowering::isOffsetFoldingLegal(GA);
}

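// Wasm vector shift instructions take a scalar shift amount, so it is
// profitable to sink a splatted shift amount (insertelement + zero-mask
// shufflevector) into the block of the shift, where instruction selection can
// use the scalar value directly.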
bool WebAssemblyTargetLowering::shouldSinkOperands(
    Instruction *I, SmallVectorImpl<Use *> &Ops) const {
  using namespace llvm::PatternMatch;

  if (!I->getType()->isVectorTy() || !I->isShift())
    return false;

  Value *V = I->getOperand(1);
  // We don't need to sink constant splat.
  if (dyn_cast<Constant>(V))
    return false;

  if (match(V, m_Shuffle(m_InsertElt(m_Value(), m_Value(), m_ZeroInt()),
                         m_Value(), m_ZeroMask()))) {
    // Sink insert
    Ops.push_back(&cast<Instruction>(V)->getOperandUse(0));
    // Sink shuffle
    Ops.push_back(&I->getOperandUse(1));
    return true;
  }

  return false;
}

EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
                                                  LLVMContext &C,
                                                  EVT VT) const {
  if (VT.isVector())
    return VT.changeVectorElementTypeToInteger();

  // So far, all branch instructions in Wasm take an I32 condition.
  // The default TargetLowering::getSetCCResultType returns the pointer size,
  // which would be useful to reduce instruction counts when testing
  // against 64-bit pointers/values if at some point Wasm supports that.
  return EVT::getIntegerVT(C, 32);
}

bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                                   const CallInst &I,
                                                   MachineFunction &MF,
                                                   unsigned Intrinsic) const {
  switch (Intrinsic) {
  case Intrinsic::wasm_memory_atomic_notify:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    // atomic.notify instruction does not really load the memory specified with
    // this argument, but MachineMemOperand should either be load or store, so
    // we set this to a load.
    // FIXME Volatile isn't really correct, but currently all LLVM atomic
    // instructions are treated as volatiles in the backend, so we should be
    // consistent. The same applies for wasm_atomic_wait intrinsics too.
    Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
    return true;
  case Intrinsic::wasm_memory_atomic_wait32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
    return true;
  case Intrinsic::wasm_memory_atomic_wait64:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i64;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(8);
    Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
    return true;
  case Intrinsic::wasm_loadf16_f32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::f16;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(2);
    Info.flags = MachineMemOperand::MOLoad;
    return true;
  case Intrinsic::wasm_storef16_f32:
    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = MVT::f16;
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = 0;
    Info.align = Align(2);
    Info.flags = MachineMemOperand::MOStore;
    return true;
  default:
    return false;
  }
}

void WebAssemblyTargetLowering::computeKnownBitsForTargetNode(
    const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
    const SelectionDAG &DAG, unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    break;
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = Op.getConstantOperandVal(0);
    switch (IntNo) {
    default:
      break;
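    // bitmask produces an integer with one bit per input lane, so every
    // result bit above the lane count is known to be zero.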
    case Intrinsic::wasm_bitmask: {
      unsigned BitWidth = Known.getBitWidth();
      EVT VT = Op.getOperand(1).getSimpleValueType();
      unsigned PossibleBits = VT.getVectorNumElements();
      APInt ZeroMask = APInt::getHighBitsSet(BitWidth, BitWidth - PossibleBits);
      Known.Zero |= ZeroMask;
      break;
    }
    }
  }
  }
}

TargetLoweringBase::LegalizeTypeAction
WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const {
  if (VT.isFixedLengthVector()) {
    MVT EltVT = VT.getVectorElementType();
    // We have legal vector types with these lane types, so widening the
    // vector would let us use some of the lanes directly without having to
    // extend or truncate values.
    if (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
        EltVT == MVT::i64 || EltVT == MVT::f32 || EltVT == MVT::f64)
      return TypeWidenVector;
  }

  return TargetLoweringBase::getPreferredVectorAction(VT);
}

bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts(
    SDValue Op, const TargetLoweringOpt &TLO) const {
  // ISel process runs DAGCombiner after legalization; this step is called
  // SelectionDAG optimization phase. This post-legalization combining process
  // runs DAGCombiner on each node, and if there was a change to be made,
  // re-runs legalization again on it and its user nodes to make sure
  // everything is in a legalized state.
  //
  // The legalization calls lowering routines, and we do our custom lowering
  // for build_vectors (LowerBUILD_VECTOR), which converts undef vector
  // elements into zeros. But there is a set of routines in DAGCombiner that
  // turns unused (= not demanded) nodes into undef, among which
  // SimplifyDemandedVectorElts turns unused vector elements into undefs. But
  // this routine does not work with our custom LowerBUILD_VECTOR, which turns
  // undefs into zeros. This combination can result in an infinite loop, in
  // which undefs are converted to zeros in legalization and back to undefs in
  // combining.
  //
  // So after DAG is legalized, we prevent SimplifyDemandedVectorElts from
  // running for build_vectors.
  if (Op.getOpcode() == ISD::BUILD_VECTOR && TLO.LegalOps && TLO.LegalTys)
    return false;
  return true;
}

//===----------------------------------------------------------------------===//
// WebAssembly Lowering private implementation.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//

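// Emit a diagnostic for a construct this backend does not support, attached
// to the current function and the node's source location.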
static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
  MachineFunction &MF = DAG.getMachineFunction();
  DAG.getContext()->diagnose(
      DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
}

// Test whether the given calling convention is supported.
static bool callingConvSupported(CallingConv::ID CallConv) {
  // We currently support the language-independent, target-independent
  // conventions. We don't yet have a way to annotate calls with properties
  // like "cold", and we don't have any call-clobbered registers, so these are
  // mostly all handled the same.
  return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
         CallConv == CallingConv::Cold ||
         CallConv == CallingConv::PreserveMost ||
         CallConv == CallingConv::PreserveAll ||
         CallConv == CallingConv::CXX_FAST_TLS ||
         CallConv == CallingConv::WASM_EmscriptenInvoke ||
         CallConv == CallingConv::Swift;
}

SDValue
WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                     SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc DL = CLI.DL;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  MachineFunction &MF = DAG.getMachineFunction();
  auto Layout = MF.getDataLayout();

  CallingConv::ID CallConv = CLI.CallConv;
  if (!callingConvSupported(CallConv))
    fail(DL, DAG,
         "WebAssembly doesn't support language-specific or target-specific "
         "calling conventions yet");
  if (CLI.IsPatchPoint)
    fail(DL, DAG, "WebAssembly doesn't support patch point yet");

  if (CLI.IsTailCall) {
    auto NoTail = [&](const char *Msg) {
      if (CLI.CB && CLI.CB->isMustTailCall())
        fail(DL, DAG, Msg);
      CLI.IsTailCall = false;
    };

    if (!Subtarget->hasTailCall())
      NoTail("WebAssembly 'tail-call' feature not enabled");

    // Varargs calls cannot be tail calls because the buffer is on the stack
    if (CLI.IsVarArg)
      NoTail("WebAssembly does not support varargs tail calls");

    // Do not tail call unless caller and callee return types match
    const Function &F = MF.getFunction();
    const TargetMachine &TM = getTargetMachine();
    Type *RetTy = F.getReturnType();
    SmallVector<MVT, 4> CallerRetTys;
    SmallVector<MVT, 4> CalleeRetTys;
    computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
    computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys);
    bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
                      std::equal(CallerRetTys.begin(), CallerRetTys.end(),
                                 CalleeRetTys.begin());
    if (!TypesMatch)
      NoTail("WebAssembly tail call requires caller and callee return types "
             "to match");

    // If pointers to local stack values are passed, we cannot tail call
    if (CLI.CB) {
      for (auto &Arg : CLI.CB->args()) {
        Value *Val = Arg.get();
        // Trace the value back through pointer operations
        while (true) {
          Value *Src = Val->stripPointerCastsAndAliases();
          if (auto *GEP = dyn_cast<GetElementPtrInst>(Src))
            Src = GEP->getPointerOperand();
          if (Val == Src)
            break;
          Val = Src;
        }
        if (isa<AllocaInst>(Val)) {
          NoTail(
              "WebAssembly does not support tail calling with stack arguments");
          break;
        }
      }
    }
  }

  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;

  // The generic code may have added an sret argument. If we're lowering an
  // invoke function, the ABI requires that the function pointer be the first
  // argument, so we may have to swap the arguments.
  if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
      Outs[0].Flags.isSRet()) {
    std::swap(Outs[0], Outs[1]);
    std::swap(OutVals[0], OutVals[1]);
  }

  bool HasSwiftSelfArg = false;
  bool HasSwiftErrorArg = false;
  unsigned NumFixedArgs = 0;
  for (unsigned I = 0; I < Outs.size(); ++I) {
    const ISD::OutputArg &Out = Outs[I];
    SDValue &OutVal = OutVals[I];
    HasSwiftSelfArg |= Out.Flags.isSwiftSelf();
    HasSwiftErrorArg |= Out.Flags.isSwiftError();
    if (Out.Flags.isNest())
      fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
    if (Out.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
    if (Out.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
    if (Out.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
    if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
      auto &MFI = MF.getFrameInfo();
      int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
                                     Out.Flags.getNonZeroByValAlign(),
                                     /*isSS=*/false);
      SDValue SizeNode =
          DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
      SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
      Chain = DAG.getMemcpy(Chain, DL, FINode, OutVal, SizeNode,
                            Out.Flags.getNonZeroByValAlign(),
                            /*isVolatile*/ false, /*AlwaysInline=*/false,
                            /*CI=*/nullptr, std::nullopt, MachinePointerInfo(),
                            MachinePointerInfo());
      OutVal = FINode;
    }
    // Count the number of fixed args *after* legalization.
    NumFixedArgs += Out.IsFixed;
  }

  bool IsVarArg = CLI.IsVarArg;
  auto PtrVT = getPointerTy(Layout);

  // For swiftcc, emit additional swiftself and swifterror arguments
  // if there aren't any. These additional arguments are also added for the
  // callee signature. They are necessary to match callee and caller signature
  // for indirect call.
  if (CallConv == CallingConv::Swift) {
    if (!HasSwiftSelfArg) {
      NumFixedArgs++;
      ISD::OutputArg Arg;
      Arg.Flags.setSwiftSelf();
      CLI.Outs.push_back(Arg);
      SDValue ArgVal = DAG.getUNDEF(PtrVT);
      CLI.OutVals.push_back(ArgVal);
    }
    if (!HasSwiftErrorArg) {
      NumFixedArgs++;
      ISD::OutputArg Arg;
      Arg.Flags.setSwiftError();
      CLI.Outs.push_back(Arg);
      SDValue ArgVal = DAG.getUNDEF(PtrVT);
      CLI.OutVals.push_back(ArgVal);
    }
  }

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (IsVarArg) {
    // Outgoing non-fixed arguments are placed in a buffer. First
    // compute their offsets and the total amount of buffer space needed.
    for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
      const ISD::OutputArg &Out = Outs[I];
      SDValue &Arg = OutVals[I];
      EVT VT = Arg.getValueType();
      assert(VT != MVT::iPTR && "Legalized args should be concrete");
      Type *Ty = VT.getTypeForEVT(*DAG.getContext());
      Align Alignment =
          std::max(Out.Flags.getNonZeroOrigAlign(), Layout.getABITypeAlign(Ty));
      unsigned Offset =
          CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), Alignment);
      CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
                                        Offset, VT.getSimpleVT(),
                                        CCValAssign::Full));
    }
  }

  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();

  SDValue FINode;
  if (IsVarArg && NumBytes) {
    // For non-fixed arguments, next emit stores to store the argument values
    // to the stack buffer at the offsets computed above.
    int FI = MF.getFrameInfo().CreateStackObject(NumBytes,
                                                 Layout.getStackAlignment(),
                                                 /*isSS=*/false);
    unsigned ValNo = 0;
    SmallVector<SDValue, 8> Chains;
    for (SDValue Arg : drop_begin(OutVals, NumFixedArgs)) {
      assert(ArgLocs[ValNo].getValNo() == ValNo &&
             "ArgLocs should remain in order and only hold varargs args");
      unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
      FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
      SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
                                DAG.getConstant(Offset, DL, PtrVT));
      Chains.push_back(
          DAG.getStore(Chain, DL, Arg, Add,
                       MachinePointerInfo::getFixedStack(MF, FI, Offset)));
    }
    if (!Chains.empty())
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  } else if (IsVarArg) {
    FINode = DAG.getIntPtrConstant(0, DL);
  }

  if (Callee->getOpcode() == ISD::GlobalAddress) {
    // If the callee is a GlobalAddress node (quite common, every direct call
    // is) turn it into a TargetGlobalAddress node so that LowerGlobalAddress
    // doesn't add MO_GOT, which is not needed for direct calls.
    GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Callee);
    Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
                                        getPointerTy(DAG.getDataLayout()),
                                        GA->getOffset());
    Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL,
                         getPointerTy(DAG.getDataLayout()), Callee);
  }

  // Compute the operands for the CALLn node.
  SmallVector<SDValue, 16> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
  // isn't reliable.
  Ops.append(OutVals.begin(),
             IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
  // Add a pointer to the vararg buffer.
  if (IsVarArg)
    Ops.push_back(FINode);

  SmallVector<EVT, 8> InTys;
  for (const auto &In : Ins) {
    assert(!In.Flags.isByVal() && "byval is not valid for return values");
    assert(!In.Flags.isNest() && "nest is not valid for return values");
    if (In.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
    if (In.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
    if (In.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG,
           "WebAssembly hasn't implemented cons regs last return values");
    // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
    // registers.
    InTys.push_back(In.VT);
  }

  // Lastly, if this is a call to a funcref we need to add an instruction
  // table.set to the chain and transform the call.
  if (CLI.CB && WebAssembly::isWebAssemblyFuncrefType(
                    CLI.CB->getCalledOperand()->getType())) {
    // In the absence of function references proposal where a funcref call is
    // lowered to call_ref, using reference types we generate a table.set to
    // set the funcref to a special table used solely for this purpose,
    // followed by a call_indirect. Here we just generate the table set, and
    // return the SDValue of the table.set so that LowerCall can finalize the
    // lowering by generating the call_indirect.
    SDValue Chain = Ops[0];

    MCSymbolWasm *Table = WebAssembly::getOrCreateFuncrefCallTableSymbol(
        MF.getContext(), Subtarget);
    SDValue Sym = DAG.getMCSymbol(Table, PtrVT);
    SDValue TableSlot = DAG.getConstant(0, DL, MVT::i32);
    SDValue TableSetOps[] = {Chain, Sym, TableSlot, Callee};
    SDValue TableSet = DAG.getMemIntrinsicNode(
        WebAssemblyISD::TABLE_SET, DL, DAG.getVTList(MVT::Other), TableSetOps,
        MVT::funcref,
        // Machine Mem Operand args
        MachinePointerInfo(
            WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF),
        CLI.CB->getCalledOperand()->getPointerAlignment(DAG.getDataLayout()),
        MachineMemOperand::MOStore);

    Ops[0] = TableSet; // The new chain is the TableSet itself
  }

  if (CLI.IsTailCall) {
    // ret_calls do not return values to the current frame
    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
    return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops);
  }

  InTys.push_back(MVT::Other);
  SDVTList InTyList = DAG.getVTList(InTys);
  SDValue Res = DAG.getNode(WebAssemblyISD::CALL, DL, InTyList, Ops);

  for (size_t I = 0; I < Ins.size(); ++I)
    InVals.push_back(Res.getValue(I));

  // Return the chain
  return Res.getValue(Ins.size());
}

bool WebAssemblyTargetLowering::CanLowerReturn(
    CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    LLVMContext & /*Context*/) const {
  // WebAssembly can only handle returning tuples with multivalue enabled
  return WebAssembly::canLowerReturn(Outs.size(), Subtarget);
}

SDValue WebAssemblyTargetLowering::LowerReturn(
    SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
    SelectionDAG &DAG) const {
  assert(WebAssembly::canLowerReturn(Outs.size(), Subtarget) &&
         "MVP WebAssembly can only return up to one value");
  if (!callingConvSupported(CallConv))
    fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");

  SmallVector<SDValue, 4> RetOps(1, Chain);
  RetOps.append(OutVals.begin(), OutVals.end());
  Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);

  // Record the number and types of the return values.
  for (const ISD::OutputArg &Out : Outs) {
    assert(!Out.Flags.isByVal() && "byval is not valid for return values");
    assert(!Out.Flags.isNest() && "nest is not valid for return values");
    assert(Out.IsFixed && "non-fixed return value is not valid");
    if (Out.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
    if (Out.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
    if (Out.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
  }

  return Chain;
}

SDValue WebAssemblyTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  if (!callingConvSupported(CallConv))
    fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");

  MachineFunction &MF = DAG.getMachineFunction();
  auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();

  // Set up the incoming ARGUMENTS value, which serves to represent the
  // liveness of the incoming values before they're represented by virtual
  // registers.
  MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);

  bool HasSwiftErrorArg = false;
  bool HasSwiftSelfArg = false;
  for (const ISD::InputArg &In : Ins) {
    HasSwiftSelfArg |= In.Flags.isSwiftSelf();
    HasSwiftErrorArg |= In.Flags.isSwiftError();
    if (In.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
    if (In.Flags.isNest())
      fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
    if (In.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
    if (In.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
    // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
    // registers.
    InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
                                           DAG.getTargetConstant(InVals.size(),
                                                                 DL, MVT::i32))
                             : DAG.getUNDEF(In.VT));

    // Record the number and types of arguments.
    MFI->addParam(In.VT);
  }

  // For swiftcc, emit additional swiftself and swifterror arguments
  // if there aren't any. These additional arguments are also added for the
  // callee signature. They are necessary to match callee and caller signature
  // for indirect call.
  auto PtrVT = getPointerTy(MF.getDataLayout());
  if (CallConv == CallingConv::Swift) {
    if (!HasSwiftSelfArg) {
      MFI->addParam(PtrVT);
    }
    if (!HasSwiftErrorArg) {
      MFI->addParam(PtrVT);
    }
  }
  // Varargs are copied into a buffer allocated by the caller, and a pointer to
  // the buffer is passed as an argument.
  if (IsVarArg) {
    MVT PtrVT = getPointerTy(MF.getDataLayout());
    Register VarargVreg =
        MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrVT));
    MFI->setVarargBufferVreg(VarargVreg);
    Chain = DAG.getCopyToReg(
        Chain, DL, VarargVreg,
        DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
                    DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
    MFI->addParam(PtrVT);
  }

  // Record the number and types of arguments and results.
  SmallVector<MVT, 4> Params;
  SmallVector<MVT, 4> Results;
  computeSignatureVTs(MF.getFunction().getFunctionType(), &MF.getFunction(),
                      MF.getFunction(), DAG.getTarget(), Params, Results);
  for (MVT VT : Results)
    MFI->addResult(VT);
  // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
  // the param logic here with ComputeSignatureVTs
  assert(MFI->getParams().size() == Params.size() &&
         std::equal(MFI->getParams().begin(), MFI->getParams().end(),
                    Params.begin()));

  return Chain;
}

void WebAssemblyTargetLowering::ReplaceNodeResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  case ISD::SIGN_EXTEND_INREG:
    // Do not add any results, signifying that N should not be custom lowered
    // after all. This happens because simd128 turns on custom lowering for
    // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be
    // an illegal type.
    break;
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG:
    // Do not add any results, signifying that N should not be custom lowered.
    // EXTEND_VECTOR_INREG is implemented for some vectors, but not all.
    break;
  default:
    llvm_unreachable(
        "ReplaceNodeResults not implemented for this op for WebAssembly!");
  }
}

//===----------------------------------------------------------------------===//
// Custom lowering hooks.
//===----------------------------------------------------------------------===//

SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("unimplemented operation lowering");
    return SDValue();
  case ISD::FrameIndex:
    return LowerFrameIndex(Op, DAG);
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:
    return LowerGlobalTLSAddress(Op, DAG);
  case ISD::ExternalSymbol:
    return LowerExternalSymbol(Op, DAG);
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG);
  case ISD::BR_JT:
    return LowerBR_JT(Op, DAG);
  case ISD::VASTART:
    return LowerVASTART(Op, DAG);
  case ISD::BlockAddress:
  case ISD::BRIND:
    fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
    return SDValue();
  case ISD::RETURNADDR:
    return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:
    return LowerFRAMEADDR(Op, DAG);
  case ISD::CopyToReg:
    return LowerCopyToReg(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
  case ISD::INSERT_VECTOR_ELT:
    return LowerAccessVectorElement(Op, DAG);
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_WO_CHAIN:
  case ISD::INTRINSIC_W_CHAIN:
    return LowerIntrinsic(Op, DAG);
  case ISD::SIGN_EXTEND_INREG:
    return LowerSIGN_EXTEND_INREG(Op, DAG);
  case ISD::ZERO_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
    return LowerEXTEND_VECTOR_INREG(Op, DAG);
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::SETCC:
    return LowerSETCC(Op, DAG);
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    return LowerShift(Op, DAG);
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT:
    return LowerFP_TO_INT_SAT(Op, DAG);
  case ISD::LOAD:
    return LowerLoad(Op, DAG);
  case ISD::STORE:
    return LowerStore(Op, DAG);
  case ISD::CTPOP:
  case ISD::CTLZ:
  case ISD::CTTZ:
    return DAG.UnrollVectorOp(Op.getNode());
  case ISD::CLEAR_CACHE:
    report_fatal_error("llvm.clear_cache is not supported on wasm");
  }
}

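// Helpers for LowerLoad/LowerStore below: addresses of wasm globals and of
// stack objects that were assigned to wasm locals are not real memory
// addresses, so such accesses are rewritten to global.get/global.set and
// local.get/local.set nodes instead of ordinary loads and stores.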
void WebAssemblyTargetLowering::ReplaceNodeResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  case ISD::SIGN_EXTEND_INREG:
    // Do not add any results, signifying that N should not be custom lowered
    // after all. This happens because simd128 turns on custom lowering for
    // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
    // illegal type.
    break;
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG:
    // Do not add any results, signifying that N should not be custom lowered.
    // EXTEND_VECTOR_INREG is implemented for some vectors, but not all.
    break;
  default:
    llvm_unreachable(
        "ReplaceNodeResults not implemented for this op for WebAssembly!");
  }
}

//===----------------------------------------------------------------------===//
// Custom lowering hooks.
//===----------------------------------------------------------------------===//

SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("unimplemented operation lowering");
    return SDValue();
  case ISD::FrameIndex:
    return LowerFrameIndex(Op, DAG);
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:
    return LowerGlobalTLSAddress(Op, DAG);
  case ISD::ExternalSymbol:
    return LowerExternalSymbol(Op, DAG);
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG);
  case ISD::BR_JT:
    return LowerBR_JT(Op, DAG);
  case ISD::VASTART:
    return LowerVASTART(Op, DAG);
  case ISD::BlockAddress:
  case ISD::BRIND:
    fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
    return SDValue();
  case ISD::RETURNADDR:
    return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:
    return LowerFRAMEADDR(Op, DAG);
  case ISD::CopyToReg:
    return LowerCopyToReg(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
  case ISD::INSERT_VECTOR_ELT:
    return LowerAccessVectorElement(Op, DAG);
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_WO_CHAIN:
  case ISD::INTRINSIC_W_CHAIN:
    return LowerIntrinsic(Op, DAG);
  case ISD::SIGN_EXTEND_INREG:
    return LowerSIGN_EXTEND_INREG(Op, DAG);
  case ISD::ZERO_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
    return LowerEXTEND_VECTOR_INREG(Op, DAG);
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::SETCC:
    return LowerSETCC(Op, DAG);
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    return LowerShift(Op, DAG);
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT:
    return LowerFP_TO_INT_SAT(Op, DAG);
  case ISD::LOAD:
    return LowerLoad(Op, DAG);
  case ISD::STORE:
    return LowerStore(Op, DAG);
  case ISD::CTPOP:
  case ISD::CTLZ:
  case ISD::CTTZ:
    return DAG.UnrollVectorOp(Op.getNode());
  case ISD::CLEAR_CACHE:
    report_fatal_error("llvm.clear_cache is not supported on wasm");
  }
}

static bool IsWebAssemblyGlobal(SDValue Op) {
  if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
    return WebAssembly::isWasmVarAddressSpace(GA->getAddressSpace());

  return false;
}

static std::optional<unsigned> IsWebAssemblyLocal(SDValue Op,
                                                  SelectionDAG &DAG) {
  const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op);
  if (!FI)
    return std::nullopt;

  auto &MF = DAG.getMachineFunction();
  return WebAssemblyFrameLowering::getLocalForStackObject(MF, FI->getIndex());
}

SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
  const SDValue &Value = SN->getValue();
  const SDValue &Base = SN->getBasePtr();
  const SDValue &Offset = SN->getOffset();

  if (IsWebAssemblyGlobal(Base)) {
    if (!Offset->isUndef())
      report_fatal_error("unexpected offset when storing to webassembly global",
                         false);

    SDVTList Tys = DAG.getVTList(MVT::Other);
    SDValue Ops[] = {SN->getChain(), Value, Base};
    return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_SET, DL, Tys, Ops,
                                   SN->getMemoryVT(), SN->getMemOperand());
  }

  if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
    if (!Offset->isUndef())
      report_fatal_error("unexpected offset when storing to webassembly local",
                         false);

    SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
    SDVTList Tys = DAG.getVTList(MVT::Other); // The chain.
    SDValue Ops[] = {SN->getChain(), Idx, Value};
    return DAG.getNode(WebAssemblyISD::LOCAL_SET, DL, Tys, Ops);
  }

  if (WebAssembly::isWasmVarAddressSpace(SN->getAddressSpace()))
    report_fatal_error(
        "Encountered an unlowerable store to the wasm_var address space",
        false);

  return Op;
}

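// Illustrative sketch (an assumed example): stores whose base is in the
// wasm_var address space never touch linear memory. Roughly,
//   store i32 %v, ptr addrspace(1) @g  ==>  global.set $g
// and a store to a stack object that maps onto a wasm local becomes
//   local.set $n
// instead of an i32.store.
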
SDValue WebAssemblyTargetLowering::LowerLoad(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc DL(Op);
  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
  const SDValue &Base = LN->getBasePtr();
  const SDValue &Offset = LN->getOffset();

  if (IsWebAssemblyGlobal(Base)) {
    if (!Offset->isUndef())
      report_fatal_error(
          "unexpected offset when loading from webassembly global", false);

    SDVTList Tys = DAG.getVTList(LN->getValueType(0), MVT::Other);
    SDValue Ops[] = {LN->getChain(), Base};
    return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_GET, DL, Tys, Ops,
                                   LN->getMemoryVT(), LN->getMemOperand());
  }

  if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
    if (!Offset->isUndef())
      report_fatal_error(
          "unexpected offset when loading from webassembly local", false);

    SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
    EVT LocalVT = LN->getValueType(0);
    SDValue LocalGet = DAG.getNode(WebAssemblyISD::LOCAL_GET, DL, LocalVT,
                                   {LN->getChain(), Idx});
    SDValue Result = DAG.getMergeValues({LocalGet, LN->getChain()}, DL);
    assert(Result->getNumValues() == 2 && "Loads must carry a chain!");
    return Result;
  }

  if (WebAssembly::isWasmVarAddressSpace(LN->getAddressSpace()))
    report_fatal_error(
        "Encountered an unlowerable load from the wasm_var address space",
        false);

  return Op;
}

SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDValue Src = Op.getOperand(2);
  if (isa<FrameIndexSDNode>(Src.getNode())) {
    // CopyToReg nodes don't support FrameIndex operands. Other targets select
    // the FI to some LEA-like instruction, but since we don't have that, we
    // need to insert some kind of instruction that can take an FI operand and
    // produce a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
    // local.copy between Op and its FI operand.
    SDValue Chain = Op.getOperand(0);
    SDLoc DL(Op);
    Register Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
    EVT VT = Src.getValueType();
    SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
                                                   : WebAssembly::COPY_I64,
                                    DL, VT, Src),
                 0);
    return Op.getNode()->getNumValues() == 1
               ? DAG.getCopyToReg(Chain, DL, Reg, Copy)
               : DAG.getCopyToReg(Chain, DL, Reg, Copy,
                                  Op.getNumOperands() == 4 ? Op.getOperand(3)
                                                           : SDValue());
  }
  return SDValue();
}

SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
                                                   SelectionDAG &DAG) const {
  int FI = cast<FrameIndexSDNode>(Op)->getIndex();
  return DAG.getTargetFrameIndex(FI, Op.getValueType());
}

SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);

  if (!Subtarget->getTargetTriple().isOSEmscripten()) {
    fail(DL, DAG,
         "Non-Emscripten WebAssembly hasn't implemented "
         "__builtin_return_address");
    return SDValue();
  }

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  unsigned Depth = Op.getConstantOperandVal(0);
  MakeLibCallOptions CallOptions;
  return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
                     {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
      .first;
}

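// Illustrative note (hedged): wasm code cannot walk its own call stack, so
// LowerRETURNADDR above turns `__builtin_return_address(Depth)` into the
// RTLIB::RETURN_ADDRESS libcall, which Emscripten's runtime resolves; other
// OS targets report failure instead.
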
SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // Non-zero depths are not supported by WebAssembly currently. Use the
  // legalizer's default expansion, which is to return 0 (what this function is
  // documented to do).
  if (Op.getConstantOperandVal(0) > 0)
    return SDValue();

  DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
  EVT VT = Op.getValueType();
  Register FP =
      Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
  return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
}

SDValue
WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const auto *GA = cast<GlobalAddressSDNode>(Op);

  MachineFunction &MF = DAG.getMachineFunction();
  if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
    report_fatal_error("cannot use thread-local storage without bulk memory",
                       false);

  const GlobalValue *GV = GA->getGlobal();

  // Currently only Emscripten supports dynamic linking with threads. Therefore,
  // on other targets, if we have thread-local storage, only the local-exec
  // model is possible.
  auto model = Subtarget->getTargetTriple().isOSEmscripten()
                   ? GV->getThreadLocalMode()
                   : GlobalValue::LocalExecTLSModel;

  // Unsupported TLS modes
  assert(model != GlobalValue::NotThreadLocal);
  assert(model != GlobalValue::InitialExecTLSModel);

  if (model == GlobalValue::LocalExecTLSModel ||
      model == GlobalValue::LocalDynamicTLSModel ||
      (model == GlobalValue::GeneralDynamicTLSModel &&
       getTargetMachine().shouldAssumeDSOLocal(GV))) {
    // For DSO-local TLS variables we use the offset from __tls_base.

    MVT PtrVT = getPointerTy(DAG.getDataLayout());
    auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
                                       : WebAssembly::GLOBAL_GET_I32;
    const char *BaseName = MF.createExternalSymbolName("__tls_base");

    SDValue BaseAddr(
        DAG.getMachineNode(GlobalGet, DL, PtrVT,
                           DAG.getTargetExternalSymbol(BaseName, PtrVT)),
        0);

    SDValue TLSOffset = DAG.getTargetGlobalAddress(
        GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL);
    SDValue SymOffset =
        DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, TLSOffset);

    return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymOffset);
  }

  assert(model == GlobalValue::GeneralDynamicTLSModel);

  EVT VT = Op.getValueType();
  return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
                     DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
                                                GA->getOffset(),
                                                WebAssemblyII::MO_GOT_TLS));
}

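// Illustrative sketch (pseudo-assembly, assumed example): a DSO-local
// `thread_local int t;` on wasm32 is addressed relative to __tls_base:
//   global.get __tls_base
//   i32.const  t@TLSREL    ;; offset of t within the TLS block
//   i32.add
// while general-dynamic accesses load the address from a GOT.TLS entry.
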
SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const auto *GA = cast<GlobalAddressSDNode>(Op);
  EVT VT = Op.getValueType();
  assert(GA->getTargetFlags() == 0 &&
         "Unexpected target flags on generic GlobalAddressSDNode");
  if (!WebAssembly::isValidAddressSpace(GA->getAddressSpace()))
    fail(DL, DAG, "Invalid address space for WebAssembly target");

  unsigned OperandFlags = 0;
  const GlobalValue *GV = GA->getGlobal();
  // Since WebAssembly tables cannot yet be shared across modules, we don't
  // need special treatment for tables in PIC mode.
  if (isPositionIndependent() &&
      !WebAssembly::isWebAssemblyTableType(GV->getValueType())) {
    if (getTargetMachine().shouldAssumeDSOLocal(GV)) {
      MachineFunction &MF = DAG.getMachineFunction();
      MVT PtrVT = getPointerTy(MF.getDataLayout());
      const char *BaseName;
      if (GV->getValueType()->isFunctionTy()) {
        BaseName = MF.createExternalSymbolName("__table_base");
        OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL;
      } else {
        BaseName = MF.createExternalSymbolName("__memory_base");
        OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL;
      }
      SDValue BaseAddr =
          DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
                      DAG.getTargetExternalSymbol(BaseName, PtrVT));

      SDValue SymAddr = DAG.getNode(
          WebAssemblyISD::WrapperREL, DL, VT,
          DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
                                     OperandFlags));

      return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
    }
    OperandFlags = WebAssemblyII::MO_GOT;
  }

  return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
                     DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
                                                GA->getOffset(), OperandFlags));
}

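// Illustrative sketch (pseudo-assembly, assumed example): in PIC code the
// address of a DSO-local data symbol `g` is __memory_base plus a
// module-relative offset:
//   global.get __memory_base
//   i32.const  g@MBREL
//   i32.add
// Function addresses use __table_base the same way, and non-DSO-local
// symbols are instead loaded from their GOT entry.
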
SDValue
WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const auto *ES = cast<ExternalSymbolSDNode>(Op);
  EVT VT = Op.getValueType();
  assert(ES->getTargetFlags() == 0 &&
         "Unexpected target flags on generic ExternalSymbolSDNode");
  return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
                     DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
}

SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // There's no need for a Wrapper node because we always incorporate a jump
  // table operand into a BR_TABLE instruction, rather than ever
  // materializing it in a register.
  const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
                                JT->getTargetFlags());
}

SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Chain = Op.getOperand(0);
  const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
  SDValue Index = Op.getOperand(2);
  assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Index);

  MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
  const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;

  // Add an operand for each case.
  for (auto *MBB : MBBs)
    Ops.push_back(DAG.getBasicBlock(MBB));

  // Add the first MBB as a dummy default target for now. This will be replaced
  // with the proper default target (and the preceding range check eliminated)
  // if possible by WebAssemblyFixBrTableDefaults.
  Ops.push_back(DAG.getBasicBlock(*MBBs.begin()));
  return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
}

SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());

  auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

  SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
                                    MFI->getVarargBufferVreg(), PtrVT);
  return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
                                                  SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned IntNo;
  switch (Op.getOpcode()) {
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN:
    IntNo = Op.getConstantOperandVal(1);
    break;
  case ISD::INTRINSIC_WO_CHAIN:
    IntNo = Op.getConstantOperandVal(0);
    break;
  default:
    llvm_unreachable("Invalid intrinsic");
  }
  SDLoc DL(Op);

  switch (IntNo) {
  default:
    return SDValue(); // Don't custom lower most intrinsics.

  case Intrinsic::wasm_lsda: {
    auto PtrVT = getPointerTy(MF.getDataLayout());
    const char *SymName = MF.createExternalSymbolName(
        "GCC_except_table" + std::to_string(MF.getFunctionNumber()));
    if (isPositionIndependent()) {
      SDValue Node = DAG.getTargetExternalSymbol(
          SymName, PtrVT, WebAssemblyII::MO_MEMORY_BASE_REL);
      const char *BaseName = MF.createExternalSymbolName("__memory_base");
      SDValue BaseAddr =
          DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
                      DAG.getTargetExternalSymbol(BaseName, PtrVT));
      SDValue SymAddr =
          DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, Node);
      return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymAddr);
    }
    SDValue Node = DAG.getTargetExternalSymbol(SymName, PtrVT);
    return DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, Node);
  }

  case Intrinsic::wasm_shuffle: {
    // Drop in-chain and replace undefs, but otherwise pass through unchanged.
    SDValue Ops[18];
    size_t OpIdx = 0;
    Ops[OpIdx++] = Op.getOperand(1);
    Ops[OpIdx++] = Op.getOperand(2);
    while (OpIdx < 18) {
      const SDValue &MaskIdx = Op.getOperand(OpIdx + 1);
      if (MaskIdx.isUndef() || MaskIdx.getNode()->getAsZExtVal() >= 32) {
        bool isTarget = MaskIdx.getNode()->getOpcode() == ISD::TargetConstant;
        Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32, isTarget);
      } else {
        Ops[OpIdx++] = MaskIdx;
      }
    }
    return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
  }
  }
}

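// Illustrative note (assumed example): in the wasm_shuffle lowering above, a
// mask operand that is undef or out of range, e.g.
//   @llvm.wasm.shuffle(%a, %b, ..., i32 undef, ...)
// is canonicalized to lane index 0, so the SHUFFLE node is always built from
// sixteen well-defined byte indices.
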
SDValue
WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // If sign extension operations are disabled, allow sext_inreg only if operand
  // is a vector extract of an i8 or i16 lane. SIMD does not depend on sign
  // extension operations, but allowing sext_inreg in this context lets us have
  // simple patterns to select extract_lane_s instructions. Expanding sext_inreg
  // everywhere would be simpler in this file, but would necessitate large and
  // brittle patterns to undo the expansion and select extract_lane_s
  // instructions.
  assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
  if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    return SDValue();

  const SDValue &Extract = Op.getOperand(0);
  MVT VecT = Extract.getOperand(0).getSimpleValueType();
  if (VecT.getVectorElementType().getSizeInBits() > 32)
    return SDValue();
  MVT ExtractedLaneT =
      cast<VTSDNode>(Op.getOperand(1).getNode())->getVT().getSimpleVT();
  MVT ExtractedVecT =
      MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
  if (ExtractedVecT == VecT)
    return Op;

  // Bitcast vector to appropriate type to ensure ISel pattern coverage
  const SDNode *Index = Extract.getOperand(1).getNode();
  if (!isa<ConstantSDNode>(Index))
    return SDValue();
  unsigned IndexVal = Index->getAsZExtVal();
  unsigned Scale =
      ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
  assert(Scale > 1);
  SDValue NewIndex =
      DAG.getConstant(IndexVal * Scale, DL, Index->getValueType(0));
  SDValue NewExtract = DAG.getNode(
      ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(),
      DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract,
                     Op.getOperand(1));
}

SDValue
WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Src = Op.getOperand(0);
  EVT SrcVT = Src.getValueType();

  if (SrcVT.getVectorElementType() == MVT::i1 ||
      SrcVT.getVectorElementType() == MVT::i64)
    return SDValue();

  assert(VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits() == 0 &&
         "Unexpected extension factor.");
  unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits();

  if (Scale != 2 && Scale != 4 && Scale != 8)
    return SDValue();

  unsigned Ext;
  switch (Op.getOpcode()) {
  case ISD::ZERO_EXTEND_VECTOR_INREG:
    Ext = WebAssemblyISD::EXTEND_LOW_U;
    break;
  case ISD::SIGN_EXTEND_VECTOR_INREG:
    Ext = WebAssemblyISD::EXTEND_LOW_S;
    break;
  }

  SDValue Ret = Src;
  while (Scale != 1) {
    Ret = DAG.getNode(Ext, DL,
                      Ret.getValueType()
                          .widenIntegerVectorElementType(*DAG.getContext())
                          .getHalfNumVectorElementsVT(*DAG.getContext()),
                      Ret);
    Scale /= 2;
  }
  assert(Ret.getValueType() == VT);
  return Ret;
}

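// Illustrative sketch (assumed example): there is no single instruction that
// extends the low four i8 lanes of a v16i8 all the way to i32, so the loop
// above emits one widening step per doubling:
//   v16i8 --extend_low_s--> v8i16 --extend_low_s--> v4i32
// which selects to i16x8.extend_low_i8x16_s followed by
// i32x4.extend_low_i16x8_s.
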
static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  if (Op.getValueType() != MVT::v2f64)
    return SDValue();

  auto GetConvertedLane = [](SDValue Op, unsigned &Opcode, SDValue &SrcVec,
                             unsigned &Index) -> bool {
    switch (Op.getOpcode()) {
    case ISD::SINT_TO_FP:
      Opcode = WebAssemblyISD::CONVERT_LOW_S;
      break;
    case ISD::UINT_TO_FP:
      Opcode = WebAssemblyISD::CONVERT_LOW_U;
      break;
    case ISD::FP_EXTEND:
      Opcode = WebAssemblyISD::PROMOTE_LOW;
      break;
    default:
      return false;
    }

    auto ExtractVector = Op.getOperand(0);
    if (ExtractVector.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return false;

    if (!isa<ConstantSDNode>(ExtractVector.getOperand(1).getNode()))
      return false;

    SrcVec = ExtractVector.getOperand(0);
    Index = ExtractVector.getConstantOperandVal(1);
    return true;
  };

  unsigned LHSOpcode, RHSOpcode, LHSIndex, RHSIndex;
  SDValue LHSSrcVec, RHSSrcVec;
  if (!GetConvertedLane(Op.getOperand(0), LHSOpcode, LHSSrcVec, LHSIndex) ||
      !GetConvertedLane(Op.getOperand(1), RHSOpcode, RHSSrcVec, RHSIndex))
    return SDValue();

  if (LHSOpcode != RHSOpcode)
    return SDValue();

  MVT ExpectedSrcVT;
  switch (LHSOpcode) {
  case WebAssemblyISD::CONVERT_LOW_S:
  case WebAssemblyISD::CONVERT_LOW_U:
    ExpectedSrcVT = MVT::v4i32;
    break;
  case WebAssemblyISD::PROMOTE_LOW:
    ExpectedSrcVT = MVT::v4f32;
    break;
  }
  if (LHSSrcVec.getValueType() != ExpectedSrcVT)
    return SDValue();

  auto Src = LHSSrcVec;
  if (LHSIndex != 0 || RHSIndex != 1 || LHSSrcVec != RHSSrcVec) {
    // Shuffle the source vector so that the converted lanes are the low lanes.
    Src = DAG.getVectorShuffle(
        ExpectedSrcVT, DL, LHSSrcVec, RHSSrcVec,
        {static_cast<int>(LHSIndex), static_cast<int>(RHSIndex) + 4, -1, -1});
  }
  return DAG.getNode(LHSOpcode, DL, MVT::v2f64, Src);
}

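// Illustrative sketch (assumed example): a v2f64 build_vector of
//   (sitofp (extract_vector_elt v4i32 %v, 0)),
//   (sitofp (extract_vector_elt v4i32 %v, 1))
// is matched above into a single f64x2.convert_low_i32x4_s of %v; if the
// converted lanes are not already lanes 0 and 1, a 4-lane shuffle first
// moves them into the low half.
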
SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                                     SelectionDAG &DAG) const {
  if (auto ConvertLow = LowerConvertLow(Op, DAG))
    return ConvertLow;

  SDLoc DL(Op);
  const EVT VecT = Op.getValueType();
  const EVT LaneT = Op.getOperand(0).getValueType();
  const size_t Lanes = Op.getNumOperands();
  bool CanSwizzle = VecT == MVT::v16i8;

  // BUILD_VECTORs are lowered to the instruction that initializes the highest
  // possible number of lanes at once followed by a sequence of replace_lane
  // instructions to individually initialize any remaining lanes.

  // TODO: Tune this. For example, lanewise swizzling is very expensive, so
  // swizzled lanes should be given greater weight.

  // TODO: Investigate looping rather than always extracting/replacing specific
  // lanes to fill gaps.

  auto IsConstant = [](const SDValue &V) {
    return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
  };

  // Returns the source vector and index vector pair if they exist. Checks for:
  // (extract_vector_elt
  //   $src,
  //   (sign_extend_inreg (extract_vector_elt $indices, $i))
  // )
  auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
    auto Bail = std::make_pair(SDValue(), SDValue());
    if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return Bail;
    const SDValue &SwizzleSrc = Lane->getOperand(0);
    const SDValue &IndexExt = Lane->getOperand(1);
    if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
      return Bail;
    const SDValue &Index = IndexExt->getOperand(0);
    if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return Bail;
    const SDValue &SwizzleIndices = Index->getOperand(0);
    if (SwizzleSrc.getValueType() != MVT::v16i8 ||
        SwizzleIndices.getValueType() != MVT::v16i8 ||
        Index->getOperand(1)->getOpcode() != ISD::Constant ||
        Index->getConstantOperandVal(1) != I)
      return Bail;
    return std::make_pair(SwizzleSrc, SwizzleIndices);
  };

  // If the lane is extracted from another vector at a constant index, return
  // that vector. The source vector must not have more lanes than the dest
  // because the shufflevector indices are in terms of the destination lanes and
  // would not be able to address the smaller individual source lanes.
  auto GetShuffleSrc = [&](const SDValue &Lane) {
    if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return SDValue();
    if (!isa<ConstantSDNode>(Lane->getOperand(1).getNode()))
      return SDValue();
    if (Lane->getOperand(0).getValueType().getVectorNumElements() >
        VecT.getVectorNumElements())
      return SDValue();
    return Lane->getOperand(0);
  };

  using ValueEntry = std::pair<SDValue, size_t>;
  SmallVector<ValueEntry, 16> SplatValueCounts;

  using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
  SmallVector<SwizzleEntry, 16> SwizzleCounts;

  using ShuffleEntry = std::pair<SDValue, size_t>;
  SmallVector<ShuffleEntry, 16> ShuffleCounts;

  auto AddCount = [](auto &Counts, const auto &Val) {
    auto CountIt =
        llvm::find_if(Counts, [&Val](auto E) { return E.first == Val; });
    if (CountIt == Counts.end()) {
      Counts.emplace_back(Val, 1);
    } else {
      CountIt->second++;
    }
  };

  auto GetMostCommon = [](auto &Counts) {
    auto CommonIt =
        std::max_element(Counts.begin(), Counts.end(), llvm::less_second());
    assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
    return *CommonIt;
  };

  size_t NumConstantLanes = 0;

  // Count eligible lanes for each type of vector creation op
  for (size_t I = 0; I < Lanes; ++I) {
    const SDValue &Lane = Op->getOperand(I);
    if (Lane.isUndef())
      continue;

    AddCount(SplatValueCounts, Lane);

    if (IsConstant(Lane))
      NumConstantLanes++;
    if (auto ShuffleSrc = GetShuffleSrc(Lane))
      AddCount(ShuffleCounts, ShuffleSrc);
    if (CanSwizzle) {
      auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
      if (SwizzleSrcs.first)
        AddCount(SwizzleCounts, SwizzleSrcs);
    }
  }

  SDValue SplatValue;
  size_t NumSplatLanes;
  std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);

  SDValue SwizzleSrc;
  SDValue SwizzleIndices;
  size_t NumSwizzleLanes = 0;
  if (SwizzleCounts.size())
    std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
                          NumSwizzleLanes) = GetMostCommon(SwizzleCounts);

  // Shuffles can draw from up to two vectors, so find the two most common
  // sources.
  SDValue ShuffleSrc1, ShuffleSrc2;
  size_t NumShuffleLanes = 0;
  if (ShuffleCounts.size()) {
    std::tie(ShuffleSrc1, NumShuffleLanes) = GetMostCommon(ShuffleCounts);
    llvm::erase_if(ShuffleCounts,
                   [&](const auto &Pair) { return Pair.first == ShuffleSrc1; });
  }
  if (ShuffleCounts.size()) {
    size_t AdditionalShuffleLanes;
    std::tie(ShuffleSrc2, AdditionalShuffleLanes) =
        GetMostCommon(ShuffleCounts);
    NumShuffleLanes += AdditionalShuffleLanes;
  }

  // Predicate returning true if the lane is properly initialized by the
  // original instruction
  std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
  SDValue Result;
  // Prefer swizzles over shuffles over vector consts over splats
  if (NumSwizzleLanes >= NumShuffleLanes &&
      NumSwizzleLanes >= NumConstantLanes && NumSwizzleLanes >= NumSplatLanes) {
    Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
                         SwizzleIndices);
    auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
    IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
      return Swizzled == GetSwizzleSrcs(I, Lane);
    };
  } else if (NumShuffleLanes >= NumConstantLanes &&
             NumShuffleLanes >= NumSplatLanes) {
    size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits() / 8;
    size_t DestLaneCount = VecT.getVectorNumElements();
    size_t Scale1 = 1;
    size_t Scale2 = 1;
    SDValue Src1 = ShuffleSrc1;
    SDValue Src2 = ShuffleSrc2 ? ShuffleSrc2 : DAG.getUNDEF(VecT);
    if (Src1.getValueType() != VecT) {
      size_t LaneSize =
          Src1.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
      assert(LaneSize > DestLaneSize);
      Scale1 = LaneSize / DestLaneSize;
      Src1 = DAG.getBitcast(VecT, Src1);
    }
    if (Src2.getValueType() != VecT) {
      size_t LaneSize =
          Src2.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
      assert(LaneSize > DestLaneSize);
      Scale2 = LaneSize / DestLaneSize;
      Src2 = DAG.getBitcast(VecT, Src2);
    }

    int Mask[16];
    assert(DestLaneCount <= 16);
    for (size_t I = 0; I < DestLaneCount; ++I) {
      const SDValue &Lane = Op->getOperand(I);
      SDValue Src = GetShuffleSrc(Lane);
      if (Src == ShuffleSrc1) {
        Mask[I] = Lane->getConstantOperandVal(1) * Scale1;
      } else if (Src && Src == ShuffleSrc2) {
        Mask[I] = DestLaneCount + Lane->getConstantOperandVal(1) * Scale2;
      } else {
        Mask[I] = -1;
      }
    }
    ArrayRef<int> MaskRef(Mask, DestLaneCount);
    Result = DAG.getVectorShuffle(VecT, DL, Src1, Src2, MaskRef);
    IsLaneConstructed = [&](size_t, const SDValue &Lane) {
      auto Src = GetShuffleSrc(Lane);
      return Src == ShuffleSrc1 || (Src && Src == ShuffleSrc2);
    };
  } else if (NumConstantLanes >= NumSplatLanes) {
    SmallVector<SDValue, 16> ConstLanes;
    for (const SDValue &Lane : Op->op_values()) {
      if (IsConstant(Lane)) {
        // Values may need to be fixed so that they will sign extend to be
        // within the expected range during ISel. Check whether the value is in
        // bounds based on the lane bit width and if it is out of bounds, lop
        // off the extra bits and subtract 2^n to reflect giving the high bit
        // value -2^(n-1) rather than +2^(n-1). Skip the i64 case because it
        // cannot possibly be out of range.
        auto *Const = dyn_cast<ConstantSDNode>(Lane.getNode());
        int64_t Val = Const ? Const->getSExtValue() : 0;
        uint64_t LaneBits = 128 / Lanes;
        assert((LaneBits == 64 || Val >= -(1ll << (LaneBits - 1))) &&
               "Unexpected out of bounds negative value");
        if (Const && LaneBits != 64 && Val > (1ll << (LaneBits - 1)) - 1) {
          uint64_t Mask = (1ll << LaneBits) - 1;
          auto NewVal = (((uint64_t)Val & Mask) - (1ll << LaneBits)) & Mask;
          ConstLanes.push_back(DAG.getConstant(NewVal, SDLoc(Lane), LaneT));
        } else {
          ConstLanes.push_back(Lane);
        }
      } else if (LaneT.isFloatingPoint()) {
        ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
      } else {
        ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
      }
    }
    Result = DAG.getBuildVector(VecT, DL, ConstLanes);
    IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
      return IsConstant(Lane);
    };
  } else {
    // Use a splat (which might be selected as a load splat)
    Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
    IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
      return Lane == SplatValue;
    };
  }

  assert(Result);
  assert(IsLaneConstructed);

  // Add replace_lane instructions for any unhandled values
  for (size_t I = 0; I < Lanes; ++I) {
    const SDValue &Lane = Op->getOperand(I);
    if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
      Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
                           DAG.getConstant(I, DL, MVT::i32));
  }

  return Result;
}

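// Illustrative sketch (assumed example): for the v4i32 build_vector
//   (%x, 1, 2, 3)
// three of four lanes are constants, so the code above materializes the
// vector constant (0, 1, 2, 3) with v128.const and then patches lane 0 with
// i32x4.replace_lane using %x.
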
SDValue
WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
  MVT VecType = Op.getOperand(0).getSimpleValueType();
  assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
  size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;

  // Space for two vector args and sixteen mask indices
  SDValue Ops[18];
  size_t OpIdx = 0;
  Ops[OpIdx++] = Op.getOperand(0);
  Ops[OpIdx++] = Op.getOperand(1);

  // Expand mask indices to byte indices and materialize them as operands
  for (int M : Mask) {
    for (size_t J = 0; J < LaneBytes; ++J) {
      // Lower undefs (represented by -1 in the mask) to {0..J}, which use a
      // whole lane of vector input, to allow further reduction in the VM.
      // E.g. match an 8x16 byte shuffle to an equivalent cheaper 32x4 shuffle.
      uint64_t ByteIndex = M == -1 ? J : (uint64_t)M * LaneBytes + J;
      Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
    }
  }

  return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
}

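// Illustrative sketch (assumed example): the v4i32 shuffle mask <0, 5, -1, 7>
// expands to the byte-level i8x16.shuffle mask
//   0 1 2 3  20 21 22 23  0 1 2 3  28 29 30 31
// where the undef lane reuses bytes 0..3 of a whole input lane so engines
// can still recognize a 32-bit-granular shuffle.
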
SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // The legalizer does not know how to expand the unsupported comparison modes
  // of i64x2 vectors, so we manually unroll them here.
  assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
  SmallVector<SDValue, 2> LHS, RHS;
  DAG.ExtractVectorElements(Op->getOperand(0), LHS);
  DAG.ExtractVectorElements(Op->getOperand(1), RHS);
  const SDValue &CC = Op->getOperand(2);
  auto MakeLane = [&](unsigned I) {
    return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I],
                       DAG.getConstant(uint64_t(-1), DL, MVT::i64),
                       DAG.getConstant(uint64_t(0), DL, MVT::i64), CC);
  };
  return DAG.getBuildVector(Op->getValueType(0), DL,
                            {MakeLane(0), MakeLane(1)});
}

SDValue
WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
                                                    SelectionDAG &DAG) const {
  // Allow constant lane indices, expand variable lane indices
  SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
  if (isa<ConstantSDNode>(IdxNode)) {
    // Ensure the index type is i32 to match the tablegen patterns
    uint64_t Idx = IdxNode->getAsZExtVal();
    SmallVector<SDValue, 3> Ops(Op.getNode()->ops());
    Ops[Op.getNumOperands() - 1] =
        DAG.getConstant(Idx, SDLoc(IdxNode), MVT::i32);
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Ops);
  }
  // Perform default expansion
  return SDValue();
}

static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) {
  EVT LaneT = Op.getSimpleValueType().getVectorElementType();
  // 32-bit and 64-bit unrolled shifts will have proper semantics
  if (LaneT.bitsGE(MVT::i32))
    return DAG.UnrollVectorOp(Op.getNode());
  // Otherwise mask the shift value to get proper semantics from 32-bit shift
  SDLoc DL(Op);
  size_t NumLanes = Op.getSimpleValueType().getVectorNumElements();
  SDValue Mask = DAG.getConstant(LaneT.getSizeInBits() - 1, DL, MVT::i32);
  unsigned ShiftOpcode = Op.getOpcode();
  SmallVector<SDValue, 16> ShiftedElements;
  DAG.ExtractVectorElements(Op.getOperand(0), ShiftedElements, 0, 0, MVT::i32);
  SmallVector<SDValue, 16> ShiftElements;
  DAG.ExtractVectorElements(Op.getOperand(1), ShiftElements, 0, 0, MVT::i32);
  SmallVector<SDValue, 16> UnrolledOps;
  for (size_t i = 0; i < NumLanes; ++i) {
    SDValue MaskedShiftValue =
        DAG.getNode(ISD::AND, DL, MVT::i32, ShiftElements[i], Mask);
    SDValue ShiftedValue = ShiftedElements[i];
    if (ShiftOpcode == ISD::SRA)
      ShiftedValue = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32,
                                 ShiftedValue, DAG.getValueType(LaneT));
    UnrolledOps.push_back(
        DAG.getNode(ShiftOpcode, DL, MVT::i32, ShiftedValue, MaskedShiftValue));
  }
  return DAG.getBuildVector(Op.getValueType(), DL, UnrolledOps);
}

SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);

  // Only manually lower vector shifts
  assert(Op.getSimpleValueType().isVector());

  uint64_t LaneBits = Op.getValueType().getScalarSizeInBits();
  auto ShiftVal = Op.getOperand(1);

  // Try to skip the bitmask operation, since it is implied inside the shift
  // instruction.
  auto SkipImpliedMask = [](SDValue MaskOp, uint64_t MaskBits) {
    if (MaskOp.getOpcode() != ISD::AND)
      return MaskOp;
    SDValue LHS = MaskOp.getOperand(0);
    SDValue RHS = MaskOp.getOperand(1);
    if (MaskOp.getValueType().isVector()) {
      APInt MaskVal;
      if (!ISD::isConstantSplatVector(RHS.getNode(), MaskVal))
        std::swap(LHS, RHS);

      if (ISD::isConstantSplatVector(RHS.getNode(), MaskVal) &&
          MaskVal == MaskBits)
        MaskOp = LHS;
    } else {
      if (!isa<ConstantSDNode>(RHS.getNode()))
        std::swap(LHS, RHS);

      auto ConstantRHS = dyn_cast<ConstantSDNode>(RHS.getNode());
      if (ConstantRHS && ConstantRHS->getAPIntValue() == MaskBits)
        MaskOp = LHS;
    }

    return MaskOp;
  };

  // Skip the vector AND operation
  ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
  ShiftVal = DAG.getSplatValue(ShiftVal);
  if (!ShiftVal)
    return unrollVectorShift(Op, DAG);

  // Skip the scalar AND operation
  ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
  // Use anyext because none of the high bits can affect the shift
  ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32);

  unsigned Opcode;
  switch (Op.getOpcode()) {
  case ISD::SHL:
    Opcode = WebAssemblyISD::VEC_SHL;
    break;
  case ISD::SRA:
    Opcode = WebAssemblyISD::VEC_SHR_S;
    break;
  case ISD::SRL:
    Opcode = WebAssemblyISD::VEC_SHR_U;
    break;
  default:
    llvm_unreachable("unexpected opcode");
  }

  return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal);
}

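// Illustrative sketch (assumed example): for
//   shl <4 x i32> %v, (splat (and i32 %x, 31))
// the lowering above strips both the splat and the mask, emitting a single
// i32x4.shl %v, %x, because wasm shift instructions already take their
// scalar shift amount modulo the lane width.
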
SDValue WebAssemblyTargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT ResT = Op.getValueType();
  EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();

  if ((ResT == MVT::i32 || ResT == MVT::i64) &&
      (SatVT == MVT::i32 || SatVT == MVT::i64))
    return Op;

  if (ResT == MVT::v4i32 && SatVT == MVT::i32)
    return Op;

  return SDValue();
}

//===----------------------------------------------------------------------===//
// Custom DAG combine hooks
//===----------------------------------------------------------------------===//
static SDValue
performVECTOR_SHUFFLECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
  auto &DAG = DCI.DAG;
  auto Shuffle = cast<ShuffleVectorSDNode>(N);

  // Hoist vector bitcasts that don't change the number of lanes out of unary
  // shuffles, where they are less likely to get in the way of other combines.
  // (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) ->
  // (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask))))
  SDValue Bitcast = N->getOperand(0);
  if (Bitcast.getOpcode() != ISD::BITCAST)
    return SDValue();
  if (!N->getOperand(1).isUndef())
    return SDValue();
  SDValue CastOp = Bitcast.getOperand(0);
  EVT SrcType = CastOp.getValueType();
  EVT DstType = Bitcast.getValueType();
  if (!SrcType.is128BitVector() ||
      SrcType.getVectorNumElements() != DstType.getVectorNumElements())
    return SDValue();
  SDValue NewShuffle = DAG.getVectorShuffle(
      SrcType, SDLoc(N), CastOp, DAG.getUNDEF(SrcType), Shuffle->getMask());
  return DAG.getBitcast(DstType, NewShuffle);
}

/// Convert ({u,s}itofp vec) --> ({u,s}itofp ({s,z}ext vec)) so it doesn't get
/// split up into scalar instructions during legalization, and the vector
/// extending instructions are selected in performVectorExtendCombine below.
static SDValue
performVectorExtendToFPCombine(SDNode *N,
                               TargetLowering::DAGCombinerInfo &DCI) {
  auto &DAG = DCI.DAG;
  assert(N->getOpcode() == ISD::UINT_TO_FP ||
         N->getOpcode() == ISD::SINT_TO_FP);

  EVT InVT = N->getOperand(0)->getValueType(0);
  EVT ResVT = N->getValueType(0);
  MVT ExtVT;
  if (ResVT == MVT::v4f32 && (InVT == MVT::v4i16 || InVT == MVT::v4i8))
    ExtVT = MVT::v4i32;
  else if (ResVT == MVT::v2f64 && (InVT == MVT::v2i16 || InVT == MVT::v2i8))
    ExtVT = MVT::v2i32;
  else
    return SDValue();

  unsigned Op =
      N->getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
  SDValue Conv = DAG.getNode(Op, SDLoc(N), ExtVT, N->getOperand(0));
  return DAG.getNode(N->getOpcode(), SDLoc(N), ResVT, Conv);
}

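// Illustrative sketch (assumed example): (uint_to_fp (v4i16 %v) to v4f32)
// would be scalarized during legalization, so it is rewritten above as
//   (uint_to_fp (zero_extend %v to v4i32))
// and the extend plus f32x4.convert_i32x4_u are then selected as vector
// instructions.
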
static SDValue
performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
  auto &DAG = DCI.DAG;
  assert(N->getOpcode() == ISD::SIGN_EXTEND ||
         N->getOpcode() == ISD::ZERO_EXTEND);

  // Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if
  // possible before the extract_subvector can be expanded.
  auto Extract = N->getOperand(0);
  if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
    return SDValue();
  auto Source = Extract.getOperand(0);
  auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
  if (IndexNode == nullptr)
    return SDValue();
  auto Index = IndexNode->getZExtValue();

  // Only v8i8, v4i16, and v2i32 extracts can be widened, and only if the
  // extracted subvector is the low or high half of its source.
  EVT ResVT = N->getValueType(0);
  if (ResVT == MVT::v8i16) {
    if (Extract.getValueType() != MVT::v8i8 ||
        Source.getValueType() != MVT::v16i8 || (Index != 0 && Index != 8))
      return SDValue();
  } else if (ResVT == MVT::v4i32) {
    if (Extract.getValueType() != MVT::v4i16 ||
        Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4))
      return SDValue();
  } else if (ResVT == MVT::v2i64) {
    if (Extract.getValueType() != MVT::v2i32 ||
        Source.getValueType() != MVT::v4i32 || (Index != 0 && Index != 2))
      return SDValue();
  } else {
    return SDValue();
  }

  bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND;
  bool IsLow = Index == 0;

  unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::EXTEND_LOW_S
                                : WebAssemblyISD::EXTEND_HIGH_S)
                       : (IsLow ? WebAssemblyISD::EXTEND_LOW_U
                                : WebAssemblyISD::EXTEND_HIGH_U);

  return DAG.getNode(Op, SDLoc(N), ResVT, Source);
}

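// Illustrative sketch (assumed example):
//   (zero_extend (extract_subvector (v16i8 %v), 8) to v8i16)
// is the high half of %v, so the combine above folds the extract and the
// extend into a single i16x8.extend_high_i8x16_u of %v.
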
static SDValue
performVectorTruncZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
  auto &DAG = DCI.DAG;

  auto GetWasmConversionOp = [](unsigned Op) {
    switch (Op) {
    case ISD::FP_TO_SINT_SAT:
      return WebAssemblyISD::TRUNC_SAT_ZERO_S;
    case ISD::FP_TO_UINT_SAT:
      return WebAssemblyISD::TRUNC_SAT_ZERO_U;
    case ISD::FP_ROUND:
      return WebAssemblyISD::DEMOTE_ZERO;
    }
    llvm_unreachable("unexpected op");
  };

  auto IsZeroSplat = [](SDValue SplatVal) {
    auto *Splat = dyn_cast<BuildVectorSDNode>(SplatVal.getNode());
    APInt SplatValue, SplatUndef;
    unsigned SplatBitSize;
    bool HasAnyUndefs;
    // Endianness doesn't matter in this context because we are looking for
    // an all-zero value.
    return Splat &&
           Splat->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
                                  HasAnyUndefs) &&
           SplatValue == 0;
  };

  if (N->getOpcode() == ISD::CONCAT_VECTORS) {
    // Combine this:
    //
    // (concat_vectors (v2i32 (fp_to_{s,u}int_sat $x, 32)), (v2i32 (splat 0)))
    //
    // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
    //
    // Or this:
    //
    // (concat_vectors (v2f32 (fp_round (v2f64 $x))), (v2f32 (splat 0)))
    //
    // into (f32x4.demote_zero_f64x2 $x).
    EVT ResVT;
    EVT ExpectedConversionType;
    auto Conversion = N->getOperand(0);
    auto ConversionOp = Conversion.getOpcode();
    switch (ConversionOp) {
    case ISD::FP_TO_SINT_SAT:
    case ISD::FP_TO_UINT_SAT:
      ResVT = MVT::v4i32;
      ExpectedConversionType = MVT::v2i32;
      break;
    case ISD::FP_ROUND:
      ResVT = MVT::v4f32;
      ExpectedConversionType = MVT::v2f32;
      break;
    default:
      return SDValue();
    }

    if (N->getValueType(0) != ResVT)
      return SDValue();

    if (Conversion.getValueType() != ExpectedConversionType)
      return SDValue();

    auto Source = Conversion.getOperand(0);
    if (Source.getValueType() != MVT::v2f64)
      return SDValue();

    if (!IsZeroSplat(N->getOperand(1)) ||
        N->getOperand(1).getValueType() != ExpectedConversionType)
      return SDValue();

    unsigned Op = GetWasmConversionOp(ConversionOp);
    return DAG.getNode(Op, SDLoc(N), ResVT, Source);
  }

  // Combine this:
  //
  // (fp_to_{s,u}int_sat (concat_vectors $x, (v2f64 (splat 0))), 32)
  //
  // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
  //
  // Or this:
  //
  // (v4f32 (fp_round (concat_vectors $x, (v2f64 (splat 0)))))
  //
  // into (f32x4.demote_zero_f64x2 $x).
  EVT ResVT;
  auto ConversionOp = N->getOpcode();
  switch (ConversionOp) {
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT:
    ResVT = MVT::v4i32;
    break;
  case ISD::FP_ROUND:
    ResVT = MVT::v4f32;
    break;
  default:
    llvm_unreachable("unexpected op");
  }

  if (N->getValueType(0) != ResVT)
    return SDValue();

  auto Concat = N->getOperand(0);
  if (Concat.getValueType() != MVT::v4f64)
    return SDValue();

  auto Source = Concat.getOperand(0);
  if (Source.getValueType() != MVT::v2f64)
    return SDValue();

  if (!IsZeroSplat(Concat.getOperand(1)) ||
      Concat.getOperand(1).getValueType() != MVT::v2f64)
    return SDValue();

  unsigned Op = GetWasmConversionOp(ConversionOp);
  return DAG.getNode(Op, SDLoc(N), ResVT, Source);
}

// Helper to extract VectorWidth bits from Vec, starting from IdxVal.
static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
                                const SDLoc &DL, unsigned VectorWidth) {
  EVT VT = Vec.getValueType();
  EVT ElVT = VT.getVectorElementType();
  unsigned Factor = VT.getSizeInBits() / VectorWidth;
  EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
                                  VT.getVectorNumElements() / Factor);

  // Extract the relevant VectorWidth bits. Generate an EXTRACT_SUBVECTOR
  unsigned ElemsPerChunk = VectorWidth / ElVT.getSizeInBits();
  assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");

  // This is the index of the first element of the VectorWidth-bit chunk
  // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
  IdxVal &= ~(ElemsPerChunk - 1);

  // If the input is a buildvector just emit a smaller one.
  if (Vec.getOpcode() == ISD::BUILD_VECTOR)
    return DAG.getBuildVector(ResultVT, DL,
                              Vec->ops().slice(IdxVal, ElemsPerChunk));

  SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, DL);
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, VecIdx);
}

// Helper to recursively truncate vector elements in half with NARROW_U. DstVT
// is the expected destination value type after recursion. In is the initial
// input. Note that the input should have enough leading zero bits to prevent
// NARROW_U from saturating results.
static SDValue truncateVectorWithNARROW(EVT DstVT, SDValue In, const SDLoc &DL,
                                        SelectionDAG &DAG) {
  EVT SrcVT = In.getValueType();

  // No truncation required, we might get here due to recursive calls.
  if (SrcVT == DstVT)
    return In;

  unsigned SrcSizeInBits = SrcVT.getSizeInBits();
  unsigned NumElems = SrcVT.getVectorNumElements();
  if (!isPowerOf2_32(NumElems))
    return SDValue();
  assert(DstVT.getVectorNumElements() == NumElems && "Illegal truncation");
  assert(SrcSizeInBits > DstVT.getSizeInBits() && "Illegal truncation");

  LLVMContext &Ctx = *DAG.getContext();
  EVT PackedSVT = EVT::getIntegerVT(Ctx, SrcVT.getScalarSizeInBits() / 2);

  // Narrow to the largest type possible:
  // vXi64/vXi32 -> i16x8.narrow_i32x4_u and vXi16 -> i8x16.narrow_i16x8_u.
  EVT InVT = MVT::i16, OutVT = MVT::i8;
  if (SrcVT.getScalarSizeInBits() > 16) {
    InVT = MVT::i32;
    OutVT = MVT::i16;
  }
  unsigned SubSizeInBits = SrcSizeInBits / 2;
  InVT = EVT::getVectorVT(Ctx, InVT, SubSizeInBits / InVT.getSizeInBits());
  OutVT = EVT::getVectorVT(Ctx, OutVT, SubSizeInBits / OutVT.getSizeInBits());

  // Split lower/upper subvectors.
  SDValue Lo = extractSubVector(In, 0, DAG, DL, SubSizeInBits);
  SDValue Hi = extractSubVector(In, NumElems / 2, DAG, DL, SubSizeInBits);

  // 256bit -> 128bit truncate - Narrow lower/upper 128-bit subvectors.
  if (SrcVT.is256BitVector() && DstVT.is128BitVector()) {
    Lo = DAG.getBitcast(InVT, Lo);
    Hi = DAG.getBitcast(InVT, Hi);
    SDValue Res = DAG.getNode(WebAssemblyISD::NARROW_U, DL, OutVT, Lo, Hi);
    return DAG.getBitcast(DstVT, Res);
  }

  // Recursively narrow lower/upper subvectors, concat result and narrow again.
  EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems / 2);
  Lo = truncateVectorWithNARROW(PackedVT, Lo, DL, DAG);
  Hi = truncateVectorWithNARROW(PackedVT, Hi, DL, DAG);

  PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems);
  SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, PackedVT, Lo, Hi);
  return truncateVectorWithNARROW(DstVT, Res, DL, DAG);
}

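// Illustrative sketch (assumed example): narrowing v8i32 -> v8i16 with the
// helper above splits the input into two v4i32 halves and emits a single
// i16x8.narrow_i32x4_u on them; callers such as performTruncateCombine below
// mask the lanes first so the unsigned saturating narrow cannot clamp.
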
static SDValue performTruncateCombine(SDNode *N,
                                      TargetLowering::DAGCombinerInfo &DCI) {
  auto &DAG = DCI.DAG;

  SDValue In = N->getOperand(0);
  EVT InVT = In.getValueType();
  if (!InVT.isSimple())
    return SDValue();

  EVT OutVT = N->getValueType(0);
  if (!OutVT.isVector())
    return SDValue();

  EVT OutSVT = OutVT.getVectorElementType();
  EVT InSVT = InVT.getVectorElementType();
  // Currently only cover truncate to v16i8 or v8i16.
  if (!((InSVT == MVT::i16 || InSVT == MVT::i32 || InSVT == MVT::i64) &&
        (OutSVT == MVT::i8 || OutSVT == MVT::i16) && OutVT.is128BitVector()))
    return SDValue();

  SDLoc DL(N);
  APInt Mask = APInt::getLowBitsSet(InVT.getScalarSizeInBits(),
                                    OutVT.getScalarSizeInBits());
  In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(Mask, DL, InVT));
  return truncateVectorWithNARROW(OutVT, In, DL, DAG);
}

static SDValue performBitcastCombine(SDNode *N,
                                     TargetLowering::DAGCombinerInfo &DCI) {
  auto &DAG = DCI.DAG;
  SDLoc DL(N);
  SDValue Src = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT SrcVT = Src.getValueType();

  // bitcast <N x i1> to iN
  //   ==> bitmask
  if (DCI.isBeforeLegalize() && VT.isScalarInteger() &&
      SrcVT.isFixedLengthVector() && SrcVT.getScalarType() == MVT::i1) {
    unsigned NumElts = SrcVT.getVectorNumElements();
    if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
      return SDValue();
    EVT Width = MVT::getIntegerVT(128 / NumElts);
    return DAG.getZExtOrTrunc(
        DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
                    {DAG.getConstant(Intrinsic::wasm_bitmask, DL, MVT::i32),
                     DAG.getSExtOrTrunc(N->getOperand(0), DL,
                                        SrcVT.changeVectorElementType(Width))}),
        DL, VT);
  }

  return SDValue();
}

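// Illustrative sketch (assumed example): (bitcast (v4i1 %m) to i4) is
// rewritten above into the wasm_bitmask intrinsic applied to %m sign-extended
// to v4i32, which selects to i32x4.bitmask plus a truncate of the i32 result.
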
static SDValue performSETCCCombine(SDNode *N,
                                   TargetLowering::DAGCombinerInfo &DCI) {
  auto &DAG = DCI.DAG;

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // setcc (iN (bitcast (vNi1 X))), 0, ne
  //   ==> any_true (vNi1 X)
  // setcc (iN (bitcast (vNi1 X))), 0, eq
  //   ==> xor (any_true (vNi1 X)), -1
  // setcc (iN (bitcast (vNi1 X))), -1, eq
  //   ==> all_true (vNi1 X)
  // setcc (iN (bitcast (vNi1 X))), -1, ne
  //   ==> xor (all_true (vNi1 X)), -1
  if (DCI.isBeforeLegalize() && VT.isScalarInteger() &&
      (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
      (isNullConstant(RHS) || isAllOnesConstant(RHS)) &&
      LHS->getOpcode() == ISD::BITCAST) {
    EVT FromVT = LHS->getOperand(0).getValueType();
    if (FromVT.isFixedLengthVector() &&
        FromVT.getVectorElementType() == MVT::i1) {
      int Intrin = isNullConstant(RHS) ? Intrinsic::wasm_anytrue
                                       : Intrinsic::wasm_alltrue;
      unsigned NumElts = FromVT.getVectorNumElements();
      if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
        return SDValue();
      EVT Width = MVT::getIntegerVT(128 / NumElts);
      SDValue Ret = DAG.getZExtOrTrunc(
          DAG.getNode(
              ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
              {DAG.getConstant(Intrin, DL, MVT::i32),
               DAG.getSExtOrTrunc(LHS->getOperand(0), DL,
                                  FromVT.changeVectorElementType(Width))}),
          DL, MVT::i1);
      if ((isNullConstant(RHS) && (Cond == ISD::SETEQ)) ||
          (isAllOnesConstant(RHS) && (Cond == ISD::SETNE))) {
        Ret = DAG.getNOT(DL, Ret, MVT::i1);
      }
      return DAG.getZExtOrTrunc(Ret, DL, VT);
    }
  }

  return SDValue();
}

SDValue
WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  switch (N->getOpcode()) {
  default:
    return SDValue();
  case ISD::BITCAST:
    return performBitcastCombine(N, DCI);
  case ISD::SETCC:
    return performSETCCCombine(N, DCI);
  case ISD::VECTOR_SHUFFLE:
    return performVECTOR_SHUFFLECombine(N, DCI);
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
    return performVectorExtendCombine(N, DCI);
  case ISD::UINT_TO_FP:
  case ISD::SINT_TO_FP:
    return performVectorExtendToFPCombine(N, DCI);
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT:
  case ISD::FP_ROUND:
  case ISD::CONCAT_VECTORS:
    return performVectorTruncZeroCombine(N, DCI);
  case ISD::TRUNCATE:
    return performTruncateCombine(N, DCI);
  }
}