Path: blob/main/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h
//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file defines the interfaces that X86 uses to lower LLVM code into a9// selection DAG.10//11//===----------------------------------------------------------------------===//1213#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H14#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H1516#include "llvm/CodeGen/MachineFunction.h"17#include "llvm/CodeGen/TargetLowering.h"1819namespace llvm {20class X86Subtarget;21class X86TargetMachine;2223namespace X86ISD {24// X86 Specific DAG Nodes25enum NodeType : unsigned {26// Start the numbering where the builtin ops leave off.27FIRST_NUMBER = ISD::BUILTIN_OP_END,2829/// Bit scan forward.30BSF,31/// Bit scan reverse.32BSR,3334/// X86 funnel/double shift i16 instructions. These correspond to35/// X86::SHLDW and X86::SHRDW instructions which have different amt36/// modulo rules to generic funnel shifts.37/// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD.38FSHL,39FSHR,4041/// Bitwise logical AND of floating point values. This corresponds42/// to X86::ANDPS or X86::ANDPD.43FAND,4445/// Bitwise logical OR of floating point values. This corresponds46/// to X86::ORPS or X86::ORPD.47FOR,4849/// Bitwise logical XOR of floating point values. This corresponds50/// to X86::XORPS or X86::XORPD.51FXOR,5253/// Bitwise logical ANDNOT of floating point values. This54/// corresponds to X86::ANDNPS or X86::ANDNPD.55FANDN,5657/// These operations represent an abstract X86 call58/// instruction, which includes a bunch of information. In particular the59/// operands of these node are:60///61/// #0 - The incoming token chain62/// #1 - The callee63/// #2 - The number of arg bytes the caller pushes on the stack.64/// #3 - The number of arg bytes the callee pops off the stack.65/// #4 - The value to pass in AL/AX/EAX (optional)66/// #5 - The value to pass in DL/DX/EDX (optional)67///68/// The result values of these nodes are:69///70/// #0 - The outgoing token chain71/// #1 - The first register result value (optional)72/// #2 - The second register result value (optional)73///74CALL,7576/// Same as call except it adds the NoTrack prefix.77NT_CALL,7879// Pseudo for a OBJC call that gets emitted together with a special80// marker instruction.81CALL_RVMARKER,8283/// X86 compare and logical compare instructions.84CMP,85FCMP,86COMI,87UCOMI,8889/// X86 bit-test instructions.90BT,9192/// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS93/// operand, usually produced by a CMP instruction.94SETCC,9596/// X86 Select97SELECTS,9899// Same as SETCC except it's materialized with a sbb and the value is all100// one's or all zero's.101SETCC_CARRY, // R = carry_bit ? ~0 : 0102103/// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.104/// Operands are two FP values to compare; result is a mask of105/// 0s or 1s. Generally DTRT for C/C++ with NaNs.106FSETCC,107108/// X86 FP SETCC, similar to above, but with output as an i1 mask and109/// and a version with SAE.110FSETCCM,111FSETCCM_SAE,112113/// X86 conditional moves. Operand 0 and operand 1 are the two values114/// to select from. 
Operand 2 is the condition code, and operand 3 is the115/// flag operand produced by a CMP or TEST instruction.116CMOV,117118/// X86 conditional branches. Operand 0 is the chain operand, operand 1119/// is the block to branch if condition is true, operand 2 is the120/// condition code, and operand 3 is the flag operand produced by a CMP121/// or TEST instruction.122BRCOND,123124/// BRIND node with NoTrack prefix. Operand 0 is the chain operand and125/// operand 1 is the target address.126NT_BRIND,127128/// Return with a glue operand. Operand 0 is the chain operand, operand129/// 1 is the number of bytes of stack to pop.130RET_GLUE,131132/// Return from interrupt. Operand 0 is the number of bytes to pop.133IRET,134135/// Repeat fill, corresponds to X86::REP_STOSx.136REP_STOS,137138/// Repeat move, corresponds to X86::REP_MOVSx.139REP_MOVS,140141/// On Darwin, this node represents the result of the popl142/// at function entry, used for PIC code.143GlobalBaseReg,144145/// A wrapper node for TargetConstantPool, TargetJumpTable,146/// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,147/// MCSymbol and TargetBlockAddress.148Wrapper,149150/// Special wrapper used under X86-64 PIC mode for RIP151/// relative displacements.152WrapperRIP,153154/// Copies a 64-bit value from an MMX vector to the low word155/// of an XMM vector, with the high word zero filled.156MOVQ2DQ,157158/// Copies a 64-bit value from the low word of an XMM vector159/// to an MMX vector.160MOVDQ2Q,161162/// Copies a 32-bit value from the low word of a MMX163/// vector to a GPR.164MMX_MOVD2W,165166/// Copies a GPR into the low 32-bit word of a MMX vector167/// and zero out the high word.168MMX_MOVW2D,169170/// Extract an 8-bit value from a vector and zero extend it to171/// i32, corresponds to X86::PEXTRB.172PEXTRB,173174/// Extract a 16-bit value from a vector and zero extend it to175/// i32, corresponds to X86::PEXTRW.176PEXTRW,177178/// Insert any element of a 4 x float vector into any element179/// of a destination 4 x floatvector.180INSERTPS,181182/// Insert the lower 8-bits of a 32-bit value to a vector,183/// corresponds to X86::PINSRB.184PINSRB,185186/// Insert the lower 16-bits of a 32-bit value to a vector,187/// corresponds to X86::PINSRW.188PINSRW,189190/// Shuffle 16 8-bit values within a vector.191PSHUFB,192193/// Compute Sum of Absolute Differences.194PSADBW,195/// Compute Double Block Packed Sum-Absolute-Differences196DBPSADBW,197198/// Bitwise Logical AND NOT of Packed FP values.199ANDNP,200201/// Blend where the selector is an immediate.202BLENDI,203204/// Dynamic (non-constant condition) vector blend where only the sign bits205/// of the condition elements are used. This is used to enforce that the206/// condition mask is not valid for generic VSELECT optimizations. 
This207/// is also used to implement the intrinsics.208/// Operands are in VSELECT order: MASK, TRUE, FALSE209BLENDV,210211/// Combined add and sub on an FP vector.212ADDSUB,213214// FP vector ops with rounding mode.215FADD_RND,216FADDS,217FADDS_RND,218FSUB_RND,219FSUBS,220FSUBS_RND,221FMUL_RND,222FMULS,223FMULS_RND,224FDIV_RND,225FDIVS,226FDIVS_RND,227FMAX_SAE,228FMAXS_SAE,229FMIN_SAE,230FMINS_SAE,231FSQRT_RND,232FSQRTS,233FSQRTS_RND,234235// FP vector get exponent.236FGETEXP,237FGETEXP_SAE,238FGETEXPS,239FGETEXPS_SAE,240// Extract Normalized Mantissas.241VGETMANT,242VGETMANT_SAE,243VGETMANTS,244VGETMANTS_SAE,245// FP Scale.246SCALEF,247SCALEF_RND,248SCALEFS,249SCALEFS_RND,250251/// Integer horizontal add/sub.252HADD,253HSUB,254255/// Floating point horizontal add/sub.256FHADD,257FHSUB,258259// Detect Conflicts Within a Vector260CONFLICT,261262/// Floating point max and min.263FMAX,264FMIN,265266/// Commutative FMIN and FMAX.267FMAXC,268FMINC,269270/// Scalar intrinsic floating point max and min.271FMAXS,272FMINS,273274/// Floating point reciprocal-sqrt and reciprocal approximation.275/// Note that these typically require refinement276/// in order to obtain suitable precision.277FRSQRT,278FRCP,279280// AVX-512 reciprocal approximations with a little more precision.281RSQRT14,282RSQRT14S,283RCP14,284RCP14S,285286// Thread Local Storage.287TLSADDR,288289// Thread Local Storage. A call to get the start address290// of the TLS block for the current module.291TLSBASEADDR,292293// Thread Local Storage. When calling to an OS provided294// thunk at the address from an earlier relocation.295TLSCALL,296297// Thread Local Storage. A descriptor containing pointer to298// code and to argument to get the TLS offset for the symbol.299TLSDESC,300301// Exception Handling helpers.302EH_RETURN,303304// SjLj exception handling setjmp.305EH_SJLJ_SETJMP,306307// SjLj exception handling longjmp.308EH_SJLJ_LONGJMP,309310// SjLj exception handling dispatch.311EH_SJLJ_SETUP_DISPATCH,312313/// Tail call return. See X86TargetLowering::LowerCall for314/// the list of operands.315TC_RETURN,316317// Vector move to low scalar and zero higher vector elements.318VZEXT_MOVL,319320// Vector integer truncate.321VTRUNC,322// Vector integer truncate with unsigned/signed saturation.323VTRUNCUS,324VTRUNCS,325326// Masked version of the above. Used when less than a 128-bit result is327// produced since the mask only applies to the lower elements and can't328// be represented by a select.329// SRC, PASSTHRU, MASK330VMTRUNC,331VMTRUNCUS,332VMTRUNCS,333334// Vector FP extend.335VFPEXT,336VFPEXT_SAE,337VFPEXTS,338VFPEXTS_SAE,339340// Vector FP round.341VFPROUND,342VFPROUND_RND,343VFPROUNDS,344VFPROUNDS_RND,345346// Masked version of above. 
Used for v2f64->v4f32.347// SRC, PASSTHRU, MASK348VMFPROUND,349350// 128-bit vector logical left / right shift351VSHLDQ,352VSRLDQ,353354// Vector shift elements355VSHL,356VSRL,357VSRA,358359// Vector variable shift360VSHLV,361VSRLV,362VSRAV,363364// Vector shift elements by immediate365VSHLI,366VSRLI,367VSRAI,368369// Shifts of mask registers.370KSHIFTL,371KSHIFTR,372373// Bit rotate by immediate374VROTLI,375VROTRI,376377// Vector packed double/float comparison.378CMPP,379380// Vector integer comparisons.381PCMPEQ,382PCMPGT,383384// v8i16 Horizontal minimum and position.385PHMINPOS,386387MULTISHIFT,388389/// Vector comparison generating mask bits for fp and390/// integer signed and unsigned data types.391CMPM,392// Vector mask comparison generating mask bits for FP values.393CMPMM,394// Vector mask comparison with SAE for FP values.395CMPMM_SAE,396397// Arithmetic operations with FLAGS results.398ADD,399SUB,400ADC,401SBB,402SMUL,403UMUL,404OR,405XOR,406AND,407408// Bit field extract.409BEXTR,410BEXTRI,411412// Zero High Bits Starting with Specified Bit Position.413BZHI,414415// Parallel extract and deposit.416PDEP,417PEXT,418419// X86-specific multiply by immediate.420MUL_IMM,421422// Vector sign bit extraction.423MOVMSK,424425// Vector bitwise comparisons.426PTEST,427428// Vector packed fp sign bitwise comparisons.429TESTP,430431// OR/AND test for masks.432KORTEST,433KTEST,434435// ADD for masks.436KADD,437438// Several flavors of instructions with vector shuffle behaviors.439// Saturated signed/unnsigned packing.440PACKSS,441PACKUS,442// Intra-lane alignr.443PALIGNR,444// AVX512 inter-lane alignr.445VALIGN,446PSHUFD,447PSHUFHW,448PSHUFLW,449SHUFP,450// VBMI2 Concat & Shift.451VSHLD,452VSHRD,453VSHLDV,454VSHRDV,455// Shuffle Packed Values at 128-bit granularity.456SHUF128,457MOVDDUP,458MOVSHDUP,459MOVSLDUP,460MOVLHPS,461MOVHLPS,462MOVSD,463MOVSS,464MOVSH,465UNPCKL,466UNPCKH,467VPERMILPV,468VPERMILPI,469VPERMI,470VPERM2X128,471472// Variable Permute (VPERM).473// Res = VPERMV MaskV, V0474VPERMV,475476// 3-op Variable Permute (VPERMT2).477// Res = VPERMV3 V0, MaskV, V1478VPERMV3,479480// Bitwise ternary logic.481VPTERNLOG,482// Fix Up Special Packed Float32/64 values.483VFIXUPIMM,484VFIXUPIMM_SAE,485VFIXUPIMMS,486VFIXUPIMMS_SAE,487// Range Restriction Calculation For Packed Pairs of Float32/64 values.488VRANGE,489VRANGE_SAE,490VRANGES,491VRANGES_SAE,492// Reduce - Perform Reduction Transformation on scalar\packed FP.493VREDUCE,494VREDUCE_SAE,495VREDUCES,496VREDUCES_SAE,497// RndScale - Round FP Values To Include A Given Number Of Fraction Bits.498// Also used by the legacy (V)ROUND intrinsics where we mask out the499// scaling part of the immediate.500VRNDSCALE,501VRNDSCALE_SAE,502VRNDSCALES,503VRNDSCALES_SAE,504// Tests Types Of a FP Values for packed types.505VFPCLASS,506// Tests Types Of a FP Values for scalar types.507VFPCLASSS,508509// Broadcast (splat) scalar or element 0 of a vector. 
If the operand is510// a vector, this node may change the vector length as part of the splat.511VBROADCAST,512// Broadcast mask to vector.513VBROADCASTM,514515/// SSE4A Extraction and Insertion.516EXTRQI,517INSERTQI,518519// XOP arithmetic/logical shifts.520VPSHA,521VPSHL,522// XOP signed/unsigned integer comparisons.523VPCOM,524VPCOMU,525// XOP packed permute bytes.526VPPERM,527// XOP two source permutation.528VPERMIL2,529530// Vector multiply packed unsigned doubleword integers.531PMULUDQ,532// Vector multiply packed signed doubleword integers.533PMULDQ,534// Vector Multiply Packed UnsignedIntegers with Round and Scale.535MULHRS,536537// Multiply and Add Packed Integers.538VPMADDUBSW,539VPMADDWD,540541// AVX512IFMA multiply and add.542// NOTE: These are different than the instruction and perform543// op0 x op1 + op2.544VPMADD52L,545VPMADD52H,546547// VNNI548VPDPBUSD,549VPDPBUSDS,550VPDPWSSD,551VPDPWSSDS,552553// FMA nodes.554// We use the target independent ISD::FMA for the non-inverted case.555FNMADD,556FMSUB,557FNMSUB,558FMADDSUB,559FMSUBADD,560561// FMA with rounding mode.562FMADD_RND,563FNMADD_RND,564FMSUB_RND,565FNMSUB_RND,566FMADDSUB_RND,567FMSUBADD_RND,568569// AVX512-FP16 complex addition and multiplication.570VFMADDC,571VFMADDC_RND,572VFCMADDC,573VFCMADDC_RND,574575VFMULC,576VFMULC_RND,577VFCMULC,578VFCMULC_RND,579580VFMADDCSH,581VFMADDCSH_RND,582VFCMADDCSH,583VFCMADDCSH_RND,584585VFMULCSH,586VFMULCSH_RND,587VFCMULCSH,588VFCMULCSH_RND,589590VPDPBSUD,591VPDPBSUDS,592VPDPBUUD,593VPDPBUUDS,594VPDPBSSD,595VPDPBSSDS,596597// Compress and expand.598COMPRESS,599EXPAND,600601// Bits shuffle602VPSHUFBITQMB,603604// Convert Unsigned/Integer to Floating-Point Value with rounding mode.605SINT_TO_FP_RND,606UINT_TO_FP_RND,607SCALAR_SINT_TO_FP,608SCALAR_UINT_TO_FP,609SCALAR_SINT_TO_FP_RND,610SCALAR_UINT_TO_FP_RND,611612// Vector float/double to signed/unsigned integer.613CVTP2SI,614CVTP2UI,615CVTP2SI_RND,616CVTP2UI_RND,617// Scalar float/double to signed/unsigned integer.618CVTS2SI,619CVTS2UI,620CVTS2SI_RND,621CVTS2UI_RND,622623// Vector float/double to signed/unsigned integer with truncation.624CVTTP2SI,625CVTTP2UI,626CVTTP2SI_SAE,627CVTTP2UI_SAE,628// Scalar float/double to signed/unsigned integer with truncation.629CVTTS2SI,630CVTTS2UI,631CVTTS2SI_SAE,632CVTTS2UI_SAE,633634// Vector signed/unsigned integer to float/double.635CVTSI2P,636CVTUI2P,637638// Masked versions of above. Used for v2f64->v4f32.639// SRC, PASSTHRU, MASK640MCVTP2SI,641MCVTP2UI,642MCVTTP2SI,643MCVTTP2UI,644MCVTSI2P,645MCVTUI2P,646647// Vector float to bfloat16.648// Convert TWO packed single data to one packed BF16 data649CVTNE2PS2BF16,650// Convert packed single data to packed BF16 data651CVTNEPS2BF16,652// Masked version of above.653// SRC, PASSTHRU, MASK654MCVTNEPS2BF16,655656// Dot product of BF16 pairs to accumulated into657// packed single precision.658DPBF16PS,659660// A stack checking function call. On Windows it's _chkstk call.661DYN_ALLOCA,662663// For allocating variable amounts of stack space when using664// segmented stacks. 
Check if the current stacklet has enough space, and665// falls back to heap allocation if not.666SEG_ALLOCA,667668// For allocating stack space when using stack clash protector.669// Allocation is performed by block, and each block is probed.670PROBED_ALLOCA,671672// Memory barriers.673MFENCE,674675// Get a random integer and indicate whether it is valid in CF.676RDRAND,677678// Get a NIST SP800-90B & C compliant random integer and679// indicate whether it is valid in CF.680RDSEED,681682// Protection keys683// RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.684// WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is685// value for ECX.686RDPKRU,687WRPKRU,688689// SSE42 string comparisons.690// These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG691// will emit one or two instructions based on which results are used. If692// flags and index/mask this allows us to use a single instruction since693// we won't have to pick and opcode for flags. Instead we can rely on the694// DAG to CSE everything and decide at isel.695PCMPISTR,696PCMPESTR,697698// Test if in transactional execution.699XTEST,700701// Conversions between float and half-float.702CVTPS2PH,703CVTPS2PH_SAE,704CVTPH2PS,705CVTPH2PS_SAE,706707// Masked version of above.708// SRC, RND, PASSTHRU, MASK709MCVTPS2PH,710MCVTPS2PH_SAE,711712// Galois Field Arithmetic Instructions713GF2P8AFFINEINVQB,714GF2P8AFFINEQB,715GF2P8MULB,716717// LWP insert record.718LWPINS,719720// User level wait721UMWAIT,722TPAUSE,723724// Enqueue Stores Instructions725ENQCMD,726ENQCMDS,727728// For avx512-vp2intersect729VP2INTERSECT,730731// User level interrupts - testui732TESTUI,733734// Perform an FP80 add after changing precision control in FPCW.735FP80_ADD,736737// Conditional compare instructions738CCMP,739CTEST,740741/// X86 strict FP compare instructions.742STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,743STRICT_FCMPS,744745// Vector packed double/float comparison.746STRICT_CMPP,747748/// Vector comparison generating mask bits for fp and749/// integer signed and unsigned data types.750STRICT_CMPM,751752// Vector float/double to signed/unsigned integer with truncation.753STRICT_CVTTP2SI,754STRICT_CVTTP2UI,755756// Vector FP extend.757STRICT_VFPEXT,758759// Vector FP round.760STRICT_VFPROUND,761762// RndScale - Round FP Values To Include A Given Number Of Fraction Bits.763// Also used by the legacy (V)ROUND intrinsics where we mask out the764// scaling part of the immediate.765STRICT_VRNDSCALE,766767// Vector signed/unsigned integer to float/double.768STRICT_CVTSI2P,769STRICT_CVTUI2P,770771// Strict FMA nodes.772STRICT_FNMADD,773STRICT_FMSUB,774STRICT_FNMSUB,775776// Conversions between float and half-float.777STRICT_CVTPS2PH,778STRICT_CVTPH2PS,779780// Perform an FP80 add after changing precision control in FPCW.781STRICT_FP80_ADD,782783// WARNING: Only add nodes here if they are strict FP nodes. 
Non-memory and784// non-strict FP nodes should be above FIRST_TARGET_STRICTFP_OPCODE.785786// Compare and swap.787LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,788LCMPXCHG8_DAG,789LCMPXCHG16_DAG,790LCMPXCHG16_SAVE_RBX_DAG,791792/// LOCK-prefixed arithmetic read-modify-write instructions.793/// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)794LADD,795LSUB,796LOR,797LXOR,798LAND,799LBTS,800LBTC,801LBTR,802LBTS_RM,803LBTC_RM,804LBTR_RM,805806/// RAO arithmetic instructions.807/// OUTCHAIN = AADD(INCHAIN, PTR, RHS)808AADD,809AOR,810AXOR,811AAND,812813// Load, scalar_to_vector, and zero extend.814VZEXT_LOAD,815816// extract_vector_elt, store.817VEXTRACT_STORE,818819// scalar broadcast from memory.820VBROADCAST_LOAD,821822// subvector broadcast from memory.823SUBV_BROADCAST_LOAD,824825// Store FP control word into i16 memory.826FNSTCW16m,827828// Load FP control word from i16 memory.829FLDCW16m,830831// Store x87 FPU environment into memory.832FNSTENVm,833834// Load x87 FPU environment from memory.835FLDENVm,836837/// This instruction implements FP_TO_SINT with the838/// integer destination in memory and a FP reg source. This corresponds839/// to the X86::FIST*m instructions and the rounding mode change stuff. It840/// has two inputs (token chain and address) and two outputs (int value841/// and token chain). Memory VT specifies the type to store to.842FP_TO_INT_IN_MEM,843844/// This instruction implements SINT_TO_FP with the845/// integer source in memory and FP reg result. This corresponds to the846/// X86::FILD*m instructions. It has two inputs (token chain and address)847/// and two outputs (FP value and token chain). The integer source type is848/// specified by the memory VT.849FILD,850851/// This instruction implements a fp->int store from FP stack852/// slots. This corresponds to the fist instruction. It takes a853/// chain operand, value to store, address, and glue. The memory VT854/// specifies the type to store as.855FIST,856857/// This instruction implements an extending load to FP stack slots.858/// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain859/// operand, and ptr to load from. The memory VT specifies the type to860/// load from.861FLD,862863/// This instruction implements a truncating store from FP stack864/// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a865/// chain operand, value to store, address, and glue. The memory VT866/// specifies the type to store as.867FST,868869/// These instructions grab the address of the next argument870/// from a va_list. (reads and modifies the va_list in memory)871VAARG_64,872VAARG_X32,873874// Vector truncating store with unsigned/signed saturation875VTRUNCSTOREUS,876VTRUNCSTORES,877// Vector truncating masked store with unsigned/signed saturation878VMTRUNCSTOREUS,879VMTRUNCSTORES,880881// X86 specific gather and scatter882MGATHER,883MSCATTER,884885// Key locker nodes that produce flags.886AESENC128KL,887AESDEC128KL,888AESENC256KL,889AESDEC256KL,890AESENCWIDE128KL,891AESDECWIDE128KL,892AESENCWIDE256KL,893AESDECWIDE256KL,894895/// Compare and Add if Condition is Met. Compare value in operand 2 with896/// value in memory of operand 1. If condition of operand 4 is met, add897/// value operand 3 to m32 and write new value in operand 1. Operand 2 is898/// always updated with the original value from operand 1.899CMPCCXADD,900901// Save xmm argument registers to the stack, according to %al. 
An operator
// is needed so that this can be expanded with control flow.
VASTART_SAVE_XMM_REGS,

// Conditional load/store instructions
CLOAD,
CSTORE,

// WARNING: Do not add anything in the end unless you want the node to
// have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
// opcodes will be thought as target memory ops!
};
} // end namespace X86ISD

namespace X86 {
/// Current rounding mode is represented in bits 11:10 of FPSR. These
/// values are the same as the corresponding constants for rounding mode used
/// in glibc.
enum RoundingMode {
  rmToNearest = 0,        // FE_TONEAREST
  rmDownward = 1 << 10,   // FE_DOWNWARD
  rmUpward = 2 << 10,     // FE_UPWARD
  rmTowardZero = 3 << 10, // FE_TOWARDZERO
  rmMask = 3 << 10        // Bit mask selecting rounding mode
};
}

/// Define some predicates that are used for node matching.
namespace X86 {
/// Returns true if Elt is a constant zero or floating point constant +0.0.
bool isZeroNode(SDValue Elt);

/// Returns true if the given offset can
/// fit into the displacement field of the instruction.
bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
                                  bool hasSymbolicDisplacement);

/// Determines whether the callee is required to pop its
/// own arguments. Callee pop is necessary to support tail calls.
bool isCalleePop(CallingConv::ID CallingConv,
                 bool is64Bit, bool IsVarArg, bool GuaranteeTCO);

/// If Op is a constant whose elements are all the same constant or
/// undefined, return true and return the constant value in \p SplatVal.
/// If we have undef bits that don't cover an entire element, we treat these
/// as zero if AllowPartialUndefs is set, else we fail and return false.
bool isConstantSplat(SDValue Op, APInt &SplatVal,
                     bool AllowPartialUndefs = true);

/// Check if Op is a load operation that could be folded into some other x86
/// instruction as a memory operand. Example: vpaddd (%rdi), %xmm0, %xmm0.
bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
                 bool AssumeSingleUse = false);

/// Check if Op is a load operation that could be folded into a vector splat
/// instruction as a memory operand. Example: vbroadcastss 16(%rdi), %xmm2.
bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
                                     const X86Subtarget &Subtarget,
                                     bool AssumeSingleUse = false);

/// Check if Op is a value that could be used to fold a store into some
/// other x86 instruction as a memory operand.
Ex: pextrb $0, %xmm0, (%rdi).963bool mayFoldIntoStore(SDValue Op);964965/// Check if Op is an operation that could be folded into a zero extend x86966/// instruction.967bool mayFoldIntoZeroExtend(SDValue Op);968969/// True if the target supports the extended frame for async Swift970/// functions.971bool isExtendedSwiftAsyncFrameSupported(const X86Subtarget &Subtarget,972const MachineFunction &MF);973} // end namespace X86974975//===--------------------------------------------------------------------===//976// X86 Implementation of the TargetLowering interface977class X86TargetLowering final : public TargetLowering {978public:979explicit X86TargetLowering(const X86TargetMachine &TM,980const X86Subtarget &STI);981982unsigned getJumpTableEncoding() const override;983bool useSoftFloat() const override;984985void markLibCallAttributes(MachineFunction *MF, unsigned CC,986ArgListTy &Args) const override;987988MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {989return MVT::i8;990}991992const MCExpr *993LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,994const MachineBasicBlock *MBB, unsigned uid,995MCContext &Ctx) const override;996997/// Returns relocation base for the given PIC jumptable.998SDValue getPICJumpTableRelocBase(SDValue Table,999SelectionDAG &DAG) const override;1000const MCExpr *1001getPICJumpTableRelocBaseExpr(const MachineFunction *MF,1002unsigned JTI, MCContext &Ctx) const override;10031004/// Return the desired alignment for ByVal aggregate1005/// function arguments in the caller parameter area. For X86, aggregates1006/// that contains are placed at 16-byte boundaries while the rest are at1007/// 4-byte boundaries.1008uint64_t getByValTypeAlignment(Type *Ty,1009const DataLayout &DL) const override;10101011EVT getOptimalMemOpType(const MemOp &Op,1012const AttributeList &FuncAttributes) const override;10131014/// Returns true if it's safe to use load / store of the1015/// specified type to expand memcpy / memset inline. This is mostly true1016/// for all types except for some special cases. For example, on X861017/// targets without SSE2 f64 load / store are done with fldl / fstpl which1018/// also does type conversion. Note the specified type doesn't have to be1019/// legal as the hook is used before type legalization.1020bool isSafeMemOpType(MVT VT) const override;10211022bool isMemoryAccessFast(EVT VT, Align Alignment) const;10231024/// Returns true if the target allows unaligned memory accesses of the1025/// specified type. Returns whether it is "fast" in the last argument.1026bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,1027MachineMemOperand::Flags Flags,1028unsigned *Fast) const override;10291030/// This function returns true if the memory access is aligned or if the1031/// target allows this specific unaligned memory access. 
If the access is1032/// allowed, the optional final parameter returns a relative speed of the1033/// access (as defined by the target).1034bool allowsMemoryAccess(1035LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,1036Align Alignment,1037MachineMemOperand::Flags Flags = MachineMemOperand::MONone,1038unsigned *Fast = nullptr) const override;10391040bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,1041const MachineMemOperand &MMO,1042unsigned *Fast) const {1043return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(),1044MMO.getAlign(), MMO.getFlags(), Fast);1045}10461047/// Provide custom lowering hooks for some operations.1048///1049SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;10501051/// Replace the results of node with an illegal result1052/// type with new values built out of custom code.1053///1054void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,1055SelectionDAG &DAG) const override;10561057SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;10581059bool preferABDSToABSWithNSW(EVT VT) const override;10601061bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT,1062EVT ExtVT) const override;10631064bool isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode Cond,1065EVT VT) const override;10661067/// Return true if the target has native support for1068/// the specified value type and it is 'desirable' to use the type for the1069/// given node type. e.g. On x86 i16 is legal, but undesirable since i161070/// instruction encodings are longer and some i16 instructions are slow.1071bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;10721073/// Return true if the target has native support for the1074/// specified value type and it is 'desirable' to use the type. e.g. 
On x86
  /// i16 is legal, but undesirable since i16 instruction encodings are longer
  /// and some i16 instructions are slow.
  bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;

  /// Return the preferred fold type: Abs if this is a vector, AddAnd if it is
  /// an integer, None otherwise.
  TargetLowering::AndOrSETCCFoldKind
  isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp,
                                     const SDNode *SETCC0,
                                     const SDNode *SETCC1) const override;

  /// Return the newly negated expression if the cost is not expensive and
  /// set the cost in \p Cost to indicate whether it is cheaper or neutral to
  /// do the negation.
  SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                               bool LegalOperations, bool ForCodeSize,
                               NegatibleCost &Cost,
                               unsigned Depth) const override;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *MBB) const override;

  /// This method returns the name of a target specific DAG node.
  const char *getTargetNodeName(unsigned Opcode) const override;

  /// Do not merge vector stores after legalization because that may conflict
  /// with x86-specific store splitting optimizations.
  bool mergeStoresAfterLegalization(EVT MemVT) const override {
    return !MemVT.isVector();
  }

  bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                        const MachineFunction &MF) const override;

  bool isCheapToSpeculateCttz(Type *Ty) const override;

  bool isCheapToSpeculateCtlz(Type *Ty) const override;

  bool isCtlzFast() const override;

  bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
    // If the pair to store is a mixture of float and int values, we will
    // save two bitwise instructions and one float-to-int instruction and
    // increase one store instruction. There is potentially a more
    // significant benefit because it avoids the float->int domain switch
    // for the input value. So it is more likely a win.
    if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
        (LTy.isInteger() && HTy.isFloatingPoint()))
      return true;
    // If the pair only contains int values, we will save two bitwise
    // instructions and increase one store instruction (costing one more
    // store buffer).
Since the benefit is more blurred so we leave1128// such pair out until we get testcase to prove it is a win.1129return false;1130}11311132bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;11331134bool hasAndNotCompare(SDValue Y) const override;11351136bool hasAndNot(SDValue Y) const override;11371138bool hasBitTest(SDValue X, SDValue Y) const override;11391140bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(1141SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,1142unsigned OldShiftOpcode, unsigned NewShiftOpcode,1143SelectionDAG &DAG) const override;11441145unsigned preferedOpcodeForCmpEqPiecesOfOperand(1146EVT VT, unsigned ShiftOpc, bool MayTransformRotate,1147const APInt &ShiftOrRotateAmt,1148const std::optional<APInt> &AndMask) const override;11491150bool preferScalarizeSplat(SDNode *N) const override;11511152CondMergingParams1153getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs,1154const Value *Rhs) const override;11551156bool shouldFoldConstantShiftPairToMask(const SDNode *N,1157CombineLevel Level) const override;11581159bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;11601161bool1162shouldTransformSignedTruncationCheck(EVT XVT,1163unsigned KeptBits) const override {1164// For vectors, we don't have a preference..1165if (XVT.isVector())1166return false;11671168auto VTIsOk = [](EVT VT) -> bool {1169return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||1170VT == MVT::i64;1171};11721173// We are ok with KeptBitsVT being byte/word/dword, what MOVS supports.1174// XVT will be larger than KeptBitsVT.1175MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);1176return VTIsOk(XVT) && VTIsOk(KeptBitsVT);1177}11781179ShiftLegalizationStrategy1180preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,1181unsigned ExpansionFactor) const override;11821183bool shouldSplatInsEltVarIndex(EVT VT) const override;11841185bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override {1186// Converting to sat variants holds little benefit on X86 as we will just1187// need to saturate the value back using fp arithmatic.1188return Op != ISD::FP_TO_UINT_SAT && isOperationLegalOrCustom(Op, VT);1189}11901191bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {1192return VT.isScalarInteger();1193}11941195/// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.1196MVT hasFastEqualityCompare(unsigned NumBits) const override;11971198/// Return the value type to use for ISD::SETCC.1199EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,1200EVT VT) const override;12011202bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,1203const APInt &DemandedElts,1204TargetLoweringOpt &TLO) const override;12051206/// Determine which of the bits specified in Mask are known to be either1207/// zero or one and return them in the KnownZero/KnownOne bitsets.1208void computeKnownBitsForTargetNode(const SDValue Op,1209KnownBits &Known,1210const APInt &DemandedElts,1211const SelectionDAG &DAG,1212unsigned Depth = 0) const override;12131214/// Determine the number of bits in the operation that are sign bits.1215unsigned ComputeNumSignBitsForTargetNode(SDValue Op,1216const APInt &DemandedElts,1217const SelectionDAG &DAG,1218unsigned Depth) const override;12191220bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,1221const APInt &DemandedElts,1222APInt &KnownUndef,1223APInt &KnownZero,1224TargetLoweringOpt &TLO,1225unsigned Depth) const override;12261227bool 
SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op,1228const APInt &DemandedElts,1229unsigned MaskIndex,1230TargetLoweringOpt &TLO,1231unsigned Depth) const;12321233bool SimplifyDemandedBitsForTargetNode(SDValue Op,1234const APInt &DemandedBits,1235const APInt &DemandedElts,1236KnownBits &Known,1237TargetLoweringOpt &TLO,1238unsigned Depth) const override;12391240SDValue SimplifyMultipleUseDemandedBitsForTargetNode(1241SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,1242SelectionDAG &DAG, unsigned Depth) const override;12431244bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(1245SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,1246bool PoisonOnly, unsigned Depth) const override;12471248bool canCreateUndefOrPoisonForTargetNode(1249SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,1250bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override;12511252bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts,1253APInt &UndefElts, const SelectionDAG &DAG,1254unsigned Depth) const override;12551256bool isTargetCanonicalConstantNode(SDValue Op) const override {1257// Peek through bitcasts/extracts/inserts to see if we have a broadcast1258// vector from memory.1259while (Op.getOpcode() == ISD::BITCAST ||1260Op.getOpcode() == ISD::EXTRACT_SUBVECTOR ||1261(Op.getOpcode() == ISD::INSERT_SUBVECTOR &&1262Op.getOperand(0).isUndef()))1263Op = Op.getOperand(Op.getOpcode() == ISD::INSERT_SUBVECTOR ? 1 : 0);12641265return Op.getOpcode() == X86ISD::VBROADCAST_LOAD ||1266TargetLowering::isTargetCanonicalConstantNode(Op);1267}12681269const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;12701271SDValue unwrapAddress(SDValue N) const override;12721273SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;12741275bool ExpandInlineAsm(CallInst *CI) const override;12761277ConstraintType getConstraintType(StringRef Constraint) const override;12781279/// Examine constraint string and operand type and determine a weight value.1280/// The operand object must already have been set up with the operand type.1281ConstraintWeight1282getSingleConstraintMatchWeight(AsmOperandInfo &Info,1283const char *Constraint) const override;12841285const char *LowerXConstraint(EVT ConstraintVT) const override;12861287/// Lower the specified operand into the Ops vector. If it is invalid, don't1288/// add anything to Ops. If hasMemory is true it means one of the asm1289/// constraint of the inline asm instruction being processed is 'm'.1290void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,1291std::vector<SDValue> &Ops,1292SelectionDAG &DAG) const override;12931294InlineAsm::ConstraintCode1295getInlineAsmMemConstraint(StringRef ConstraintCode) const override {1296if (ConstraintCode == "v")1297return InlineAsm::ConstraintCode::v;1298return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);1299}13001301/// Handle Lowering flag assembly outputs.1302SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,1303const SDLoc &DL,1304const AsmOperandInfo &Constraint,1305SelectionDAG &DAG) const override;13061307/// Given a physical register constraint1308/// (e.g. {edx}), return the register number and the register class for the1309/// register. This should only be used for C_Register constraints. 
On1310/// error, this returns a register number of 0.1311std::pair<unsigned, const TargetRegisterClass *>1312getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,1313StringRef Constraint, MVT VT) const override;13141315/// Return true if the addressing mode represented1316/// by AM is legal for this target, for a load/store of the specified type.1317bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,1318Type *Ty, unsigned AS,1319Instruction *I = nullptr) const override;13201321bool addressingModeSupportsTLS(const GlobalValue &GV) const override;13221323/// Return true if the specified immediate is legal1324/// icmp immediate, that is the target has icmp instructions which can1325/// compare a register against the immediate without having to materialize1326/// the immediate into a register.1327bool isLegalICmpImmediate(int64_t Imm) const override;13281329/// Return true if the specified immediate is legal1330/// add immediate, that is the target has add instructions which can1331/// add a register and the immediate without having to materialize1332/// the immediate into a register.1333bool isLegalAddImmediate(int64_t Imm) const override;13341335bool isLegalStoreImmediate(int64_t Imm) const override;13361337/// This is used to enable splatted operand transforms for vector shifts1338/// and vector funnel shifts.1339bool isVectorShiftByScalarCheap(Type *Ty) const override;13401341/// Add x86-specific opcodes to the default list.1342bool isBinOp(unsigned Opcode) const override;13431344/// Returns true if the opcode is a commutative binary operation.1345bool isCommutativeBinOp(unsigned Opcode) const override;13461347/// Return true if it's free to truncate a value of1348/// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in1349/// register EAX to i16 by referencing its sub-register AX.1350bool isTruncateFree(Type *Ty1, Type *Ty2) const override;1351bool isTruncateFree(EVT VT1, EVT VT2) const override;13521353bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;13541355/// Return true if any actual instruction that defines a1356/// value of type Ty1 implicit zero-extends the value to Ty2 in the result1357/// register. This does not necessarily include registers defined in1358/// unknown ways, such as incoming arguments, or copies from unknown1359/// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this1360/// does not necessarily apply to truncate instructions. e.g. on x86-64,1361/// all instructions that define 32-bit values implicit zero-extend the1362/// result out to 64 bits.1363bool isZExtFree(Type *Ty1, Type *Ty2) const override;1364bool isZExtFree(EVT VT1, EVT VT2) const override;1365bool isZExtFree(SDValue Val, EVT VT2) const override;13661367bool shouldSinkOperands(Instruction *I,1368SmallVectorImpl<Use *> &Ops) const override;1369bool shouldConvertPhiType(Type *From, Type *To) const override;13701371/// Return true if folding a vector load into ExtVal (a sign, zero, or any1372/// extend node) is profitable.1373bool isVectorLoadExtDesirable(SDValue) const override;13741375/// Return true if an FMA operation is faster than a pair of fmul and fadd1376/// instructions. fmuladd intrinsics will be expanded to FMAs when this1377/// method returns true, otherwise fmuladd is expanded to fmul + fadd.1378bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,1379EVT VT) const override;13801381/// Return true if it's profitable to narrow operations of type SrcVT to1382/// DestVT. e.g. 
on x86, it's profitable to narrow from i32 to i8 but not1383/// from i32 to i16.1384bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const override;13851386bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,1387EVT VT) const override;13881389/// Given an intrinsic, checks if on the target the intrinsic will need to map1390/// to a MemIntrinsicNode (touches memory). If this is the case, it returns1391/// true and stores the intrinsic information into the IntrinsicInfo that was1392/// passed to the function.1393bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,1394MachineFunction &MF,1395unsigned Intrinsic) const override;13961397/// Returns true if the target can instruction select the1398/// specified FP immediate natively. If false, the legalizer will1399/// materialize the FP immediate as a load from a constant pool.1400bool isFPImmLegal(const APFloat &Imm, EVT VT,1401bool ForCodeSize) const override;14021403/// Targets can use this to indicate that they only support *some*1404/// VECTOR_SHUFFLE operations, those with specific masks. By default, if a1405/// target supports the VECTOR_SHUFFLE node, all mask values are assumed to1406/// be legal.1407bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;14081409/// Similar to isShuffleMaskLegal. Targets can use this to indicate if there1410/// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a1411/// constant pool entry.1412bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;14131414/// Returns true if lowering to a jump table is allowed.1415bool areJTsAllowed(const Function *Fn) const override;14161417MVT getPreferredSwitchConditionType(LLVMContext &Context,1418EVT ConditionVT) const override;14191420/// If true, then instruction selection should1421/// seek to shrink the FP constant of the specified type to a smaller type1422/// in order to save space and / or reduce runtime.1423bool ShouldShrinkFPConstant(EVT VT) const override;14241425/// Return true if we believe it is correct and profitable to reduce the1426/// load node to a smaller type.1427bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,1428EVT NewVT) const override;14291430/// Return true if the specified scalar FP type is computed in an SSE1431/// register, not on the X87 floating point stack.1432bool isScalarFPTypeInSSEReg(EVT VT) const;14331434/// Returns true if it is beneficial to convert a load of a constant1435/// to just the constant itself.1436bool shouldConvertConstantLoadToIntImm(const APInt &Imm,1437Type *Ty) const override;14381439bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;14401441bool convertSelectOfConstantsToMath(EVT VT) const override;14421443bool decomposeMulByConstant(LLVMContext &Context, EVT VT,1444SDValue C) const override;14451446/// Return true if EXTRACT_SUBVECTOR is cheap for this result type1447/// with this index.1448bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,1449unsigned Index) const override;14501451/// Scalar ops always have equal or better analysis/performance/power than1452/// the vector equivalent, so this always makes sense if the scalar op is1453/// supported.1454bool shouldScalarizeBinop(SDValue) const override;14551456/// Extract of a scalar FP value from index 0 of a vector is free.1457bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {1458EVT EltVT = VT.getScalarType();1459return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;1460}14611462/// Overflow nodes should get combined/lowered to optimal 
instructions1463/// (they should allow eliminating explicit compares by getting flags from1464/// math ops).1465bool shouldFormOverflowOp(unsigned Opcode, EVT VT,1466bool MathUsed) const override;14671468bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem,1469unsigned AddrSpace) const override {1470// If we can replace more than 2 scalar stores, there will be a reduction1471// in instructions even after we add a vector constant load.1472return IsZero || NumElem > 2;1473}14741475bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,1476const SelectionDAG &DAG,1477const MachineMemOperand &MMO) const override;14781479Register getRegisterByName(const char* RegName, LLT VT,1480const MachineFunction &MF) const override;14811482/// If a physical register, this returns the register that receives the1483/// exception address on entry to an EH pad.1484Register1485getExceptionPointerRegister(const Constant *PersonalityFn) const override;14861487/// If a physical register, this returns the register that receives the1488/// exception typeid on entry to a landing pad.1489Register1490getExceptionSelectorRegister(const Constant *PersonalityFn) const override;14911492bool needsFixedCatchObjects() const override;14931494/// This method returns a target specific FastISel object,1495/// or null if the target does not support "fast" ISel.1496FastISel *createFastISel(FunctionLoweringInfo &funcInfo,1497const TargetLibraryInfo *libInfo) const override;14981499/// If the target has a standard location for the stack protector cookie,1500/// returns the address of that location. Otherwise, returns nullptr.1501Value *getIRStackGuard(IRBuilderBase &IRB) const override;15021503bool useLoadStackGuardNode() const override;1504bool useStackGuardXorFP() const override;1505void insertSSPDeclarations(Module &M) const override;1506Value *getSDagStackGuard(const Module &M) const override;1507Function *getSSPStackGuardCheck(const Module &M) const override;1508SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,1509const SDLoc &DL) const override;151015111512/// Return true if the target stores SafeStack pointer at a fixed offset in1513/// some non-standard address space, and populates the address space and1514/// offset as appropriate.1515Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;15161517std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL,1518SDValue Chain, SDValue Pointer,1519MachinePointerInfo PtrInfo,1520Align Alignment,1521SelectionDAG &DAG) const;15221523/// Customize the preferred legalization strategy for certain types.1524LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;15251526bool softPromoteHalfType() const override { return true; }15271528MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,1529EVT VT) const override;15301531unsigned getNumRegistersForCallingConv(LLVMContext &Context,1532CallingConv::ID CC,1533EVT VT) const override;15341535unsigned getVectorTypeBreakdownForCallingConv(1536LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,1537unsigned &NumIntermediates, MVT &RegisterVT) const override;15381539bool isIntDivCheap(EVT VT, AttributeList Attr) const override;15401541bool supportSwiftError() const override;15421543bool supportKCFIBundles() const override { return true; }15441545MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,1546MachineBasicBlock::instr_iterator &MBBI,1547const TargetInstrInfo *TII) const override;15481549bool hasStackProbeSymbol(const 
MachineFunction &MF) const override;1550bool hasInlineStackProbe(const MachineFunction &MF) const override;1551StringRef getStackProbeSymbolName(const MachineFunction &MF) const override;15521553unsigned getStackProbeSize(const MachineFunction &MF) const;15541555bool hasVectorBlend() const override { return true; }15561557unsigned getMaxSupportedInterleaveFactor() const override { return 4; }15581559bool isInlineAsmTargetBranch(const SmallVectorImpl<StringRef> &AsmStrs,1560unsigned OpNo) const override;15611562SDValue visitMaskedLoad(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,1563MachineMemOperand *MMO, SDValue &NewLoad,1564SDValue Ptr, SDValue PassThru,1565SDValue Mask) const override;1566SDValue visitMaskedStore(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,1567MachineMemOperand *MMO, SDValue Ptr, SDValue Val,1568SDValue Mask) const override;15691570/// Lower interleaved load(s) into target specific1571/// instructions/intrinsics.1572bool lowerInterleavedLoad(LoadInst *LI,1573ArrayRef<ShuffleVectorInst *> Shuffles,1574ArrayRef<unsigned> Indices,1575unsigned Factor) const override;15761577/// Lower interleaved store(s) into target specific1578/// instructions/intrinsics.1579bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,1580unsigned Factor) const override;15811582SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr,1583int JTI, SelectionDAG &DAG) const override;15841585Align getPrefLoopAlignment(MachineLoop *ML) const override;15861587EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override {1588if (VT == MVT::f80)1589return EVT::getIntegerVT(Context, 96);1590return TargetLoweringBase::getTypeToTransformTo(Context, VT);1591}15921593protected:1594std::pair<const TargetRegisterClass *, uint8_t>1595findRepresentativeClass(const TargetRegisterInfo *TRI,1596MVT VT) const override;15971598private:1599/// Keep a reference to the X86Subtarget around so that we can1600/// make the right decision when generating code for different targets.1601const X86Subtarget &Subtarget;16021603/// A list of legal FP immediates.1604std::vector<APFloat> LegalFPImmediates;16051606/// Indicate that this x86 target can instruction1607/// select the specified FP immediate natively.1608void addLegalFPImmediate(const APFloat& Imm) {1609LegalFPImmediates.push_back(Imm);1610}16111612SDValue LowerCallResult(SDValue Chain, SDValue InGlue,1613CallingConv::ID CallConv, bool isVarArg,1614const SmallVectorImpl<ISD::InputArg> &Ins,1615const SDLoc &dl, SelectionDAG &DAG,1616SmallVectorImpl<SDValue> &InVals,1617uint32_t *RegMask) const;1618SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,1619const SmallVectorImpl<ISD::InputArg> &ArgInfo,1620const SDLoc &dl, SelectionDAG &DAG,1621const CCValAssign &VA, MachineFrameInfo &MFI,1622unsigned i) const;1623SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,1624const SDLoc &dl, SelectionDAG &DAG,1625const CCValAssign &VA,1626ISD::ArgFlagsTy Flags, bool isByval) const;16271628// Call lowering helpers.16291630/// Check whether the call is eligible for tail call optimization. 
Targets1631/// that want to do tail call optimization should implement this function.1632bool IsEligibleForTailCallOptimization(1633TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo,1634SmallVectorImpl<CCValAssign> &ArgLocs, bool IsCalleePopSRet) const;1635SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,1636SDValue Chain, bool IsTailCall,1637bool Is64Bit, int FPDiff,1638const SDLoc &dl) const;16391640unsigned GetAlignedArgumentStackSize(unsigned StackSize,1641SelectionDAG &DAG) const;16421643unsigned getAddressSpace() const;16441645SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned,1646SDValue &Chain) const;1647SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const;16481649SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;1650SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;1651SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;1652SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;16531654unsigned getGlobalWrapperKind(const GlobalValue *GV,1655const unsigned char OpFlags) const;1656SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;1657SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;1658SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;1659SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;1660SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;16611662/// Creates target global address or external symbol nodes for calls or1663/// other uses.1664SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,1665bool ForCall) const;16661667SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;1668SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;1669SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;1670SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;1671SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;1672SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;1673SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;1674SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;1675SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;1676SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;1677SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;1678SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;1679SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;1680SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;1681SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;1682SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;1683SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;1684SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;1685SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;1686SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;1687SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;1688SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;1689SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;1690SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;1691SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;1692SDValue LowerGET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;1693SDValue LowerSET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;1694SDValue LowerRESET_FPENV(SDValue Op, SelectionDAG &DAG) const;1695SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG 
&DAG) const;1696SDValue LowerWin64_FP_TO_INT128(SDValue Op, SelectionDAG &DAG,1697SDValue &Chain) const;1698SDValue LowerWin64_INT128_TO_FP(SDValue Op, SelectionDAG &DAG) const;1699SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;1700SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;1701SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;1702SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;1703SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;1704SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const;17051706SDValue1707LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,1708const SmallVectorImpl<ISD::InputArg> &Ins,1709const SDLoc &dl, SelectionDAG &DAG,1710SmallVectorImpl<SDValue> &InVals) const override;1711SDValue LowerCall(CallLoweringInfo &CLI,1712SmallVectorImpl<SDValue> &InVals) const override;17131714SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,1715const SmallVectorImpl<ISD::OutputArg> &Outs,1716const SmallVectorImpl<SDValue> &OutVals,1717const SDLoc &dl, SelectionDAG &DAG) const override;17181719bool supportSplitCSR(MachineFunction *MF) const override {1720return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&1721MF->getFunction().hasFnAttribute(Attribute::NoUnwind);1722}1723void initializeSplitCSR(MachineBasicBlock *Entry) const override;1724void insertCopiesSplitCSR(1725MachineBasicBlock *Entry,1726const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;17271728bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;17291730bool mayBeEmittedAsTailCall(const CallInst *CI) const override;17311732EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,1733ISD::NodeType ExtendKind) const override;17341735bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,1736bool isVarArg,1737const SmallVectorImpl<ISD::OutputArg> &Outs,1738LLVMContext &Context) const override;17391740const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;1741ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;17421743TargetLoweringBase::AtomicExpansionKind1744shouldExpandAtomicLoadInIR(LoadInst *LI) const override;1745TargetLoweringBase::AtomicExpansionKind1746shouldExpandAtomicStoreInIR(StoreInst *SI) const override;1747TargetLoweringBase::AtomicExpansionKind1748shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;1749TargetLoweringBase::AtomicExpansionKind1750shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const;1751void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;1752void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;17531754LoadInst *1755lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;17561757bool needsCmpXchgNb(Type *MemType) const;17581759void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,1760MachineBasicBlock *DispatchBB, int FI) const;17611762// Utility function to emit the low-level va_arg code for X86-64.1763MachineBasicBlock *1764EmitVAARGWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;17651766/// Utility function to emit the xmm reg save portion of va_start.1767MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,1768MachineInstr &MI2,1769MachineBasicBlock *BB) const;17701771MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,1772MachineBasicBlock *BB) const;17731774MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,1775MachineBasicBlock *BB) const;17761777MachineBasicBlock 
*EmitLoweredSegAlloca(MachineInstr &MI,1778MachineBasicBlock *BB) const;17791780MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,1781MachineBasicBlock *BB) const;17821783MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,1784MachineBasicBlock *BB) const;17851786MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,1787MachineBasicBlock *BB) const;17881789MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,1790MachineBasicBlock *BB) const;17911792MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,1793MachineBasicBlock *MBB) const;17941795void emitSetJmpShadowStackFix(MachineInstr &MI,1796MachineBasicBlock *MBB) const;17971798MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,1799MachineBasicBlock *MBB) const;18001801MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,1802MachineBasicBlock *MBB) const;18031804MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,1805MachineBasicBlock *MBB) const;18061807MachineBasicBlock *emitPatchableEventCall(MachineInstr &MI,1808MachineBasicBlock *MBB) const;18091810/// Emit flags for the given setcc condition and operands. Also returns the1811/// corresponding X86 condition code constant in X86CC.1812SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,1813const SDLoc &dl, SelectionDAG &DAG,1814SDValue &X86CC) const;18151816bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst,1817SDValue IntPow2) const override;18181819/// Check if replacement of SQRT with RSQRT should be disabled.1820bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override;18211822/// Use rsqrt* to speed up sqrt calculations.1823SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,1824int &RefinementSteps, bool &UseOneConstNR,1825bool Reciprocal) const override;18261827/// Use rcp* to speed up fdiv calculations.1828SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,1829int &RefinementSteps) const override;18301831/// Reassociate floating point divisions into multiply by reciprocal.1832unsigned combineRepeatedFPDivisors() const override;18331834SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,1835SmallVectorImpl<SDNode *> &Created) const override;18361837SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,1838SDValue V2) const;1839};18401841namespace X86 {1842FastISel *createFastISel(FunctionLoweringInfo &funcInfo,1843const TargetLibraryInfo *libInfo);1844} // end namespace X8618451846// X86 specific Gather/Scatter nodes.1847// The class has the same order of operands as MaskedGatherScatterSDNode for1848// convenience.1849class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode {1850public:1851// This is a intended as a utility and should never be directly created.1852X86MaskedGatherScatterSDNode() = delete;1853~X86MaskedGatherScatterSDNode() = delete;18541855const SDValue &getBasePtr() const { return getOperand(3); }1856const SDValue &getIndex() const { return getOperand(4); }1857const SDValue &getMask() const { return getOperand(2); }1858const SDValue &getScale() const { return getOperand(5); }18591860static bool classof(const SDNode *N) {1861return N->getOpcode() == X86ISD::MGATHER ||1862N->getOpcode() == X86ISD::MSCATTER;1863}1864};18651866class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {1867public:1868const SDValue &getPassThru() const { return getOperand(1); }18691870static bool classof(const SDNode *N) {1871return N->getOpcode() == X86ISD::MGATHER;1872}1873};18741875class X86MaskedScatterSDNode : public 
X86MaskedGatherScatterSDNode {
public:
  const SDValue &getValue() const { return getOperand(1); }

  static bool classof(const SDNode *N) {
    return N->getOpcode() == X86ISD::MSCATTER;
  }
};

/// Generate unpacklo/unpackhi shuffle mask.
void createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, bool Lo,
                             bool Unary);

/// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
/// imposed by AVX and specific to the unary pattern. Example:
/// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
/// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);

} // end namespace llvm

#endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
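// --- Illustrative sketch (not part of the original header) -----------------
// The X86ISD::FSHL/FSHR comment near the top of the enum notes that the i16
// SHLD/SHRD nodes use the ISD::FSHL/FSHR operand order but different shift
// amount modulo rules than generic funnel shifts. This standalone helper
// shows the *generic* ISD::FSHL semantics for i16, where the amount is
// reduced modulo the bit width; the hardware SHLD/SHRD count is instead
// masked to 5 bits, so amounts in [16, 31] do not wrap the same way. The
// helper name fshl16 is made up for this example.
#include <cstdint>

// Generic i16 funnel shift left: concatenate Hi:Lo, shift left by Amt, and
// keep the high 16 bits of the 32-bit intermediate result.
// e.g. fshl16(0x1234, 0xABCD, 4) == 0x234A.
static uint16_t fshl16(uint16_t Hi, uint16_t Lo, unsigned Amt) {
  Amt %= 16;                       // generic rule: amount modulo bit width
  if (Amt == 0)
    return Hi;                     // avoid shifting by 16 below
  return (uint16_t)((Hi << Amt) | (Lo >> (16u - Amt)));
}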
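// --- Illustrative sketch (not part of the original header) -----------------
// SETCC_CARRY is documented above as "R = carry_bit ? ~0 : 0", materialized
// with an SBB of a register with itself (reg = reg - reg - CF). This tiny
// helper shows the scalar value that idiom produces; setccCarry is a made-up
// name for the example.
static uint32_t setccCarry(bool CarrySet) {
  // Equivalent of "sbb %eax, %eax": all ones when CF is set, zero otherwise.
  return CarrySet ? ~0u : 0u;
}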
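// --- Illustrative sketch (not part of the original header) -----------------
// The X86::RoundingMode enum above encodes the rounding mode in bits 11:10,
// matching the glibc FE_* constants. This self-contained example shows how
// those two bits of an x87 FP control word select the mode. The control-word
// value 0x037F is just the documented x87 default used for the demo; real
// code would read FPCW (e.g. via FNSTCW) instead.
#include <cstdint>
#include <cstdio>

static const char *roundingModeName(uint16_t FPControlWord) {
  // Bits 11:10 select the rounding mode; 3 << 10 is the same mask as
  // X86::rmMask in the enum above.
  switch (FPControlWord & (3 << 10)) {
  case 0 << 10: return "to nearest (FE_TONEAREST)";
  case 1 << 10: return "downward (FE_DOWNWARD)";
  case 2 << 10: return "upward (FE_UPWARD)";
  case 3 << 10: return "toward zero (FE_TOWARDZERO)";
  }
  return "unreachable";
}

int main() {
  // 0x037F: x87 default control word (round to nearest, exceptions masked).
  std::printf("%s\n", roundingModeName(0x037F));
  return 0;
}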
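// --- Illustrative sketch (not part of the original header) -----------------
// The core idea behind X86::isOffsetSuitableForCodeModel is whether an offset
// can be encoded in the signed 32-bit displacement field of an x86 addressing
// mode; the real predicate additionally considers the code model and symbolic
// displacements. fitsInDisp32 is a made-up helper name for this sketch.
#include <cstdint>

// True if Offset survives a round-trip through a signed 32-bit displacement,
// i.e. it could be encoded directly in a memory operand.
static bool fitsInDisp32(int64_t Offset) {
  return Offset == (int64_t)(int32_t)Offset;
}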