// Path: blob/main/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h
// (source mirror metadata: 35268 views)
//===-- PPCISelLowering.h - PPC32 DAG Lowering Interface --------*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file defines the interfaces that PPC uses to lower LLVM code into a9// selection DAG.10//11//===----------------------------------------------------------------------===//1213#ifndef LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H14#define LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H1516#include "PPCInstrInfo.h"17#include "llvm/CodeGen/CallingConvLower.h"18#include "llvm/CodeGen/MachineFunction.h"19#include "llvm/CodeGen/MachineMemOperand.h"20#include "llvm/CodeGen/SelectionDAG.h"21#include "llvm/CodeGen/SelectionDAGNodes.h"22#include "llvm/CodeGen/TargetLowering.h"23#include "llvm/CodeGen/ValueTypes.h"24#include "llvm/CodeGenTypes/MachineValueType.h"25#include "llvm/IR/Attributes.h"26#include "llvm/IR/CallingConv.h"27#include "llvm/IR/Function.h"28#include "llvm/IR/InlineAsm.h"29#include "llvm/IR/Metadata.h"30#include "llvm/IR/Type.h"31#include <optional>32#include <utility>3334namespace llvm {3536namespace PPCISD {3738// When adding a NEW PPCISD node please add it to the correct position in39// the enum. The order of elements in this enum matters!40// Values that are added after this entry:41// STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE42// are considered memory opcodes and are treated differently than entries43// that come before it. 
For example, ADD or MUL should be placed before44// the ISD::FIRST_TARGET_MEMORY_OPCODE while a LOAD or STORE should come45// after it.46enum NodeType : unsigned {47// Start the numbering where the builtin ops and target ops leave off.48FIRST_NUMBER = ISD::BUILTIN_OP_END,4950/// FSEL - Traditional three-operand fsel node.51///52FSEL,5354/// XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions.55XSMAXC,56XSMINC,5758/// FCFID - The FCFID instruction, taking an f64 operand and producing59/// and f64 value containing the FP representation of the integer that60/// was temporarily in the f64 operand.61FCFID,6263/// Newer FCFID[US] integer-to-floating-point conversion instructions for64/// unsigned integers and single-precision outputs.65FCFIDU,66FCFIDS,67FCFIDUS,6869/// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f6470/// operand, producing an f64 value containing the integer representation71/// of that FP value.72FCTIDZ,73FCTIWZ,7475/// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for76/// unsigned integers with round toward zero.77FCTIDUZ,78FCTIWUZ,7980/// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in81/// VSFRC that is sign-extended from ByteWidth to a 64-byte integer.82VEXTS,8384/// Reciprocal estimate instructions (unary FP ops).85FRE,86FRSQRTE,8788/// Test instruction for software square root.89FTSQRT,9091/// Square root instruction.92FSQRT,9394/// VPERM - The PPC VPERM Instruction.95///96VPERM,9798/// XXSPLT - The PPC VSX splat instructions99///100XXSPLT,101102/// XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for103/// converting immediate single precision numbers to double precision104/// vector or scalar.105XXSPLTI_SP_TO_DP,106107/// XXSPLTI32DX - The PPC XXSPLTI32DX instruction.108///109XXSPLTI32DX,110111/// VECINSERT - The PPC vector insert instruction112///113VECINSERT,114115/// VECSHL - The PPC vector shift left instruction116///117VECSHL,118119/// XXPERMDI - The PPC 
XXPERMDI instruction120///121XXPERMDI,122XXPERM,123124/// The CMPB instruction (takes two operands of i32 or i64).125CMPB,126127/// Hi/Lo - These represent the high and low 16-bit parts of a global128/// address respectively. These nodes have two operands, the first of129/// which must be a TargetGlobalAddress, and the second of which must be a130/// Constant. Selected naively, these turn into 'lis G+C' and 'li G+C',131/// though these are usually folded into other nodes.132Hi,133Lo,134135/// The following two target-specific nodes are used for calls through136/// function pointers in the 64-bit SVR4 ABI.137138/// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX)139/// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to140/// compute an allocation on the stack.141DYNALLOC,142143/// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to144/// compute an offset from native SP to the address of the most recent145/// dynamic alloca.146DYNAREAOFFSET,147148/// To avoid stack clash, allocation is performed by block and each block is149/// probed.150PROBED_ALLOCA,151152/// The result of the mflr at function entry, used for PIC code.153GlobalBaseReg,154155/// These nodes represent PPC shifts.156///157/// For scalar types, only the last `n + 1` bits of the shift amounts158/// are used, where n is log2(sizeof(element) * 8). See sld/slw, etc.159/// for exact behaviors.160///161/// For vector types, only the last n bits are used. See vsld.162SRL,163SRA,164SHL,165166/// FNMSUB - Negated multiply-subtract instruction.167FNMSUB,168169/// EXTSWSLI = The PPC extswsli instruction, which does an extend-sign170/// word and shift left immediate.171EXTSWSLI,172173/// The combination of sra[wd]i and addze used to implemented signed174/// integer division by a power of 2. 
The first operand is the dividend,175/// and the second is the constant shift amount (representing the176/// divisor).177SRA_ADDZE,178179/// CALL - A direct function call.180/// CALL_NOP is a call with the special NOP which follows 64-bit181/// CALL_NOTOC the caller does not use the TOC.182/// SVR4 calls and 32-bit/64-bit AIX calls.183CALL,184CALL_NOP,185CALL_NOTOC,186187/// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a188/// MTCTR instruction.189MTCTR,190191/// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a192/// BCTRL instruction.193BCTRL,194195/// CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl196/// instruction and the TOC reload required on 64-bit ELF, 32-bit AIX197/// and 64-bit AIX.198BCTRL_LOAD_TOC,199200/// The variants that implicitly define rounding mode for calls with201/// strictfp semantics.202CALL_RM,203CALL_NOP_RM,204CALL_NOTOC_RM,205BCTRL_RM,206BCTRL_LOAD_TOC_RM,207208/// Return with a glue operand, matched by 'blr'209RET_GLUE,210211/// R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.212/// This copies the bits corresponding to the specified CRREG into the213/// resultant GPR. 
Bits corresponding to other CR regs are undefined.214MFOCRF,215216/// Direct move from a VSX register to a GPR217MFVSR,218219/// Direct move from a GPR to a VSX register (algebraic)220MTVSRA,221222/// Direct move from a GPR to a VSX register (zero)223MTVSRZ,224225/// Direct move of 2 consecutive GPR to a VSX register.226BUILD_FP128,227228/// BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and229/// EXTRACT_ELEMENT but take f64 arguments instead of i64, as i64 is230/// unsupported for this target.231/// Merge 2 GPRs to a single SPE register.232BUILD_SPE64,233234/// Extract SPE register component, second argument is high or low.235EXTRACT_SPE,236237/// Extract a subvector from signed integer vector and convert to FP.238/// It is primarily used to convert a (widened) illegal integer vector239/// type to a legal floating point vector type.240/// For example v2i32 -> widened to v4i32 -> v2f64241SINT_VEC_TO_FP,242243/// Extract a subvector from unsigned integer vector and convert to FP.244/// As with SINT_VEC_TO_FP, used for converting illegal types.245UINT_VEC_TO_FP,246247/// PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to248/// place the value into the least significant element of the most249/// significant doubleword in the vector. This is not element zero for250/// anything smaller than a doubleword on either endianness. This node has251/// the same semantics as SCALAR_TO_VECTOR except that the value remains in252/// the aforementioned location in the vector register.253SCALAR_TO_VECTOR_PERMUTED,254255// FIXME: Remove these once the ANDI glue bug is fixed:256/// i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the257/// eq or gt bit of CR0 after executing andi. x, 1. This is used to258/// implement truncation of i32 or i64 to i1.259ANDI_rec_1_EQ_BIT,260ANDI_rec_1_GT_BIT,261262// READ_TIME_BASE - A read of the 64-bit time-base register on a 32-bit263// target (returns (Lo, Hi)). 
It takes a chain operand.264READ_TIME_BASE,265266// EH_SJLJ_SETJMP - SjLj exception handling setjmp.267EH_SJLJ_SETJMP,268269// EH_SJLJ_LONGJMP - SjLj exception handling longjmp.270EH_SJLJ_LONGJMP,271272/// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP*273/// instructions. For lack of better number, we use the opcode number274/// encoding for the OPC field to identify the compare. For example, 838275/// is VCMPGTSH.276VCMP,277278/// RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the279/// altivec VCMP*_rec instructions. For lack of better number, we use the280/// opcode number encoding for the OPC field to identify the compare. For281/// example, 838 is VCMPGTSH.282VCMP_rec,283284/// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This285/// corresponds to the COND_BRANCH pseudo instruction. CRRC is the286/// condition register to branch on, OPC is the branch opcode to use (e.g.287/// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is288/// an optional input flag argument.289COND_BRANCH,290291/// CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based292/// loops.293BDNZ,294BDZ,295296/// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding297/// towards zero. Used only as part of the long double-to-int298/// conversion sequence.299FADDRTZ,300301/// F8RC = MFFS - This moves the FPSCR (not modeled) into the register.302MFFS,303304/// TC_RETURN - A tail call return.305/// operand #0 chain306/// operand #1 callee (register or absolute)307/// operand #2 stack adjustment308/// operand #3 optional in flag309TC_RETURN,310311/// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls312CR6SET,313CR6UNSET,314315/// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS316/// for non-position independent code on PPC32.317PPC32_GOT,318319/// GPRC = address of _GLOBAL_OFFSET_TABLE_. 
Used by general dynamic and320/// local dynamic TLS and position indendepent code on PPC32.321PPC32_PICGOT,322323/// G8RC = ADDIS_GOT_TPREL_HA %x2, Symbol - Used by the initial-exec324/// TLS model, produces an ADDIS8 instruction that adds the GOT325/// base to sym\@got\@tprel\@ha.326ADDIS_GOT_TPREL_HA,327328/// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec329/// TLS model, produces a LD instruction with base register G8RReg330/// and offset sym\@got\@tprel\@l. This completes the addition that331/// finds the offset of "sym" relative to the thread pointer.332LD_GOT_TPREL_L,333334/// G8RC = ADD_TLS G8RReg, Symbol - Can be used by the initial-exec335/// and local-exec TLS models, produces an ADD instruction that adds336/// the contents of G8RReg to the thread pointer. Symbol contains a337/// relocation sym\@tls which is to be replaced by the thread pointer338/// and identifies to the linker that the instruction is part of a339/// TLS sequence.340ADD_TLS,341342/// G8RC = ADDIS_TLSGD_HA %x2, Symbol - For the general-dynamic TLS343/// model, produces an ADDIS8 instruction that adds the GOT base344/// register to sym\@got\@tlsgd\@ha.345ADDIS_TLSGD_HA,346347/// %x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS348/// model, produces an ADDI8 instruction that adds G8RReg to349/// sym\@got\@tlsgd\@l and stores the result in X3. Hidden by350/// ADDIS_TLSGD_L_ADDR until after register assignment.351ADDI_TLSGD_L,352353/// %x3 = GET_TLS_ADDR %x3, Symbol - For the general-dynamic TLS354/// model, produces a call to __tls_get_addr(sym\@tlsgd). Hidden by355/// ADDIS_TLSGD_L_ADDR until after register assignment.356GET_TLS_ADDR,357358/// %x3 = GET_TPOINTER - Used for the local- and initial-exec TLS model on359/// 32-bit AIX, produces a call to .__get_tpointer to retrieve the thread360/// pointer. 
At the end of the call, the thread pointer is found in R3.361GET_TPOINTER,362363/// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that364/// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following365/// register assignment.366ADDI_TLSGD_L_ADDR,367368/// GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY369/// G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY370/// Op that combines two register copies of TOC entries371/// (region handle into R3 and variable offset into R4) followed by a372/// GET_TLS_ADDR node which will be expanded to a call to .__tls_get_addr.373/// This node is used in 64-bit mode as well (in which case the result is374/// G8RC and inputs are X3/X4).375TLSGD_AIX,376377/// %x3 = GET_TLS_MOD_AIX _$TLSML - For the AIX local-dynamic TLS model,378/// produces a call to .__tls_get_mod(_$TLSML\@ml).379GET_TLS_MOD_AIX,380381/// [GP|G8]RC = TLSLD_AIX, TOC_ENTRY(module handle)382/// Op that requires a single input of the module handle TOC entry in R3,383/// and generates a GET_TLS_MOD_AIX node which will be expanded into a call384/// to .__tls_get_mod. This node is used in both 32-bit and 64-bit modes.385/// The only difference is the register class.386TLSLD_AIX,387388/// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS389/// model, produces an ADDIS8 instruction that adds the GOT base390/// register to sym\@got\@tlsld\@ha.391ADDIS_TLSLD_HA,392393/// %x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS394/// model, produces an ADDI8 instruction that adds G8RReg to395/// sym\@got\@tlsld\@l and stores the result in X3. Hidden by396/// ADDIS_TLSLD_L_ADDR until after register assignment.397ADDI_TLSLD_L,398399/// %x3 = GET_TLSLD_ADDR %x3, Symbol - For the local-dynamic TLS400/// model, produces a call to __tls_get_addr(sym\@tlsld). 
Hidden by401/// ADDIS_TLSLD_L_ADDR until after register assignment.402GET_TLSLD_ADDR,403404/// G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that405/// combines ADDI_TLSLD_L and GET_TLSLD_ADDR until expansion406/// following register assignment.407ADDI_TLSLD_L_ADDR,408409/// G8RC = ADDIS_DTPREL_HA %x3, Symbol - For the local-dynamic TLS410/// model, produces an ADDIS8 instruction that adds X3 to411/// sym\@dtprel\@ha.412ADDIS_DTPREL_HA,413414/// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS415/// model, produces an ADDI8 instruction that adds G8RReg to416/// sym\@got\@dtprel\@l.417ADDI_DTPREL_L,418419/// G8RC = PADDI_DTPREL %x3, Symbol - For the pc-rel based local-dynamic TLS420/// model, produces a PADDI8 instruction that adds X3 to sym\@dtprel.421PADDI_DTPREL,422423/// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded424/// during instruction selection to optimize a BUILD_VECTOR into425/// operations on splats. This is necessary to avoid losing these426/// optimizations due to constant folding.427VADD_SPLAT,428429/// CHAIN = SC CHAIN, Imm128 - System call. The 7-bit unsigned430/// operand identifies the operating system entry point.431SC,432433/// CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.434CLRBHRB,435436/// GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch437/// history rolling buffer entry.438MFBHRBE,439440/// CHAIN = RFEBB CHAIN, State - Return from event-based branch.441RFEBB,442443/// VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little444/// endian. Maps to an xxswapd instruction that corrects an lxvd2x445/// or stxvd2x instruction. 
The chain is necessary because the446/// sequence replaces a load and needs to provide the same number447/// of outputs.448XXSWAPD,449450/// An SDNode for swaps that are not associated with any loads/stores451/// and thereby have no chain.452SWAP_NO_CHAIN,453454/// FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or455/// lower (IDX=1) half of v4f32 to v2f64.456FP_EXTEND_HALF,457458/// MAT_PCREL_ADDR = Materialize a PC Relative address. This can be done459/// either through an add like PADDI or through a PC Relative load like460/// PLD.461MAT_PCREL_ADDR,462463/// TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for464/// TLS global address when using dynamic access models. This can be done465/// through an add like PADDI.466TLS_DYNAMIC_MAT_PCREL_ADDR,467468/// TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address469/// when using local exec access models, and when prefixed instructions are470/// available. This is used with ADD_TLS to produce an add like PADDI.471TLS_LOCAL_EXEC_MAT_ADDR,472473/// ACC_BUILD = Build an accumulator register from 4 VSX registers.474ACC_BUILD,475476/// PAIR_BUILD = Build a vector pair register from 2 VSX registers.477PAIR_BUILD,478479/// EXTRACT_VSX_REG = Extract one of the underlying vsx registers of480/// an accumulator or pair register. 
This node is needed because481/// EXTRACT_SUBVECTOR expects the input and output vectors to have the same482/// element type.483EXTRACT_VSX_REG,484485/// XXMFACC = This corresponds to the xxmfacc instruction.486XXMFACC,487488// Constrained conversion from floating point to int489STRICT_FCTIDZ = ISD::FIRST_TARGET_STRICTFP_OPCODE,490STRICT_FCTIWZ,491STRICT_FCTIDUZ,492STRICT_FCTIWUZ,493494/// Constrained integer-to-floating-point conversion instructions.495STRICT_FCFID,496STRICT_FCFIDU,497STRICT_FCFIDS,498STRICT_FCFIDUS,499500/// Constrained floating point add in round-to-zero mode.501STRICT_FADDRTZ,502503// NOTE: The nodes below may require PC-Rel specific patterns if the504// address could be PC-Relative. When adding new nodes below, consider505// whether or not the address can be PC-Relative and add the corresponding506// PC-relative patterns and tests.507508/// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a509/// byte-swapping store instruction. It byte-swaps the low "Type" bits of510/// the GPRC input, then stores it through Ptr. Type can be either i16 or511/// i32.512STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE,513514/// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a515/// byte-swapping load instruction. It loads "Type" bits, byte swaps it,516/// then puts it in the bottom bits of the GPRC. TYPE can be either i16517/// or i32.518LBRX,519520/// STFIWX - The STFIWX instruction. 
The first operand is an input token521/// chain, then an f64 value to store, then an address to store it to.522STFIWX,523524/// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point525/// load which sign-extends from a 32-bit integer value into the526/// destination 64-bit register.527LFIWAX,528529/// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point530/// load which zero-extends from a 32-bit integer value into the531/// destination 64-bit register.532LFIWZX,533534/// GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an535/// integer smaller than 64 bits into a VSR. The integer is zero-extended.536/// This can be used for converting loaded integers to floating point.537LXSIZX,538539/// STXSIX - The STXSI[bh]X instruction. The first operand is an input540/// chain, then an f64 value to store, then an address to store it to,541/// followed by a byte-width for the store.542STXSIX,543544/// VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.545/// Maps directly to an lxvd2x instruction that will be followed by546/// an xxswapd.547LXVD2X,548549/// LXVRZX - Load VSX Vector Rightmost and Zero Extend550/// This node represents v1i128 BUILD_VECTOR of a zero extending load551/// instruction from <byte, halfword, word, or doubleword> to i128.552/// Allows utilization of the Load VSX Vector Rightmost Instructions.553LXVRZX,554555/// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.556/// Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x depending on557/// the vector type to load vector in big-endian element order.558LOAD_VEC_BE,559560/// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a561/// v2f32 value into the lower half of a VSR register.562LD_VSX_LH,563564/// VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory565/// instructions such as LXVDSX, LXVWSX.566LD_SPLAT,567568/// VSRC, CHAIN = ZEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory569/// that 
zero-extends.570ZEXT_LD_SPLAT,571572/// VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory573/// that sign-extends.574SEXT_LD_SPLAT,575576/// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.577/// Maps directly to an stxvd2x instruction that will be preceded by578/// an xxswapd.579STXVD2X,580581/// CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.582/// Maps directly to one of stxvd2x/stxvw4x/stxvh8x/stxvb16x depending on583/// the vector type to store vector in big-endian element order.584STORE_VEC_BE,585586/// Store scalar integers from VSR.587ST_VSR_SCAL_INT,588589/// ATOMIC_CMP_SWAP - the exact same as the target-independent nodes590/// except they ensure that the compare input is zero-extended for591/// sub-word versions because the atomic loads zero-extend.592ATOMIC_CMP_SWAP_8,593ATOMIC_CMP_SWAP_16,594595/// CHAIN,Glue = STORE_COND CHAIN, GPR, Ptr596/// The store conditional instruction ST[BHWD]ARX that produces a glue597/// result to attach it to a conditional branch.598STORE_COND,599600/// GPRC = TOC_ENTRY GA, TOC601/// Loads the entry for GA from the TOC, where the TOC base is given by602/// the last operand.603TOC_ENTRY604};605606} // end namespace PPCISD607608/// Define some predicates that are used for node matching.609namespace PPC {610611/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a612/// VPKUHUM instruction.613bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,614SelectionDAG &DAG);615616/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a617/// VPKUWUM instruction.618bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,619SelectionDAG &DAG);620621/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a622/// VPKUDUM instruction.623bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,624SelectionDAG &DAG);625626/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable 
for627/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).628bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,629unsigned ShuffleKind, SelectionDAG &DAG);630631/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for632/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).633bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,634unsigned ShuffleKind, SelectionDAG &DAG);635636/// isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for637/// a VMRGEW or VMRGOW instruction638bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,639unsigned ShuffleKind, SelectionDAG &DAG);640/// isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable641/// for a XXSLDWI instruction.642bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,643bool &Swap, bool IsLE);644645/// isXXBRHShuffleMask - Return true if this is a shuffle mask suitable646/// for a XXBRH instruction.647bool isXXBRHShuffleMask(ShuffleVectorSDNode *N);648649/// isXXBRWShuffleMask - Return true if this is a shuffle mask suitable650/// for a XXBRW instruction.651bool isXXBRWShuffleMask(ShuffleVectorSDNode *N);652653/// isXXBRDShuffleMask - Return true if this is a shuffle mask suitable654/// for a XXBRD instruction.655bool isXXBRDShuffleMask(ShuffleVectorSDNode *N);656657/// isXXBRQShuffleMask - Return true if this is a shuffle mask suitable658/// for a XXBRQ instruction.659bool isXXBRQShuffleMask(ShuffleVectorSDNode *N);660661/// isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable662/// for a XXPERMDI instruction.663bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,664bool &Swap, bool IsLE);665666/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the667/// shift amount, otherwise return -1.668int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,669SelectionDAG &DAG);670671/// isSplatShuffleMask - Return true if the 
specified VECTOR_SHUFFLE operand672/// specifies a splat of a single element that is suitable for input to673/// VSPLTB/VSPLTH/VSPLTW.674bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize);675676/// isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by677/// the XXINSERTW instruction introduced in ISA 3.0. This is essentially any678/// shuffle of v4f32/v4i32 vectors that just inserts one element from one679/// vector into the other. This function will also set a couple of680/// output parameters for how much the source vector needs to be shifted and681/// what byte number needs to be specified for the instruction to put the682/// element in the desired location of the target vector.683bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,684unsigned &InsertAtByte, bool &Swap, bool IsLE);685686/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is687/// appropriate for PPC mnemonics (which have a big endian bias - namely688/// elements are counted from the left of the vector register).689unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,690SelectionDAG &DAG);691692/// get_VSPLTI_elt - If this is a build_vector of constants which can be693/// formed by using a vspltis[bhw] instruction of the specified element694/// size, return the constant being splatted. The ByteSize field indicates695/// the number of bytes of each element [124] -> [bhw].696SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);697698// Flags for computing the optimal addressing mode for loads and stores.699enum MemOpFlags {700MOF_None = 0,701702// Extension mode for integer loads.703MOF_SExt = 1,704MOF_ZExt = 1 << 1,705MOF_NoExt = 1 << 2,706707// Address computation flags.708MOF_NotAddNorCst = 1 << 5, // Not const. 
or sum of ptr and scalar.709MOF_RPlusSImm16 = 1 << 6, // Reg plus signed 16-bit constant.710MOF_RPlusLo = 1 << 7, // Reg plus signed 16-bit relocation711MOF_RPlusSImm16Mult4 = 1 << 8, // Reg plus 16-bit signed multiple of 4.712MOF_RPlusSImm16Mult16 = 1 << 9, // Reg plus 16-bit signed multiple of 16.713MOF_RPlusSImm34 = 1 << 10, // Reg plus 34-bit signed constant.714MOF_RPlusR = 1 << 11, // Sum of two variables.715MOF_PCRel = 1 << 12, // PC-Relative relocation.716MOF_AddrIsSImm32 = 1 << 13, // A simple 32-bit constant.717718// The in-memory type.719MOF_SubWordInt = 1 << 15,720MOF_WordInt = 1 << 16,721MOF_DoubleWordInt = 1 << 17,722MOF_ScalarFloat = 1 << 18, // Scalar single or double precision.723MOF_Vector = 1 << 19, // Vector types and quad precision scalars.724MOF_Vector256 = 1 << 20,725726// Subtarget features.727MOF_SubtargetBeforeP9 = 1 << 22,728MOF_SubtargetP9 = 1 << 23,729MOF_SubtargetP10 = 1 << 24,730MOF_SubtargetSPE = 1 << 25731};732733// The addressing modes for loads and stores.734enum AddrMode {735AM_None,736AM_DForm,737AM_DSForm,738AM_DQForm,739AM_PrefixDForm,740AM_XForm,741AM_PCRel742};743} // end namespace PPC744745class PPCTargetLowering : public TargetLowering {746const PPCSubtarget &Subtarget;747748public:749explicit PPCTargetLowering(const PPCTargetMachine &TM,750const PPCSubtarget &STI);751752/// getTargetNodeName() - This method returns the name of a target specific753/// DAG node.754const char *getTargetNodeName(unsigned Opcode) const override;755756bool isSelectSupported(SelectSupportKind Kind) const override {757// PowerPC does not support scalar condition selects on vectors.758return (Kind != SelectSupportKind::ScalarCondVectorVal);759}760761/// getPreferredVectorAction - The code we generate when vector types are762/// legalized by promoting the integer element type is often much worse763/// than code we generate if we widen the type for applicable vector types.764/// The issue with promoting is that the vector is scalaraized, 
individual765/// elements promoted and then the vector is rebuilt. So say we load a pair766/// of v4i8's and shuffle them. This will turn into a mess of 8 extending767/// loads, moves back into VSR's (or memory ops if we don't have moves) and768/// then the VPERM for the shuffle. All in all a very slow sequence.769TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT)770const override {771// Default handling for scalable and single-element vectors.772if (VT.isScalableVector() || VT.getVectorNumElements() == 1)773return TargetLoweringBase::getPreferredVectorAction(VT);774775// Split and promote vNi1 vectors so we don't produce v256i1/v512i1776// types as those are only for MMA instructions.777if (VT.getScalarSizeInBits() == 1 && VT.getSizeInBits() > 16)778return TypeSplitVector;779if (VT.getScalarSizeInBits() == 1)780return TypePromoteInteger;781782// Widen vectors that have reasonably sized elements.783if (VT.getScalarSizeInBits() % 8 == 0)784return TypeWidenVector;785return TargetLoweringBase::getPreferredVectorAction(VT);786}787788bool useSoftFloat() const override;789790bool hasSPE() const;791792MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {793return MVT::i32;794}795796bool isCheapToSpeculateCttz(Type *Ty) const override {797return true;798}799800bool isCheapToSpeculateCtlz(Type *Ty) const override {801return true;802}803804bool805shallExtractConstSplatVectorElementToStore(Type *VectorTy,806unsigned ElemSizeInBits,807unsigned &Index) const override;808809bool isCtlzFast() const override {810return true;811}812813bool isEqualityCmpFoldedWithSignedCmp() const override {814return false;815}816817bool hasAndNotCompare(SDValue) const override {818return true;819}820821bool preferIncOfAddToSubOfNot(EVT VT) const override;822823bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {824return VT.isScalarInteger();825}826827SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps,828bool OptForSize, 
NegatibleCost &Cost,829unsigned Depth = 0) const override;830831/// getSetCCResultType - Return the ISD::SETCC ValueType832EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,833EVT VT) const override;834835/// Return true if target always benefits from combining into FMA for a836/// given value type. This must typically return false on targets where FMA837/// takes more cycles to execute than FADD.838bool enableAggressiveFMAFusion(EVT VT) const override;839840/// getPreIndexedAddressParts - returns true by value, base pointer and841/// offset pointer and addressing mode by reference if the node's address842/// can be legally represented as pre-indexed load / store address.843bool getPreIndexedAddressParts(SDNode *N, SDValue &Base,844SDValue &Offset,845ISD::MemIndexedMode &AM,846SelectionDAG &DAG) const override;847848/// SelectAddressEVXRegReg - Given the specified addressed, check to see if849/// it can be more efficiently represented as [r+imm].850bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index,851SelectionDAG &DAG) const;852853/// SelectAddressRegReg - Given the specified addressed, check to see if it854/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment855/// is non-zero, only accept displacement which is not suitable for [r+imm].856/// Returns false if it can be represented by [r+imm], which are preferred.857bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index,858SelectionDAG &DAG,859MaybeAlign EncodingAlignment = std::nullopt) const;860861/// SelectAddressRegImm - Returns true if the address N can be represented862/// by a base register plus a signed 16-bit displacement [r+imm], and if it863/// is not better represented as reg+reg. If \p EncodingAlignment is864/// non-zero, only accept displacements suitable for instruction encoding865/// requirement, i.e. 
multiples of 4 for DS form.866bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,867SelectionDAG &DAG,868MaybeAlign EncodingAlignment) const;869bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base,870SelectionDAG &DAG) const;871872/// SelectAddressRegRegOnly - Given the specified addressed, force it to be873/// represented as an indexed [r+r] operation.874bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index,875SelectionDAG &DAG) const;876877/// SelectAddressPCRel - Represent the specified address as pc relative to878/// be represented as [pc+imm]879bool SelectAddressPCRel(SDValue N, SDValue &Base) const;880881Sched::Preference getSchedulingPreference(SDNode *N) const override;882883/// LowerOperation - Provide custom lowering hooks for some operations.884///885SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;886887/// ReplaceNodeResults - Replace the results of node with an illegal result888/// type with new values built out of custom code.889///890void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,891SelectionDAG &DAG) const override;892893SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const;894SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const;895896SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;897898SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,899SmallVectorImpl<SDNode *> &Created) const override;900901Register getRegisterByName(const char* RegName, LLT VT,902const MachineFunction &MF) const override;903904void computeKnownBitsForTargetNode(const SDValue Op,905KnownBits &Known,906const APInt &DemandedElts,907const SelectionDAG &DAG,908unsigned Depth = 0) const override;909910Align getPrefLoopAlignment(MachineLoop *ML) const override;911912bool shouldInsertFencesForAtomic(const Instruction *I) const override {913return true;914}915916Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction 
*Inst,917AtomicOrdering Ord) const override;918Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,919AtomicOrdering Ord) const override;920921bool shouldInlineQuadwordAtomics() const;922923TargetLowering::AtomicExpansionKind924shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;925926TargetLowering::AtomicExpansionKind927shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;928929Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder,930AtomicRMWInst *AI, Value *AlignedAddr,931Value *Incr, Value *Mask,932Value *ShiftAmt,933AtomicOrdering Ord) const override;934Value *emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder,935AtomicCmpXchgInst *CI,936Value *AlignedAddr, Value *CmpVal,937Value *NewVal, Value *Mask,938AtomicOrdering Ord) const override;939940MachineBasicBlock *941EmitInstrWithCustomInserter(MachineInstr &MI,942MachineBasicBlock *MBB) const override;943MachineBasicBlock *EmitAtomicBinary(MachineInstr &MI,944MachineBasicBlock *MBB,945unsigned AtomicSize,946unsigned BinOpcode,947unsigned CmpOpcode = 0,948unsigned CmpPred = 0) const;949MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr &MI,950MachineBasicBlock *MBB,951bool is8bit,952unsigned Opcode,953unsigned CmpOpcode = 0,954unsigned CmpPred = 0) const;955956MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,957MachineBasicBlock *MBB) const;958959MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,960MachineBasicBlock *MBB) const;961962MachineBasicBlock *emitProbedAlloca(MachineInstr &MI,963MachineBasicBlock *MBB) const;964965bool hasInlineStackProbe(const MachineFunction &MF) const override;966967unsigned getStackProbeSize(const MachineFunction &MF) const;968969ConstraintType getConstraintType(StringRef Constraint) const override;970971/// Examine constraint string and operand type and determine a weight value.972/// The operand object must already have been set up with the operand type.973ConstraintWeight 
getSingleConstraintMatchWeight(974AsmOperandInfo &info, const char *constraint) const override;975976std::pair<unsigned, const TargetRegisterClass *>977getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,978StringRef Constraint, MVT VT) const override;979980/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate981/// function arguments in the caller parameter area. This is the actual982/// alignment, not its logarithm.983uint64_t getByValTypeAlignment(Type *Ty,984const DataLayout &DL) const override;985986/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops987/// vector. If it is invalid, don't add anything to Ops.988void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,989std::vector<SDValue> &Ops,990SelectionDAG &DAG) const override;991992InlineAsm::ConstraintCode993getInlineAsmMemConstraint(StringRef ConstraintCode) const override {994if (ConstraintCode == "es")995return InlineAsm::ConstraintCode::es;996else if (ConstraintCode == "Q")997return InlineAsm::ConstraintCode::Q;998else if (ConstraintCode == "Z")999return InlineAsm::ConstraintCode::Z;1000else if (ConstraintCode == "Zy")1001return InlineAsm::ConstraintCode::Zy;1002return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);1003}10041005void CollectTargetIntrinsicOperands(const CallInst &I,1006SmallVectorImpl<SDValue> &Ops,1007SelectionDAG &DAG) const override;10081009/// isLegalAddressingMode - Return true if the addressing mode represented1010/// by AM is legal for this target, for a load/store of the specified type.1011bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,1012Type *Ty, unsigned AS,1013Instruction *I = nullptr) const override;10141015/// isLegalICmpImmediate - Return true if the specified immediate is legal1016/// icmp immediate, that is the target has icmp instructions which can1017/// compare a register against the immediate without having to materialize1018/// the immediate into a register.1019bool 
isLegalICmpImmediate(int64_t Imm) const override;10201021/// isLegalAddImmediate - Return true if the specified immediate is legal1022/// add immediate, that is the target has add instructions which can1023/// add a register and the immediate without having to materialize1024/// the immediate into a register.1025bool isLegalAddImmediate(int64_t Imm) const override;10261027/// isTruncateFree - Return true if it's free to truncate a value of1028/// type Ty1 to type Ty2. e.g. On PPC it's free to truncate a i64 value in1029/// register X1 to i32 by referencing its sub-register R1.1030bool isTruncateFree(Type *Ty1, Type *Ty2) const override;1031bool isTruncateFree(EVT VT1, EVT VT2) const override;10321033bool isZExtFree(SDValue Val, EVT VT2) const override;10341035bool isFPExtFree(EVT DestVT, EVT SrcVT) const override;10361037/// Returns true if it is beneficial to convert a load of a constant1038/// to just the constant itself.1039bool shouldConvertConstantLoadToIntImm(const APInt &Imm,1040Type *Ty) const override;10411042bool convertSelectOfConstantsToMath(EVT VT) const override {1043return true;1044}10451046bool decomposeMulByConstant(LLVMContext &Context, EVT VT,1047SDValue C) const override;10481049bool isDesirableToTransformToIntegerOp(unsigned Opc,1050EVT VT) const override {1051// Only handle float load/store pair because float(fpr) load/store1052// instruction has more cycles than integer(gpr) load/store in PPC.1053if (Opc != ISD::LOAD && Opc != ISD::STORE)1054return false;1055if (VT != MVT::f32 && VT != MVT::f64)1056return false;10571058return true;1059}10601061// Returns true if the address of the global is stored in TOC entry.1062bool isAccessedAsGotIndirect(SDValue N) const;10631064bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;10651066bool getTgtMemIntrinsic(IntrinsicInfo &Info,1067const CallInst &I,1068MachineFunction &MF,1069unsigned Intrinsic) const override;10701071/// It returns EVT::Other if the type should be determined 
using generic1072/// target-independent logic.1073EVT getOptimalMemOpType(const MemOp &Op,1074const AttributeList &FuncAttributes) const override;10751076/// Is unaligned memory access allowed for the given type, and is it fast1077/// relative to software emulation.1078bool allowsMisalignedMemoryAccesses(1079EVT VT, unsigned AddrSpace, Align Alignment = Align(1),1080MachineMemOperand::Flags Flags = MachineMemOperand::MONone,1081unsigned *Fast = nullptr) const override;10821083/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster1084/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be1085/// expanded to FMAs when this method returns true, otherwise fmuladd is1086/// expanded to fmul + fadd.1087bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,1088EVT VT) const override;10891090bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;10911092/// isProfitableToHoist - Check if it is profitable to hoist instruction1093/// \p I to its dominator block.1094/// For example, it is not profitable if \p I and it's only user can form a1095/// FMA instruction, because Powerpc prefers FMADD.1096bool isProfitableToHoist(Instruction *I) const override;10971098const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;10991100// Should we expand the build vector with shuffles?1101bool1102shouldExpandBuildVectorWithShuffles(EVT VT,1103unsigned DefinedValues) const override;11041105// Keep the zero-extensions for arguments to libcalls.1106bool shouldKeepZExtForFP16Conv() const override { return true; }11071108/// createFastISel - This method returns a target-specific FastISel object,1109/// or null if the target does not support "fast" instruction selection.1110FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,1111const TargetLibraryInfo *LibInfo) const override;11121113/// Returns true if an argument of type Ty needs to be passed in a1114/// contiguous block of registers in calling convention 
CallConv.1115bool functionArgumentNeedsConsecutiveRegisters(1116Type *Ty, CallingConv::ID CallConv, bool isVarArg,1117const DataLayout &DL) const override {1118// We support any array type as "consecutive" block in the parameter1119// save area. The element type defines the alignment requirement and1120// whether the argument should go in GPRs, FPRs, or VRs if available.1121//1122// Note that clang uses this capability both to implement the ELFv21123// homogeneous float/vector aggregate ABI, and to avoid having to use1124// "byval" when passing aggregates that might fully fit in registers.1125return Ty->isArrayTy();1126}11271128/// If a physical register, this returns the register that receives the1129/// exception address on entry to an EH pad.1130Register1131getExceptionPointerRegister(const Constant *PersonalityFn) const override;11321133/// If a physical register, this returns the register that receives the1134/// exception typeid on entry to a landing pad.1135Register1136getExceptionSelectorRegister(const Constant *PersonalityFn) const override;11371138/// Override to support customized stack guard loading.1139bool useLoadStackGuardNode() const override;1140void insertSSPDeclarations(Module &M) const override;1141Value *getSDagStackGuard(const Module &M) const override;11421143bool isFPImmLegal(const APFloat &Imm, EVT VT,1144bool ForCodeSize) const override;11451146unsigned getJumpTableEncoding() const override;1147bool isJumpTableRelative() const override;1148SDValue getPICJumpTableRelocBase(SDValue Table,1149SelectionDAG &DAG) const override;1150const MCExpr *getPICJumpTableRelocBaseExpr(const MachineFunction *MF,1151unsigned JTI,1152MCContext &Ctx) const override;11531154/// SelectOptimalAddrMode - Based on a node N and it's Parent (a MemSDNode),1155/// compute the address flags of the node, get the optimal address mode1156/// based on the flags, and set the Base and Disp based on the address mode.1157PPC::AddrMode SelectOptimalAddrMode(const SDNode 
*Parent, SDValue N,1158SDValue &Disp, SDValue &Base,1159SelectionDAG &DAG,1160MaybeAlign Align) const;1161/// SelectForceXFormMode - Given the specified address, force it to be1162/// represented as an indexed [r+r] operation (an XForm instruction).1163PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base,1164SelectionDAG &DAG) const;11651166bool splitValueIntoRegisterParts(1167SelectionDAG & DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,1168unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC)1169const override;1170/// Structure that collects some common arguments that get passed around1171/// between the functions for call lowering.1172struct CallFlags {1173const CallingConv::ID CallConv;1174const bool IsTailCall : 1;1175const bool IsVarArg : 1;1176const bool IsPatchPoint : 1;1177const bool IsIndirect : 1;1178const bool HasNest : 1;1179const bool NoMerge : 1;11801181CallFlags(CallingConv::ID CC, bool IsTailCall, bool IsVarArg,1182bool IsPatchPoint, bool IsIndirect, bool HasNest, bool NoMerge)1183: CallConv(CC), IsTailCall(IsTailCall), IsVarArg(IsVarArg),1184IsPatchPoint(IsPatchPoint), IsIndirect(IsIndirect),1185HasNest(HasNest), NoMerge(NoMerge) {}1186};11871188CCAssignFn *ccAssignFnForCall(CallingConv::ID CC, bool Return,1189bool IsVarArg) const;1190bool supportsTailCallFor(const CallBase *CB) const;11911192private:1193struct ReuseLoadInfo {1194SDValue Ptr;1195SDValue Chain;1196SDValue ResChain;1197MachinePointerInfo MPI;1198bool IsDereferenceable = false;1199bool IsInvariant = false;1200Align Alignment;1201AAMDNodes AAInfo;1202const MDNode *Ranges = nullptr;12031204ReuseLoadInfo() = default;12051206MachineMemOperand::Flags MMOFlags() const {1207MachineMemOperand::Flags F = MachineMemOperand::MONone;1208if (IsDereferenceable)1209F |= MachineMemOperand::MODereferenceable;1210if (IsInvariant)1211F |= MachineMemOperand::MOInvariant;1212return F;1213}1214};12151216// Map that relates a set of common address flags to PPC 
addressing modes.1217std::map<PPC::AddrMode, SmallVector<unsigned, 16>> AddrModesMap;1218void initializeAddrModeMap();12191220bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI,1221SelectionDAG &DAG,1222ISD::LoadExtType ET = ISD::NON_EXTLOAD) const;1223void spliceIntoChain(SDValue ResChain, SDValue NewResChain,1224SelectionDAG &DAG) const;12251226void LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,1227SelectionDAG &DAG, const SDLoc &dl) const;1228SDValue LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG,1229const SDLoc &dl) const;12301231bool directMoveIsProfitable(const SDValue &Op) const;1232SDValue LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG,1233const SDLoc &dl) const;12341235SDValue LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,1236const SDLoc &dl) const;12371238SDValue LowerTRUNCATEVector(SDValue Op, SelectionDAG &DAG) const;12391240SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;1241SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;12421243bool IsEligibleForTailCallOptimization(1244const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,1245CallingConv::ID CallerCC, bool isVarArg,1246const SmallVectorImpl<ISD::InputArg> &Ins) const;12471248bool IsEligibleForTailCallOptimization_64SVR4(1249const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,1250CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,1251const SmallVectorImpl<ISD::OutputArg> &Outs,1252const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,1253bool isCalleeExternalSymbol) const;12541255bool isEligibleForTCO(const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,1256CallingConv::ID CallerCC, const CallBase *CB,1257bool isVarArg,1258const SmallVectorImpl<ISD::OutputArg> &Outs,1259const SmallVectorImpl<ISD::InputArg> &Ins,1260const Function *CallerFunc,1261bool isCalleeExternalSymbol) const;12621263SDValue EmitTailCallLoadFPAndRetAddr(SelectionDAG &DAG, int SPDiff,1264SDValue Chain, SDValue &LROpOut,1265SDValue 
&FPOpOut,1266const SDLoc &dl) const;12671268SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, SDValue GA) const;12691270SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;1271SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;1272SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;1273SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;1274SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;1275SDValue LowerGlobalTLSAddressAIX(SDValue Op, SelectionDAG &DAG) const;1276SDValue LowerGlobalTLSAddressLinux(SDValue Op, SelectionDAG &DAG) const;1277SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;1278SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;1279SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;1280SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;1281SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;1282SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;1283SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;1284SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;1285SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;1286SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const;1287SDValue LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, SelectionDAG &DAG) const;1288SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;1289SDValue LowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const;1290SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;1291SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;1292SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;1293SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;1294SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,1295const SDLoc &dl) const;1296SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;1297SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;1298SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) 
const;1299SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const;1300SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const;1301SDValue LowerFunnelShift(SDValue Op, SelectionDAG &DAG) const;1302SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;1303SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;1304SDValue LowerVPERM(SDValue Op, SelectionDAG &DAG, ArrayRef<int> PermMask,1305EVT VT, SDValue V1, SDValue V2) const;1306SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;1307SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;1308SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;1309SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const;1310SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;1311SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const;1312SDValue lowerToLibCall(const char *LibCallName, SDValue Op,1313SelectionDAG &DAG) const;1314SDValue lowerLibCallBasedOnType(const char *LibCallFloatName,1315const char *LibCallDoubleName, SDValue Op,1316SelectionDAG &DAG) const;1317bool isLowringToMASSFiniteSafe(SDValue Op) const;1318bool isLowringToMASSSafe(SDValue Op) const;1319bool isScalarMASSConversionEnabled() const;1320SDValue lowerLibCallBase(const char *LibCallDoubleName,1321const char *LibCallFloatName,1322const char *LibCallDoubleNameFinite,1323const char *LibCallFloatNameFinite, SDValue Op,1324SelectionDAG &DAG) const;1325SDValue lowerPow(SDValue Op, SelectionDAG &DAG) const;1326SDValue lowerSin(SDValue Op, SelectionDAG &DAG) const;1327SDValue lowerCos(SDValue Op, SelectionDAG &DAG) const;1328SDValue lowerLog(SDValue Op, SelectionDAG &DAG) const;1329SDValue lowerLog10(SDValue Op, SelectionDAG &DAG) const;1330SDValue lowerExp(SDValue Op, SelectionDAG &DAG) const;1331SDValue LowerATOMIC_LOAD_STORE(SDValue Op, SelectionDAG &DAG) const;1332SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;1333SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) 
const;1334SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;1335SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;1336SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;13371338SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;1339SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;13401341SDValue LowerCallResult(SDValue Chain, SDValue InGlue,1342CallingConv::ID CallConv, bool isVarArg,1343const SmallVectorImpl<ISD::InputArg> &Ins,1344const SDLoc &dl, SelectionDAG &DAG,1345SmallVectorImpl<SDValue> &InVals) const;13461347SDValue FinishCall(CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,1348SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,1349SDValue InGlue, SDValue Chain, SDValue CallSeqStart,1350SDValue &Callee, int SPDiff, unsigned NumBytes,1351const SmallVectorImpl<ISD::InputArg> &Ins,1352SmallVectorImpl<SDValue> &InVals,1353const CallBase *CB) const;13541355SDValue1356LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,1357const SmallVectorImpl<ISD::InputArg> &Ins,1358const SDLoc &dl, SelectionDAG &DAG,1359SmallVectorImpl<SDValue> &InVals) const override;13601361SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,1362SmallVectorImpl<SDValue> &InVals) const override;13631364bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,1365bool isVarArg,1366const SmallVectorImpl<ISD::OutputArg> &Outs,1367LLVMContext &Context) const override;13681369SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,1370const SmallVectorImpl<ISD::OutputArg> &Outs,1371const SmallVectorImpl<SDValue> &OutVals,1372const SDLoc &dl, SelectionDAG &DAG) const override;13731374SDValue extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,1375SelectionDAG &DAG, SDValue ArgVal,1376const SDLoc &dl) const;13771378SDValue LowerFormalArguments_AIX(1379SDValue Chain, CallingConv::ID CallConv, bool isVarArg,1380const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,1381SelectionDAG 
&DAG, SmallVectorImpl<SDValue> &InVals) const;1382SDValue LowerFormalArguments_64SVR4(1383SDValue Chain, CallingConv::ID CallConv, bool isVarArg,1384const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,1385SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const;1386SDValue LowerFormalArguments_32SVR4(1387SDValue Chain, CallingConv::ID CallConv, bool isVarArg,1388const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,1389SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const;13901391SDValue createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,1392SDValue CallSeqStart,1393ISD::ArgFlagsTy Flags, SelectionDAG &DAG,1394const SDLoc &dl) const;13951396SDValue LowerCall_64SVR4(SDValue Chain, SDValue Callee, CallFlags CFlags,1397const SmallVectorImpl<ISD::OutputArg> &Outs,1398const SmallVectorImpl<SDValue> &OutVals,1399const SmallVectorImpl<ISD::InputArg> &Ins,1400const SDLoc &dl, SelectionDAG &DAG,1401SmallVectorImpl<SDValue> &InVals,1402const CallBase *CB) const;1403SDValue LowerCall_32SVR4(SDValue Chain, SDValue Callee, CallFlags CFlags,1404const SmallVectorImpl<ISD::OutputArg> &Outs,1405const SmallVectorImpl<SDValue> &OutVals,1406const SmallVectorImpl<ISD::InputArg> &Ins,1407const SDLoc &dl, SelectionDAG &DAG,1408SmallVectorImpl<SDValue> &InVals,1409const CallBase *CB) const;1410SDValue LowerCall_AIX(SDValue Chain, SDValue Callee, CallFlags CFlags,1411const SmallVectorImpl<ISD::OutputArg> &Outs,1412const SmallVectorImpl<SDValue> &OutVals,1413const SmallVectorImpl<ISD::InputArg> &Ins,1414const SDLoc &dl, SelectionDAG &DAG,1415SmallVectorImpl<SDValue> &InVals,1416const CallBase *CB) const;14171418SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;1419SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;1420SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;14211422SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const;1423SDValue DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const;1424SDValue 
DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;1425SDValue combineStoreFPToInt(SDNode *N, DAGCombinerInfo &DCI) const;1426SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const;1427SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const;1428SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const;1429SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const;1430SDValue combineMUL(SDNode *N, DAGCombinerInfo &DCI) const;1431SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const;1432SDValue combineFMALike(SDNode *N, DAGCombinerInfo &DCI) const;1433SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const;1434SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;1435SDValue combineVectorShuffle(ShuffleVectorSDNode *SVN,1436SelectionDAG &DAG) const;1437SDValue combineVReverseMemOP(ShuffleVectorSDNode *SVN, LSBaseSDNode *LSBase,1438DAGCombinerInfo &DCI) const;14391440/// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces1441/// SETCC with integer subtraction when (1) there is a legal way of doing it1442/// (2) keeping the result of comparison in GPR has performance benefit.1443SDValue ConvertSETCCToSubtract(SDNode *N, DAGCombinerInfo &DCI) const;14441445SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,1446int &RefinementSteps, bool &UseOneConstNR,1447bool Reciprocal) const override;1448SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,1449int &RefinementSteps) const override;1450SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,1451const DenormalMode &Mode) const override;1452SDValue getSqrtResultForDenormInput(SDValue Operand,1453SelectionDAG &DAG) const override;1454unsigned combineRepeatedFPDivisors() const override;14551456SDValue1457combineElementTruncationToVectorTruncation(SDNode *N,1458DAGCombinerInfo &DCI) const;14591460/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be1461/// handled by the VINSERTH instruction introduced 
in ISA 3.0. This is1462/// essentially any shuffle of v8i16 vectors that just inserts one element1463/// from one vector into the other.1464SDValue lowerToVINSERTH(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;14651466/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be1467/// handled by the VINSERTB instruction introduced in ISA 3.0. This is1468/// essentially v16i8 vector version of VINSERTH.1469SDValue lowerToVINSERTB(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;14701471/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be1472/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1.1473SDValue lowerToXXSPLTI32DX(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;14741475// Return whether the call instruction can potentially be optimized to a1476// tail call. This will cause the optimizers to attempt to move, or1477// duplicate return instructions to help enable tail call optimizations.1478bool mayBeEmittedAsTailCall(const CallInst *CI) const override;1479bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;14801481/// getAddrModeForFlags - Based on the set of address flags, select the most1482/// optimal instruction format to match by.1483PPC::AddrMode getAddrModeForFlags(unsigned Flags) const;14841485/// computeMOFlags - Given a node N and it's Parent (a MemSDNode), compute1486/// the address flags of the load/store instruction that is to be matched.1487/// The address flags are stored in a map, which is then searched1488/// through to determine the optimal load/store instruction format.1489unsigned computeMOFlags(const SDNode *Parent, SDValue N,1490SelectionDAG &DAG) const;1491}; // end class PPCTargetLowering14921493namespace PPC {14941495FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,1496const TargetLibraryInfo *LibInfo);14971498} // end namespace PPC14991500bool isIntS16Immediate(SDNode *N, int16_t &Imm);1501bool isIntS16Immediate(SDValue Op, int16_t &Imm);1502bool 
isIntS34Immediate(SDNode *N, int64_t &Imm);1503bool isIntS34Immediate(SDValue Op, int64_t &Imm);15041505bool convertToNonDenormSingle(APInt &ArgAPInt);1506bool convertToNonDenormSingle(APFloat &ArgAPFloat);1507bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat);15081509} // end namespace llvm15101511#endif // LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H151215131514