Path: blob/main/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the SystemZTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SystemZISelLowering.h"
#include "SystemZCallingConv.h"
#include "SystemZConstantPoolValue.h"
#include "SystemZMachineFunctionInfo.h"
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsS390.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include <cctype>
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "systemz-lower"

namespace {
// Represents information about a comparison.
struct Comparison {
  Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
      : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
        Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}

  // The operands to the comparison.
  SDValue Op0, Op1;

  // Chain if this is a strict floating-point comparison.
  SDValue Chain;

  // The opcode that should be used to compare Op0 and Op1.
  unsigned Opcode;

  // A SystemZICMP value.  Only used for integer comparisons.
  unsigned ICmpType;

  // The mask of CC values that Opcode can produce.
  unsigned CCValid;

  // The mask of CC values for which the original condition is true.
  unsigned CCMask;
};
} // end anonymous namespace
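
// A note on CCValid/CCMask: the z/Architecture condition code is a
// two-bit value (0 to 3), and these fields are four-bit masks with one
// bit per possible CC value.  For example, an integer comparison sets
// CC 0 for equal, 1 for "first operand low" and 2 for "first operand
// high", so a signed less-than test corresponds to the mask bit for CC 1.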

// Classify VT as either 32 or 64 bit.
static bool is32Bit(EVT VT) {
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::i32:
    return true;
  case MVT::i64:
    return false;
  default:
    llvm_unreachable("Unsupported type");
  }
}

// Return a version of MachineOperand that can be safely used before the
// final use.
static MachineOperand earlyUseOperand(MachineOperand Op) {
  if (Op.isReg())
    Op.setIsKill(false);
  return Op;
}

SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
                                             const SystemZSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));

  auto *Regs = STI.getSpecialRegisters();

  // Set up the register classes.
  if (Subtarget.hasHighWord())
    addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
  else
    addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
  addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
  if (!useSoftFloat()) {
    if (Subtarget.hasVector()) {
      addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
      addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
    } else {
      addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
      addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
    }
    if (Subtarget.hasVectorEnhancements1())
      addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
    else
      addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);

    if (Subtarget.hasVector()) {
      addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
      addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
      addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
      addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
      addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
      addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
    }

    if (Subtarget.hasVector())
      addRegisterClass(MVT::i128, &SystemZ::VR128BitRegClass);
  }

  // Compute derived properties from the register classes
  computeRegisterProperties(Subtarget.getRegisterInfo());

  // Set up special registers.
  setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());

  // TODO: It may be better to default to latency-oriented scheduling, however
  // LLVM's current latency-oriented scheduler can't handle physreg definitions
  // such as SystemZ has with CC, so set this to the register-pressure
  // scheduler, because it can.
  setSchedulingPreference(Sched::RegPressure);

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  setMaxAtomicSizeInBitsSupported(128);

  // Instructions are strings of 2-byte aligned 2-byte values.
  setMinFunctionAlignment(Align(2));
  // For performance reasons we prefer 16-byte alignment.
  setPrefFunctionAlignment(Align(16));

  // Handle operations that are handled in a similar way for all types.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // Lower SET_CC into an IPM-based sequence.
      setOperationAction(ISD::SETCC, VT, Custom);
      setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
      setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);

      // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
      setOperationAction(ISD::SELECT, VT, Expand);

      // Lower SELECT_CC and BR_CC into separate comparisons and branches.
      setOperationAction(ISD::SELECT_CC, VT, Custom);
      setOperationAction(ISD::BR_CC, VT, Custom);
    }
  }

  // Expand jump table branches as address arithmetic followed by an
  // indirect jump.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  // Expand BRCOND into a BR_CC (see above).
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  // Handle integer types except i128.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_INTEGER_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT) && VT != MVT::i128) {
      setOperationAction(ISD::ABS, VT, Legal);

      // Expand individual DIV and REMs into DIVREMs.
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Custom);
      setOperationAction(ISD::UDIVREM, VT, Custom);
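
      // The DIVREM nodes map naturally onto the hardware: for example,
      // an i64 sdiv and srem of the same operands become a single DSGR,
      // which leaves the remainder and the quotient in an even/odd
      // register pair.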

      // Support addition/subtraction with overflow.
      setOperationAction(ISD::SADDO, VT, Custom);
      setOperationAction(ISD::SSUBO, VT, Custom);

      // Support addition/subtraction with carry.
      setOperationAction(ISD::UADDO, VT, Custom);
      setOperationAction(ISD::USUBO, VT, Custom);

      // Support carry in as value rather than glue.
      setOperationAction(ISD::UADDO_CARRY, VT, Custom);
      setOperationAction(ISD::USUBO_CARRY, VT, Custom);

      // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
      // available, or if the operand is constant.
      setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);

      // Use POPCNT on z196 and above.
      if (Subtarget.hasPopulationCount())
        setOperationAction(ISD::CTPOP, VT, Custom);
      else
        setOperationAction(ISD::CTPOP, VT, Expand);

      // No special instructions for these.
      setOperationAction(ISD::CTTZ, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Use *MUL_LOHI where possible instead of MULH*.
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Custom);
      setOperationAction(ISD::UMUL_LOHI, VT, Custom);

      // Only z196 and above have native support for conversions to unsigned.
      // On z10, promoting to i64 doesn't generate an inexact condition for
      // values that are outside the i32 range but in the i64 range, so use
      // the default expansion.
      if (!Subtarget.hasFPExtension())
        setOperationAction(ISD::FP_TO_UINT, VT, Expand);

      // Mirror those settings for STRICT_FP_TO_[SU]INT.  Note that these all
      // default to Expand, so need to be modified to Legal where appropriate.
      setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal);
      if (Subtarget.hasFPExtension())
        setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Legal);

      // And similarly for STRICT_[SU]INT_TO_FP.
      setOperationAction(ISD::STRICT_SINT_TO_FP, VT, Legal);
      if (Subtarget.hasFPExtension())
        setOperationAction(ISD::STRICT_UINT_TO_FP, VT, Legal);
    }
  }

  // Handle i128 if legal.
  if (isTypeLegal(MVT::i128)) {
    // No special instructions for these.
    setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
    setOperationAction(ISD::UDIVREM, MVT::i128, Expand);
    setOperationAction(ISD::SMUL_LOHI, MVT::i128, Expand);
    setOperationAction(ISD::UMUL_LOHI, MVT::i128, Expand);
    setOperationAction(ISD::ROTR, MVT::i128, Expand);
    setOperationAction(ISD::ROTL, MVT::i128, Expand);
    setOperationAction(ISD::MUL, MVT::i128, Expand);
    setOperationAction(ISD::MULHS, MVT::i128, Expand);
    setOperationAction(ISD::MULHU, MVT::i128, Expand);
    setOperationAction(ISD::SDIV, MVT::i128, Expand);
    setOperationAction(ISD::UDIV, MVT::i128, Expand);
    setOperationAction(ISD::SREM, MVT::i128, Expand);
    setOperationAction(ISD::UREM, MVT::i128, Expand);
    setOperationAction(ISD::CTLZ, MVT::i128, Expand);
    setOperationAction(ISD::CTTZ, MVT::i128, Expand);

    // Support addition/subtraction with carry.
    setOperationAction(ISD::UADDO, MVT::i128, Custom);
    setOperationAction(ISD::USUBO, MVT::i128, Custom);
    setOperationAction(ISD::UADDO_CARRY, MVT::i128, Custom);
    setOperationAction(ISD::USUBO_CARRY, MVT::i128, Custom);

    // Use VPOPCT and add up partial results.
    setOperationAction(ISD::CTPOP, MVT::i128, Custom);

    // We have to use libcalls for these.
    setOperationAction(ISD::FP_TO_UINT, MVT::i128, LibCall);
    setOperationAction(ISD::FP_TO_SINT, MVT::i128, LibCall);
    setOperationAction(ISD::UINT_TO_FP, MVT::i128, LibCall);
    setOperationAction(ISD::SINT_TO_FP, MVT::i128, LibCall);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, LibCall);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, LibCall);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, LibCall);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, LibCall);
  }
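
  // The i128 <-> FP conversion libcalls above resolve to the usual
  // compiler runtime routines; for example, an f64-to-i128 FP_TO_SINT
  // becomes a call to __fixdfti.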

  // Type legalization will convert 8- and 16-bit atomic operations into
  // forms that operate on i32s (but still keeping the original memory VT).
  // Lower them into full i32 operations.
  setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);

  // Whether or not i128 is a legal type, we need to custom lower
  // the atomic operations in order to exploit SystemZ instructions.
  setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
  setOperationAction(ISD::ATOMIC_LOAD, MVT::f128, Custom);
  setOperationAction(ISD::ATOMIC_STORE, MVT::f128, Custom);

  // Mark sign/zero extending atomic loads as legal, which will make
  // DAGCombiner fold extensions into atomic loads if possible.
  setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i64,
                         {MVT::i8, MVT::i16, MVT::i32}, Legal);
  setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32,
                         {MVT::i8, MVT::i16}, Legal);
  setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i16,
                         MVT::i8, Legal);

  // We can use the CC result of compare-and-swap to implement
  // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);

  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  // Traps are legal, as we will convert them to "j .+2".
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // z10 has instructions for signed but not unsigned FP conversion.
  // Handle unsigned 32-bit types as signed 64-bit types.
  if (!Subtarget.hasFPExtension()) {
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Promote);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
  }

  // We have native support for a 64-bit CTLZ, via FLOGR.
  setOperationAction(ISD::CTLZ, MVT::i32, Promote);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
  setOperationAction(ISD::CTLZ, MVT::i64, Legal);
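
  // In effect a 32-bit CTLZ is performed by zero-extending the operand
  // to i64, issuing FLOGR, and adjusting the result by 32, since FLOGR
  // only exists in a 64-bit form.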

  // On z15 we have native support for a 64-bit CTPOP.
  if (Subtarget.hasMiscellaneousExtensions3()) {
    setOperationAction(ISD::CTPOP, MVT::i32, Promote);
    setOperationAction(ISD::CTPOP, MVT::i64, Legal);
  }

  // Give LowerOperation the chance to replace 64-bit ORs with subregs.
  setOperationAction(ISD::OR, MVT::i64, Custom);

  // Expand 128 bit shifts without using a libcall.
  setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);

  // Also expand 256 bit shifts if i128 is a legal type.
  if (isTypeLegal(MVT::i128)) {
    setOperationAction(ISD::SRL_PARTS, MVT::i128, Expand);
    setOperationAction(ISD::SHL_PARTS, MVT::i128, Expand);
    setOperationAction(ISD::SRA_PARTS, MVT::i128, Expand);
  }

  // Handle bitcast from fp128 to i128.
  if (!isTypeLegal(MVT::i128))
    setOperationAction(ISD::BITCAST, MVT::i128, Custom);

  // We have native instructions for i8, i16 and i32 extensions, but not i1.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
  }

  // Handle the various types of symbolic address.
  setOperationAction(ISD::ConstantPool, PtrVT, Custom);
  setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
  setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
  setOperationAction(ISD::BlockAddress, PtrVT, Custom);
  setOperationAction(ISD::JumpTable, PtrVT, Custom);

  // We need to handle dynamic allocations specially because of the
  // 160-byte area at the bottom of the stack.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);

  // Handle prefetches with PFD or PFDRL.
  setOperationAction(ISD::PREFETCH, MVT::Other, Custom);

  // Handle readcyclecounter with STCKF.
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);

  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
    // Assume by default that all vector operations need to be expanded.
    for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
      if (getOperationAction(Opcode, VT) == Legal)
        setOperationAction(Opcode, VT, Expand);

    // Likewise all truncating stores and extending loads.
    for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
      setTruncStoreAction(VT, InnerVT, Expand);
      setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
    }

    if (isTypeLegal(VT)) {
      // These operations are legal for anything that can be stored in a
      // vector register, even if there is no native support for the format
      // as such.  In particular, we can do these for v4f32 even though there
      // are no specific instructions for that format.
      setOperationAction(ISD::LOAD, VT, Legal);
      setOperationAction(ISD::STORE, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      // Likewise, except that we need to replace the nodes with something
      // more specific.
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
  }
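
  // For the Custom entries above: a BUILD_VECTOR that cannot be matched
  // to a single instruction is emitted as a short sequence (e.g. a
  // replicated immediate plus element insertions), and a VECTOR_SHUFFLE
  // that matches none of the specialized patterns (merges, VPDI, etc.)
  // falls back on VPERM with a constant permutation vector.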

  // Handle integer vector types.
  for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
    if (isTypeLegal(VT)) {
      // These operations have direct equivalents.
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);
      if (VT != MVT::v2i64)
        setOperationAction(ISD::MUL, VT, Legal);
      setOperationAction(ISD::ABS, VT, Legal);
      setOperationAction(ISD::AND, VT, Legal);
      setOperationAction(ISD::OR, VT, Legal);
      setOperationAction(ISD::XOR, VT, Legal);
      if (Subtarget.hasVectorEnhancements1())
        setOperationAction(ISD::CTPOP, VT, Legal);
      else
        setOperationAction(ISD::CTPOP, VT, Custom);
      setOperationAction(ISD::CTTZ, VT, Legal);
      setOperationAction(ISD::CTLZ, VT, Legal);

      // Convert a GPR scalar to a vector by inserting it into element 0.
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);

      // Use a series of unpacks for extensions.
      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);

      // Detect shifts/rotates by a scalar amount and convert them into
      // V*_BY_SCALAR.
      setOperationAction(ISD::SHL, VT, Custom);
      setOperationAction(ISD::SRA, VT, Custom);
      setOperationAction(ISD::SRL, VT, Custom);
      setOperationAction(ISD::ROTL, VT, Custom);

      // Add ISD::VECREDUCE_ADD as custom in order to implement
      // it with VZERO+VSUM
      setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);

      // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
      // and inverting the result as necessary.
      setOperationAction(ISD::SETCC, VT, Custom);
    }
  }
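
  // As an example of the SETCC mapping above: a v4i32 "equal" compare
  // becomes VCEQF, a signed "greater" becomes VCHF, an unsigned
  // "greater" becomes VCHLF, and the "less" variants are formed by
  // swapping the operands first.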

  if (Subtarget.hasVector()) {
    // There should be no need to check for float types other than v2f64
    // since <2 x f32> isn't a legal type.
    setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal);

    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f64, Legal);
  }

  if (Subtarget.hasVectorEnhancements2()) {
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4f32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4f32, Legal);

    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f32, Legal);
  }
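
  // These conversions map directly onto the vector convert instructions;
  // for example, a v2f64 FP_TO_SINT is a single VCGDB and the inverse
  // SINT_TO_FP is VCDGB.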

  // Handle floating-point types.
  for (unsigned I = MVT::FIRST_FP_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // We can use FI for FRINT.
      setOperationAction(ISD::FRINT, VT, Legal);

      // We can use the extended form of FI for other rounding operations.
      if (Subtarget.hasFPExtension()) {
        setOperationAction(ISD::FNEARBYINT, VT, Legal);
        setOperationAction(ISD::FFLOOR, VT, Legal);
        setOperationAction(ISD::FCEIL, VT, Legal);
        setOperationAction(ISD::FTRUNC, VT, Legal);
        setOperationAction(ISD::FROUND, VT, Legal);
      }

      // No special instructions for these.
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FSINCOS, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);

      // Special treatment.
      setOperationAction(ISD::IS_FPCLASS, VT, Custom);

      // Handle constrained floating-point operations.
      setOperationAction(ISD::STRICT_FADD, VT, Legal);
      setOperationAction(ISD::STRICT_FSUB, VT, Legal);
      setOperationAction(ISD::STRICT_FMUL, VT, Legal);
      setOperationAction(ISD::STRICT_FDIV, VT, Legal);
      setOperationAction(ISD::STRICT_FMA, VT, Legal);
      setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
      setOperationAction(ISD::STRICT_FRINT, VT, Legal);
      setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal);
      setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
      if (Subtarget.hasFPExtension()) {
        setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
        setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
        setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
        setOperationAction(ISD::STRICT_FROUND, VT, Legal);
        setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
      }
    }
  }

  // Handle floating-point vector types.
  if (Subtarget.hasVector()) {
    // Scalar-to-vector conversion is just a subreg.
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);

    // Some insertions and extractions can be done directly but others
    // need to go via integers.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);

    // These operations have direct equivalents.
    setOperationAction(ISD::FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::FMA, MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::FABS, MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
    setOperationAction(ISD::FROUND, MVT::v2f64, Legal);

    // Handle constrained floating-point operations.
    setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);

    setOperationAction(ISD::SETCC, MVT::v2f64, Custom);
    setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
    setOperationAction(ISD::STRICT_FSETCC, MVT::v2f64, Custom);
    setOperationAction(ISD::STRICT_FSETCC, MVT::v4f32, Custom);
    if (Subtarget.hasVectorEnhancements1()) {
      setOperationAction(ISD::STRICT_FSETCCS, MVT::v2f64, Custom);
      setOperationAction(ISD::STRICT_FSETCCS, MVT::v4f32, Custom);
    }
  }
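
  // The FMINNUM/FMAXNUM and FMINIMUM/FMAXIMUM actions below rely on the
  // VFMIN/VFMAX instructions introduced with vector enhancements 1; the
  // two flavours differ only in the NaN handling selected through the
  // instructions' mode field.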

  // The vector enhancements facility 1 has instructions for these.
  if (Subtarget.hasVectorEnhancements1()) {
    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
    setOperationAction(ISD::FABS, MVT::v4f32, Legal);
    setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::f64, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::f64, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::v2f64, Legal);
    setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::v2f64, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::f128, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f128, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::f128, Legal);

    // Handle constrained floating-point operations.
    setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
    for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
                     MVT::v4f32, MVT::v2f64 }) {
      setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal);
      setOperationAction(ISD::STRICT_FMINNUM, VT, Legal);
      setOperationAction(ISD::STRICT_FMAXIMUM, VT, Legal);
      setOperationAction(ISD::STRICT_FMINIMUM, VT, Legal);
    }
  }

  // We only have fused f128 multiply-addition on vector registers.
  if (!Subtarget.hasVectorEnhancements1()) {
    setOperationAction(ISD::FMA, MVT::f128, Expand);
    setOperationAction(ISD::STRICT_FMA, MVT::f128, Expand);
  }

  // We don't have a copysign instruction on vector registers.
  if (Subtarget.hasVectorEnhancements1())
    setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);

  // Needed so that we don't try to implement f128 constant loads using
  // a load-and-extend of a f80 constant (in cases where the constant
  // would fit in an f80).
  for (MVT VT : MVT::fp_valuetypes())
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);

  // We don't have extending load instructions on vector registers.
  if (Subtarget.hasVectorEnhancements1()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
  }

  // Floating-point truncation and stores need to be done separately.
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f64, Expand);

  // We have 64-bit FPR<->GPR moves, but need special handling for
  // 32-bit forms.
  if (!Subtarget.hasVector()) {
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::f32, Custom);
  }

  // VASTART and VACOPY need to deal with the SystemZ-specific varargs
  // structure, but VAEND is a no-op.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);

  // Codes for which we want to perform some z-specific combinations.
  setTargetDAGCombine({ISD::ZERO_EXTEND,
                       ISD::SIGN_EXTEND,
                       ISD::SIGN_EXTEND_INREG,
                       ISD::LOAD,
                       ISD::STORE,
                       ISD::VECTOR_SHUFFLE,
                       ISD::EXTRACT_VECTOR_ELT,
                       ISD::FP_ROUND,
                       ISD::STRICT_FP_ROUND,
                       ISD::FP_EXTEND,
                       ISD::SINT_TO_FP,
                       ISD::UINT_TO_FP,
                       ISD::STRICT_FP_EXTEND,
                       ISD::BSWAP,
                       ISD::SDIV,
                       ISD::UDIV,
                       ISD::SREM,
                       ISD::UREM,
                       ISD::INTRINSIC_VOID,
                       ISD::INTRINSIC_W_CHAIN});
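
  // A typical combine from the list above: a BSWAP whose only operand
  // is a load is folded into a byte-reversed load (LRV/LRVG), and a
  // byte-swapped value that is only stored becomes STRV/STRVG.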

  // Handle intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We want to use MVC in preference to even a single load/store pair.
  MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0;
  MaxStoresPerMemcpyOptSize = 0;

  // The main memset sequence is a byte store followed by an MVC.
  // Two STC or MV..I stores win over that, but the kind of fused stores
  // generated by target-independent code don't when the byte value is
  // variable.  E.g.  "STC <reg>;MHI <reg>,257;STH <reg>" is not better
  // than "STC;MVC".  Handle the choice in target-specific code instead.
  MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0;
  MaxStoresPerMemsetOptSize = 0;

  // Default to having -disable-strictnode-mutation on
  IsStrictFPEnabled = true;

  if (Subtarget.isTargetzOS()) {
    struct RTLibCallMapping {
      RTLIB::Libcall Code;
      const char *Name;
    };
    static RTLibCallMapping RTLibCallCommon[] = {
#define HANDLE_LIBCALL(code, name) {RTLIB::code, name},
#include "ZOSLibcallNames.def"
    };
    for (auto &E : RTLibCallCommon)
      setLibcallName(E.Code, E.Name);
  }
}

bool SystemZTargetLowering::useSoftFloat() const {
  return Subtarget.hasSoftFloat();
}

EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
                                              LLVMContext &, EVT VT) const {
  if (!VT.isVector())
    return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(
    const MachineFunction &MF, EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  case MVT::f128:
    return Subtarget.hasVectorEnhancements1();
  default:
    break;
  }

  return false;
}
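
// FMA is faster than separate multiply and add here because these types
// map onto genuinely fused instructions, e.g. MADBR for scalar f64 and
// VFMADB for v2f64.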

// Return true if the constant can be generated with a vector instruction,
// such as VGM, VGMB or VREPI.
bool SystemZVectorConstantInfo::isVectorConstantLegal(
    const SystemZSubtarget &Subtarget) {
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
  if (!Subtarget.hasVector() ||
      (isFP128 && !Subtarget.hasVectorEnhancements1()))
    return false;

  // Try using VECTOR GENERATE BYTE MASK.  This is the architecturally-
  // preferred way of creating all-zero and all-one vectors so give it
  // priority over other methods below.
  unsigned Mask = 0;
  unsigned I = 0;
  for (; I < SystemZ::VectorBytes; ++I) {
    uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
    if (Byte == 0xff)
      Mask |= 1ULL << I;
    else if (Byte != 0)
      break;
  }
  if (I == SystemZ::VectorBytes) {
    Opcode = SystemZISD::BYTE_MASK;
    OpVals.push_back(Mask);
    VecVT = MVT::getVectorVT(MVT::getIntegerVT(8), 16);
    return true;
  }

  if (SplatBitSize > 64)
    return false;

  auto tryValue = [&](uint64_t Value) -> bool {
    // Try VECTOR REPLICATE IMMEDIATE
    int64_t SignedValue = SignExtend64(Value, SplatBitSize);
    if (isInt<16>(SignedValue)) {
      OpVals.push_back(((unsigned) SignedValue));
      Opcode = SystemZISD::REPLICATE;
      VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
                               SystemZ::VectorBits / SplatBitSize);
      return true;
    }
    // Try VECTOR GENERATE MASK
    unsigned Start, End;
    if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
      // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
      // denoting 1 << 63 and 63 denoting 1.  Convert them to bit numbers for
      // a SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
      OpVals.push_back(Start - (64 - SplatBitSize));
      OpVals.push_back(End - (64 - SplatBitSize));
      Opcode = SystemZISD::ROTATE_MASK;
      VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
                               SystemZ::VectorBits / SplatBitSize);
      return true;
    }
    return false;
  };

  // First try assuming that any undefined bits above the highest set bit
  // and below the lowest set bit are 1s.  This increases the likelihood of
  // being able to use a sign-extended element value in VECTOR REPLICATE
  // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
  uint64_t SplatBitsZ = SplatBits.getZExtValue();
  uint64_t SplatUndefZ = SplatUndef.getZExtValue();
  unsigned LowerBits = llvm::countr_zero(SplatBitsZ);
  unsigned UpperBits = llvm::countl_zero(SplatBitsZ);
  uint64_t Lower = SplatUndefZ & maskTrailingOnes<uint64_t>(LowerBits);
  uint64_t Upper = SplatUndefZ & maskLeadingOnes<uint64_t>(UpperBits);
  if (tryValue(SplatBitsZ | Upper | Lower))
    return true;

  // Now try assuming that any undefined bits between the first and
  // last defined set bits are set.  This increases the chances of
  // using a non-wraparound mask.
  uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
  return tryValue(SplatBitsZ | Middle);
}

SystemZVectorConstantInfo::SystemZVectorConstantInfo(APInt IntImm) {
  if (IntImm.isSingleWord()) {
    IntBits = APInt(128, IntImm.getZExtValue());
    IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth());
  } else
    IntBits = IntImm;
  assert(IntBits.getBitWidth() == 128 && "Unsupported APInt.");

  // Find the smallest splat.
  SplatBits = IntImm;
  unsigned Width = SplatBits.getBitWidth();
  while (Width > 8) {
    unsigned HalfSize = Width / 2;
    APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
    APInt LowValue = SplatBits.trunc(HalfSize);

    // If the two halves do not match, stop here.
    if (HighValue != LowValue || 8 > HalfSize)
      break;

    SplatBits = HighValue;
    Width = HalfSize;
  }
  SplatUndef = 0;
  SplatBitSize = Width;
}

SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode *BVN) {
  assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
  bool HasAnyUndefs;

  // Get IntBits by finding the 128 bit splat.
  BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
                       true);

  // Get SplatBits by finding the 8 bit or greater splat.
  BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
                       true);
}

bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                         bool ForCodeSize) const {
  // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
  if (Imm.isZero() || Imm.isNegZero())
    return true;

  return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
}

/// Returns true if stack probing through inline assembly is requested.
bool SystemZTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
  // If the function specifically requests inline stack probes, emit them.
  if (MF.getFunction().hasFnAttribute("probe-stack"))
    return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
           "inline-asm";
  return false;
}

TargetLowering::AtomicExpansionKind
SystemZTargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const {
  return AtomicExpansionKind::None;
}

TargetLowering::AtomicExpansionKind
SystemZTargetLowering::shouldCastAtomicStoreInIR(StoreInst *SI) const {
  return AtomicExpansionKind::None;
}
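
// For example, with interlocked-access facility 1 a 64-bit atomic add
// can be selected to a single LAAG, while an operation without an
// interlocked instruction (such as nand) is expanded to a
// compare-and-swap loop.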

TargetLowering::AtomicExpansionKind
SystemZTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
  // Don't expand subword operations as they require special treatment.
  if (RMW->getType()->isIntegerTy(8) || RMW->getType()->isIntegerTy(16))
    return AtomicExpansionKind::None;

  // Don't expand if there is a target instruction available.
  if (Subtarget.hasInterlockedAccess1() &&
      (RMW->getType()->isIntegerTy(32) || RMW->getType()->isIntegerTy(64)) &&
      (RMW->getOperation() == AtomicRMWInst::BinOp::Add ||
       RMW->getOperation() == AtomicRMWInst::BinOp::Sub ||
       RMW->getOperation() == AtomicRMWInst::BinOp::And ||
       RMW->getOperation() == AtomicRMWInst::BinOp::Or ||
       RMW->getOperation() == AtomicRMWInst::BinOp::Xor))
    return AtomicExpansionKind::None;

  return AtomicExpansionKind::CmpXChg;
}

bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  // We can use CGFI or CLGFI.
  return isInt<32>(Imm) || isUInt<32>(Imm);
}

bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  // We can use ALGFI or SLGFI.
  return isUInt<32>(Imm) || isUInt<32>(-Imm);
}

bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
  // Unaligned accesses should never be slower than the expanded version.
  // We check specifically for aligned accesses in the few cases where
  // they are required.
  if (Fast)
    *Fast = 1;
  return true;
}

// Information about the addressing mode for a memory access.
struct AddressingMode {
  // True if a long displacement is supported.
  bool LongDisplacement;

  // True if use of index register is supported.
  bool IndexReg;

  AddressingMode(bool LongDispl, bool IdxReg) :
    LongDisplacement(LongDispl), IndexReg(IdxReg) {}
};

// Return the desired addressing mode for a Load which has only one use (in
// the same block) which is a Store.
static AddressingMode getLoadStoreAddrMode(bool HasVector,
                                           Type *Ty) {
  // With vector support a Load->Store combination may be combined to either
  // an MVC or vector operations and it seems to work best to allow the
  // vector addressing mode.
  if (HasVector)
    return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);

  // Otherwise only the MVC case is special.
  bool MVC = Ty->isIntegerTy(8);
  return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
}
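
// The restrictions above reflect the instruction formats involved: MVC
// is an SS-format instruction with only a base register and a 12-bit
// unsigned displacement, so neither a long displacement nor an index
// register is available when an access is likely to be combined to MVC.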

// Return the addressing mode which seems most desirable given an LLVM
// Instruction pointer.
static AddressingMode
supportedAddressingMode(Instruction *I, bool HasVector) {
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    default: break;
    case Intrinsic::memset:
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
    }
  }

  if (isa<LoadInst>(I) && I->hasOneUse()) {
    auto *SingleUser = cast<Instruction>(*I->user_begin());
    if (SingleUser->getParent() == I->getParent()) {
      if (isa<ICmpInst>(SingleUser)) {
        if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
          if (C->getBitWidth() <= 64 &&
              (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
            // Comparison of memory with 16 bit signed / unsigned immediate
            return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
      } else if (isa<StoreInst>(SingleUser))
        // Load->Store
        return getLoadStoreAddrMode(HasVector, I->getType());
    }
  } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
    if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
      if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
        // Load->Store
        return getLoadStoreAddrMode(HasVector, LoadI->getType());
  }

  if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {

    // * Use LDE instead of LE/LEY for z13 to avoid partial register
    //   dependencies (LDE only supports small offsets).
    // * Utilize the vector registers to hold floating point
    //   values (vector load / store instructions only support small
    //   offsets).

    Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
                         I->getOperand(0)->getType());
    bool IsFPAccess = MemAccessTy->isFloatingPointTy();
    bool IsVectorAccess = MemAccessTy->isVectorTy();

    // A store of an extracted vector element will be combined into a VSTE type
    // instruction.
    if (!IsVectorAccess && isa<StoreInst>(I)) {
      Value *DataOp = I->getOperand(0);
      if (isa<ExtractElementInst>(DataOp))
        IsVectorAccess = true;
    }

    // A load which gets inserted into a vector element will be combined into a
    // VLE type instruction.
    if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
      User *LoadUser = *I->user_begin();
      if (isa<InsertElementInst>(LoadUser))
        IsVectorAccess = true;
    }

    if (IsFPAccess || IsVectorAccess)
      return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
  }

  return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
}

bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
       const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
  // Punt on globals for now, although they can be used in limited
  // RELATIVE LONG cases.
  if (AM.BaseGV)
    return false;

  // Require a 20-bit signed offset.
  if (!isInt<20>(AM.BaseOffs))
    return false;

  bool RequireD12 =
      Subtarget.hasVector() && (Ty->isVectorTy() || Ty->isIntegerTy(128));
  AddressingMode SupportedAM(!RequireD12, true);
  if (I != nullptr)
    SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());

  if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
    return false;

  if (!SupportedAM.IndexReg)
    // No indexing allowed.
    return AM.Scale == 0;
  else
    // Indexing is OK but no scale factor can be applied.
    return AM.Scale == 0 || AM.Scale == 1;
}

bool SystemZTargetLowering::findOptimalMemOpLowering(
    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
    unsigned SrcAS, const AttributeList &FuncAttributes) const {
  const int MVCFastLen = 16;

  if (Limit != ~unsigned(0)) {
    // Don't expand Op into scalar loads/stores in these cases:
    if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen)
      return false; // Small memcpy: Use MVC
    if (Op.isMemset() && Op.size() - 1 <= MVCFastLen)
      return false; // Small memset (first byte with STC/MVI): Use MVC
    if (Op.isZeroMemset())
      return false; // Memset zero: Use XC
  }

  return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS,
                                                  SrcAS, FuncAttributes);
}

EVT SystemZTargetLowering::getOptimalMemOpType(const MemOp &Op,
                                   const AttributeList &FuncAttributes) const {
  return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
}
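
// The net effect of the two hooks above: a small memcpy stays a single
// MVC, a memset of zero becomes XC, and only the remaining cases are
// broken into (vector) load/store pairs of the returned type.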

bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
  if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
    return false;
  unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedValue();
  unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedValue();
  return FromBits > ToBits;
}

bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
  if (!FromVT.isInteger() || !ToVT.isInteger())
    return false;
  unsigned FromBits = FromVT.getFixedSizeInBits();
  unsigned ToBits = ToVT.getFixedSizeInBits();
  return FromBits > ToBits;
}

//===----------------------------------------------------------------------===//
// Inline asm support
//===----------------------------------------------------------------------===//

TargetLowering::ConstraintType
SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'a': // Address register
    case 'd': // Data register (equivalent to 'r')
    case 'f': // Floating-point register
    case 'h': // High-part register
    case 'r': // General-purpose register
    case 'v': // Vector register
      return C_RegisterClass;

    case 'Q': // Memory with base and unsigned 12-bit displacement
    case 'R': // Likewise, plus an index
    case 'S': // Memory with base and signed 20-bit displacement
    case 'T': // Likewise, plus an index
    case 'm': // Equivalent to 'T'.
      return C_Memory;

    case 'I': // Unsigned 8-bit constant
    case 'J': // Unsigned 12-bit constant
    case 'K': // Signed 16-bit constant
    case 'L': // Signed 20-bit displacement (on all targets we support)
    case 'M': // 0x7fffffff
      return C_Immediate;

    default:
      break;
    }
  } else if (Constraint.size() == 2 && Constraint[0] == 'Z') {
    switch (Constraint[1]) {
    case 'Q': // Address with base and unsigned 12-bit displacement
    case 'R': // Likewise, plus an index
    case 'S': // Address with base and signed 20-bit displacement
    case 'T': // Likewise, plus an index
      return C_Address;

    default:
      break;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}
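
// Hypothetical examples of these constraints in user code:
//   asm("agr %0, %2" : "=d"(sum) : "0"(a), "d"(b));  // 'd': 64-bit GPR
//   asm("oi %0, 1" : "+Q"(flag));    // 'Q': base + 12-bit displacement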

TargetLowering::ConstraintWeight SystemZTargetLowering::
getSingleConstraintMatchWeight(AsmOperandInfo &info,
                               const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  Type *type = CallOperandVal->getType();
  // Look at the constraint type.
  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;

  case 'a': // Address register
  case 'd': // Data register (equivalent to 'r')
  case 'h': // High-part register
  case 'r': // General-purpose register
    weight = CallOperandVal->getType()->isIntegerTy() ? CW_Register : CW_Default;
    break;

  case 'f': // Floating-point register
    if (!useSoftFloat())
      weight = type->isFloatingPointTy() ? CW_Register : CW_Default;
    break;

  case 'v': // Vector register
    if (Subtarget.hasVector())
      weight = (type->isVectorTy() || type->isFloatingPointTy()) ? CW_Register
                                                                 : CW_Default;
    break;

  case 'I': // Unsigned 8-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isUInt<8>(C->getZExtValue()))
        weight = CW_Constant;
    break;

  case 'J': // Unsigned 12-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isUInt<12>(C->getZExtValue()))
        weight = CW_Constant;
    break;

  case 'K': // Signed 16-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isInt<16>(C->getSExtValue()))
        weight = CW_Constant;
    break;

  case 'L': // Signed 20-bit displacement (on all targets we support)
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isInt<20>(C->getSExtValue()))
        weight = CW_Constant;
    break;

  case 'M': // 0x7fffffff
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (C->getZExtValue() == 0x7fffffff)
        weight = CW_Constant;
    break;
  }
  return weight;
}

// Parse a "{tNNN}" register constraint for which the register type "t"
// has already been verified.  RC is the class associated with "t" and
// Map maps 0-based register numbers to LLVM register numbers.
static std::pair<unsigned, const TargetRegisterClass *>
parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC,
                    const unsigned *Map, unsigned Size) {
  assert(*(Constraint.end()-1) == '}' && "Missing '}'");
  if (isdigit(Constraint[2])) {
    unsigned Index;
    bool Failed =
        Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
    if (!Failed && Index < Size && Map[Index])
      return std::make_pair(Map[Index], RC);
  }
  return std::make_pair(0U, nullptr);
}
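
// For example, the constraint "{r5}" with an i64 operand parses to
// index 5 of GR64Regs below, i.e. SystemZ::R5D.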

std::pair<unsigned, const TargetRegisterClass *>
SystemZTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    default: break;
    case 'd': // Data register (equivalent to 'r')
    case 'r': // General-purpose register
      if (VT.getSizeInBits() == 64)
        return std::make_pair(0U, &SystemZ::GR64BitRegClass);
      else if (VT.getSizeInBits() == 128)
        return std::make_pair(0U, &SystemZ::GR128BitRegClass);
      return std::make_pair(0U, &SystemZ::GR32BitRegClass);

    case 'a': // Address register
      if (VT == MVT::i64)
        return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
      else if (VT == MVT::i128)
        return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
      return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);

    case 'h': // High-part register (an LLVM extension)
      return std::make_pair(0U, &SystemZ::GRH32BitRegClass);

    case 'f': // Floating-point register
      if (!useSoftFloat()) {
        if (VT.getSizeInBits() == 64)
          return std::make_pair(0U, &SystemZ::FP64BitRegClass);
        else if (VT.getSizeInBits() == 128)
          return std::make_pair(0U, &SystemZ::FP128BitRegClass);
        return std::make_pair(0U, &SystemZ::FP32BitRegClass);
      }
      break;

    case 'v': // Vector register
      if (Subtarget.hasVector()) {
        if (VT.getSizeInBits() == 32)
          return std::make_pair(0U, &SystemZ::VR32BitRegClass);
        if (VT.getSizeInBits() == 64)
          return std::make_pair(0U, &SystemZ::VR64BitRegClass);
        return std::make_pair(0U, &SystemZ::VR128BitRegClass);
      }
      break;
    }
  }
  if (Constraint.starts_with("{")) {

    // A clobber constraint (e.g. ~{f0}) will have MVT::Other which is illegal
    // to check the size on.
    auto getVTSizeInBits = [&VT]() {
      return VT == MVT::Other ? 0 : VT.getSizeInBits();
    };

    // We need to override the default register parsing for GPRs and FPRs
    // because the interpretation depends on VT.  The internal names of
    // the registers are also different from the external names
    // (F0D and F0S instead of F0, etc.).
    if (Constraint[1] == 'r') {
      if (getVTSizeInBits() == 32)
        return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
                                   SystemZMC::GR32Regs, 16);
      if (getVTSizeInBits() == 128)
        return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
                                   SystemZMC::GR128Regs, 16);
      return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
                                 SystemZMC::GR64Regs, 16);
    }
    if (Constraint[1] == 'f') {
      if (useSoftFloat())
        return std::make_pair(
            0u, static_cast<const TargetRegisterClass *>(nullptr));
      if (getVTSizeInBits() == 32)
        return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
                                   SystemZMC::FP32Regs, 16);
      if (getVTSizeInBits() == 128)
        return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
                                   SystemZMC::FP128Regs, 16);
      return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
                                 SystemZMC::FP64Regs, 16);
    }
    if (Constraint[1] == 'v') {
      if (!Subtarget.hasVector())
        return std::make_pair(
            0u, static_cast<const TargetRegisterClass *>(nullptr));
      if (getVTSizeInBits() == 32)
        return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
                                   SystemZMC::VR32Regs, 32);
      if (getVTSizeInBits() == 64)
        return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
                                   SystemZMC::VR64Regs, 32);
      return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
                                 SystemZMC::VR128Regs, 32);
    }
  }
  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
Register
SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                         const MachineFunction &MF) const {
  Register Reg =
      StringSwitch<Register>(RegName)
          .Case("r4", Subtarget.isTargetXPLINK64() ? SystemZ::R4D : 0)
          .Case("r15", Subtarget.isTargetELF() ? SystemZ::R15D : 0)
          .Default(0);

  if (Reg)
    return Reg;
  report_fatal_error("Invalid register name global variable");
}

Register SystemZTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return Subtarget.isTargetXPLINK64() ? SystemZ::R1D : SystemZ::R6D;
}

Register SystemZTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D;
}
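
// The immediate constraints handled below correspond to instruction
// immediate fields, e.g. 'K' (signed 16-bit) matches the operand range
// of AHI/AGHI and 'J' (unsigned 12-bit) matches a D12 displacement.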

void SystemZTargetLowering::LowerAsmOperandForConstraint(
    SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
    SelectionDAG &DAG) const {
  // Only support length 1 constraints for now.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'I': // Unsigned 8-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isUInt<8>(C->getZExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'J': // Unsigned 12-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isUInt<12>(C->getZExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'K': // Signed 16-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isInt<16>(C->getSExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'L': // Signed 20-bit displacement (on all targets we support)
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isInt<20>(C->getSExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'M': // 0x7fffffff
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0x7fffffff)
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

//===----------------------------------------------------------------------===//
// Calling conventions
//===----------------------------------------------------------------------===//

#include "SystemZGenCallingConv.inc"

const MCPhysReg *SystemZTargetLowering::getScratchRegisters(
    CallingConv::ID) const {
  static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
                                           SystemZ::R14D, 0 };
  return ScratchRegs;
}

bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
                                                     Type *ToType) const {
  return isTruncateFree(FromType, ToType);
}

bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  return CI->isTailCall();
}

// Value is a value that has been passed to us in the location described by VA
// (and so has type VA.getLocVT()).  Convert Value to VA.getValVT(), chaining
// any loads onto Chain.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL,
                                   CCValAssign &VA, SDValue Chain,
                                   SDValue Value) {
  // If the argument has been promoted from a smaller type, insert an
  // assertion to capture this.
  if (VA.getLocInfo() == CCValAssign::SExt)
    Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
                        DAG.getValueType(VA.getValVT()));
  else if (VA.getLocInfo() == CCValAssign::ZExt)
    Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
                        DAG.getValueType(VA.getValVT()));

  if (VA.isExtInLoc())
    Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
  else if (VA.getLocInfo() == CCValAssign::BCvt) {
    // If this is a short vector argument loaded from the stack,
    // extend from i64 to full vector size and then bitcast.
    assert(VA.getLocVT() == MVT::i64);
    assert(VA.getValVT().isVector());
    Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
    Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
  } else
    assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
  return Value;
}

// Value is a value of type VA.getValVT() that we need to copy into
// the location described by VA.  Return a copy of Value converted to
// VA.getLocVT().  The caller is responsible for handling indirect values.
static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
                                   CCValAssign &VA, SDValue Value) {
  switch (VA.getLocInfo()) {
  case CCValAssign::SExt:
    return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::ZExt:
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::AExt:
    return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::BCvt: {
    assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
    assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 ||
           VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128);
    // For an f32 vararg we need to first promote it to an f64 and then
    // bitcast it to an i64.
    if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64)
      Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value);
    MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
                            ? MVT::v2i64
                            : VA.getLocVT();
    Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
    // For ELF, this is a short vector argument to be stored to the stack,
    // bitcast to v2i64 and then extract first element.
    if (BitCastToType == MVT::v2i64)
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
                         DAG.getConstant(0, DL, MVT::i32));
    return Value;
  }
  case CCValAssign::Full:
    return Value;
  default:
    llvm_unreachable("Unhandled getLocInfo()");
  }
}

static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) {
  SDLoc DL(In);
  SDValue Lo, Hi;
  if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
    Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In);
    Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
                     DAG.getNode(ISD::SRL, DL, MVT::i128, In,
                                 DAG.getConstant(64, DL, MVT::i32)));
  } else {
    std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64);
  }

  // FIXME: If v2i64 were a legal type, we could use it instead of
  // Untyped here.  This might enable improved folding.
  SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
                                    MVT::Untyped, Hi, Lo);
  return SDValue(Pair, 0);
}
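
// Note that a GR128 value occupies an even/odd GPR pair: subreg_h64 is
// the even (high) register and subreg_l64 the odd (low) one, which is
// what lowerGR128ToI128 below extracts.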

static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) {
  SDLoc DL(In);
  SDValue Lo, Hi;
  if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
    Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In);
    Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
                     DAG.getNode(ISD::SRL, DL, MVT::i128, In,
                                 DAG.getConstant(64, DL, MVT::i32)));
  } else {
    std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64);
  }

  // FIXME: If v2i64 were a legal type, we could use it instead of
  // Untyped here.  This might enable improved folding.
  SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
                                    MVT::Untyped, Hi, Lo);
  return SDValue(Pair, 0);
}

static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) {
  SDLoc DL(In);
  SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
                                          DL, MVT::i64, In);
  SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
                                          DL, MVT::i64, In);

  if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
    Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Lo);
    Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Hi);
    Hi = DAG.getNode(ISD::SHL, DL, MVT::i128, Hi,
                     DAG.getConstant(64, DL, MVT::i32));
    return DAG.getNode(ISD::OR, DL, MVT::i128, Lo, Hi);
  } else {
    return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
  }
}

bool SystemZTargetLowering::splitValueIntoRegisterParts(
    SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
    unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
  EVT ValueVT = Val.getValueType();
  if (ValueVT.getSizeInBits() == 128 && NumParts == 1 &&
      PartVT == MVT::Untyped) {
    // Inline assembly operand.
    Parts[0] = lowerI128ToGR128(DAG, DAG.getBitcast(MVT::i128, Val));
    return true;
  }

  return false;
}

SDValue SystemZTargetLowering::joinRegisterPartsIntoValue(
    SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
    MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
  if (ValueVT.getSizeInBits() == 128 && NumParts == 1 &&
      PartVT == MVT::Untyped) {
    // Inline assembly operand.
    SDValue Res = lowerGR128ToI128(DAG, Parts[0]);
    return DAG.getBitcast(ValueVT, Res);
  }

  return SDValue();
}
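
// For illustration: an i128 inline-asm operand placed in a GR128 pair is
// split by lowerI128ToGR128 as Hi = bits 127..64 and Lo = bits 63..0, and
// lowerGR128ToI128 reassembles the same value from subreg_h64/subreg_l64,
// so (assuming the operand order of the PAIR128 pseudo) the two helpers
// above are inverses of each other.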

SDValue SystemZTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SystemZMachineFunctionInfo *FuncInfo =
      MF.getInfo<SystemZMachineFunctionInfo>();
  auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
  FuncInfo->setSizeOfFnParams(CCInfo.getStackSize());

  unsigned NumFixedGPRs = 0;
  unsigned NumFixedFPRs = 0;
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    SDValue ArgValue;
    CCValAssign &VA = ArgLocs[I];
    EVT LocVT = VA.getLocVT();
    if (VA.isRegLoc()) {
      // Arguments passed in registers
      const TargetRegisterClass *RC;
      switch (LocVT.getSimpleVT().SimpleTy) {
      default:
        // Integers smaller than i64 should be promoted to i64.
        llvm_unreachable("Unexpected argument type");
      case MVT::i32:
        NumFixedGPRs += 1;
        RC = &SystemZ::GR32BitRegClass;
        break;
      case MVT::i64:
        NumFixedGPRs += 1;
        RC = &SystemZ::GR64BitRegClass;
        break;
      case MVT::f32:
        NumFixedFPRs += 1;
        RC = &SystemZ::FP32BitRegClass;
        break;
      case MVT::f64:
        NumFixedFPRs += 1;
        RC = &SystemZ::FP64BitRegClass;
        break;
      case MVT::f128:
        NumFixedFPRs += 2;
        RC = &SystemZ::FP128BitRegClass;
        break;
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        RC = &SystemZ::VR128BitRegClass;
        break;
      }

      Register VReg = MRI.createVirtualRegister(RC);
      MRI.addLiveIn(VA.getLocReg(), VReg);
      ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");

      // Create the frame index object for this incoming parameter.
      // FIXME: Pre-include call frame size in the offset, should not
      // need to manually add it here.
      int64_t ArgSPOffset = VA.getLocMemOffset();
      if (Subtarget.isTargetXPLINK64()) {
        auto &XPRegs =
            Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
        ArgSPOffset += XPRegs.getCallFrameSize();
      }
      int FI =
          MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);

      // Create the SelectionDAG nodes corresponding to a load
      // from this parameter.  Unpromoted ints and floats are
      // passed as right-justified 8-byte values.
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
        FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
                          DAG.getIntPtrConstant(4, DL));
      ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
                             MachinePointerInfo::getFixedStack(MF, FI));
    }

    // Convert the value of the argument register into the value that's
    // being passed.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
                                   MachinePointerInfo()));
      // If the original argument was split (e.g. i128), we need
      // to load all parts of it here (using the same address).
      unsigned ArgIndex = Ins[I].OrigArgIndex;
      assert(Ins[I].PartOffset == 0);
      while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[I + 1];
        unsigned PartOffset = Ins[I + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++I;
      }
    } else
      InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
  }

  if (IsVarArg && Subtarget.isTargetXPLINK64()) {
    // Save the number of non-varargs registers for later use by va_start, etc.
    FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
    FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);

    auto *Regs = static_cast<SystemZXPLINK64Registers *>(
        Subtarget.getSpecialRegisters());

    // Likewise the address (in the form of a frame index) of where the
    // first stack vararg would be.  The 1-byte size here is arbitrary.
    // FIXME: Pre-include call frame size in the offset, should not
    // need to manually add it here.
    int64_t VarArgOffset = CCInfo.getStackSize() + Regs->getCallFrameSize();
    int FI = MFI.CreateFixedObject(1, VarArgOffset, true);
    FuncInfo->setVarArgsFrameIndex(FI);
  }

  if (IsVarArg && Subtarget.isTargetELF()) {
    // Save the number of non-varargs registers for later use by va_start, etc.
    FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
    FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);

    // Likewise the address (in the form of a frame index) of where the
    // first stack vararg would be.  The 1-byte size here is arbitrary.
    int64_t VarArgsOffset = CCInfo.getStackSize();
    FuncInfo->setVarArgsFrameIndex(
        MFI.CreateFixedObject(1, VarArgsOffset, true));

    // ...and a similar frame index for the caller-allocated save area
    // that will be used to store the incoming registers.
    int64_t RegSaveOffset =
        -SystemZMC::ELFCallFrameSize +
        TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
    unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
    FuncInfo->setRegSaveFrameIndex(RegSaveIndex);

    // Store the FPR varargs in the reserved frame slots.  (We store the
    // GPRs as part of the prologue.)
    if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
      SDValue MemOps[SystemZ::ELFNumArgFPRs];
      for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
        unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
        int FI =
            MFI.CreateFixedObject(8, -SystemZMC::ELFCallFrameSize + Offset,
                                  true);
        SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
        Register VReg = MF.addLiveIn(SystemZ::ELFArgFPRs[I],
                                     &SystemZ::FP64BitRegClass);
        SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
        MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
                                 MachinePointerInfo::getFixedStack(MF, FI));
      }
      // Join the stores, which are independent of one another.
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                          ArrayRef(&MemOps[NumFixedFPRs],
                                   SystemZ::ELFNumArgFPRs - NumFixedFPRs));
    }
  }

  if (Subtarget.isTargetXPLINK64()) {
    // Create virtual register for handling incoming "ADA" special register (R5)
    const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
    Register ADAvReg = MRI.createVirtualRegister(RC);
    auto *Regs = static_cast<SystemZXPLINK64Registers *>(
        Subtarget.getSpecialRegisters());
    MRI.addLiveIn(Regs->getADARegister(), ADAvReg);
    FuncInfo->setADAVirtualRegister(ADAvReg);
  }
  return Chain;
}

static bool canUseSiblingCall(const CCState &ArgCCInfo,
                              SmallVectorImpl<CCValAssign> &ArgLocs,
                              SmallVectorImpl<ISD::OutputArg> &Outs) {
  // Punt if there are any indirect or stack arguments, or if the call
  // needs the callee-saved argument register R6, or if the call uses
  // the callee-saved register arguments SwiftSelf and SwiftError.
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    CCValAssign &VA = ArgLocs[I];
    if (VA.getLocInfo() == CCValAssign::Indirect)
      return false;
    if (!VA.isRegLoc())
      return false;
    Register Reg = VA.getLocReg();
    if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
      return false;
    if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
      return false;
  }
  return true;
}
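
// For illustration (hypothetical source, assuming the ELF ABI): a call like
//   long f(long a, long b) { return g(a, b); }
// passes both arguments in registers and so survives the checks above,
// letting it be emitted as a sibling call; any stack, indirect, or
// R6-assigned argument instead forces a normal call sequence.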

static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL,
                           unsigned Offset, bool LoadAdr = false) {
  MachineFunction &MF = DAG.getMachineFunction();
  SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
  unsigned ADAvReg = MFI->getADAVirtualRegister();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());

  SDValue Reg = DAG.getRegister(ADAvReg, PtrVT);
  SDValue Ofs = DAG.getTargetConstant(Offset, DL, PtrVT);

  SDValue Result = DAG.getNode(SystemZISD::ADA_ENTRY, DL, PtrVT, Val, Reg, Ofs);
  if (!LoadAdr)
    Result = DAG.getLoad(
        PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo(), Align(8),
        MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant);

  return Result;
}

// ADA access using Global value
// Note: for functions, address of descriptor is returned
static SDValue getADAEntry(SelectionDAG &DAG, const GlobalValue *GV, SDLoc DL,
                           EVT PtrVT) {
  unsigned ADAtype;
  bool LoadAddr = false;
  const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV);
  bool IsFunction =
      (isa<Function>(GV)) || (GA && isa<Function>(GA->getAliaseeObject()));
  bool IsInternal = (GV->hasInternalLinkage() || GV->hasPrivateLinkage());

  if (IsFunction) {
    if (IsInternal) {
      ADAtype = SystemZII::MO_ADA_DIRECT_FUNC_DESC;
      LoadAddr = true;
    } else
      ADAtype = SystemZII::MO_ADA_INDIRECT_FUNC_DESC;
  } else {
    ADAtype = SystemZII::MO_ADA_DATA_SYMBOL_ADDR;
  }
  SDValue Val = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ADAtype);

  return getADAEntry(DAG, Val, DL, 0, LoadAddr);
}

static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA,
                               SDLoc &DL, SDValue &Chain) {
  unsigned ADADelta = 0; // ADA offset in desc.
  unsigned EPADelta = 8; // EPA offset in desc.
  MachineFunction &MF = DAG.getMachineFunction();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());

  // XPLink calling convention.
  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    bool IsInternal = (G->getGlobal()->hasInternalLinkage() ||
                       G->getGlobal()->hasPrivateLinkage());
    if (IsInternal) {
      SystemZMachineFunctionInfo *MFI =
          MF.getInfo<SystemZMachineFunctionInfo>();
      unsigned ADAvReg = MFI->getADAVirtualRegister();
      ADA = DAG.getCopyFromReg(Chain, DL, ADAvReg, PtrVT);
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
      Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
      return true;
    } else {
      SDValue GA = DAG.getTargetGlobalAddress(
          G->getGlobal(), DL, PtrVT, 0, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
      ADA = getADAEntry(DAG, GA, DL, ADADelta);
      Callee = getADAEntry(DAG, GA, DL, EPADelta);
    }
  } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    SDValue ES = DAG.getTargetExternalSymbol(
        E->getSymbol(), PtrVT, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
    ADA = getADAEntry(DAG, ES, DL, ADADelta);
    Callee = getADAEntry(DAG, ES, DL, EPADelta);
  } else {
    // Function pointer case
    ADA = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
                      DAG.getConstant(ADADelta, DL, PtrVT));
    ADA = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ADA,
                      MachinePointerInfo::getGOT(DAG.getMachineFunction()));
    Callee = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
                         DAG.getConstant(EPADelta, DL, PtrVT));
    Callee = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Callee,
                         MachinePointerInfo::getGOT(DAG.getMachineFunction()));
  }
  return false;
}
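
// For illustration: the ADADelta/EPADelta values above reflect the XPLINK
// function descriptor layout assumed here, i.e. two 8-byte words with the
// ADA at offset 0 and the entry-point address (EPA) at offset 8.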

SDValue
SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                 SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  MachineFunction &MF = DAG.getMachineFunction();
  EVT PtrVT = getPointerTy(MF.getDataLayout());
  LLVMContext &Ctx = *DAG.getContext();
  SystemZCallingConventionRegisters *Regs = Subtarget.getSpecialRegisters();

  // FIXME: z/OS support to be added later.
  if (Subtarget.isTargetXPLINK64())
    IsTailCall = false;

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
  ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);

  // We don't support GuaranteedTailCallOpt, only automatically-detected
  // sibling calls.
  if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
    IsTailCall = false;

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getStackSize();

  // Mark the start of the call.
  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);

  // Copy argument values to their designated locations.
  SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    CCValAssign &VA = ArgLocs[I];
    SDValue ArgValue = OutVals[I];

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      unsigned ArgIndex = Outs[I].OrigArgIndex;
      EVT SlotVT;
      if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
        // Allocate the full stack space for a promoted (and split) argument.
        Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
        EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
        MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
        unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
        SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
      } else {
        SlotVT = Outs[I].VT;
      }
      SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
                       MachinePointerInfo::getFixedStack(MF, FI)));
      // If the original argument was split (e.g. i128), we need
      // to store all parts of it here (and pass just one address).
      assert(Outs[I].PartOffset == 0);
      while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[I + 1];
        unsigned PartOffset = Outs[I + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, PartValue, Address,
                         MachinePointerInfo::getFixedStack(MF, FI)));
        assert((PartOffset + PartValue.getValueType().getStoreSize() <=
                SlotVT.getStoreSize()) && "Not enough space for argument part!");
        ++I;
      }
      ArgValue = SpillSlot;
    } else
      ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);

    if (VA.isRegLoc()) {
      // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a
      // MVT::i128 type.  We decompose the 128-bit type to a pair of its high
      // and low values.
      if (VA.getLocVT() == MVT::i128)
        ArgValue = lowerI128ToGR128(DAG, ArgValue);
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");

      // Work out the address of the stack slot.  Unpromoted ints and
      // floats are passed as right-justified 8-byte values.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL,
                                      Regs->getStackPointerRegister(), PtrVT);
      unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
                        VA.getLocMemOffset();
      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
        Offset += 4;
      SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                                    DAG.getIntPtrConstant(Offset, DL));

      // Emit the store.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));

      // Although long doubles or vectors are passed through the stack when
      // they are vararg (non-fixed arguments), if a long double or vector
      // occupies the third and fourth slot of the argument list GPR3 should
      // still shadow the third slot of the argument list.
      if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
        SDValue ShadowArgValue =
            DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
                        DAG.getIntPtrConstant(1, DL));
        RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
      }
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  // Accept direct calls by converting symbolic call addresses to the
  // associated Target* opcodes.  Force %r1 to be used for indirect
  // tail calls.
  SDValue Glue;

  if (Subtarget.isTargetXPLINK64()) {
    SDValue ADA;
    bool IsBRASL = getzOSCalleeAndADA(DAG, Callee, ADA, DL, Chain);
    if (!IsBRASL) {
      unsigned CalleeReg = static_cast<SystemZXPLINK64Registers *>(Regs)
                               ->getAddressOfCalleeRegister();
      Chain = DAG.getCopyToReg(Chain, DL, CalleeReg, Callee, Glue);
      Glue = Chain.getValue(1);
      Callee = DAG.getRegister(CalleeReg, Callee.getValueType());
    }
    RegsToPass.push_back(std::make_pair(
        static_cast<SystemZXPLINK64Registers *>(Regs)->getADARegister(), ADA));
  } else {
    if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
      Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
    } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
      Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
      Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
    } else if (IsTailCall) {
      Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
      Glue = Chain.getValue(1);
      Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
    }
  }

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
    Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
                             RegsToPass[I].second, Glue);
    Glue = Chain.getValue(1);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
    Ops.push_back(DAG.getRegister(RegsToPass[I].first,
                                  RegsToPass[I].second.getValueType()));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  if (IsTailCall) {
    SDValue Ret = DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
    DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
    return Ret;
  }
  Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RetLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
  RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);

  // Copy all of the result registers out of their specified physreg.
  for (CCValAssign &VA : RetLocs) {
    // Copy the value out, gluing the copy to the end of the call sequence.
    SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
                                          VA.getLocVT(), Glue);
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    // Convert the value of the return register into the value that's
    // being returned.
    InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
  }

  return Chain;
}

// Generate a call taking the given operands as arguments and returning a
// result of type RetVT.
std::pair<SDValue, SDValue> SystemZTargetLowering::makeExternalCall(
    SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT,
    ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL,
    bool DoesNotReturn, bool IsReturnValueUsed) const {
  TargetLowering::ArgListTy Args;
  Args.reserve(Ops.size());

  TargetLowering::ArgListEntry Entry;
  for (SDValue Op : Ops) {
    Entry.Node = Op;
    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
    Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
    Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
    Args.push_back(Entry);
  }

  SDValue Callee =
      DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout()));

  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
  TargetLowering::CallLoweringInfo CLI(DAG);
  bool SignExtend = shouldSignExtendTypeInLibCall(RetVT, IsSigned);
  CLI.setDebugLoc(DL)
      .setChain(Chain)
      .setCallee(CallConv, RetTy, Callee, std::move(Args))
      .setNoReturn(DoesNotReturn)
      .setDiscardResult(!IsReturnValueUsed)
      .setSExtResult(SignExtend)
      .setZExtResult(!SignExtend);
  return LowerCallTo(CLI);
}

bool SystemZTargetLowering::
CanLowerReturn(CallingConv::ID CallConv,
               MachineFunction &MF, bool isVarArg,
               const SmallVectorImpl<ISD::OutputArg> &Outs,
               LLVMContext &Context) const {
  // Special case that we cannot easily detect in RetCC_SystemZ since
  // i128 may not be a legal type.
  for (auto &Out : Outs)
    if (Out.ArgVT == MVT::i128)
      return false;

  SmallVector<CCValAssign, 16> RetLocs;
  CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
  return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
}

SDValue
SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                   bool IsVarArg,
                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                                   const SmallVectorImpl<SDValue> &OutVals,
                                   const SDLoc &DL, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();

  // Assign locations to each returned value.
  SmallVector<CCValAssign, 16> RetLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
  RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);

  // Quick exit for void returns
  if (RetLocs.empty())
    return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, Chain);

  if (CallConv == CallingConv::GHC)
    report_fatal_error("GHC functions return void only");

  // Copy the result values into the output registers.
  SDValue Glue;
  SmallVector<SDValue, 4> RetOps;
  RetOps.push_back(Chain);
  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
    CCValAssign &VA = RetLocs[I];
    SDValue RetValue = OutVals[I];

    // Make the return register live on exit.
    assert(VA.isRegLoc() && "Can only return in registers!");

    // Promote the value as required.
    RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);

    // Chain and glue the copies together.
    Register Reg = VA.getLocReg();
    Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
  }

  // Update chain and glue.
  RetOps[0] = Chain;
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, RetOps);
}

// Return true if Op is an intrinsic node with chain that returns the CC value
// as its only (other) argument.  Provide the associated SystemZISD opcode and
// the mask of valid CC values if so.
static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
                                      unsigned &CCValid) {
  unsigned Id = Op.getConstantOperandVal(1);
  switch (Id) {
  case Intrinsic::s390_tbegin:
    Opcode = SystemZISD::TBEGIN;
    CCValid = SystemZ::CCMASK_TBEGIN;
    return true;

  case Intrinsic::s390_tbegin_nofloat:
    Opcode = SystemZISD::TBEGIN_NOFLOAT;
    CCValid = SystemZ::CCMASK_TBEGIN;
    return true;

  case Intrinsic::s390_tend:
    Opcode = SystemZISD::TEND;
    CCValid = SystemZ::CCMASK_TEND;
    return true;

  default:
    return false;
  }
}
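
// For illustration (hypothetical user code, assuming clang's s390x HTM
// builtins): a transactional-execution use such as
//   int cc = __builtin_tbegin(0);
// arrives here as INTRINSIC_W_CHAIN(s390_tbegin) whose i32 result is the
// condition code, so a later comparison of cc against a constant can be
// folded into a branch on the CC set by TBEGIN itself.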

// Return true if Op is an intrinsic node without chain that returns the
// CC value as its final argument.  Provide the associated SystemZISD
// opcode and the mask of valid CC values if so.
static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
  unsigned Id = Op.getConstantOperandVal(0);
  switch (Id) {
  case Intrinsic::s390_vpkshs:
  case Intrinsic::s390_vpksfs:
  case Intrinsic::s390_vpksgs:
    Opcode = SystemZISD::PACKS_CC;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vpklshs:
  case Intrinsic::s390_vpklsfs:
  case Intrinsic::s390_vpklsgs:
    Opcode = SystemZISD::PACKLS_CC;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vceqbs:
  case Intrinsic::s390_vceqhs:
  case Intrinsic::s390_vceqfs:
  case Intrinsic::s390_vceqgs:
    Opcode = SystemZISD::VICMPES;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vchbs:
  case Intrinsic::s390_vchhs:
  case Intrinsic::s390_vchfs:
  case Intrinsic::s390_vchgs:
    Opcode = SystemZISD::VICMPHS;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vchlbs:
  case Intrinsic::s390_vchlhs:
  case Intrinsic::s390_vchlfs:
  case Intrinsic::s390_vchlgs:
    Opcode = SystemZISD::VICMPHLS;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vtm:
    Opcode = SystemZISD::VTM;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vfaebs:
  case Intrinsic::s390_vfaehs:
  case Intrinsic::s390_vfaefs:
    Opcode = SystemZISD::VFAE_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfaezbs:
  case Intrinsic::s390_vfaezhs:
  case Intrinsic::s390_vfaezfs:
    Opcode = SystemZISD::VFAEZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfeebs:
  case Intrinsic::s390_vfeehs:
  case Intrinsic::s390_vfeefs:
    Opcode = SystemZISD::VFEE_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfeezbs:
  case Intrinsic::s390_vfeezhs:
  case Intrinsic::s390_vfeezfs:
    Opcode = SystemZISD::VFEEZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfenebs:
  case Intrinsic::s390_vfenehs:
  case Intrinsic::s390_vfenefs:
    Opcode = SystemZISD::VFENE_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfenezbs:
  case Intrinsic::s390_vfenezhs:
  case Intrinsic::s390_vfenezfs:
    Opcode = SystemZISD::VFENEZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vistrbs:
  case Intrinsic::s390_vistrhs:
  case Intrinsic::s390_vistrfs:
    Opcode = SystemZISD::VISTR_CC;
    CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3;
    return true;

  case Intrinsic::s390_vstrcbs:
  case Intrinsic::s390_vstrchs:
  case Intrinsic::s390_vstrcfs:
    Opcode = SystemZISD::VSTRC_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vstrczbs:
  case Intrinsic::s390_vstrczhs:
  case Intrinsic::s390_vstrczfs:
    Opcode = SystemZISD::VSTRCZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vstrsb:
  case Intrinsic::s390_vstrsh:
  case Intrinsic::s390_vstrsf:
    Opcode = SystemZISD::VSTRS_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vstrszb:
  case Intrinsic::s390_vstrszh:
  case Intrinsic::s390_vstrszf:
    Opcode = SystemZISD::VSTRSZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfcedbs:
  case Intrinsic::s390_vfcesbs:
    Opcode = SystemZISD::VFCMPES;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vfchdbs:
  case Intrinsic::s390_vfchsbs:
    Opcode = SystemZISD::VFCMPHS;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vfchedbs:
  case Intrinsic::s390_vfchesbs:
    Opcode = SystemZISD::VFCMPHES;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vftcidb:
  case Intrinsic::s390_vftcisb:
    Opcode = SystemZISD::VFTCI;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_tdc:
    Opcode = SystemZISD::TDC;
    CCValid = SystemZ::CCMASK_TDC;
    return true;

  default:
    return false;
  }
}

// Emit an intrinsic with chain and an explicit CC register result.
static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
                                           unsigned Opcode) {
  // Copy all operands except the intrinsic ID.
  unsigned NumOps = Op.getNumOperands();
  SmallVector<SDValue, 6> Ops;
  Ops.reserve(NumOps - 1);
  Ops.push_back(Op.getOperand(0));
  for (unsigned I = 2; I < NumOps; ++I)
    Ops.push_back(Op.getOperand(I));

  assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
  SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
  SDValue OldChain = SDValue(Op.getNode(), 1);
  SDValue NewChain = SDValue(Intr.getNode(), 1);
  DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
  return Intr.getNode();
}

// Emit an intrinsic with an explicit CC register result.
static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
                                   unsigned Opcode) {
  // Copy all operands except the intrinsic ID.
  unsigned NumOps = Op.getNumOperands();
  SmallVector<SDValue, 6> Ops;
  Ops.reserve(NumOps - 1);
  for (unsigned I = 1; I < NumOps; ++I)
    Ops.push_back(Op.getOperand(I));

  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
  return Intr.getNode();
}

// CC is a comparison that will be implemented using an integer or
// floating-point comparison.  Return the condition code mask for
// a branch on true.  In the integer case, CCMASK_CMP_UO is set for
// unsigned comparisons and clear for signed ones.  In the floating-point
// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
static unsigned CCMaskForCondCode(ISD::CondCode CC) {
#define CONV(X) \
  case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
  case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
  case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X

  switch (CC) {
  default:
    llvm_unreachable("Invalid integer condition!");

  CONV(EQ);
  CONV(NE);
  CONV(GT);
  CONV(GE);
  CONV(LT);
  CONV(LE);

  case ISD::SETO:  return SystemZ::CCMASK_CMP_O;
  case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
  }
#undef CONV
}

// If C can be converted to a comparison against zero, adjust the operands
// as necessary.
static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
  if (C.ICmpType == SystemZICMP::UnsignedOnly)
    return;

  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
  if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
    return;

  int64_t Value = ConstOp1->getSExtValue();
  if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
      (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
      (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
      (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
    C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
    C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
  }
}
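
// For illustration: a signed "x > -1" becomes "x >= 0" above (the GT mask is
// XORed with the EQ bit), and "x < 1" becomes "x <= 0", so later code can use
// compare-with-zero forms such as LOAD AND TEST.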

// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
// adjust the operands as necessary.
static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
                             Comparison &C) {
  // For us to make any changes, it must be a comparison between a single-use
  // load and a constant.
  if (!C.Op0.hasOneUse() ||
      C.Op0.getOpcode() != ISD::LOAD ||
      C.Op1.getOpcode() != ISD::Constant)
    return;

  // We must have an 8- or 16-bit load.
  auto *Load = cast<LoadSDNode>(C.Op0);
  unsigned NumBits = Load->getMemoryVT().getSizeInBits();
  if ((NumBits != 8 && NumBits != 16) ||
      NumBits != Load->getMemoryVT().getStoreSizeInBits())
    return;

  // The load must be an extending one and the constant must be within the
  // range of the unextended value.
  auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
  if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
    return;
  uint64_t Value = ConstOp1->getZExtValue();
  uint64_t Mask = (1 << NumBits) - 1;
  if (Load->getExtensionType() == ISD::SEXTLOAD) {
    // Make sure that ConstOp1 is in range of C.Op0.
    int64_t SignedValue = ConstOp1->getSExtValue();
    if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
      return;
    if (C.ICmpType != SystemZICMP::SignedOnly) {
      // Unsigned comparison between two sign-extended values is equivalent
      // to unsigned comparison between two zero-extended values.
      Value &= Mask;
    } else if (NumBits == 8) {
      // Try to treat the comparison as unsigned, so that we can use CLI.
      // Adjust CCMask and Value as necessary.
      if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
        // Test whether the high bit of the byte is set.
        Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
      else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
        // Test whether the high bit of the byte is clear.
        Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
      else
        // No instruction exists for this combination.
        return;
      C.ICmpType = SystemZICMP::UnsignedOnly;
    }
  } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
    if (Value > Mask)
      return;
    // If the constant is in range, we can use any comparison.
    C.ICmpType = SystemZICMP::Any;
  } else
    return;

  // Make sure that the first operand is an i32 of the right extension type.
  ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
                              ISD::SEXTLOAD :
                              ISD::ZEXTLOAD);
  if (C.Op0.getValueType() != MVT::i32 ||
      Load->getExtensionType() != ExtType) {
    C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
                           Load->getBasePtr(), Load->getPointerInfo(),
                           Load->getMemoryVT(), Load->getAlign(),
                           Load->getMemOperand()->getFlags());
    // Update the chain uses.
    DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
  }

  // Make sure that the second operand is an i32 with the right value.
  if (C.Op1.getValueType() != MVT::i32 ||
      Value != ConstOp1->getZExtValue())
    C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
}

// Return true if Op is either an unextended load, or a load suitable
// for integer register-memory comparisons of type ICmpType.
static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
  auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
  if (Load) {
    // There are no instructions to compare a register with a memory byte.
    if (Load->getMemoryVT() == MVT::i8)
      return false;
    // Otherwise decide on extension type.
    switch (Load->getExtensionType()) {
    case ISD::NON_EXTLOAD:
      return true;
    case ISD::SEXTLOAD:
      return ICmpType != SystemZICMP::UnsignedOnly;
    case ISD::ZEXTLOAD:
      return ICmpType != SystemZICMP::SignedOnly;
    default:
      break;
    }
  }
  return false;
}

// Return true if it is better to swap the operands of C.
static bool shouldSwapCmpOperands(const Comparison &C) {
  // Leave i128 and f128 comparisons alone, since they have no memory forms.
  if (C.Op0.getValueType() == MVT::i128)
    return false;
  if (C.Op0.getValueType() == MVT::f128)
    return false;

  // Always keep a floating-point constant second, since comparisons with
  // zero can use LOAD TEST and comparisons with other constants make a
  // natural memory operand.
  if (isa<ConstantFPSDNode>(C.Op1))
    return false;

  // Never swap comparisons with zero since there are many ways to optimize
  // those later.
  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
  if (ConstOp1 && ConstOp1->getZExtValue() == 0)
    return false;

  // Also keep natural memory operands second if the loaded value is
  // only used here.  Several comparisons have memory forms.
  if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
    return false;

  // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
  // In that case we generally prefer the memory to be second.
  if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
    // The only exceptions are when the second operand is a constant and
    // we can use things like CHHSI.
    if (!ConstOp1)
      return true;
    // The unsigned memory-immediate instructions can handle 16-bit
    // unsigned integers.
    if (C.ICmpType != SystemZICMP::SignedOnly &&
        isUInt<16>(ConstOp1->getZExtValue()))
      return false;
    // The signed memory-immediate instructions can handle 16-bit
    // signed integers.
    if (C.ICmpType != SystemZICMP::UnsignedOnly &&
        isInt<16>(ConstOp1->getSExtValue()))
      return false;
    return true;
  }

  // Try to promote the use of CGFR and CLGFR.
  unsigned Opcode0 = C.Op0.getOpcode();
  if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
    return true;
  if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
    return true;
  if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND &&
      C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
      C.Op0.getConstantOperandVal(1) == 0xffffffff)
    return true;

  return false;
}

// Check whether C tests for equality between X and Y and whether X - Y
// or Y - X is also computed.  In that case it's better to compare the
// result of the subtraction against zero.
static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
                                 Comparison &C) {
  if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
      C.CCMask == SystemZ::CCMASK_CMP_NE) {
    for (SDNode *N : C.Op0->uses()) {
      if (N->getOpcode() == ISD::SUB &&
          ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
           (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
        // Disable the nsw and nuw flags: the backend needs to handle
        // overflow as well during comparison elimination.
        SDNodeFlags Flags = N->getFlags();
        Flags.setNoSignedWrap(false);
        Flags.setNoUnsignedWrap(false);
        N->setFlags(Flags);
        C.Op0 = SDValue(N, 0);
        C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
        return;
      }
    }
  }
}
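
// For illustration: given
//   if (x == y) { ... use (x - y) ... }
// the code above rewrites the comparison as (x - y) == 0, so the CC result
// of the subtraction can be reused and the separate compare eliminated.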

// Check whether C compares a floating-point value with zero and if that
// floating-point value is also negated.  In this case we can use the
// negation to set CC, so avoiding separate LOAD AND TEST and
// LOAD (NEGATIVE/COMPLEMENT) instructions.
static void adjustForFNeg(Comparison &C) {
  // This optimization is invalid for strict comparisons, since FNEG
  // does not raise any exceptions.
  if (C.Chain)
    return;
  auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
  if (C1 && C1->isZero()) {
    for (SDNode *N : C.Op0->uses()) {
      if (N->getOpcode() == ISD::FNEG) {
        C.Op0 = SDValue(N, 0);
        C.CCMask = SystemZ::reverseCCMask(C.CCMask);
        return;
      }
    }
  }
}

// Check whether C compares (shl X, 32) with 0 and whether X is
// also sign-extended.  In that case it is better to test the result
// of the sign extension using LTGFR.
//
// This case is important because InstCombine transforms a comparison
// with (sext (trunc X)) into a comparison with (shl X, 32).
static void adjustForLTGFR(Comparison &C) {
  // Check for a comparison between (shl X, 32) and 0.
  if (C.Op0.getOpcode() == ISD::SHL && C.Op0.getValueType() == MVT::i64 &&
      C.Op1.getOpcode() == ISD::Constant && C.Op1->getAsZExtVal() == 0) {
    auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
    if (C1 && C1->getZExtValue() == 32) {
      SDValue ShlOp0 = C.Op0.getOperand(0);
      // See whether X has any SIGN_EXTEND_INREG uses.
      for (SDNode *N : ShlOp0->uses()) {
        if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
            cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
          C.Op0 = SDValue(N, 0);
          return;
        }
      }
    }
  }
}

// If C compares the truncation of an extending load, try to compare
// the untruncated value instead.  This exposes more opportunities to
// reuse CC.
static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
                               Comparison &C) {
  if (C.Op0.getOpcode() == ISD::TRUNCATE &&
      C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
      C.Op1.getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
      C.Op1->getAsZExtVal() == 0) {
    auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
    if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <=
        C.Op0.getValueSizeInBits().getFixedValue()) {
      unsigned Type = L->getExtensionType();
      if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
          (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
        C.Op0 = C.Op0.getOperand(0);
        C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
      }
    }
  }
}

// Return true if shift operation N has an in-range constant shift value.
// Store it in ShiftVal if so.
static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
  auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!Shift)
    return false;

  uint64_t Amount = Shift->getZExtValue();
  if (Amount >= N.getValueSizeInBits())
    return false;

  ShiftVal = Amount;
  return true;
}

// Check whether an AND with Mask is suitable for a TEST UNDER MASK
// instruction and whether the CC value is descriptive enough to handle
// a comparison of type Opcode between the AND result and CmpVal.
// CCMask says which comparison result is being tested and BitSize is
// the number of bits in the operands.  If TEST UNDER MASK can be used,
// return the corresponding CC mask, otherwise return 0.
static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
                                     uint64_t Mask, uint64_t CmpVal,
                                     unsigned ICmpType) {
  assert(Mask != 0 && "ANDs with zero should have been removed by now");

  // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
  if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
      !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
    return 0;

  // Work out the masks for the lowest and highest bits.
  uint64_t High = llvm::bit_floor(Mask);
  uint64_t Low = uint64_t(1) << llvm::countr_zero(Mask);

  // Signed ordered comparisons are effectively unsigned if the sign
  // bit is dropped.
  bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);

  // Check for equality comparisons with 0, or the equivalent.
  if (CmpVal == 0) {
    if (CCMask == SystemZ::CCMASK_CMP_EQ)
      return SystemZ::CCMASK_TM_ALL_0;
    if (CCMask == SystemZ::CCMASK_CMP_NE)
      return SystemZ::CCMASK_TM_SOME_1;
  }
  if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
    if (CCMask == SystemZ::CCMASK_CMP_LT)
      return SystemZ::CCMASK_TM_ALL_0;
    if (CCMask == SystemZ::CCMASK_CMP_GE)
      return SystemZ::CCMASK_TM_SOME_1;
  }
  if (EffectivelyUnsigned && CmpVal < Low) {
    if (CCMask == SystemZ::CCMASK_CMP_LE)
      return SystemZ::CCMASK_TM_ALL_0;
    if (CCMask == SystemZ::CCMASK_CMP_GT)
      return SystemZ::CCMASK_TM_SOME_1;
  }

  // Check for equality comparisons with the mask, or the equivalent.
  if (CmpVal == Mask) {
    if (CCMask == SystemZ::CCMASK_CMP_EQ)
      return SystemZ::CCMASK_TM_ALL_1;
    if (CCMask == SystemZ::CCMASK_CMP_NE)
      return SystemZ::CCMASK_TM_SOME_0;
  }
  if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
    if (CCMask == SystemZ::CCMASK_CMP_GT)
      return SystemZ::CCMASK_TM_ALL_1;
    if (CCMask == SystemZ::CCMASK_CMP_LE)
      return SystemZ::CCMASK_TM_SOME_0;
  }
  if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
    if (CCMask == SystemZ::CCMASK_CMP_GE)
      return SystemZ::CCMASK_TM_ALL_1;
    if (CCMask == SystemZ::CCMASK_CMP_LT)
      return SystemZ::CCMASK_TM_SOME_0;
  }

  // Check for ordered comparisons with the top bit.
  if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
    if (CCMask == SystemZ::CCMASK_CMP_LE)
      return SystemZ::CCMASK_TM_MSB_0;
    if (CCMask == SystemZ::CCMASK_CMP_GT)
      return SystemZ::CCMASK_TM_MSB_1;
  }
  if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
    if (CCMask == SystemZ::CCMASK_CMP_LT)
      return SystemZ::CCMASK_TM_MSB_0;
    if (CCMask == SystemZ::CCMASK_CMP_GE)
      return SystemZ::CCMASK_TM_MSB_1;
  }

  // If there are just two bits, we can do equality checks for Low and High
  // as well.
  if (Mask == Low + High) {
    if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
      return SystemZ::CCMASK_TM_MIXED_MSB_0;
    if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
      return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
    if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
      return SystemZ::CCMASK_TM_MIXED_MSB_1;
    if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
      return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
  }

  // Looks like we've exhausted our options.
  return 0;
}
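
// Worked example (for exposition): with Mask == 0xff00, High == 0x8000 and
// Low == 0x0100.  An unsigned "(x & 0xff00) < 0x0100" then maps to
// CCMASK_TM_ALL_0 (all masked bits zero), and "(x & 0xff00) == 0xff00" maps
// to CCMASK_TM_ALL_1 (all masked bits one).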

// See whether C can be implemented as a TEST UNDER MASK instruction.
// Update the arguments with the TM version if so.
static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
                                   Comparison &C) {
  // Use VECTOR TEST UNDER MASK for i128 operations.
  if (C.Op0.getValueType() == MVT::i128) {
    // We can use VTM for EQ/NE comparisons of x & y against 0.
    if (C.Op0.getOpcode() == ISD::AND &&
        (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
         C.CCMask == SystemZ::CCMASK_CMP_NE)) {
      auto *Mask = dyn_cast<ConstantSDNode>(C.Op1);
      if (Mask && Mask->getAPIntValue() == 0) {
        C.Opcode = SystemZISD::VTM;
        C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(1));
        C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(0));
        C.CCValid = SystemZ::CCMASK_VCMP;
        if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
          C.CCMask = SystemZ::CCMASK_VCMP_ALL;
        else
          C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
      }
    }
    return;
  }

  // Check that we have a comparison with a constant.
  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
  if (!ConstOp1)
    return;
  uint64_t CmpVal = ConstOp1->getZExtValue();

  // Check whether the nonconstant input is an AND with a constant mask.
  Comparison NewC(C);
  uint64_t MaskVal;
  ConstantSDNode *Mask = nullptr;
  if (C.Op0.getOpcode() == ISD::AND) {
    NewC.Op0 = C.Op0.getOperand(0);
    NewC.Op1 = C.Op0.getOperand(1);
    Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
    if (!Mask)
      return;
    MaskVal = Mask->getZExtValue();
  } else {
    // There is no instruction to compare with a 64-bit immediate
    // so use TMHH instead if possible.  We need an unsigned ordered
    // comparison with an i64 immediate.
    if (NewC.Op0.getValueType() != MVT::i64 ||
        NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
        NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
        NewC.ICmpType == SystemZICMP::SignedOnly)
      return;
    // Convert LE and GT comparisons into LT and GE.
    if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
        NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
      if (CmpVal == uint64_t(-1))
        return;
      CmpVal += 1;
      NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
    }
    // If the low N bits of Op1 are zero then the low N bits of Op0 can
    // be masked off without changing the result.
    MaskVal = -(CmpVal & -CmpVal);
    NewC.ICmpType = SystemZICMP::UnsignedOnly;
  }
  if (!MaskVal)
    return;

  // Check whether the combination of mask, comparison value and comparison
  // type are suitable.
  unsigned BitSize = NewC.Op0.getValueSizeInBits();
  unsigned NewCCMask, ShiftVal;
  if (NewC.ICmpType != SystemZICMP::SignedOnly &&
      NewC.Op0.getOpcode() == ISD::SHL &&
      isSimpleShift(NewC.Op0, ShiftVal) &&
      (MaskVal >> ShiftVal != 0) &&
      ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
      (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
                                        MaskVal >> ShiftVal,
                                        CmpVal >> ShiftVal,
                                        SystemZICMP::Any))) {
    NewC.Op0 = NewC.Op0.getOperand(0);
    MaskVal >>= ShiftVal;
  } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
             NewC.Op0.getOpcode() == ISD::SRL &&
             isSimpleShift(NewC.Op0, ShiftVal) &&
             (MaskVal << ShiftVal != 0) &&
             ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
             (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
                                               MaskVal << ShiftVal,
                                               CmpVal << ShiftVal,
                                               SystemZICMP::UnsignedOnly))) {
    NewC.Op0 = NewC.Op0.getOperand(0);
    MaskVal <<= ShiftVal;
  } else {
    NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
                                     NewC.ICmpType);
    if (!NewCCMask)
      return;
  }

  // Go ahead and make the change.
  C.Opcode = SystemZISD::TM;
  C.Op0 = NewC.Op0;
  if (Mask && Mask->getZExtValue() == MaskVal)
    C.Op1 = SDValue(Mask, 0);
  else
    C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
  C.CCValid = SystemZ::CCMASK_TM;
  C.CCMask = NewCCMask;
}

// Implement i128 comparison in vector registers.
static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL,
                          Comparison &C) {
  if (C.Opcode != SystemZISD::ICMP)
    return;
  if (C.Op0.getValueType() != MVT::i128)
    return;

  // (In-)Equality comparisons can be implemented via VCEQGS.
  if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
      C.CCMask == SystemZ::CCMASK_CMP_NE) {
    C.Opcode = SystemZISD::VICMPES;
    C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op0);
    C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op1);
    C.CCValid = SystemZ::CCMASK_VCMP;
    if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
      C.CCMask = SystemZ::CCMASK_VCMP_ALL;
    else
      C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
    return;
  }

  // Normalize other comparisons to GT.
  bool Swap = false, Invert = false;
  switch (C.CCMask) {
  case SystemZ::CCMASK_CMP_GT: break;
  case SystemZ::CCMASK_CMP_LT: Swap = true; break;
  case SystemZ::CCMASK_CMP_LE: Invert = true; break;
  case SystemZ::CCMASK_CMP_GE: Swap = Invert = true; break;
  default: llvm_unreachable("Invalid integer condition!");
  }
  if (Swap)
    std::swap(C.Op0, C.Op1);

  if (C.ICmpType == SystemZICMP::UnsignedOnly)
    C.Opcode = SystemZISD::UCMP128HI;
  else
    C.Opcode = SystemZISD::SCMP128HI;
  C.CCValid = SystemZ::CCMASK_ANY;
  C.CCMask = SystemZ::CCMASK_1;

  if (Invert)
    C.CCMask ^= C.CCValid;
}

// See whether the comparison argument contains a redundant AND
// and remove it if so.  This sometimes happens due to the generic
// BRCOND expansion.
static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
                                  Comparison &C) {
  if (C.Op0.getOpcode() != ISD::AND)
    return;
  auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
  if (!Mask || Mask->getValueSizeInBits(0) > 64)
    return;
  KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
  if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
    return;

  C.Op0 = C.Op0.getOperand(0);
}
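
// For illustration: generic BRCOND expansion can produce (and (setcc ...), 1);
// since computeKnownBits shows the setcc result has no possibly-set bits
// outside that mask, the AND is stripped above and the comparison then tests
// the setcc value directly.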

// Return a Comparison that tests the condition-code result of intrinsic
// node Call against constant integer CC using comparison code Cond.
// Opcode is the opcode of the SystemZISD operation for the intrinsic
// and CCValid is the set of possible condition-code results.
static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
                                  SDValue Call, unsigned CCValid, uint64_t CC,
                                  ISD::CondCode Cond) {
  Comparison C(Call, SDValue(), SDValue());
  C.Opcode = Opcode;
  C.CCValid = CCValid;
  if (Cond == ISD::SETEQ)
    // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
    C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
  else if (Cond == ISD::SETNE)
    // ...and the inverse of that.
    C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
  else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
    // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
    // always true for CC>3.
    C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
  else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
    // ...and the inverse of that.
    C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
  else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
    // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
    // always true for CC>3.
    C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
  else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
    // ...and the inverse of that.
    C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
  else
    llvm_unreachable("Unexpected integer comparison type");
  C.CCMask &= CCValid;
  return C;
}
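
// Worked example (for exposition): CC values map to mask bits as CC0 -> bit 3
// (value 8) down to CC3 -> bit 0 (value 1).  So "cc == 1" gives CCMask
// 0b0100 (4), and "cc <= 1" gives 0b1100 (12), both before masking with
// CCValid.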

// Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
                         ISD::CondCode Cond, const SDLoc &DL,
                         SDValue Chain = SDValue(),
                         bool IsSignaling = false) {
  if (CmpOp1.getOpcode() == ISD::Constant) {
    assert(!Chain);
    unsigned Opcode, CCValid;
    if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
        isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
      return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
                             CmpOp1->getAsZExtVal(), Cond);
    if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
        CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
        isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
      return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
                             CmpOp1->getAsZExtVal(), Cond);
  }
  Comparison C(CmpOp0, CmpOp1, Chain);
  C.CCMask = CCMaskForCondCode(Cond);
  if (C.Op0.getValueType().isFloatingPoint()) {
    C.CCValid = SystemZ::CCMASK_FCMP;
    if (!C.Chain)
      C.Opcode = SystemZISD::FCMP;
    else if (!IsSignaling)
      C.Opcode = SystemZISD::STRICT_FCMP;
    else
      C.Opcode = SystemZISD::STRICT_FCMPS;
    adjustForFNeg(C);
  } else {
    assert(!C.Chain);
    C.CCValid = SystemZ::CCMASK_ICMP;
    C.Opcode = SystemZISD::ICMP;
    // Choose the type of comparison.  Equality and inequality tests can
    // use either signed or unsigned comparisons.  The choice also doesn't
    // matter if both sign bits are known to be clear.  In those cases we
    // want to give the main isel code the freedom to choose whichever
    // form fits best.
    if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
        C.CCMask == SystemZ::CCMASK_CMP_NE ||
        (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
      C.ICmpType = SystemZICMP::Any;
    else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
      C.ICmpType = SystemZICMP::UnsignedOnly;
    else
      C.ICmpType = SystemZICMP::SignedOnly;
    C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
    adjustForRedundantAnd(DAG, DL, C);
    adjustZeroCmp(DAG, DL, C);
    adjustSubwordCmp(DAG, DL, C);
    adjustForSubtraction(DAG, DL, C);
    adjustForLTGFR(C);
    adjustICmpTruncate(DAG, DL, C);
  }

  if (shouldSwapCmpOperands(C)) {
    std::swap(C.Op0, C.Op1);
    C.CCMask = SystemZ::reverseCCMask(C.CCMask);
  }

  adjustForTestUnderMask(DAG, DL, C);
  adjustICmp128(DAG, DL, C);
  return C;
}

// Emit the comparison instruction described by C.
static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
  if (!C.Op1.getNode()) {
    SDNode *Node;
    switch (C.Op0.getOpcode()) {
    case ISD::INTRINSIC_W_CHAIN:
      Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
      return SDValue(Node, 0);
    case ISD::INTRINSIC_WO_CHAIN:
      Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
      return SDValue(Node, Node->getNumValues() - 1);
    default:
      llvm_unreachable("Invalid comparison operands");
    }
  }
  if (C.Opcode == SystemZISD::ICMP)
    return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
                       DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
  if (C.Opcode == SystemZISD::TM) {
    bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
                         bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
    return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
                       DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
  }
  if (C.Opcode == SystemZISD::VICMPES) {
    SDVTList VTs = DAG.getVTList(C.Op0.getValueType(), MVT::i32);
    SDValue Val = DAG.getNode(C.Opcode, DL, VTs, C.Op0, C.Op1);
    return SDValue(Val.getNode(), 1);
  }
  if (C.Chain) {
    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
    return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
  }
  return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
}

// Implement a 32-bit *MUL_LOHI operation by extending both operands to
// 64 bits.  Extend is the extension type to use.  Store the high part
// in Hi and the low part in Lo.
static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
                            SDValue Op0, SDValue Op1, SDValue &Hi,
                            SDValue &Lo) {
  Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
  Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
  SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
  Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
                   DAG.getConstant(32, DL, MVT::i64));
  Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
  Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
}
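
// Worked example (for exposition): with Extend == ISD::ZERO_EXTEND and i32
// operands a and b, the helper computes Mul = zext64(a) * zext64(b), then
// Hi = trunc32(Mul >> 32) and Lo = trunc32(Mul), i.e. the two halves that
// UMUL_LOHI is defined to produce.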

// Lower a binary operation that produces two VT results, one in each
// half of a GR128 pair.  Op0 and Op1 are the VT operands to the operation,
// and Opcode performs the GR128 operation.  Store the even register result
// in Even and the odd register result in Odd.
static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                             unsigned Opcode, SDValue Op0, SDValue Op1,
                             SDValue &Even, SDValue &Odd) {
  SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
  bool Is32Bit = is32Bit(VT);
  Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
  Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
}

// Return an i32 value that is 1 if the CC value produced by CCReg is
// in the mask CCMask and 0 otherwise.  CC is known to have a value
// in CCValid, so other values can be ignored.
static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
                         unsigned CCValid, unsigned CCMask) {
  SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
                   DAG.getConstant(0, DL, MVT::i32),
                   DAG.getTargetConstant(CCValid, DL, MVT::i32),
                   DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
}

// Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
// be done directly.  Mode is CmpMode::Int for integer comparisons, CmpMode::FP
// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
// floating-point comparisons.
enum class CmpMode { Int, FP, StrictFP, SignalingFP };
static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
  switch (CC) {
  case ISD::SETOEQ:
  case ISD::SETEQ:
    switch (Mode) {
    case CmpMode::Int:         return SystemZISD::VICMPE;
    case CmpMode::FP:          return SystemZISD::VFCMPE;
    case CmpMode::StrictFP:    return SystemZISD::STRICT_VFCMPE;
    case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
    }
    llvm_unreachable("Bad mode");

  case ISD::SETOGE:
  case ISD::SETGE:
    switch (Mode) {
    case CmpMode::Int:         return 0;
    case CmpMode::FP:          return SystemZISD::VFCMPHE;
    case CmpMode::StrictFP:    return SystemZISD::STRICT_VFCMPHE;
    case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
    }
    llvm_unreachable("Bad mode");

  case ISD::SETOGT:
  case ISD::SETGT:
    switch (Mode) {
    case CmpMode::Int:         return SystemZISD::VICMPH;
    case CmpMode::FP:          return SystemZISD::VFCMPH;
    case CmpMode::StrictFP:    return SystemZISD::STRICT_VFCMPH;
    case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
    }
    llvm_unreachable("Bad mode");

  case ISD::SETUGT:
    switch (Mode) {
    case CmpMode::Int:         return SystemZISD::VICMPHL;
    case CmpMode::FP:          return 0;
    case CmpMode::StrictFP:    return 0;
    case CmpMode::SignalingFP: return 0;
    }
    llvm_unreachable("Bad mode");

  default:
    return 0;
  }
}
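
// Worked example (illustrative only): there is no direct vector "unsigned
// less than or equal" operation above, so for CC == SETULE in CmpMode::Int
// the helper below fails the direct lookup, inverts the condition to
// SETUGT (which maps to VICMPHL), and reports Invert == true so that the
// caller complements the resulting mask.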

// Return the SystemZISD vector comparison operation for CC or its inverse,
// or 0 if neither can be done directly.  Indicate in Invert whether the
// result is for the inverse of CC.  Mode is as above.
static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
                                            bool &Invert) {
  if (unsigned Opcode = getVectorComparison(CC, Mode)) {
    Invert = false;
    return Opcode;
  }

  CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
  if (unsigned Opcode = getVectorComparison(CC, Mode)) {
    Invert = true;
    return Opcode;
  }

  return 0;
}

// Return a v2f64 that contains the extended form of elements Start and Start+1
// of v4f32 value Op.  If Chain is nonnull, return the strict form.
static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
                                  SDValue Op, SDValue Chain) {
  int Mask[] = { Start, -1, Start + 1, -1 };
  Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
  if (Chain) {
    SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
    return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
  }
  return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
}

// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
// producing a result of type VT.  If Chain is nonnull, return the strict form.
SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
                                            const SDLoc &DL, EVT VT,
                                            SDValue CmpOp0,
                                            SDValue CmpOp1,
                                            SDValue Chain) const {
  // There is no hardware support for v4f32 (unless we have the vector
  // enhancements facility 1), so extend the vector into two v2f64s
  // and compare those.
  if (CmpOp0.getValueType() == MVT::v4f32 &&
      !Subtarget.hasVectorEnhancements1()) {
    SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
    SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
    SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
    SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
    if (Chain) {
      SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
      SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
      SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
      SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
      SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
                            H1.getValue(1), L1.getValue(1),
                            HRes.getValue(1), LRes.getValue(1) };
      SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
      SDValue Ops[2] = { Res, NewChain };
      return DAG.getMergeValues(Ops, DL);
    }
    SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
    SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
    return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
  }
  if (Chain) {
    SDVTList VTs = DAG.getVTList(VT, MVT::Other);
    return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
  }
  return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
}

// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
// an integer mask of type VT.  If Chain is nonnull, we have a strict
// floating-point comparison.  If in addition IsSignaling is true, we have
// a strict signaling floating-point comparison.
SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
                                                const SDLoc &DL, EVT VT,
                                                ISD::CondCode CC,
                                                SDValue CmpOp0,
                                                SDValue CmpOp1,
                                                SDValue Chain,
                                                bool IsSignaling) const {
  bool IsFP = CmpOp0.getValueType().isFloatingPoint();
  assert (!Chain || IsFP);
  assert (!IsSignaling || Chain);
  CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
                 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
  bool Invert = false;
  SDValue Cmp;
  switch (CC) {
    // Handle tests for order using (or (ogt y x) (oge x y)).
  case ISD::SETUO:
    Invert = true;
    [[fallthrough]];
  case ISD::SETO: {
    assert(IsFP && "Unexpected integer comparison");
    SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
                              DL, VT, CmpOp1, CmpOp0, Chain);
    SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
                              DL, VT, CmpOp0, CmpOp1, Chain);
    Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                          LT.getValue(1), GE.getValue(1));
    break;
  }

    // Handle <> tests using (or (ogt y x) (ogt x y)).
  case ISD::SETUEQ:
    Invert = true;
    [[fallthrough]];
  case ISD::SETONE: {
    assert(IsFP && "Unexpected integer comparison");
    SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
                              DL, VT, CmpOp1, CmpOp0, Chain);
    SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
                              DL, VT, CmpOp0, CmpOp1, Chain);
    Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                          LT.getValue(1), GT.getValue(1));
    break;
  }

    // Otherwise a single comparison is enough.  It doesn't really
    // matter whether we try the inversion or the swap first, since
    // there are no cases where both work.
  default:
    if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
      Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
    else {
      CC = ISD::getSetCCSwappedOperands(CC);
      if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
        Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
      else
        llvm_unreachable("Unhandled comparison");
    }
    if (Chain)
      Chain = Cmp.getValue(1);
    break;
  }
  if (Invert) {
    SDValue Mask =
        DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
    Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
  }
  if (Chain && Chain.getNode() != Cmp.getNode()) {
    SDValue Ops[2] = { Cmp, Chain };
    Cmp = DAG.getMergeValues(Ops, DL);
  }
  return Cmp;
}

SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDValue CmpOp0 = Op.getOperand(0);
  SDValue CmpOp1 = Op.getOperand(1);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  if (VT.isVector())
    return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);

  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
  SDValue CCReg = emitCmp(DAG, DL, C);
  return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
}

SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
                                                  SelectionDAG &DAG,
                                                  bool IsSignaling) const {
  SDValue Chain = Op.getOperand(0);
  SDValue CmpOp0 = Op.getOperand(1);
  SDValue CmpOp1 = Op.getOperand(2);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
  SDLoc DL(Op);
  EVT VT = Op.getNode()->getValueType(0);
  if (VT.isVector()) {
    SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
                                   Chain, IsSignaling);
    return Res.getValue(Op.getResNo());
  }

  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
  SDValue CCReg = emitCmp(DAG, DL, C);
  CCReg->setFlags(Op->getFlags());
  SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
  SDValue Ops[2] = { Result, CCReg.getValue(1) };
  return DAG.getMergeValues(Ops, DL);
}

SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue CmpOp0 = Op.getOperand(2);
  SDValue CmpOp1 = Op.getOperand(3);
  SDValue Dest = Op.getOperand(4);
  SDLoc DL(Op);

  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
  SDValue CCReg = emitCmp(DAG, DL, C);
  return DAG.getNode(
      SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
      DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
      DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
}

// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
// allowing Pos and Neg to be wider than CmpOp.
static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
  return (Neg.getOpcode() == ISD::SUB &&
          Neg.getOperand(0).getOpcode() == ISD::Constant &&
          Neg.getConstantOperandVal(0) == 0 && Neg.getOperand(1) == Pos &&
          (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND &&
                            Pos.getOperand(0) == CmpOp)));
}

// Return the absolute or negative absolute of Op; IsNegative decides which.
static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
                           bool IsNegative) {
  Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
  if (IsNegative)
    Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
                     DAG.getConstant(0, DL, Op.getValueType()), Op);
  return Op;
}

SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDValue CmpOp0 = Op.getOperand(0);
  SDValue CmpOp1 = Op.getOperand(1);
  SDValue TrueOp = Op.getOperand(2);
  SDValue FalseOp = Op.getOperand(3);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
  SDLoc DL(Op);

  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));

  // Check for absolute and negative-absolute selections, including those
  // where the comparison value is sign-extended (for LPGFR and LNGFR).
  // This check supplements the one in DAGCombiner.
  if (C.Opcode == SystemZISD::ICMP && C.CCMask != SystemZ::CCMASK_CMP_EQ &&
      C.CCMask != SystemZ::CCMASK_CMP_NE &&
      C.Op1.getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
      C.Op1->getAsZExtVal() == 0) {
    if (isAbsolute(C.Op0, TrueOp, FalseOp))
      return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
    if (isAbsolute(C.Op0, FalseOp, TrueOp))
      return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
  }

  SDValue CCReg = emitCmp(DAG, DL, C);
  SDValue Ops[] = {TrueOp, FalseOp,
                   DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
                   DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};

  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
}
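
// Worked example (illustrative only): for i64 "x >= 0 ? x : -x", getCmp
// yields an ICMP of x against zero with TrueOp == x and
// FalseOp == (sub 0, x), so the isAbsolute check matches and the whole
// SELECT_CC collapses into a single ISD::ABS node (an LPGFR-style load
// positive); the CCMASK_CMP_LT test decides between ABS and negated ABS.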

SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Node);
  const GlobalValue *GV = Node->getGlobal();
  int64_t Offset = Node->getOffset();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  CodeModel::Model CM = DAG.getTarget().getCodeModel();

  SDValue Result;
  if (Subtarget.isPC32DBLSymbol(GV, CM)) {
    if (isInt<32>(Offset)) {
      // Assign anchors at 1<<12 byte boundaries.
      uint64_t Anchor = Offset & ~uint64_t(0xfff);
      Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
      Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);

      // The offset can be folded into the address if it is aligned to a
      // halfword.
      Offset -= Anchor;
      if (Offset != 0 && (Offset & 1) == 0) {
        SDValue Full =
            DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
        Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
        Offset = 0;
      }
    } else {
      // Conservatively load a constant offset greater than 32 bits into a
      // register below.
      Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
      Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
    }
  } else if (Subtarget.isTargetELF()) {
    Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
    Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
    Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
                         MachinePointerInfo::getGOT(DAG.getMachineFunction()));
  } else if (Subtarget.isTargetzOS()) {
    Result = getADAEntry(DAG, GV, DL, PtrVT);
  } else
    llvm_unreachable("Unexpected Subtarget");

  // If there was a non-zero offset that we didn't fold, create an explicit
  // addition for it.
  if (Offset != 0)
    Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
                         DAG.getConstant(Offset, DL, PtrVT));

  return Result;
}
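
// Worked example (illustrative only; the symbol and offsets are invented):
// for a pc-relative reference to GV + 0x12345, the anchor above is
// GV + 0x12000 (offset & ~0xfff). The remaining offset 0x345 is odd, so it
// cannot be folded into a halfword-aligned address and is added explicitly
// at the end; a remainder of 0x344 would instead have been folded into the
// PCREL_OFFSET node.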

SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
                                                 SelectionDAG &DAG,
                                                 unsigned Opcode,
                                                 SDValue GOTOffset) const {
  SDLoc DL(Node);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue Chain = DAG.getEntryNode();
  SDValue Glue;

  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
      CallingConv::GHC)
    report_fatal_error("In GHC calling convention TLS is not supported");

  // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
  SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
  Glue = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
  Glue = Chain.getValue(1);

  // The first call operand is the chain and the second is the TLS symbol.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
                                           Node->getValueType(0),
                                           0, 0));

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
  Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // Glue the call to the argument copies.
  Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
  Glue = Chain.getValue(1);

  // Copy the return value from %r2.
  return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
}

SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
                                                  SelectionDAG &DAG) const {
  SDValue Chain = DAG.getEntryNode();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // The high part of the thread pointer is in access register 0.
  SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
  TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);

  // The low part of the thread pointer is in access register 1.
  SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
  TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);

  // Merge them into a single 64-bit address.
  SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
                                    DAG.getConstant(32, DL, PtrVT));
  return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
}
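
// Worked example (illustrative only): if access register A0 holds
// 0x00000001 and A1 holds 0x00002000, the code above assembles the thread
// pointer (0x00000001 << 32) | 0x00002000 = 0x0000000100002000. TPLo must
// be zero-extended so the OR cannot disturb the high word, while TPHi may
// be any-extended because the shift discards its upper bits anyway.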

SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
                                                     SelectionDAG &DAG) const {
  if (DAG.getTarget().useEmulatedTLS())
    return LowerToTLSEmulatedModel(Node, DAG);
  SDLoc DL(Node);
  const GlobalValue *GV = Node->getGlobal();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  TLSModel::Model model = DAG.getTarget().getTLSModel(GV);

  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
      CallingConv::GHC)
    report_fatal_error("In GHC calling convention TLS is not supported");

  SDValue TP = lowerThreadPointer(DL, DAG);

  // Get the offset of GA from the thread pointer, based on the TLS model.
  SDValue Offset;
  switch (model) {
  case TLSModel::GeneralDynamic: {
    // Load the GOT offset of the tls_index (module ID / per-symbol offset).
    SystemZConstantPoolValue *CPV =
        SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);

    Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
    Offset = DAG.getLoad(
        PtrVT, DL, DAG.getEntryNode(), Offset,
        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));

    // Call __tls_get_offset to retrieve the offset.
    Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
    break;
  }

  case TLSModel::LocalDynamic: {
    // Load the GOT offset of the module ID.
    SystemZConstantPoolValue *CPV =
        SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);

    Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
    Offset = DAG.getLoad(
        PtrVT, DL, DAG.getEntryNode(), Offset,
        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));

    // Call __tls_get_offset to retrieve the module base offset.
    Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);

    // Note: The SystemZLDCleanupPass will remove redundant computations
    // of the module base offset.  Count total number of local-dynamic
    // accesses to trigger execution of that pass.
    SystemZMachineFunctionInfo* MFI =
        DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
    MFI->incNumLocalDynamicTLSAccesses();

    // Add the per-symbol offset.
    CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);

    SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
    DTPOffset = DAG.getLoad(
        PtrVT, DL, DAG.getEntryNode(), DTPOffset,
        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));

    Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
    break;
  }

  case TLSModel::InitialExec: {
    // Load the offset from the GOT.
    Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
                                        SystemZII::MO_INDNTPOFF);
    Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
    Offset =
        DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
                    MachinePointerInfo::getGOT(DAG.getMachineFunction()));
    break;
  }

  case TLSModel::LocalExec: {
    // Force the offset into the constant pool and load it from there.
    SystemZConstantPoolValue *CPV =
        SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);

    Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
    Offset = DAG.getLoad(
        PtrVT, DL, DAG.getEntryNode(), Offset,
        MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
    break;
  }
  }

  // Add the base and offset together.
  return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
}

SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Node);
  const BlockAddress *BA = Node->getBlockAddress();
  int64_t Offset = Node->getOffset();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
  return Result;
}

SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
                                              SelectionDAG &DAG) const {
  SDLoc DL(JT);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);

  // Use LARL to load the address of the table.
  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
}

SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(CP);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  SDValue Result;
  if (CP->isMachineConstantPoolEntry())
    Result =
        DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
  else
    Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
                                       CP->getOffset());

  // Use LARL to load the address of the constant pool entry.
  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
}

SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
                                              SelectionDAG &DAG) const {
  auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setFrameAddressIsTaken(true);

  SDLoc DL(Op);
  unsigned Depth = Op.getConstantOperandVal(0);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // By definition, the frame address is the address of the back chain.  (In
  // the case of packed stack without backchain, return the address where the
  // backchain would have been stored.  This will either be an unused space or
  // contain a saved register).
  int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
  SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);

  if (Depth > 0) {
    // FIXME The frontend should detect this case.
    if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
      report_fatal_error("Unsupported stack frame traversal count");

    SDValue Offset = DAG.getConstant(TFL->getBackchainOffset(MF), DL, PtrVT);
    while (Depth--) {
      BackChain = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), BackChain,
                              MachinePointerInfo());
      BackChain = DAG.getNode(ISD::ADD, DL, PtrVT, BackChain, Offset);
    }
  }

  return BackChain;
}

SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
                                               SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  SDLoc DL(Op);
  unsigned Depth = Op.getConstantOperandVal(0);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  if (Depth > 0) {
    // FIXME The frontend should detect this case.
    if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
      report_fatal_error("Unsupported stack frame traversal count");

    SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
    const auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
    int Offset = TFL->getReturnAddressOffset(MF);
    SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, FrameAddr,
                              DAG.getConstant(Offset, DL, PtrVT));
    return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr,
                       MachinePointerInfo());
  }

  // Return R14D (Elf) / R7D (XPLINK), which has the return address. Mark it an
  // implicit live-in.
  SystemZCallingConventionRegisters *CCR = Subtarget.getSpecialRegisters();
  Register LinkReg = MF.addLiveIn(CCR->getReturnFunctionAddressRegister(),
                                  &SystemZ::GR64BitRegClass);
  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
}
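
// Worked example (illustrative only): __builtin_return_address(1) on an
// ELF subtarget with a back chain first walks one frame through
// lowerFRAMEADDR's load loop, then loads at the return-address offset
// within that frame; __builtin_return_address(0) instead just reads the
// link register (R14D on ELF, R7D on XPLINK) as an implicit live-in.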

SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue In = Op.getOperand(0);
  EVT InVT = In.getValueType();
  EVT ResVT = Op.getValueType();

  // Convert loads directly.  This is normally done by DAGCombiner,
  // but we need this case for bitcasts that are created during lowering
  // and which are then lowered themselves.
  if (auto *LoadN = dyn_cast<LoadSDNode>(In))
    if (ISD::isNormalLoad(LoadN)) {
      SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
                                    LoadN->getBasePtr(), LoadN->getMemOperand());
      // Update the chain uses.
      DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
      return NewLoad;
    }

  if (InVT == MVT::i32 && ResVT == MVT::f32) {
    SDValue In64;
    if (Subtarget.hasHighWord()) {
      SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
                                       MVT::i64);
      In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
                                       MVT::i64, SDValue(U64, 0), In);
    } else {
      In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
      In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
                         DAG.getConstant(32, DL, MVT::i64));
    }
    SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
    return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
                                      DL, MVT::f32, Out64);
  }
  if (InVT == MVT::f32 && ResVT == MVT::i32) {
    SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
    SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
                                             MVT::f64, SDValue(U64, 0), In);
    SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
    if (Subtarget.hasHighWord())
      return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
                                        MVT::i32, Out64);
    SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
                                DAG.getConstant(32, DL, MVT::i64));
    return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
  }
  llvm_unreachable("Unexpected bitcast combination");
}
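
// Worked sketch (illustrative only): without the high-word facility, an
// i32->f32 bitcast above becomes "any-extend to i64, shift left 32,
// bitcast to f64, extract subreg_h32". The shift is what places the
// payload in the high word, matching the convention that 32-bit FP values
// live in the high half of a 64-bit FP register.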

SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
                                            SelectionDAG &DAG) const {

  if (Subtarget.isTargetXPLINK64())
    return lowerVASTART_XPLINK(Op, DAG);
  else
    return lowerVASTART_ELF(Op, DAG);
}

SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op,
                                                   SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  SystemZMachineFunctionInfo *FuncInfo =
      MF.getInfo<SystemZMachineFunctionInfo>();

  SDLoc DL(Op);

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op,
                                                SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  SystemZMachineFunctionInfo *FuncInfo =
      MF.getInfo<SystemZMachineFunctionInfo>();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  SDValue Chain = Op.getOperand(0);
  SDValue Addr = Op.getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  SDLoc DL(Op);

  // The initial values of each field.
  const unsigned NumFields = 4;
  SDValue Fields[NumFields] = {
    DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
    DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
    DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
    DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
  };

  // Store each field into its respective slot.
  SDValue MemOps[NumFields];
  unsigned Offset = 0;
  for (unsigned I = 0; I < NumFields; ++I) {
    SDValue FieldAddr = Addr;
    if (Offset != 0)
      FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
                              DAG.getIntPtrConstant(Offset, DL));
    MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
                             MachinePointerInfo(SV, Offset));
    Offset += 8;
  }
  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}

SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue DstPtr = Op.getOperand(1);
  SDValue SrcPtr = Op.getOperand(2);
  const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
  SDLoc DL(Op);

  uint32_t Sz =
      Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32;
  return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL),
                       Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
                       /*CI=*/nullptr, std::nullopt, MachinePointerInfo(DstSV),
                       MachinePointerInfo(SrcSV));
}
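
// Worked sketch (illustrative only; field names taken from the s390x ELF
// ABI, not from this file): the va_list written above is the four-field
// structure
//   { __gpr, __fpr, __overflow_arg_area, __reg_save_area }
// at byte offsets 0, 8, 16 and 24, which is why the store loop advances
// Offset by 8; lowerVACOPY accordingly copies all 32 bytes on ELF but only
// a single pointer-sized slot on XPLINK.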

SDValue
SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
                                               SelectionDAG &DAG) const {
  if (Subtarget.isTargetXPLINK64())
    return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG);
  else
    return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG);
}

SDValue
SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op,
                                                      SelectionDAG &DAG) const {
  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
  MachineFunction &MF = DAG.getMachineFunction();
  bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  SDValue Align = Op.getOperand(2);
  SDLoc DL(Op);

  // If user has set the no alignment function attribute, ignore
  // alloca alignments.
  uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);

  uint64_t StackAlign = TFI->getStackAlignment();
  uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
  uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;

  SDValue NeededSpace = Size;

  // Add extra space for alignment if needed.
  EVT PtrVT = getPointerTy(MF.getDataLayout());
  if (ExtraAlignSpace)
    NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace,
                              DAG.getConstant(ExtraAlignSpace, DL, PtrVT));

  bool IsSigned = false;
  bool DoesNotReturn = false;
  bool IsReturnValueUsed = false;
  EVT VT = Op.getValueType();
  SDValue AllocaCall =
      makeExternalCall(Chain, DAG, "@@ALCAXP", VT, ArrayRef(NeededSpace),
                       CallingConv::C, IsSigned, DL, DoesNotReturn,
                       IsReturnValueUsed)
          .first;

  // Perform a CopyFromReg from %GPR4 (stack pointer register). Chain and Glue
  // to end of call in order to ensure it isn't broken up from the call
  // sequence.
  auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
  Register SPReg = Regs.getStackPointerRegister();
  Chain = AllocaCall.getValue(1);
  SDValue Glue = AllocaCall.getValue(2);
  SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue);
  Chain = NewSPRegNode.getValue(1);

  MVT PtrMVT = getPointerMemTy(MF.getDataLayout());
  SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT);
  SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust);

  // Dynamically realign if needed.
  if (ExtraAlignSpace) {
    Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
                         DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
    Result = DAG.getNode(ISD::AND, DL, PtrVT, Result,
                         DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT));
  }

  SDValue Ops[2] = {Result, Chain};
  return DAG.getMergeValues(Ops, DL);
}

SDValue
SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op,
                                                   SelectionDAG &DAG) const {
  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
  MachineFunction &MF = DAG.getMachineFunction();
  bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
  bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();

  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  SDValue Align = Op.getOperand(2);
  SDLoc DL(Op);

  // If user has set the no alignment function attribute, ignore
  // alloca alignments.
  uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);

  uint64_t StackAlign = TFI->getStackAlignment();
  uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
  uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;

  Register SPReg = getStackPointerRegisterToSaveRestore();
  SDValue NeededSpace = Size;

  // Get a reference to the stack pointer.
  SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);

  // If we need a backchain, save it now.
  SDValue Backchain;
  if (StoreBackchain)
    Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
                            MachinePointerInfo());

  // Add extra space for alignment if needed.
  if (ExtraAlignSpace)
    NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
                              DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));

  // Get the new stack pointer value.
  SDValue NewSP;
  if (hasInlineStackProbe(MF)) {
    NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
                        DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
    Chain = NewSP.getValue(1);
  }
  else {
    NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
    // Copy the new stack pointer back.
    Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
  }

  // The allocated data lives above the 160 bytes allocated for the standard
  // frame, plus any outgoing stack arguments.  We don't know how much that
  // amounts to yet, so emit a special ADJDYNALLOC placeholder.
  SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
  SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);

  // Dynamically realign if needed.
  if (RequiredAlign > StackAlign) {
    Result =
        DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
                    DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
    Result =
        DAG.getNode(ISD::AND, DL, MVT::i64, Result,
                    DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
  }

  if (StoreBackchain)
    Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
                         MachinePointerInfo());

  SDValue Ops[2] = { Result, Chain };
  return DAG.getMergeValues(Ops, DL);
}

SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
    SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);

  return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
}
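
// Worked example (illustrative only): with an 8-byte stack alignment and
// an alloca requesting 16-byte alignment, ExtraAlignSpace is 8, so 8 extra
// bytes are requested and the result is realigned via
// (Result + 8) & ~15, which always yields a 16-byte aligned pointer inside
// the over-allocated block.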

SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue Ops[2];
  if (is32Bit(VT))
    // Just do a normal 64-bit multiplication and extract the results.
    // We define this so that it can be used for constant division.
    lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
                    Op.getOperand(1), Ops[1], Ops[0]);
  else if (Subtarget.hasMiscellaneousExtensions2())
    // SystemZISD::SMUL_LOHI returns the low result in the odd register and
    // the high result in the even register.  ISD::SMUL_LOHI is defined to
    // return the low half first, so the results are in reverse order.
    lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
                     Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
  else {
    // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
    //
    //   (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
    //
    // but using the fact that the upper halves are either all zeros
    // or all ones:
    //
    //   (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
    //
    // and grouping the right terms together since they are quicker than the
    // multiplication:
    //
    //   (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
    SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
    SDValue LL = Op.getOperand(0);
    SDValue RL = Op.getOperand(1);
    SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
    SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
    // SystemZISD::UMUL_LOHI returns the low result in the odd register and
    // the high result in the even register.  ISD::SMUL_LOHI is defined to
    // return the low half first, so the results are in reverse order.
    lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
                     LL, RL, Ops[1], Ops[0]);
    SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
    SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
    SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
    Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
  }
  return DAG.getMergeValues(Ops, DL);
}
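
// Worked verification (illustrative only, shrunk to 8-bit halves): for
// ll = 0xFF (-1) and rl = 0x02, the sign halves are lh = 0xFF and
// rh = 0x00. The unsigned product is ll * rl = 0x01FE, and the correction
// (lh & rl) + (ll & rh) = 0x02 is subtracted from the high half:
// 0x01 - 0x02 = 0xFF (mod 0x100), giving 0xFFFE = -2 = (-1) * 2 as
// expected.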

SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue Ops[2];
  if (is32Bit(VT))
    // Just do a normal 64-bit multiplication and extract the results.
    // We define this so that it can be used for constant division.
    lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
                    Op.getOperand(1), Ops[1], Ops[0]);
  else
    // SystemZISD::UMUL_LOHI returns the low result in the odd register and
    // the high result in the even register.  ISD::UMUL_LOHI is defined to
    // return the low half first, so the results are in reverse order.
    lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
                     Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
  return DAG.getMergeValues(Ops, DL);
}

SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  EVT VT = Op.getValueType();
  SDLoc DL(Op);

  // We use DSGF for 32-bit division.  This means the first operand must
  // always be 64-bit, and the second operand should be 32-bit whenever
  // that is possible, to improve performance.
  if (is32Bit(VT))
    Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
  else if (DAG.ComputeNumSignBits(Op1) > 32)
    Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);

  // DSG(F) returns the remainder in the even register and the
  // quotient in the odd register.
  SDValue Ops[2];
  lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
  return DAG.getMergeValues(Ops, DL);
}

SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
                                            SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);

  // DL(G) returns the remainder in the even register and the
  // quotient in the odd register.
  SDValue Ops[2];
  lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
                   Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
  return DAG.getMergeValues(Ops, DL);
}

SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");

  // Get the known-zero masks for each operand.
  SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
  KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
                        DAG.computeKnownBits(Ops[1])};

  // See if the upper 32 bits of one operand and the lower 32 bits of the
  // other are known zero.  They are the low and high operands respectively.
  uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
                       Known[1].Zero.getZExtValue() };
  unsigned High, Low;
  if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
    High = 1, Low = 0;
  else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
    High = 0, Low = 1;
  else
    return Op;

  SDValue LowOp = Ops[Low];
  SDValue HighOp = Ops[High];

  // If the high part is a constant, we're better off using IILH.
  if (HighOp.getOpcode() == ISD::Constant)
    return Op;

  // If the low part is a constant that is outside the range of LHI,
  // then we're better off using IILF.
  if (LowOp.getOpcode() == ISD::Constant) {
    int64_t Value = int32_t(LowOp->getAsZExtVal());
    if (!isInt<16>(Value))
      return Op;
  }

  // Check whether the high part is an AND that doesn't change the
  // high 32 bits and just masks out low bits.  We can skip it if so.
  if (HighOp.getOpcode() == ISD::AND &&
      HighOp.getOperand(1).getOpcode() == ISD::Constant) {
    SDValue HighOp0 = HighOp.getOperand(0);
    uint64_t Mask = HighOp.getConstantOperandVal(1);
    if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
      HighOp = HighOp0;
  }

  // Take advantage of the fact that all GR32 operations only change the
  // low 32 bits by truncating Low to an i32 and inserting it directly
  // using a subreg.  The interesting cases are those where the truncation
  // can be folded.
  SDLoc DL(Op);
  SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
  return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
                                   MVT::i64, HighOp, Low32);
}
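
// Worked example (illustrative only): if Op0 = (x << 32) and
// Op1 = (zext i32 y), the known-bits test above classifies Op0 as the high
// and Op1 as the low operand, so the OR becomes "truncate y to i32 and
// insert it as subreg_l32 of Op0", letting the truncation fold into
// whatever produced y.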

// Lower SADDO/SSUBO/UADDO/USUBO nodes.
SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDNode *N = Op.getNode();
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDLoc DL(N);

  if (N->getValueType(0) == MVT::i128) {
    unsigned BaseOp = 0;
    unsigned FlagOp = 0;
    bool IsBorrow = false;
    switch (Op.getOpcode()) {
    default: llvm_unreachable("Unknown instruction!");
    case ISD::UADDO:
      BaseOp = ISD::ADD;
      FlagOp = SystemZISD::VACC;
      break;
    case ISD::USUBO:
      BaseOp = ISD::SUB;
      FlagOp = SystemZISD::VSCBI;
      IsBorrow = true;
      break;
    }
    SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS);
    SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS);
    Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
                       DAG.getValueType(MVT::i1));
    Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
    if (IsBorrow)
      Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
                         Flag, DAG.getConstant(1, DL, Flag.getValueType()));
    return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
  }

  unsigned BaseOp = 0;
  unsigned CCValid = 0;
  unsigned CCMask = 0;

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown instruction!");
  case ISD::SADDO:
    BaseOp = SystemZISD::SADDO;
    CCValid = SystemZ::CCMASK_ARITH;
    CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
    break;
  case ISD::SSUBO:
    BaseOp = SystemZISD::SSUBO;
    CCValid = SystemZ::CCMASK_ARITH;
    CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
    break;
  case ISD::UADDO:
    BaseOp = SystemZISD::UADDO;
    CCValid = SystemZ::CCMASK_LOGICAL;
    CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
    break;
  case ISD::USUBO:
    BaseOp = SystemZISD::USUBO;
    CCValid = SystemZ::CCMASK_LOGICAL;
    CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
    break;
  }

  SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
  SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);

  SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
  if (N->getValueType(1) == MVT::i1)
    SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);

  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
}

static bool isAddCarryChain(SDValue Carry) {
  while (Carry.getOpcode() == ISD::UADDO_CARRY)
    Carry = Carry.getOperand(2);
  return Carry.getOpcode() == ISD::UADDO;
}

static bool isSubBorrowChain(SDValue Carry) {
  while (Carry.getOpcode() == ISD::USUBO_CARRY)
    Carry = Carry.getOperand(2);
  return Carry.getOpcode() == ISD::USUBO;
}

// Lower UADDO_CARRY/USUBO_CARRY nodes.
SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op,
                                                   SelectionDAG &DAG) const {

  SDNode *N = Op.getNode();
  MVT VT = N->getSimpleValueType(0);

  // Let legalize expand this if it isn't a legal type yet.
  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
    return SDValue();

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue Carry = Op.getOperand(2);
  SDLoc DL(N);

  if (VT == MVT::i128) {
    unsigned BaseOp = 0;
    unsigned FlagOp = 0;
    bool IsBorrow = false;
    switch (Op.getOpcode()) {
    default: llvm_unreachable("Unknown instruction!");
    case ISD::UADDO_CARRY:
      BaseOp = SystemZISD::VAC;
      FlagOp = SystemZISD::VACCC;
      break;
    case ISD::USUBO_CARRY:
      BaseOp = SystemZISD::VSBI;
      FlagOp = SystemZISD::VSBCBI;
      IsBorrow = true;
      break;
    }
    if (IsBorrow)
      Carry = DAG.getNode(ISD::XOR, DL, Carry.getValueType(),
                          Carry, DAG.getConstant(1, DL, Carry.getValueType()));
    Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128);
    SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry);
    SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry);
    Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
                       DAG.getValueType(MVT::i1));
    Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
    if (IsBorrow)
      Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
                         Flag, DAG.getConstant(1, DL, Flag.getValueType()));
    return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
  }

  unsigned BaseOp = 0;
  unsigned CCValid = 0;
  unsigned CCMask = 0;

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown instruction!");
  case ISD::UADDO_CARRY:
    if (!isAddCarryChain(Carry))
      return SDValue();

    BaseOp = SystemZISD::ADDCARRY;
    CCValid = SystemZ::CCMASK_LOGICAL;
    CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
    break;
  case ISD::USUBO_CARRY:
    if (!isSubBorrowChain(Carry))
      return SDValue();

    BaseOp = SystemZISD::SUBCARRY;
    CCValid = SystemZ::CCMASK_LOGICAL;
    CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
    break;
  }

  // Set the condition code from the carry flag.
  Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
                      DAG.getConstant(CCValid, DL, MVT::i32),
                      DAG.getConstant(CCMask, DL, MVT::i32));

  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
  SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);

  SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
  if (N->getValueType(1) == MVT::i1)
    SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);

  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
}
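
// Note with a worked example (illustrative only): the vector instructions
// report borrows as carries, so the i128 paths above XOR the flag (and the
// incoming carry) with 1. E.g. for 0 - 1 a borrow occurs, the VSCBI-style
// borrow indication is 0, and 0 ^ 1 == 1 recovers the borrow bit that
// ISD::USUBO expects.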

SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
                                          SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  Op = Op.getOperand(0);

  if (VT.getScalarSizeInBits() == 128) {
    Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op);
    Op = DAG.getNode(ISD::CTPOP, DL, MVT::v2i64, Op);
    SDValue Tmp = DAG.getSplatBuildVector(MVT::v2i64, DL,
                                          DAG.getConstant(0, DL, MVT::i64));
    Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
    return Op;
  }

  // Handle vector types via VPOPCT.
  if (VT.isVector()) {
    Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
    Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
    switch (VT.getScalarSizeInBits()) {
    case 8:
      break;
    case 16: {
      Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
      SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
      SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
      Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
      Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
      break;
    }
    case 32: {
      SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
                                            DAG.getConstant(0, DL, MVT::i32));
      Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
      break;
    }
    case 64: {
      SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
                                            DAG.getConstant(0, DL, MVT::i32));
      Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
      Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
      break;
    }
    default:
      llvm_unreachable("Unexpected type");
    }
    return Op;
  }

  // Get the known-zero mask for the operand.
  KnownBits Known = DAG.computeKnownBits(Op);
  unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
  if (NumSignificantBits == 0)
    return DAG.getConstant(0, DL, VT);

  // Skip known-zero high parts of the operand.
  int64_t OrigBitSize = VT.getSizeInBits();
  int64_t BitSize = llvm::bit_ceil(NumSignificantBits);
  BitSize = std::min(BitSize, OrigBitSize);

  // The POPCNT instruction counts the number of bits in each byte.
  Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
  Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
  Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);

  // Add up per-byte counts in a binary tree.  All bits of Op at
  // position larger than BitSize remain zero throughout.
  for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
    SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
    if (BitSize != OrigBitSize)
      Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
                        DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
    Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
  }

  // Extract overall result from high byte.
  if (BitSize > 8)
    Op = DAG.getNode(ISD::SRL, DL, VT, Op,
                     DAG.getConstant(BitSize - 8, DL, VT));

  return Op;
}
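
// Worked example (illustrative only): a 32-bit scalar CTPOP whose per-byte
// POPCNT result is 0x01010101 runs the tree loop with I = 16 and I = 8:
// 0x01010101 + (0x01010101 << 16) = 0x02020101, then adding the value
// shifted left 8 gives 0x04030201, leaving the total (4) in the top byte,
// which the final SRL by BitSize - 8 = 24 extracts.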

SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  AtomicOrdering FenceOrdering =
      static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
  SyncScope::ID FenceSSID =
      static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));

  // The only fence that needs an instruction is a sequentially-consistent
  // cross-thread fence.
  if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
      FenceSSID == SyncScope::System) {
    return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
                                      Op.getOperand(0)),
                   0);
  }

  // MEMBARRIER is a compiler barrier; it codegens to a no-op.
  return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
}

SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op,
                                                     SelectionDAG &DAG) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());
  assert(
      (Node->getMemoryVT() == MVT::i128 || Node->getMemoryVT() == MVT::f128) &&
      "Only custom lowering i128 or f128.");
  // Use same code to handle both legal and non-legal i128 types.
  SmallVector<SDValue, 2> Results;
  LowerOperationWrapper(Node, Results, DAG);
  return DAG.getMergeValues(Results, SDLoc(Op));
}

// Prepare for a Compare And Swap for a subword operation. This needs to be
// done in memory with 4 bytes at natural alignment.
static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL,
                                  SDValue &AlignedAddr, SDValue &BitShift,
                                  SDValue &NegBitShift) {
  EVT PtrVT = Addr.getValueType();
  EVT WideVT = MVT::i32;

  // Get the address of the containing word.
  AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
                            DAG.getConstant(-4, DL, PtrVT));

  // Get the number of bits that the word must be rotated left in order
  // to bring the field to the top bits of a GR32.
  BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
                         DAG.getConstant(3, DL, PtrVT));
  BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);

  // Get the complementing shift amount, for rotating a field in the top
  // bits back to its proper position.
  NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
                            DAG.getConstant(0, DL, WideVT), BitShift);
}
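
// Worked example (illustrative only): for a halfword at address 0x1006,
// AlignedAddr = 0x1006 & -4 = 0x1004 and BitShift = 0x1006 * 8; since a
// 32-bit rotate only uses the low bits of its amount, this is effectively
// a rotate by 16, bringing the halfword to the top of the loaded GR32, and
// NegBitShift = 0 - BitShift rotates it back down afterwards.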

// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation.  Lower the first
// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
                                                   SelectionDAG &DAG,
                                                   unsigned Opcode) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());

  // 32-bit operations need no special handling.
  EVT NarrowVT = Node->getMemoryVT();
  EVT WideVT = MVT::i32;
  if (NarrowVT == WideVT)
    return Op;

  int64_t BitSize = NarrowVT.getSizeInBits();
  SDValue ChainIn = Node->getChain();
  SDValue Addr = Node->getBasePtr();
  SDValue Src2 = Node->getVal();
  MachineMemOperand *MMO = Node->getMemOperand();
  SDLoc DL(Node);

  // Convert atomic subtracts of constants into additions.
  if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
    if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
      Opcode = SystemZISD::ATOMIC_LOADW_ADD;
      Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
    }

  SDValue AlignedAddr, BitShift, NegBitShift;
  getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);

  // Extend the source operand to 32 bits and prepare it for the inner loop.
  // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
  // operations require the source to be shifted in advance.  (This shift
  // can be folded if the source is constant.)  For AND and NAND, the lower
  // bits must be set, while for other opcodes they should be left clear.
  if (Opcode != SystemZISD::ATOMIC_SWAPW)
    Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
                       DAG.getConstant(32 - BitSize, DL, WideVT));
  if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
      Opcode == SystemZISD::ATOMIC_LOADW_NAND)
    Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
                       DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));

  // Construct the ATOMIC_LOADW_* node.
  SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
  SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
                    DAG.getConstant(BitSize, DL, WideVT) };
  SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
                                             NarrowVT, MMO);

  // Rotate the result of the final CS so that the field is in the lower
  // bits of a GR32, then truncate it.
  SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
                                    DAG.getConstant(BitSize, DL, WideVT));
  SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);

  SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
  return DAG.getMergeValues(RetOps, DL);
}
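
// Worked example (illustrative only): an 8-bit atomic AND with 0xF0 is
// first shifted to the top of the word (Src2 = 0xF0000000) and then, since
// an AND must not clear the neighbouring bytes of the containing word, the
// low 24 bits are set: Src2 = 0xF0FFFFFF. For ADD, OR and XOR those low
// bits stay zero, which already leaves the neighbouring bytes unchanged.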

// Op is an ATOMIC_LOAD_SUB operation.  Lower 8- and 16-bit operations into
// ATOMIC_LOADW_SUBs and convert 32- and 64-bit operations into additions.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
                                                    SelectionDAG &DAG) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());
  EVT MemVT = Node->getMemoryVT();
  if (MemVT == MVT::i32 || MemVT == MVT::i64) {
    // A full-width operation: negate and use LAA(G).
    assert(Op.getValueType() == MemVT && "Mismatched VTs");
    assert(Subtarget.hasInterlockedAccess1() &&
           "Should have been expanded by AtomicExpand pass.");
    SDValue Src2 = Node->getVal();
    SDLoc DL(Src2);
    SDValue NegSrc2 =
        DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2);
    return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
                         Node->getChain(), Node->getBasePtr(), NegSrc2,
                         Node->getMemOperand());
  }

  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
}

// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
                                                    SelectionDAG &DAG) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());
  SDValue ChainIn = Node->getOperand(0);
  SDValue Addr = Node->getOperand(1);
  SDValue CmpVal = Node->getOperand(2);
  SDValue SwapVal = Node->getOperand(3);
  MachineMemOperand *MMO = Node->getMemOperand();
  SDLoc DL(Node);

  if (Node->getMemoryVT() == MVT::i128) {
    // Use same code to handle both legal and non-legal i128 types.
    SmallVector<SDValue, 3> Results;
    LowerOperationWrapper(Node, Results, DAG);
    return DAG.getMergeValues(Results, DL);
  }

  // We have native support for 32-bit and 64-bit compare and swap, but we
  // still need to expand extracting the "success" result from the CC.
  EVT NarrowVT = Node->getMemoryVT();
  EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
  if (NarrowVT == WideVT) {
    SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
    SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
    SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
                                               DL, Tys, Ops, NarrowVT, MMO);
    SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
                                SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);

    DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
    DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
    return SDValue();
  }

  // Convert 8-bit and 16-bit compare and swap to a loop, implemented
  // via a fullword ATOMIC_CMP_SWAPW operation.
  int64_t BitSize = NarrowVT.getSizeInBits();

  SDValue AlignedAddr, BitShift, NegBitShift;
  getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);

  // Construct the ATOMIC_CMP_SWAPW node.
  SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
  SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
                    NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
  SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
                                             VTList, Ops, NarrowVT, MMO);
  SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
                              SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);

  // emitAtomicCmpSwapW() will zero extend the result (original value).
  SDValue OrigVal = DAG.getNode(ISD::AssertZext, DL, WideVT, AtomicOp.getValue(0),
                                DAG.getValueType(NarrowVT));
  DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), OrigVal);
  DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
  DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
  return SDValue();
}
MachineMemOperand::Flags
SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
  // Because of how we convert atomic_load and atomic_store to normal loads and
  // stores in the DAG, we need to ensure that the MMOs are marked volatile
  // since DAGCombine hasn't been updated to account for atomic but
  // non-volatile loads. (See D57601)
  if (auto *SI = dyn_cast<StoreInst>(&I))
    if (SI->isAtomic())
      return MachineMemOperand::MOVolatile;
  if (auto *LI = dyn_cast<LoadInst>(&I))
    if (LI->isAtomic())
      return MachineMemOperand::MOVolatile;
  if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
    if (AI->isAtomic())
      return MachineMemOperand::MOVolatile;
  if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
    if (AI->isAtomic())
      return MachineMemOperand::MOVolatile;
  return MachineMemOperand::MONone;
}

SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
                                              SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto *Regs = Subtarget.getSpecialRegisters();
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    report_fatal_error("Variable-sized stack allocations are not supported "
                       "in GHC calling convention");
  return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
                            Regs->getStackPointerRegister(), Op.getValueType());
}

SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto *Regs = Subtarget.getSpecialRegisters();
  bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();

  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    report_fatal_error("Variable-sized stack allocations are not supported "
                       "in GHC calling convention");

  SDValue Chain = Op.getOperand(0);
  SDValue NewSP = Op.getOperand(1);
  SDValue Backchain;
  SDLoc DL(Op);

  if (StoreBackchain) {
    SDValue OldSP = DAG.getCopyFromReg(
        Chain, DL, Regs->getStackPointerRegister(), MVT::i64);
    Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
                            MachinePointerInfo());
  }

  Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP);

  if (StoreBackchain)
    Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
                         MachinePointerInfo());

  return Chain;
}

SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
                                             SelectionDAG &DAG) const {
  bool IsData = Op.getConstantOperandVal(4);
  if (!IsData)
    // Just preserve the chain.
    return Op.getOperand(0);

  SDLoc DL(Op);
  bool IsWrite = Op.getConstantOperandVal(2);
  unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
  auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
  SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
                   Op.getOperand(1)};
  return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
                                 Node->getVTList(), Ops,
                                 Node->getMemoryVT(), Node->getMemOperand());
}
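// For example, "llvm.prefetch(%p, /*rw=*/1, /*locality=*/3, /*data=*/1)"
// takes the PFD_WRITE path above, a read prefetch takes PFD_READ, and an
// instruction prefetch (data flag of 0) is dropped, keeping only the chain.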
// Convert condition code in CCReg to an i32 value.
static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
  SDLoc DL(CCReg);
  SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
  return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
                     DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
}
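// For example, if CCReg holds condition code 2, IPM materializes the CC at
// bit position SystemZ::IPM_CC of the i32 result, and the SRL above leaves
// the plain integer value 2.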
SDValue
SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
                                              SelectionDAG &DAG) const {
  unsigned Opcode, CCValid;
  if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
    assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
    SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
    SDValue CC = getCCResult(DAG, SDValue(Node, 0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
    return SDValue();
  }

  return SDValue();
}

SDValue
SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                               SelectionDAG &DAG) const {
  unsigned Opcode, CCValid;
  if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
    SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
    if (Op->getNumValues() == 1)
      return getCCResult(DAG, SDValue(Node, 0));
    assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
    return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
                       SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
  }

  unsigned Id = Op.getConstantOperandVal(0);
  switch (Id) {
  case Intrinsic::thread_pointer:
    return lowerThreadPointer(SDLoc(Op), DAG);

  case Intrinsic::s390_vpdi:
    return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));

  case Intrinsic::s390_vperm:
    return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));

  case Intrinsic::s390_vuphb:
  case Intrinsic::s390_vuphh:
  case Intrinsic::s390_vuphf:
    return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1));

  case Intrinsic::s390_vuplhb:
  case Intrinsic::s390_vuplhh:
  case Intrinsic::s390_vuplhf:
    return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1));

  case Intrinsic::s390_vuplb:
  case Intrinsic::s390_vuplhw:
  case Intrinsic::s390_vuplf:
    return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1));

  case Intrinsic::s390_vupllb:
  case Intrinsic::s390_vupllh:
  case Intrinsic::s390_vupllf:
    return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1));

  case Intrinsic::s390_vsumb:
  case Intrinsic::s390_vsumh:
  case Intrinsic::s390_vsumgh:
  case Intrinsic::s390_vsumgf:
  case Intrinsic::s390_vsumqf:
  case Intrinsic::s390_vsumqg:
    return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));

  case Intrinsic::s390_vaq:
    return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::s390_vaccb:
  case Intrinsic::s390_vacch:
  case Intrinsic::s390_vaccf:
  case Intrinsic::s390_vaccg:
  case Intrinsic::s390_vaccq:
    return DAG.getNode(SystemZISD::VACC, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::s390_vacq:
    return DAG.getNode(SystemZISD::VAC, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
  case Intrinsic::s390_vacccq:
    return DAG.getNode(SystemZISD::VACCC, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));

  case Intrinsic::s390_vsq:
    return DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::s390_vscbib:
  case Intrinsic::s390_vscbih:
  case Intrinsic::s390_vscbif:
  case Intrinsic::s390_vscbig:
  case Intrinsic::s390_vscbiq:
    return DAG.getNode(SystemZISD::VSCBI, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::s390_vsbiq:
    return DAG.getNode(SystemZISD::VSBI, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
  case Intrinsic::s390_vsbcbiq:
    return DAG.getNode(SystemZISD::VSBCBI, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
  }

  return SDValue();
}

namespace {
// Says that SystemZISD operation Opcode can be used to perform the equivalent
// of a VPERM with permute vector Bytes. If Opcode takes three operands,
// Operand is the constant third operand, otherwise it is the number of
// bytes in each element of the result.
struct Permute {
  unsigned Opcode;
  unsigned Operand;
  unsigned char Bytes[SystemZ::VectorBytes];
};
}

static const Permute PermuteForms[] = {
  // VMRHG
  { SystemZISD::MERGE_HIGH, 8,
    { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
  // VMRHF
  { SystemZISD::MERGE_HIGH, 4,
    { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
  // VMRHH
  { SystemZISD::MERGE_HIGH, 2,
    { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
  // VMRHB
  { SystemZISD::MERGE_HIGH, 1,
    { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
  // VMRLG
  { SystemZISD::MERGE_LOW, 8,
    { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
  // VMRLF
  { SystemZISD::MERGE_LOW, 4,
    { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
  // VMRLH
  { SystemZISD::MERGE_LOW, 2,
    { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
  // VMRLB
  { SystemZISD::MERGE_LOW, 1,
    { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
  // VPKG
  { SystemZISD::PACK, 4,
    { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
  // VPKF
  { SystemZISD::PACK, 2,
    { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
  // VPKH
  { SystemZISD::PACK, 1,
    { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
  // VPDI V1, V2, 4  (low half of V1, high half of V2)
  { SystemZISD::PERMUTE_DWORDS, 4,
    { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
  // VPDI V1, V2, 1  (high half of V1, low half of V2)
  { SystemZISD::PERMUTE_DWORDS, 1,
    { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
};
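// For example, the VMRHB entry above interleaves the high halves of the
// two inputs byte by byte: result byte 0 is byte 0 of operand 0, result
// byte 1 is byte 0 of operand 1 (selector 16), and so on.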
// Called after matching a vector shuffle against a particular pattern.
// Both the original shuffle and the pattern have two vector operands.
// OpNos[0] is the operand of the original shuffle that should be used for
// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
// OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
// for operands 0 and 1 of the pattern.
static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
  if (OpNos[0] < 0) {
    if (OpNos[1] < 0)
      return false;
    OpNo0 = OpNo1 = OpNos[1];
  } else if (OpNos[1] < 0) {
    OpNo0 = OpNo1 = OpNos[0];
  } else {
    OpNo0 = OpNos[0];
    OpNo1 = OpNos[1];
  }
  return true;
}

// Bytes is a VPERM-like permute vector, except that -1 is used for
// undefined bytes. Return true if the VPERM can be implemented using P.
// When returning true set OpNo0 to the VPERM operand that should be
// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
//
// For example, if swapping the VPERM operands allows P to match, OpNo0
// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
// operand, but rewriting it to use two duplicated operands allows it to
// match P, then OpNo0 and OpNo1 will be the same.
static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
                         unsigned &OpNo0, unsigned &OpNo1) {
  int OpNos[] = { -1, -1 };
  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
    int Elt = Bytes[I];
    if (Elt >= 0) {
      // Make sure that the two permute vectors use the same suboperand
      // byte number. Only the operand numbers (the high bits) are
      // allowed to differ.
      if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
        return false;
      int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
      int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
      // Make sure that the operand mappings are consistent with previous
      // elements.
      if (OpNos[ModelOpNo] == 1 - RealOpNo)
        return false;
      OpNos[ModelOpNo] = RealOpNo;
    }
  }
  return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
}

// As above, but search for a matching permute.
static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
                                   unsigned &OpNo0, unsigned &OpNo1) {
  for (auto &P : PermuteForms)
    if (matchPermute(Bytes, P, OpNo0, OpNo1))
      return &P;
  return nullptr;
}
// Bytes is a VPERM-like permute vector, except that -1 is used for
// undefined bytes. This permute is an operand of an outer permute.
// See whether redistributing the -1 bytes gives a shuffle that can be
// implemented using P. If so, set Transform to a VPERM-like permute vector
// that, when applied to the result of P, gives the original permute in Bytes.
static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
                               const Permute &P,
                               SmallVectorImpl<int> &Transform) {
  unsigned To = 0;
  for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
    int Elt = Bytes[From];
    if (Elt < 0)
      // Byte number From of the result is undefined.
      Transform[From] = -1;
    else {
      while (P.Bytes[To] != Elt) {
        To += 1;
        if (To == SystemZ::VectorBytes)
          return false;
      }
      Transform[From] = To;
    }
  }
  return true;
}

// As above, but search for a matching permute.
static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
                                         SmallVectorImpl<int> &Transform) {
  for (auto &P : PermuteForms)
    if (matchDoublePermute(Bytes, P, Transform))
      return &P;
  return nullptr;
}

// Convert the mask of the given shuffle op into a byte-level mask,
// as if it had type vNi8.
static bool getVPermMask(SDValue ShuffleOp,
                         SmallVectorImpl<int> &Bytes) {
  EVT VT = ShuffleOp.getValueType();
  unsigned NumElements = VT.getVectorNumElements();
  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();

  if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
    Bytes.resize(NumElements * BytesPerElement, -1);
    for (unsigned I = 0; I < NumElements; ++I) {
      int Index = VSN->getMaskElt(I);
      if (Index >= 0)
        for (unsigned J = 0; J < BytesPerElement; ++J)
          Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
    }
    return true;
  }
  if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
      isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
    unsigned Index = ShuffleOp.getConstantOperandVal(1);
    Bytes.resize(NumElements * BytesPerElement, -1);
    for (unsigned I = 0; I < NumElements; ++I)
      for (unsigned J = 0; J < BytesPerElement; ++J)
        Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
    return true;
  }
  return false;
}

// Bytes is a VPERM-like permute vector, except that -1 is used for
// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
// the result come from a contiguous sequence of bytes from one input.
// Set Base to the selector for the first byte if so.
static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
                            unsigned BytesPerElement, int &Base) {
  Base = -1;
  for (unsigned I = 0; I < BytesPerElement; ++I) {
    if (Bytes[Start + I] >= 0) {
      unsigned Elem = Bytes[Start + I];
      if (Base < 0) {
        Base = Elem - I;
        // Make sure the bytes would come from one input operand.
        if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
          return false;
      } else if (unsigned(Base) != Elem - I)
        return false;
    }
  }
  return true;
}
// Bytes is a VPERM-like permute vector, except that -1 is used for
// undefined bytes. Return true if it can be performed using VSLDB.
// When returning true, set StartIndex to the shift amount and OpNo0
// and OpNo1 to the VPERM operands that should be used as the first
// and second shift operand respectively.
static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
                               unsigned &StartIndex, unsigned &OpNo0,
                               unsigned &OpNo1) {
  int OpNos[] = { -1, -1 };
  int Shift = -1;
  for (unsigned I = 0; I < 16; ++I) {
    int Index = Bytes[I];
    if (Index >= 0) {
      int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
      int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
      int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
      if (Shift < 0)
        Shift = ExpectedShift;
      else if (Shift != ExpectedShift)
        return false;
      // Make sure that the operand mappings are consistent with previous
      // elements.
      if (OpNos[ModelOpNo] == 1 - RealOpNo)
        return false;
      OpNos[ModelOpNo] = RealOpNo;
    }
  }
  StartIndex = Shift;
  return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
}
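// For example, Bytes = { 1, 2, ..., 15, 16 } yields Shift == 1: every
// defined byte comes from position I + 1 of the 32-byte concatenation of
// the two operands, which is exactly a VSLDB with a one-byte shift.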
// Create a node that performs P on operands Op0 and Op1, casting the
// operands to the appropriate type. The type of the result is determined by P.
static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
                              const Permute &P, SDValue Op0, SDValue Op1) {
  // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
  // elements of a PACK are twice as wide as the outputs.
  unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
                      P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
                      P.Operand);
  // Cast both operands to the appropriate type.
  MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
                              SystemZ::VectorBytes / InBytes);
  Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
  Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
  SDValue Op;
  if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
    SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
    Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
  } else if (P.Opcode == SystemZISD::PACK) {
    MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
                                 SystemZ::VectorBytes / P.Operand);
    Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
  } else {
    Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
  }
  return Op;
}

static bool isZeroVector(SDValue N) {
  if (N->getOpcode() == ISD::BITCAST)
    N = N->getOperand(0);
  if (N->getOpcode() == ISD::SPLAT_VECTOR)
    if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0)))
      return Op->getZExtValue() == 0;
  return ISD::isBuildVectorAllZeros(N.getNode());
}

// Return the index of the zero/undef vector, or UINT32_MAX if not found.
static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) {
  for (unsigned I = 0; I < Num; I++)
    if (isZeroVector(Ops[I]))
      return I;
  return UINT32_MAX;
}

// Bytes is a VPERM-like permute vector, except that -1 is used for
// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
// VSLDB or VPERM.
static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
                                     SDValue *Ops,
                                     const SmallVectorImpl<int> &Bytes) {
  for (unsigned I = 0; I < 2; ++I)
    Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);

  // First see whether VSLDB can be used.
  unsigned StartIndex, OpNo0, OpNo1;
  if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
    return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
                       Ops[OpNo1],
                       DAG.getTargetConstant(StartIndex, DL, MVT::i32));

  // Fall back on VPERM. Construct an SDNode for the permute vector. Try to
  // eliminate a zero vector by reusing any zero index in the permute vector.
  unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2);
  if (ZeroVecIdx != UINT32_MAX) {
    bool MaskFirst = true;
    int ZeroIdx = -1;
    for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
      unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
      unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
      if (OpNo == ZeroVecIdx && I == 0) {
        // If the first byte is zero, use mask as first operand.
        ZeroIdx = 0;
        break;
      }
      if (OpNo != ZeroVecIdx && Byte == 0) {
        // If mask contains a zero, use it by placing that vector first.
        ZeroIdx = I + SystemZ::VectorBytes;
        MaskFirst = false;
        break;
      }
    }
    if (ZeroIdx != -1) {
      SDValue IndexNodes[SystemZ::VectorBytes];
      for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
        if (Bytes[I] >= 0) {
          unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
          unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
          if (OpNo == ZeroVecIdx)
            IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32);
          else {
            unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte;
            IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32);
          }
        } else
          IndexNodes[I] = DAG.getUNDEF(MVT::i32);
      }
      SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
      SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0];
      if (MaskFirst)
        return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src,
                           Mask);
      else
        return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask,
                           Mask);
    }
  }

  SDValue IndexNodes[SystemZ::VectorBytes];
  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
    if (Bytes[I] >= 0)
      IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
    else
      IndexNodes[I] = DAG.getUNDEF(MVT::i32);
  SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
  return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
                     (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
}
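// For example, when Ops[1] is a zero vector, any result byte that must be
// zero can instead select a byte of the permute vector itself that is known
// to be zero, so the VPERM above can pass the mask as one of its vector
// operands and the zero vector no longer ties up a register.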
namespace {
// Describes a general N-operand vector shuffle.
struct GeneralShuffle {
  GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {}
  void addUndef();
  bool add(SDValue, unsigned);
  SDValue getNode(SelectionDAG &, const SDLoc &);
  void tryPrepareForUnpack();
  bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
  SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);

  // The operands of the shuffle.
  SmallVector<SDValue, SystemZ::VectorBytes> Ops;

  // Index I is -1 if byte I of the result is undefined. Otherwise the
  // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
  // Bytes[I] / SystemZ::VectorBytes.
  SmallVector<int, SystemZ::VectorBytes> Bytes;

  // The type of the shuffle result.
  EVT VT;

  // Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
  unsigned UnpackFromEltSize;
};
}

// Add an extra undefined element to the shuffle.
void GeneralShuffle::addUndef() {
  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
  for (unsigned I = 0; I < BytesPerElement; ++I)
    Bytes.push_back(-1);
}

// Add an extra element to the shuffle, taking it from element Elem of Op.
// A null Op indicates a vector input whose value will be calculated later;
// there is at most one such input per shuffle and it always has the same
// type as the result. Aborts and returns false if the source vector elements
// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
// LLVM they become implicitly extended, but this is rare and not optimized.
bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();

  // The source vector can have wider elements than the result,
  // either through an explicit TRUNCATE or because of type legalization.
  // We want the least significant part.
  EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
  unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();

  // Return false if the source elements are smaller than their destination
  // elements.
  if (FromBytesPerElement < BytesPerElement)
    return false;

  unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
                   (FromBytesPerElement - BytesPerElement));

  // Look through things like shuffles and bitcasts.
  while (Op.getNode()) {
    if (Op.getOpcode() == ISD::BITCAST)
      Op = Op.getOperand(0);
    else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
      // See whether the bytes we need come from a contiguous part of one
      // operand.
      SmallVector<int, SystemZ::VectorBytes> OpBytes;
      if (!getVPermMask(Op, OpBytes))
        break;
      int NewByte;
      if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
        break;
      if (NewByte < 0) {
        addUndef();
        return true;
      }
      Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
      Byte = unsigned(NewByte) % SystemZ::VectorBytes;
    } else if (Op.isUndef()) {
      addUndef();
      return true;
    } else
      break;
  }

  // Make sure that the source of the extraction is in Ops.
  unsigned OpNo = 0;
  for (; OpNo < Ops.size(); ++OpNo)
    if (Ops[OpNo] == Op)
      break;
  if (OpNo == Ops.size())
    Ops.push_back(Op);

  // Add the element to Bytes.
  unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
  for (unsigned I = 0; I < BytesPerElement; ++I)
    Bytes.push_back(Base + I);

  return true;
}
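// For example, with 16-byte operands, Bytes[I] == 19 records that byte I
// of the result is byte 3 (19 % 16) of operand 1 (19 / 16).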
// Return SDNodes for the completed shuffle.
SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
  assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");

  if (Ops.size() == 0)
    return DAG.getUNDEF(VT);

  // Use a single unpack if possible as the last operation.
  tryPrepareForUnpack();

  // Make sure that there are at least two shuffle operands.
  if (Ops.size() == 1)
    Ops.push_back(DAG.getUNDEF(MVT::v16i8));

  // Create a tree of shuffles, deferring root node until after the loop.
  // Try to redistribute the undefined elements of non-root nodes so that
  // the non-root shuffles match something like a pack or merge, then adjust
  // the parent node's permute vector to compensate for the new order.
  // Among other things, this copes with vectors like <2 x i16> that were
  // padded with undefined elements during type legalization.
  //
  // In the best case this redistribution will lead to the whole tree
  // using packs and merges. It should rarely be a loss in other cases.
  unsigned Stride = 1;
  for (; Stride * 2 < Ops.size(); Stride *= 2) {
    for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
      SDValue SubOps[] = { Ops[I], Ops[I + Stride] };

      // Create a mask for just these two operands.
      SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
      for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
        unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
        unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
        if (OpNo == I)
          NewBytes[J] = Byte;
        else if (OpNo == I + Stride)
          NewBytes[J] = SystemZ::VectorBytes + Byte;
        else
          NewBytes[J] = -1;
      }
      // See if it would be better to reorganize NewBytes to avoid using VPERM.
      SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
      if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
        Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
        // Applying NewBytesMap to Ops[I] gets back to NewBytes.
        for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
          if (NewBytes[J] >= 0) {
            assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
                   "Invalid double permute");
            Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
          } else
            assert(NewBytesMap[J] < 0 && "Invalid double permute");
        }
      } else {
        // Just use NewBytes on the operands.
        Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
        for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
          if (NewBytes[J] >= 0)
            Bytes[J] = I * SystemZ::VectorBytes + J;
      }
    }
  }

  // Now we just have 2 inputs. Put the second operand in Ops[1].
  if (Stride > 1) {
    Ops[1] = Ops[Stride];
    for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
      if (Bytes[I] >= int(SystemZ::VectorBytes))
        Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
  }

  // Look for an instruction that can do the permute without resorting
  // to VPERM.
  unsigned OpNo0, OpNo1;
  SDValue Op;
  if (unpackWasPrepared() && Ops[1].isUndef())
    Op = Ops[0];
  else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
    Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
  else
    Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);

  Op = insertUnpackIfPrepared(DAG, DL, Op);

  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
}
#ifndef NDEBUG
static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) {
  dbgs() << Msg.c_str() << " { ";
  for (unsigned i = 0; i < Bytes.size(); i++)
    dbgs() << Bytes[i] << " ";
  dbgs() << "}\n";
}
#endif

// If the Bytes vector matches an unpack operation, prepare to do the unpack
// after all else by removing the zero vector and the effect of the unpack on
// Bytes.
void GeneralShuffle::tryPrepareForUnpack() {
  uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size());
  if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
    return;

  // Only do this if removing the zero vector reduces the depth, otherwise
  // the critical path will increase with the final unpack.
  if (Ops.size() > 2 &&
      Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1))
    return;

  // Find an unpack that would allow removing the zero vector from Ops.
  UnpackFromEltSize = 1;
  for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
    bool MatchUnpack = true;
    SmallVector<int, SystemZ::VectorBytes> SrcBytes;
    for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
      unsigned ToEltSize = UnpackFromEltSize * 2;
      bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
      if (!IsZextByte)
        SrcBytes.push_back(Bytes[Elt]);
      if (Bytes[Elt] != -1) {
        unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
        if (IsZextByte != (OpNo == ZeroVecOpNo)) {
          MatchUnpack = false;
          break;
        }
      }
    }
    if (MatchUnpack) {
      if (Ops.size() == 2) {
        // Don't use unpack if a single source operand needs rearrangement.
        for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++)
          if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) {
            UnpackFromEltSize = UINT_MAX;
            return;
          }
      }
      break;
    }
  }
  if (UnpackFromEltSize > 4)
    return;

  LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
                    << UnpackFromEltSize << ". Zero vector is Op#"
                    << ZeroVecOpNo << ".\n";
             dumpBytes(Bytes, "Original Bytes vector:"););

  // Apply the unpack in reverse to the Bytes array.
  unsigned B = 0;
  for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
    Elt += UnpackFromEltSize;
    for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
      Bytes[B] = Bytes[Elt];
  }
  while (B < SystemZ::VectorBytes)
    Bytes[B++] = -1;

  // Remove the zero vector from Ops.
  Ops.erase(&Ops[ZeroVecOpNo]);
  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
    if (Bytes[I] >= 0) {
      unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
      if (OpNo > ZeroVecOpNo)
        Bytes[I] -= SystemZ::VectorBytes;
    }

  LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:");
             dbgs() << "\n";);
}
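// For example, with UnpackFromEltSize == 1 the code above looks for a
// byte-to-halfword zero extension: within each 2-byte group of the result,
// the first byte must come from the zero vector and the remaining byte
// forms SrcBytes, which is the pattern a final UNPACKL_HIGH recreates.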
SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
                                               const SDLoc &DL,
                                               SDValue Op) {
  if (!unpackWasPrepared())
    return Op;
  unsigned InBits = UnpackFromEltSize * 8;
  EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits),
                              SystemZ::VectorBits / InBits);
  SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op);
  unsigned OutBits = InBits * 2;
  EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
                               SystemZ::VectorBits / OutBits);
  return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, OutVT, PackedOp);
}

// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
static bool isScalarToVector(SDValue Op) {
  for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
    if (!Op.getOperand(I).isUndef())
      return false;
  return true;
}

// Return a vector of type VT that contains Value in the first element.
// The other elements don't matter.
static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                                   SDValue Value) {
  // If we have a constant, replicate it to all elements and let the
  // BUILD_VECTOR lowering take care of it.
  if (Value.getOpcode() == ISD::Constant ||
      Value.getOpcode() == ISD::ConstantFP) {
    SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
    return DAG.getBuildVector(VT, DL, Ops);
  }
  if (Value.isUndef())
    return DAG.getUNDEF(VT);
  return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
}
// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
// element 1. Used for cases in which replication is cheap.
static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                                 SDValue Op0, SDValue Op1) {
  if (Op0.isUndef()) {
    if (Op1.isUndef())
      return DAG.getUNDEF(VT);
    return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
  }
  if (Op1.isUndef())
    return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
  return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
                     buildScalarToVector(DAG, DL, VT, Op0),
                     buildScalarToVector(DAG, DL, VT, Op1));
}

// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
// vector for them.
static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
                          SDValue Op1) {
  if (Op0.isUndef() && Op1.isUndef())
    return DAG.getUNDEF(MVT::v2i64);
  // If one of the two inputs is undefined then replicate the other one,
  // in order to avoid using another register unnecessarily.
  if (Op0.isUndef())
    Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
  else if (Op1.isUndef())
    Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
  else {
    Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
    Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
  }
  return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
}
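// For example, building <2 x i64> from GPRs %a and %b is a single VLVGP
// (JOIN_DWORDS); if one input is undefined, the defined value is used for
// both doublewords so that no second register is tied up.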
// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
// would benefit from this representation and return it if so.
static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
                                     BuildVectorSDNode *BVN) {
  EVT VT = BVN->getValueType(0);
  unsigned NumElements = VT.getVectorNumElements();

  // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
  // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
  // need a BUILD_VECTOR, add an additional placeholder operand for that
  // BUILD_VECTOR and store its operands in ResidueOps.
  GeneralShuffle GS(VT);
  SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
  bool FoundOne = false;
  for (unsigned I = 0; I < NumElements; ++I) {
    SDValue Op = BVN->getOperand(I);
    if (Op.getOpcode() == ISD::TRUNCATE)
      Op = Op.getOperand(0);
    if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
        Op.getOperand(1).getOpcode() == ISD::Constant) {
      unsigned Elem = Op.getConstantOperandVal(1);
      if (!GS.add(Op.getOperand(0), Elem))
        return SDValue();
      FoundOne = true;
    } else if (Op.isUndef()) {
      GS.addUndef();
    } else {
      if (!GS.add(SDValue(), ResidueOps.size()))
        return SDValue();
      ResidueOps.push_back(BVN->getOperand(I));
    }
  }

  // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
  if (!FoundOne)
    return SDValue();

  // Create the BUILD_VECTOR for the remaining elements, if any.
  if (!ResidueOps.empty()) {
    while (ResidueOps.size() < NumElements)
      ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
    for (auto &Op : GS.Ops) {
      if (!Op.getNode()) {
        Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
        break;
      }
    }
  }
  return GS.getNode(DAG, SDLoc(BVN));
}

bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
  if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
    return true;
  if (auto *AL = dyn_cast<AtomicSDNode>(Op))
    if (AL->getOpcode() == ISD::ATOMIC_LOAD)
      return true;
  if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
    return true;
  return false;
}
// Combine GPR scalar values Elems into a vector of type VT.
SDValue
SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                                   SmallVectorImpl<SDValue> &Elems) const {
  // See whether there is a single replicated value.
  SDValue Single;
  unsigned int NumElements = Elems.size();
  unsigned int Count = 0;
  for (auto Elem : Elems) {
    if (!Elem.isUndef()) {
      if (!Single.getNode())
        Single = Elem;
      else if (Elem != Single) {
        Single = SDValue();
        break;
      }
      Count += 1;
    }
  }
  // There are three cases here:
  //
  // - if the only defined element is a loaded one, the best sequence
  //   is a replicating load.
  //
  // - otherwise, if the only defined element is an i64 value, we will
  //   end up with the same VLVGP sequence regardless of whether we short-cut
  //   for replication or fall through to the later code.
  //
  // - otherwise, if the only defined element is an i32 or smaller value,
  //   we would need 2 instructions to replicate it: VLVGP followed by VREPx.
  //   This is only a win if the single defined element is used more than once.
  //   In other cases we're better off using a single VLVGx.
  if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
    return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);

  // If all elements are loads, use VLREP/VLEs (below).
  bool AllLoads = true;
  for (auto Elem : Elems)
    if (!isVectorElementLoad(Elem)) {
      AllLoads = false;
      break;
    }

  // The best way of building a v2i64 from two i64s is to use VLVGP.
  if (VT == MVT::v2i64 && !AllLoads)
    return joinDwords(DAG, DL, Elems[0], Elems[1]);

  // Use a 64-bit merge high to combine two doubles.
  if (VT == MVT::v2f64 && !AllLoads)
    return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);

  // Build v4f32 values directly from the FPRs:
  //
  //   <Axxx> <Bxxx> <Cxxx> <Dxxx>
  //        V              V       VMRHF
  //      <ABxx>         <CDxx>
  //           V         V         VMRHG
  //           <ABCD>
  if (VT == MVT::v4f32 && !AllLoads) {
    SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
    SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
    // Avoid unnecessary undefs by reusing the other operand.
    if (Op01.isUndef())
      Op01 = Op23;
    else if (Op23.isUndef())
      Op23 = Op01;
    // Merging identical replications is a no-op.
    if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
      return Op01;
    Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
    Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
    SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
                             DL, MVT::v2i64, Op01, Op23);
    return DAG.getNode(ISD::BITCAST, DL, VT, Op);
  }

  // Collect the constant terms.
  SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
  SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false);

  unsigned NumConstants = 0;
  for (unsigned I = 0; I < NumElements; ++I) {
    SDValue Elem = Elems[I];
    if (Elem.getOpcode() == ISD::Constant ||
        Elem.getOpcode() == ISD::ConstantFP) {
      NumConstants += 1;
      Constants[I] = Elem;
      Done[I] = true;
    }
  }
  // If there was at least one constant, fill in the other elements of
  // Constants with undefs to get a full vector constant and use that
  // as the starting point.
  SDValue Result;
  SDValue ReplicatedVal;
  if (NumConstants > 0) {
    for (unsigned I = 0; I < NumElements; ++I)
      if (!Constants[I].getNode())
        Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
    Result = DAG.getBuildVector(VT, DL, Constants);
  } else {
    // Otherwise try to use VLREP or VLVGP to start the sequence in order to
    // avoid a false dependency on any previous contents of the vector
    // register.

    // Use a VLREP if at least one element is a load. Make sure to replicate
    // the load with the most elements having its value.
    std::map<const SDNode*, unsigned> UseCounts;
    SDNode *LoadMaxUses = nullptr;
    for (unsigned I = 0; I < NumElements; ++I)
      if (isVectorElementLoad(Elems[I])) {
        SDNode *Ld = Elems[I].getNode();
        UseCounts[Ld]++;
        if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
          LoadMaxUses = Ld;
      }
    if (LoadMaxUses != nullptr) {
      ReplicatedVal = SDValue(LoadMaxUses, 0);
      Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
    } else {
      // Try to use VLVGP.
      unsigned I1 = NumElements / 2 - 1;
      unsigned I2 = NumElements - 1;
      bool Def1 = !Elems[I1].isUndef();
      bool Def2 = !Elems[I2].isUndef();
      if (Def1 || Def2) {
        SDValue Elem1 = Elems[Def1 ? I1 : I2];
        SDValue Elem2 = Elems[Def2 ? I2 : I1];
        Result = DAG.getNode(ISD::BITCAST, DL, VT,
                             joinDwords(DAG, DL, Elem1, Elem2));
        Done[I1] = true;
        Done[I2] = true;
      } else
        Result = DAG.getUNDEF(VT);
    }
  }

  // Use VLVGx to insert the other elements.
  for (unsigned I = 0; I < NumElements; ++I)
    if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
      Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
                           DAG.getConstant(I, DL, MVT::i32));
  return Result;
}
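// For example, a <4 x i32> built from four non-constant GPRs starts with a
// VLVGP that fills the two doubleword lanes from Elems[1] and Elems[3];
// the remaining elements 0 and 2 are then inserted with VLVGF.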
SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  if (BVN->isConstant()) {
    if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
      return Op;

    // Fall back to loading it from memory.
    return SDValue();
  }

  // See if we should use shuffles to construct the vector from other vectors.
  if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
    return Res;

  // Detect SCALAR_TO_VECTOR conversions.
  if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op))
    return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));

  // Otherwise use buildVector to build the vector up from GPRs.
  unsigned NumElements = Op.getNumOperands();
  SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
  for (unsigned I = 0; I < NumElements; ++I)
    Ops[I] = Op.getOperand(I);
  return buildVector(DAG, DL, VT, Ops);
}

SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
                                                   SelectionDAG &DAG) const {
  auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  unsigned NumElements = VT.getVectorNumElements();

  if (VSN->isSplat()) {
    SDValue Op0 = Op.getOperand(0);
    unsigned Index = VSN->getSplatIndex();
    assert(Index < VT.getVectorNumElements() &&
           "Splat index should be defined and in first operand");
    // See whether the value we're splatting is directly available as a scalar.
    if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
        Op0.getOpcode() == ISD::BUILD_VECTOR)
      return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
    // Otherwise keep it as a vector-to-vector operation.
    return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
                       DAG.getTargetConstant(Index, DL, MVT::i32));
  }

  GeneralShuffle GS(VT);
  for (unsigned I = 0; I < NumElements; ++I) {
    int Elt = VSN->getMaskElt(I);
    if (Elt < 0)
      GS.addUndef();
    else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
                     unsigned(Elt) % NumElements))
      return SDValue();
  }
  return GS.getNode(DAG, SDLoc(VSN));
}

SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // Just insert the scalar into element 0 of an undefined vector.
  return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
                     Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
                     Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
}

SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                      SelectionDAG &DAG) const {
  // Handle insertions of floating-point values.
  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  SDValue Op2 = Op.getOperand(2);
  EVT VT = Op.getValueType();

  // Insertions into constant indices of a v2f64 can be done using VPDI.
  // However, if the inserted value is a bitcast or a constant then it's
  // better to use GPRs, as below.
  if (VT == MVT::v2f64 &&
      Op1.getOpcode() != ISD::BITCAST &&
      Op1.getOpcode() != ISD::ConstantFP &&
      Op2.getOpcode() == ISD::Constant) {
    uint64_t Index = Op2->getAsZExtVal();
    unsigned Mask = VT.getVectorNumElements() - 1;
    if (Index <= Mask)
      return Op;
  }

  // Otherwise bitcast to the equivalent integer form and insert via a GPR.
  MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
  MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
  SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
                            DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
                            DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
  return DAG.getNode(ISD::BITCAST, DL, VT, Res);
}
SDValue
SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                               SelectionDAG &DAG) const {
  // Handle extractions of floating-point values.
  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  EVT VT = Op.getValueType();
  EVT VecVT = Op0.getValueType();

  // Extractions of constant indices can be done directly.
  if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
    uint64_t Index = CIndexN->getZExtValue();
    unsigned Mask = VecVT.getVectorNumElements() - 1;
    if (Index <= Mask)
      return Op;
  }

  // Otherwise bitcast to the equivalent integer form and extract via a GPR.
  MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
  MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
  SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
                            DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
  return DAG.getNode(ISD::BITCAST, DL, VT, Res);
}

SDValue SystemZTargetLowering::
lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
  SDValue PackedOp = Op.getOperand(0);
  EVT OutVT = Op.getValueType();
  EVT InVT = PackedOp.getValueType();
  unsigned ToBits = OutVT.getScalarSizeInBits();
  unsigned FromBits = InVT.getScalarSizeInBits();
  do {
    FromBits *= 2;
    EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
                                 SystemZ::VectorBits / FromBits);
    PackedOp =
      DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(PackedOp), OutVT, PackedOp);
  } while (FromBits != ToBits);
  return PackedOp;
}

// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
SDValue SystemZTargetLowering::
lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
  SDValue PackedOp = Op.getOperand(0);
  SDLoc DL(Op);
  EVT OutVT = Op.getValueType();
  EVT InVT = PackedOp.getValueType();
  unsigned InNumElts = InVT.getVectorNumElements();
  unsigned OutNumElts = OutVT.getVectorNumElements();
  unsigned NumInPerOut = InNumElts / OutNumElts;

  SDValue ZeroVec =
    DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType()));

  SmallVector<int, 16> Mask(InNumElts);
  unsigned ZeroVecElt = InNumElts;
  for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
    unsigned MaskElt = PackedElt * NumInPerOut;
    unsigned End = MaskElt + NumInPerOut - 1;
    for (; MaskElt < End; MaskElt++)
      Mask[MaskElt] = ZeroVecElt++;
    Mask[MaskElt] = PackedElt;
  }
  SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask);
  return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf);
}

SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
                                          unsigned ByScalar) const {
  // Look for cases where a vector shift can use the *_BY_SCALAR form.
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  unsigned ElemBitSize = VT.getScalarSizeInBits();

  // See whether the shift vector is a splat represented as BUILD_VECTOR.
  if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
    APInt SplatBits, SplatUndef;
    unsigned SplatBitSize;
    bool HasAnyUndefs;
    // Check for constant splats. Use ElemBitSize as the minimum element
    // width and reject splats that need wider elements.
    if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
                             ElemBitSize, true) &&
        SplatBitSize == ElemBitSize) {
      SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
                                      DL, MVT::i32);
      return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
    }
    // Check for variable splats.
    BitVector UndefElements;
    SDValue Splat = BVN->getSplatValue(&UndefElements);
    if (Splat) {
      // Since i32 is the smallest legal type, we either need a no-op
      // or a truncation.
      SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
      return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
    }
  }

  // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
  // and the shift amount is directly available in a GPR.
  if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
    if (VSN->isSplat()) {
      SDValue VSNOp0 = VSN->getOperand(0);
      unsigned Index = VSN->getSplatIndex();
      assert(Index < VT.getVectorNumElements() &&
             "Splat index should be defined and in first operand");
      if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
          VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
        // Since i32 is the smallest legal type, we either need a no-op
        // or a truncation.
        SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
                                    VSNOp0.getOperand(Index));
        return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
      }
    }
  }

  // Otherwise just treat the current form as legal.
  return Op;
}
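// For example, "shl <4 x i32> %v, <i32 3, i32 3, i32 3, i32 3>" hits the
// constant-splat case above and becomes a single shift-by-scalar (VESLF)
// with a count of 3 instead of a shift by a vector of per-element amounts.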
SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT ResultVT = Op.getSimpleValueType();
  SDValue Arg = Op.getOperand(0);
  unsigned Check = Op.getConstantOperandVal(1);

  unsigned TDCMask = 0;
  if (Check & fcSNan)
    TDCMask |= SystemZ::TDCMASK_SNAN_PLUS | SystemZ::TDCMASK_SNAN_MINUS;
  if (Check & fcQNan)
    TDCMask |= SystemZ::TDCMASK_QNAN_PLUS | SystemZ::TDCMASK_QNAN_MINUS;
  if (Check & fcPosInf)
    TDCMask |= SystemZ::TDCMASK_INFINITY_PLUS;
  if (Check & fcNegInf)
    TDCMask |= SystemZ::TDCMASK_INFINITY_MINUS;
  if (Check & fcPosNormal)
    TDCMask |= SystemZ::TDCMASK_NORMAL_PLUS;
  if (Check & fcNegNormal)
    TDCMask |= SystemZ::TDCMASK_NORMAL_MINUS;
  if (Check & fcPosSubnormal)
    TDCMask |= SystemZ::TDCMASK_SUBNORMAL_PLUS;
  if (Check & fcNegSubnormal)
    TDCMask |= SystemZ::TDCMASK_SUBNORMAL_MINUS;
  if (Check & fcPosZero)
    TDCMask |= SystemZ::TDCMASK_ZERO_PLUS;
  if (Check & fcNegZero)
    TDCMask |= SystemZ::TDCMASK_ZERO_MINUS;
  SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64);

  SDValue Intr = DAG.getNode(SystemZISD::TDC, DL, ResultVT, Arg, TDCMaskV);
  return getCCResult(DAG, Intr);
}
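// For example, "llvm.is.fpclass(%x, fcNan)" above sets both SNaN and both
// QNaN bits in the TDC mask; the TDC node then produces a CC that
// getCCResult() converts to the i32 result.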
SDValue SystemZTargetLowering::lowerREADCYCLECOUNTER(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Chain = Op.getOperand(0);

  // STCKF only supports a memory operand, so we have to use a temporary.
  SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
  int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  MachinePointerInfo MPI =
    MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);

  // Use STCKF to store the TOD clock into the temporary.
  SDValue StoreOps[] = {Chain, StackPtr};
  Chain = DAG.getMemIntrinsicNode(
      SystemZISD::STCKF, DL, DAG.getVTList(MVT::Other), StoreOps, MVT::i64,
      MPI, MaybeAlign(), MachineMemOperand::MOStore);

  // And read it back from there.
  return DAG.getLoad(MVT::i64, DL, Chain, StackPtr, MPI);
}

SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
                                              SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::BR_CC:
    return lowerBR_CC(Op, DAG);
  case ISD::SELECT_CC:
    return lowerSELECT_CC(Op, DAG);
  case ISD::SETCC:
    return lowerSETCC(Op, DAG);
  case ISD::STRICT_FSETCC:
    return lowerSTRICT_FSETCC(Op, DAG, false);
  case ISD::STRICT_FSETCCS:
    return lowerSTRICT_FSETCC(Op, DAG, true);
  case ISD::GlobalAddress:
    return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
  case ISD::JumpTable:
    return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
  case ISD::BITCAST:
    return lowerBITCAST(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::VACOPY:
    return lowerVACOPY(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC:
    return lowerDYNAMIC_STACKALLOC(Op, DAG);
  case ISD::GET_DYNAMIC_AREA_OFFSET:
    return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
  case ISD::SMUL_LOHI:
    return lowerSMUL_LOHI(Op, DAG);
  case ISD::UMUL_LOHI:
    return lowerUMUL_LOHI(Op, DAG);
  case ISD::SDIVREM:
    return lowerSDIVREM(Op, DAG);
  case ISD::UDIVREM:
    return lowerUDIVREM(Op, DAG);
  case ISD::SADDO:
  case ISD::SSUBO:
  case ISD::UADDO:
  case ISD::USUBO:
    return lowerXALUO(Op, DAG);
  case ISD::UADDO_CARRY:
  case ISD::USUBO_CARRY:
    return lowerUADDSUBO_CARRY(Op, DAG);
  case ISD::OR:
    return lowerOR(Op, DAG);
  case ISD::CTPOP:
    return lowerCTPOP(Op, DAG);
  case ISD::VECREDUCE_ADD:
    return lowerVECREDUCE_ADD(Op, DAG);
  case ISD::ATOMIC_FENCE:
    return lowerATOMIC_FENCE(Op, DAG);
  case ISD::ATOMIC_SWAP:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
  case ISD::ATOMIC_STORE:
  case ISD::ATOMIC_LOAD:
    return lowerATOMIC_LDST_I128(Op, DAG);
  case ISD::ATOMIC_LOAD_ADD:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
  case ISD::ATOMIC_LOAD_SUB:
    return lowerATOMIC_LOAD_SUB(Op, DAG);
  case ISD::ATOMIC_LOAD_AND:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
  case ISD::ATOMIC_LOAD_OR:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
  case ISD::ATOMIC_LOAD_XOR:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
  case ISD::ATOMIC_LOAD_NAND:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
  case ISD::ATOMIC_LOAD_MIN:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
  case ISD::ATOMIC_LOAD_MAX:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
  case ISD::ATOMIC_LOAD_UMIN:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
  case ISD::ATOMIC_LOAD_UMAX:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
    return lowerATOMIC_CMP_SWAP(Op, DAG);
  case ISD::STACKSAVE:
    return lowerSTACKSAVE(Op, DAG);
  case ISD::STACKRESTORE:
    return lowerSTACKRESTORE(Op, DAG);
  case ISD::PREFETCH:
    return lowerPREFETCH(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN:
    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return lowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return lowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::SIGN_EXTEND_VECTOR_INREG:
    return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
  case ISD::ZERO_EXTEND_VECTOR_INREG:
    return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
  case ISD::SHL:
    return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
  case ISD::SRL:
    return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
  case ISD::SRA:
    return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
  case ISD::ROTL:
    return lowerShift(Op, DAG, SystemZISD::VROTL_BY_SCALAR);
  case ISD::IS_FPCLASS:
    return lowerIS_FPCLASS(Op, DAG);
  case ISD::GET_ROUNDING:
    return lowerGET_ROUNDING(Op, DAG);
  case ISD::READCYCLECOUNTER:
    return lowerREADCYCLECOUNTER(Op, DAG);
  default:
    llvm_unreachable("Unexpected node to lower");
  }
}
static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src,
                                       const SDLoc &SL) {
  // If i128 is legal, just use a normal bitcast.
  if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
    return DAG.getBitcast(MVT::f128, Src);

  // Otherwise, f128 must live in FP128, so do a partwise move.
  assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
         &SystemZ::FP128BitRegClass);

  SDValue Hi, Lo;
  std::tie(Lo, Hi) = DAG.SplitScalar(Src, SL, MVT::i64, MVT::i64);

  Hi = DAG.getBitcast(MVT::f64, Hi);
  Lo = DAG.getBitcast(MVT::f64, Lo);

  SDNode *Pair = DAG.getMachineNode(
      SystemZ::REG_SEQUENCE, SL, MVT::f128,
      {DAG.getTargetConstant(SystemZ::FP128BitRegClassID, SL, MVT::i32), Lo,
       DAG.getTargetConstant(SystemZ::subreg_l64, SL, MVT::i32), Hi,
       DAG.getTargetConstant(SystemZ::subreg_h64, SL, MVT::i32)});
  return SDValue(Pair, 0);
}

static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src,
                                       const SDLoc &SL) {
  // If i128 is legal, just use a normal bitcast.
  if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
    return DAG.getBitcast(MVT::i128, Src);

  // Otherwise, f128 must live in FP128, so do a partwise move.
  assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
         &SystemZ::FP128BitRegClass);

  SDValue LoFP =
      DAG.getTargetExtractSubreg(SystemZ::subreg_l64, SL, MVT::f64, Src);
  SDValue HiFP =
      DAG.getTargetExtractSubreg(SystemZ::subreg_h64, SL, MVT::f64, Src);
  SDValue Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i64, LoFP);
  SDValue Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i64, HiFP);

  return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i128, Lo, Hi);
}
// Lower operations with invalid operand or result types (currently used
// only for 128-bit integer types).
void
SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
                                             SmallVectorImpl<SDValue> &Results,
                                             SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  case ISD::ATOMIC_LOAD: {
    SDLoc DL(N);
    SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
    MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
    SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
                                          DL, Tys, Ops, MVT::i128, MMO);

    SDValue Lowered = lowerGR128ToI128(DAG, Res);
    if (N->getValueType(0) == MVT::f128)
      Lowered = expandBitCastI128ToF128(DAG, Lowered, DL);
    Results.push_back(Lowered);
    Results.push_back(Res.getValue(1));
    break;
  }
  case ISD::ATOMIC_STORE: {
    SDLoc DL(N);
    SDVTList Tys = DAG.getVTList(MVT::Other);
    SDValue Val = N->getOperand(1);
    if (Val.getValueType() == MVT::f128)
      Val = expandBitCastF128ToI128(DAG, Val, DL);
    Val = lowerI128ToGR128(DAG, Val);

    SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2)};
    MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
    SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
                                          DL, Tys, Ops, MVT::i128, MMO);
    // We have to enforce sequential consistency by performing a
    // serialization operation after the store.
    if (cast<AtomicSDNode>(N)->getSuccessOrdering() ==
        AtomicOrdering::SequentiallyConsistent)
      Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
                                       MVT::Other, Res), 0);
    Results.push_back(Res);
    break;
  }
  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
    SDLoc DL(N);
    SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
                      lowerI128ToGR128(DAG, N->getOperand(2)),
                      lowerI128ToGR128(DAG, N->getOperand(3)) };
    MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
    SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
                                          DL, Tys, Ops, MVT::i128, MMO);
    SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
                                SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
    Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
    Results.push_back(lowerGR128ToI128(DAG, Res));
    Results.push_back(Success);
    Results.push_back(Res.getValue(2));
    break;
  }
  case ISD::BITCAST: {
    SDValue Src = N->getOperand(0);
    if (N->getValueType(0) == MVT::i128 && Src.getValueType() == MVT::f128 &&
        !useSoftFloat()) {
      SDLoc DL(N);
      Results.push_back(expandBitCastF128ToI128(DAG, Src, DL));
    }
    break;
  }
  default:
    llvm_unreachable("Unexpected node to lower");
  }
}

void
SystemZTargetLowering::ReplaceNodeResults(SDNode *N,
                                          SmallVectorImpl<SDValue> &Results,
                                          SelectionDAG &DAG) const {
  return LowerOperationWrapper(N, Results, DAG);
}
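
// ReplaceNodeResults (illegal result types) and LowerOperationWrapper
// (illegal operand types) share one implementation: every case handled
// above involves a 128-bit value on one side or the other.
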
const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
  switch ((SystemZISD::NodeType)Opcode) {
    case SystemZISD::FIRST_NUMBER: break;
    OPCODE(RET_GLUE);
    OPCODE(CALL);
    OPCODE(SIBCALL);
    OPCODE(TLS_GDCALL);
    OPCODE(TLS_LDCALL);
    OPCODE(PCREL_WRAPPER);
    OPCODE(PCREL_OFFSET);
    OPCODE(ICMP);
    OPCODE(FCMP);
    OPCODE(STRICT_FCMP);
    OPCODE(STRICT_FCMPS);
    OPCODE(TM);
    OPCODE(BR_CCMASK);
    OPCODE(SELECT_CCMASK);
    OPCODE(ADJDYNALLOC);
    OPCODE(PROBED_ALLOCA);
    OPCODE(POPCNT);
    OPCODE(SMUL_LOHI);
    OPCODE(UMUL_LOHI);
    OPCODE(SDIVREM);
    OPCODE(UDIVREM);
    OPCODE(SADDO);
    OPCODE(SSUBO);
    OPCODE(UADDO);
    OPCODE(USUBO);
    OPCODE(ADDCARRY);
    OPCODE(SUBCARRY);
    OPCODE(GET_CCMASK);
    OPCODE(MVC);
    OPCODE(NC);
    OPCODE(OC);
    OPCODE(XC);
    OPCODE(CLC);
    OPCODE(MEMSET_MVC);
    OPCODE(STPCPY);
    OPCODE(STRCMP);
    OPCODE(SEARCH_STRING);
    OPCODE(IPM);
    OPCODE(TBEGIN);
    OPCODE(TBEGIN_NOFLOAT);
    OPCODE(TEND);
    OPCODE(BYTE_MASK);
    OPCODE(ROTATE_MASK);
    OPCODE(REPLICATE);
    OPCODE(JOIN_DWORDS);
    OPCODE(SPLAT);
    OPCODE(MERGE_HIGH);
    OPCODE(MERGE_LOW);
    OPCODE(SHL_DOUBLE);
    OPCODE(PERMUTE_DWORDS);
    OPCODE(PERMUTE);
    OPCODE(PACK);
    OPCODE(PACKS_CC);
    OPCODE(PACKLS_CC);
    OPCODE(UNPACK_HIGH);
    OPCODE(UNPACKL_HIGH);
    OPCODE(UNPACK_LOW);
    OPCODE(UNPACKL_LOW);
    OPCODE(VSHL_BY_SCALAR);
    OPCODE(VSRL_BY_SCALAR);
    OPCODE(VSRA_BY_SCALAR);
    OPCODE(VROTL_BY_SCALAR);
    OPCODE(VSUM);
    OPCODE(VACC);
    OPCODE(VSCBI);
    OPCODE(VAC);
    OPCODE(VSBI);
    OPCODE(VACCC);
    OPCODE(VSBCBI);
    OPCODE(VICMPE);
    OPCODE(VICMPH);
    OPCODE(VICMPHL);
    OPCODE(VICMPES);
    OPCODE(VICMPHS);
    OPCODE(VICMPHLS);
    OPCODE(VFCMPE);
    OPCODE(STRICT_VFCMPE);
    OPCODE(STRICT_VFCMPES);
    OPCODE(VFCMPH);
    OPCODE(STRICT_VFCMPH);
    OPCODE(STRICT_VFCMPHS);
    OPCODE(VFCMPHE);
    OPCODE(STRICT_VFCMPHE);
    OPCODE(STRICT_VFCMPHES);
    OPCODE(VFCMPES);
    OPCODE(VFCMPHS);
    OPCODE(VFCMPHES);
    OPCODE(VFTCI);
    OPCODE(VEXTEND);
    OPCODE(STRICT_VEXTEND);
    OPCODE(VROUND);
    OPCODE(STRICT_VROUND);
    OPCODE(VTM);
    OPCODE(SCMP128HI);
    OPCODE(UCMP128HI);
    OPCODE(VFAE_CC);
    OPCODE(VFAEZ_CC);
    OPCODE(VFEE_CC);
    OPCODE(VFEEZ_CC);
    OPCODE(VFENE_CC);
    OPCODE(VFENEZ_CC);
    OPCODE(VISTR_CC);
    OPCODE(VSTRC_CC);
    OPCODE(VSTRCZ_CC);
    OPCODE(VSTRS_CC);
    OPCODE(VSTRSZ_CC);
    OPCODE(TDC);
    OPCODE(ATOMIC_SWAPW);
    OPCODE(ATOMIC_LOADW_ADD);
    OPCODE(ATOMIC_LOADW_SUB);
    OPCODE(ATOMIC_LOADW_AND);
    OPCODE(ATOMIC_LOADW_OR);
    OPCODE(ATOMIC_LOADW_XOR);
    OPCODE(ATOMIC_LOADW_NAND);
    OPCODE(ATOMIC_LOADW_MIN);
    OPCODE(ATOMIC_LOADW_MAX);
    OPCODE(ATOMIC_LOADW_UMIN);
    OPCODE(ATOMIC_LOADW_UMAX);
    OPCODE(ATOMIC_CMP_SWAPW);
    OPCODE(ATOMIC_CMP_SWAP);
    OPCODE(ATOMIC_LOAD_128);
    OPCODE(ATOMIC_STORE_128);
    OPCODE(ATOMIC_CMP_SWAP_128);
    OPCODE(LRV);
    OPCODE(STRV);
    OPCODE(VLER);
    OPCODE(VSTER);
    OPCODE(STCKF);
    OPCODE(PREFETCH);
    OPCODE(ADA_ENTRY);
  }
  return nullptr;
#undef OPCODE
}

// Return true if VT is a vector whose elements are a whole number of bytes
// in width. Also check for presence of vector support.
bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
  if (!Subtarget.hasVector())
    return false;

  return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
}
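
// For example, v16i8, v8i16, v4i32 and v2f64 all satisfy the test above,
// since each element occupies a whole number of bytes; the test also fails
// outright when the subtarget has no vector support.
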
// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
// producing a result of type ResVT.  Op is a possibly bitcast version
// of the input vector and Index is the index (based on type VecVT) that
// should be extracted.  Return the new extraction if a simplification
// was possible or if Force is true.
SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
                                              EVT VecVT, SDValue Op,
                                              unsigned Index,
                                              DAGCombinerInfo &DCI,
                                              bool Force) const {
  SelectionDAG &DAG = DCI.DAG;

  // The number of bytes being extracted.
  unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();

  for (;;) {
    unsigned Opcode = Op.getOpcode();
    if (Opcode == ISD::BITCAST)
      // Look through bitcasts.
      Op = Op.getOperand(0);
    else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
             canTreatAsByteVector(Op.getValueType())) {
      // Get a VPERM-like permute mask and see whether the bytes covered
      // by the extracted element are a contiguous sequence from one
      // source operand.
      SmallVector<int, SystemZ::VectorBytes> Bytes;
      if (!getVPermMask(Op, Bytes))
        break;
      int First;
      if (!getShuffleInput(Bytes, Index * BytesPerElement,
                           BytesPerElement, First))
        break;
      if (First < 0)
        return DAG.getUNDEF(ResVT);
      // Make sure the contiguous sequence starts at a multiple of the
      // original element size.
      unsigned Byte = unsigned(First) % Bytes.size();
      if (Byte % BytesPerElement != 0)
        break;
      // We can get the extracted value directly from an input.
      Index = Byte / BytesPerElement;
      Op = Op.getOperand(unsigned(First) / Bytes.size());
      Force = true;
    } else if (Opcode == ISD::BUILD_VECTOR &&
               canTreatAsByteVector(Op.getValueType())) {
      // We can only optimize this case if the BUILD_VECTOR elements are
      // at least as wide as the extracted value.
      EVT OpVT = Op.getValueType();
      unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
      if (OpBytesPerElement < BytesPerElement)
        break;
      // Make sure that the least-significant bit of the extracted value
      // is the least-significant bit of an input.
      unsigned End = (Index + 1) * BytesPerElement;
      if (End % OpBytesPerElement != 0)
        break;
      // We're extracting the low part of one operand of the BUILD_VECTOR.
      Op = Op.getOperand(End / OpBytesPerElement - 1);
      if (!Op.getValueType().isInteger()) {
        EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
        Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
        DCI.AddToWorklist(Op.getNode());
      }
      EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
      Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
      if (VT != ResVT) {
        DCI.AddToWorklist(Op.getNode());
        Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
      }
      return Op;
    } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
                Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
                Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
               canTreatAsByteVector(Op.getValueType()) &&
               canTreatAsByteVector(Op.getOperand(0).getValueType())) {
      // Make sure that only the unextended bits are significant.
      EVT ExtVT = Op.getValueType();
      EVT OpVT = Op.getOperand(0).getValueType();
      unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
      unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
      unsigned Byte = Index * BytesPerElement;
      unsigned SubByte = Byte % ExtBytesPerElement;
      unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
      if (SubByte < MinSubByte ||
          SubByte + BytesPerElement > ExtBytesPerElement)
        break;
      // Get the byte offset of the unextended element
      Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
      // ...then add the byte offset relative to that element.
      Byte += SubByte - MinSubByte;
      if (Byte % BytesPerElement != 0)
        break;
      Op = Op.getOperand(0);
      Index = Byte / BytesPerElement;
      Force = true;
    } else
      break;
  }
  if (Force) {
    if (Op.getValueType() != VecVT) {
      Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
      DCI.AddToWorklist(Op.getNode());
    }
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
                       DAG.getConstant(Index, DL, MVT::i32));
  }
  return SDValue();
}

// Optimize vector operations in scalar value Op on the basis that Op
// is truncated to TruncVT.
SDValue SystemZTargetLowering::combineTruncateExtract(
    const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
  // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
  // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
  // of type TruncVT.
  if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      TruncVT.getSizeInBits() % 8 == 0) {
    SDValue Vec = Op.getOperand(0);
    EVT VecVT = Vec.getValueType();
    if (canTreatAsByteVector(VecVT)) {
      if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
        unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
        unsigned TruncBytes = TruncVT.getStoreSize();
        if (BytesPerElement % TruncBytes == 0) {
          // Calculate the value of Y' in the above description.  We are
          // splitting the original elements into Scale equal-sized pieces
          // and for truncation purposes want the last (least-significant)
          // of these pieces for IndexN.  This is easiest to do by calculating
          // the start index of the following element and then subtracting 1.
          unsigned Scale = BytesPerElement / TruncBytes;
          unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;

          // Defer the creation of the bitcast from X to combineExtract,
          // which might be able to optimize the extraction.
          VecVT = EVT::getVectorVT(*DCI.DAG.getContext(),
                                   MVT::getIntegerVT(TruncBytes * 8),
                                   VecVT.getStoreSize() / TruncBytes);
          EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
          return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
        }
      }
    }
  }
  return SDValue();
}
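
// As a concrete instance of the index arithmetic above: truncating an i32
// element extracted from a v4i32 down to i8 gives Scale = 4, so element Y of
// the v4i32 becomes element 4 * Y + 3 of the reinterpreted v16i8 vector,
// i.e. the least-significant byte of the original element on this
// big-endian target.
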
SDValue SystemZTargetLowering::combineZERO_EXTEND(
    SDNode *N, DAGCombinerInfo &DCI) const {
  // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
  SelectionDAG &DAG = DCI.DAG;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
    auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
    auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (TrueOp && FalseOp) {
      SDLoc DL(N0);
      SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
                        DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
                        N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
      SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
      // If N0 has multiple uses, change other uses as well.
      if (!N0.hasOneUse()) {
        SDValue TruncSelect =
          DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
        DCI.CombineTo(N0.getNode(), TruncSelect);
      }
      return NewSelect;
    }
  }
  // Convert (zext (xor (trunc X), C)) into (xor (trunc X), C') if the size
  // of the result is smaller than the size of X and all the truncated bits
  // of X are already zero.
  if (N0.getOpcode() == ISD::XOR &&
      N0.hasOneUse() && N0.getOperand(0).hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant) {
    SDValue X = N0.getOperand(0).getOperand(0);
    if (VT.isScalarInteger() && VT.getSizeInBits() < X.getValueSizeInBits()) {
      KnownBits Known = DAG.computeKnownBits(X);
      APInt TruncatedBits = APInt::getBitsSet(X.getValueSizeInBits(),
                                              N0.getValueSizeInBits(),
                                              VT.getSizeInBits());
      if (TruncatedBits.isSubsetOf(Known.Zero)) {
        X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
        APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
        return DAG.getNode(ISD::XOR, SDLoc(N0), VT,
                           X, DAG.getConstant(Mask, SDLoc(N0), VT));
      }
    }
  }

  return SDValue();
}

SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
    SDNode *N, DAGCombinerInfo &DCI) const {
  // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
  // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
  // into (select_cc LHS, RHS, -1, 0, COND)
  SelectionDAG &DAG = DCI.DAG;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
    N0 = N0.getOperand(0);
  if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
    SDLoc DL(N0);
    SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1),
                      DAG.getAllOnesConstant(DL, VT),
                      DAG.getConstant(0, DL, VT), N0.getOperand(2) };
    return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
  }
  return SDValue();
}
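
// For the (sext (ashr (shl X, C1), C2)) combine below, the shift amounts are
// simply rebased to the wider type: for instance, sign-extending the i32
// value (ashr (shl X, 24), 24) to i64 yields (sra (shl (anyext X), 56), 56).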
SDValue SystemZTargetLowering::combineSIGN_EXTEND(
    SDNode *N, DAGCombinerInfo &DCI) const {
  // Convert (sext (ashr (shl X, C1), C2)) to
  // (ashr (shl (anyext X), C1'), C2'), since wider shifts are as
  // cheap as narrower ones.
  SelectionDAG &DAG = DCI.DAG;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
    auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    SDValue Inner = N0.getOperand(0);
    if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
      if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
        unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
        unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
        unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
        EVT ShiftVT = N0.getOperand(1).getValueType();
        SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
                                  Inner.getOperand(0));
        SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
                                  DAG.getConstant(NewShlAmt, SDLoc(Inner),
                                                  ShiftVT));
        return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
                           DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
      }
    }
  }

  return SDValue();
}

SDValue SystemZTargetLowering::combineMERGE(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  unsigned Opcode = N->getOpcode();
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  if (Op0.getOpcode() == ISD::BITCAST)
    Op0 = Op0.getOperand(0);
  if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
    // (z_merge_* 0, 0) -> 0.  This is mostly useful for using VLLEZF
    // for v4f32.
    if (Op1 == N->getOperand(0))
      return Op1;
    // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
    EVT VT = Op1.getValueType();
    unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
    if (ElemBytes <= 4) {
      Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
                SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
      EVT InVT = VT.changeVectorElementTypeToInteger();
      EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
                                   SystemZ::VectorBytes / ElemBytes / 2);
      if (VT != InVT) {
        Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
        DCI.AddToWorklist(Op1.getNode());
      }
      SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
      DCI.AddToWorklist(Op.getNode());
      return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
    }
  }
  return SDValue();
}

static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
                               SDNode *&HiPart) {
  LoPart = HiPart = nullptr;

  // Scan through all users.
  for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
       UI != UIEnd; ++UI) {
    // Skip the uses of the chain.
    if (UI.getUse().getResNo() != 0)
      continue;

    // Verify every user is a TRUNCATE to i64 of the low or high half.
    SDNode *User = *UI;
    bool IsLoPart = true;
    if (User->getOpcode() == ISD::SRL &&
        User->getOperand(1).getOpcode() == ISD::Constant &&
        User->getConstantOperandVal(1) == 64 && User->hasOneUse()) {
      User = *User->use_begin();
      IsLoPart = false;
    }
    if (User->getOpcode() != ISD::TRUNCATE || User->getValueType(0) != MVT::i64)
      return false;

    if (IsLoPart) {
      if (LoPart)
        return false;
      LoPart = User;
    } else {
      if (HiPart)
        return false;
      HiPart = User;
    }
  }
  return true;
}

static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
                               SDNode *&HiPart) {
  LoPart = HiPart = nullptr;

  // Scan through all users.
  for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
       UI != UIEnd; ++UI) {
    // Skip the uses of the chain.
    if (UI.getUse().getResNo() != 0)
      continue;

    // Verify every user is an EXTRACT_SUBREG of the low or high half.
    SDNode *User = *UI;
    if (!User->hasOneUse() || !User->isMachineOpcode() ||
        User->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
      return false;

    switch (User->getConstantOperandVal(1)) {
    case SystemZ::subreg_l64:
      if (LoPart)
        return false;
      LoPart = User;
      break;
    case SystemZ::subreg_h64:
      if (HiPart)
        return false;
      HiPart = User;
      break;
    default:
      return false;
    }
  }
  return true;
}

SDValue SystemZTargetLowering::combineLOAD(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  EVT LdVT = N->getValueType(0);
  SDLoc DL(N);

  // Replace a 128-bit load that is used solely to move its value into GPRs
  // by separate loads of both halves.
  LoadSDNode *LD = cast<LoadSDNode>(N);
  if (LD->isSimple() && ISD::isNormalLoad(LD)) {
    SDNode *LoPart, *HiPart;
    if ((LdVT == MVT::i128 && isI128MovedToParts(LD, LoPart, HiPart)) ||
        (LdVT == MVT::f128 && isF128MovedToParts(LD, LoPart, HiPart))) {
      // Rewrite each extraction as an independent load.
      SmallVector<SDValue, 2> ArgChains;
      if (HiPart) {
        SDValue EltLoad = DAG.getLoad(
            HiPart->getValueType(0), DL, LD->getChain(), LD->getBasePtr(),
            LD->getPointerInfo(), LD->getOriginalAlign(),
            LD->getMemOperand()->getFlags(), LD->getAAInfo());

        DCI.CombineTo(HiPart, EltLoad, true);
        ArgChains.push_back(EltLoad.getValue(1));
      }
      if (LoPart) {
        SDValue EltLoad = DAG.getLoad(
            LoPart->getValueType(0), DL, LD->getChain(),
            DAG.getObjectPtrOffset(DL, LD->getBasePtr(), TypeSize::getFixed(8)),
            LD->getPointerInfo().getWithOffset(8), LD->getOriginalAlign(),
            LD->getMemOperand()->getFlags(), LD->getAAInfo());

        DCI.CombineTo(LoPart, EltLoad, true);
        ArgChains.push_back(EltLoad.getValue(1));
      }

      // Collect all chains via TokenFactor.
      SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, ArgChains);
      DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
      DCI.AddToWorklist(Chain.getNode());
      return SDValue(N, 0);
    }
  }

  if (LdVT.isVector() || LdVT.isInteger())
    return SDValue();
  // Transform a scalar load that is REPLICATEd as well as having other
  // use(s) to the form where the other use(s) use the first element of the
  // REPLICATE instead of the load.  Otherwise instruction selection will not
  // produce a VLREP.  Avoid extracting to a GPR, so only do this for floating
  // point loads.

  SDValue Replicate;
  SmallVector<SDNode*, 8> OtherUses;
  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
       UI != UE; ++UI) {
    if (UI->getOpcode() == SystemZISD::REPLICATE) {
      if (Replicate)
        return SDValue(); // Should never happen
      Replicate = SDValue(*UI, 0);
    }
    else if (UI.getUse().getResNo() == 0)
      OtherUses.push_back(*UI);
  }
  if (!Replicate || OtherUses.empty())
    return SDValue();

  SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
                                 Replicate, DAG.getConstant(0, DL, MVT::i32));
  // Update uses of the loaded Value while preserving old chains.
  for (SDNode *U : OtherUses) {
    SmallVector<SDValue, 8> Ops;
    for (SDValue Op : U->ops())
      Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
    DAG.UpdateNodeOperands(U, Ops);
  }
  return SDValue(N, 0);
}
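
// For instance, an f64 load feeding both a REPLICATE and an ordinary scalar
// use is rewritten above so that the scalar use reads element 0 of the
// replicated vector; instruction selection can then fold the load and the
// splat into a single VLREP.
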
bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
  if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
    return true;
  if (Subtarget.hasVectorEnhancements2())
    if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 ||
        VT == MVT::i128)
      return true;
  return false;
}

static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) {
  if (!VT.isVector() || !VT.isSimple() ||
      VT.getSizeInBits() != 128 ||
      VT.getScalarSizeInBits() % 8 != 0)
    return false;

  unsigned NumElts = VT.getVectorNumElements();
  for (unsigned i = 0; i < NumElts; ++i) {
    if (M[i] < 0) continue; // ignore UNDEF indices
    if ((unsigned) M[i] != NumElts - 1 - i)
      return false;
  }

  return true;
}

static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) {
  for (auto *U : StoredVal->uses()) {
    if (StoreSDNode *ST = dyn_cast<StoreSDNode>(U)) {
      EVT CurrMemVT = ST->getMemoryVT().getScalarType();
      if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16)
        continue;
    } else if (isa<BuildVectorSDNode>(U)) {
      SDValue BuildVector = SDValue(U, 0);
      if (DAG.isSplatValue(BuildVector, true/*AllowUndefs*/) &&
          isOnlyUsedByStores(BuildVector, DAG))
        continue;
    }
    return false;
  }
  return true;
}

static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart,
                                 SDValue &HiPart) {
  if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse())
    return false;

  SDValue Op0 = Val.getOperand(0);
  SDValue Op1 = Val.getOperand(1);

  if (Op0.getOpcode() == ISD::SHL)
    std::swap(Op0, Op1);
  if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() ||
      Op1.getOperand(1).getOpcode() != ISD::Constant ||
      Op1.getConstantOperandVal(1) != 64)
    return false;
  Op1 = Op1.getOperand(0);

  if (Op0.getOpcode() != ISD::ZERO_EXTEND || !Op0.getNode()->hasOneUse() ||
      Op0.getOperand(0).getValueType() != MVT::i64)
    return false;
  if (Op1.getOpcode() != ISD::ANY_EXTEND || !Op1.getNode()->hasOneUse() ||
      Op1.getOperand(0).getValueType() != MVT::i64)
    return false;

  LoPart = Op0.getOperand(0);
  HiPart = Op1.getOperand(0);
  return true;
}

static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart,
                                 SDValue &HiPart) {
  if (!Val.getNode()->hasOneUse() || !Val.isMachineOpcode() ||
      Val.getMachineOpcode() != TargetOpcode::REG_SEQUENCE)
    return false;

  if (Val->getNumOperands() != 5 ||
      Val->getOperand(0)->getAsZExtVal() != SystemZ::FP128BitRegClassID ||
      Val->getOperand(2)->getAsZExtVal() != SystemZ::subreg_l64 ||
      Val->getOperand(4)->getAsZExtVal() != SystemZ::subreg_h64)
    return false;

  LoPart = Val->getOperand(1);
  HiPart = Val->getOperand(3);
  return true;
}

SDValue SystemZTargetLowering::combineSTORE(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  auto *SN = cast<StoreSDNode>(N);
  auto &Op1 = N->getOperand(1);
  EVT MemVT = SN->getMemoryVT();
  // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
  // for the extraction to be done on a vMiN value, so that we can use VSTE.
  // If X has wider elements then convert it to:
  // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
  if (MemVT.isInteger() && SN->isTruncatingStore()) {
    if (SDValue Value =
            combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
      DCI.AddToWorklist(Value.getNode());

      // Rewrite the store with the new form of stored value.
      return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
                               SN->getBasePtr(), SN->getMemoryVT(),
                               SN->getMemOperand());
    }
  }
  // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
  if (!SN->isTruncatingStore() &&
      Op1.getOpcode() == ISD::BSWAP &&
      Op1.getNode()->hasOneUse() &&
      canLoadStoreByteSwapped(Op1.getValueType())) {

    SDValue BSwapOp = Op1.getOperand(0);

    if (BSwapOp.getValueType() == MVT::i16)
      BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);

    SDValue Ops[] = {
      N->getOperand(0), BSwapOp, N->getOperand(2)
    };

    return DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N),
                                   DAG.getVTList(MVT::Other),
                                   Ops, MemVT, SN->getMemOperand());
  }
  // Combine STORE (element-swap) into VSTER
  if (!SN->isTruncatingStore() &&
      Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
      Op1.getNode()->hasOneUse() &&
      Subtarget.hasVectorEnhancements2()) {
    ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
    ArrayRef<int> ShuffleMask = SVN->getMask();
    if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
      SDValue Ops[] = {
        N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
      };

      return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
                                     DAG.getVTList(MVT::Other),
                                     Ops, MemVT, SN->getMemOperand());
    }
  }

  // Combine STORE (READCYCLECOUNTER) into STCKF.
  if (!SN->isTruncatingStore() &&
      Op1.getOpcode() == ISD::READCYCLECOUNTER &&
      Op1.hasOneUse() &&
      N->getOperand(0).reachesChainWithoutSideEffects(
          SDValue(Op1.getNode(), 1))) {
    SDValue Ops[] = { Op1.getOperand(0), N->getOperand(2) };
    return DAG.getMemIntrinsicNode(SystemZISD::STCKF, SDLoc(N),
                                   DAG.getVTList(MVT::Other),
                                   Ops, MemVT, SN->getMemOperand());
  }

  // Transform a store of a 128-bit value moved from parts into two stores.
  if (SN->isSimple() && ISD::isNormalStore(SN)) {
    SDValue LoPart, HiPart;
    if ((MemVT == MVT::i128 && isI128MovedFromParts(Op1, LoPart, HiPart)) ||
        (MemVT == MVT::f128 && isF128MovedFromParts(Op1, LoPart, HiPart))) {
      SDLoc DL(SN);
      SDValue Chain0 =
        DAG.getStore(SN->getChain(), DL, HiPart, SN->getBasePtr(),
                     SN->getPointerInfo(), SN->getOriginalAlign(),
                     SN->getMemOperand()->getFlags(), SN->getAAInfo());
      SDValue Chain1 =
        DAG.getStore(SN->getChain(), DL, LoPart,
                     DAG.getObjectPtrOffset(DL, SN->getBasePtr(),
                                            TypeSize::getFixed(8)),
                     SN->getPointerInfo().getWithOffset(8),
                     SN->getOriginalAlign(),
                     SN->getMemOperand()->getFlags(), SN->getAAInfo());

      return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain0, Chain1);
    }
  }

  // Replicate a reg or immediate with VREP instead of scalar multiply or
  // immediate load.  It seems best to do this during the first DAGCombine as
  // it is straightforward to handle the zero-extend node in the initial
  // DAG, and also not worry about keeping the new MemVT legal (e.g. when
when7133// extracting an i16 element from a v16i8 vector).7134if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes &&7135isOnlyUsedByStores(Op1, DAG)) {7136SDValue Word = SDValue();7137EVT WordVT;71387139// Find a replicated immediate and return it if found in Word and its7140// type in WordVT.7141auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) {7142// Some constants are better handled with a scalar store.7143if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() ||7144isInt<16>(C->getSExtValue()) || MemVT.getStoreSize() <= 2)7145return;7146SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, C->getZExtValue()));7147if (VCI.isVectorConstantLegal(Subtarget) &&7148VCI.Opcode == SystemZISD::REPLICATE) {7149Word = DAG.getConstant(VCI.OpVals[0], SDLoc(SN), MVT::i32);7150WordVT = VCI.VecVT.getScalarType();7151}7152};71537154// Find a replicated register and return it if found in Word and its type7155// in WordVT.7156auto FindReplicatedReg = [&](SDValue MulOp) {7157EVT MulVT = MulOp.getValueType();7158if (MulOp->getOpcode() == ISD::MUL &&7159(MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) {7160// Find a zero extended value and its type.7161SDValue LHS = MulOp->getOperand(0);7162if (LHS->getOpcode() == ISD::ZERO_EXTEND)7163WordVT = LHS->getOperand(0).getValueType();7164else if (LHS->getOpcode() == ISD::AssertZext)7165WordVT = cast<VTSDNode>(LHS->getOperand(1))->getVT();7166else7167return;7168// Find a replicating constant, e.g. 0x00010001.7169if (auto *C = dyn_cast<ConstantSDNode>(MulOp->getOperand(1))) {7170SystemZVectorConstantInfo VCI(7171APInt(MulVT.getSizeInBits(), C->getZExtValue()));7172if (VCI.isVectorConstantLegal(Subtarget) &&7173VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 &&7174WordVT == VCI.VecVT.getScalarType())7175Word = DAG.getZExtOrTrunc(LHS->getOperand(0), SDLoc(SN), WordVT);7176}7177}7178};71797180if (isa<BuildVectorSDNode>(Op1) &&7181DAG.isSplatValue(Op1, true/*AllowUndefs*/)) {7182SDValue SplatVal = Op1->getOperand(0);7183if (auto *C = dyn_cast<ConstantSDNode>(SplatVal))7184FindReplicatedImm(C, SplatVal.getValueType().getStoreSize());7185else7186FindReplicatedReg(SplatVal);7187} else {7188if (auto *C = dyn_cast<ConstantSDNode>(Op1))7189FindReplicatedImm(C, MemVT.getStoreSize());7190else7191FindReplicatedReg(Op1);7192}71937194if (Word != SDValue()) {7195assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 &&7196"Bad type handling");7197unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits();7198EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), WordVT, NumElts);7199SDValue SplatVal = DAG.getSplatVector(SplatVT, SDLoc(SN), Word);7200return DAG.getStore(SN->getChain(), SDLoc(SN), SplatVal,7201SN->getBasePtr(), SN->getMemOperand());7202}7203}72047205return SDValue();7206}72077208SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(7209SDNode *N, DAGCombinerInfo &DCI) const {7210SelectionDAG &DAG = DCI.DAG;7211// Combine element-swap (LOAD) into VLER7212if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&7213N->getOperand(0).hasOneUse() &&7214Subtarget.hasVectorEnhancements2()) {7215ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);7216ArrayRef<int> ShuffleMask = SVN->getMask();7217if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {7218SDValue Load = N->getOperand(0);7219LoadSDNode *LD = cast<LoadSDNode>(Load);72207221// Create the element-swapping load.7222SDValue Ops[] = {7223LD->getChain(), // Chain7224LD->getBasePtr() // Ptr7225};7226SDValue ESLoad 
SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  // Combine element-swap (LOAD) into VLER
  if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
      N->getOperand(0).hasOneUse() &&
      Subtarget.hasVectorEnhancements2()) {
    ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
    ArrayRef<int> ShuffleMask = SVN->getMask();
    if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {
      SDValue Load = N->getOperand(0);
      LoadSDNode *LD = cast<LoadSDNode>(Load);

      // Create the element-swapping load.
      SDValue Ops[] = {
        LD->getChain(),    // Chain
        LD->getBasePtr()   // Ptr
      };
      SDValue ESLoad =
        DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N),
                                DAG.getVTList(LD->getValueType(0), MVT::Other),
                                Ops, LD->getMemoryVT(), LD->getMemOperand());

      // First, combine the VECTOR_SHUFFLE away.  This makes the value produced
      // by the load dead.
      DCI.CombineTo(N, ESLoad);

      // Next, combine the load away: we give it a bogus result value but a
      // real chain result.  The result value is dead because the shuffle is
      // dead.
      DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1));

      // Return N so it doesn't get rechecked!
      return SDValue(N, 0);
    }
  }

  return SDValue();
}

SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  if (!Subtarget.hasVector())
    return SDValue();

  // Look through bitcasts that retain the number of vector elements.
  SDValue Op = N->getOperand(0);
  if (Op.getOpcode() == ISD::BITCAST &&
      Op.getValueType().isVector() &&
      Op.getOperand(0).getValueType().isVector() &&
      Op.getValueType().getVectorNumElements() ==
        Op.getOperand(0).getValueType().getVectorNumElements())
    Op = Op.getOperand(0);

  // Pull BSWAP out of a vector extraction.
  if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
    EVT VecVT = Op.getValueType();
    EVT EltVT = VecVT.getVectorElementType();
    Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT,
                     Op.getOperand(0), N->getOperand(1));
    DCI.AddToWorklist(Op.getNode());
    Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op);
    if (EltVT != N->getValueType(0)) {
      DCI.AddToWorklist(Op.getNode());
      Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op);
    }
    return Op;
  }

  // Try to simplify a vector extraction.
  if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
    SDValue Op0 = N->getOperand(0);
    EVT VecVT = Op0.getValueType();
    return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
                          IndexN->getZExtValue(), DCI, false);
  }
  return SDValue();
}

SDValue SystemZTargetLowering::combineJOIN_DWORDS(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  // (join_dwords X, X) == (replicate X)
  if (N->getOperand(0) == N->getOperand(1))
    return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
                       N->getOperand(0));
  return SDValue();
}

static SDValue MergeInputChains(SDNode *N1, SDNode *N2) {
  SDValue Chain1 = N1->getOperand(0);
  SDValue Chain2 = N2->getOperand(0);

  // Trivial case: both nodes take the same chain.
  if (Chain1 == Chain2)
    return Chain1;

  // FIXME - we could handle more complex cases via TokenFactor,
  // assuming we can verify that this would not create a cycle.
  return SDValue();
}

SDValue SystemZTargetLowering::combineFP_ROUND(
    SDNode *N, DAGCombinerInfo &DCI) const {

  if (!Subtarget.hasVector())
    return SDValue();

  // (fpround (extract_vector_elt X 0))
  // (fpround (extract_vector_elt X 1)) ->
  // (extract_vector_elt (VROUND X) 0)
  // (extract_vector_elt (VROUND X) 2)
  //
  // This is a special case since the target doesn't really support v2f32s.
  unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op0 = N->getOperand(OpNo);
  if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() &&
      Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      Op0.getOperand(0).getValueType() == MVT::v2f64 &&
      Op0.getOperand(1).getOpcode() == ISD::Constant &&
      Op0.getConstantOperandVal(1) == 0) {
    SDValue Vec = Op0.getOperand(0);
    for (auto *U : Vec->uses()) {
      if (U != Op0.getNode() && U->hasOneUse() &&
          U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          U->getOperand(0) == Vec &&
          U->getOperand(1).getOpcode() == ISD::Constant &&
          U->getConstantOperandVal(1) == 1) {
        SDValue OtherRound = SDValue(*U->use_begin(), 0);
        if (OtherRound.getOpcode() == N->getOpcode() &&
            OtherRound.getOperand(OpNo) == SDValue(U, 0) &&
            OtherRound.getValueType() == MVT::f32) {
          SDValue VRound, Chain;
          if (N->isStrictFPOpcode()) {
            Chain = MergeInputChains(N, OtherRound.getNode());
            if (!Chain)
              continue;
            VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N),
                                 {MVT::v4f32, MVT::Other}, {Chain, Vec});
            Chain = VRound.getValue(1);
          } else
            VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
                                 MVT::v4f32, Vec);
          DCI.AddToWorklist(VRound.getNode());
          SDValue Extract1 =
            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
                        VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
          DCI.AddToWorklist(Extract1.getNode());
          DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
          if (Chain)
            DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain);
          SDValue Extract0 =
            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
                        VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
          if (Chain)
            return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
                               N->getVTList(), Extract0, Chain);
          return Extract0;
        }
      }
    }
  }
  return SDValue();
}

SDValue SystemZTargetLowering::combineFP_EXTEND(
    SDNode *N, DAGCombinerInfo &DCI) const {

  if (!Subtarget.hasVector())
    return SDValue();

  // (fpextend (extract_vector_elt X 0))
  // (fpextend (extract_vector_elt X 2)) ->
  // (extract_vector_elt (VEXTEND X) 0)
  // (extract_vector_elt (VEXTEND X) 1)
  //
  // This is a special case since the target doesn't really support v2f32s.
  unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op0 = N->getOperand(OpNo);
  if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() &&
      Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      Op0.getOperand(0).getValueType() == MVT::v4f32 &&
      Op0.getOperand(1).getOpcode() == ISD::Constant &&
      Op0.getConstantOperandVal(1) == 0) {
    SDValue Vec = Op0.getOperand(0);
    for (auto *U : Vec->uses()) {
      if (U != Op0.getNode() && U->hasOneUse() &&
          U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          U->getOperand(0) == Vec &&
          U->getOperand(1).getOpcode() == ISD::Constant &&
          U->getConstantOperandVal(1) == 2) {
        SDValue OtherExtend = SDValue(*U->use_begin(), 0);
        if (OtherExtend.getOpcode() == N->getOpcode() &&
            OtherExtend.getOperand(OpNo) == SDValue(U, 0) &&
            OtherExtend.getValueType() == MVT::f64) {
          SDValue VExtend, Chain;
          if (N->isStrictFPOpcode()) {
            Chain = MergeInputChains(N, OtherExtend.getNode());
            if (!Chain)
              continue;
            VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N),
                                  {MVT::v2f64, MVT::Other}, {Chain, Vec});
            Chain = VExtend.getValue(1);
          } else
            VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
                                  MVT::v2f64, Vec);
          DCI.AddToWorklist(VExtend.getNode());
          SDValue Extract1 =
            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
                        VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
          DCI.AddToWorklist(Extract1.getNode());
          DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
          if (Chain)
            DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain);
          SDValue Extract0 =
            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
                        VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
          if (Chain)
            return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
                               N->getVTList(), Extract0, Chain);
          return Extract0;
        }
      }
    }
  }
  return SDValue();
}

SDValue SystemZTargetLowering::combineINT_TO_FP(
    SDNode *N, DAGCombinerInfo &DCI) const {
  if (DCI.Level != BeforeLegalizeTypes)
    return SDValue();
  SelectionDAG &DAG = DCI.DAG;
  LLVMContext &Ctx = *DAG.getContext();
  unsigned Opcode = N->getOpcode();
  EVT OutVT = N->getValueType(0);
  Type *OutLLVMTy = OutVT.getTypeForEVT(Ctx);
  SDValue Op = N->getOperand(0);
  unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits();
  unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits();

  // Insert an extension before type-legalization to avoid scalarization, e.g.:
  // v2f64 = uint_to_fp v2i16
  // =>
  // v2f64 = uint_to_fp (v2i64 zero_extend v2i16)
  if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits &&
      OutScalarBits <= 64) {
    unsigned NumElts = cast<FixedVectorType>(OutLLVMTy)->getNumElements();
    EVT ExtVT = EVT::getVectorVT(
        Ctx, EVT::getIntegerVT(Ctx, OutLLVMTy->getScalarSizeInBits()), NumElts);
    unsigned ExtOpcode =
        (Opcode == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND);
    SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op);
    return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp);
  }
  return SDValue();
}

SDValue SystemZTargetLowering::combineBSWAP(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
  if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
      N->getOperand(0).hasOneUse() &&
      canLoadStoreByteSwapped(N->getValueType(0))) {
    SDValue Load = N->getOperand(0);
    LoadSDNode *LD = cast<LoadSDNode>(Load);

    // Create the byte-swapping load.
    SDValue Ops[] = {
      LD->getChain(),    // Chain
      LD->getBasePtr()   // Ptr
    };
    EVT LoadVT = N->getValueType(0);
    if (LoadVT == MVT::i16)
      LoadVT = MVT::i32;
    SDValue BSLoad =
      DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
                              DAG.getVTList(LoadVT, MVT::Other),
                              Ops, LD->getMemoryVT(), LD->getMemOperand());

    // If this is an i16 load, insert the truncate.
    SDValue ResVal = BSLoad;
    if (N->getValueType(0) == MVT::i16)
      ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);

    // First, combine the bswap away.  This makes the value produced by the
    // load dead.
    DCI.CombineTo(N, ResVal);

    // Next, combine the load away: we give it a bogus result value but a real
    // chain result.  The result value is dead because the bswap is dead.
    DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));

    // Return N so it doesn't get rechecked!
    return SDValue(N, 0);
  }

  // Look through bitcasts that retain the number of vector elements.
  SDValue Op = N->getOperand(0);
  if (Op.getOpcode() == ISD::BITCAST &&
      Op.getValueType().isVector() &&
      Op.getOperand(0).getValueType().isVector() &&
      Op.getValueType().getVectorNumElements() ==
        Op.getOperand(0).getValueType().getVectorNumElements())
    Op = Op.getOperand(0);

  // Push BSWAP into a vector insertion if at least one side then simplifies.
  if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
    SDValue Vec = Op.getOperand(0);
    SDValue Elt = Op.getOperand(1);
    SDValue Idx = Op.getOperand(2);

    if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) ||
        Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
        DAG.isConstantIntBuildVectorOrConstantInt(Elt) ||
        Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
        (canLoadStoreByteSwapped(N->getValueType(0)) &&
         ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) {
      EVT VecVT = N->getValueType(0);
      EVT EltVT = N->getValueType(0).getVectorElementType();
      if (VecVT != Vec.getValueType()) {
        Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec);
        DCI.AddToWorklist(Vec.getNode());
      }
      if (EltVT != Elt.getValueType()) {
        Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt);
        DCI.AddToWorklist(Elt.getNode());
      }
      Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec);
      DCI.AddToWorklist(Vec.getNode());
      Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt);
      DCI.AddToWorklist(Elt.getNode());
      return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT,
                         Vec, Elt, Idx);
    }
  }

  // Push BSWAP into a vector shuffle if at least one side then simplifies.
  ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
  if (SV && Op.hasOneUse()) {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
        Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
        DAG.isConstantIntBuildVectorOrConstantInt(Op1) ||
        Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
      EVT VecVT = N->getValueType(0);
      if (VecVT != Op0.getValueType()) {
        Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
        DCI.AddToWorklist(Op0.getNode());
      }
      if (VecVT != Op1.getValueType()) {
        Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
        DCI.AddToWorklist(Op1.getNode());
      }
      Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
      DCI.AddToWorklist(Op0.getNode());
      Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
      DCI.AddToWorklist(Op1.getNode());
      return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
    }
  }

  return SDValue();
}

static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
  // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
  // set by the CCReg instruction using the CCValid / CCMask masks.  If
  // the CCReg instruction is itself an ICMP testing the condition
  // code set by some other instruction, see whether we can directly
  // use that condition code.

  // Verify that we have an ICMP against some constant.
  if (CCValid != SystemZ::CCMASK_ICMP)
    return false;
  auto *ICmp = CCReg.getNode();
  if (ICmp->getOpcode() != SystemZISD::ICMP)
    return false;
  auto *CompareLHS = ICmp->getOperand(0).getNode();
  auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
  if (!CompareRHS)
    return false;

  // Optimize the case where CompareLHS is a SELECT_CCMASK.
  if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
    // Verify that we have an appropriate mask for an EQ or NE comparison.
    bool Invert = false;
    if (CCMask == SystemZ::CCMASK_CMP_NE)
      Invert = !Invert;
    else if (CCMask != SystemZ::CCMASK_CMP_EQ)
      return false;

    // Verify that the ICMP compares against one of the select values.
    auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0));
    if (!TrueVal)
      return false;
    auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
    if (!FalseVal)
      return false;
    if (CompareRHS->getZExtValue() == FalseVal->getZExtValue())
      Invert = !Invert;
    else if (CompareRHS->getZExtValue() != TrueVal->getZExtValue())
      return false;

    // Compute the effective CC mask for the new branch or select.
    auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2));
    auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3));
    if (!NewCCValid || !NewCCMask)
      return false;
    CCValid = NewCCValid->getZExtValue();
    CCMask = NewCCMask->getZExtValue();
    if (Invert)
      CCMask ^= CCValid;

    // Return the updated CCReg link.
    CCReg = CompareLHS->getOperand(4);
    return true;
  }

  // Optimize the case where CompareLHS is (SRA (SHL (IPM))).
  if (CompareLHS->getOpcode() == ISD::SRA) {
    auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
    if (!SRACount || SRACount->getZExtValue() != 30)
      return false;
    auto *SHL = CompareLHS->getOperand(0).getNode();
    if (SHL->getOpcode() != ISD::SHL)
      return false;
    auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1));
    if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC)
      return false;
    auto *IPM = SHL->getOperand(0).getNode();
    if (IPM->getOpcode() != SystemZISD::IPM)
      return false;

    // Avoid introducing CC spills (because SRA would clobber CC).
    if (!CompareLHS->hasOneUse())
      return false;
    // Verify that the ICMP compares against zero.
    if (CompareRHS->getZExtValue() != 0)
      return false;

    // Compute the effective CC mask for the new branch or select.
    CCMask = SystemZ::reverseCCMask(CCMask);

    // Return the updated CCReg link.
    CCReg = IPM->getOperand(0);
    return true;
  }

  return false;
}
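
// As an example of the Invert handling above: with CCValid == CCMASK_ICMP,
// XORing an EQ mask with CCValid yields exactly the NE (LT | GT) mask, so an
// inequality test of a SELECT_CCMASK result collapses onto the inverse of
// the select's own condition.
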
SDValue SystemZTargetLowering::combineBR_CCMASK(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
  auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
  auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
  if (!CCValid || !CCMask)
    return SDValue();

  int CCValidVal = CCValid->getZExtValue();
  int CCMaskVal = CCMask->getZExtValue();
  SDValue Chain = N->getOperand(0);
  SDValue CCReg = N->getOperand(4);

  if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
    return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
                       Chain,
                       DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
                       DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
                       N->getOperand(3), CCReg);
  return SDValue();
}

SDValue SystemZTargetLowering::combineSELECT_CCMASK(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
  auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2));
  auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3));
  if (!CCValid || !CCMask)
    return SDValue();

  int CCValidVal = CCValid->getZExtValue();
  int CCMaskVal = CCMask->getZExtValue();
  SDValue CCReg = N->getOperand(4);

  if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
    return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0),
                       N->getOperand(0), N->getOperand(1),
                       DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
                       DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
                       CCReg);
  return SDValue();
}

SDValue SystemZTargetLowering::combineGET_CCMASK(
    SDNode *N, DAGCombinerInfo &DCI) const {

  // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible
  auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
  auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
  if (!CCValid || !CCMask)
    return SDValue();
  int CCValidVal = CCValid->getZExtValue();
  int CCMaskVal = CCMask->getZExtValue();

  SDValue Select = N->getOperand(0);
  if (Select->getOpcode() == ISD::TRUNCATE)
    Select = Select->getOperand(0);
  if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
    return SDValue();

  auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
  auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
  if (!SelectCCValid || !SelectCCMask)
    return SDValue();
  int SelectCCValidVal = SelectCCValid->getZExtValue();
  int SelectCCMaskVal = SelectCCMask->getZExtValue();

  auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
  auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
  if (!TrueVal || !FalseVal)
    return SDValue();
  if (TrueVal->getZExtValue() == 1 && FalseVal->getZExtValue() == 0)
    ;
  else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() == 1)
    SelectCCMaskVal ^= SelectCCValidVal;
  else
    return SDValue();

  if (SelectCCValidVal & ~CCValidVal)
    return SDValue();
  if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
    return SDValue();

  return Select->getOperand(4);
}

SDValue SystemZTargetLowering::combineIntDIVREM(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  EVT VT = N->getValueType(0);
  // In the case where the divisor is a vector of constants a cheaper
  // sequence of instructions can replace the divide.  BuildSDIV is called to
  // do this during DAG combining, but it only succeeds when it can build a
  // multiplication node.  The only option for SystemZ is ISD::SMUL_LOHI, and
  // since it is not Legal but Custom it can only happen before
  // legalization.  Therefore we must scalarize this early before Combine
  // 1. For widened vectors, this is already the result of type legalization.
  if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) &&
      DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
    return DAG.UnrollVectorOp(N);
  return SDValue();
}

SDValue SystemZTargetLowering::combineINTRINSIC(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  unsigned Id = N->getConstantOperandVal(1);
  switch (Id) {
  // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15
  // or larger is simply a vector load.
  case Intrinsic::s390_vll:
  case Intrinsic::s390_vlrl:
    if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
      if (C->getZExtValue() >= 15)
        return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0),
                           N->getOperand(3), MachinePointerInfo());
    break;
  // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH.
  case Intrinsic::s390_vstl:
  case Intrinsic::s390_vstrl:
    if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
      if (C->getZExtValue() >= 15)
        return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2),
                            N->getOperand(4), MachinePointerInfo());
    break;
  }

  return SDValue();
}

SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
  if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
    return N->getOperand(0);
  return N;
}

SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
  switch(N->getOpcode()) {
  default: break;
  case ISD::ZERO_EXTEND:        return combineZERO_EXTEND(N, DCI);
  case ISD::SIGN_EXTEND:        return combineSIGN_EXTEND(N, DCI);
  case ISD::SIGN_EXTEND_INREG:  return combineSIGN_EXTEND_INREG(N, DCI);
  case SystemZISD::MERGE_HIGH:
  case SystemZISD::MERGE_LOW:   return combineMERGE(N, DCI);
  case ISD::LOAD:               return combineLOAD(N, DCI);
  case ISD::STORE:              return combineSTORE(N, DCI);
  case ISD::VECTOR_SHUFFLE:     return combineVECTOR_SHUFFLE(N, DCI);
  case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
  case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
  case ISD::STRICT_FP_ROUND:
  case ISD::FP_ROUND:           return combineFP_ROUND(N, DCI);
  case ISD::STRICT_FP_EXTEND:
  case ISD::FP_EXTEND:          return combineFP_EXTEND(N, DCI);
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:         return combineINT_TO_FP(N, DCI);
  case ISD::BSWAP:              return combineBSWAP(N, DCI);
  case SystemZISD::BR_CCMASK:   return combineBR_CCMASK(N, DCI);
  case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
  case SystemZISD::GET_CCMASK:  return combineGET_CCMASK(N, DCI);
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::SREM:
  case ISD::UREM:               return combineIntDIVREM(N, DCI);
  case ISD::INTRINSIC_W_CHAIN:
  case ISD::INTRINSIC_VOID:     return combineINTRINSIC(N, DCI);
  }

  return SDValue();
}
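
// The known-bits and sign-bits hooks below all depend on mapping demanded
// result elements back to source elements.  For a VECTOR PACK such as
// (v16i8 vpksh V1, V2), result bytes 0..7 come from the v8i16 elements of V1
// and bytes 8..15 from V2, which is the shift-and-truncate logic in
// getDemandedSrcElements.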
// Return the demanded elements for the OpNo source operand of Op.
// DemandedElts are for Op.
static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
                                    unsigned OpNo) {
  EVT VT = Op.getValueType();
  unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
  APInt SrcDemE;
  unsigned Opcode = Op.getOpcode();
  if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
    unsigned Id = Op.getConstantOperandVal(0);
    switch (Id) {
    case Intrinsic::s390_vpksh:   // PACKS
    case Intrinsic::s390_vpksf:
    case Intrinsic::s390_vpksg:
    case Intrinsic::s390_vpkshs:  // PACKS_CC
    case Intrinsic::s390_vpksfs:
    case Intrinsic::s390_vpksgs:
    case Intrinsic::s390_vpklsh:  // PACKLS
    case Intrinsic::s390_vpklsf:
    case Intrinsic::s390_vpklsg:
    case Intrinsic::s390_vpklshs: // PACKLS_CC
    case Intrinsic::s390_vpklsfs:
    case Intrinsic::s390_vpklsgs:
      // VECTOR PACK truncates the elements of two source vectors into one.
      SrcDemE = DemandedElts;
      if (OpNo == 2)
        SrcDemE.lshrInPlace(NumElts / 2);
      SrcDemE = SrcDemE.trunc(NumElts / 2);
      break;
      // VECTOR UNPACK extends half the elements of the source vector.
    case Intrinsic::s390_vuphb:  // VECTOR UNPACK HIGH
    case Intrinsic::s390_vuphh:
    case Intrinsic::s390_vuphf:
    case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
    case Intrinsic::s390_vuplhh:
    case Intrinsic::s390_vuplhf:
      SrcDemE = APInt(NumElts * 2, 0);
      SrcDemE.insertBits(DemandedElts, 0);
      break;
    case Intrinsic::s390_vuplb:  // VECTOR UNPACK LOW
    case Intrinsic::s390_vuplhw:
    case Intrinsic::s390_vuplf:
    case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
    case Intrinsic::s390_vupllh:
    case Intrinsic::s390_vupllf:
      SrcDemE = APInt(NumElts * 2, 0);
      SrcDemE.insertBits(DemandedElts, NumElts);
      break;
    case Intrinsic::s390_vpdi: {
      // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
      SrcDemE = APInt(NumElts, 0);
      if (!DemandedElts[OpNo - 1])
        break;
      unsigned Mask = Op.getConstantOperandVal(3);
      unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
      // Demand input element 0 or 1, given by the mask bit value.
      SrcDemE.setBit((Mask & MaskBit) ? 1 : 0);
static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
                                  const APInt &DemandedElts,
                                  const SelectionDAG &DAG, unsigned Depth,
                                  unsigned OpNo) {
  APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
  APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
  KnownBits LHSKnown =
      DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
  KnownBits RHSKnown =
      DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
  Known = LHSKnown.intersectWith(RHSKnown);
}
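
// Compute known bits for SystemZ target nodes and vector intrinsics.  Note
// that an intrinsic's CC result (result value 1) always fits in the two low
// bits, so everything above them is known zero.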
void
SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                     KnownBits &Known,
                                                     const APInt &DemandedElts,
                                                     const SelectionDAG &DAG,
                                                     unsigned Depth) const {
  Known.resetAll();

  // Intrinsic CC result is returned in the two low bits.
  unsigned tmp0, tmp1; // not used
  if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, tmp0, tmp1)) {
    Known.Zero.setBitsFrom(2);
    return;
  }
  EVT VT = Op.getValueType();
  if (Op.getResNo() != 0 || VT == MVT::Untyped)
    return;
  assert(Known.getBitWidth() == VT.getScalarSizeInBits() &&
         "KnownBits does not match VT in bitwidth");
  assert((!VT.isVector() ||
          (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
         "DemandedElts does not match VT number of elements");
  unsigned BitWidth = Known.getBitWidth();
  unsigned Opcode = Op.getOpcode();
  if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
    bool IsLogical = false;
    unsigned Id = Op.getConstantOperandVal(0);
    switch (Id) {
    case Intrinsic::s390_vpksh:   // PACKS
    case Intrinsic::s390_vpksf:
    case Intrinsic::s390_vpksg:
    case Intrinsic::s390_vpkshs:  // PACKS_CC
    case Intrinsic::s390_vpksfs:
    case Intrinsic::s390_vpksgs:
    case Intrinsic::s390_vpklsh:  // PACKLS
    case Intrinsic::s390_vpklsf:
    case Intrinsic::s390_vpklsg:
    case Intrinsic::s390_vpklshs: // PACKLS_CC
    case Intrinsic::s390_vpklsfs:
    case Intrinsic::s390_vpklsgs:
    case Intrinsic::s390_vpdi:
    case Intrinsic::s390_vsldb:
    case Intrinsic::s390_vperm:
      computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
      break;
    case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
    case Intrinsic::s390_vuplhh:
    case Intrinsic::s390_vuplhf:
    case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
    case Intrinsic::s390_vupllh:
    case Intrinsic::s390_vupllf:
      IsLogical = true;
      [[fallthrough]];
    case Intrinsic::s390_vuphb:  // VECTOR UNPACK HIGH
    case Intrinsic::s390_vuphh:
    case Intrinsic::s390_vuphf:
    case Intrinsic::s390_vuplb:  // VECTOR UNPACK LOW
    case Intrinsic::s390_vuplhw:
    case Intrinsic::s390_vuplf: {
      SDValue SrcOp = Op.getOperand(1);
      APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
      Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
      if (IsLogical) {
        Known = Known.zext(BitWidth);
      } else
        Known = Known.sext(BitWidth);
      break;
    }
    default:
      break;
    }
  } else {
    switch (Opcode) {
    case SystemZISD::JOIN_DWORDS:
    case SystemZISD::SELECT_CCMASK:
      computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
      break;
    case SystemZISD::REPLICATE: {
      SDValue SrcOp = Op.getOperand(0);
      Known = DAG.computeKnownBits(SrcOp, Depth + 1);
      if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp))
        Known = Known.sext(BitWidth); // VREPI sign extends the immediate.
      break;
    }
    default:
      break;
    }
  }

  // Known has the width of the source operand(s). Adjust if needed to match
  // the passed bitwidth.
  if (Known.getBitWidth() != BitWidth)
    Known = Known.anyextOrTrunc(BitWidth);
}
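
// Compute the number of known sign bits of a two-operand node.  For a PACK,
// each packed element keeps only the low VTBits of a wider source element:
// e.g. if both v4i32 sources of a pack to v8i16 have at least 20 sign bits,
// truncation discards 16 of them, leaving at least 4 per i16 element.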
static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
                                        const SelectionDAG &DAG, unsigned Depth,
                                        unsigned OpNo) {
  APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
  unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
  if (LHS == 1) return 1; // Early out.
  APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
  unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
  if (RHS == 1) return 1; // Early out.
  unsigned Common = std::min(LHS, RHS);
  unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
  EVT VT = Op.getValueType();
  unsigned VTBits = VT.getScalarSizeInBits();
  if (SrcBitWidth > VTBits) { // PACK
    unsigned SrcExtraBits = SrcBitWidth - VTBits;
    if (Common > SrcExtraBits)
      return (Common - SrcExtraBits);
    return 1;
  }
  assert(SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
  return Common;
}

unsigned
SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    unsigned Depth) const {
  if (Op.getResNo() != 0)
    return 1;
  unsigned Opcode = Op.getOpcode();
  if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
    unsigned Id = Op.getConstantOperandVal(0);
    switch (Id) {
    case Intrinsic::s390_vpksh:   // PACKS
    case Intrinsic::s390_vpksf:
    case Intrinsic::s390_vpksg:
    case Intrinsic::s390_vpkshs:  // PACKS_CC
    case Intrinsic::s390_vpksfs:
    case Intrinsic::s390_vpksgs:
    case Intrinsic::s390_vpklsh:  // PACKLS
    case Intrinsic::s390_vpklsf:
    case Intrinsic::s390_vpklsg:
    case Intrinsic::s390_vpklshs: // PACKLS_CC
    case Intrinsic::s390_vpklsfs:
    case Intrinsic::s390_vpklsgs:
    case Intrinsic::s390_vpdi:
    case Intrinsic::s390_vsldb:
    case Intrinsic::s390_vperm:
      return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
    case Intrinsic::s390_vuphb:  // VECTOR UNPACK HIGH
    case Intrinsic::s390_vuphh:
    case Intrinsic::s390_vuphf:
    case Intrinsic::s390_vuplb:  // VECTOR UNPACK LOW
    case Intrinsic::s390_vuplhw:
    case Intrinsic::s390_vuplf: {
      SDValue PackedOp = Op.getOperand(1);
      APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
      unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
      EVT VT = Op.getValueType();
      unsigned VTBits = VT.getScalarSizeInBits();
      Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
      return Tmp;
    }
    default:
      break;
    }
  } else {
    switch (Opcode) {
    case SystemZISD::SELECT_CCMASK:
      return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
    default:
      break;
    }
  }

  return 1;
}

bool SystemZTargetLowering::
isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op,
    const APInt &DemandedElts, const SelectionDAG &DAG,
    bool PoisonOnly, unsigned Depth) const {
  switch (Op->getOpcode()) {
  case SystemZISD::PCREL_WRAPPER:
  case SystemZISD::PCREL_OFFSET:
    return true;
  }
  return false;
}
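
// Return the stack probe size for this function, rounded down to the stack
// alignment.  For example, with an 8-byte stack alignment an attribute value
// of "stack-probe-size"="4100" yields 4096; a value that rounds down to zero
// falls back to the alignment itself.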
unsigned
SystemZTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
  unsigned StackAlign = TFI->getStackAlignment();
  assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
         "Unexpected stack alignment");
  // The default stack probe size is 4096 if the function has no
  // stack-probe-size attribute.
  unsigned StackProbeSize =
      MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size", 4096);
  // Round down to the stack alignment.
  StackProbeSize &= ~(StackAlign - 1);
  return StackProbeSize ? StackProbeSize : StackAlign;
}

//===----------------------------------------------------------------------===//
// Custom insertion
//===----------------------------------------------------------------------===//

// Force base value Base into a register before MI.  Return the register.
static Register forceReg(MachineInstr &MI, MachineOperand &Base,
                         const SystemZInstrInfo *TII) {
  MachineBasicBlock *MBB = MI.getParent();
  MachineFunction &MF = *MBB->getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  if (Base.isReg()) {
    // Copy Base into a new virtual register to help register coalescing in
    // cases with multiple uses.
    Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
    BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg)
      .add(Base);
    return Reg;
  }

  Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
  BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
      .add(Base)
      .addImm(0)
      .addReg(0);
  return Reg;
}

// The CC operand of MI might be missing a kill marker because there
// were multiple uses of CC, and ISel didn't know which to mark.
// Figure out whether MI should have had a kill marker.
static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) {
  // Scan forward through BB for a use/def of CC.
  MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI)));
  for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) {
    const MachineInstr& mi = *miI;
    if (mi.readsRegister(SystemZ::CC, /*TRI=*/nullptr))
      return false;
    if (mi.definesRegister(SystemZ::CC, /*TRI=*/nullptr))
      break; // Should have kill-flag - update below.
  }

  // If we hit the end of the block, check whether CC is live into a
  // successor.
  if (miI == MBB->end()) {
    for (const MachineBasicBlock *Succ : MBB->successors())
      if (Succ->isLiveIn(SystemZ::CC))
        return false;
  }

  return true;
}

// Return true if it is OK for this Select pseudo-opcode to be cascaded
// together with other Select pseudo-opcodes into a single basic-block with
// a conditional jump around it.
static bool isSelectPseudo(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case SystemZ::Select32:
  case SystemZ::Select64:
  case SystemZ::Select128:
  case SystemZ::SelectF32:
  case SystemZ::SelectF64:
  case SystemZ::SelectF128:
  case SystemZ::SelectVR32:
  case SystemZ::SelectVR64:
  case SystemZ::SelectVR128:
    return true;

  default:
    return false;
  }
}

// Helper function, which inserts PHI functions into SinkMBB:
//   %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
// where %FalseValue(i) and %TrueValue(i) are taken from Selects.
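//
// For example, if a second Select uses the first Select's result as its true
// operand, the second PHI must instead name the value that result takes on
// the true path; the RegRewriteTable below provides that mapping.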
static void createPHIsForSelects(SmallVector<MachineInstr*, 8> &Selects,
                                 MachineBasicBlock *TrueMBB,
                                 MachineBasicBlock *FalseMBB,
                                 MachineBasicBlock *SinkMBB) {
  MachineFunction *MF = TrueMBB->getParent();
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();

  MachineInstr *FirstMI = Selects.front();
  unsigned CCValid = FirstMI->getOperand(3).getImm();
  unsigned CCMask = FirstMI->getOperand(4).getImm();

  MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();

  // As we are creating the PHIs, we have to be careful if there is more than
  // one.  Later Selects may reference the results of earlier Selects, but later
  // PHIs have to reference the individual true/false inputs from earlier PHIs.
  // That also means that PHI construction must work forward from earlier to
  // later, and that the code must maintain a mapping from each earlier PHI's
  // destination register to the registers that went into the PHI.
  DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;

  for (auto *MI : Selects) {
    Register DestReg = MI->getOperand(0).getReg();
    Register TrueReg = MI->getOperand(1).getReg();
    Register FalseReg = MI->getOperand(2).getReg();

    // If this Select we are generating is the opposite condition from
    // the jump we generated, then we have to swap the operands for the
    // PHI that is going to be generated.
    if (MI->getOperand(4).getImm() == (CCValid ^ CCMask))
      std::swap(TrueReg, FalseReg);

    if (RegRewriteTable.contains(TrueReg))
      TrueReg = RegRewriteTable[TrueReg].first;

    if (RegRewriteTable.contains(FalseReg))
      FalseReg = RegRewriteTable[FalseReg].second;

    DebugLoc DL = MI->getDebugLoc();
    BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg)
      .addReg(TrueReg).addMBB(TrueMBB)
      .addReg(FalseReg).addMBB(FalseMBB);

    // Add this PHI to the rewrite table.
    RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg);
  }

  MF->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
}

MachineBasicBlock *
SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI,
                                        MachineBasicBlock *BB) const {
  MachineFunction &MF = *BB->getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
  assert(TFL->hasReservedCallFrame(MF) &&
         "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
  (void)TFL;
  // Get the MaxCallFrameSize value and erase MI since it serves no further
  // purpose as the call frame is statically reserved in the prolog.  Set
  // AdjustsStack as MI is *not* mapped as a frame instruction.
  uint32_t NumBytes = MI.getOperand(0).getImm();
  if (NumBytes > MFI.getMaxCallFrameSize())
    MFI.setMaxCallFrameSize(NumBytes);
  MFI.setAdjustsStack(true);

  MI.eraseFromParent();
  return BB;
}

// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
MachineBasicBlock *
SystemZTargetLowering::emitSelect(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const {
  assert(isSelectPseudo(MI) && "Bad call to emitSelect()");
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();

  unsigned CCValid = MI.getOperand(3).getImm();
  unsigned CCMask = MI.getOperand(4).getImm();

  // If we have a sequence of Select* pseudo instructions using the
  // same condition code value, we want to expand all of them into
  // a single pair of basic blocks using the same condition.
  SmallVector<MachineInstr*, 8> Selects;
  SmallVector<MachineInstr*, 8> DbgValues;
  Selects.push_back(&MI);
  unsigned Count = 0;
  for (MachineInstr &NextMI : llvm::make_range(
           std::next(MachineBasicBlock::iterator(MI)), MBB->end())) {
    if (isSelectPseudo(NextMI)) {
      assert(NextMI.getOperand(3).getImm() == CCValid &&
             "Bad CCValid operands since CC was not redefined.");
      if (NextMI.getOperand(4).getImm() == CCMask ||
          NextMI.getOperand(4).getImm() == (CCValid ^ CCMask)) {
        Selects.push_back(&NextMI);
        continue;
      }
      break;
    }
    if (NextMI.definesRegister(SystemZ::CC, /*TRI=*/nullptr) ||
        NextMI.usesCustomInsertionHook())
      break;
    bool User = false;
    for (auto *SelMI : Selects)
      if (NextMI.readsVirtualRegister(SelMI->getOperand(0).getReg())) {
        User = true;
        break;
      }
    if (NextMI.isDebugInstr()) {
      if (User) {
        assert(NextMI.isDebugValue() && "Unhandled debug opcode.");
        DbgValues.push_back(&NextMI);
      }
    } else if (User || ++Count > 20)
      break;
  }

  MachineInstr *LastMI = Selects.back();
  bool CCKilled = (LastMI->killsRegister(SystemZ::CC, /*TRI=*/nullptr) ||
                   checkCCKill(*LastMI, MBB));
  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *JoinMBB  = SystemZ::splitBlockAfter(LastMI, MBB);
  MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);

  // Unless CC was killed in the last Select instruction, mark it as
  // live-in to both FalseMBB and JoinMBB.
  if (!CCKilled) {
    FalseMBB->addLiveIn(SystemZ::CC);
    JoinMBB->addLiveIn(SystemZ::CC);
  }

  //  StartMBB:
  //   BRC CCMask, JoinMBB
  //   # fallthrough to FalseMBB
  MBB = StartMBB;
  BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
    .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
  MBB->addSuccessor(JoinMBB);
  MBB->addSuccessor(FalseMBB);

  //  FalseMBB:
  //   # fallthrough to JoinMBB
  MBB = FalseMBB;
  MBB->addSuccessor(JoinMBB);

  //  JoinMBB:
  //   %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
  //  ...
  MBB = JoinMBB;
  createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB);
  for (auto *SelMI : Selects)
    SelMI->eraseFromParent();

  MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
  for (auto *DbgMI : DbgValues)
    MBB->splice(InsertPos, StartMBB, DbgMI);

  return JoinMBB;
}

// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
// StoreOpcode is the store to use and Invert says whether the store should
// happen when the condition is false rather than true.  If a STORE ON
// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
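//
// Inverting a condition means flipping CCMask within CCValid, which is what
// the CCMask ^= CCValid statements below compute: e.g. for an integer
// compare, "equal" (CC 0) inverted against the valid set {0, 1, 2} becomes
// "not equal" (CC 1 or 2).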
MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
                                                        MachineBasicBlock *MBB,
                                                        unsigned StoreOpcode,
                                                        unsigned STOCOpcode,
                                                        bool Invert) const {
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();

  Register SrcReg = MI.getOperand(0).getReg();
  MachineOperand Base = MI.getOperand(1);
  int64_t Disp = MI.getOperand(2).getImm();
  Register IndexReg = MI.getOperand(3).getReg();
  unsigned CCValid = MI.getOperand(4).getImm();
  unsigned CCMask = MI.getOperand(5).getImm();
  DebugLoc DL = MI.getDebugLoc();

  StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);

  // ISel pattern matching also adds a load memory operand of the same
  // address, so take special care to find the storing memory operand.
  MachineMemOperand *MMO = nullptr;
  for (auto *I : MI.memoperands())
    if (I->isStore()) {
      MMO = I;
      break;
    }

  // Use STOCOpcode if possible.  We could use different store patterns in
  // order to avoid matching the index register, but the performance trade-offs
  // might be more complicated in that case.
  if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
    if (Invert)
      CCMask ^= CCValid;

    BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
        .addReg(SrcReg)
        .add(Base)
        .addImm(Disp)
        .addImm(CCValid)
        .addImm(CCMask)
        .addMemOperand(MMO);

    MI.eraseFromParent();
    return MBB;
  }

  // Get the condition needed to branch around the store.
  if (!Invert)
    CCMask ^= CCValid;

  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *JoinMBB  = SystemZ::splitBlockBefore(MI, MBB);
  MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);

  // Unless CC was killed in the CondStore instruction, mark it as
  // live-in to both FalseMBB and JoinMBB.
  if (!MI.killsRegister(SystemZ::CC, /*TRI=*/nullptr) &&
      !checkCCKill(MI, JoinMBB)) {
    FalseMBB->addLiveIn(SystemZ::CC);
    JoinMBB->addLiveIn(SystemZ::CC);
  }

  //  StartMBB:
  //   BRC CCMask, JoinMBB
  //   # fallthrough to FalseMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
  MBB->addSuccessor(JoinMBB);
  MBB->addSuccessor(FalseMBB);

  //  FalseMBB:
  //   store %SrcReg, %Disp(%Index,%Base)
  //   # fallthrough to JoinMBB
  MBB = FalseMBB;
  BuildMI(MBB, DL, TII->get(StoreOpcode))
      .addReg(SrcReg)
      .add(Base)
      .addImm(Disp)
      .addReg(IndexReg)
      .addMemOperand(MMO);
  MBB->addSuccessor(JoinMBB);

  MI.eraseFromParent();
  return JoinMBB;
}

// Implement EmitInstrWithCustomInserter for pseudo [SU]Cmp128Hi instruction MI.
MachineBasicBlock *
SystemZTargetLowering::emitICmp128Hi(MachineInstr &MI,
                                     MachineBasicBlock *MBB,
                                     bool Unsigned) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // Synthetic instruction to compare 128-bit values.
  // Sets CC 1 if Op0 > Op1, sets a different CC otherwise.
  Register Op0 = MI.getOperand(0).getReg();
  Register Op1 = MI.getOperand(1).getReg();

  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *JoinMBB  = SystemZ::splitBlockAfter(MI, MBB);
  MachineBasicBlock *HiEqMBB  = SystemZ::emitBlockAfter(StartMBB);

  //  StartMBB:
  //
  //  Use VECTOR ELEMENT COMPARE [LOGICAL] to compare the high parts.
  //  Swap the inputs to get:
  //    CC 1 if high(Op0) > high(Op1)
  //    CC 2 if high(Op0) < high(Op1)
  //    CC 0 if high(Op0) == high(Op1)
  //
  //  If CC != 0, we're done, so jump over the next instruction.
  //
  //   VEC[L]G Op1, Op0
  //   JNE JoinMBB
  //   # fallthrough to HiEqMBB
  MBB = StartMBB;
  int HiOpcode = Unsigned ? SystemZ::VECLG : SystemZ::VECG;
  BuildMI(MBB, MI.getDebugLoc(), TII->get(HiOpcode))
    .addReg(Op1).addReg(Op0);
  BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(JoinMBB);
  MBB->addSuccessor(JoinMBB);
  MBB->addSuccessor(HiEqMBB);

  //  HiEqMBB:
  //
  //  Otherwise, use VECTOR COMPARE HIGH LOGICAL.
  //  Since we already know the high parts are equal, the CC
  //  result will only depend on the low parts:
  //     CC 1 if low(Op0) > low(Op1)
  //     CC 3 if low(Op0) <= low(Op1)
  //
  //   VCHLGS Tmp, Op0, Op1
  //   # fallthrough to JoinMBB
  MBB = HiEqMBB;
  Register Temp = MRI.createVirtualRegister(&SystemZ::VR128BitRegClass);
  BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::VCHLGS), Temp)
    .addReg(Op0).addReg(Op1);
  MBB->addSuccessor(JoinMBB);

  // Mark CC as live-in to JoinMBB.
  JoinMBB->addLiveIn(SystemZ::CC);

  MI.eraseFromParent();
  return JoinMBB;
}

// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_LOADW_* or
// ATOMIC_SWAPW instruction MI.  BinOpcode is the instruction that performs
// the binary operation elided by "*", or 0 for ATOMIC_SWAPW.  Invert says
// whether the field should be inverted after performing BinOpcode (e.g. for
// NAND).
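//
// For example, an i8 NAND (ATOMIC_LOADW_NRi) performs NR on the rotated
// field and then inverts just that field: with BitSize == 8 the XILF
// immediate below is -1U << 24 == 0xFF000000, flipping only the top byte of
// the rotated word.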
MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
    bool Invert) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // Extract the operands.  Base can be a register or a frame index.
  // Src2 can be a register or immediate.
  Register Dest = MI.getOperand(0).getReg();
  MachineOperand Base = earlyUseOperand(MI.getOperand(1));
  int64_t Disp = MI.getOperand(2).getImm();
  MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
  Register BitShift = MI.getOperand(4).getReg();
  Register NegBitShift = MI.getOperand(5).getReg();
  unsigned BitSize = MI.getOperand(6).getImm();
  DebugLoc DL = MI.getDebugLoc();

  // Get the right opcodes for the displacement.
  unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
  unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
  assert(LOpcode && CSOpcode && "Displacement out of range");

  // Create virtual registers for temporary results.
  Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
  Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
  Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
  Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
  Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);

  // Insert a basic block for the main loop.
  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB  = SystemZ::splitBlockBefore(MI, MBB);
  MachineBasicBlock *LoopMBB  = SystemZ::emitBlockAfter(StartMBB);

  //  StartMBB:
  //   ...
  //   %OrigVal = L Disp(%Base)
  //   # fall through to LoopMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
  MBB->addSuccessor(LoopMBB);

  //  LoopMBB:
  //   %OldVal        = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
  //   %RotatedOldVal = RLL %OldVal, 0(%BitShift)
  //   %RotatedNewVal = OP %RotatedOldVal, %Src2
  //   %NewVal        = RLL %RotatedNewVal, 0(%NegBitShift)
  //   %Dest          = CS %OldVal, %NewVal, Disp(%Base)
  //   JNE LoopMBB
  //   # fall through to DoneMBB
  MBB = LoopMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
    .addReg(OrigVal).addMBB(StartMBB)
    .addReg(Dest).addMBB(LoopMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
    .addReg(OldVal).addReg(BitShift).addImm(0);
  if (Invert) {
    // Perform the operation normally and then invert every bit of the field.
    Register Tmp = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
    BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
    // XILF with the upper BitSize bits set.
    BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
      .addReg(Tmp).addImm(-1U << (32 - BitSize));
  } else if (BinOpcode)
    // A simple binary operation.
    BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
        .addReg(RotatedOldVal)
        .add(Src2);
  else
    // Use RISBG to rotate Src2 into position and use it to replace the
    // field in RotatedOldVal.
    BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
        .addReg(RotatedOldVal).addReg(Src2.getReg())
        .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
  BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
    .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
  BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
      .addReg(OldVal)
      .addReg(NewVal)
      .add(Base)
      .addImm(Disp);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
  MBB->addSuccessor(LoopMBB);
  MBB->addSuccessor(DoneMBB);

  MI.eraseFromParent();
  return DoneMBB;
}

// Implement EmitInstrWithCustomInserter for subword pseudo
// ATOMIC_LOADW_{,U}{MIN,MAX} instruction MI.  CompareOpcode is the
// instruction that should be used to compare the current field with the
// minimum or maximum value.  KeepOldMask is the BRC condition-code mask
// for when the current field should be kept.
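//
// For example, ATOMIC_LOADW_MIN is lowered with CompareOpcode CR and
// KeepOldMask CCMASK_CMP_LE: when the current field is already less than or
// equal to the new value it is the minimum, so UseAltMBB is bypassed and the
// field is stored back unchanged.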
MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
    unsigned KeepOldMask) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // Extract the operands.  Base can be a register or a frame index.
  Register Dest = MI.getOperand(0).getReg();
  MachineOperand Base = earlyUseOperand(MI.getOperand(1));
  int64_t Disp = MI.getOperand(2).getImm();
  Register Src2 = MI.getOperand(3).getReg();
  Register BitShift = MI.getOperand(4).getReg();
  Register NegBitShift = MI.getOperand(5).getReg();
  unsigned BitSize = MI.getOperand(6).getImm();
  DebugLoc DL = MI.getDebugLoc();

  // Get the right opcodes for the displacement.
  unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
  unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
  assert(LOpcode && CSOpcode && "Displacement out of range");

  // Create virtual registers for temporary results.
  Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
  Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
  Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
  Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
  Register RotatedAltVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
  Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);

  // Insert 3 basic blocks for the loop.
  MachineBasicBlock *StartMBB  = MBB;
  MachineBasicBlock *DoneMBB   = SystemZ::splitBlockBefore(MI, MBB);
  MachineBasicBlock *LoopMBB   = SystemZ::emitBlockAfter(StartMBB);
  MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB);
  MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB);

  //  StartMBB:
  //   ...
  //   %OrigVal = L Disp(%Base)
  //   # fall through to LoopMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
  MBB->addSuccessor(LoopMBB);

  //  LoopMBB:
  //   %OldVal        = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
  //   %RotatedOldVal = RLL %OldVal, 0(%BitShift)
  //   CompareOpcode %RotatedOldVal, %Src2
  //   BRC KeepOldMask, UpdateMBB
  MBB = LoopMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
    .addReg(OrigVal).addMBB(StartMBB)
    .addReg(Dest).addMBB(UpdateMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
    .addReg(OldVal).addReg(BitShift).addImm(0);
  BuildMI(MBB, DL, TII->get(CompareOpcode))
    .addReg(RotatedOldVal).addReg(Src2);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
  MBB->addSuccessor(UpdateMBB);
  MBB->addSuccessor(UseAltMBB);

  //  UseAltMBB:
  //   %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
  //   # fall through to UpdateMBB
  MBB = UseAltMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
    .addReg(RotatedOldVal).addReg(Src2)
    .addImm(32).addImm(31 + BitSize).addImm(0);
  MBB->addSuccessor(UpdateMBB);

  //  UpdateMBB:
  //   %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
  //                        [ %RotatedAltVal, UseAltMBB ]
  //   %NewVal        = RLL %RotatedNewVal, 0(%NegBitShift)
  //   %Dest          = CS %OldVal, %NewVal, Disp(%Base)
  //   JNE LoopMBB
  //   # fall through to DoneMBB
  MBB = UpdateMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
    .addReg(RotatedOldVal).addMBB(LoopMBB)
    .addReg(RotatedAltVal).addMBB(UseAltMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
    .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
  BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
      .addReg(OldVal)
      .addReg(NewVal)
      .add(Base)
      .addImm(Disp);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
  MBB->addSuccessor(LoopMBB);
  MBB->addSuccessor(DoneMBB);

  MI.eraseFromParent();
  return DoneMBB;
}

// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_CMP_SWAPW
// instruction MI.
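//
// Only the subword field itself is compared: e.g. for BitSize == 8 the
// RISBG32 below keeps bits 56-63 (the replacement byte) of the swap value
// and refreshes bits 32-55 from the word just loaded, so a CS failure caused
// by changes to the surrounding bytes can simply retry the loop.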
MachineBasicBlock *
SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
                                          MachineBasicBlock *MBB) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // Extract the operands.  Base can be a register or a frame index.
  Register Dest = MI.getOperand(0).getReg();
  MachineOperand Base = earlyUseOperand(MI.getOperand(1));
  int64_t Disp = MI.getOperand(2).getImm();
  Register CmpVal = MI.getOperand(3).getReg();
  Register OrigSwapVal = MI.getOperand(4).getReg();
  Register BitShift = MI.getOperand(5).getReg();
  Register NegBitShift = MI.getOperand(6).getReg();
  int64_t BitSize = MI.getOperand(7).getImm();
  DebugLoc DL = MI.getDebugLoc();

  const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;

  // Get the right opcodes for the displacement and zero-extension.
  unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
  unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
  unsigned ZExtOpcode = BitSize == 8 ? SystemZ::LLCR : SystemZ::LLHR;
  assert(LOpcode && CSOpcode && "Displacement out of range");

  // Create virtual registers for temporary results.
  Register OrigOldVal = MRI.createVirtualRegister(RC);
  Register OldVal = MRI.createVirtualRegister(RC);
  Register SwapVal = MRI.createVirtualRegister(RC);
  Register StoreVal = MRI.createVirtualRegister(RC);
  Register OldValRot = MRI.createVirtualRegister(RC);
  Register RetryOldVal = MRI.createVirtualRegister(RC);
  Register RetrySwapVal = MRI.createVirtualRegister(RC);

  // Insert 2 basic blocks for the loop.
  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB  = SystemZ::splitBlockBefore(MI, MBB);
  MachineBasicBlock *LoopMBB  = SystemZ::emitBlockAfter(StartMBB);
  MachineBasicBlock *SetMBB   = SystemZ::emitBlockAfter(LoopMBB);

  //  StartMBB:
  //   ...
  //   %OrigOldVal = L Disp(%Base)
  //   # fall through to LoopMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
      .add(Base)
      .addImm(Disp)
      .addReg(0);
  MBB->addSuccessor(LoopMBB);

  //  LoopMBB:
  //   %OldVal       = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
  //   %SwapVal      = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
  //   %OldValRot    = RLL %OldVal, BitSize(%BitShift)
  //                   ^^ The low BitSize bits contain the field
  //                      of interest.
  //   %RetrySwapVal = RISBG32 %SwapVal, %OldValRot, 32, 63-BitSize, 0
  //                   ^^ Replace the upper 32-BitSize bits of the
  //                      swap value with those that we loaded and rotated.
  //   %Dest = LL[CH] %OldValRot
  //   CR %Dest, %CmpVal
  //   JNE DoneMBB
  //   # Fall through to SetMBB
  MBB = LoopMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
    .addReg(OrigOldVal).addMBB(StartMBB)
    .addReg(RetryOldVal).addMBB(SetMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
    .addReg(OrigSwapVal).addMBB(StartMBB)
    .addReg(RetrySwapVal).addMBB(SetMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::RLL), OldValRot)
    .addReg(OldVal).addReg(BitShift).addImm(BitSize);
  BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
    .addReg(SwapVal).addReg(OldValRot).addImm(32).addImm(63 - BitSize).addImm(0);
  BuildMI(MBB, DL, TII->get(ZExtOpcode), Dest)
    .addReg(OldValRot);
  BuildMI(MBB, DL, TII->get(SystemZ::CR))
    .addReg(Dest).addReg(CmpVal);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_ICMP)
    .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB);
  MBB->addSuccessor(DoneMBB);
  MBB->addSuccessor(SetMBB);

  //  SetMBB:
  //   %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
  //               ^^ Rotate the new field to its proper position.
  //   %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base)
  //   JNE LoopMBB
  //   # fall through to ExitMBB
  MBB = SetMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
    .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
  BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
      .addReg(OldVal)
      .addReg(StoreVal)
      .add(Base)
      .addImm(Disp);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
  MBB->addSuccessor(LoopMBB);
  MBB->addSuccessor(DoneMBB);

  // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in
  // to the block after the loop.  At this point, CC may have been defined
  // either by the CR in LoopMBB or by the CS in SetMBB.
  if (!MI.registerDefIsDead(SystemZ::CC, /*TRI=*/nullptr))
    DoneMBB->addLiveIn(SystemZ::CC);

  MI.eraseFromParent();
  return DoneMBB;
}

// Emit a move from two GR64s to a GR128.
MachineBasicBlock *
SystemZTargetLowering::emitPair128(MachineInstr &MI,
                                   MachineBasicBlock *MBB) const {
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
  const DebugLoc &DL = MI.getDebugLoc();

  Register Dest = MI.getOperand(0).getReg();
  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest)
      .add(MI.getOperand(1))
      .addImm(SystemZ::subreg_h64)
      .add(MI.getOperand(2))
      .addImm(SystemZ::subreg_l64);
  MI.eraseFromParent();
  return MBB;
}

// Emit an extension from a GR64 to a GR128.  ClearEven is true
// if the high register of the GR128 value must be cleared or false if
// it's "don't care".
MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
                                                     MachineBasicBlock *MBB,
                                                     bool ClearEven) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  DebugLoc DL = MI.getDebugLoc();

  Register Dest = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);

  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
  if (ClearEven) {
    Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
    Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);

    BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
      .addImm(0);
    BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
      .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
    In128 = NewIn128;
  }
  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
    .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64);

  MI.eraseFromParent();
  return MBB;
}
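
// Implement EmitInstrWithCustomInserter for the memory-to-memory pseudos
// (MVC/NC/OC/XC/CLC and the memset variants).  Short immediate lengths
// expand into straight-line 256-byte operations; longer or register-length
// forms build a loop of 256-byte operations followed by an EXRL-executed
// remainder.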
MachineBasicBlock *
SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI,
                                         MachineBasicBlock *MBB,
                                         unsigned Opcode, bool IsMemset) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  DebugLoc DL = MI.getDebugLoc();

  MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
  uint64_t DestDisp = MI.getOperand(1).getImm();
  MachineOperand SrcBase = MachineOperand::CreateReg(0U, false);
  uint64_t SrcDisp;

  // Fold the displacement Disp if it is out of range.
  auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void {
    if (!isUInt<12>(Disp)) {
      Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
      unsigned Opcode = TII->getOpcodeForOffset(SystemZ::LA, Disp);
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode), Reg)
        .add(Base).addImm(Disp).addReg(0);
      Base = MachineOperand::CreateReg(Reg, false);
      Disp = 0;
    }
  };

  if (!IsMemset) {
    SrcBase = earlyUseOperand(MI.getOperand(2));
    SrcDisp = MI.getOperand(3).getImm();
  } else {
    SrcBase = DestBase;
    SrcDisp = DestDisp++;
    foldDisplIfNeeded(DestBase, DestDisp);
  }

  MachineOperand &LengthMO = MI.getOperand(IsMemset ? 2 : 4);
  bool IsImmForm = LengthMO.isImm();
  bool IsRegForm = !IsImmForm;

  // Build and insert one Opcode of Length, with special treatment for memset.
  auto insertMemMemOp = [&](MachineBasicBlock *InsMBB,
                            MachineBasicBlock::iterator InsPos,
                            MachineOperand DBase, uint64_t DDisp,
                            MachineOperand SBase, uint64_t SDisp,
                            unsigned Length) -> void {
    assert(Length > 0 && Length <= 256 && "Building memory op with bad length.");
    if (IsMemset) {
      MachineOperand ByteMO = earlyUseOperand(MI.getOperand(3));
      if (ByteMO.isImm())
        BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::MVI))
          .add(SBase).addImm(SDisp).add(ByteMO);
      else
        BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::STC))
          .add(ByteMO).add(SBase).addImm(SDisp).addReg(0);
      if (--Length == 0)
        return;
    }
    BuildMI(*MBB, InsPos, DL, TII->get(Opcode))
      .add(DBase).addImm(DDisp).addImm(Length)
      .add(SBase).addImm(SDisp)
      .setMemRefs(MI.memoperands());
  };

  bool NeedsLoop = false;
  uint64_t ImmLength = 0;
  Register LenAdjReg = SystemZ::NoRegister;
  if (IsImmForm) {
    ImmLength = LengthMO.getImm();
    ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment.
    if (ImmLength == 0) {
      MI.eraseFromParent();
      return MBB;
    }
    if (Opcode == SystemZ::CLC) {
      if (ImmLength > 3 * 256)
        // A two-CLC sequence is a clear win over a loop, not least because
        // it needs only one branch.  A three-CLC sequence needs the same
        // number of branches as a loop (i.e. 2), but is shorter.  That
        // brings us to lengths greater than 768 bytes.  It seems relatively
        // likely that a difference will be found within the first 768 bytes,
        // so we just optimize for the smallest number of branch
        // instructions, in order to avoid polluting the prediction buffer
        // too much.
        NeedsLoop = true;
    } else if (ImmLength > 6 * 256)
      // The heuristic we use is to prefer loops for anything that would
      // require 7 or more MVCs.  With these kinds of sizes there isn't much
      // to choose between straight-line code and looping code, since the
      // time will be dominated by the MVCs themselves.
      NeedsLoop = true;
  } else {
    NeedsLoop = true;
    LenAdjReg = LengthMO.getReg();
  }

  // When generating more than one CLC, all but the last will need to
  // branch to the end when a difference is found.
  MachineBasicBlock *EndMBB =
      (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop)
           ? SystemZ::splitBlockAfter(MI, MBB)
           : nullptr);

  if (NeedsLoop) {
    Register StartCountReg =
      MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
    if (IsImmForm) {
      TII->loadImmediate(*MBB, MI, StartCountReg, ImmLength / 256);
      ImmLength &= 255;
    } else {
      BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg)
        .addReg(LenAdjReg)
        .addReg(0)
        .addImm(8);
    }

    bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
    auto loadZeroAddress = [&]() -> MachineOperand {
      Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
      BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0);
      return MachineOperand::CreateReg(Reg, false);
    };
    if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister)
      DestBase = loadZeroAddress();
    if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister)
      SrcBase = HaveSingleBase ? DestBase : loadZeroAddress();

    MachineBasicBlock *StartMBB = nullptr;
    MachineBasicBlock *LoopMBB = nullptr;
    MachineBasicBlock *NextMBB = nullptr;
    MachineBasicBlock *DoneMBB = nullptr;
    MachineBasicBlock *AllDoneMBB = nullptr;

    Register StartSrcReg = forceReg(MI, SrcBase, TII);
    Register StartDestReg =
        (HaveSingleBase ? StartSrcReg : forceReg(MI, DestBase, TII));

    const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
    Register ThisSrcReg = MRI.createVirtualRegister(RC);
    Register ThisDestReg =
        (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC));
    Register NextSrcReg = MRI.createVirtualRegister(RC);
    Register NextDestReg =
        (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC));
    RC = &SystemZ::GR64BitRegClass;
    Register ThisCountReg = MRI.createVirtualRegister(RC);
    Register NextCountReg = MRI.createVirtualRegister(RC);

    if (IsRegForm) {
      AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
      StartMBB = SystemZ::emitBlockAfter(MBB);
      LoopMBB = SystemZ::emitBlockAfter(StartMBB);
      NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
      DoneMBB = SystemZ::emitBlockAfter(NextMBB);

      //  MBB:
      //   # Jump to AllDoneMBB if LenAdjReg means 0, or fall thru to StartMBB.
      BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
        .addReg(LenAdjReg).addImm(IsMemset ? -2 : -1);
      BuildMI(MBB, DL, TII->get(SystemZ::BRC))
        .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
        .addMBB(AllDoneMBB);
      MBB->addSuccessor(AllDoneMBB);
      if (!IsMemset)
        MBB->addSuccessor(StartMBB);
      else {
        //  MemsetOneCheckMBB:
        //   # Jump to MemsetOneMBB for a memset of length 1, or
        //   # fall thru to StartMBB.
        MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB);
        MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(&*MF.rbegin());
        MBB->addSuccessor(MemsetOneCheckMBB);
        MBB = MemsetOneCheckMBB;
        BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
          .addReg(LenAdjReg).addImm(-1);
        BuildMI(MBB, DL, TII->get(SystemZ::BRC))
          .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
          .addMBB(MemsetOneMBB);
        MBB->addSuccessor(MemsetOneMBB, {10, 100});
        MBB->addSuccessor(StartMBB, {90, 100});

        //  MemsetOneMBB:
        //   # Jump back to AllDoneMBB after a single MVI or STC.
        MBB = MemsetOneMBB;
        insertMemMemOp(MBB, MBB->end(),
                       MachineOperand::CreateReg(StartDestReg, false), DestDisp,
                       MachineOperand::CreateReg(StartSrcReg, false), SrcDisp,
                       1);
        BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(AllDoneMBB);
        MBB->addSuccessor(AllDoneMBB);
      }

      //  StartMBB:
      //   # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB.
      MBB = StartMBB;
      BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
        .addReg(StartCountReg).addImm(0);
      BuildMI(MBB, DL, TII->get(SystemZ::BRC))
        .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
        .addMBB(DoneMBB);
      MBB->addSuccessor(DoneMBB);
      MBB->addSuccessor(LoopMBB);
    }
    else {
      StartMBB = MBB;
      DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
      LoopMBB = SystemZ::emitBlockAfter(StartMBB);
      NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);

      //  StartMBB:
      //   # fall through to LoopMBB
      MBB->addSuccessor(LoopMBB);

      DestBase = MachineOperand::CreateReg(NextDestReg, false);
      SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
      if (EndMBB && !ImmLength)
        // If the loop handled the whole CLC range, DoneMBB will be empty with
        // CC live-through into EndMBB, so add it as live-in.
        DoneMBB->addLiveIn(SystemZ::CC);
    }

    //  LoopMBB:
    //   %ThisDestReg = phi [ %StartDestReg, StartMBB ],
    //                      [ %NextDestReg, NextMBB ]
    //   %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
    //                     [ %NextSrcReg, NextMBB ]
    //   %ThisCountReg = phi [ %StartCountReg, StartMBB ],
    //                       [ %NextCountReg, NextMBB ]
    //   ( PFD 2, 768+DestDisp(%ThisDestReg) )
    //   Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
    //   ( JLH EndMBB )
    //
    // The prefetch is used only for MVC.  The JLH is used only for CLC.
    MBB = LoopMBB;
    BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
      .addReg(StartDestReg).addMBB(StartMBB)
      .addReg(NextDestReg).addMBB(NextMBB);
    if (!HaveSingleBase)
      BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
        .addReg(StartSrcReg).addMBB(StartMBB)
        .addReg(NextSrcReg).addMBB(NextMBB);
    BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
      .addReg(StartCountReg).addMBB(StartMBB)
      .addReg(NextCountReg).addMBB(NextMBB);
    if (Opcode == SystemZ::MVC)
      BuildMI(MBB, DL, TII->get(SystemZ::PFD))
        .addImm(SystemZ::PFD_WRITE)
        .addReg(ThisDestReg).addImm(DestDisp - IsMemset + 768).addReg(0);
    insertMemMemOp(MBB, MBB->end(),
                   MachineOperand::CreateReg(ThisDestReg, false), DestDisp,
                   MachineOperand::CreateReg(ThisSrcReg, false), SrcDisp, 256);
    if (EndMBB) {
      BuildMI(MBB, DL, TII->get(SystemZ::BRC))
        .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
        .addMBB(EndMBB);
      MBB->addSuccessor(EndMBB);
      MBB->addSuccessor(NextMBB);
    }

    //  NextMBB:
    //   %NextDestReg = LA 256(%ThisDestReg)
    //   %NextSrcReg = LA 256(%ThisSrcReg)
    //   %NextCountReg = AGHI %ThisCountReg, -1
    //   CGHI %NextCountReg, 0
    //   JLH LoopMBB
    //   # fall through to DoneMBB
    //
    // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
    MBB = NextMBB;
    BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
      .addReg(ThisDestReg).addImm(256).addReg(0);
    if (!HaveSingleBase)
      BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
        .addReg(ThisSrcReg).addImm(256).addReg(0);
    BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
      .addReg(ThisCountReg).addImm(-1);
    BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
      .addReg(NextCountReg).addImm(0);
    BuildMI(MBB, DL, TII->get(SystemZ::BRC))
      .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
      .addMBB(LoopMBB);
    MBB->addSuccessor(LoopMBB);
    MBB->addSuccessor(DoneMBB);

    MBB = DoneMBB;
    if (IsRegForm) {
      //  DoneMBB:
      //   # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
      //   # Use EXecute Relative Long for the remainder of the bytes.  The
      //   # target instruction of the EXRL will have a length field of 1 since
      //   # 0 is an illegal value.  The number of bytes processed becomes
      //   # (%LenAdjReg & 0xff) + 1.
      //   # Fall through to AllDoneMBB.
      Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
      Register RemDestReg = HaveSingleBase ? RemSrcReg
        : MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
      BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
        .addReg(StartDestReg).addMBB(StartMBB)
        .addReg(NextDestReg).addMBB(NextMBB);
      if (!HaveSingleBase)
        BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
          .addReg(StartSrcReg).addMBB(StartMBB)
          .addReg(NextSrcReg).addMBB(NextMBB);
      if (IsMemset)
        insertMemMemOp(MBB, MBB->end(),
                       MachineOperand::CreateReg(RemDestReg, false), DestDisp,
                       MachineOperand::CreateReg(RemSrcReg, false), SrcDisp, 1);
      MachineInstrBuilder EXRL_MIB =
        BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
          .addImm(Opcode)
          .addReg(LenAdjReg)
          .addReg(RemDestReg).addImm(DestDisp)
          .addReg(RemSrcReg).addImm(SrcDisp);
      MBB->addSuccessor(AllDoneMBB);
      MBB = AllDoneMBB;
      if (Opcode != SystemZ::MVC) {
        EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
        if (EndMBB)
          MBB->addLiveIn(SystemZ::CC);
      }
    }
    MF.getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
  }

  // Handle any remaining bytes with straight-line code.
  while (ImmLength > 0) {
    uint64_t ThisLength = std::min(ImmLength, uint64_t(256));
    // The previous iteration might have created out-of-range displacements.
    // Apply them using LA/LAY if so.
    foldDisplIfNeeded(DestBase, DestDisp);
    foldDisplIfNeeded(SrcBase, SrcDisp);
    insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength);
    DestDisp += ThisLength;
    SrcDisp += ThisLength;
    ImmLength -= ThisLength;
    // If there's another CLC to go, branch to the end if a difference
    // was found.
    if (EndMBB && ImmLength > 0) {
      MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB);
      BuildMI(MBB, DL, TII->get(SystemZ::BRC))
        .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
        .addMBB(EndMBB);
      MBB->addSuccessor(EndMBB);
      MBB->addSuccessor(NextMBB);
      MBB = NextMBB;
    }
  }
  if (EndMBB) {
    MBB->addSuccessor(EndMBB);
    MBB = EndMBB;
    MBB->addLiveIn(SystemZ::CC);
  }

  MI.eraseFromParent();
  return MBB;
}
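
// As an example of the expansion above, a 600-byte CLC stays below the
// 3 * 256 loop threshold and becomes straight-line code:
//   CLC(256), JNE EndMBB, CLC(256), JNE EndMBB, CLC(88).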

// Decompose string pseudo-instruction MI into a loop that continually performs
// Opcode until CC != 3.
MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  DebugLoc DL = MI.getDebugLoc();

  uint64_t End1Reg = MI.getOperand(0).getReg();
  uint64_t Start1Reg = MI.getOperand(1).getReg();
  uint64_t Start2Reg = MI.getOperand(2).getReg();
  uint64_t CharReg = MI.getOperand(3).getReg();

  const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
  uint64_t This1Reg = MRI.createVirtualRegister(RC);
  uint64_t This2Reg = MRI.createVirtualRegister(RC);
  uint64_t End2Reg = MRI.createVirtualRegister(RC);

  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
  MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);

  //  StartMBB:
  //   # fall through to LoopMBB
  MBB->addSuccessor(LoopMBB);

  //  LoopMBB:
  //   %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
  //   %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
  //   R0L = %CharReg
  //   %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
  //   JO LoopMBB
  //   # fall through to DoneMBB
  //
  // The load of R0L can be hoisted by post-RA LICM.
  MBB = LoopMBB;

  BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
    .addReg(Start1Reg).addMBB(StartMBB)
    .addReg(End1Reg).addMBB(LoopMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
    .addReg(Start2Reg).addMBB(StartMBB)
    .addReg(End2Reg).addMBB(LoopMBB);
  BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
  BuildMI(MBB, DL, TII->get(Opcode))
    .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
    .addReg(This1Reg).addReg(This2Reg);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB);
  MBB->addSuccessor(LoopMBB);
  MBB->addSuccessor(DoneMBB);

  DoneMBB->addLiveIn(SystemZ::CC);

  MI.eraseFromParent();
  return DoneMBB;
}

// Update TBEGIN instruction with final opcode and register clobbers.
MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
    bool NoFloat) const {
  MachineFunction &MF = *MBB->getParent();
  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();

  // Update opcode.
  MI.setDesc(TII->get(Opcode));

  // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
  // Make sure to add the corresponding GRSM bits if they are missing.
  uint64_t Control = MI.getOperand(2).getImm();
  static const unsigned GPRControlBit[16] = {
    0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
    0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
  };
  Control |= GPRControlBit[15];
  if (TFI->hasFP(MF))
    Control |= GPRControlBit[11];
  MI.getOperand(2).setImm(Control);

  // Add GPR clobbers.
  for (int I = 0; I < 16; I++) {
    if ((Control & GPRControlBit[I]) == 0) {
      unsigned Reg = SystemZMC::GR64Regs[I];
      MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
    }
  }

  // Add FPR/VR clobbers.
  if (!NoFloat && (Control & 4) != 0) {
    if (Subtarget.hasVector()) {
      for (unsigned Reg : SystemZMC::VR128Regs) {
        MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
      }
    } else {
      for (unsigned Reg : SystemZMC::FP64Regs) {
        MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
      }
    }
  }

  return MBB;
}

MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
  MachineFunction &MF = *MBB->getParent();
  MachineRegisterInfo *MRI = &MF.getRegInfo();
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();

  Register SrcReg = MI.getOperand(0).getReg();

  // Create new virtual register of the same class as source.
  const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
  Register DstReg = MRI->createVirtualRegister(RC);

  // Replace pseudo with a normal load-and-test that models the def as
  // well.
  BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
    .addReg(SrcReg)
    .setMIFlags(MI.getFlags());
  MI.eraseFromParent();

  return MBB;
}
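
// Expand PROBED_ALLOCA: allocate the dynamic area in chunks of at most
// ProbeSize bytes, probing each chunk with a volatile 8-byte compare against
// the newly exposed stack area, and finally allocate and probe the remaining
// tail in the same way.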
MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
    MachineInstr &MI, MachineBasicBlock *MBB) const {
  MachineFunction &MF = *MBB->getParent();
  MachineRegisterInfo *MRI = &MF.getRegInfo();
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  const unsigned ProbeSize = getStackProbeSize(MF);
  Register DstReg = MI.getOperand(0).getReg();
  Register SizeReg = MI.getOperand(2).getReg();

  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB  = SystemZ::splitBlockAfter(MI, MBB);
  MachineBasicBlock *LoopTestMBB  = SystemZ::emitBlockAfter(StartMBB);
  MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
  MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
  MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);

  MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(MachinePointerInfo(),
    MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1));

  Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
  Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);

  //  LoopTestMBB
  //  BRC TailTestMBB
  //  # fallthrough to LoopBodyMBB
  StartMBB->addSuccessor(LoopTestMBB);
  MBB = LoopTestMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
    .addReg(SizeReg)
    .addMBB(StartMBB)
    .addReg(IncReg)
    .addMBB(LoopBodyMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
    .addReg(PHIReg)
    .addImm(ProbeSize);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_LT)
    .addMBB(TailTestMBB);
  MBB->addSuccessor(LoopBodyMBB);
  MBB->addSuccessor(TailTestMBB);

  //  LoopBodyMBB: Allocate and probe by means of a volatile compare.
  //  J LoopTestMBB
  MBB = LoopBodyMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
    .addReg(PHIReg)
    .addImm(ProbeSize);
  BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
    .addReg(SystemZ::R15D)
    .addImm(ProbeSize);
  BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
    .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
    .setMemRefs(VolLdMMO);
  BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
  MBB->addSuccessor(LoopTestMBB);

  //  TailTestMBB
  //  BRC DoneMBB
  //  # fallthrough to TailMBB
  MBB = TailTestMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
    .addReg(PHIReg)
    .addImm(0);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
    .addMBB(DoneMBB);
  MBB->addSuccessor(TailMBB);
  MBB->addSuccessor(DoneMBB);

  //  TailMBB
  //  # fallthrough to DoneMBB
  MBB = TailMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
    .addReg(SystemZ::R15D)
    .addReg(PHIReg);
  BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
    .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
    .setMemRefs(VolLdMMO);
  MBB->addSuccessor(DoneMBB);

  //  DoneMBB
  MBB = DoneMBB;
  BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
    .addReg(SystemZ::R15D);

  MI.eraseFromParent();
  return DoneMBB;
}

SDValue SystemZTargetLowering::
getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
  SDLoc DL(SP);
  return DAG.getNode(ISD::ADD, DL, MVT::i64, SP,
                     DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL));
}
SDValue SystemZTargetLowering::
getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
  SDLoc DL(SP);
  return DAG.getNode(ISD::ADD, DL, MVT::i64, SP,
                     DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL));
}

MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
    MachineInstr &MI, MachineBasicBlock *MBB) const {
  switch (MI.getOpcode()) {
  case SystemZ::ADJCALLSTACKDOWN:
  case SystemZ::ADJCALLSTACKUP:
    return emitAdjCallStack(MI, MBB);

  case SystemZ::Select32:
  case SystemZ::Select64:
  case SystemZ::Select128:
  case SystemZ::SelectF32:
  case SystemZ::SelectF64:
  case SystemZ::SelectF128:
  case SystemZ::SelectVR32:
  case SystemZ::SelectVR64:
  case SystemZ::SelectVR128:
    return emitSelect(MI, MBB);

  case SystemZ::CondStore8Mux:
    return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
  case SystemZ::CondStore8MuxInv:
    return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
  case SystemZ::CondStore16Mux:
    return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
  case SystemZ::CondStore16MuxInv:
    return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
  case SystemZ::CondStore32Mux:
    return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
  case SystemZ::CondStore32MuxInv:
    return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
  case SystemZ::CondStore8:
    return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
  case SystemZ::CondStore8Inv:
    return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
  case SystemZ::CondStore16:
    return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
  case SystemZ::CondStore16Inv:
    return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
  case SystemZ::CondStore32:
    return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
  case SystemZ::CondStore32Inv:
    return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
  case SystemZ::CondStore64:
    return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
  case SystemZ::CondStore64Inv:
    return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
  case SystemZ::CondStoreF32:
    return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
  case SystemZ::CondStoreF32Inv:
    return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
  case SystemZ::CondStoreF64:
    return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
  case SystemZ::CondStoreF64Inv:
    return emitCondStore(MI, MBB, SystemZ::STD, 0, true);

  case SystemZ::SCmp128Hi:
    return emitICmp128Hi(MI, MBB, false);
  case SystemZ::UCmp128Hi:
    return emitICmp128Hi(MI, MBB, true);

  case SystemZ::PAIR128:
    return emitPair128(MI, MBB);
  case SystemZ::AEXT128:
    return emitExt128(MI, MBB, false);
  case SystemZ::ZEXT128:
    return emitExt128(MI, MBB, true);
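
  // Subword (8- and 16-bit) atomic operations have no direct machine
  // instructions; the ATOMIC_LOADW_* pseudos below are expanded into loops
  // that operate on the aligned 32-bit word containing the subword.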
  case SystemZ::ATOMIC_SWAPW:
    return emitAtomicLoadBinary(MI, MBB, 0);

  case SystemZ::ATOMIC_LOADW_AR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AR);
  case SystemZ::ATOMIC_LOADW_AFI:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI);

  case SystemZ::ATOMIC_LOADW_SR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::SR);

  case SystemZ::ATOMIC_LOADW_NR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NR);
  case SystemZ::ATOMIC_LOADW_NILH:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH);

  case SystemZ::ATOMIC_LOADW_OR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OR);
  case SystemZ::ATOMIC_LOADW_OILH:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH);

  case SystemZ::ATOMIC_LOADW_XR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XR);
  case SystemZ::ATOMIC_LOADW_XILF:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF);

  case SystemZ::ATOMIC_LOADW_NRi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, true);
  case SystemZ::ATOMIC_LOADW_NILHi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, true);

  case SystemZ::ATOMIC_LOADW_MIN:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_LE);
  case SystemZ::ATOMIC_LOADW_MAX:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_GE);
  case SystemZ::ATOMIC_LOADW_UMIN:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_LE);
  case SystemZ::ATOMIC_LOADW_UMAX:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_GE);

  case SystemZ::ATOMIC_CMP_SWAPW:
    return emitAtomicCmpSwapW(MI, MBB);
  case SystemZ::MVCImm:
  case SystemZ::MVCReg:
    return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
  case SystemZ::NCImm:
    return emitMemMemWrapper(MI, MBB, SystemZ::NC);
  case SystemZ::OCImm:
    return emitMemMemWrapper(MI, MBB, SystemZ::OC);
  case SystemZ::XCImm:
  case SystemZ::XCReg:
    return emitMemMemWrapper(MI, MBB, SystemZ::XC);
  case SystemZ::CLCImm:
  case SystemZ::CLCReg:
    return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
  case SystemZ::MemsetImmImm:
  case SystemZ::MemsetImmReg:
  case SystemZ::MemsetRegImm:
  case SystemZ::MemsetRegReg:
    return emitMemMemWrapper(MI, MBB, SystemZ::MVC, true/*IsMemset*/);
  case SystemZ::CLSTLoop:
    return emitStringWrapper(MI, MBB, SystemZ::CLST);
  case SystemZ::MVSTLoop:
    return emitStringWrapper(MI, MBB, SystemZ::MVST);
  case SystemZ::SRSTLoop:
    return emitStringWrapper(MI, MBB, SystemZ::SRST);
  case SystemZ::TBEGIN:
    return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
  case SystemZ::TBEGIN_nofloat:
    return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
  case SystemZ::TBEGINC:
    return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
  case SystemZ::LTEBRCompare_Pseudo:
    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
  case SystemZ::LTDBRCompare_Pseudo:
    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
  case SystemZ::LTXBRCompare_Pseudo:
    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);

  case SystemZ::PROBED_ALLOCA:
    return emitProbedAlloca(MI, MBB);

  case TargetOpcode::STACKMAP:
  case TargetOpcode::PATCHPOINT:
    return emitPatchPoint(MI, MBB);

  default:
    llvm_unreachable("Unexpected instr type to insert");
  }
}

// This is only used by the isel schedulers, and is needed only to prevent
// the compiler from crashing when list-ilp is used.
const TargetRegisterClass *
SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
  if (VT == MVT::Untyped)
    return &SystemZ::ADDR128BitRegClass;
  return TargetLowering::getRepRegClassFor(VT);
}

SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc dl(Op);
  /*
   The rounding method is in FPC Byte 3 bits 6-7, and has the following
   settings:
     00 Round to nearest
     01 Round to 0
     10 Round to +inf
     11 Round to -inf

   FLT_ROUNDS, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf
  */

  // Save the FPC to a register.
  SDValue Chain = Op.getOperand(0);
  SDValue EFPC(
      DAG.getMachineNode(SystemZ::EFPC, dl, {MVT::i32, MVT::Other}, Chain), 0);
  Chain = EFPC.getValue(1);

  // Transform as necessary.
  SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, EFPC,
                             DAG.getConstant(3, dl, MVT::i32));
  // RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1
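  //
  // A quick check of the mapping, for each FPC value CWD1:
  //   CWD1 = 0 (nearest): (0 ^ 0) ^ 1 = 1  -> FLT_ROUNDS 1 (nearest)
  //   CWD1 = 1 (to 0):    (1 ^ 0) ^ 1 = 0  -> FLT_ROUNDS 0 (to 0)
  //   CWD1 = 2 (+inf):    (2 ^ 1) ^ 1 = 2  -> FLT_ROUNDS 2 (+inf)
  //   CWD1 = 3 (-inf):    (3 ^ 1) ^ 1 = 3  -> FLT_ROUNDS 3 (-inf)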
  SDValue CWD2 = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1,
                             DAG.getNode(ISD::SRL, dl, MVT::i32, CWD1,
                                         DAG.getConstant(1, dl, MVT::i32)));

  SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD2,
                               DAG.getConstant(1, dl, MVT::i32));
  RetVal = DAG.getZExtOrTrunc(RetVal, dl, Op.getValueType());

  return DAG.getMergeValues({RetVal, Chain}, dl);
}

SDValue SystemZTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
                                                  SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  Op = Op.getOperand(0);
  EVT OpVT = Op.getValueType();

  assert(OpVT.isVector() && "Operand type for VECREDUCE_ADD is not a vector.");

  SDLoc DL(Op);

  // Load a zero vector for the third operand of VSUM.
  SDValue Zero = DAG.getSplatBuildVector(OpVT, DL, DAG.getConstant(0, DL, VT));

  // Execute VSUM.
  switch (OpVT.getScalarSizeInBits()) {
  case 8:
  case 16:
    Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Zero);
    [[fallthrough]];
  case 32:
  case 64:
    Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::i128, Op,
                     DAG.getBitcast(Op.getValueType(), Zero));
    break;
  case 128:
    break; // VSUM over v1i128 should not happen and would be a no-op.
  default:
    llvm_unreachable("Unexpected scalar size.");
  }
  // Cast to the original vector type and retrieve the last element.
  return DAG.getNode(
      ISD::EXTRACT_VECTOR_ELT, DL, VT, DAG.getBitcast(OpVT, Op),
      DAG.getConstant(OpVT.getVectorNumElements() - 1, DL, MVT::i32));
}