Path: blob/main/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
35269 views
//===-- SystemZISelDAGToDAG.cpp - A dag to dag inst selector for SystemZ --===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file defines an instruction selector for the SystemZ target.9//10//===----------------------------------------------------------------------===//1112#include "SystemZTargetMachine.h"13#include "SystemZISelLowering.h"14#include "llvm/Analysis/AliasAnalysis.h"15#include "llvm/CodeGen/SelectionDAGISel.h"16#include "llvm/Support/Debug.h"17#include "llvm/Support/KnownBits.h"18#include "llvm/Support/raw_ostream.h"1920using namespace llvm;2122#define DEBUG_TYPE "systemz-isel"23#define PASS_NAME "SystemZ DAG->DAG Pattern Instruction Selection"2425namespace {26// Used to build addressing modes.27struct SystemZAddressingMode {28// The shape of the address.29enum AddrForm {30// base+displacement31FormBD,3233// base+displacement+index for load and store operands34FormBDXNormal,3536// base+displacement+index for load address operands37FormBDXLA,3839// base+displacement+index+ADJDYNALLOC40FormBDXDynAlloc41};42AddrForm Form;4344// The type of displacement. The enum names here correspond directly45// to the definitions in SystemZOperand.td. We could split them into46// flags -- single/pair, 128-bit, etc. -- but it hardly seems worth it.47enum DispRange {48Disp12Only,49Disp12Pair,50Disp20Only,51Disp20Only128,52Disp20Pair53};54DispRange DR;5556// The parts of the address. The address is equivalent to:57//58// Base + Disp + Index + (IncludesDynAlloc ? ADJDYNALLOC : 0)59SDValue Base;60int64_t Disp;61SDValue Index;62bool IncludesDynAlloc;6364SystemZAddressingMode(AddrForm form, DispRange dr)65: Form(form), DR(dr), Disp(0), IncludesDynAlloc(false) {}6667// True if the address can have an index register.68bool hasIndexField() { return Form != FormBD; }6970// True if the address can (and must) include ADJDYNALLOC.71bool isDynAlloc() { return Form == FormBDXDynAlloc; }7273void dump(const llvm::SelectionDAG *DAG) {74errs() << "SystemZAddressingMode " << this << '\n';7576errs() << " Base ";77if (Base.getNode())78Base.getNode()->dump(DAG);79else80errs() << "null\n";8182if (hasIndexField()) {83errs() << " Index ";84if (Index.getNode())85Index.getNode()->dump(DAG);86else87errs() << "null\n";88}8990errs() << " Disp " << Disp;91if (IncludesDynAlloc)92errs() << " + ADJDYNALLOC";93errs() << '\n';94}95};9697// Return a mask with Count low bits set.98static uint64_t allOnes(unsigned int Count) {99assert(Count <= 64);100if (Count > 63)101return UINT64_MAX;102return (uint64_t(1) << Count) - 1;103}104105// Represents operands 2 to 5 of the ROTATE AND ... SELECTED BITS operation106// given by Opcode. The operands are: Input (R2), Start (I3), End (I4) and107// Rotate (I5). The combined operand value is effectively:108//109// (or (rotl Input, Rotate), ~Mask)110//111// for RNSBG and:112//113// (and (rotl Input, Rotate), Mask)114//115// otherwise. The output value has BitSize bits, although Input may be116// narrower (in which case the upper bits are don't care), or wider (in which117// case the result will be truncated as part of the operation).118struct RxSBGOperands {119RxSBGOperands(unsigned Op, SDValue N)120: Opcode(Op), BitSize(N.getValueSizeInBits()),121Mask(allOnes(BitSize)), Input(N), Start(64 - BitSize), End(63),122Rotate(0) {}123124unsigned Opcode;125unsigned BitSize;126uint64_t Mask;127SDValue Input;128unsigned Start;129unsigned End;130unsigned Rotate;131};132133class SystemZDAGToDAGISel : public SelectionDAGISel {134const SystemZSubtarget *Subtarget;135136// Used by SystemZOperands.td to create integer constants.137inline SDValue getImm(const SDNode *Node, uint64_t Imm) const {138return CurDAG->getTargetConstant(Imm, SDLoc(Node), Node->getValueType(0));139}140141const SystemZTargetMachine &getTargetMachine() const {142return static_cast<const SystemZTargetMachine &>(TM);143}144145const SystemZInstrInfo *getInstrInfo() const {146return Subtarget->getInstrInfo();147}148149// Try to fold more of the base or index of AM into AM, where IsBase150// selects between the base and index.151bool expandAddress(SystemZAddressingMode &AM, bool IsBase) const;152153// Try to describe N in AM, returning true on success.154bool selectAddress(SDValue N, SystemZAddressingMode &AM) const;155156// Extract individual target operands from matched address AM.157void getAddressOperands(const SystemZAddressingMode &AM, EVT VT,158SDValue &Base, SDValue &Disp) const;159void getAddressOperands(const SystemZAddressingMode &AM, EVT VT,160SDValue &Base, SDValue &Disp, SDValue &Index) const;161162// Try to match Addr as a FormBD address with displacement type DR.163// Return true on success, storing the base and displacement in164// Base and Disp respectively.165bool selectBDAddr(SystemZAddressingMode::DispRange DR, SDValue Addr,166SDValue &Base, SDValue &Disp) const;167168// Try to match Addr as a FormBDX address with displacement type DR.169// Return true on success and if the result had no index. Store the170// base and displacement in Base and Disp respectively.171bool selectMVIAddr(SystemZAddressingMode::DispRange DR, SDValue Addr,172SDValue &Base, SDValue &Disp) const;173174// Try to match Addr as a FormBDX* address of form Form with175// displacement type DR. Return true on success, storing the base,176// displacement and index in Base, Disp and Index respectively.177bool selectBDXAddr(SystemZAddressingMode::AddrForm Form,178SystemZAddressingMode::DispRange DR, SDValue Addr,179SDValue &Base, SDValue &Disp, SDValue &Index) const;180181// PC-relative address matching routines used by SystemZOperands.td.182bool selectPCRelAddress(SDValue Addr, SDValue &Target) const {183if (SystemZISD::isPCREL(Addr.getOpcode())) {184Target = Addr.getOperand(0);185return true;186}187return false;188}189190// BD matching routines used by SystemZOperands.td.191bool selectBDAddr12Only(SDValue Addr, SDValue &Base, SDValue &Disp) const {192return selectBDAddr(SystemZAddressingMode::Disp12Only, Addr, Base, Disp);193}194bool selectBDAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const {195return selectBDAddr(SystemZAddressingMode::Disp12Pair, Addr, Base, Disp);196}197bool selectBDAddr20Only(SDValue Addr, SDValue &Base, SDValue &Disp) const {198return selectBDAddr(SystemZAddressingMode::Disp20Only, Addr, Base, Disp);199}200bool selectBDAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const {201return selectBDAddr(SystemZAddressingMode::Disp20Pair, Addr, Base, Disp);202}203204// MVI matching routines used by SystemZOperands.td.205bool selectMVIAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const {206return selectMVIAddr(SystemZAddressingMode::Disp12Pair, Addr, Base, Disp);207}208bool selectMVIAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const {209return selectMVIAddr(SystemZAddressingMode::Disp20Pair, Addr, Base, Disp);210}211212// BDX matching routines used by SystemZOperands.td.213bool selectBDXAddr12Only(SDValue Addr, SDValue &Base, SDValue &Disp,214SDValue &Index) const {215return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,216SystemZAddressingMode::Disp12Only,217Addr, Base, Disp, Index);218}219bool selectBDXAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp,220SDValue &Index) const {221return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,222SystemZAddressingMode::Disp12Pair,223Addr, Base, Disp, Index);224}225bool selectDynAlloc12Only(SDValue Addr, SDValue &Base, SDValue &Disp,226SDValue &Index) const {227return selectBDXAddr(SystemZAddressingMode::FormBDXDynAlloc,228SystemZAddressingMode::Disp12Only,229Addr, Base, Disp, Index);230}231bool selectBDXAddr20Only(SDValue Addr, SDValue &Base, SDValue &Disp,232SDValue &Index) const {233return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,234SystemZAddressingMode::Disp20Only,235Addr, Base, Disp, Index);236}237bool selectBDXAddr20Only128(SDValue Addr, SDValue &Base, SDValue &Disp,238SDValue &Index) const {239return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,240SystemZAddressingMode::Disp20Only128,241Addr, Base, Disp, Index);242}243bool selectBDXAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp,244SDValue &Index) const {245return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,246SystemZAddressingMode::Disp20Pair,247Addr, Base, Disp, Index);248}249bool selectLAAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp,250SDValue &Index) const {251return selectBDXAddr(SystemZAddressingMode::FormBDXLA,252SystemZAddressingMode::Disp12Pair,253Addr, Base, Disp, Index);254}255bool selectLAAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp,256SDValue &Index) const {257return selectBDXAddr(SystemZAddressingMode::FormBDXLA,258SystemZAddressingMode::Disp20Pair,259Addr, Base, Disp, Index);260}261262// Try to match Addr as an address with a base, 12-bit displacement263// and index, where the index is element Elem of a vector.264// Return true on success, storing the base, displacement and vector265// in Base, Disp and Index respectively.266bool selectBDVAddr12Only(SDValue Addr, SDValue Elem, SDValue &Base,267SDValue &Disp, SDValue &Index) const;268269// Check whether (or Op (and X InsertMask)) is effectively an insertion270// of X into bits InsertMask of some Y != Op. Return true if so and271// set Op to that Y.272bool detectOrAndInsertion(SDValue &Op, uint64_t InsertMask) const;273274// Try to update RxSBG so that only the bits of RxSBG.Input in Mask are used.275// Return true on success.276bool refineRxSBGMask(RxSBGOperands &RxSBG, uint64_t Mask) const;277278// Try to fold some of RxSBG.Input into other fields of RxSBG.279// Return true on success.280bool expandRxSBG(RxSBGOperands &RxSBG) const;281282// Return an undefined value of type VT.283SDValue getUNDEF(const SDLoc &DL, EVT VT) const;284285// Convert N to VT, if it isn't already.286SDValue convertTo(const SDLoc &DL, EVT VT, SDValue N) const;287288// Try to implement AND or shift node N using RISBG with the zero flag set.289// Return the selected node on success, otherwise return null.290bool tryRISBGZero(SDNode *N);291292// Try to use RISBG or Opcode to implement OR or XOR node N.293// Return the selected node on success, otherwise return null.294bool tryRxSBG(SDNode *N, unsigned Opcode);295296// If Op0 is null, then Node is a constant that can be loaded using:297//298// (Opcode UpperVal LowerVal)299//300// If Op0 is nonnull, then Node can be implemented using:301//302// (Opcode (Opcode Op0 UpperVal) LowerVal)303void splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0,304uint64_t UpperVal, uint64_t LowerVal);305306void loadVectorConstant(const SystemZVectorConstantInfo &VCI,307SDNode *Node);308309SDNode *loadPoolVectorConstant(APInt Val, EVT VT, SDLoc DL);310311// Try to use gather instruction Opcode to implement vector insertion N.312bool tryGather(SDNode *N, unsigned Opcode);313314// Try to use scatter instruction Opcode to implement store Store.315bool tryScatter(StoreSDNode *Store, unsigned Opcode);316317// Change a chain of {load; op; store} of the same value into a simple op318// through memory of that value, if the uses of the modified value and its319// address are suitable.320bool tryFoldLoadStoreIntoMemOperand(SDNode *Node);321322// Return true if Load and Store are loads and stores of the same size323// and are guaranteed not to overlap. Such operations can be implemented324// using block (SS-format) instructions.325//326// Partial overlap would lead to incorrect code, since the block operations327// are logically bytewise, even though they have a fast path for the328// non-overlapping case. We also need to avoid full overlap (i.e. two329// addresses that might be equal at run time) because although that case330// would be handled correctly, it might be implemented by millicode.331bool canUseBlockOperation(StoreSDNode *Store, LoadSDNode *Load) const;332333// N is a (store (load Y), X) pattern. Return true if it can use an MVC334// from Y to X.335bool storeLoadCanUseMVC(SDNode *N) const;336337// N is a (store (op (load A[0]), (load A[1])), X) pattern. Return true338// if A[1 - I] == X and if N can use a block operation like NC from A[I]339// to X.340bool storeLoadCanUseBlockBinary(SDNode *N, unsigned I) const;341342// Return true if N (a load or a store) fullfills the alignment343// requirements for a PC-relative access.344bool storeLoadIsAligned(SDNode *N) const;345346// Return the load extension type of a load or atomic load.347ISD::LoadExtType getLoadExtType(SDNode *N) const;348349// Try to expand a boolean SELECT_CCMASK using an IPM sequence.350SDValue expandSelectBoolean(SDNode *Node);351352// Return true if the flags of N and the subtarget allows for353// reassociation, in which case a reg/reg opcode is needed as input to the354// MachineCombiner.355bool shouldSelectForReassoc(SDNode *N) const;356357public:358SystemZDAGToDAGISel() = delete;359360SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOptLevel OptLevel)361: SelectionDAGISel(TM, OptLevel) {}362363bool runOnMachineFunction(MachineFunction &MF) override {364const Function &F = MF.getFunction();365if (F.getFnAttribute("fentry-call").getValueAsString() != "true") {366if (F.hasFnAttribute("mnop-mcount"))367report_fatal_error("mnop-mcount only supported with fentry-call");368if (F.hasFnAttribute("mrecord-mcount"))369report_fatal_error("mrecord-mcount only supported with fentry-call");370}371372Subtarget = &MF.getSubtarget<SystemZSubtarget>();373return SelectionDAGISel::runOnMachineFunction(MF);374}375376// Override SelectionDAGISel.377void Select(SDNode *Node) override;378bool SelectInlineAsmMemoryOperand(const SDValue &Op,379InlineAsm::ConstraintCode ConstraintID,380std::vector<SDValue> &OutOps) override;381bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override;382void PreprocessISelDAG() override;383384// Include the pieces autogenerated from the target description.385#include "SystemZGenDAGISel.inc"386};387388class SystemZDAGToDAGISelLegacy : public SelectionDAGISelLegacy {389public:390static char ID;391explicit SystemZDAGToDAGISelLegacy(SystemZTargetMachine &TM,392CodeGenOptLevel OptLevel)393: SelectionDAGISelLegacy(394ID, std::make_unique<SystemZDAGToDAGISel>(TM, OptLevel)) {}395};396} // end anonymous namespace397398char SystemZDAGToDAGISelLegacy::ID = 0;399400INITIALIZE_PASS(SystemZDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)401402FunctionPass *llvm::createSystemZISelDag(SystemZTargetMachine &TM,403CodeGenOptLevel OptLevel) {404return new SystemZDAGToDAGISelLegacy(TM, OptLevel);405}406407// Return true if Val should be selected as a displacement for an address408// with range DR. Here we're interested in the range of both the instruction409// described by DR and of any pairing instruction.410static bool selectDisp(SystemZAddressingMode::DispRange DR, int64_t Val) {411switch (DR) {412case SystemZAddressingMode::Disp12Only:413return isUInt<12>(Val);414415case SystemZAddressingMode::Disp12Pair:416case SystemZAddressingMode::Disp20Only:417case SystemZAddressingMode::Disp20Pair:418return isInt<20>(Val);419420case SystemZAddressingMode::Disp20Only128:421return isInt<20>(Val) && isInt<20>(Val + 8);422}423llvm_unreachable("Unhandled displacement range");424}425426// Change the base or index in AM to Value, where IsBase selects427// between the base and index.428static void changeComponent(SystemZAddressingMode &AM, bool IsBase,429SDValue Value) {430if (IsBase)431AM.Base = Value;432else433AM.Index = Value;434}435436// The base or index of AM is equivalent to Value + ADJDYNALLOC,437// where IsBase selects between the base and index. Try to fold the438// ADJDYNALLOC into AM.439static bool expandAdjDynAlloc(SystemZAddressingMode &AM, bool IsBase,440SDValue Value) {441if (AM.isDynAlloc() && !AM.IncludesDynAlloc) {442changeComponent(AM, IsBase, Value);443AM.IncludesDynAlloc = true;444return true;445}446return false;447}448449// The base of AM is equivalent to Base + Index. Try to use Index as450// the index register.451static bool expandIndex(SystemZAddressingMode &AM, SDValue Base,452SDValue Index) {453if (AM.hasIndexField() && !AM.Index.getNode()) {454AM.Base = Base;455AM.Index = Index;456return true;457}458return false;459}460461// The base or index of AM is equivalent to Op0 + Op1, where IsBase selects462// between the base and index. Try to fold Op1 into AM's displacement.463static bool expandDisp(SystemZAddressingMode &AM, bool IsBase,464SDValue Op0, uint64_t Op1) {465// First try adjusting the displacement.466int64_t TestDisp = AM.Disp + Op1;467if (selectDisp(AM.DR, TestDisp)) {468changeComponent(AM, IsBase, Op0);469AM.Disp = TestDisp;470return true;471}472473// We could consider forcing the displacement into a register and474// using it as an index, but it would need to be carefully tuned.475return false;476}477478bool SystemZDAGToDAGISel::expandAddress(SystemZAddressingMode &AM,479bool IsBase) const {480SDValue N = IsBase ? AM.Base : AM.Index;481unsigned Opcode = N.getOpcode();482// Look through no-op truncations.483if (Opcode == ISD::TRUNCATE && N.getOperand(0).getValueSizeInBits() <= 64) {484N = N.getOperand(0);485Opcode = N.getOpcode();486}487if (Opcode == ISD::ADD || CurDAG->isBaseWithConstantOffset(N)) {488SDValue Op0 = N.getOperand(0);489SDValue Op1 = N.getOperand(1);490491unsigned Op0Code = Op0->getOpcode();492unsigned Op1Code = Op1->getOpcode();493494if (Op0Code == SystemZISD::ADJDYNALLOC)495return expandAdjDynAlloc(AM, IsBase, Op1);496if (Op1Code == SystemZISD::ADJDYNALLOC)497return expandAdjDynAlloc(AM, IsBase, Op0);498499if (Op0Code == ISD::Constant)500return expandDisp(AM, IsBase, Op1,501cast<ConstantSDNode>(Op0)->getSExtValue());502if (Op1Code == ISD::Constant)503return expandDisp(AM, IsBase, Op0,504cast<ConstantSDNode>(Op1)->getSExtValue());505506if (IsBase && expandIndex(AM, Op0, Op1))507return true;508}509if (Opcode == SystemZISD::PCREL_OFFSET) {510SDValue Full = N.getOperand(0);511SDValue Base = N.getOperand(1);512SDValue Anchor = Base.getOperand(0);513uint64_t Offset = (cast<GlobalAddressSDNode>(Full)->getOffset() -514cast<GlobalAddressSDNode>(Anchor)->getOffset());515return expandDisp(AM, IsBase, Base, Offset);516}517return false;518}519520// Return true if an instruction with displacement range DR should be521// used for displacement value Val. selectDisp(DR, Val) must already hold.522static bool isValidDisp(SystemZAddressingMode::DispRange DR, int64_t Val) {523assert(selectDisp(DR, Val) && "Invalid displacement");524switch (DR) {525case SystemZAddressingMode::Disp12Only:526case SystemZAddressingMode::Disp20Only:527case SystemZAddressingMode::Disp20Only128:528return true;529530case SystemZAddressingMode::Disp12Pair:531// Use the other instruction if the displacement is too large.532return isUInt<12>(Val);533534case SystemZAddressingMode::Disp20Pair:535// Use the other instruction if the displacement is small enough.536return !isUInt<12>(Val);537}538llvm_unreachable("Unhandled displacement range");539}540541// Return true if Base + Disp + Index should be performed by LA(Y).542static bool shouldUseLA(SDNode *Base, int64_t Disp, SDNode *Index) {543// Don't use LA(Y) for constants.544if (!Base)545return false;546547// Always use LA(Y) for frame addresses, since we know that the destination548// register is almost always (perhaps always) going to be different from549// the frame register.550if (Base->getOpcode() == ISD::FrameIndex)551return true;552553if (Disp) {554// Always use LA(Y) if there is a base, displacement and index.555if (Index)556return true;557558// Always use LA if the displacement is small enough. It should always559// be no worse than AGHI (and better if it avoids a move).560if (isUInt<12>(Disp))561return true;562563// For similar reasons, always use LAY if the constant is too big for AGHI.564// LAY should be no worse than AGFI.565if (!isInt<16>(Disp))566return true;567} else {568// Don't use LA for plain registers.569if (!Index)570return false;571572// Don't use LA for plain addition if the index operand is only used573// once. It should be a natural two-operand addition in that case.574if (Index->hasOneUse())575return false;576577// Prefer addition if the second operation is sign-extended, in the578// hope of using AGF.579unsigned IndexOpcode = Index->getOpcode();580if (IndexOpcode == ISD::SIGN_EXTEND ||581IndexOpcode == ISD::SIGN_EXTEND_INREG)582return false;583}584585// Don't use LA for two-operand addition if either operand is only586// used once. The addition instructions are better in that case.587if (Base->hasOneUse())588return false;589590return true;591}592593// Return true if Addr is suitable for AM, updating AM if so.594bool SystemZDAGToDAGISel::selectAddress(SDValue Addr,595SystemZAddressingMode &AM) const {596// Start out assuming that the address will need to be loaded separately,597// then try to extend it as much as we can.598AM.Base = Addr;599600// First try treating the address as a constant.601if (Addr.getOpcode() == ISD::Constant &&602expandDisp(AM, true, SDValue(),603cast<ConstantSDNode>(Addr)->getSExtValue()))604;605// Also see if it's a bare ADJDYNALLOC.606else if (Addr.getOpcode() == SystemZISD::ADJDYNALLOC &&607expandAdjDynAlloc(AM, true, SDValue()))608;609else610// Otherwise try expanding each component.611while (expandAddress(AM, true) ||612(AM.Index.getNode() && expandAddress(AM, false)))613continue;614615// Reject cases where it isn't profitable to use LA(Y).616if (AM.Form == SystemZAddressingMode::FormBDXLA &&617!shouldUseLA(AM.Base.getNode(), AM.Disp, AM.Index.getNode()))618return false;619620// Reject cases where the other instruction in a pair should be used.621if (!isValidDisp(AM.DR, AM.Disp))622return false;623624// Make sure that ADJDYNALLOC is included where necessary.625if (AM.isDynAlloc() && !AM.IncludesDynAlloc)626return false;627628LLVM_DEBUG(AM.dump(CurDAG));629return true;630}631632// Insert a node into the DAG at least before Pos. This will reposition633// the node as needed, and will assign it a node ID that is <= Pos's ID.634// Note that this does *not* preserve the uniqueness of node IDs!635// The selection DAG must no longer depend on their uniqueness when this636// function is used.637static void insertDAGNode(SelectionDAG *DAG, SDNode *Pos, SDValue N) {638if (N->getNodeId() == -1 ||639(SelectionDAGISel::getUninvalidatedNodeId(N.getNode()) >640SelectionDAGISel::getUninvalidatedNodeId(Pos))) {641DAG->RepositionNode(Pos->getIterator(), N.getNode());642// Mark Node as invalid for pruning as after this it may be a successor to a643// selected node but otherwise be in the same position of Pos.644// Conservatively mark it with the same -abs(Id) to assure node id645// invariant is preserved.646N->setNodeId(Pos->getNodeId());647SelectionDAGISel::InvalidateNodeId(N.getNode());648}649}650651void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM,652EVT VT, SDValue &Base,653SDValue &Disp) const {654Base = AM.Base;655if (!Base.getNode())656// Register 0 means "no base". This is mostly useful for shifts.657Base = CurDAG->getRegister(0, VT);658else if (Base.getOpcode() == ISD::FrameIndex) {659// Lower a FrameIndex to a TargetFrameIndex.660int64_t FrameIndex = cast<FrameIndexSDNode>(Base)->getIndex();661Base = CurDAG->getTargetFrameIndex(FrameIndex, VT);662} else if (Base.getValueType() != VT) {663// Truncate values from i64 to i32, for shifts.664assert(VT == MVT::i32 && Base.getValueType() == MVT::i64 &&665"Unexpected truncation");666SDLoc DL(Base);667SDValue Trunc = CurDAG->getNode(ISD::TRUNCATE, DL, VT, Base);668insertDAGNode(CurDAG, Base.getNode(), Trunc);669Base = Trunc;670}671672// Lower the displacement to a TargetConstant.673Disp = CurDAG->getTargetConstant(AM.Disp, SDLoc(Base), VT);674}675676void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM,677EVT VT, SDValue &Base,678SDValue &Disp,679SDValue &Index) const {680getAddressOperands(AM, VT, Base, Disp);681682Index = AM.Index;683if (!Index.getNode())684// Register 0 means "no index".685Index = CurDAG->getRegister(0, VT);686}687688bool SystemZDAGToDAGISel::selectBDAddr(SystemZAddressingMode::DispRange DR,689SDValue Addr, SDValue &Base,690SDValue &Disp) const {691SystemZAddressingMode AM(SystemZAddressingMode::FormBD, DR);692if (!selectAddress(Addr, AM))693return false;694695getAddressOperands(AM, Addr.getValueType(), Base, Disp);696return true;697}698699bool SystemZDAGToDAGISel::selectMVIAddr(SystemZAddressingMode::DispRange DR,700SDValue Addr, SDValue &Base,701SDValue &Disp) const {702SystemZAddressingMode AM(SystemZAddressingMode::FormBDXNormal, DR);703if (!selectAddress(Addr, AM) || AM.Index.getNode())704return false;705706getAddressOperands(AM, Addr.getValueType(), Base, Disp);707return true;708}709710bool SystemZDAGToDAGISel::selectBDXAddr(SystemZAddressingMode::AddrForm Form,711SystemZAddressingMode::DispRange DR,712SDValue Addr, SDValue &Base,713SDValue &Disp, SDValue &Index) const {714SystemZAddressingMode AM(Form, DR);715if (!selectAddress(Addr, AM))716return false;717718getAddressOperands(AM, Addr.getValueType(), Base, Disp, Index);719return true;720}721722bool SystemZDAGToDAGISel::selectBDVAddr12Only(SDValue Addr, SDValue Elem,723SDValue &Base,724SDValue &Disp,725SDValue &Index) const {726SDValue Regs[2];727if (selectBDXAddr12Only(Addr, Regs[0], Disp, Regs[1]) &&728Regs[0].getNode() && Regs[1].getNode()) {729for (unsigned int I = 0; I < 2; ++I) {730Base = Regs[I];731Index = Regs[1 - I];732// We can't tell here whether the index vector has the right type733// for the access; the caller needs to do that instead.734if (Index.getOpcode() == ISD::ZERO_EXTEND)735Index = Index.getOperand(0);736if (Index.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&737Index.getOperand(1) == Elem) {738Index = Index.getOperand(0);739return true;740}741}742}743return false;744}745746bool SystemZDAGToDAGISel::detectOrAndInsertion(SDValue &Op,747uint64_t InsertMask) const {748// We're only interested in cases where the insertion is into some operand749// of Op, rather than into Op itself. The only useful case is an AND.750if (Op.getOpcode() != ISD::AND)751return false;752753// We need a constant mask.754auto *MaskNode = dyn_cast<ConstantSDNode>(Op.getOperand(1).getNode());755if (!MaskNode)756return false;757758// It's not an insertion of Op.getOperand(0) if the two masks overlap.759uint64_t AndMask = MaskNode->getZExtValue();760if (InsertMask & AndMask)761return false;762763// It's only an insertion if all bits are covered or are known to be zero.764// The inner check covers all cases but is more expensive.765uint64_t Used = allOnes(Op.getValueSizeInBits());766if (Used != (AndMask | InsertMask)) {767KnownBits Known = CurDAG->computeKnownBits(Op.getOperand(0));768if (Used != (AndMask | InsertMask | Known.Zero.getZExtValue()))769return false;770}771772Op = Op.getOperand(0);773return true;774}775776bool SystemZDAGToDAGISel::refineRxSBGMask(RxSBGOperands &RxSBG,777uint64_t Mask) const {778const SystemZInstrInfo *TII = getInstrInfo();779if (RxSBG.Rotate != 0)780Mask = (Mask << RxSBG.Rotate) | (Mask >> (64 - RxSBG.Rotate));781Mask &= RxSBG.Mask;782if (TII->isRxSBGMask(Mask, RxSBG.BitSize, RxSBG.Start, RxSBG.End)) {783RxSBG.Mask = Mask;784return true;785}786return false;787}788789// Return true if any bits of (RxSBG.Input & Mask) are significant.790static bool maskMatters(RxSBGOperands &RxSBG, uint64_t Mask) {791// Rotate the mask in the same way as RxSBG.Input is rotated.792if (RxSBG.Rotate != 0)793Mask = ((Mask << RxSBG.Rotate) | (Mask >> (64 - RxSBG.Rotate)));794return (Mask & RxSBG.Mask) != 0;795}796797bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const {798SDValue N = RxSBG.Input;799unsigned Opcode = N.getOpcode();800switch (Opcode) {801case ISD::TRUNCATE: {802if (RxSBG.Opcode == SystemZ::RNSBG)803return false;804if (N.getOperand(0).getValueSizeInBits() > 64)805return false;806uint64_t BitSize = N.getValueSizeInBits();807uint64_t Mask = allOnes(BitSize);808if (!refineRxSBGMask(RxSBG, Mask))809return false;810RxSBG.Input = N.getOperand(0);811return true;812}813case ISD::AND: {814if (RxSBG.Opcode == SystemZ::RNSBG)815return false;816817auto *MaskNode = dyn_cast<ConstantSDNode>(N.getOperand(1).getNode());818if (!MaskNode)819return false;820821SDValue Input = N.getOperand(0);822uint64_t Mask = MaskNode->getZExtValue();823if (!refineRxSBGMask(RxSBG, Mask)) {824// If some bits of Input are already known zeros, those bits will have825// been removed from the mask. See if adding them back in makes the826// mask suitable.827KnownBits Known = CurDAG->computeKnownBits(Input);828Mask |= Known.Zero.getZExtValue();829if (!refineRxSBGMask(RxSBG, Mask))830return false;831}832RxSBG.Input = Input;833return true;834}835836case ISD::OR: {837if (RxSBG.Opcode != SystemZ::RNSBG)838return false;839840auto *MaskNode = dyn_cast<ConstantSDNode>(N.getOperand(1).getNode());841if (!MaskNode)842return false;843844SDValue Input = N.getOperand(0);845uint64_t Mask = ~MaskNode->getZExtValue();846if (!refineRxSBGMask(RxSBG, Mask)) {847// If some bits of Input are already known ones, those bits will have848// been removed from the mask. See if adding them back in makes the849// mask suitable.850KnownBits Known = CurDAG->computeKnownBits(Input);851Mask &= ~Known.One.getZExtValue();852if (!refineRxSBGMask(RxSBG, Mask))853return false;854}855RxSBG.Input = Input;856return true;857}858859case ISD::ROTL: {860// Any 64-bit rotate left can be merged into the RxSBG.861if (RxSBG.BitSize != 64 || N.getValueType() != MVT::i64)862return false;863auto *CountNode = dyn_cast<ConstantSDNode>(N.getOperand(1).getNode());864if (!CountNode)865return false;866867RxSBG.Rotate = (RxSBG.Rotate + CountNode->getZExtValue()) & 63;868RxSBG.Input = N.getOperand(0);869return true;870}871872case ISD::ANY_EXTEND:873// Bits above the extended operand are don't-care.874RxSBG.Input = N.getOperand(0);875return true;876877case ISD::ZERO_EXTEND:878if (RxSBG.Opcode != SystemZ::RNSBG) {879// Restrict the mask to the extended operand.880unsigned InnerBitSize = N.getOperand(0).getValueSizeInBits();881if (!refineRxSBGMask(RxSBG, allOnes(InnerBitSize)))882return false;883884RxSBG.Input = N.getOperand(0);885return true;886}887[[fallthrough]];888889case ISD::SIGN_EXTEND: {890// Check that the extension bits are don't-care (i.e. are masked out891// by the final mask).892unsigned BitSize = N.getValueSizeInBits();893unsigned InnerBitSize = N.getOperand(0).getValueSizeInBits();894if (maskMatters(RxSBG, allOnes(BitSize) - allOnes(InnerBitSize))) {895// In the case where only the sign bit is active, increase Rotate with896// the extension width.897if (RxSBG.Mask == 1 && RxSBG.Rotate == 1)898RxSBG.Rotate += (BitSize - InnerBitSize);899else900return false;901}902903RxSBG.Input = N.getOperand(0);904return true;905}906907case ISD::SHL: {908auto *CountNode = dyn_cast<ConstantSDNode>(N.getOperand(1).getNode());909if (!CountNode)910return false;911912uint64_t Count = CountNode->getZExtValue();913unsigned BitSize = N.getValueSizeInBits();914if (Count < 1 || Count >= BitSize)915return false;916917if (RxSBG.Opcode == SystemZ::RNSBG) {918// Treat (shl X, count) as (rotl X, size-count) as long as the bottom919// count bits from RxSBG.Input are ignored.920if (maskMatters(RxSBG, allOnes(Count)))921return false;922} else {923// Treat (shl X, count) as (and (rotl X, count), ~0<<count).924if (!refineRxSBGMask(RxSBG, allOnes(BitSize - Count) << Count))925return false;926}927928RxSBG.Rotate = (RxSBG.Rotate + Count) & 63;929RxSBG.Input = N.getOperand(0);930return true;931}932933case ISD::SRL:934case ISD::SRA: {935auto *CountNode = dyn_cast<ConstantSDNode>(N.getOperand(1).getNode());936if (!CountNode)937return false;938939uint64_t Count = CountNode->getZExtValue();940unsigned BitSize = N.getValueSizeInBits();941if (Count < 1 || Count >= BitSize)942return false;943944if (RxSBG.Opcode == SystemZ::RNSBG || Opcode == ISD::SRA) {945// Treat (srl|sra X, count) as (rotl X, size-count) as long as the top946// count bits from RxSBG.Input are ignored.947if (maskMatters(RxSBG, allOnes(Count) << (BitSize - Count)))948return false;949} else {950// Treat (srl X, count), mask) as (and (rotl X, size-count), ~0>>count),951// which is similar to SLL above.952if (!refineRxSBGMask(RxSBG, allOnes(BitSize - Count)))953return false;954}955956RxSBG.Rotate = (RxSBG.Rotate - Count) & 63;957RxSBG.Input = N.getOperand(0);958return true;959}960default:961return false;962}963}964965SDValue SystemZDAGToDAGISel::getUNDEF(const SDLoc &DL, EVT VT) const {966SDNode *N = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);967return SDValue(N, 0);968}969970SDValue SystemZDAGToDAGISel::convertTo(const SDLoc &DL, EVT VT,971SDValue N) const {972if (N.getValueType() == MVT::i32 && VT == MVT::i64)973return CurDAG->getTargetInsertSubreg(SystemZ::subreg_l32,974DL, VT, getUNDEF(DL, MVT::i64), N);975if (N.getValueType() == MVT::i64 && VT == MVT::i32)976return CurDAG->getTargetExtractSubreg(SystemZ::subreg_l32, DL, VT, N);977assert(N.getValueType() == VT && "Unexpected value types");978return N;979}980981bool SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {982SDLoc DL(N);983EVT VT = N->getValueType(0);984if (!VT.isInteger() || VT.getSizeInBits() > 64)985return false;986RxSBGOperands RISBG(SystemZ::RISBG, SDValue(N, 0));987unsigned Count = 0;988while (expandRxSBG(RISBG))989// The widening or narrowing is expected to be free.990// Counting widening or narrowing as a saved operation will result in991// preferring an R*SBG over a simple shift/logical instruction.992if (RISBG.Input.getOpcode() != ISD::ANY_EXTEND &&993RISBG.Input.getOpcode() != ISD::TRUNCATE)994Count += 1;995if (Count == 0 || isa<ConstantSDNode>(RISBG.Input))996return false;997998// Prefer to use normal shift instructions over RISBG, since they can handle999// all cases and are sometimes shorter.1000if (Count == 1 && N->getOpcode() != ISD::AND)1001return false;10021003// Prefer register extensions like LLC over RISBG. Also prefer to start1004// out with normal ANDs if one instruction would be enough. We can convert1005// these ANDs into an RISBG later if a three-address instruction is useful.1006if (RISBG.Rotate == 0) {1007bool PreferAnd = false;1008// Prefer AND for any 32-bit and-immediate operation.1009if (VT == MVT::i32)1010PreferAnd = true;1011// As well as for any 64-bit operation that can be implemented via LLC(R),1012// LLH(R), LLGT(R), or one of the and-immediate instructions.1013else if (RISBG.Mask == 0xff ||1014RISBG.Mask == 0xffff ||1015RISBG.Mask == 0x7fffffff ||1016SystemZ::isImmLF(~RISBG.Mask) ||1017SystemZ::isImmHF(~RISBG.Mask))1018PreferAnd = true;1019// And likewise for the LLZRGF instruction, which doesn't have a register1020// to register version.1021else if (auto *Load = dyn_cast<LoadSDNode>(RISBG.Input)) {1022if (Load->getMemoryVT() == MVT::i32 &&1023(Load->getExtensionType() == ISD::EXTLOAD ||1024Load->getExtensionType() == ISD::ZEXTLOAD) &&1025RISBG.Mask == 0xffffff00 &&1026Subtarget->hasLoadAndZeroRightmostByte())1027PreferAnd = true;1028}1029if (PreferAnd) {1030// Replace the current node with an AND. Note that the current node1031// might already be that same AND, in which case it is already CSE'd1032// with it, and we must not call ReplaceNode.1033SDValue In = convertTo(DL, VT, RISBG.Input);1034SDValue Mask = CurDAG->getConstant(RISBG.Mask, DL, VT);1035SDValue New = CurDAG->getNode(ISD::AND, DL, VT, In, Mask);1036if (N != New.getNode()) {1037insertDAGNode(CurDAG, N, Mask);1038insertDAGNode(CurDAG, N, New);1039ReplaceNode(N, New.getNode());1040N = New.getNode();1041}1042// Now, select the machine opcode to implement this operation.1043if (!N->isMachineOpcode())1044SelectCode(N);1045return true;1046}1047}10481049unsigned Opcode = SystemZ::RISBG;1050// Prefer RISBGN if available, since it does not clobber CC.1051if (Subtarget->hasMiscellaneousExtensions())1052Opcode = SystemZ::RISBGN;1053EVT OpcodeVT = MVT::i64;1054if (VT == MVT::i32 && Subtarget->hasHighWord() &&1055// We can only use the 32-bit instructions if all source bits are1056// in the low 32 bits without wrapping, both after rotation (because1057// of the smaller range for Start and End) and before rotation1058// (because the input value is truncated).1059RISBG.Start >= 32 && RISBG.End >= RISBG.Start &&1060((RISBG.Start + RISBG.Rotate) & 63) >= 32 &&1061((RISBG.End + RISBG.Rotate) & 63) >=1062((RISBG.Start + RISBG.Rotate) & 63)) {1063Opcode = SystemZ::RISBMux;1064OpcodeVT = MVT::i32;1065RISBG.Start &= 31;1066RISBG.End &= 31;1067}1068SDValue Ops[5] = {1069getUNDEF(DL, OpcodeVT),1070convertTo(DL, OpcodeVT, RISBG.Input),1071CurDAG->getTargetConstant(RISBG.Start, DL, MVT::i32),1072CurDAG->getTargetConstant(RISBG.End | 128, DL, MVT::i32),1073CurDAG->getTargetConstant(RISBG.Rotate, DL, MVT::i32)1074};1075SDValue New = convertTo(1076DL, VT, SDValue(CurDAG->getMachineNode(Opcode, DL, OpcodeVT, Ops), 0));1077ReplaceNode(N, New.getNode());1078return true;1079}10801081bool SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) {1082SDLoc DL(N);1083EVT VT = N->getValueType(0);1084if (!VT.isInteger() || VT.getSizeInBits() > 64)1085return false;1086// Try treating each operand of N as the second operand of the RxSBG1087// and see which goes deepest.1088RxSBGOperands RxSBG[] = {1089RxSBGOperands(Opcode, N->getOperand(0)),1090RxSBGOperands(Opcode, N->getOperand(1))1091};1092unsigned Count[] = { 0, 0 };1093for (unsigned I = 0; I < 2; ++I)1094while (RxSBG[I].Input->hasOneUse() && expandRxSBG(RxSBG[I]))1095// In cases of multiple users it seems better to keep the simple1096// instruction as they are one cycle faster, and it also helps in cases1097// where both inputs share a common node.1098// The widening or narrowing is expected to be free. Counting widening1099// or narrowing as a saved operation will result in preferring an R*SBG1100// over a simple shift/logical instruction.1101if (RxSBG[I].Input.getOpcode() != ISD::ANY_EXTEND &&1102RxSBG[I].Input.getOpcode() != ISD::TRUNCATE)1103Count[I] += 1;11041105// Do nothing if neither operand is suitable.1106if (Count[0] == 0 && Count[1] == 0)1107return false;11081109// Pick the deepest second operand.1110unsigned I = Count[0] > Count[1] ? 0 : 1;1111SDValue Op0 = N->getOperand(I ^ 1);11121113// Prefer IC for character insertions from memory.1114if (Opcode == SystemZ::ROSBG && (RxSBG[I].Mask & 0xff) == 0)1115if (auto *Load = dyn_cast<LoadSDNode>(Op0.getNode()))1116if (Load->getMemoryVT() == MVT::i8)1117return false;11181119// See whether we can avoid an AND in the first operand by converting1120// ROSBG to RISBG.1121if (Opcode == SystemZ::ROSBG && detectOrAndInsertion(Op0, RxSBG[I].Mask)) {1122Opcode = SystemZ::RISBG;1123// Prefer RISBGN if available, since it does not clobber CC.1124if (Subtarget->hasMiscellaneousExtensions())1125Opcode = SystemZ::RISBGN;1126}11271128SDValue Ops[5] = {1129convertTo(DL, MVT::i64, Op0),1130convertTo(DL, MVT::i64, RxSBG[I].Input),1131CurDAG->getTargetConstant(RxSBG[I].Start, DL, MVT::i32),1132CurDAG->getTargetConstant(RxSBG[I].End, DL, MVT::i32),1133CurDAG->getTargetConstant(RxSBG[I].Rotate, DL, MVT::i32)1134};1135SDValue New = convertTo(1136DL, VT, SDValue(CurDAG->getMachineNode(Opcode, DL, MVT::i64, Ops), 0));1137ReplaceNode(N, New.getNode());1138return true;1139}11401141void SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node,1142SDValue Op0, uint64_t UpperVal,1143uint64_t LowerVal) {1144EVT VT = Node->getValueType(0);1145SDLoc DL(Node);1146SDValue Upper = CurDAG->getConstant(UpperVal, DL, VT);1147if (Op0.getNode())1148Upper = CurDAG->getNode(Opcode, DL, VT, Op0, Upper);11491150{1151// When we haven't passed in Op0, Upper will be a constant. In order to1152// prevent folding back to the large immediate in `Or = getNode(...)` we run1153// SelectCode first and end up with an opaque machine node. This means that1154// we need to use a handle to keep track of Upper in case it gets CSE'd by1155// SelectCode.1156//1157// Note that in the case where Op0 is passed in we could just call1158// SelectCode(Upper) later, along with the SelectCode(Or), and avoid needing1159// the handle at all, but it's fine to do it here.1160//1161// TODO: This is a pretty hacky way to do this. Can we do something that1162// doesn't require a two paragraph explanation?1163HandleSDNode Handle(Upper);1164SelectCode(Upper.getNode());1165Upper = Handle.getValue();1166}11671168SDValue Lower = CurDAG->getConstant(LowerVal, DL, VT);1169SDValue Or = CurDAG->getNode(Opcode, DL, VT, Upper, Lower);11701171ReplaceNode(Node, Or.getNode());11721173SelectCode(Or.getNode());1174}11751176void SystemZDAGToDAGISel::loadVectorConstant(1177const SystemZVectorConstantInfo &VCI, SDNode *Node) {1178assert((VCI.Opcode == SystemZISD::BYTE_MASK ||1179VCI.Opcode == SystemZISD::REPLICATE ||1180VCI.Opcode == SystemZISD::ROTATE_MASK) &&1181"Bad opcode!");1182assert(VCI.VecVT.getSizeInBits() == 128 && "Expected a vector type");1183EVT VT = Node->getValueType(0);1184SDLoc DL(Node);1185SmallVector<SDValue, 2> Ops;1186for (unsigned OpVal : VCI.OpVals)1187Ops.push_back(CurDAG->getTargetConstant(OpVal, DL, MVT::i32));1188SDValue Op = CurDAG->getNode(VCI.Opcode, DL, VCI.VecVT, Ops);11891190if (VCI.VecVT == VT.getSimpleVT())1191ReplaceNode(Node, Op.getNode());1192else if (VT.getSizeInBits() == 128) {1193SDValue BitCast = CurDAG->getNode(ISD::BITCAST, DL, VT, Op);1194ReplaceNode(Node, BitCast.getNode());1195SelectCode(BitCast.getNode());1196} else { // float or double1197unsigned SubRegIdx =1198(VT.getSizeInBits() == 32 ? SystemZ::subreg_h32 : SystemZ::subreg_h64);1199ReplaceNode(1200Node, CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, Op).getNode());1201}1202SelectCode(Op.getNode());1203}12041205SDNode *SystemZDAGToDAGISel::loadPoolVectorConstant(APInt Val, EVT VT, SDLoc DL) {1206SDNode *ResNode;1207assert (VT.getSizeInBits() == 128);12081209SDValue CP = CurDAG->getTargetConstantPool(1210ConstantInt::get(Type::getInt128Ty(*CurDAG->getContext()), Val),1211TLI->getPointerTy(CurDAG->getDataLayout()));12121213EVT PtrVT = CP.getValueType();1214SDValue Ops[] = {1215SDValue(CurDAG->getMachineNode(SystemZ::LARL, DL, PtrVT, CP), 0),1216CurDAG->getTargetConstant(0, DL, PtrVT),1217CurDAG->getRegister(0, PtrVT),1218CurDAG->getEntryNode()1219};1220ResNode = CurDAG->getMachineNode(SystemZ::VL, DL, VT, MVT::Other, Ops);12211222// Annotate ResNode with memory operand information so that MachineInstr1223// queries work properly. This e.g. gives the register allocation the1224// required information for rematerialization.1225MachineFunction& MF = CurDAG->getMachineFunction();1226MachineMemOperand *MemOp =1227MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),1228MachineMemOperand::MOLoad, 16, Align(8));12291230CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});1231return ResNode;1232}12331234bool SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) {1235SDValue ElemV = N->getOperand(2);1236auto *ElemN = dyn_cast<ConstantSDNode>(ElemV);1237if (!ElemN)1238return false;12391240unsigned Elem = ElemN->getZExtValue();1241EVT VT = N->getValueType(0);1242if (Elem >= VT.getVectorNumElements())1243return false;12441245auto *Load = dyn_cast<LoadSDNode>(N->getOperand(1));1246if (!Load || !Load->hasNUsesOfValue(1, 0))1247return false;1248if (Load->getMemoryVT().getSizeInBits() !=1249Load->getValueType(0).getSizeInBits())1250return false;12511252SDValue Base, Disp, Index;1253if (!selectBDVAddr12Only(Load->getBasePtr(), ElemV, Base, Disp, Index) ||1254Index.getValueType() != VT.changeVectorElementTypeToInteger())1255return false;12561257SDLoc DL(Load);1258SDValue Ops[] = {1259N->getOperand(0), Base, Disp, Index,1260CurDAG->getTargetConstant(Elem, DL, MVT::i32), Load->getChain()1261};1262SDNode *Res = CurDAG->getMachineNode(Opcode, DL, VT, MVT::Other, Ops);1263ReplaceUses(SDValue(Load, 1), SDValue(Res, 1));1264ReplaceNode(N, Res);1265return true;1266}12671268bool SystemZDAGToDAGISel::tryScatter(StoreSDNode *Store, unsigned Opcode) {1269SDValue Value = Store->getValue();1270if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT)1271return false;1272if (Store->getMemoryVT().getSizeInBits() != Value.getValueSizeInBits())1273return false;12741275SDValue ElemV = Value.getOperand(1);1276auto *ElemN = dyn_cast<ConstantSDNode>(ElemV);1277if (!ElemN)1278return false;12791280SDValue Vec = Value.getOperand(0);1281EVT VT = Vec.getValueType();1282unsigned Elem = ElemN->getZExtValue();1283if (Elem >= VT.getVectorNumElements())1284return false;12851286SDValue Base, Disp, Index;1287if (!selectBDVAddr12Only(Store->getBasePtr(), ElemV, Base, Disp, Index) ||1288Index.getValueType() != VT.changeVectorElementTypeToInteger())1289return false;12901291SDLoc DL(Store);1292SDValue Ops[] = {1293Vec, Base, Disp, Index, CurDAG->getTargetConstant(Elem, DL, MVT::i32),1294Store->getChain()1295};1296ReplaceNode(Store, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));1297return true;1298}12991300// Check whether or not the chain ending in StoreNode is suitable for doing1301// the {load; op; store} to modify transformation.1302static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,1303SDValue StoredVal, SelectionDAG *CurDAG,1304LoadSDNode *&LoadNode,1305SDValue &InputChain) {1306// Is the stored value result 0 of the operation?1307if (StoredVal.getResNo() != 0)1308return false;13091310// Are there other uses of the loaded value than the operation?1311if (!StoredVal.getNode()->hasNUsesOfValue(1, 0))1312return false;13131314// Is the store non-extending and non-indexed?1315if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())1316return false;13171318SDValue Load = StoredVal->getOperand(0);1319// Is the stored value a non-extending and non-indexed load?1320if (!ISD::isNormalLoad(Load.getNode()))1321return false;13221323// Return LoadNode by reference.1324LoadNode = cast<LoadSDNode>(Load);13251326// Is store the only read of the loaded value?1327if (!Load.hasOneUse())1328return false;13291330// Is the address of the store the same as the load?1331if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||1332LoadNode->getOffset() != StoreNode->getOffset())1333return false;13341335// Check if the chain is produced by the load or is a TokenFactor with1336// the load output chain as an operand. Return InputChain by reference.1337SDValue Chain = StoreNode->getChain();13381339bool ChainCheck = false;1340if (Chain == Load.getValue(1)) {1341ChainCheck = true;1342InputChain = LoadNode->getChain();1343} else if (Chain.getOpcode() == ISD::TokenFactor) {1344SmallVector<SDValue, 4> ChainOps;1345SmallVector<const SDNode *, 4> LoopWorklist;1346SmallPtrSet<const SDNode *, 16> Visited;1347const unsigned int Max = 1024;1348for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {1349SDValue Op = Chain.getOperand(i);1350if (Op == Load.getValue(1)) {1351ChainCheck = true;1352// Drop Load, but keep its chain. No cycle check necessary.1353ChainOps.push_back(Load.getOperand(0));1354continue;1355}1356LoopWorklist.push_back(Op.getNode());1357ChainOps.push_back(Op);1358}13591360if (ChainCheck) {1361// Add the other operand of StoredVal to worklist.1362for (SDValue Op : StoredVal->ops())1363if (Op.getNode() != LoadNode)1364LoopWorklist.push_back(Op.getNode());13651366// Check if Load is reachable from any of the nodes in the worklist.1367if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max,1368true))1369return false;13701371// Make a new TokenFactor with all the other input chains except1372// for the load.1373InputChain = CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain),1374MVT::Other, ChainOps);1375}1376}1377if (!ChainCheck)1378return false;13791380return true;1381}13821383// Change a chain of {load; op; store} of the same value into a simple op1384// through memory of that value, if the uses of the modified value and its1385// address are suitable.1386//1387// The tablegen pattern memory operand pattern is currently not able to match1388// the case where the CC on the original operation are used.1389//1390// See the equivalent routine in X86ISelDAGToDAG for further comments.1391bool SystemZDAGToDAGISel::tryFoldLoadStoreIntoMemOperand(SDNode *Node) {1392StoreSDNode *StoreNode = cast<StoreSDNode>(Node);1393SDValue StoredVal = StoreNode->getOperand(1);1394unsigned Opc = StoredVal->getOpcode();1395SDLoc DL(StoreNode);13961397// Before we try to select anything, make sure this is memory operand size1398// and opcode we can handle. Note that this must match the code below that1399// actually lowers the opcodes.1400EVT MemVT = StoreNode->getMemoryVT();1401unsigned NewOpc = 0;1402bool NegateOperand = false;1403switch (Opc) {1404default:1405return false;1406case SystemZISD::SSUBO:1407NegateOperand = true;1408[[fallthrough]];1409case SystemZISD::SADDO:1410if (MemVT == MVT::i32)1411NewOpc = SystemZ::ASI;1412else if (MemVT == MVT::i64)1413NewOpc = SystemZ::AGSI;1414else1415return false;1416break;1417case SystemZISD::USUBO:1418NegateOperand = true;1419[[fallthrough]];1420case SystemZISD::UADDO:1421if (MemVT == MVT::i32)1422NewOpc = SystemZ::ALSI;1423else if (MemVT == MVT::i64)1424NewOpc = SystemZ::ALGSI;1425else1426return false;1427break;1428}14291430LoadSDNode *LoadNode = nullptr;1431SDValue InputChain;1432if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadNode,1433InputChain))1434return false;14351436SDValue Operand = StoredVal.getOperand(1);1437auto *OperandC = dyn_cast<ConstantSDNode>(Operand);1438if (!OperandC)1439return false;1440auto OperandV = OperandC->getAPIntValue();1441if (NegateOperand)1442OperandV = -OperandV;1443if (OperandV.getSignificantBits() > 8)1444return false;1445Operand = CurDAG->getTargetConstant(OperandV, DL, MemVT);14461447SDValue Base, Disp;1448if (!selectBDAddr20Only(StoreNode->getBasePtr(), Base, Disp))1449return false;14501451SDValue Ops[] = { Base, Disp, Operand, InputChain };1452MachineSDNode *Result =1453CurDAG->getMachineNode(NewOpc, DL, MVT::i32, MVT::Other, Ops);1454CurDAG->setNodeMemRefs(1455Result, {StoreNode->getMemOperand(), LoadNode->getMemOperand()});14561457ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));1458ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));1459CurDAG->RemoveDeadNode(Node);1460return true;1461}14621463bool SystemZDAGToDAGISel::canUseBlockOperation(StoreSDNode *Store,1464LoadSDNode *Load) const {1465// Check that the two memory operands have the same size.1466if (Load->getMemoryVT() != Store->getMemoryVT())1467return false;14681469// Volatility stops an access from being decomposed.1470if (Load->isVolatile() || Store->isVolatile())1471return false;14721473// There's no chance of overlap if the load is invariant.1474if (Load->isInvariant() && Load->isDereferenceable())1475return true;14761477// Otherwise we need to check whether there's an alias.1478const Value *V1 = Load->getMemOperand()->getValue();1479const Value *V2 = Store->getMemOperand()->getValue();1480if (!V1 || !V2)1481return false;14821483// Reject equality.1484uint64_t Size = Load->getMemoryVT().getStoreSize();1485int64_t End1 = Load->getSrcValueOffset() + Size;1486int64_t End2 = Store->getSrcValueOffset() + Size;1487if (V1 == V2 && End1 == End2)1488return false;14891490return AA->isNoAlias(MemoryLocation(V1, End1, Load->getAAInfo()),1491MemoryLocation(V2, End2, Store->getAAInfo()));1492}14931494bool SystemZDAGToDAGISel::storeLoadCanUseMVC(SDNode *N) const {1495auto *Store = cast<StoreSDNode>(N);1496auto *Load = cast<LoadSDNode>(Store->getValue());14971498// Prefer not to use MVC if either address can use ... RELATIVE LONG1499// instructions.1500uint64_t Size = Load->getMemoryVT().getStoreSize();1501if (Size > 1 && Size <= 8) {1502// Prefer LHRL, LRL and LGRL.1503if (SystemZISD::isPCREL(Load->getBasePtr().getOpcode()))1504return false;1505// Prefer STHRL, STRL and STGRL.1506if (SystemZISD::isPCREL(Store->getBasePtr().getOpcode()))1507return false;1508}15091510return canUseBlockOperation(Store, Load);1511}15121513bool SystemZDAGToDAGISel::storeLoadCanUseBlockBinary(SDNode *N,1514unsigned I) const {1515auto *StoreA = cast<StoreSDNode>(N);1516auto *LoadA = cast<LoadSDNode>(StoreA->getValue().getOperand(1 - I));1517auto *LoadB = cast<LoadSDNode>(StoreA->getValue().getOperand(I));1518return !LoadA->isVolatile() && LoadA->getMemoryVT() == LoadB->getMemoryVT() &&1519canUseBlockOperation(StoreA, LoadB);1520}15211522bool SystemZDAGToDAGISel::storeLoadIsAligned(SDNode *N) const {15231524auto *MemAccess = cast<MemSDNode>(N);1525auto *LdSt = dyn_cast<LSBaseSDNode>(MemAccess);1526TypeSize StoreSize = MemAccess->getMemoryVT().getStoreSize();1527SDValue BasePtr = MemAccess->getBasePtr();1528MachineMemOperand *MMO = MemAccess->getMemOperand();1529assert(MMO && "Expected a memory operand.");15301531// The memory access must have a proper alignment and no index register.1532// Only load and store nodes have the offset operand (atomic loads do not).1533if (MemAccess->getAlign().value() < StoreSize ||1534(LdSt && !LdSt->getOffset().isUndef()))1535return false;15361537// The MMO must not have an unaligned offset.1538if (MMO->getOffset() % StoreSize != 0)1539return false;15401541// An access to GOT or the Constant Pool is aligned.1542if (const PseudoSourceValue *PSV = MMO->getPseudoValue())1543if ((PSV->isGOT() || PSV->isConstantPool()))1544return true;15451546// Check the alignment of a Global Address.1547if (BasePtr.getNumOperands())1548if (GlobalAddressSDNode *GA =1549dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0))) {1550// The immediate offset must be aligned.1551if (GA->getOffset() % StoreSize != 0)1552return false;15531554// The alignment of the symbol itself must be at least the store size.1555const GlobalValue *GV = GA->getGlobal();1556const DataLayout &DL = GV->getDataLayout();1557if (GV->getPointerAlignment(DL).value() < StoreSize)1558return false;1559}15601561return true;1562}15631564ISD::LoadExtType SystemZDAGToDAGISel::getLoadExtType(SDNode *N) const {1565ISD::LoadExtType ETy;1566if (auto *L = dyn_cast<LoadSDNode>(N))1567ETy = L->getExtensionType();1568else if (auto *AL = dyn_cast<AtomicSDNode>(N))1569ETy = AL->getExtensionType();1570else1571llvm_unreachable("Unkown load node type.");1572return ETy;1573}15741575void SystemZDAGToDAGISel::Select(SDNode *Node) {1576// If we have a custom node, we already have selected!1577if (Node->isMachineOpcode()) {1578LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");1579Node->setNodeId(-1);1580return;1581}15821583unsigned Opcode = Node->getOpcode();1584switch (Opcode) {1585case ISD::OR:1586if (Node->getOperand(1).getOpcode() != ISD::Constant)1587if (tryRxSBG(Node, SystemZ::ROSBG))1588return;1589goto or_xor;15901591case ISD::XOR:1592if (Node->getOperand(1).getOpcode() != ISD::Constant)1593if (tryRxSBG(Node, SystemZ::RXSBG))1594return;1595// Fall through.1596or_xor:1597// If this is a 64-bit operation in which both 32-bit halves are nonzero,1598// split the operation into two. If both operands here happen to be1599// constant, leave this to common code to optimize.1600if (Node->getValueType(0) == MVT::i64 &&1601Node->getOperand(0).getOpcode() != ISD::Constant)1602if (auto *Op1 = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {1603uint64_t Val = Op1->getZExtValue();1604// Don't split the operation if we can match one of the combined1605// logical operations provided by miscellaneous-extensions-3.1606if (Subtarget->hasMiscellaneousExtensions3()) {1607unsigned ChildOpcode = Node->getOperand(0).getOpcode();1608// Check whether this expression matches NAND/NOR/NXOR.1609if (Val == (uint64_t)-1 && Opcode == ISD::XOR)1610if (ChildOpcode == ISD::AND || ChildOpcode == ISD::OR ||1611ChildOpcode == ISD::XOR)1612break;1613// Check whether this expression matches OR-with-complement1614// (or matches an alternate pattern for NXOR).1615if (ChildOpcode == ISD::XOR) {1616auto Op0 = Node->getOperand(0);1617if (auto *Op0Op1 = dyn_cast<ConstantSDNode>(Op0->getOperand(1)))1618if (Op0Op1->getZExtValue() == (uint64_t)-1)1619break;1620}1621}1622// Don't split an XOR with -1 as LCGR/AGHI is more compact.1623if (Opcode == ISD::XOR && Op1->isAllOnes())1624break;1625if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val)) {1626splitLargeImmediate(Opcode, Node, Node->getOperand(0),1627Val - uint32_t(Val), uint32_t(Val));1628return;1629}1630}1631break;16321633case ISD::AND:1634if (Node->getOperand(1).getOpcode() != ISD::Constant)1635if (tryRxSBG(Node, SystemZ::RNSBG))1636return;1637[[fallthrough]];1638case ISD::ROTL:1639case ISD::SHL:1640case ISD::SRL:1641case ISD::ZERO_EXTEND:1642if (tryRISBGZero(Node))1643return;1644break;16451646case ISD::BSWAP:1647if (Node->getValueType(0) == MVT::i128) {1648SDLoc DL(Node);1649SDValue Src = Node->getOperand(0);1650Src = CurDAG->getNode(ISD::BITCAST, DL, MVT::v16i8, Src);16511652uint64_t Bytes[2] = { 0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL };1653SDNode *Mask = loadPoolVectorConstant(APInt(128, Bytes), MVT::v16i8, DL);1654SDValue Ops[] = { Src, Src, SDValue(Mask, 0) };1655SDValue Res = SDValue(CurDAG->getMachineNode(SystemZ::VPERM, DL,1656MVT::v16i8, Ops), 0);16571658Res = CurDAG->getNode(ISD::BITCAST, DL, MVT::i128, Res);1659SDNode *ResNode = Res.getNode();1660ReplaceNode(Node, ResNode);1661SelectCode(Src.getNode());1662SelectCode(ResNode);1663return;1664}1665break;16661667case ISD::Constant:1668// If this is a 64-bit constant that is out of the range of LLILF,1669// LLIHF and LGFI, split it into two 32-bit pieces.1670if (Node->getValueType(0) == MVT::i64) {1671uint64_t Val = Node->getAsZExtVal();1672if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val) && !isInt<32>(Val)) {1673splitLargeImmediate(ISD::OR, Node, SDValue(), Val - uint32_t(Val),1674uint32_t(Val));1675return;1676}1677}1678if (Node->getValueType(0) == MVT::i128) {1679const APInt &Val = Node->getAsAPIntVal();1680SystemZVectorConstantInfo VCI(Val);1681if (VCI.isVectorConstantLegal(*Subtarget)) {1682loadVectorConstant(VCI, Node);1683return;1684}1685// If we can't materialize the constant we need to use a literal pool.1686SDNode *ResNode = loadPoolVectorConstant(Val, MVT::i128, SDLoc(Node));1687ReplaceNode(Node, ResNode);1688return;1689}1690break;16911692case SystemZISD::SELECT_CCMASK: {1693SDValue Op0 = Node->getOperand(0);1694SDValue Op1 = Node->getOperand(1);1695// Prefer to put any load first, so that it can be matched as a1696// conditional load. Likewise for constants in range for LOCHI.1697if ((Op1.getOpcode() == ISD::LOAD && Op0.getOpcode() != ISD::LOAD) ||1698(Subtarget->hasLoadStoreOnCond2() &&1699Node->getValueType(0).isInteger() &&1700Node->getValueType(0).getSizeInBits() <= 64 &&1701Op1.getOpcode() == ISD::Constant &&1702isInt<16>(cast<ConstantSDNode>(Op1)->getSExtValue()) &&1703!(Op0.getOpcode() == ISD::Constant &&1704isInt<16>(cast<ConstantSDNode>(Op0)->getSExtValue())))) {1705SDValue CCValid = Node->getOperand(2);1706SDValue CCMask = Node->getOperand(3);1707uint64_t ConstCCValid = CCValid.getNode()->getAsZExtVal();1708uint64_t ConstCCMask = CCMask.getNode()->getAsZExtVal();1709// Invert the condition.1710CCMask = CurDAG->getTargetConstant(ConstCCValid ^ ConstCCMask,1711SDLoc(Node), CCMask.getValueType());1712SDValue Op4 = Node->getOperand(4);1713SDNode *UpdatedNode =1714CurDAG->UpdateNodeOperands(Node, Op1, Op0, CCValid, CCMask, Op4);1715if (UpdatedNode != Node) {1716// In case this node already exists then replace Node with it.1717ReplaceNode(Node, UpdatedNode);1718Node = UpdatedNode;1719}1720}1721break;1722}17231724case ISD::INSERT_VECTOR_ELT: {1725EVT VT = Node->getValueType(0);1726unsigned ElemBitSize = VT.getScalarSizeInBits();1727if (ElemBitSize == 32) {1728if (tryGather(Node, SystemZ::VGEF))1729return;1730} else if (ElemBitSize == 64) {1731if (tryGather(Node, SystemZ::VGEG))1732return;1733}1734break;1735}17361737case ISD::BUILD_VECTOR: {1738auto *BVN = cast<BuildVectorSDNode>(Node);1739SystemZVectorConstantInfo VCI(BVN);1740if (VCI.isVectorConstantLegal(*Subtarget)) {1741loadVectorConstant(VCI, Node);1742return;1743}1744break;1745}17461747case ISD::ConstantFP: {1748APFloat Imm = cast<ConstantFPSDNode>(Node)->getValueAPF();1749if (Imm.isZero() || Imm.isNegZero())1750break;1751SystemZVectorConstantInfo VCI(Imm);1752bool Success = VCI.isVectorConstantLegal(*Subtarget); (void)Success;1753assert(Success && "Expected legal FP immediate");1754loadVectorConstant(VCI, Node);1755return;1756}17571758case ISD::STORE: {1759if (tryFoldLoadStoreIntoMemOperand(Node))1760return;1761auto *Store = cast<StoreSDNode>(Node);1762unsigned ElemBitSize = Store->getValue().getValueSizeInBits();1763if (ElemBitSize == 32) {1764if (tryScatter(Store, SystemZ::VSCEF))1765return;1766} else if (ElemBitSize == 64) {1767if (tryScatter(Store, SystemZ::VSCEG))1768return;1769}1770break;1771}17721773case ISD::ATOMIC_STORE: {1774auto *AtomOp = cast<AtomicSDNode>(Node);1775// Replace the atomic_store with a regular store and select it. This is1776// ok since we know all store instructions <= 8 bytes are atomic, and the1777// 16 byte case is already handled during lowering.1778StoreSDNode *St = cast<StoreSDNode>(CurDAG->getTruncStore(1779AtomOp->getChain(), SDLoc(AtomOp), AtomOp->getVal(),1780AtomOp->getBasePtr(), AtomOp->getMemoryVT(), AtomOp->getMemOperand()));1781assert(St->getMemOperand()->isAtomic() && "Broken MMO.");1782SDNode *Chain = St;1783// We have to enforce sequential consistency by performing a1784// serialization operation after the store.1785if (AtomOp->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent)1786Chain = CurDAG->getMachineNode(SystemZ::Serialize, SDLoc(AtomOp),1787MVT::Other, SDValue(Chain, 0));1788ReplaceNode(Node, Chain);1789SelectCode(St);1790return;1791}1792}17931794SelectCode(Node);1795}17961797bool SystemZDAGToDAGISel::SelectInlineAsmMemoryOperand(1798const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,1799std::vector<SDValue> &OutOps) {1800SystemZAddressingMode::AddrForm Form;1801SystemZAddressingMode::DispRange DispRange;1802SDValue Base, Disp, Index;18031804switch(ConstraintID) {1805default:1806llvm_unreachable("Unexpected asm memory constraint");1807case InlineAsm::ConstraintCode::i:1808case InlineAsm::ConstraintCode::Q:1809case InlineAsm::ConstraintCode::ZQ:1810// Accept an address with a short displacement, but no index.1811Form = SystemZAddressingMode::FormBD;1812DispRange = SystemZAddressingMode::Disp12Only;1813break;1814case InlineAsm::ConstraintCode::R:1815case InlineAsm::ConstraintCode::ZR:1816// Accept an address with a short displacement and an index.1817Form = SystemZAddressingMode::FormBDXNormal;1818DispRange = SystemZAddressingMode::Disp12Only;1819break;1820case InlineAsm::ConstraintCode::S:1821case InlineAsm::ConstraintCode::ZS:1822// Accept an address with a long displacement, but no index.1823Form = SystemZAddressingMode::FormBD;1824DispRange = SystemZAddressingMode::Disp20Only;1825break;1826case InlineAsm::ConstraintCode::T:1827case InlineAsm::ConstraintCode::m:1828case InlineAsm::ConstraintCode::o:1829case InlineAsm::ConstraintCode::p:1830case InlineAsm::ConstraintCode::ZT:1831// Accept an address with a long displacement and an index.1832// m works the same as T, as this is the most general case.1833// We don't really have any special handling of "offsettable"1834// memory addresses, so just treat o the same as m.1835Form = SystemZAddressingMode::FormBDXNormal;1836DispRange = SystemZAddressingMode::Disp20Only;1837break;1838}18391840if (selectBDXAddr(Form, DispRange, Op, Base, Disp, Index)) {1841const TargetRegisterClass *TRC =1842Subtarget->getRegisterInfo()->getPointerRegClass(*MF);1843SDLoc DL(Base);1844SDValue RC = CurDAG->getTargetConstant(TRC->getID(), DL, MVT::i32);18451846// Make sure that the base address doesn't go into %r0.1847// If it's a TargetFrameIndex or a fixed register, we shouldn't do anything.1848if (Base.getOpcode() != ISD::TargetFrameIndex &&1849Base.getOpcode() != ISD::Register) {1850Base =1851SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,1852DL, Base.getValueType(),1853Base, RC), 0);1854}18551856// Make sure that the index register isn't assigned to %r0 either.1857if (Index.getOpcode() != ISD::Register) {1858Index =1859SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,1860DL, Index.getValueType(),1861Index, RC), 0);1862}18631864OutOps.push_back(Base);1865OutOps.push_back(Disp);1866OutOps.push_back(Index);1867return false;1868}18691870return true;1871}18721873// IsProfitableToFold - Returns true if is profitable to fold the specific1874// operand node N of U during instruction selection that starts at Root.1875bool1876SystemZDAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U,1877SDNode *Root) const {1878// We want to avoid folding a LOAD into an ICMP node if as a result1879// we would be forced to spill the condition code into a GPR.1880if (N.getOpcode() == ISD::LOAD && U->getOpcode() == SystemZISD::ICMP) {1881if (!N.hasOneUse() || !U->hasOneUse())1882return false;18831884// The user of the CC value will usually be a CopyToReg into the1885// physical CC register, which in turn is glued and chained to the1886// actual instruction that uses the CC value. Bail out if we have1887// anything else than that.1888SDNode *CCUser = *U->use_begin();1889SDNode *CCRegUser = nullptr;1890if (CCUser->getOpcode() == ISD::CopyToReg ||1891cast<RegisterSDNode>(CCUser->getOperand(1))->getReg() == SystemZ::CC) {1892for (auto *U : CCUser->uses()) {1893if (CCRegUser == nullptr)1894CCRegUser = U;1895else if (CCRegUser != U)1896return false;1897}1898}1899if (CCRegUser == nullptr)1900return false;19011902// If the actual instruction is a branch, the only thing that remains to be1903// checked is whether the CCUser chain is a predecessor of the load.1904if (CCRegUser->isMachineOpcode() &&1905CCRegUser->getMachineOpcode() == SystemZ::BRC)1906return !N->isPredecessorOf(CCUser->getOperand(0).getNode());19071908// Otherwise, the instruction may have multiple operands, and we need to1909// verify that none of them are a predecessor of the load. This is exactly1910// the same check that would be done by common code if the CC setter were1911// glued to the CC user, so simply invoke that check here.1912if (!IsLegalToFold(N, U, CCRegUser, OptLevel, false))1913return false;1914}19151916return true;1917}19181919namespace {1920// Represents a sequence for extracting a 0/1 value from an IPM result:1921// (((X ^ XORValue) + AddValue) >> Bit)1922struct IPMConversion {1923IPMConversion(unsigned xorValue, int64_t addValue, unsigned bit)1924: XORValue(xorValue), AddValue(addValue), Bit(bit) {}19251926int64_t XORValue;1927int64_t AddValue;1928unsigned Bit;1929};1930} // end anonymous namespace19311932// Return a sequence for getting a 1 from an IPM result when CC has a1933// value in CCMask and a 0 when CC has a value in CCValid & ~CCMask.1934// The handling of CC values outside CCValid doesn't matter.1935static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) {1936// Deal with cases where the result can be taken directly from a bit1937// of the IPM result.1938if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_3)))1939return IPMConversion(0, 0, SystemZ::IPM_CC);1940if (CCMask == (CCValid & (SystemZ::CCMASK_2 | SystemZ::CCMASK_3)))1941return IPMConversion(0, 0, SystemZ::IPM_CC + 1);19421943// Deal with cases where we can add a value to force the sign bit1944// to contain the right value. Putting the bit in 31 means we can1945// use SRL rather than RISBG(L), and also makes it easier to get a1946// 0/-1 value, so it has priority over the other tests below.1947//1948// These sequences rely on the fact that the upper two bits of the1949// IPM result are zero.1950uint64_t TopBit = uint64_t(1) << 31;1951if (CCMask == (CCValid & SystemZ::CCMASK_0))1952return IPMConversion(0, -(1 << SystemZ::IPM_CC), 31);1953if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_1)))1954return IPMConversion(0, -(2 << SystemZ::IPM_CC), 31);1955if (CCMask == (CCValid & (SystemZ::CCMASK_01956| SystemZ::CCMASK_11957| SystemZ::CCMASK_2)))1958return IPMConversion(0, -(3 << SystemZ::IPM_CC), 31);1959if (CCMask == (CCValid & SystemZ::CCMASK_3))1960return IPMConversion(0, TopBit - (3 << SystemZ::IPM_CC), 31);1961if (CCMask == (CCValid & (SystemZ::CCMASK_11962| SystemZ::CCMASK_21963| SystemZ::CCMASK_3)))1964return IPMConversion(0, TopBit - (1 << SystemZ::IPM_CC), 31);19651966// Next try inverting the value and testing a bit. 0/1 could be1967// handled this way too, but we dealt with that case above.1968if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_2)))1969return IPMConversion(-1, 0, SystemZ::IPM_CC);19701971// Handle cases where adding a value forces a non-sign bit to contain1972// the right value.1973if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_2)))1974return IPMConversion(0, 1 << SystemZ::IPM_CC, SystemZ::IPM_CC + 1);1975if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_3)))1976return IPMConversion(0, -(1 << SystemZ::IPM_CC), SystemZ::IPM_CC + 1);19771978// The remaining cases are 1, 2, 0/1/3 and 0/2/3. All these are1979// can be done by inverting the low CC bit and applying one of the1980// sign-based extractions above.1981if (CCMask == (CCValid & SystemZ::CCMASK_1))1982return IPMConversion(1 << SystemZ::IPM_CC, -(1 << SystemZ::IPM_CC), 31);1983if (CCMask == (CCValid & SystemZ::CCMASK_2))1984return IPMConversion(1 << SystemZ::IPM_CC,1985TopBit - (3 << SystemZ::IPM_CC), 31);1986if (CCMask == (CCValid & (SystemZ::CCMASK_01987| SystemZ::CCMASK_11988| SystemZ::CCMASK_3)))1989return IPMConversion(1 << SystemZ::IPM_CC, -(3 << SystemZ::IPM_CC), 31);1990if (CCMask == (CCValid & (SystemZ::CCMASK_01991| SystemZ::CCMASK_21992| SystemZ::CCMASK_3)))1993return IPMConversion(1 << SystemZ::IPM_CC,1994TopBit - (1 << SystemZ::IPM_CC), 31);19951996llvm_unreachable("Unexpected CC combination");1997}19981999SDValue SystemZDAGToDAGISel::expandSelectBoolean(SDNode *Node) {2000auto *TrueOp = dyn_cast<ConstantSDNode>(Node->getOperand(0));2001auto *FalseOp = dyn_cast<ConstantSDNode>(Node->getOperand(1));2002if (!TrueOp || !FalseOp)2003return SDValue();2004if (FalseOp->getZExtValue() != 0)2005return SDValue();2006if (TrueOp->getSExtValue() != 1 && TrueOp->getSExtValue() != -1)2007return SDValue();20082009auto *CCValidOp = dyn_cast<ConstantSDNode>(Node->getOperand(2));2010auto *CCMaskOp = dyn_cast<ConstantSDNode>(Node->getOperand(3));2011if (!CCValidOp || !CCMaskOp)2012return SDValue();2013int CCValid = CCValidOp->getZExtValue();2014int CCMask = CCMaskOp->getZExtValue();20152016SDLoc DL(Node);2017SDValue CCReg = Node->getOperand(4);2018IPMConversion IPM = getIPMConversion(CCValid, CCMask);2019SDValue Result = CurDAG->getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);20202021if (IPM.XORValue)2022Result = CurDAG->getNode(ISD::XOR, DL, MVT::i32, Result,2023CurDAG->getConstant(IPM.XORValue, DL, MVT::i32));20242025if (IPM.AddValue)2026Result = CurDAG->getNode(ISD::ADD, DL, MVT::i32, Result,2027CurDAG->getConstant(IPM.AddValue, DL, MVT::i32));20282029EVT VT = Node->getValueType(0);2030if (VT == MVT::i32 && IPM.Bit == 31) {2031unsigned ShiftOp = TrueOp->getSExtValue() == 1 ? ISD::SRL : ISD::SRA;2032Result = CurDAG->getNode(ShiftOp, DL, MVT::i32, Result,2033CurDAG->getConstant(IPM.Bit, DL, MVT::i32));2034} else {2035if (VT != MVT::i32)2036Result = CurDAG->getNode(ISD::ANY_EXTEND, DL, VT, Result);20372038if (TrueOp->getSExtValue() == 1) {2039// The SHR/AND sequence should get optimized to an RISBG.2040Result = CurDAG->getNode(ISD::SRL, DL, VT, Result,2041CurDAG->getConstant(IPM.Bit, DL, MVT::i32));2042Result = CurDAG->getNode(ISD::AND, DL, VT, Result,2043CurDAG->getConstant(1, DL, VT));2044} else {2045// Sign-extend from IPM.Bit using a pair of shifts.2046int ShlAmt = VT.getSizeInBits() - 1 - IPM.Bit;2047int SraAmt = VT.getSizeInBits() - 1;2048Result = CurDAG->getNode(ISD::SHL, DL, VT, Result,2049CurDAG->getConstant(ShlAmt, DL, MVT::i32));2050Result = CurDAG->getNode(ISD::SRA, DL, VT, Result,2051CurDAG->getConstant(SraAmt, DL, MVT::i32));2052}2053}20542055return Result;2056}20572058bool SystemZDAGToDAGISel::shouldSelectForReassoc(SDNode *N) const {2059EVT VT = N->getValueType(0);2060assert(VT.isFloatingPoint() && "Expected FP SDNode");2061return N->getFlags().hasAllowReassociation() &&2062N->getFlags().hasNoSignedZeros() && Subtarget->hasVector() &&2063(VT != MVT::f32 || Subtarget->hasVectorEnhancements1()) &&2064!N->isStrictFPOpcode();2065}20662067void SystemZDAGToDAGISel::PreprocessISelDAG() {2068// If we have conditional immediate loads, we always prefer2069// using those over an IPM sequence.2070if (Subtarget->hasLoadStoreOnCond2())2071return;20722073bool MadeChange = false;20742075for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),2076E = CurDAG->allnodes_end();2077I != E;) {2078SDNode *N = &*I++;2079if (N->use_empty())2080continue;20812082SDValue Res;2083switch (N->getOpcode()) {2084default: break;2085case SystemZISD::SELECT_CCMASK:2086Res = expandSelectBoolean(N);2087break;2088}20892090if (Res) {2091LLVM_DEBUG(dbgs() << "SystemZ DAG preprocessing replacing:\nOld: ");2092LLVM_DEBUG(N->dump(CurDAG));2093LLVM_DEBUG(dbgs() << "\nNew: ");2094LLVM_DEBUG(Res.getNode()->dump(CurDAG));2095LLVM_DEBUG(dbgs() << "\n");20962097CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);2098MadeChange = true;2099}2100}21012102if (MadeChange)2103CurDAG->RemoveDeadNodes();2104}210521062107