Path: blob/main/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.cpp
35269 views
//===-- AVRISelLowering.cpp - AVR DAG Lowering Implementation -------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file defines the interfaces that AVR uses to lower LLVM code into a9// selection DAG.10//11//===----------------------------------------------------------------------===//1213#include "AVRISelLowering.h"1415#include "llvm/ADT/ArrayRef.h"16#include "llvm/ADT/STLExtras.h"17#include "llvm/ADT/StringSwitch.h"18#include "llvm/CodeGen/CallingConvLower.h"19#include "llvm/CodeGen/MachineFrameInfo.h"20#include "llvm/CodeGen/MachineInstrBuilder.h"21#include "llvm/CodeGen/MachineRegisterInfo.h"22#include "llvm/CodeGen/SelectionDAG.h"23#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"24#include "llvm/IR/Function.h"25#include "llvm/Support/ErrorHandling.h"2627#include "AVR.h"28#include "AVRMachineFunctionInfo.h"29#include "AVRSubtarget.h"30#include "AVRTargetMachine.h"31#include "MCTargetDesc/AVRMCTargetDesc.h"3233namespace llvm {3435AVRTargetLowering::AVRTargetLowering(const AVRTargetMachine &TM,36const AVRSubtarget &STI)37: TargetLowering(TM), Subtarget(STI) {38// Set up the register classes.39addRegisterClass(MVT::i8, &AVR::GPR8RegClass);40addRegisterClass(MVT::i16, &AVR::DREGSRegClass);4142// Compute derived properties from the register classes.43computeRegisterProperties(Subtarget.getRegisterInfo());4445setBooleanContents(ZeroOrOneBooleanContent);46setBooleanVectorContents(ZeroOrOneBooleanContent);47setSchedulingPreference(Sched::RegPressure);48setStackPointerRegisterToSaveRestore(AVR::SP);49setSupportsUnalignedAtomics(true);5051setOperationAction(ISD::GlobalAddress, MVT::i16, Custom);52setOperationAction(ISD::BlockAddress, MVT::i16, Custom);5354setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);55setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);56setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i8, Expand);57setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i16, Expand);5859setOperationAction(ISD::INLINEASM, MVT::Other, Custom);6061for (MVT VT : MVT::integer_valuetypes()) {62for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {63setLoadExtAction(N, VT, MVT::i1, Promote);64setLoadExtAction(N, VT, MVT::i8, Expand);65}66}6768setTruncStoreAction(MVT::i16, MVT::i8, Expand);6970for (MVT VT : MVT::integer_valuetypes()) {71setOperationAction(ISD::ADDC, VT, Legal);72setOperationAction(ISD::SUBC, VT, Legal);73setOperationAction(ISD::ADDE, VT, Legal);74setOperationAction(ISD::SUBE, VT, Legal);75}7677// sub (x, imm) gets canonicalized to add (x, -imm), so for illegal types78// revert into a sub since we don't have an add with immediate instruction.79setOperationAction(ISD::ADD, MVT::i32, Custom);80setOperationAction(ISD::ADD, MVT::i64, Custom);8182// our shift instructions are only able to shift 1 bit at a time, so handle83// this in a custom way.84setOperationAction(ISD::SRA, MVT::i8, Custom);85setOperationAction(ISD::SHL, MVT::i8, Custom);86setOperationAction(ISD::SRL, MVT::i8, Custom);87setOperationAction(ISD::SRA, MVT::i16, Custom);88setOperationAction(ISD::SHL, MVT::i16, Custom);89setOperationAction(ISD::SRL, MVT::i16, Custom);90setOperationAction(ISD::SRA, MVT::i32, Custom);91setOperationAction(ISD::SHL, MVT::i32, Custom);92setOperationAction(ISD::SRL, MVT::i32, Custom);93setOperationAction(ISD::SHL_PARTS, MVT::i16, Expand);94setOperationAction(ISD::SRA_PARTS, MVT::i16, Expand);95setOperationAction(ISD::SRL_PARTS, MVT::i16, Expand);9697setOperationAction(ISD::ROTL, MVT::i8, Custom);98setOperationAction(ISD::ROTL, MVT::i16, Expand);99setOperationAction(ISD::ROTR, MVT::i8, Custom);100setOperationAction(ISD::ROTR, MVT::i16, Expand);101102setOperationAction(ISD::BR_CC, MVT::i8, Custom);103setOperationAction(ISD::BR_CC, MVT::i16, Custom);104setOperationAction(ISD::BR_CC, MVT::i32, Custom);105setOperationAction(ISD::BR_CC, MVT::i64, Custom);106setOperationAction(ISD::BRCOND, MVT::Other, Expand);107108setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);109setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);110setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);111setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);112setOperationAction(ISD::SETCC, MVT::i8, Custom);113setOperationAction(ISD::SETCC, MVT::i16, Custom);114setOperationAction(ISD::SETCC, MVT::i32, Custom);115setOperationAction(ISD::SETCC, MVT::i64, Custom);116setOperationAction(ISD::SELECT, MVT::i8, Expand);117setOperationAction(ISD::SELECT, MVT::i16, Expand);118119setOperationAction(ISD::BSWAP, MVT::i16, Expand);120121// Add support for postincrement and predecrement load/stores.122setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);123setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal);124setIndexedLoadAction(ISD::PRE_DEC, MVT::i8, Legal);125setIndexedLoadAction(ISD::PRE_DEC, MVT::i16, Legal);126setIndexedStoreAction(ISD::POST_INC, MVT::i8, Legal);127setIndexedStoreAction(ISD::POST_INC, MVT::i16, Legal);128setIndexedStoreAction(ISD::PRE_DEC, MVT::i8, Legal);129setIndexedStoreAction(ISD::PRE_DEC, MVT::i16, Legal);130131setOperationAction(ISD::BR_JT, MVT::Other, Expand);132133setOperationAction(ISD::VASTART, MVT::Other, Custom);134setOperationAction(ISD::VAEND, MVT::Other, Expand);135setOperationAction(ISD::VAARG, MVT::Other, Expand);136setOperationAction(ISD::VACOPY, MVT::Other, Expand);137138// Atomic operations which must be lowered to rtlib calls139for (MVT VT : MVT::integer_valuetypes()) {140setOperationAction(ISD::ATOMIC_SWAP, VT, Expand);141setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Expand);142setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand);143setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand);144setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand);145setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand);146setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand);147}148149// Division/remainder150setOperationAction(ISD::UDIV, MVT::i8, Expand);151setOperationAction(ISD::UDIV, MVT::i16, Expand);152setOperationAction(ISD::UREM, MVT::i8, Expand);153setOperationAction(ISD::UREM, MVT::i16, Expand);154setOperationAction(ISD::SDIV, MVT::i8, Expand);155setOperationAction(ISD::SDIV, MVT::i16, Expand);156setOperationAction(ISD::SREM, MVT::i8, Expand);157setOperationAction(ISD::SREM, MVT::i16, Expand);158159// Make division and modulus custom160setOperationAction(ISD::UDIVREM, MVT::i8, Custom);161setOperationAction(ISD::UDIVREM, MVT::i16, Custom);162setOperationAction(ISD::UDIVREM, MVT::i32, Custom);163setOperationAction(ISD::SDIVREM, MVT::i8, Custom);164setOperationAction(ISD::SDIVREM, MVT::i16, Custom);165setOperationAction(ISD::SDIVREM, MVT::i32, Custom);166167// Do not use MUL. The AVR instructions are closer to SMUL_LOHI &co.168setOperationAction(ISD::MUL, MVT::i8, Expand);169setOperationAction(ISD::MUL, MVT::i16, Expand);170171// Expand 16 bit multiplications.172setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);173setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);174175// Expand multiplications to libcalls when there is176// no hardware MUL.177if (!Subtarget.supportsMultiplication()) {178setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);179setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);180}181182for (MVT VT : MVT::integer_valuetypes()) {183setOperationAction(ISD::MULHS, VT, Expand);184setOperationAction(ISD::MULHU, VT, Expand);185}186187for (MVT VT : MVT::integer_valuetypes()) {188setOperationAction(ISD::CTPOP, VT, Expand);189setOperationAction(ISD::CTLZ, VT, Expand);190setOperationAction(ISD::CTTZ, VT, Expand);191}192193for (MVT VT : MVT::integer_valuetypes()) {194setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);195// TODO: The generated code is pretty poor. Investigate using the196// same "shift and subtract with carry" trick that we do for197// extending 8-bit to 16-bit. This may require infrastructure198// improvements in how we treat 16-bit "registers" to be feasible.199}200201// Division and modulus rtlib functions202setLibcallName(RTLIB::SDIVREM_I8, "__divmodqi4");203setLibcallName(RTLIB::SDIVREM_I16, "__divmodhi4");204setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");205setLibcallName(RTLIB::UDIVREM_I8, "__udivmodqi4");206setLibcallName(RTLIB::UDIVREM_I16, "__udivmodhi4");207setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");208209// Several of the runtime library functions use a special calling conv210setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::AVR_BUILTIN);211setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::AVR_BUILTIN);212setLibcallCallingConv(RTLIB::UDIVREM_I8, CallingConv::AVR_BUILTIN);213setLibcallCallingConv(RTLIB::UDIVREM_I16, CallingConv::AVR_BUILTIN);214215// Trigonometric rtlib functions216setLibcallName(RTLIB::SIN_F32, "sin");217setLibcallName(RTLIB::COS_F32, "cos");218219setMinFunctionAlignment(Align(2));220setMinimumJumpTableEntries(UINT_MAX);221}222223const char *AVRTargetLowering::getTargetNodeName(unsigned Opcode) const {224#define NODE(name) \225case AVRISD::name: \226return #name227228switch (Opcode) {229default:230return nullptr;231NODE(RET_GLUE);232NODE(RETI_GLUE);233NODE(CALL);234NODE(WRAPPER);235NODE(LSL);236NODE(LSLW);237NODE(LSR);238NODE(LSRW);239NODE(ROL);240NODE(ROR);241NODE(ASR);242NODE(ASRW);243NODE(LSLLOOP);244NODE(LSRLOOP);245NODE(ROLLOOP);246NODE(RORLOOP);247NODE(ASRLOOP);248NODE(BRCOND);249NODE(CMP);250NODE(CMPC);251NODE(TST);252NODE(SELECT_CC);253#undef NODE254}255}256257EVT AVRTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,258EVT VT) const {259assert(!VT.isVector() && "No AVR SetCC type for vectors!");260return MVT::i8;261}262263SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const {264unsigned Opc8;265const SDNode *N = Op.getNode();266EVT VT = Op.getValueType();267SDLoc dl(N);268assert(llvm::has_single_bit<uint32_t>(VT.getSizeInBits()) &&269"Expected power-of-2 shift amount");270271if (VT.getSizeInBits() == 32) {272if (!isa<ConstantSDNode>(N->getOperand(1))) {273// 32-bit shifts are converted to a loop in IR.274// This should be unreachable.275report_fatal_error("Expected a constant shift amount!");276}277SDVTList ResTys = DAG.getVTList(MVT::i16, MVT::i16);278SDValue SrcLo =279DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i16, Op.getOperand(0),280DAG.getConstant(0, dl, MVT::i16));281SDValue SrcHi =282DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i16, Op.getOperand(0),283DAG.getConstant(1, dl, MVT::i16));284uint64_t ShiftAmount = N->getConstantOperandVal(1);285if (ShiftAmount == 16) {286// Special case these two operations because they appear to be used by the287// generic codegen parts to lower 32-bit numbers.288// TODO: perhaps we can lower shift amounts bigger than 16 to a 16-bit289// shift of a part of the 32-bit value?290switch (Op.getOpcode()) {291case ISD::SHL: {292SDValue Zero = DAG.getConstant(0, dl, MVT::i16);293return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i32, Zero, SrcLo);294}295case ISD::SRL: {296SDValue Zero = DAG.getConstant(0, dl, MVT::i16);297return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i32, SrcHi, Zero);298}299}300}301SDValue Cnt = DAG.getTargetConstant(ShiftAmount, dl, MVT::i8);302unsigned Opc;303switch (Op.getOpcode()) {304default:305llvm_unreachable("Invalid 32-bit shift opcode!");306case ISD::SHL:307Opc = AVRISD::LSLW;308break;309case ISD::SRL:310Opc = AVRISD::LSRW;311break;312case ISD::SRA:313Opc = AVRISD::ASRW;314break;315}316SDValue Result = DAG.getNode(Opc, dl, ResTys, SrcLo, SrcHi, Cnt);317return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i32, Result.getValue(0),318Result.getValue(1));319}320321// Expand non-constant shifts to loops.322if (!isa<ConstantSDNode>(N->getOperand(1))) {323switch (Op.getOpcode()) {324default:325llvm_unreachable("Invalid shift opcode!");326case ISD::SHL:327return DAG.getNode(AVRISD::LSLLOOP, dl, VT, N->getOperand(0),328N->getOperand(1));329case ISD::SRL:330return DAG.getNode(AVRISD::LSRLOOP, dl, VT, N->getOperand(0),331N->getOperand(1));332case ISD::ROTL: {333SDValue Amt = N->getOperand(1);334EVT AmtVT = Amt.getValueType();335Amt = DAG.getNode(ISD::AND, dl, AmtVT, Amt,336DAG.getConstant(VT.getSizeInBits() - 1, dl, AmtVT));337return DAG.getNode(AVRISD::ROLLOOP, dl, VT, N->getOperand(0), Amt);338}339case ISD::ROTR: {340SDValue Amt = N->getOperand(1);341EVT AmtVT = Amt.getValueType();342Amt = DAG.getNode(ISD::AND, dl, AmtVT, Amt,343DAG.getConstant(VT.getSizeInBits() - 1, dl, AmtVT));344return DAG.getNode(AVRISD::RORLOOP, dl, VT, N->getOperand(0), Amt);345}346case ISD::SRA:347return DAG.getNode(AVRISD::ASRLOOP, dl, VT, N->getOperand(0),348N->getOperand(1));349}350}351352uint64_t ShiftAmount = N->getConstantOperandVal(1);353SDValue Victim = N->getOperand(0);354355switch (Op.getOpcode()) {356case ISD::SRA:357Opc8 = AVRISD::ASR;358break;359case ISD::ROTL:360Opc8 = AVRISD::ROL;361ShiftAmount = ShiftAmount % VT.getSizeInBits();362break;363case ISD::ROTR:364Opc8 = AVRISD::ROR;365ShiftAmount = ShiftAmount % VT.getSizeInBits();366break;367case ISD::SRL:368Opc8 = AVRISD::LSR;369break;370case ISD::SHL:371Opc8 = AVRISD::LSL;372break;373default:374llvm_unreachable("Invalid shift opcode");375}376377// Optimize int8/int16 shifts.378if (VT.getSizeInBits() == 8) {379if (Op.getOpcode() == ISD::SHL && 4 <= ShiftAmount && ShiftAmount < 7) {380// Optimize LSL when 4 <= ShiftAmount <= 6.381Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);382Victim =383DAG.getNode(ISD::AND, dl, VT, Victim, DAG.getConstant(0xf0, dl, VT));384ShiftAmount -= 4;385} else if (Op.getOpcode() == ISD::SRL && 4 <= ShiftAmount &&386ShiftAmount < 7) {387// Optimize LSR when 4 <= ShiftAmount <= 6.388Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);389Victim =390DAG.getNode(ISD::AND, dl, VT, Victim, DAG.getConstant(0x0f, dl, VT));391ShiftAmount -= 4;392} else if (Op.getOpcode() == ISD::SHL && ShiftAmount == 7) {393// Optimize LSL when ShiftAmount == 7.394Victim = DAG.getNode(AVRISD::LSLBN, dl, VT, Victim,395DAG.getConstant(7, dl, VT));396ShiftAmount = 0;397} else if (Op.getOpcode() == ISD::SRL && ShiftAmount == 7) {398// Optimize LSR when ShiftAmount == 7.399Victim = DAG.getNode(AVRISD::LSRBN, dl, VT, Victim,400DAG.getConstant(7, dl, VT));401ShiftAmount = 0;402} else if (Op.getOpcode() == ISD::SRA && ShiftAmount == 6) {403// Optimize ASR when ShiftAmount == 6.404Victim = DAG.getNode(AVRISD::ASRBN, dl, VT, Victim,405DAG.getConstant(6, dl, VT));406ShiftAmount = 0;407} else if (Op.getOpcode() == ISD::SRA && ShiftAmount == 7) {408// Optimize ASR when ShiftAmount == 7.409Victim = DAG.getNode(AVRISD::ASRBN, dl, VT, Victim,410DAG.getConstant(7, dl, VT));411ShiftAmount = 0;412} else if (Op.getOpcode() == ISD::ROTL && ShiftAmount == 3) {413// Optimize left rotation 3 bits to swap then right rotation 1 bit.414Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);415Victim =416DAG.getNode(AVRISD::ROR, dl, VT, Victim, DAG.getConstant(1, dl, VT));417ShiftAmount = 0;418} else if (Op.getOpcode() == ISD::ROTR && ShiftAmount == 3) {419// Optimize right rotation 3 bits to swap then left rotation 1 bit.420Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);421Victim =422DAG.getNode(AVRISD::ROL, dl, VT, Victim, DAG.getConstant(1, dl, VT));423ShiftAmount = 0;424} else if (Op.getOpcode() == ISD::ROTL && ShiftAmount == 7) {425// Optimize left rotation 7 bits to right rotation 1 bit.426Victim =427DAG.getNode(AVRISD::ROR, dl, VT, Victim, DAG.getConstant(1, dl, VT));428ShiftAmount = 0;429} else if (Op.getOpcode() == ISD::ROTR && ShiftAmount == 7) {430// Optimize right rotation 7 bits to left rotation 1 bit.431Victim =432DAG.getNode(AVRISD::ROL, dl, VT, Victim, DAG.getConstant(1, dl, VT));433ShiftAmount = 0;434} else if ((Op.getOpcode() == ISD::ROTR || Op.getOpcode() == ISD::ROTL) &&435ShiftAmount >= 4) {436// Optimize left/right rotation with the SWAP instruction.437Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);438ShiftAmount -= 4;439}440} else if (VT.getSizeInBits() == 16) {441if (Op.getOpcode() == ISD::SRA)442// Special optimization for int16 arithmetic right shift.443switch (ShiftAmount) {444case 15:445Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,446DAG.getConstant(15, dl, VT));447ShiftAmount = 0;448break;449case 14:450Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,451DAG.getConstant(14, dl, VT));452ShiftAmount = 0;453break;454case 7:455Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,456DAG.getConstant(7, dl, VT));457ShiftAmount = 0;458break;459default:460break;461}462if (4 <= ShiftAmount && ShiftAmount < 8)463switch (Op.getOpcode()) {464case ISD::SHL:465Victim = DAG.getNode(AVRISD::LSLWN, dl, VT, Victim,466DAG.getConstant(4, dl, VT));467ShiftAmount -= 4;468break;469case ISD::SRL:470Victim = DAG.getNode(AVRISD::LSRWN, dl, VT, Victim,471DAG.getConstant(4, dl, VT));472ShiftAmount -= 4;473break;474default:475break;476}477else if (8 <= ShiftAmount && ShiftAmount < 12)478switch (Op.getOpcode()) {479case ISD::SHL:480Victim = DAG.getNode(AVRISD::LSLWN, dl, VT, Victim,481DAG.getConstant(8, dl, VT));482ShiftAmount -= 8;483// Only operate on the higher byte for remaining shift bits.484Opc8 = AVRISD::LSLHI;485break;486case ISD::SRL:487Victim = DAG.getNode(AVRISD::LSRWN, dl, VT, Victim,488DAG.getConstant(8, dl, VT));489ShiftAmount -= 8;490// Only operate on the lower byte for remaining shift bits.491Opc8 = AVRISD::LSRLO;492break;493case ISD::SRA:494Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,495DAG.getConstant(8, dl, VT));496ShiftAmount -= 8;497// Only operate on the lower byte for remaining shift bits.498Opc8 = AVRISD::ASRLO;499break;500default:501break;502}503else if (12 <= ShiftAmount)504switch (Op.getOpcode()) {505case ISD::SHL:506Victim = DAG.getNode(AVRISD::LSLWN, dl, VT, Victim,507DAG.getConstant(12, dl, VT));508ShiftAmount -= 12;509// Only operate on the higher byte for remaining shift bits.510Opc8 = AVRISD::LSLHI;511break;512case ISD::SRL:513Victim = DAG.getNode(AVRISD::LSRWN, dl, VT, Victim,514DAG.getConstant(12, dl, VT));515ShiftAmount -= 12;516// Only operate on the lower byte for remaining shift bits.517Opc8 = AVRISD::LSRLO;518break;519case ISD::SRA:520Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,521DAG.getConstant(8, dl, VT));522ShiftAmount -= 8;523// Only operate on the lower byte for remaining shift bits.524Opc8 = AVRISD::ASRLO;525break;526default:527break;528}529}530531while (ShiftAmount--) {532Victim = DAG.getNode(Opc8, dl, VT, Victim);533}534535return Victim;536}537538SDValue AVRTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {539unsigned Opcode = Op->getOpcode();540assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&541"Invalid opcode for Div/Rem lowering");542bool IsSigned = (Opcode == ISD::SDIVREM);543EVT VT = Op->getValueType(0);544Type *Ty = VT.getTypeForEVT(*DAG.getContext());545546RTLIB::Libcall LC;547switch (VT.getSimpleVT().SimpleTy) {548default:549llvm_unreachable("Unexpected request for libcall!");550case MVT::i8:551LC = IsSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8;552break;553case MVT::i16:554LC = IsSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16;555break;556case MVT::i32:557LC = IsSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32;558break;559}560561SDValue InChain = DAG.getEntryNode();562563TargetLowering::ArgListTy Args;564TargetLowering::ArgListEntry Entry;565for (SDValue const &Value : Op->op_values()) {566Entry.Node = Value;567Entry.Ty = Value.getValueType().getTypeForEVT(*DAG.getContext());568Entry.IsSExt = IsSigned;569Entry.IsZExt = !IsSigned;570Args.push_back(Entry);571}572573SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),574getPointerTy(DAG.getDataLayout()));575576Type *RetTy = (Type *)StructType::get(Ty, Ty);577578SDLoc dl(Op);579TargetLowering::CallLoweringInfo CLI(DAG);580CLI.setDebugLoc(dl)581.setChain(InChain)582.setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))583.setInRegister()584.setSExtResult(IsSigned)585.setZExtResult(!IsSigned);586587std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);588return CallInfo.first;589}590591SDValue AVRTargetLowering::LowerGlobalAddress(SDValue Op,592SelectionDAG &DAG) const {593auto DL = DAG.getDataLayout();594595const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();596int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();597598// Create the TargetGlobalAddress node, folding in the constant offset.599SDValue Result =600DAG.getTargetGlobalAddress(GV, SDLoc(Op), getPointerTy(DL), Offset);601return DAG.getNode(AVRISD::WRAPPER, SDLoc(Op), getPointerTy(DL), Result);602}603604SDValue AVRTargetLowering::LowerBlockAddress(SDValue Op,605SelectionDAG &DAG) const {606auto DL = DAG.getDataLayout();607const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();608609SDValue Result = DAG.getTargetBlockAddress(BA, getPointerTy(DL));610611return DAG.getNode(AVRISD::WRAPPER, SDLoc(Op), getPointerTy(DL), Result);612}613614/// IntCCToAVRCC - Convert a DAG integer condition code to an AVR CC.615static AVRCC::CondCodes intCCToAVRCC(ISD::CondCode CC) {616switch (CC) {617default:618llvm_unreachable("Unknown condition code!");619case ISD::SETEQ:620return AVRCC::COND_EQ;621case ISD::SETNE:622return AVRCC::COND_NE;623case ISD::SETGE:624return AVRCC::COND_GE;625case ISD::SETLT:626return AVRCC::COND_LT;627case ISD::SETUGE:628return AVRCC::COND_SH;629case ISD::SETULT:630return AVRCC::COND_LO;631}632}633634/// Returns appropriate CP/CPI/CPC nodes code for the given 8/16-bit operands.635SDValue AVRTargetLowering::getAVRCmp(SDValue LHS, SDValue RHS,636SelectionDAG &DAG, SDLoc DL) const {637assert((LHS.getSimpleValueType() == RHS.getSimpleValueType()) &&638"LHS and RHS have different types");639assert(((LHS.getSimpleValueType() == MVT::i16) ||640(LHS.getSimpleValueType() == MVT::i8)) &&641"invalid comparison type");642643SDValue Cmp;644645if (LHS.getSimpleValueType() == MVT::i16 && isa<ConstantSDNode>(RHS)) {646uint64_t Imm = RHS->getAsZExtVal();647// Generate a CPI/CPC pair if RHS is a 16-bit constant. Use the zero648// register for the constant RHS if its lower or higher byte is zero.649SDValue LHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS,650DAG.getIntPtrConstant(0, DL));651SDValue LHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS,652DAG.getIntPtrConstant(1, DL));653SDValue RHSlo = (Imm & 0xff) == 0654? DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8)655: DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS,656DAG.getIntPtrConstant(0, DL));657SDValue RHShi = (Imm & 0xff00) == 0658? DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8)659: DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS,660DAG.getIntPtrConstant(1, DL));661Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHSlo, RHSlo);662Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHShi, RHShi, Cmp);663} else if (RHS.getSimpleValueType() == MVT::i16 && isa<ConstantSDNode>(LHS)) {664// Generate a CPI/CPC pair if LHS is a 16-bit constant. Use the zero665// register for the constant LHS if its lower or higher byte is zero.666uint64_t Imm = LHS->getAsZExtVal();667SDValue LHSlo = (Imm & 0xff) == 0668? DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8)669: DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS,670DAG.getIntPtrConstant(0, DL));671SDValue LHShi = (Imm & 0xff00) == 0672? DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8)673: DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS,674DAG.getIntPtrConstant(1, DL));675SDValue RHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS,676DAG.getIntPtrConstant(0, DL));677SDValue RHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS,678DAG.getIntPtrConstant(1, DL));679Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHSlo, RHSlo);680Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHShi, RHShi, Cmp);681} else {682// Generate ordinary 16-bit comparison.683Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHS, RHS);684}685686return Cmp;687}688689/// Returns appropriate AVR CMP/CMPC nodes and corresponding condition code for690/// the given operands.691SDValue AVRTargetLowering::getAVRCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,692SDValue &AVRcc, SelectionDAG &DAG,693SDLoc DL) const {694SDValue Cmp;695EVT VT = LHS.getValueType();696bool UseTest = false;697698switch (CC) {699default:700break;701case ISD::SETLE: {702// Swap operands and reverse the branching condition.703std::swap(LHS, RHS);704CC = ISD::SETGE;705break;706}707case ISD::SETGT: {708if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {709switch (C->getSExtValue()) {710case -1: {711// When doing lhs > -1 use a tst instruction on the top part of lhs712// and use brpl instead of using a chain of cp/cpc.713UseTest = true;714AVRcc = DAG.getConstant(AVRCC::COND_PL, DL, MVT::i8);715break;716}717case 0: {718// Turn lhs > 0 into 0 < lhs since 0 can be materialized with719// __zero_reg__ in lhs.720RHS = LHS;721LHS = DAG.getConstant(0, DL, VT);722CC = ISD::SETLT;723break;724}725default: {726// Turn lhs < rhs with lhs constant into rhs >= lhs+1, this allows727// us to fold the constant into the cmp instruction.728RHS = DAG.getConstant(C->getSExtValue() + 1, DL, VT);729CC = ISD::SETGE;730break;731}732}733break;734}735// Swap operands and reverse the branching condition.736std::swap(LHS, RHS);737CC = ISD::SETLT;738break;739}740case ISD::SETLT: {741if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {742switch (C->getSExtValue()) {743case 1: {744// Turn lhs < 1 into 0 >= lhs since 0 can be materialized with745// __zero_reg__ in lhs.746RHS = LHS;747LHS = DAG.getConstant(0, DL, VT);748CC = ISD::SETGE;749break;750}751case 0: {752// When doing lhs < 0 use a tst instruction on the top part of lhs753// and use brmi instead of using a chain of cp/cpc.754UseTest = true;755AVRcc = DAG.getConstant(AVRCC::COND_MI, DL, MVT::i8);756break;757}758}759}760break;761}762case ISD::SETULE: {763// Swap operands and reverse the branching condition.764std::swap(LHS, RHS);765CC = ISD::SETUGE;766break;767}768case ISD::SETUGT: {769// Turn lhs < rhs with lhs constant into rhs >= lhs+1, this allows us to770// fold the constant into the cmp instruction.771if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {772RHS = DAG.getConstant(C->getSExtValue() + 1, DL, VT);773CC = ISD::SETUGE;774break;775}776// Swap operands and reverse the branching condition.777std::swap(LHS, RHS);778CC = ISD::SETULT;779break;780}781}782783// Expand 32 and 64 bit comparisons with custom CMP and CMPC nodes instead of784// using the default and/or/xor expansion code which is much longer.785if (VT == MVT::i32) {786SDValue LHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS,787DAG.getIntPtrConstant(0, DL));788SDValue LHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS,789DAG.getIntPtrConstant(1, DL));790SDValue RHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS,791DAG.getIntPtrConstant(0, DL));792SDValue RHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS,793DAG.getIntPtrConstant(1, DL));794795if (UseTest) {796// When using tst we only care about the highest part.797SDValue Top = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHShi,798DAG.getIntPtrConstant(1, DL));799Cmp = DAG.getNode(AVRISD::TST, DL, MVT::Glue, Top);800} else {801Cmp = getAVRCmp(LHSlo, RHSlo, DAG, DL);802Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHShi, RHShi, Cmp);803}804} else if (VT == MVT::i64) {805SDValue LHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS,806DAG.getIntPtrConstant(0, DL));807SDValue LHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS,808DAG.getIntPtrConstant(1, DL));809810SDValue LHS0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS_0,811DAG.getIntPtrConstant(0, DL));812SDValue LHS1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS_0,813DAG.getIntPtrConstant(1, DL));814SDValue LHS2 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS_1,815DAG.getIntPtrConstant(0, DL));816SDValue LHS3 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS_1,817DAG.getIntPtrConstant(1, DL));818819SDValue RHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS,820DAG.getIntPtrConstant(0, DL));821SDValue RHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS,822DAG.getIntPtrConstant(1, DL));823824SDValue RHS0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS_0,825DAG.getIntPtrConstant(0, DL));826SDValue RHS1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS_0,827DAG.getIntPtrConstant(1, DL));828SDValue RHS2 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS_1,829DAG.getIntPtrConstant(0, DL));830SDValue RHS3 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS_1,831DAG.getIntPtrConstant(1, DL));832833if (UseTest) {834// When using tst we only care about the highest part.835SDValue Top = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS3,836DAG.getIntPtrConstant(1, DL));837Cmp = DAG.getNode(AVRISD::TST, DL, MVT::Glue, Top);838} else {839Cmp = getAVRCmp(LHS0, RHS0, DAG, DL);840Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHS1, RHS1, Cmp);841Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHS2, RHS2, Cmp);842Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHS3, RHS3, Cmp);843}844} else if (VT == MVT::i8 || VT == MVT::i16) {845if (UseTest) {846// When using tst we only care about the highest part.847Cmp = DAG.getNode(AVRISD::TST, DL, MVT::Glue,848(VT == MVT::i8)849? LHS850: DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8,851LHS, DAG.getIntPtrConstant(1, DL)));852} else {853Cmp = getAVRCmp(LHS, RHS, DAG, DL);854}855} else {856llvm_unreachable("Invalid comparison size");857}858859// When using a test instruction AVRcc is already set.860if (!UseTest) {861AVRcc = DAG.getConstant(intCCToAVRCC(CC), DL, MVT::i8);862}863864return Cmp;865}866867SDValue AVRTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {868SDValue Chain = Op.getOperand(0);869ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();870SDValue LHS = Op.getOperand(2);871SDValue RHS = Op.getOperand(3);872SDValue Dest = Op.getOperand(4);873SDLoc dl(Op);874875SDValue TargetCC;876SDValue Cmp = getAVRCmp(LHS, RHS, CC, TargetCC, DAG, dl);877878return DAG.getNode(AVRISD::BRCOND, dl, MVT::Other, Chain, Dest, TargetCC,879Cmp);880}881882SDValue AVRTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {883SDValue LHS = Op.getOperand(0);884SDValue RHS = Op.getOperand(1);885SDValue TrueV = Op.getOperand(2);886SDValue FalseV = Op.getOperand(3);887ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();888SDLoc dl(Op);889890SDValue TargetCC;891SDValue Cmp = getAVRCmp(LHS, RHS, CC, TargetCC, DAG, dl);892893SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);894SDValue Ops[] = {TrueV, FalseV, TargetCC, Cmp};895896return DAG.getNode(AVRISD::SELECT_CC, dl, VTs, Ops);897}898899SDValue AVRTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {900SDValue LHS = Op.getOperand(0);901SDValue RHS = Op.getOperand(1);902ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();903SDLoc DL(Op);904905SDValue TargetCC;906SDValue Cmp = getAVRCmp(LHS, RHS, CC, TargetCC, DAG, DL);907908SDValue TrueV = DAG.getConstant(1, DL, Op.getValueType());909SDValue FalseV = DAG.getConstant(0, DL, Op.getValueType());910SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);911SDValue Ops[] = {TrueV, FalseV, TargetCC, Cmp};912913return DAG.getNode(AVRISD::SELECT_CC, DL, VTs, Ops);914}915916SDValue AVRTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {917const MachineFunction &MF = DAG.getMachineFunction();918const AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();919const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();920auto DL = DAG.getDataLayout();921SDLoc dl(Op);922923// Vastart just stores the address of the VarArgsFrameIndex slot into the924// memory location argument.925SDValue FI = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), getPointerTy(DL));926927return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1),928MachinePointerInfo(SV));929}930931// Modify the existing ISD::INLINEASM node to add the implicit zero register.932SDValue AVRTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {933SDValue ZeroReg = DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8);934if (Op.getOperand(Op.getNumOperands() - 1) == ZeroReg ||935Op.getOperand(Op.getNumOperands() - 2) == ZeroReg) {936// Zero register has already been added. Don't add it again.937// If this isn't handled, we get called over and over again.938return Op;939}940941// Get a list of operands to the new INLINEASM node. This is mostly a copy,942// with some edits.943// Add the following operands at the end (but before the glue node, if it's944// there):945// - The flags of the implicit zero register operand.946// - The implicit zero register operand itself.947SDLoc dl(Op);948SmallVector<SDValue, 8> Ops;949SDNode *N = Op.getNode();950SDValue Glue;951for (unsigned I = 0; I < N->getNumOperands(); I++) {952SDValue Operand = N->getOperand(I);953if (Operand.getValueType() == MVT::Glue) {954// The glue operand always needs to be at the end, so we need to treat it955// specially.956Glue = Operand;957} else {958Ops.push_back(Operand);959}960}961InlineAsm::Flag Flags(InlineAsm::Kind::RegUse, 1);962Ops.push_back(DAG.getTargetConstant(Flags, dl, MVT::i32));963Ops.push_back(ZeroReg);964if (Glue) {965Ops.push_back(Glue);966}967968// Replace the current INLINEASM node with a new one that has the zero969// register as implicit parameter.970SDValue New = DAG.getNode(N->getOpcode(), dl, N->getVTList(), Ops);971DAG.ReplaceAllUsesOfValueWith(Op, New);972DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), New.getValue(1));973974return New;975}976977SDValue AVRTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {978switch (Op.getOpcode()) {979default:980llvm_unreachable("Don't know how to custom lower this!");981case ISD::SHL:982case ISD::SRA:983case ISD::SRL:984case ISD::ROTL:985case ISD::ROTR:986return LowerShifts(Op, DAG);987case ISD::GlobalAddress:988return LowerGlobalAddress(Op, DAG);989case ISD::BlockAddress:990return LowerBlockAddress(Op, DAG);991case ISD::BR_CC:992return LowerBR_CC(Op, DAG);993case ISD::SELECT_CC:994return LowerSELECT_CC(Op, DAG);995case ISD::SETCC:996return LowerSETCC(Op, DAG);997case ISD::VASTART:998return LowerVASTART(Op, DAG);999case ISD::SDIVREM:1000case ISD::UDIVREM:1001return LowerDivRem(Op, DAG);1002case ISD::INLINEASM:1003return LowerINLINEASM(Op, DAG);1004}10051006return SDValue();1007}10081009/// Replace a node with an illegal result type1010/// with a new node built out of custom code.1011void AVRTargetLowering::ReplaceNodeResults(SDNode *N,1012SmallVectorImpl<SDValue> &Results,1013SelectionDAG &DAG) const {1014SDLoc DL(N);10151016switch (N->getOpcode()) {1017case ISD::ADD: {1018// Convert add (x, imm) into sub (x, -imm).1019if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {1020SDValue Sub = DAG.getNode(1021ISD::SUB, DL, N->getValueType(0), N->getOperand(0),1022DAG.getConstant(-C->getAPIntValue(), DL, C->getValueType(0)));1023Results.push_back(Sub);1024}1025break;1026}1027default: {1028SDValue Res = LowerOperation(SDValue(N, 0), DAG);10291030for (unsigned I = 0, E = Res->getNumValues(); I != E; ++I)1031Results.push_back(Res.getValue(I));10321033break;1034}1035}1036}10371038/// Return true if the addressing mode represented1039/// by AM is legal for this target, for a load/store of the specified type.1040bool AVRTargetLowering::isLegalAddressingMode(const DataLayout &DL,1041const AddrMode &AM, Type *Ty,1042unsigned AS,1043Instruction *I) const {1044int64_t Offs = AM.BaseOffs;10451046// Allow absolute addresses.1047if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && Offs == 0) {1048return true;1049}10501051// Flash memory instructions only allow zero offsets.1052if (isa<PointerType>(Ty) && AS == AVR::ProgramMemory) {1053return false;1054}10551056// Allow reg+<6bit> offset.1057if (Offs < 0)1058Offs = -Offs;1059if (AM.BaseGV == nullptr && AM.HasBaseReg && AM.Scale == 0 &&1060isUInt<6>(Offs)) {1061return true;1062}10631064return false;1065}10661067/// Returns true by value, base pointer and1068/// offset pointer and addressing mode by reference if the node's address1069/// can be legally represented as pre-indexed load / store address.1070bool AVRTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,1071SDValue &Offset,1072ISD::MemIndexedMode &AM,1073SelectionDAG &DAG) const {1074EVT VT;1075const SDNode *Op;1076SDLoc DL(N);10771078if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {1079VT = LD->getMemoryVT();1080Op = LD->getBasePtr().getNode();1081if (LD->getExtensionType() != ISD::NON_EXTLOAD)1082return false;1083if (AVR::isProgramMemoryAccess(LD)) {1084return false;1085}1086} else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {1087VT = ST->getMemoryVT();1088Op = ST->getBasePtr().getNode();1089if (AVR::isProgramMemoryAccess(ST)) {1090return false;1091}1092} else {1093return false;1094}10951096if (VT != MVT::i8 && VT != MVT::i16) {1097return false;1098}10991100if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB) {1101return false;1102}11031104if (const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {1105int RHSC = RHS->getSExtValue();1106if (Op->getOpcode() == ISD::SUB)1107RHSC = -RHSC;11081109if ((VT == MVT::i16 && RHSC != -2) || (VT == MVT::i8 && RHSC != -1)) {1110return false;1111}11121113Base = Op->getOperand(0);1114Offset = DAG.getConstant(RHSC, DL, MVT::i8);1115AM = ISD::PRE_DEC;11161117return true;1118}11191120return false;1121}11221123/// Returns true by value, base pointer and1124/// offset pointer and addressing mode by reference if this node can be1125/// combined with a load / store to form a post-indexed load / store.1126bool AVRTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,1127SDValue &Base,1128SDValue &Offset,1129ISD::MemIndexedMode &AM,1130SelectionDAG &DAG) const {1131EVT VT;1132SDLoc DL(N);11331134if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {1135VT = LD->getMemoryVT();1136if (LD->getExtensionType() != ISD::NON_EXTLOAD)1137return false;1138} else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {1139VT = ST->getMemoryVT();1140// We can not store to program memory.1141if (AVR::isProgramMemoryAccess(ST))1142return false;1143// Since the high byte need to be stored first, we can not emit1144// i16 post increment store like:1145// st X+, r241146// st X+, r251147if (VT == MVT::i16 && !Subtarget.hasLowByteFirst())1148return false;1149} else {1150return false;1151}11521153if (VT != MVT::i8 && VT != MVT::i16) {1154return false;1155}11561157if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB) {1158return false;1159}11601161if (const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {1162int RHSC = RHS->getSExtValue();1163if (Op->getOpcode() == ISD::SUB)1164RHSC = -RHSC;1165if ((VT == MVT::i16 && RHSC != 2) || (VT == MVT::i8 && RHSC != 1)) {1166return false;1167}11681169// FIXME: We temporarily disable post increment load from program memory,1170// due to bug https://github.com/llvm/llvm-project/issues/59914.1171if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(N))1172if (AVR::isProgramMemoryAccess(LD))1173return false;11741175Base = Op->getOperand(0);1176Offset = DAG.getConstant(RHSC, DL, MVT::i8);1177AM = ISD::POST_INC;11781179return true;1180}11811182return false;1183}11841185bool AVRTargetLowering::isOffsetFoldingLegal(1186const GlobalAddressSDNode *GA) const {1187return true;1188}11891190//===----------------------------------------------------------------------===//1191// Formal Arguments Calling Convention Implementation1192//===----------------------------------------------------------------------===//11931194#include "AVRGenCallingConv.inc"11951196/// Registers for calling conventions, ordered in reverse as required by ABI.1197/// Both arrays must be of the same length.1198static const MCPhysReg RegList8AVR[] = {1199AVR::R25, AVR::R24, AVR::R23, AVR::R22, AVR::R21, AVR::R20,1200AVR::R19, AVR::R18, AVR::R17, AVR::R16, AVR::R15, AVR::R14,1201AVR::R13, AVR::R12, AVR::R11, AVR::R10, AVR::R9, AVR::R8};1202static const MCPhysReg RegList8Tiny[] = {AVR::R25, AVR::R24, AVR::R23,1203AVR::R22, AVR::R21, AVR::R20};1204static const MCPhysReg RegList16AVR[] = {1205AVR::R26R25, AVR::R25R24, AVR::R24R23, AVR::R23R22, AVR::R22R21,1206AVR::R21R20, AVR::R20R19, AVR::R19R18, AVR::R18R17, AVR::R17R16,1207AVR::R16R15, AVR::R15R14, AVR::R14R13, AVR::R13R12, AVR::R12R11,1208AVR::R11R10, AVR::R10R9, AVR::R9R8};1209static const MCPhysReg RegList16Tiny[] = {AVR::R26R25, AVR::R25R24,1210AVR::R24R23, AVR::R23R22,1211AVR::R22R21, AVR::R21R20};12121213static_assert(std::size(RegList8AVR) == std::size(RegList16AVR),1214"8-bit and 16-bit register arrays must be of equal length");1215static_assert(std::size(RegList8Tiny) == std::size(RegList16Tiny),1216"8-bit and 16-bit register arrays must be of equal length");12171218/// Analyze incoming and outgoing function arguments. We need custom C++ code1219/// to handle special constraints in the ABI.1220/// In addition, all pieces of a certain argument have to be passed either1221/// using registers or the stack but never mixing both.1222template <typename ArgT>1223static void analyzeArguments(TargetLowering::CallLoweringInfo *CLI,1224const Function *F, const DataLayout *TD,1225const SmallVectorImpl<ArgT> &Args,1226SmallVectorImpl<CCValAssign> &ArgLocs,1227CCState &CCInfo, bool Tiny) {1228// Choose the proper register list for argument passing according to the ABI.1229ArrayRef<MCPhysReg> RegList8;1230ArrayRef<MCPhysReg> RegList16;1231if (Tiny) {1232RegList8 = ArrayRef(RegList8Tiny);1233RegList16 = ArrayRef(RegList16Tiny);1234} else {1235RegList8 = ArrayRef(RegList8AVR);1236RegList16 = ArrayRef(RegList16AVR);1237}12381239unsigned NumArgs = Args.size();1240// This is the index of the last used register, in RegList*.1241// -1 means R26 (R26 is never actually used in CC).1242int RegLastIdx = -1;1243// Once a value is passed to the stack it will always be used1244bool UseStack = false;1245for (unsigned i = 0; i != NumArgs;) {1246MVT VT = Args[i].VT;1247// We have to count the number of bytes for each function argument, that is1248// those Args with the same OrigArgIndex. This is important in case the1249// function takes an aggregate type.1250// Current argument will be between [i..j).1251unsigned ArgIndex = Args[i].OrigArgIndex;1252unsigned TotalBytes = VT.getStoreSize();1253unsigned j = i + 1;1254for (; j != NumArgs; ++j) {1255if (Args[j].OrigArgIndex != ArgIndex)1256break;1257TotalBytes += Args[j].VT.getStoreSize();1258}1259// Round up to even number of bytes.1260TotalBytes = alignTo(TotalBytes, 2);1261// Skip zero sized arguments1262if (TotalBytes == 0)1263continue;1264// The index of the first register to be used1265unsigned RegIdx = RegLastIdx + TotalBytes;1266RegLastIdx = RegIdx;1267// If there are not enough registers, use the stack1268if (RegIdx >= RegList8.size()) {1269UseStack = true;1270}1271for (; i != j; ++i) {1272MVT VT = Args[i].VT;12731274if (UseStack) {1275auto evt = EVT(VT).getTypeForEVT(CCInfo.getContext());1276unsigned Offset = CCInfo.AllocateStack(TD->getTypeAllocSize(evt),1277TD->getABITypeAlign(evt));1278CCInfo.addLoc(1279CCValAssign::getMem(i, VT, Offset, VT, CCValAssign::Full));1280} else {1281unsigned Reg;1282if (VT == MVT::i8) {1283Reg = CCInfo.AllocateReg(RegList8[RegIdx]);1284} else if (VT == MVT::i16) {1285Reg = CCInfo.AllocateReg(RegList16[RegIdx]);1286} else {1287llvm_unreachable(1288"calling convention can only manage i8 and i16 types");1289}1290assert(Reg && "register not available in calling convention");1291CCInfo.addLoc(CCValAssign::getReg(i, VT, Reg, VT, CCValAssign::Full));1292// Registers inside a particular argument are sorted in increasing order1293// (remember the array is reversed).1294RegIdx -= VT.getStoreSize();1295}1296}1297}1298}12991300/// Count the total number of bytes needed to pass or return these arguments.1301template <typename ArgT>1302static unsigned1303getTotalArgumentsSizeInBytes(const SmallVectorImpl<ArgT> &Args) {1304unsigned TotalBytes = 0;13051306for (const ArgT &Arg : Args) {1307TotalBytes += Arg.VT.getStoreSize();1308}1309return TotalBytes;1310}13111312/// Analyze incoming and outgoing value of returning from a function.1313/// The algorithm is similar to analyzeArguments, but there can only be1314/// one value, possibly an aggregate, and it is limited to 8 bytes.1315template <typename ArgT>1316static void analyzeReturnValues(const SmallVectorImpl<ArgT> &Args,1317CCState &CCInfo, bool Tiny) {1318unsigned NumArgs = Args.size();1319unsigned TotalBytes = getTotalArgumentsSizeInBytes(Args);1320// CanLowerReturn() guarantees this assertion.1321if (Tiny)1322assert(TotalBytes <= 4 &&1323"return values greater than 4 bytes cannot be lowered on AVRTiny");1324else1325assert(TotalBytes <= 8 &&1326"return values greater than 8 bytes cannot be lowered on AVR");13271328// Choose the proper register list for argument passing according to the ABI.1329ArrayRef<MCPhysReg> RegList8;1330ArrayRef<MCPhysReg> RegList16;1331if (Tiny) {1332RegList8 = ArrayRef(RegList8Tiny);1333RegList16 = ArrayRef(RegList16Tiny);1334} else {1335RegList8 = ArrayRef(RegList8AVR);1336RegList16 = ArrayRef(RegList16AVR);1337}13381339// GCC-ABI says that the size is rounded up to the next even number,1340// but actually once it is more than 4 it will always round up to 8.1341if (TotalBytes > 4) {1342TotalBytes = 8;1343} else {1344TotalBytes = alignTo(TotalBytes, 2);1345}13461347// The index of the first register to use.1348int RegIdx = TotalBytes - 1;1349for (unsigned i = 0; i != NumArgs; ++i) {1350MVT VT = Args[i].VT;1351unsigned Reg;1352if (VT == MVT::i8) {1353Reg = CCInfo.AllocateReg(RegList8[RegIdx]);1354} else if (VT == MVT::i16) {1355Reg = CCInfo.AllocateReg(RegList16[RegIdx]);1356} else {1357llvm_unreachable("calling convention can only manage i8 and i16 types");1358}1359assert(Reg && "register not available in calling convention");1360CCInfo.addLoc(CCValAssign::getReg(i, VT, Reg, VT, CCValAssign::Full));1361// Registers sort in increasing order1362RegIdx -= VT.getStoreSize();1363}1364}13651366SDValue AVRTargetLowering::LowerFormalArguments(1367SDValue Chain, CallingConv::ID CallConv, bool isVarArg,1368const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,1369SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {1370MachineFunction &MF = DAG.getMachineFunction();1371MachineFrameInfo &MFI = MF.getFrameInfo();1372auto DL = DAG.getDataLayout();13731374// Assign locations to all of the incoming arguments.1375SmallVector<CCValAssign, 16> ArgLocs;1376CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,1377*DAG.getContext());13781379// Variadic functions do not need all the analysis below.1380if (isVarArg) {1381CCInfo.AnalyzeFormalArguments(Ins, ArgCC_AVR_Vararg);1382} else {1383analyzeArguments(nullptr, &MF.getFunction(), &DL, Ins, ArgLocs, CCInfo,1384Subtarget.hasTinyEncoding());1385}13861387SDValue ArgValue;1388for (CCValAssign &VA : ArgLocs) {13891390// Arguments stored on registers.1391if (VA.isRegLoc()) {1392EVT RegVT = VA.getLocVT();1393const TargetRegisterClass *RC;1394if (RegVT == MVT::i8) {1395RC = &AVR::GPR8RegClass;1396} else if (RegVT == MVT::i16) {1397RC = &AVR::DREGSRegClass;1398} else {1399llvm_unreachable("Unknown argument type!");1400}14011402Register Reg = MF.addLiveIn(VA.getLocReg(), RC);1403ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);14041405// :NOTE: Clang should not promote any i8 into i16 but for safety the1406// following code will handle zexts or sexts generated by other1407// front ends. Otherwise:1408// If this is an 8 bit value, it is really passed promoted1409// to 16 bits. Insert an assert[sz]ext to capture this, then1410// truncate to the right size.1411switch (VA.getLocInfo()) {1412default:1413llvm_unreachable("Unknown loc info!");1414case CCValAssign::Full:1415break;1416case CCValAssign::BCvt:1417ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);1418break;1419case CCValAssign::SExt:1420ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,1421DAG.getValueType(VA.getValVT()));1422ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);1423break;1424case CCValAssign::ZExt:1425ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,1426DAG.getValueType(VA.getValVT()));1427ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);1428break;1429}14301431InVals.push_back(ArgValue);1432} else {1433// Only arguments passed on the stack should make it here.1434assert(VA.isMemLoc());14351436EVT LocVT = VA.getLocVT();14371438// Create the frame index object for this incoming parameter.1439int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8,1440VA.getLocMemOffset(), true);14411442// Create the SelectionDAG nodes corresponding to a load1443// from this parameter.1444SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DL));1445InVals.push_back(DAG.getLoad(LocVT, dl, Chain, FIN,1446MachinePointerInfo::getFixedStack(MF, FI)));1447}1448}14491450// If the function takes variable number of arguments, make a frame index for1451// the start of the first vararg value... for expansion of llvm.va_start.1452if (isVarArg) {1453unsigned StackSize = CCInfo.getStackSize();1454AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();14551456AFI->setVarArgsFrameIndex(MFI.CreateFixedObject(2, StackSize, true));1457}14581459return Chain;1460}14611462//===----------------------------------------------------------------------===//1463// Call Calling Convention Implementation1464//===----------------------------------------------------------------------===//14651466SDValue AVRTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,1467SmallVectorImpl<SDValue> &InVals) const {1468SelectionDAG &DAG = CLI.DAG;1469SDLoc &DL = CLI.DL;1470SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;1471SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;1472SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;1473SDValue Chain = CLI.Chain;1474SDValue Callee = CLI.Callee;1475bool &isTailCall = CLI.IsTailCall;1476CallingConv::ID CallConv = CLI.CallConv;1477bool isVarArg = CLI.IsVarArg;14781479MachineFunction &MF = DAG.getMachineFunction();14801481// AVR does not yet support tail call optimization.1482isTailCall = false;14831484// Analyze operands of the call, assigning locations to each operand.1485SmallVector<CCValAssign, 16> ArgLocs;1486CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,1487*DAG.getContext());14881489// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every1490// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol1491// node so that legalize doesn't hack it.1492const Function *F = nullptr;1493if (const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {1494const GlobalValue *GV = G->getGlobal();1495if (isa<Function>(GV))1496F = cast<Function>(GV);1497Callee =1498DAG.getTargetGlobalAddress(GV, DL, getPointerTy(DAG.getDataLayout()));1499} else if (const ExternalSymbolSDNode *ES =1500dyn_cast<ExternalSymbolSDNode>(Callee)) {1501Callee = DAG.getTargetExternalSymbol(ES->getSymbol(),1502getPointerTy(DAG.getDataLayout()));1503}15041505// Variadic functions do not need all the analysis below.1506if (isVarArg) {1507CCInfo.AnalyzeCallOperands(Outs, ArgCC_AVR_Vararg);1508} else {1509analyzeArguments(&CLI, F, &DAG.getDataLayout(), Outs, ArgLocs, CCInfo,1510Subtarget.hasTinyEncoding());1511}15121513// Get a count of how many bytes are to be pushed on the stack.1514unsigned NumBytes = CCInfo.getStackSize();15151516Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);15171518SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;15191520// First, walk the register assignments, inserting copies.1521unsigned AI, AE;1522bool HasStackArgs = false;1523for (AI = 0, AE = ArgLocs.size(); AI != AE; ++AI) {1524CCValAssign &VA = ArgLocs[AI];1525EVT RegVT = VA.getLocVT();1526SDValue Arg = OutVals[AI];15271528// Promote the value if needed. With Clang this should not happen.1529switch (VA.getLocInfo()) {1530default:1531llvm_unreachable("Unknown loc info!");1532case CCValAssign::Full:1533break;1534case CCValAssign::SExt:1535Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, RegVT, Arg);1536break;1537case CCValAssign::ZExt:1538Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, RegVT, Arg);1539break;1540case CCValAssign::AExt:1541Arg = DAG.getNode(ISD::ANY_EXTEND, DL, RegVT, Arg);1542break;1543case CCValAssign::BCvt:1544Arg = DAG.getNode(ISD::BITCAST, DL, RegVT, Arg);1545break;1546}15471548// Stop when we encounter a stack argument, we need to process them1549// in reverse order in the loop below.1550if (VA.isMemLoc()) {1551HasStackArgs = true;1552break;1553}15541555// Arguments that can be passed on registers must be kept in the RegsToPass1556// vector.1557RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));1558}15591560// Second, stack arguments have to walked.1561// Previously this code created chained stores but those chained stores appear1562// to be unchained in the legalization phase. Therefore, do not attempt to1563// chain them here. In fact, chaining them here somehow causes the first and1564// second store to be reversed which is the exact opposite of the intended1565// effect.1566if (HasStackArgs) {1567SmallVector<SDValue, 8> MemOpChains;1568for (; AI != AE; AI++) {1569CCValAssign &VA = ArgLocs[AI];1570SDValue Arg = OutVals[AI];15711572assert(VA.isMemLoc());15731574// SP points to one stack slot further so add one to adjust it.1575SDValue PtrOff = DAG.getNode(1576ISD::ADD, DL, getPointerTy(DAG.getDataLayout()),1577DAG.getRegister(AVR::SP, getPointerTy(DAG.getDataLayout())),1578DAG.getIntPtrConstant(VA.getLocMemOffset() + 1, DL));15791580MemOpChains.push_back(1581DAG.getStore(Chain, DL, Arg, PtrOff,1582MachinePointerInfo::getStack(MF, VA.getLocMemOffset())));1583}15841585if (!MemOpChains.empty())1586Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);1587}15881589// Build a sequence of copy-to-reg nodes chained together with token chain and1590// flag operands which copy the outgoing args into registers. The InGlue in1591// necessary since all emited instructions must be stuck together.1592SDValue InGlue;1593for (auto Reg : RegsToPass) {1594Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, InGlue);1595InGlue = Chain.getValue(1);1596}15971598// Returns a chain & a flag for retval copy to use.1599SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);1600SmallVector<SDValue, 8> Ops;1601Ops.push_back(Chain);1602Ops.push_back(Callee);16031604// Add argument registers to the end of the list so that they are known live1605// into the call.1606for (auto Reg : RegsToPass) {1607Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));1608}16091610// The zero register (usually R1) must be passed as an implicit register so1611// that this register is correctly zeroed in interrupts.1612Ops.push_back(DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8));16131614// Add a register mask operand representing the call-preserved registers.1615const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();1616const uint32_t *Mask =1617TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);1618assert(Mask && "Missing call preserved mask for calling convention");1619Ops.push_back(DAG.getRegisterMask(Mask));16201621if (InGlue.getNode()) {1622Ops.push_back(InGlue);1623}16241625Chain = DAG.getNode(AVRISD::CALL, DL, NodeTys, Ops);1626InGlue = Chain.getValue(1);16271628// Create the CALLSEQ_END node.1629Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, DL);16301631if (!Ins.empty()) {1632InGlue = Chain.getValue(1);1633}16341635// Handle result values, copying them out of physregs into vregs that we1636// return.1637return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, DL, DAG,1638InVals);1639}16401641/// Lower the result values of a call into the1642/// appropriate copies out of appropriate physical registers.1643///1644SDValue AVRTargetLowering::LowerCallResult(1645SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,1646const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,1647SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {16481649// Assign locations to each value returned by this call.1650SmallVector<CCValAssign, 16> RVLocs;1651CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,1652*DAG.getContext());16531654// Handle runtime calling convs.1655if (CallConv == CallingConv::AVR_BUILTIN) {1656CCInfo.AnalyzeCallResult(Ins, RetCC_AVR_BUILTIN);1657} else {1658analyzeReturnValues(Ins, CCInfo, Subtarget.hasTinyEncoding());1659}16601661// Copy all of the result registers out of their specified physreg.1662for (CCValAssign const &RVLoc : RVLocs) {1663Chain = DAG.getCopyFromReg(Chain, dl, RVLoc.getLocReg(), RVLoc.getValVT(),1664InGlue)1665.getValue(1);1666InGlue = Chain.getValue(2);1667InVals.push_back(Chain.getValue(0));1668}16691670return Chain;1671}16721673//===----------------------------------------------------------------------===//1674// Return Value Calling Convention Implementation1675//===----------------------------------------------------------------------===//16761677bool AVRTargetLowering::CanLowerReturn(1678CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,1679const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {1680if (CallConv == CallingConv::AVR_BUILTIN) {1681SmallVector<CCValAssign, 16> RVLocs;1682CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);1683return CCInfo.CheckReturn(Outs, RetCC_AVR_BUILTIN);1684}16851686unsigned TotalBytes = getTotalArgumentsSizeInBytes(Outs);1687return TotalBytes <= (unsigned)(Subtarget.hasTinyEncoding() ? 4 : 8);1688}16891690SDValue1691AVRTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,1692bool isVarArg,1693const SmallVectorImpl<ISD::OutputArg> &Outs,1694const SmallVectorImpl<SDValue> &OutVals,1695const SDLoc &dl, SelectionDAG &DAG) const {1696// CCValAssign - represent the assignment of the return value to locations.1697SmallVector<CCValAssign, 16> RVLocs;16981699// CCState - Info about the registers and stack slot.1700CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,1701*DAG.getContext());17021703MachineFunction &MF = DAG.getMachineFunction();17041705// Analyze return values.1706if (CallConv == CallingConv::AVR_BUILTIN) {1707CCInfo.AnalyzeReturn(Outs, RetCC_AVR_BUILTIN);1708} else {1709analyzeReturnValues(Outs, CCInfo, Subtarget.hasTinyEncoding());1710}17111712SDValue Glue;1713SmallVector<SDValue, 4> RetOps(1, Chain);1714// Copy the result values into the output registers.1715for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {1716CCValAssign &VA = RVLocs[i];1717assert(VA.isRegLoc() && "Can only return in registers!");17181719Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Glue);17201721// Guarantee that all emitted copies are stuck together with flags.1722Glue = Chain.getValue(1);1723RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));1724}17251726// Don't emit the ret/reti instruction when the naked attribute is present in1727// the function being compiled.1728if (MF.getFunction().getAttributes().hasFnAttr(Attribute::Naked)) {1729return Chain;1730}17311732const AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();17331734if (!AFI->isInterruptOrSignalHandler()) {1735// The return instruction has an implicit zero register operand: it must1736// contain zero on return.1737// This is not needed in interrupts however, where the zero register is1738// handled specially (only pushed/popped when needed).1739RetOps.push_back(DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8));1740}17411742unsigned RetOpc =1743AFI->isInterruptOrSignalHandler() ? AVRISD::RETI_GLUE : AVRISD::RET_GLUE;17441745RetOps[0] = Chain; // Update chain.17461747if (Glue.getNode()) {1748RetOps.push_back(Glue);1749}17501751return DAG.getNode(RetOpc, dl, MVT::Other, RetOps);1752}17531754//===----------------------------------------------------------------------===//1755// Custom Inserters1756//===----------------------------------------------------------------------===//17571758MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,1759MachineBasicBlock *BB,1760bool Tiny) const {1761unsigned Opc;1762const TargetRegisterClass *RC;1763bool HasRepeatedOperand = false;1764MachineFunction *F = BB->getParent();1765MachineRegisterInfo &RI = F->getRegInfo();1766const TargetInstrInfo &TII = *Subtarget.getInstrInfo();1767DebugLoc dl = MI.getDebugLoc();17681769switch (MI.getOpcode()) {1770default:1771llvm_unreachable("Invalid shift opcode!");1772case AVR::Lsl8:1773Opc = AVR::ADDRdRr; // LSL is an alias of ADD Rd, Rd1774RC = &AVR::GPR8RegClass;1775HasRepeatedOperand = true;1776break;1777case AVR::Lsl16:1778Opc = AVR::LSLWRd;1779RC = &AVR::DREGSRegClass;1780break;1781case AVR::Asr8:1782Opc = AVR::ASRRd;1783RC = &AVR::GPR8RegClass;1784break;1785case AVR::Asr16:1786Opc = AVR::ASRWRd;1787RC = &AVR::DREGSRegClass;1788break;1789case AVR::Lsr8:1790Opc = AVR::LSRRd;1791RC = &AVR::GPR8RegClass;1792break;1793case AVR::Lsr16:1794Opc = AVR::LSRWRd;1795RC = &AVR::DREGSRegClass;1796break;1797case AVR::Rol8:1798Opc = Tiny ? AVR::ROLBRdR17 : AVR::ROLBRdR1;1799RC = &AVR::GPR8RegClass;1800break;1801case AVR::Rol16:1802Opc = AVR::ROLWRd;1803RC = &AVR::DREGSRegClass;1804break;1805case AVR::Ror8:1806Opc = AVR::RORBRd;1807RC = &AVR::GPR8RegClass;1808break;1809case AVR::Ror16:1810Opc = AVR::RORWRd;1811RC = &AVR::DREGSRegClass;1812break;1813}18141815const BasicBlock *LLVM_BB = BB->getBasicBlock();18161817MachineFunction::iterator I;1818for (I = BB->getIterator(); I != F->end() && &(*I) != BB; ++I)1819;1820if (I != F->end())1821++I;18221823// Create loop block.1824MachineBasicBlock *LoopBB = F->CreateMachineBasicBlock(LLVM_BB);1825MachineBasicBlock *CheckBB = F->CreateMachineBasicBlock(LLVM_BB);1826MachineBasicBlock *RemBB = F->CreateMachineBasicBlock(LLVM_BB);18271828F->insert(I, LoopBB);1829F->insert(I, CheckBB);1830F->insert(I, RemBB);18311832// Update machine-CFG edges by transferring all successors of the current1833// block to the block containing instructions after shift.1834RemBB->splice(RemBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),1835BB->end());1836RemBB->transferSuccessorsAndUpdatePHIs(BB);18371838// Add edges BB => LoopBB => CheckBB => RemBB, CheckBB => LoopBB.1839BB->addSuccessor(CheckBB);1840LoopBB->addSuccessor(CheckBB);1841CheckBB->addSuccessor(LoopBB);1842CheckBB->addSuccessor(RemBB);18431844Register ShiftAmtReg = RI.createVirtualRegister(&AVR::GPR8RegClass);1845Register ShiftAmtReg2 = RI.createVirtualRegister(&AVR::GPR8RegClass);1846Register ShiftReg = RI.createVirtualRegister(RC);1847Register ShiftReg2 = RI.createVirtualRegister(RC);1848Register ShiftAmtSrcReg = MI.getOperand(2).getReg();1849Register SrcReg = MI.getOperand(1).getReg();1850Register DstReg = MI.getOperand(0).getReg();18511852// BB:1853// rjmp CheckBB1854BuildMI(BB, dl, TII.get(AVR::RJMPk)).addMBB(CheckBB);18551856// LoopBB:1857// ShiftReg2 = shift ShiftReg1858auto ShiftMI = BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2).addReg(ShiftReg);1859if (HasRepeatedOperand)1860ShiftMI.addReg(ShiftReg);18611862// CheckBB:1863// ShiftReg = phi [%SrcReg, BB], [%ShiftReg2, LoopBB]1864// ShiftAmt = phi [%N, BB], [%ShiftAmt2, LoopBB]1865// DestReg = phi [%SrcReg, BB], [%ShiftReg, LoopBB]1866// ShiftAmt2 = ShiftAmt - 1;1867// if (ShiftAmt2 >= 0) goto LoopBB;1868BuildMI(CheckBB, dl, TII.get(AVR::PHI), ShiftReg)1869.addReg(SrcReg)1870.addMBB(BB)1871.addReg(ShiftReg2)1872.addMBB(LoopBB);1873BuildMI(CheckBB, dl, TII.get(AVR::PHI), ShiftAmtReg)1874.addReg(ShiftAmtSrcReg)1875.addMBB(BB)1876.addReg(ShiftAmtReg2)1877.addMBB(LoopBB);1878BuildMI(CheckBB, dl, TII.get(AVR::PHI), DstReg)1879.addReg(SrcReg)1880.addMBB(BB)1881.addReg(ShiftReg2)1882.addMBB(LoopBB);18831884BuildMI(CheckBB, dl, TII.get(AVR::DECRd), ShiftAmtReg2).addReg(ShiftAmtReg);1885BuildMI(CheckBB, dl, TII.get(AVR::BRPLk)).addMBB(LoopBB);18861887MI.eraseFromParent(); // The pseudo instruction is gone now.1888return RemBB;1889}18901891// Do a multibyte AVR shift. Insert shift instructions and put the output1892// registers in the Regs array.1893// Because AVR does not have a normal shift instruction (only a single bit shift1894// instruction), we have to emulate this behavior with other instructions.1895// It first tries large steps (moving registers around) and then smaller steps1896// like single bit shifts.1897// Large shifts actually reduce the number of shifted registers, so the below1898// algorithms have to work independently of the number of registers that are1899// shifted.1900// For more information and background, see this blogpost:1901// https://aykevl.nl/2021/02/avr-bitshift1902static void insertMultibyteShift(MachineInstr &MI, MachineBasicBlock *BB,1903MutableArrayRef<std::pair<Register, int>> Regs,1904ISD::NodeType Opc, int64_t ShiftAmt) {1905const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();1906const AVRSubtarget &STI = BB->getParent()->getSubtarget<AVRSubtarget>();1907MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();1908const DebugLoc &dl = MI.getDebugLoc();19091910const bool ShiftLeft = Opc == ISD::SHL;1911const bool ArithmeticShift = Opc == ISD::SRA;19121913// Zero a register, for use in later operations.1914Register ZeroReg = MRI.createVirtualRegister(&AVR::GPR8RegClass);1915BuildMI(*BB, MI, dl, TII.get(AVR::COPY), ZeroReg)1916.addReg(STI.getZeroRegister());19171918// Do a shift modulo 6 or 7. This is a bit more complicated than most shifts1919// and is hard to compose with the rest, so these are special cased.1920// The basic idea is to shift one or two bits in the opposite direction and1921// then move registers around to get the correct end result.1922if (ShiftLeft && (ShiftAmt % 8) >= 6) {1923// Left shift modulo 6 or 7.19241925// Create a slice of the registers we're going to modify, to ease working1926// with them.1927size_t ShiftRegsOffset = ShiftAmt / 8;1928size_t ShiftRegsSize = Regs.size() - ShiftRegsOffset;1929MutableArrayRef<std::pair<Register, int>> ShiftRegs =1930Regs.slice(ShiftRegsOffset, ShiftRegsSize);19311932// Shift one to the right, keeping the least significant bit as the carry1933// bit.1934insertMultibyteShift(MI, BB, ShiftRegs, ISD::SRL, 1);19351936// Rotate the least significant bit from the carry bit into a new register1937// (that starts out zero).1938Register LowByte = MRI.createVirtualRegister(&AVR::GPR8RegClass);1939BuildMI(*BB, MI, dl, TII.get(AVR::RORRd), LowByte).addReg(ZeroReg);19401941// Shift one more to the right if this is a modulo-6 shift.1942if (ShiftAmt % 8 == 6) {1943insertMultibyteShift(MI, BB, ShiftRegs, ISD::SRL, 1);1944Register NewLowByte = MRI.createVirtualRegister(&AVR::GPR8RegClass);1945BuildMI(*BB, MI, dl, TII.get(AVR::RORRd), NewLowByte).addReg(LowByte);1946LowByte = NewLowByte;1947}19481949// Move all registers to the left, zeroing the bottom registers as needed.1950for (size_t I = 0; I < Regs.size(); I++) {1951int ShiftRegsIdx = I + 1;1952if (ShiftRegsIdx < (int)ShiftRegs.size()) {1953Regs[I] = ShiftRegs[ShiftRegsIdx];1954} else if (ShiftRegsIdx == (int)ShiftRegs.size()) {1955Regs[I] = std::pair(LowByte, 0);1956} else {1957Regs[I] = std::pair(ZeroReg, 0);1958}1959}19601961return;1962}19631964// Right shift modulo 6 or 7.1965if (!ShiftLeft && (ShiftAmt % 8) >= 6) {1966// Create a view on the registers we're going to modify, to ease working1967// with them.1968size_t ShiftRegsSize = Regs.size() - (ShiftAmt / 8);1969MutableArrayRef<std::pair<Register, int>> ShiftRegs =1970Regs.slice(0, ShiftRegsSize);19711972// Shift one to the left.1973insertMultibyteShift(MI, BB, ShiftRegs, ISD::SHL, 1);19741975// Sign or zero extend the most significant register into a new register.1976// The HighByte is the byte that still has one (or two) bits from the1977// original value. The ExtByte is purely a zero/sign extend byte (all bits1978// are either 0 or 1).1979Register HighByte = MRI.createVirtualRegister(&AVR::GPR8RegClass);1980Register ExtByte = 0;1981if (ArithmeticShift) {1982// Sign-extend bit that was shifted out last.1983BuildMI(*BB, MI, dl, TII.get(AVR::SBCRdRr), HighByte)1984.addReg(HighByte, RegState::Undef)1985.addReg(HighByte, RegState::Undef);1986ExtByte = HighByte;1987// The highest bit of the original value is the same as the zero-extend1988// byte, so HighByte and ExtByte are the same.1989} else {1990// Use the zero register for zero extending.1991ExtByte = ZeroReg;1992// Rotate most significant bit into a new register (that starts out zero).1993BuildMI(*BB, MI, dl, TII.get(AVR::ADCRdRr), HighByte)1994.addReg(ExtByte)1995.addReg(ExtByte);1996}19971998// Shift one more to the left for modulo 6 shifts.1999if (ShiftAmt % 8 == 6) {2000insertMultibyteShift(MI, BB, ShiftRegs, ISD::SHL, 1);2001// Shift the topmost bit into the HighByte.2002Register NewExt = MRI.createVirtualRegister(&AVR::GPR8RegClass);2003BuildMI(*BB, MI, dl, TII.get(AVR::ADCRdRr), NewExt)2004.addReg(HighByte)2005.addReg(HighByte);2006HighByte = NewExt;2007}20082009// Move all to the right, while sign or zero extending.2010for (int I = Regs.size() - 1; I >= 0; I--) {2011int ShiftRegsIdx = I - (Regs.size() - ShiftRegs.size()) - 1;2012if (ShiftRegsIdx >= 0) {2013Regs[I] = ShiftRegs[ShiftRegsIdx];2014} else if (ShiftRegsIdx == -1) {2015Regs[I] = std::pair(HighByte, 0);2016} else {2017Regs[I] = std::pair(ExtByte, 0);2018}2019}20202021return;2022}20232024// For shift amounts of at least one register, simply rename the registers and2025// zero the bottom registers.2026while (ShiftLeft && ShiftAmt >= 8) {2027// Move all registers one to the left.2028for (size_t I = 0; I < Regs.size() - 1; I++) {2029Regs[I] = Regs[I + 1];2030}20312032// Zero the least significant register.2033Regs[Regs.size() - 1] = std::pair(ZeroReg, 0);20342035// Continue shifts with the leftover registers.2036Regs = Regs.drop_back(1);20372038ShiftAmt -= 8;2039}20402041// And again, the same for right shifts.2042Register ShrExtendReg = 0;2043if (!ShiftLeft && ShiftAmt >= 8) {2044if (ArithmeticShift) {2045// Sign extend the most significant register into ShrExtendReg.2046ShrExtendReg = MRI.createVirtualRegister(&AVR::GPR8RegClass);2047Register Tmp = MRI.createVirtualRegister(&AVR::GPR8RegClass);2048BuildMI(*BB, MI, dl, TII.get(AVR::ADDRdRr), Tmp)2049.addReg(Regs[0].first, 0, Regs[0].second)2050.addReg(Regs[0].first, 0, Regs[0].second);2051BuildMI(*BB, MI, dl, TII.get(AVR::SBCRdRr), ShrExtendReg)2052.addReg(Tmp)2053.addReg(Tmp);2054} else {2055ShrExtendReg = ZeroReg;2056}2057for (; ShiftAmt >= 8; ShiftAmt -= 8) {2058// Move all registers one to the right.2059for (size_t I = Regs.size() - 1; I != 0; I--) {2060Regs[I] = Regs[I - 1];2061}20622063// Zero or sign extend the most significant register.2064Regs[0] = std::pair(ShrExtendReg, 0);20652066// Continue shifts with the leftover registers.2067Regs = Regs.drop_front(1);2068}2069}20702071// The bigger shifts are already handled above.2072assert((ShiftAmt < 8) && "Unexpect shift amount");20732074// Shift by four bits, using a complicated swap/eor/andi/eor sequence.2075// It only works for logical shifts because the bits shifted in are all2076// zeroes.2077// To shift a single byte right, it produces code like this:2078// swap r02079// andi r0, 0x0f2080// For a two-byte (16-bit) shift, it adds the following instructions to shift2081// the upper byte into the lower byte:2082// swap r12083// eor r0, r12084// andi r1, 0x0f2085// eor r0, r12086// For bigger shifts, it repeats the above sequence. For example, for a 3-byte2087// (24-bit) shift it adds:2088// swap r22089// eor r1, r22090// andi r2, 0x0f2091// eor r1, r22092if (!ArithmeticShift && ShiftAmt >= 4) {2093Register Prev = 0;2094for (size_t I = 0; I < Regs.size(); I++) {2095size_t Idx = ShiftLeft ? I : Regs.size() - I - 1;2096Register SwapReg = MRI.createVirtualRegister(&AVR::LD8RegClass);2097BuildMI(*BB, MI, dl, TII.get(AVR::SWAPRd), SwapReg)2098.addReg(Regs[Idx].first, 0, Regs[Idx].second);2099if (I != 0) {2100Register R = MRI.createVirtualRegister(&AVR::GPR8RegClass);2101BuildMI(*BB, MI, dl, TII.get(AVR::EORRdRr), R)2102.addReg(Prev)2103.addReg(SwapReg);2104Prev = R;2105}2106Register AndReg = MRI.createVirtualRegister(&AVR::LD8RegClass);2107BuildMI(*BB, MI, dl, TII.get(AVR::ANDIRdK), AndReg)2108.addReg(SwapReg)2109.addImm(ShiftLeft ? 0xf0 : 0x0f);2110if (I != 0) {2111Register R = MRI.createVirtualRegister(&AVR::GPR8RegClass);2112BuildMI(*BB, MI, dl, TII.get(AVR::EORRdRr), R)2113.addReg(Prev)2114.addReg(AndReg);2115size_t PrevIdx = ShiftLeft ? Idx - 1 : Idx + 1;2116Regs[PrevIdx] = std::pair(R, 0);2117}2118Prev = AndReg;2119Regs[Idx] = std::pair(AndReg, 0);2120}2121ShiftAmt -= 4;2122}21232124// Shift by one. This is the fallback that always works, and the shift2125// operation that is used for 1, 2, and 3 bit shifts.2126while (ShiftLeft && ShiftAmt) {2127// Shift one to the left.2128for (ssize_t I = Regs.size() - 1; I >= 0; I--) {2129Register Out = MRI.createVirtualRegister(&AVR::GPR8RegClass);2130Register In = Regs[I].first;2131Register InSubreg = Regs[I].second;2132if (I == (ssize_t)Regs.size() - 1) { // first iteration2133BuildMI(*BB, MI, dl, TII.get(AVR::ADDRdRr), Out)2134.addReg(In, 0, InSubreg)2135.addReg(In, 0, InSubreg);2136} else {2137BuildMI(*BB, MI, dl, TII.get(AVR::ADCRdRr), Out)2138.addReg(In, 0, InSubreg)2139.addReg(In, 0, InSubreg);2140}2141Regs[I] = std::pair(Out, 0);2142}2143ShiftAmt--;2144}2145while (!ShiftLeft && ShiftAmt) {2146// Shift one to the right.2147for (size_t I = 0; I < Regs.size(); I++) {2148Register Out = MRI.createVirtualRegister(&AVR::GPR8RegClass);2149Register In = Regs[I].first;2150Register InSubreg = Regs[I].second;2151if (I == 0) {2152unsigned Opc = ArithmeticShift ? AVR::ASRRd : AVR::LSRRd;2153BuildMI(*BB, MI, dl, TII.get(Opc), Out).addReg(In, 0, InSubreg);2154} else {2155BuildMI(*BB, MI, dl, TII.get(AVR::RORRd), Out).addReg(In, 0, InSubreg);2156}2157Regs[I] = std::pair(Out, 0);2158}2159ShiftAmt--;2160}21612162if (ShiftAmt != 0) {2163llvm_unreachable("don't know how to shift!"); // sanity check2164}2165}21662167// Do a wide (32-bit) shift.2168MachineBasicBlock *2169AVRTargetLowering::insertWideShift(MachineInstr &MI,2170MachineBasicBlock *BB) const {2171const TargetInstrInfo &TII = *Subtarget.getInstrInfo();2172const DebugLoc &dl = MI.getDebugLoc();21732174// How much to shift to the right (meaning: a negative number indicates a left2175// shift).2176int64_t ShiftAmt = MI.getOperand(4).getImm();2177ISD::NodeType Opc;2178switch (MI.getOpcode()) {2179case AVR::Lsl32:2180Opc = ISD::SHL;2181break;2182case AVR::Lsr32:2183Opc = ISD::SRL;2184break;2185case AVR::Asr32:2186Opc = ISD::SRA;2187break;2188}21892190// Read the input registers, with the most significant register at index 0.2191std::array<std::pair<Register, int>, 4> Registers = {2192std::pair(MI.getOperand(3).getReg(), AVR::sub_hi),2193std::pair(MI.getOperand(3).getReg(), AVR::sub_lo),2194std::pair(MI.getOperand(2).getReg(), AVR::sub_hi),2195std::pair(MI.getOperand(2).getReg(), AVR::sub_lo),2196};21972198// Do the shift. The registers are modified in-place.2199insertMultibyteShift(MI, BB, Registers, Opc, ShiftAmt);22002201// Combine the 8-bit registers into 16-bit register pairs.2202// This done either from LSB to MSB or from MSB to LSB, depending on the2203// shift. It's an optimization so that the register allocator will use the2204// fewest movs possible (which order we use isn't a correctness issue, just an2205// optimization issue).2206// - lsl prefers starting from the most significant byte (2nd case).2207// - lshr prefers starting from the least significant byte (1st case).2208// - for ashr it depends on the number of shifted bytes.2209// Some shift operations still don't get the most optimal mov sequences even2210// with this distinction. TODO: figure out why and try to fix it (but we're2211// already equal to or faster than avr-gcc in all cases except ashr 8).2212if (Opc != ISD::SHL &&2213(Opc != ISD::SRA || (ShiftAmt < 16 || ShiftAmt >= 22))) {2214// Use the resulting registers starting with the least significant byte.2215BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(0).getReg())2216.addReg(Registers[3].first, 0, Registers[3].second)2217.addImm(AVR::sub_lo)2218.addReg(Registers[2].first, 0, Registers[2].second)2219.addImm(AVR::sub_hi);2220BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(1).getReg())2221.addReg(Registers[1].first, 0, Registers[1].second)2222.addImm(AVR::sub_lo)2223.addReg(Registers[0].first, 0, Registers[0].second)2224.addImm(AVR::sub_hi);2225} else {2226// Use the resulting registers starting with the most significant byte.2227BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(1).getReg())2228.addReg(Registers[0].first, 0, Registers[0].second)2229.addImm(AVR::sub_hi)2230.addReg(Registers[1].first, 0, Registers[1].second)2231.addImm(AVR::sub_lo);2232BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(0).getReg())2233.addReg(Registers[2].first, 0, Registers[2].second)2234.addImm(AVR::sub_hi)2235.addReg(Registers[3].first, 0, Registers[3].second)2236.addImm(AVR::sub_lo);2237}22382239// Remove the pseudo instruction.2240MI.eraseFromParent();2241return BB;2242}22432244static bool isCopyMulResult(MachineBasicBlock::iterator const &I) {2245if (I->getOpcode() == AVR::COPY) {2246Register SrcReg = I->getOperand(1).getReg();2247return (SrcReg == AVR::R0 || SrcReg == AVR::R1);2248}22492250return false;2251}22522253// The mul instructions wreak havock on our zero_reg R1. We need to clear it2254// after the result has been evacuated. This is probably not the best way to do2255// it, but it works for now.2256MachineBasicBlock *AVRTargetLowering::insertMul(MachineInstr &MI,2257MachineBasicBlock *BB) const {2258const TargetInstrInfo &TII = *Subtarget.getInstrInfo();2259MachineBasicBlock::iterator I(MI);2260++I; // in any case insert *after* the mul instruction2261if (isCopyMulResult(I))2262++I;2263if (isCopyMulResult(I))2264++I;2265BuildMI(*BB, I, MI.getDebugLoc(), TII.get(AVR::EORRdRr), AVR::R1)2266.addReg(AVR::R1)2267.addReg(AVR::R1);2268return BB;2269}22702271// Insert a read from the zero register.2272MachineBasicBlock *2273AVRTargetLowering::insertCopyZero(MachineInstr &MI,2274MachineBasicBlock *BB) const {2275const TargetInstrInfo &TII = *Subtarget.getInstrInfo();2276MachineBasicBlock::iterator I(MI);2277BuildMI(*BB, I, MI.getDebugLoc(), TII.get(AVR::COPY))2278.add(MI.getOperand(0))2279.addReg(Subtarget.getZeroRegister());2280MI.eraseFromParent();2281return BB;2282}22832284// Lower atomicrmw operation to disable interrupts, do operation, and restore2285// interrupts. This works because all AVR microcontrollers are single core.2286MachineBasicBlock *AVRTargetLowering::insertAtomicArithmeticOp(2287MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode, int Width) const {2288MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();2289const TargetInstrInfo &TII = *Subtarget.getInstrInfo();2290MachineBasicBlock::iterator I(MI);2291DebugLoc dl = MI.getDebugLoc();22922293// Example instruction sequence, for an atomic 8-bit add:2294// ldi r25, 52295// in r0, SREG2296// cli2297// ld r24, X2298// add r25, r242299// st X, r252300// out SREG, r023012302const TargetRegisterClass *RC =2303(Width == 8) ? &AVR::GPR8RegClass : &AVR::DREGSRegClass;2304unsigned LoadOpcode = (Width == 8) ? AVR::LDRdPtr : AVR::LDWRdPtr;2305unsigned StoreOpcode = (Width == 8) ? AVR::STPtrRr : AVR::STWPtrRr;23062307// Disable interrupts.2308BuildMI(*BB, I, dl, TII.get(AVR::INRdA), Subtarget.getTmpRegister())2309.addImm(Subtarget.getIORegSREG());2310BuildMI(*BB, I, dl, TII.get(AVR::BCLRs)).addImm(7);23112312// Load the original value.2313BuildMI(*BB, I, dl, TII.get(LoadOpcode), MI.getOperand(0).getReg())2314.add(MI.getOperand(1));23152316// Do the arithmetic operation.2317Register Result = MRI.createVirtualRegister(RC);2318BuildMI(*BB, I, dl, TII.get(Opcode), Result)2319.addReg(MI.getOperand(0).getReg())2320.add(MI.getOperand(2));23212322// Store the result.2323BuildMI(*BB, I, dl, TII.get(StoreOpcode))2324.add(MI.getOperand(1))2325.addReg(Result);23262327// Restore interrupts.2328BuildMI(*BB, I, dl, TII.get(AVR::OUTARr))2329.addImm(Subtarget.getIORegSREG())2330.addReg(Subtarget.getTmpRegister());23312332// Remove the pseudo instruction.2333MI.eraseFromParent();2334return BB;2335}23362337MachineBasicBlock *2338AVRTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,2339MachineBasicBlock *MBB) const {2340int Opc = MI.getOpcode();2341const AVRSubtarget &STI = MBB->getParent()->getSubtarget<AVRSubtarget>();23422343// Pseudo shift instructions with a non constant shift amount are expanded2344// into a loop.2345switch (Opc) {2346case AVR::Lsl8:2347case AVR::Lsl16:2348case AVR::Lsr8:2349case AVR::Lsr16:2350case AVR::Rol8:2351case AVR::Rol16:2352case AVR::Ror8:2353case AVR::Ror16:2354case AVR::Asr8:2355case AVR::Asr16:2356return insertShift(MI, MBB, STI.hasTinyEncoding());2357case AVR::Lsl32:2358case AVR::Lsr32:2359case AVR::Asr32:2360return insertWideShift(MI, MBB);2361case AVR::MULRdRr:2362case AVR::MULSRdRr:2363return insertMul(MI, MBB);2364case AVR::CopyZero:2365return insertCopyZero(MI, MBB);2366case AVR::AtomicLoadAdd8:2367return insertAtomicArithmeticOp(MI, MBB, AVR::ADDRdRr, 8);2368case AVR::AtomicLoadAdd16:2369return insertAtomicArithmeticOp(MI, MBB, AVR::ADDWRdRr, 16);2370case AVR::AtomicLoadSub8:2371return insertAtomicArithmeticOp(MI, MBB, AVR::SUBRdRr, 8);2372case AVR::AtomicLoadSub16:2373return insertAtomicArithmeticOp(MI, MBB, AVR::SUBWRdRr, 16);2374case AVR::AtomicLoadAnd8:2375return insertAtomicArithmeticOp(MI, MBB, AVR::ANDRdRr, 8);2376case AVR::AtomicLoadAnd16:2377return insertAtomicArithmeticOp(MI, MBB, AVR::ANDWRdRr, 16);2378case AVR::AtomicLoadOr8:2379return insertAtomicArithmeticOp(MI, MBB, AVR::ORRdRr, 8);2380case AVR::AtomicLoadOr16:2381return insertAtomicArithmeticOp(MI, MBB, AVR::ORWRdRr, 16);2382case AVR::AtomicLoadXor8:2383return insertAtomicArithmeticOp(MI, MBB, AVR::EORRdRr, 8);2384case AVR::AtomicLoadXor16:2385return insertAtomicArithmeticOp(MI, MBB, AVR::EORWRdRr, 16);2386}23872388assert((Opc == AVR::Select16 || Opc == AVR::Select8) &&2389"Unexpected instr type to insert");23902391const AVRInstrInfo &TII = (const AVRInstrInfo &)*MI.getParent()2392->getParent()2393->getSubtarget()2394.getInstrInfo();2395DebugLoc dl = MI.getDebugLoc();23962397// To "insert" a SELECT instruction, we insert the diamond2398// control-flow pattern. The incoming instruction knows the2399// destination vreg to set, the condition code register to branch2400// on, the true/false values to select between, and a branch opcode2401// to use.24022403MachineFunction *MF = MBB->getParent();2404const BasicBlock *LLVM_BB = MBB->getBasicBlock();2405MachineBasicBlock *FallThrough = MBB->getFallThrough();24062407// If the current basic block falls through to another basic block,2408// we must insert an unconditional branch to the fallthrough destination2409// if we are to insert basic blocks at the prior fallthrough point.2410if (FallThrough != nullptr) {2411BuildMI(MBB, dl, TII.get(AVR::RJMPk)).addMBB(FallThrough);2412}24132414MachineBasicBlock *trueMBB = MF->CreateMachineBasicBlock(LLVM_BB);2415MachineBasicBlock *falseMBB = MF->CreateMachineBasicBlock(LLVM_BB);24162417MachineFunction::iterator I;2418for (I = MF->begin(); I != MF->end() && &(*I) != MBB; ++I)2419;2420if (I != MF->end())2421++I;2422MF->insert(I, trueMBB);2423MF->insert(I, falseMBB);24242425// Set the call frame size on entry to the new basic blocks.2426unsigned CallFrameSize = TII.getCallFrameSizeAt(MI);2427trueMBB->setCallFrameSize(CallFrameSize);2428falseMBB->setCallFrameSize(CallFrameSize);24292430// Transfer remaining instructions and all successors of the current2431// block to the block which will contain the Phi node for the2432// select.2433trueMBB->splice(trueMBB->begin(), MBB,2434std::next(MachineBasicBlock::iterator(MI)), MBB->end());2435trueMBB->transferSuccessorsAndUpdatePHIs(MBB);24362437AVRCC::CondCodes CC = (AVRCC::CondCodes)MI.getOperand(3).getImm();2438BuildMI(MBB, dl, TII.getBrCond(CC)).addMBB(trueMBB);2439BuildMI(MBB, dl, TII.get(AVR::RJMPk)).addMBB(falseMBB);2440MBB->addSuccessor(falseMBB);2441MBB->addSuccessor(trueMBB);24422443// Unconditionally flow back to the true block2444BuildMI(falseMBB, dl, TII.get(AVR::RJMPk)).addMBB(trueMBB);2445falseMBB->addSuccessor(trueMBB);24462447// Set up the Phi node to determine where we came from2448BuildMI(*trueMBB, trueMBB->begin(), dl, TII.get(AVR::PHI),2449MI.getOperand(0).getReg())2450.addReg(MI.getOperand(1).getReg())2451.addMBB(MBB)2452.addReg(MI.getOperand(2).getReg())2453.addMBB(falseMBB);24542455MI.eraseFromParent(); // The pseudo instruction is gone now.2456return trueMBB;2457}24582459//===----------------------------------------------------------------------===//2460// Inline Asm Support2461//===----------------------------------------------------------------------===//24622463AVRTargetLowering::ConstraintType2464AVRTargetLowering::getConstraintType(StringRef Constraint) const {2465if (Constraint.size() == 1) {2466// See http://www.nongnu.org/avr-libc/user-manual/inline_asm.html2467switch (Constraint[0]) {2468default:2469break;2470case 'a': // Simple upper registers2471case 'b': // Base pointer registers pairs2472case 'd': // Upper register2473case 'l': // Lower registers2474case 'e': // Pointer register pairs2475case 'q': // Stack pointer register2476case 'r': // Any register2477case 'w': // Special upper register pairs2478return C_RegisterClass;2479case 't': // Temporary register2480case 'x':2481case 'X': // Pointer register pair X2482case 'y':2483case 'Y': // Pointer register pair Y2484case 'z':2485case 'Z': // Pointer register pair Z2486return C_Register;2487case 'Q': // A memory address based on Y or Z pointer with displacement.2488return C_Memory;2489case 'G': // Floating point constant2490case 'I': // 6-bit positive integer constant2491case 'J': // 6-bit negative integer constant2492case 'K': // Integer constant (Range: 2)2493case 'L': // Integer constant (Range: 0)2494case 'M': // 8-bit integer constant2495case 'N': // Integer constant (Range: -1)2496case 'O': // Integer constant (Range: 8, 16, 24)2497case 'P': // Integer constant (Range: 1)2498case 'R': // Integer constant (Range: -6 to 5)x2499return C_Immediate;2500}2501}25022503return TargetLowering::getConstraintType(Constraint);2504}25052506InlineAsm::ConstraintCode2507AVRTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {2508// Not sure if this is actually the right thing to do, but we got to do2509// *something* [agnat]2510switch (ConstraintCode[0]) {2511case 'Q':2512return InlineAsm::ConstraintCode::Q;2513}2514return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);2515}25162517AVRTargetLowering::ConstraintWeight2518AVRTargetLowering::getSingleConstraintMatchWeight(2519AsmOperandInfo &info, const char *constraint) const {2520ConstraintWeight weight = CW_Invalid;2521Value *CallOperandVal = info.CallOperandVal;25222523// If we don't have a value, we can't do a match,2524// but allow it at the lowest weight.2525// (this behaviour has been copied from the ARM backend)2526if (!CallOperandVal) {2527return CW_Default;2528}25292530// Look at the constraint type.2531switch (*constraint) {2532default:2533weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);2534break;2535case 'd':2536case 'r':2537case 'l':2538weight = CW_Register;2539break;2540case 'a':2541case 'b':2542case 'e':2543case 'q':2544case 't':2545case 'w':2546case 'x':2547case 'X':2548case 'y':2549case 'Y':2550case 'z':2551case 'Z':2552weight = CW_SpecificReg;2553break;2554case 'G':2555if (const ConstantFP *C = dyn_cast<ConstantFP>(CallOperandVal)) {2556if (C->isZero()) {2557weight = CW_Constant;2558}2559}2560break;2561case 'I':2562if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {2563if (isUInt<6>(C->getZExtValue())) {2564weight = CW_Constant;2565}2566}2567break;2568case 'J':2569if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {2570if ((C->getSExtValue() >= -63) && (C->getSExtValue() <= 0)) {2571weight = CW_Constant;2572}2573}2574break;2575case 'K':2576if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {2577if (C->getZExtValue() == 2) {2578weight = CW_Constant;2579}2580}2581break;2582case 'L':2583if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {2584if (C->getZExtValue() == 0) {2585weight = CW_Constant;2586}2587}2588break;2589case 'M':2590if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {2591if (isUInt<8>(C->getZExtValue())) {2592weight = CW_Constant;2593}2594}2595break;2596case 'N':2597if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {2598if (C->getSExtValue() == -1) {2599weight = CW_Constant;2600}2601}2602break;2603case 'O':2604if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {2605if ((C->getZExtValue() == 8) || (C->getZExtValue() == 16) ||2606(C->getZExtValue() == 24)) {2607weight = CW_Constant;2608}2609}2610break;2611case 'P':2612if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {2613if (C->getZExtValue() == 1) {2614weight = CW_Constant;2615}2616}2617break;2618case 'R':2619if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {2620if ((C->getSExtValue() >= -6) && (C->getSExtValue() <= 5)) {2621weight = CW_Constant;2622}2623}2624break;2625case 'Q':2626weight = CW_Memory;2627break;2628}26292630return weight;2631}26322633std::pair<unsigned, const TargetRegisterClass *>2634AVRTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,2635StringRef Constraint,2636MVT VT) const {2637if (Constraint.size() == 1) {2638switch (Constraint[0]) {2639case 'a': // Simple upper registers r16..r23.2640if (VT == MVT::i8)2641return std::make_pair(0U, &AVR::LD8loRegClass);2642else if (VT == MVT::i16)2643return std::make_pair(0U, &AVR::DREGSLD8loRegClass);2644break;2645case 'b': // Base pointer registers: y, z.2646if (VT == MVT::i8 || VT == MVT::i16)2647return std::make_pair(0U, &AVR::PTRDISPREGSRegClass);2648break;2649case 'd': // Upper registers r16..r31.2650if (VT == MVT::i8)2651return std::make_pair(0U, &AVR::LD8RegClass);2652else if (VT == MVT::i16)2653return std::make_pair(0U, &AVR::DLDREGSRegClass);2654break;2655case 'l': // Lower registers r0..r15.2656if (VT == MVT::i8)2657return std::make_pair(0U, &AVR::GPR8loRegClass);2658else if (VT == MVT::i16)2659return std::make_pair(0U, &AVR::DREGSloRegClass);2660break;2661case 'e': // Pointer register pairs: x, y, z.2662if (VT == MVT::i8 || VT == MVT::i16)2663return std::make_pair(0U, &AVR::PTRREGSRegClass);2664break;2665case 'q': // Stack pointer register: SPH:SPL.2666return std::make_pair(0U, &AVR::GPRSPRegClass);2667case 'r': // Any register: r0..r31.2668if (VT == MVT::i8)2669return std::make_pair(0U, &AVR::GPR8RegClass);2670else if (VT == MVT::i16)2671return std::make_pair(0U, &AVR::DREGSRegClass);2672break;2673case 't': // Temporary register: r0.2674if (VT == MVT::i8)2675return std::make_pair(unsigned(Subtarget.getTmpRegister()),2676&AVR::GPR8RegClass);2677break;2678case 'w': // Special upper register pairs: r24, r26, r28, r30.2679if (VT == MVT::i8 || VT == MVT::i16)2680return std::make_pair(0U, &AVR::IWREGSRegClass);2681break;2682case 'x': // Pointer register pair X: r27:r26.2683case 'X':2684if (VT == MVT::i8 || VT == MVT::i16)2685return std::make_pair(unsigned(AVR::R27R26), &AVR::PTRREGSRegClass);2686break;2687case 'y': // Pointer register pair Y: r29:r28.2688case 'Y':2689if (VT == MVT::i8 || VT == MVT::i16)2690return std::make_pair(unsigned(AVR::R29R28), &AVR::PTRREGSRegClass);2691break;2692case 'z': // Pointer register pair Z: r31:r30.2693case 'Z':2694if (VT == MVT::i8 || VT == MVT::i16)2695return std::make_pair(unsigned(AVR::R31R30), &AVR::PTRREGSRegClass);2696break;2697default:2698break;2699}2700}27012702return TargetLowering::getRegForInlineAsmConstraint(2703Subtarget.getRegisterInfo(), Constraint, VT);2704}27052706void AVRTargetLowering::LowerAsmOperandForConstraint(SDValue Op,2707StringRef Constraint,2708std::vector<SDValue> &Ops,2709SelectionDAG &DAG) const {2710SDValue Result;2711SDLoc DL(Op);2712EVT Ty = Op.getValueType();27132714// Currently only support length 1 constraints.2715if (Constraint.size() != 1) {2716return;2717}27182719char ConstraintLetter = Constraint[0];2720switch (ConstraintLetter) {2721default:2722break;2723// Deal with integers first:2724case 'I':2725case 'J':2726case 'K':2727case 'L':2728case 'M':2729case 'N':2730case 'O':2731case 'P':2732case 'R': {2733const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);2734if (!C) {2735return;2736}27372738int64_t CVal64 = C->getSExtValue();2739uint64_t CUVal64 = C->getZExtValue();2740switch (ConstraintLetter) {2741case 'I': // 0..632742if (!isUInt<6>(CUVal64))2743return;2744Result = DAG.getTargetConstant(CUVal64, DL, Ty);2745break;2746case 'J': // -63..02747if (CVal64 < -63 || CVal64 > 0)2748return;2749Result = DAG.getTargetConstant(CVal64, DL, Ty);2750break;2751case 'K': // 22752if (CUVal64 != 2)2753return;2754Result = DAG.getTargetConstant(CUVal64, DL, Ty);2755break;2756case 'L': // 02757if (CUVal64 != 0)2758return;2759Result = DAG.getTargetConstant(CUVal64, DL, Ty);2760break;2761case 'M': // 0..2552762if (!isUInt<8>(CUVal64))2763return;2764// i8 type may be printed as a negative number,2765// e.g. 254 would be printed as -2,2766// so we force it to i16 at least.2767if (Ty.getSimpleVT() == MVT::i8) {2768Ty = MVT::i16;2769}2770Result = DAG.getTargetConstant(CUVal64, DL, Ty);2771break;2772case 'N': // -12773if (CVal64 != -1)2774return;2775Result = DAG.getTargetConstant(CVal64, DL, Ty);2776break;2777case 'O': // 8, 16, 242778if (CUVal64 != 8 && CUVal64 != 16 && CUVal64 != 24)2779return;2780Result = DAG.getTargetConstant(CUVal64, DL, Ty);2781break;2782case 'P': // 12783if (CUVal64 != 1)2784return;2785Result = DAG.getTargetConstant(CUVal64, DL, Ty);2786break;2787case 'R': // -6..52788if (CVal64 < -6 || CVal64 > 5)2789return;2790Result = DAG.getTargetConstant(CVal64, DL, Ty);2791break;2792}27932794break;2795}2796case 'G':2797const ConstantFPSDNode *FC = dyn_cast<ConstantFPSDNode>(Op);2798if (!FC || !FC->isZero())2799return;2800// Soften float to i8 02801Result = DAG.getTargetConstant(0, DL, MVT::i8);2802break;2803}28042805if (Result.getNode()) {2806Ops.push_back(Result);2807return;2808}28092810return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);2811}28122813Register AVRTargetLowering::getRegisterByName(const char *RegName, LLT VT,2814const MachineFunction &MF) const {2815Register Reg;28162817if (VT == LLT::scalar(8)) {2818Reg = StringSwitch<unsigned>(RegName)2819.Case("r0", AVR::R0)2820.Case("r1", AVR::R1)2821.Default(0);2822} else {2823Reg = StringSwitch<unsigned>(RegName)2824.Case("r0", AVR::R1R0)2825.Case("sp", AVR::SP)2826.Default(0);2827}28282829if (Reg)2830return Reg;28312832report_fatal_error(2833Twine("Invalid register name \"" + StringRef(RegName) + "\"."));2834}28352836} // end of namespace llvm283728382839