CoCalc -- AVRISelLowering.cpp

GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Target/AVR/AVRISelLowering.cpp
³⁵²⁶⁹ views
//===-- AVRISelLowering.cpp - AVR DAG Lowering Implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that AVR uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "AVRISelLowering.h"

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/ErrorHandling.h"

#include "AVR.h"
#include "AVRMachineFunctionInfo.h"
#include "AVRSubtarget.h"
#include "AVRTargetMachine.h"
#include "MCTargetDesc/AVRMCTargetDesc.h"

namespace llvm {

/// Configures DAG lowering for AVR: registers the i8 and i16 register classes,
/// declares how each generic ISD operation is legalized for each value type
/// (Legal / Custom / Expand / Promote), and names the runtime library routines
/// used for division, remainder and trigonometry.
AVRTargetLowering::AVRTargetLowering(const AVRTargetMachine &TM,
                                     const AVRSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  // Set up the register classes.
  addRegisterClass(MVT::i8, &AVR::GPR8RegClass);
  addRegisterClass(MVT::i16, &AVR::DREGSRegClass);

  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget.getRegisterInfo());

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrOneBooleanContent);
  setSchedulingPreference(Sched::RegPressure);
  setStackPointerRegisterToSaveRestore(AVR::SP);
  setSupportsUnalignedAtomics(true);

  setOperationAction(ISD::GlobalAddress, MVT::i16, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i16, Custom);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  for (MVT VT : {MVT::i8, MVT::i16})
    setOperationAction(ISD::DYNAMIC_STACKALLOC, VT, Expand);

  setOperationAction(ISD::INLINEASM, MVT::Other, Custom);

  for (MVT VT : MVT::integer_valuetypes()) {
    for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
      setLoadExtAction(N, VT, MVT::i1, Promote);
      setLoadExtAction(N, VT, MVT::i8, Expand);
    }
  }

  setTruncStoreAction(MVT::i16, MVT::i8, Expand);

  for (MVT VT : MVT::integer_valuetypes()) {
    for (auto CarryOp : {ISD::ADDC, ISD::SUBC, ISD::ADDE, ISD::SUBE})
      setOperationAction(CarryOp, VT, Legal);
  }

  // sub (x, imm) gets canonicalized to add (x, -imm), so for illegal types
  // revert into a sub since we don't have an add with immediate instruction.
  for (MVT VT : {MVT::i32, MVT::i64})
    setOperationAction(ISD::ADD, VT, Custom);

  // Our shift instructions are only able to shift 1 bit at a time, so handle
  // this in a custom way.
  for (MVT VT : {MVT::i8, MVT::i16, MVT::i32}) {
    for (auto ShiftOp : {ISD::SRA, ISD::SHL, ISD::SRL})
      setOperationAction(ShiftOp, VT, Custom);
  }
  for (auto PartsOp : {ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS})
    setOperationAction(PartsOp, MVT::i16, Expand);

  // Rotates are custom-lowered for i8 and expanded for i16.
  for (auto RotOp : {ISD::ROTL, ISD::ROTR}) {
    setOperationAction(RotOp, MVT::i8, Custom);
    setOperationAction(RotOp, MVT::i16, Expand);
  }

  for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64})
    setOperationAction(ISD::BR_CC, VT, Custom);
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  for (MVT VT : {MVT::i8, MVT::i16})
    setOperationAction(ISD::SELECT_CC, VT, Custom);
  for (MVT VT : {MVT::i32, MVT::i64})
    setOperationAction(ISD::SELECT_CC, VT, Expand);
  for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64})
    setOperationAction(ISD::SETCC, VT, Custom);
  for (MVT VT : {MVT::i8, MVT::i16})
    setOperationAction(ISD::SELECT, VT, Expand);

  setOperationAction(ISD::BSWAP, MVT::i16, Expand);

  // Add support for postincrement and predecrement load/stores.
  for (auto Mode : {ISD::POST_INC, ISD::PRE_DEC}) {
    for (MVT VT : {MVT::i8, MVT::i16})
      setIndexedLoadAction(Mode, VT, Legal);
  }
  for (auto Mode : {ISD::POST_INC, ISD::PRE_DEC}) {
    for (MVT VT : {MVT::i8, MVT::i16})
      setIndexedStoreAction(Mode, VT, Legal);
  }

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);

  // Atomic operations which must be lowered to rtlib calls
  for (MVT VT : MVT::integer_valuetypes()) {
    for (auto AtomicOp :
         {ISD::ATOMIC_SWAP, ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_LOAD_NAND,
          ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_MIN, ISD::ATOMIC_LOAD_UMAX,
          ISD::ATOMIC_LOAD_UMIN})
      setOperationAction(AtomicOp, VT, Expand);
  }

  // Division/remainder
  for (auto DivOp : {ISD::UDIV, ISD::UREM, ISD::SDIV, ISD::SREM}) {
    for (MVT VT : {MVT::i8, MVT::i16})
      setOperationAction(DivOp, VT, Expand);
  }

  // Make division and modulus custom
  for (auto DivRemOp : {ISD::UDIVREM, ISD::SDIVREM}) {
    for (MVT VT : {MVT::i8, MVT::i16, MVT::i32})
      setOperationAction(DivRemOp, VT, Custom);
  }

  // Do not use MUL. The AVR instructions are closer to SMUL_LOHI &co.
  for (MVT VT : {MVT::i8, MVT::i16})
    setOperationAction(ISD::MUL, VT, Expand);

  // Expand 16 bit multiplications.
  setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);

  // Expand multiplications to libcalls when there is
  // no hardware MUL.
  if (!Subtarget.supportsMultiplication()) {
    setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
    setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
  }

  for (MVT VT : MVT::integer_valuetypes()) {
    setOperationAction(ISD::MULHS, VT, Expand);
    setOperationAction(ISD::MULHU, VT, Expand);
  }

  for (MVT VT : MVT::integer_valuetypes()) {
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
  }

  for (MVT VT : MVT::integer_valuetypes()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
    // TODO: The generated code is pretty poor. Investigate using the
    // same "shift and subtract with carry" trick that we do for
    // extending 8-bit to 16-bit. This may require infrastructure
    // improvements in how we treat 16-bit "registers" to be feasible.
  }

  // Division and modulus rtlib functions
  setLibcallName(RTLIB::SDIVREM_I8, "__divmodqi4");
  setLibcallName(RTLIB::SDIVREM_I16, "__divmodhi4");
  setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
  setLibcallName(RTLIB::UDIVREM_I8, "__udivmodqi4");
  setLibcallName(RTLIB::UDIVREM_I16, "__udivmodhi4");
  setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");

  // Several of the runtime library functions use a special calling conv
  setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::AVR_BUILTIN);
  setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::AVR_BUILTIN);
  setLibcallCallingConv(RTLIB::UDIVREM_I8, CallingConv::AVR_BUILTIN);
  setLibcallCallingConv(RTLIB::UDIVREM_I16, CallingConv::AVR_BUILTIN);

  // Trigonometric rtlib functions
  setLibcallName(RTLIB::SIN_F32, "sin");
  setLibcallName(RTLIB::COS_F32, "cos");

  setMinFunctionAlignment(Align(2));
  // UINT_MAX as the minimum entry count effectively disables jump tables.
  setMinimumJumpTableEntries(UINT_MAX);
}

const char *AVRTargetLowering::getTargetNodeName(unsigned Opcode) const {
225
#define NODE(name)                                                             \
226
  case AVRISD::name:                                                           \
227
    return #name
228

229
  switch (Opcode) {
230
  default:
231
    return nullptr;
232
    NODE(RET_GLUE);
233
    NODE(RETI_GLUE);
234
    NODE(CALL);
235
    NODE(WRAPPER);
236
    NODE(LSL);
237
    NODE(LSLW);
238
    NODE(LSR);
239
    NODE(LSRW);
240
    NODE(ROL);
241
    NODE(ROR);
242
    NODE(ASR);
243
    NODE(ASRW);
244
    NODE(LSLLOOP);
245
    NODE(LSRLOOP);
246
    NODE(ROLLOOP);
247
    NODE(RORLOOP);
248
    NODE(ASRLOOP);
249
    NODE(BRCOND);
250
    NODE(CMP);
251
    NODE(CMPC);
252
    NODE(TST);
253
    NODE(SELECT_CC);
254
#undef NODE
255
  }
256
}
257

258
/// Returns the value type produced by a SETCC node on AVR.
///
/// The result is always MVT::i8, whatever the compared type \p VT is. Vector
/// types are not supported and trip the assertion.
EVT AVRTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
                                          EVT VT) const {
  assert(!VT.isVector() && "No AVR SetCC type for vectors!");
  return MVT::i8;
}

/// Lowers SHL / SRL / SRA / ROTL / ROTR to AVR shift and rotate nodes.
///
/// Three paths, selected by the width of the shifted value and by whether the
/// amount (operand 1) is a constant:
///  - 32-bit value: the amount must be constant (otherwise a fatal error is
///    reported). SHL/SRL by exactly 16 become a BUILD_PAIR of one source half
///    and zero; every other case becomes a single LSLW/LSRW/ASRW node that
///    takes both 16-bit halves plus the amount.
///  - Non-constant amount (8/16-bit): a *LOOP node is emitted; rotate amounts
///    are first masked to the bit width minus one.
///  - Constant amount (8/16-bit): multi-bit helper nodes (SWAP, *BN, *WN) are
///    used where a fast path exists, then the remaining amount is emitted as
///    a chain of that many single-operand shift/rotate nodes.
SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const {
  unsigned Opc8;
  const SDNode *N = Op.getNode();
  const unsigned Opcode = Op.getOpcode();
  EVT VT = Op.getValueType();
  SDLoc dl(N);
  // The check is on the width of the shifted value, not on the amount.
  assert(llvm::has_single_bit<uint32_t>(VT.getSizeInBits()) &&
         "Expected power-of-2 value type size");

  if (VT.getSizeInBits() == 32) {
    if (!isa<ConstantSDNode>(N->getOperand(1))) {
      // 32-bit shifts are converted to a loop in IR.
      // This should be unreachable.
      report_fatal_error("Expected a constant shift amount!");
    }
    SDVTList ResTys = DAG.getVTList(MVT::i16, MVT::i16);
    SDValue SrcLo =
        DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i16, Op.getOperand(0),
                    DAG.getConstant(0, dl, MVT::i16));
    SDValue SrcHi =
        DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i16, Op.getOperand(0),
                    DAG.getConstant(1, dl, MVT::i16));
    uint64_t ShiftAmount = N->getConstantOperandVal(1);
    if (ShiftAmount == 16) {
      // Special case these two operations because they appear to be used by the
      // generic codegen parts to lower 32-bit numbers.
      // TODO: perhaps we can lower shift amounts bigger than 16 to a 16-bit
      // shift of a part of the 32-bit value?
      switch (Opcode) {
      case ISD::SHL: {
        SDValue Zero = DAG.getConstant(0, dl, MVT::i16);
        return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i32, Zero, SrcLo);
      }
      case ISD::SRL: {
        SDValue Zero = DAG.getConstant(0, dl, MVT::i16);
        return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i32, SrcHi, Zero);
      }
      }
      // Any other opcode (e.g. SRA) continues to the generic path below.
    }
    SDValue Cnt = DAG.getTargetConstant(ShiftAmount, dl, MVT::i8);
    unsigned Opc;
    switch (Opcode) {
    default:
      llvm_unreachable("Invalid 32-bit shift opcode!");
    case ISD::SHL:
      Opc = AVRISD::LSLW;
      break;
    case ISD::SRL:
      Opc = AVRISD::LSRW;
      break;
    case ISD::SRA:
      Opc = AVRISD::ASRW;
      break;
    }
    SDValue Result = DAG.getNode(Opc, dl, ResTys, SrcLo, SrcHi, Cnt);
    return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i32, Result.getValue(0),
                       Result.getValue(1));
  }

  // Expand non-constant shifts to loops.
  if (!isa<ConstantSDNode>(N->getOperand(1))) {
    switch (Opcode) {
    default:
      llvm_unreachable("Invalid shift opcode!");
    case ISD::SHL:
      return DAG.getNode(AVRISD::LSLLOOP, dl, VT, N->getOperand(0),
                         N->getOperand(1));
    case ISD::SRL:
      return DAG.getNode(AVRISD::LSRLOOP, dl, VT, N->getOperand(0),
                         N->getOperand(1));
    case ISD::ROTL: {
      // Mask the rotate amount to the bit width minus one.
      SDValue Amt = N->getOperand(1);
      EVT AmtVT = Amt.getValueType();
      Amt = DAG.getNode(ISD::AND, dl, AmtVT, Amt,
                        DAG.getConstant(VT.getSizeInBits() - 1, dl, AmtVT));
      return DAG.getNode(AVRISD::ROLLOOP, dl, VT, N->getOperand(0), Amt);
    }
    case ISD::ROTR: {
      // Mask the rotate amount to the bit width minus one.
      SDValue Amt = N->getOperand(1);
      EVT AmtVT = Amt.getValueType();
      Amt = DAG.getNode(ISD::AND, dl, AmtVT, Amt,
                        DAG.getConstant(VT.getSizeInBits() - 1, dl, AmtVT));
      return DAG.getNode(AVRISD::RORLOOP, dl, VT, N->getOperand(0), Amt);
    }
    case ISD::SRA:
      return DAG.getNode(AVRISD::ASRLOOP, dl, VT, N->getOperand(0),
                         N->getOperand(1));
    }
  }

  uint64_t ShiftAmount = N->getConstantOperandVal(1);
  SDValue Victim = N->getOperand(0);

  // Pick the single-operand node emitted once per remaining bit at the end.
  switch (Opcode) {
  case ISD::SRA:
    Opc8 = AVRISD::ASR;
    break;
  case ISD::ROTL:
    Opc8 = AVRISD::ROL;
    ShiftAmount = ShiftAmount % VT.getSizeInBits();
    break;
  case ISD::ROTR:
    Opc8 = AVRISD::ROR;
    ShiftAmount = ShiftAmount % VT.getSizeInBits();
    break;
  case ISD::SRL:
    Opc8 = AVRISD::LSR;
    break;
  case ISD::SHL:
    Opc8 = AVRISD::LSL;
    break;
  default:
    llvm_unreachable("Invalid shift opcode");
  }

  // Optimize int8/int16 shifts.
  if (VT.getSizeInBits() == 8) {
    if (Opcode == ISD::SHL && 4 <= ShiftAmount && ShiftAmount < 7) {
      // Optimize LSL when 4 <= ShiftAmount <= 6.
      Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);
      Victim =
          DAG.getNode(ISD::AND, dl, VT, Victim, DAG.getConstant(0xf0, dl, VT));
      ShiftAmount -= 4;
    } else if (Opcode == ISD::SRL && 4 <= ShiftAmount && ShiftAmount < 7) {
      // Optimize LSR when 4 <= ShiftAmount <= 6.
      Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);
      Victim =
          DAG.getNode(ISD::AND, dl, VT, Victim, DAG.getConstant(0x0f, dl, VT));
      ShiftAmount -= 4;
    } else if (Opcode == ISD::SHL && ShiftAmount == 7) {
      // Optimize LSL when ShiftAmount == 7.
      Victim = DAG.getNode(AVRISD::LSLBN, dl, VT, Victim,
                           DAG.getConstant(7, dl, VT));
      ShiftAmount = 0;
    } else if (Opcode == ISD::SRL && ShiftAmount == 7) {
      // Optimize LSR when ShiftAmount == 7.
      Victim = DAG.getNode(AVRISD::LSRBN, dl, VT, Victim,
                           DAG.getConstant(7, dl, VT));
      ShiftAmount = 0;
    } else if (Opcode == ISD::SRA && ShiftAmount == 6) {
      // Optimize ASR when ShiftAmount == 6.
      Victim = DAG.getNode(AVRISD::ASRBN, dl, VT, Victim,
                           DAG.getConstant(6, dl, VT));
      ShiftAmount = 0;
    } else if (Opcode == ISD::SRA && ShiftAmount == 7) {
      // Optimize ASR when ShiftAmount == 7.
      Victim = DAG.getNode(AVRISD::ASRBN, dl, VT, Victim,
                           DAG.getConstant(7, dl, VT));
      ShiftAmount = 0;
    } else if (Opcode == ISD::ROTL && ShiftAmount == 3) {
      // Optimize left rotation 3 bits to swap then right rotation 1 bit.
      Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);
      Victim =
          DAG.getNode(AVRISD::ROR, dl, VT, Victim, DAG.getConstant(1, dl, VT));
      ShiftAmount = 0;
    } else if (Opcode == ISD::ROTR && ShiftAmount == 3) {
      // Optimize right rotation 3 bits to swap then left rotation 1 bit.
      Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);
      Victim =
          DAG.getNode(AVRISD::ROL, dl, VT, Victim, DAG.getConstant(1, dl, VT));
      ShiftAmount = 0;
    } else if (Opcode == ISD::ROTL && ShiftAmount == 7) {
      // Optimize left rotation 7 bits to right rotation 1 bit.
      Victim =
          DAG.getNode(AVRISD::ROR, dl, VT, Victim, DAG.getConstant(1, dl, VT));
      ShiftAmount = 0;
    } else if (Opcode == ISD::ROTR && ShiftAmount == 7) {
      // Optimize right rotation 7 bits to left rotation 1 bit.
      Victim =
          DAG.getNode(AVRISD::ROL, dl, VT, Victim, DAG.getConstant(1, dl, VT));
      ShiftAmount = 0;
    } else if ((Opcode == ISD::ROTR || Opcode == ISD::ROTL) &&
               ShiftAmount >= 4) {
      // Optimize left/right rotation with the SWAP instruction.
      Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);
      ShiftAmount -= 4;
    }
  } else if (VT.getSizeInBits() == 16) {
    if (Opcode == ISD::SRA) {
      // Special optimization for int16 arithmetic right shift.
      switch (ShiftAmount) {
      case 15:
        Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
                             DAG.getConstant(15, dl, VT));
        ShiftAmount = 0;
        break;
      case 14:
        Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
                             DAG.getConstant(14, dl, VT));
        ShiftAmount = 0;
        break;
      case 7:
        Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
                             DAG.getConstant(7, dl, VT));
        ShiftAmount = 0;
        break;
      default:
        break;
      }
    }

    // ShiftAmount may have been zeroed above, in which case none of the
    // ranges below match.
    if (4 <= ShiftAmount && ShiftAmount < 8) {
      switch (Opcode) {
      case ISD::SHL:
        Victim = DAG.getNode(AVRISD::LSLWN, dl, VT, Victim,
                             DAG.getConstant(4, dl, VT));
        ShiftAmount -= 4;
        break;
      case ISD::SRL:
        Victim = DAG.getNode(AVRISD::LSRWN, dl, VT, Victim,
                             DAG.getConstant(4, dl, VT));
        ShiftAmount -= 4;
        break;
      default:
        break;
      }
    } else if (8 <= ShiftAmount && ShiftAmount < 12) {
      switch (Opcode) {
      case ISD::SHL:
        Victim = DAG.getNode(AVRISD::LSLWN, dl, VT, Victim,
                             DAG.getConstant(8, dl, VT));
        ShiftAmount -= 8;
        // Only operate on the higher byte for remaining shift bits.
        Opc8 = AVRISD::LSLHI;
        break;
      case ISD::SRL:
        Victim = DAG.getNode(AVRISD::LSRWN, dl, VT, Victim,
                             DAG.getConstant(8, dl, VT));
        ShiftAmount -= 8;
        // Only operate on the lower byte for remaining shift bits.
        Opc8 = AVRISD::LSRLO;
        break;
      case ISD::SRA:
        Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
                             DAG.getConstant(8, dl, VT));
        ShiftAmount -= 8;
        // Only operate on the lower byte for remaining shift bits.
        Opc8 = AVRISD::ASRLO;
        break;
      default:
        break;
      }
    } else if (12 <= ShiftAmount) {
      switch (Opcode) {
      case ISD::SHL:
        Victim = DAG.getNode(AVRISD::LSLWN, dl, VT, Victim,
                             DAG.getConstant(12, dl, VT));
        ShiftAmount -= 12;
        // Only operate on the higher byte for remaining shift bits.
        Opc8 = AVRISD::LSLHI;
        break;
      case ISD::SRL:
        Victim = DAG.getNode(AVRISD::LSRWN, dl, VT, Victim,
                             DAG.getConstant(12, dl, VT));
        ShiftAmount -= 12;
        // Only operate on the lower byte for remaining shift bits.
        Opc8 = AVRISD::LSRLO;
        break;
      case ISD::SRA:
        // Unlike SHL/SRL, the arithmetic shift only steps by 8 here; the
        // remaining bits are emitted one at a time below.
        Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
                             DAG.getConstant(8, dl, VT));
        ShiftAmount -= 8;
        // Only operate on the lower byte for remaining shift bits.
        Opc8 = AVRISD::ASRLO;
        break;
      default:
        break;
      }
    }
  }

  // Emit one single-operand node per remaining bit.
  while (ShiftAmount--) {
    Victim = DAG.getNode(Opc8, dl, VT, Victim);
  }

  return Victim;
}

/// Lowers SDIVREM / UDIVREM to a call to the matching runtime library routine.
///
/// The libcall is chosen from the result type (i8, i16 or i32) and the
/// signedness of the node. Every operand of \p Op is passed as an argument,
/// marked sign-extended for SDIVREM and zero-extended for UDIVREM. The call's
/// return type is a two-element struct whose members both have the result
/// type.
///
/// \returns the first element of the pair produced by LowerCallTo.
SDValue AVRTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
  unsigned Opcode = Op->getOpcode();
  assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
         "Invalid opcode for Div/Rem lowering");
  bool IsSigned = (Opcode == ISD::SDIVREM);
  EVT VT = Op->getValueType(0);
  Type *Ty = VT.getTypeForEVT(*DAG.getContext());

  RTLIB::Libcall LC;
  switch (VT.getSimpleVT().SimpleTy) {
  default:
    llvm_unreachable("Unexpected request for libcall!");
  case MVT::i8:
    LC = IsSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8;
    break;
  case MVT::i16:
    LC = IsSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16;
    break;
  case MVT::i32:
    LC = IsSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32;
    break;
  }

  // The call is chained to the DAG entry node.
  SDValue InChain = DAG.getEntryNode();

  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
  for (SDValue const &Value : Op->op_values()) {
    Entry.Node = Value;
    Entry.Ty = Value.getValueType().getTypeForEVT(*DAG.getContext());
    Entry.IsSExt = IsSigned;
    Entry.IsZExt = !IsSigned;
    Args.push_back(Entry);
  }

  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
                                         getPointerTy(DAG.getDataLayout()));

  // Two-element struct return type; StructType* converts implicitly to
  // Type*.
  Type *RetTy = StructType::get(Ty, Ty);

  SDLoc dl(Op);
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl)
      .setChain(InChain)
      .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
      .setInRegister()
      .setSExtResult(IsSigned)
      .setZExtResult(!IsSigned);

  std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
  return CallInfo.first;
}

/// Lowers a GlobalAddress node to a TargetGlobalAddress (with the node's
/// constant offset folded in) wrapped in an AVRISD::WRAPPER node.
SDValue AVRTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  // Bind by reference: `auto DL = ...` would copy the whole DataLayout.
  const DataLayout &DL = DAG.getDataLayout();

  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();

  // Create the TargetGlobalAddress node, folding in the constant offset.
  SDValue Result =
      DAG.getTargetGlobalAddress(GV, SDLoc(Op), getPointerTy(DL), Offset);
  return DAG.getNode(AVRISD::WRAPPER, SDLoc(Op), getPointerTy(DL), Result);
}

/// Lowers a BlockAddress node to a TargetBlockAddress wrapped in an
/// AVRISD::WRAPPER node.
SDValue AVRTargetLowering::LowerBlockAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  // Bind by reference: `auto DL = ...` would copy the whole DataLayout.
  const DataLayout &DL = DAG.getDataLayout();
  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();

  SDValue Result = DAG.getTargetBlockAddress(BA, getPointerTy(DL));

  return DAG.getNode(AVRISD::WRAPPER, SDLoc(Op), getPointerTy(DL), Result);
}

/// IntCCToAVRCC - Convert a DAG integer condition code to an AVR CC.
616
static AVRCC::CondCodes intCCToAVRCC(ISD::CondCode CC) {
617
  switch (CC) {
618
  default:
619
    llvm_unreachable("Unknown condition code!");
620
  case ISD::SETEQ:
621
    return AVRCC::COND_EQ;
622
  case ISD::SETNE:
623
    return AVRCC::COND_NE;
624
  case ISD::SETGE:
625
    return AVRCC::COND_GE;
626
  case ISD::SETLT:
627
    return AVRCC::COND_LT;
628
  case ISD::SETUGE:
629
    return AVRCC::COND_SH;
630
  case ISD::SETULT:
631
    return AVRCC::COND_LO;
632
  }
633
}
634

635
/// Returns appropriate CP/CPI/CPC nodes code for the given 8/16-bit operands.
636
SDValue AVRTargetLowering::getAVRCmp(SDValue LHS, SDValue RHS,
637
                                     SelectionDAG &DAG, SDLoc DL) const {
638
  assert((LHS.getSimpleValueType() == RHS.getSimpleValueType()) &&
639
         "LHS and RHS have different types");
640
  assert(((LHS.getSimpleValueType() == MVT::i16) ||
641
          (LHS.getSimpleValueType() == MVT::i8)) &&
642
         "invalid comparison type");
643

644
  SDValue Cmp;
645

646
  if (LHS.getSimpleValueType() == MVT::i16 && isa<ConstantSDNode>(RHS)) {
647
    uint64_t Imm = RHS->getAsZExtVal();
648
    // Generate a CPI/CPC pair if RHS is a 16-bit constant. Use the zero
649
    // register for the constant RHS if its lower or higher byte is zero.
650
    SDValue LHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS,
651
                                DAG.getIntPtrConstant(0, DL));
652
    SDValue LHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS,
653
                                DAG.getIntPtrConstant(1, DL));
654
    SDValue RHSlo = (Imm & 0xff) == 0
655
                        ? DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8)
656
                        : DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS,
657
                                      DAG.getIntPtrConstant(0, DL));
658
    SDValue RHShi = (Imm & 0xff00) == 0
659
                        ? DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8)
660
                        : DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS,
661
                                      DAG.getIntPtrConstant(1, DL));
662
    Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHSlo, RHSlo);
663
    Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHShi, RHShi, Cmp);
664
  } else if (RHS.getSimpleValueType() == MVT::i16 && isa<ConstantSDNode>(LHS)) {
665
    // Generate a CPI/CPC pair if LHS is a 16-bit constant. Use the zero
666
    // register for the constant LHS if its lower or higher byte is zero.
667
    uint64_t Imm = LHS->getAsZExtVal();
668
    SDValue LHSlo = (Imm & 0xff) == 0
669
                        ? DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8)
670
                        : DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS,
671
                                      DAG.getIntPtrConstant(0, DL));
672
    SDValue LHShi = (Imm & 0xff00) == 0
673
                        ? DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8)
674
                        : DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS,
675
                                      DAG.getIntPtrConstant(1, DL));
676
    SDValue RHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS,
677
                                DAG.getIntPtrConstant(0, DL));
678
    SDValue RHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS,
679
                                DAG.getIntPtrConstant(1, DL));
680
    Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHSlo, RHSlo);
681
    Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHShi, RHShi, Cmp);
682
  } else {
683
    // Generate ordinary 16-bit comparison.
684
    Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHS, RHS);
685
  }
686

687
  return Cmp;
688
}
689

690
/// Returns appropriate AVR CMP/CMPC nodes and corresponding condition code for
691
/// the given operands.
692
SDValue AVRTargetLowering::getAVRCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
693
                                     SDValue &AVRcc, SelectionDAG &DAG,
694
                                     SDLoc DL) const {
695
  SDValue Cmp;
696
  EVT VT = LHS.getValueType();
697
  bool UseTest = false;
698

699
  switch (CC) {
700
  default:
701
    break;
702
  case ISD::SETLE: {
703
    // Swap operands and reverse the branching condition.
704
    std::swap(LHS, RHS);
705
    CC = ISD::SETGE;
706
    break;
707
  }
708
  case ISD::SETGT: {
709
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
710
      switch (C->getSExtValue()) {
711
      case -1: {
712
        // When doing lhs > -1 use a tst instruction on the top part of lhs
713
        // and use brpl instead of using a chain of cp/cpc.
714
        UseTest = true;
715
        AVRcc = DAG.getConstant(AVRCC::COND_PL, DL, MVT::i8);
716
        break;
717
      }
718
      case 0: {
719
        // Turn lhs > 0 into 0 < lhs since 0 can be materialized with
720
        // __zero_reg__ in lhs.
721
        RHS = LHS;
722
        LHS = DAG.getConstant(0, DL, VT);
723
        CC = ISD::SETLT;
724
        break;
725
      }
726
      default: {
727
        // Turn lhs < rhs with lhs constant into rhs >= lhs+1, this allows
728
        // us to  fold the constant into the cmp instruction.
729
        RHS = DAG.getConstant(C->getSExtValue() + 1, DL, VT);
730
        CC = ISD::SETGE;
731
        break;
732
      }
733
      }
734
      break;
735
    }
736
    // Swap operands and reverse the branching condition.
737
    std::swap(LHS, RHS);
738
    CC = ISD::SETLT;
739
    break;
740
  }
741
  case ISD::SETLT: {
742
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
743
      switch (C->getSExtValue()) {
744
      case 1: {
745
        // Turn lhs < 1 into 0 >= lhs since 0 can be materialized with
746
        // __zero_reg__ in lhs.
747
        RHS = LHS;
748
        LHS = DAG.getConstant(0, DL, VT);
749
        CC = ISD::SETGE;
750
        break;
751
      }
752
      case 0: {
753
        // When doing lhs < 0 use a tst instruction on the top part of lhs
754
        // and use brmi instead of using a chain of cp/cpc.
755
        UseTest = true;
756
        AVRcc = DAG.getConstant(AVRCC::COND_MI, DL, MVT::i8);
757
        break;
758
      }
759
      }
760
    }
761
    break;
762
  }
763
  case ISD::SETULE: {
764
    // Swap operands and reverse the branching condition.
765
    std::swap(LHS, RHS);
766
    CC = ISD::SETUGE;
767
    break;
768
  }
769
  case ISD::SETUGT: {
770
    // Turn lhs < rhs with lhs constant into rhs >= lhs+1, this allows us to
771
    // fold the constant into the cmp instruction.
772
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
773
      RHS = DAG.getConstant(C->getSExtValue() + 1, DL, VT);
774
      CC = ISD::SETUGE;
775
      break;
776
    }
777
    // Swap operands and reverse the branching condition.
778
    std::swap(LHS, RHS);
779
    CC = ISD::SETULT;
780
    break;
781
  }
782
  }
783

784
  // Expand 32 and 64 bit comparisons with custom CMP and CMPC nodes instead of
785
  // using the default and/or/xor expansion code which is much longer.
786
  if (VT == MVT::i32) {
787
    SDValue LHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS,
788
                                DAG.getIntPtrConstant(0, DL));
789
    SDValue LHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS,
790
                                DAG.getIntPtrConstant(1, DL));
791
    SDValue RHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS,
792
                                DAG.getIntPtrConstant(0, DL));
793
    SDValue RHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS,
794
                                DAG.getIntPtrConstant(1, DL));
795

796
    if (UseTest) {
797
      // When using tst we only care about the highest part.
798
      SDValue Top = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHShi,
799
                                DAG.getIntPtrConstant(1, DL));
800
      Cmp = DAG.getNode(AVRISD::TST, DL, MVT::Glue, Top);
801
    } else {
802
      Cmp = getAVRCmp(LHSlo, RHSlo, DAG, DL);
803
      Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHShi, RHShi, Cmp);
804
    }
805
  } else if (VT == MVT::i64) {
806
    SDValue LHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS,
807
                                DAG.getIntPtrConstant(0, DL));
808
    SDValue LHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS,
809
                                DAG.getIntPtrConstant(1, DL));
810

811
    SDValue LHS0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS_0,
812
                               DAG.getIntPtrConstant(0, DL));
813
    SDValue LHS1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS_0,
814
                               DAG.getIntPtrConstant(1, DL));
815
    SDValue LHS2 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS_1,
816
                               DAG.getIntPtrConstant(0, DL));
817
    SDValue LHS3 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS_1,
818
                               DAG.getIntPtrConstant(1, DL));
819

820
    SDValue RHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS,
821
                                DAG.getIntPtrConstant(0, DL));
822
    SDValue RHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS,
823
                                DAG.getIntPtrConstant(1, DL));
824

825
    SDValue RHS0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS_0,
826
                               DAG.getIntPtrConstant(0, DL));
827
    SDValue RHS1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS_0,
828
                               DAG.getIntPtrConstant(1, DL));
829
    SDValue RHS2 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS_1,
830
                               DAG.getIntPtrConstant(0, DL));
831
    SDValue RHS3 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS_1,
832
                               DAG.getIntPtrConstant(1, DL));
833

834
    if (UseTest) {
835
      // When using tst we only care about the highest part.
836
      SDValue Top = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS3,
837
                                DAG.getIntPtrConstant(1, DL));
838
      Cmp = DAG.getNode(AVRISD::TST, DL, MVT::Glue, Top);
839
    } else {
840
      Cmp = getAVRCmp(LHS0, RHS0, DAG, DL);
841
      Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHS1, RHS1, Cmp);
842
      Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHS2, RHS2, Cmp);
843
      Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHS3, RHS3, Cmp);
844
    }
845
  } else if (VT == MVT::i8 || VT == MVT::i16) {
846
    if (UseTest) {
847
      // When using tst we only care about the highest part.
848
      Cmp = DAG.getNode(AVRISD::TST, DL, MVT::Glue,
849
                        (VT == MVT::i8)
850
                            ? LHS
851
                            : DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8,
852
                                          LHS, DAG.getIntPtrConstant(1, DL)));
853
    } else {
854
      Cmp = getAVRCmp(LHS, RHS, DAG, DL);
855
    }
856
  } else {
857
    llvm_unreachable("Invalid comparison size");
858
  }
859

860
  // When using a test instruction AVRcc is already set.
861
  if (!UseTest) {
862
    AVRcc = DAG.getConstant(intCCToAVRCC(CC), DL, MVT::i8);
863
  }
864

865
  return Cmp;
866
}
867

868
/// Custom lowering of ISD::BR_CC: emits the AVR comparison for the two
/// compared operands and branches on its flags with AVRISD::BRCOND.
SDValue AVRTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc Loc(Op);

  // Operand layout used here: 0 = chain, 1 = condition code, 2/3 = compared
  // values, 4 = branch destination.
  const ISD::CondCode Cond = cast<CondCodeSDNode>(Op.getOperand(1))->get();

  // getAVRCmp builds the flag-setting node(s) and reports which AVR condition
  // has to be tested.
  SDValue AVRCond;
  SDValue Flags = getAVRCmp(Op.getOperand(2), Op.getOperand(3), Cond, AVRCond,
                            DAG, Loc);

  return DAG.getNode(AVRISD::BRCOND, Loc, MVT::Other, Op.getOperand(0),
                     Op.getOperand(4), AVRCond, Flags);
}
882

883
/// Custom lowering of ISD::SELECT_CC: emits the AVR comparison for the two
/// compared operands and selects between the true/false values with
/// AVRISD::SELECT_CC.
SDValue AVRTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc Loc(Op);

  // Operand layout used here: 0/1 = compared values, 2 = value if true,
  // 3 = value if false, 4 = condition code.
  const ISD::CondCode Cond = cast<CondCodeSDNode>(Op.getOperand(4))->get();

  SDValue AVRCond;
  SDValue Flags = getAVRCmp(Op.getOperand(0), Op.getOperand(1), Cond, AVRCond,
                            DAG, Loc);

  // The select node yields the chosen value plus glue.
  SDValue SelectOps[] = {Op.getOperand(2), Op.getOperand(3), AVRCond, Flags};
  return DAG.getNode(AVRISD::SELECT_CC, Loc,
                     DAG.getVTList(Op.getValueType(), MVT::Glue), SelectOps);
}
899

900
/// Custom lowering of ISD::SETCC: emits the AVR comparison and materialises
/// the boolean result as an AVRISD::SELECT_CC between the constants 1 and 0.
SDValue AVRTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc Loc(Op);
  const EVT ResultVT = Op.getValueType();

  // Operand layout used here: 0/1 = compared values, 2 = condition code.
  const ISD::CondCode Cond = cast<CondCodeSDNode>(Op.getOperand(2))->get();

  SDValue AVRCond;
  SDValue Flags = getAVRCmp(Op.getOperand(0), Op.getOperand(1), Cond, AVRCond,
                            DAG, Loc);

  // Select 1 when the condition holds and 0 otherwise.
  SDValue SelectOps[] = {DAG.getConstant(1, Loc, ResultVT),
                         DAG.getConstant(0, Loc, ResultVT), AVRCond, Flags};
  return DAG.getNode(AVRISD::SELECT_CC, Loc,
                     DAG.getVTList(ResultVT, MVT::Glue), SelectOps);
}
916

917
/// Custom lowering of ISD::VASTART.
///
/// Operand layout used here: 0 = chain, 1 = destination address the va_list
/// is written to, 2 = source value describing that memory.
/// \returns the store node that writes the varargs frame index address.
SDValue AVRTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  const MachineFunction &MF = DAG.getMachineFunction();
  const AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  // Bind by reference: a plain `auto` would copy the whole DataLayout object
  // each time this hook runs.
  const DataLayout &DL = DAG.getDataLayout();
  SDLoc dl(Op);

  // Vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  SDValue FI = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), getPointerTy(DL));

  return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1),
                      MachinePointerInfo(SV));
}
931

932
// Modify the existing ISD::INLINEASM node to add the implicit zero register.
933
SDValue AVRTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
934
  SDValue ZeroReg = DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8);
935
  if (Op.getOperand(Op.getNumOperands() - 1) == ZeroReg ||
936
      Op.getOperand(Op.getNumOperands() - 2) == ZeroReg) {
937
    // Zero register has already been added. Don't add it again.
938
    // If this isn't handled, we get called over and over again.
939
    return Op;
940
  }
941

942
  // Get a list of operands to the new INLINEASM node. This is mostly a copy,
943
  // with some edits.
944
  // Add the following operands at the end (but before the glue node, if it's
945
  // there):
946
  //  - The flags of the implicit zero register operand.
947
  //  - The implicit zero register operand itself.
948
  SDLoc dl(Op);
949
  SmallVector<SDValue, 8> Ops;
950
  SDNode *N = Op.getNode();
951
  SDValue Glue;
952
  for (unsigned I = 0; I < N->getNumOperands(); I++) {
953
    SDValue Operand = N->getOperand(I);
954
    if (Operand.getValueType() == MVT::Glue) {
955
      // The glue operand always needs to be at the end, so we need to treat it
956
      // specially.
957
      Glue = Operand;
958
    } else {
959
      Ops.push_back(Operand);
960
    }
961
  }
962
  InlineAsm::Flag Flags(InlineAsm::Kind::RegUse, 1);
963
  Ops.push_back(DAG.getTargetConstant(Flags, dl, MVT::i32));
964
  Ops.push_back(ZeroReg);
965
  if (Glue) {
966
    Ops.push_back(Glue);
967
  }
968

969
  // Replace the current INLINEASM node with a new one that has the zero
970
  // register as implicit parameter.
971
  SDValue New = DAG.getNode(N->getOpcode(), dl, N->getVTList(), Ops);
972
  DAG.ReplaceAllUsesOfValueWith(Op, New);
973
  DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), New.getValue(1));
974

975
  return New;
976
}
977

978
/// Dispatches a node that was marked for custom lowering to the matching
/// Lower* helper. Reaching this function with any other opcode is a bug.
SDValue AVRTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  const unsigned Opcode = Op.getOpcode();

  switch (Opcode) {
  // Comparisons and conditional control flow.
  case ISD::BR_CC:
    return LowerBR_CC(Op, DAG);
  case ISD::SELECT_CC:
    return LowerSELECT_CC(Op, DAG);
  case ISD::SETCC:
    return LowerSETCC(Op, DAG);

  // Shifts and rotates share one implementation.
  case ISD::SHL:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::ROTL:
  case ISD::ROTR:
    return LowerShifts(Op, DAG);

  // Signed and unsigned combined division/remainder.
  case ISD::SDIVREM:
  case ISD::UDIVREM:
    return LowerDivRem(Op, DAG);

  // Address materialisation.
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
  case ISD::BlockAddress:
    return LowerBlockAddress(Op, DAG);

  case ISD::VASTART:
    return LowerVASTART(Op, DAG);
  case ISD::INLINEASM:
    return LowerINLINEASM(Op, DAG);

  default:
    llvm_unreachable("Don't know how to custom lower this!");
  }

  return SDValue();
}
1009

1010
/// Replace a node with an illegal result type
1011
/// with a new node built out of custom code.
1012
void AVRTargetLowering::ReplaceNodeResults(SDNode *N,
1013
                                           SmallVectorImpl<SDValue> &Results,
1014
                                           SelectionDAG &DAG) const {
1015
  SDLoc DL(N);
1016

1017
  switch (N->getOpcode()) {
1018
  case ISD::ADD: {
1019
    // Convert add (x, imm) into sub (x, -imm).
1020
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1021
      SDValue Sub = DAG.getNode(
1022
          ISD::SUB, DL, N->getValueType(0), N->getOperand(0),
1023
          DAG.getConstant(-C->getAPIntValue(), DL, C->getValueType(0)));
1024
      Results.push_back(Sub);
1025
    }
1026
    break;
1027
  }
1028
  default: {
1029
    SDValue Res = LowerOperation(SDValue(N, 0), DAG);
1030

1031
    for (unsigned I = 0, E = Res->getNumValues(); I != E; ++I)
1032
      Results.push_back(Res.getValue(I));
1033

1034
    break;
1035
  }
1036
  }
1037
}
1038

1039
/// Return true if the addressing mode represented
1040
/// by AM is legal for this target, for a load/store of the specified type.
1041
bool AVRTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1042
                                              const AddrMode &AM, Type *Ty,
1043
                                              unsigned AS,
1044
                                              Instruction *I) const {
1045
  int64_t Offs = AM.BaseOffs;
1046

1047
  // Allow absolute addresses.
1048
  if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && Offs == 0) {
1049
    return true;
1050
  }
1051

1052
  // Flash memory instructions only allow zero offsets.
1053
  if (isa<PointerType>(Ty) && AS == AVR::ProgramMemory) {
1054
    return false;
1055
  }
1056

1057
  // Allow reg+<6bit> offset.
1058
  if (Offs < 0)
1059
    Offs = -Offs;
1060
  if (AM.BaseGV == nullptr && AM.HasBaseReg && AM.Scale == 0 &&
1061
      isUInt<6>(Offs)) {
1062
    return true;
1063
  }
1064

1065
  return false;
1066
}
1067

1068
/// Returns true by value, base pointer and
1069
/// offset pointer and addressing mode by reference if the node's address
1070
/// can be legally represented as pre-indexed load / store address.
1071
bool AVRTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
1072
                                                  SDValue &Offset,
1073
                                                  ISD::MemIndexedMode &AM,
1074
                                                  SelectionDAG &DAG) const {
1075
  EVT VT;
1076
  const SDNode *Op;
1077
  SDLoc DL(N);
1078

1079
  if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
1080
    VT = LD->getMemoryVT();
1081
    Op = LD->getBasePtr().getNode();
1082
    if (LD->getExtensionType() != ISD::NON_EXTLOAD)
1083
      return false;
1084
    if (AVR::isProgramMemoryAccess(LD)) {
1085
      return false;
1086
    }
1087
  } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
1088
    VT = ST->getMemoryVT();
1089
    Op = ST->getBasePtr().getNode();
1090
    if (AVR::isProgramMemoryAccess(ST)) {
1091
      return false;
1092
    }
1093
  } else {
1094
    return false;
1095
  }
1096

1097
  if (VT != MVT::i8 && VT != MVT::i16) {
1098
    return false;
1099
  }
1100

1101
  if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB) {
1102
    return false;
1103
  }
1104

1105
  if (const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
1106
    int RHSC = RHS->getSExtValue();
1107
    if (Op->getOpcode() == ISD::SUB)
1108
      RHSC = -RHSC;
1109

1110
    if ((VT == MVT::i16 && RHSC != -2) || (VT == MVT::i8 && RHSC != -1)) {
1111
      return false;
1112
    }
1113

1114
    Base = Op->getOperand(0);
1115
    Offset = DAG.getConstant(RHSC, DL, MVT::i8);
1116
    AM = ISD::PRE_DEC;
1117

1118
    return true;
1119
  }
1120

1121
  return false;
1122
}
1123

1124
/// Returns true by value, base pointer and
1125
/// offset pointer and addressing mode by reference if this node can be
1126
/// combined with a load / store to form a post-indexed load / store.
1127
bool AVRTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
1128
                                                   SDValue &Base,
1129
                                                   SDValue &Offset,
1130
                                                   ISD::MemIndexedMode &AM,
1131
                                                   SelectionDAG &DAG) const {
1132
  EVT VT;
1133
  SDLoc DL(N);
1134

1135
  if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
1136
    VT = LD->getMemoryVT();
1137
    if (LD->getExtensionType() != ISD::NON_EXTLOAD)
1138
      return false;
1139
  } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
1140
    VT = ST->getMemoryVT();
1141
    // We can not store to program memory.
1142
    if (AVR::isProgramMemoryAccess(ST))
1143
      return false;
1144
    // Since the high byte need to be stored first, we can not emit
1145
    // i16 post increment store like:
1146
    // st X+, r24
1147
    // st X+, r25
1148
    if (VT == MVT::i16 && !Subtarget.hasLowByteFirst())
1149
      return false;
1150
  } else {
1151
    return false;
1152
  }
1153

1154
  if (VT != MVT::i8 && VT != MVT::i16) {
1155
    return false;
1156
  }
1157

1158
  if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB) {
1159
    return false;
1160
  }
1161

1162
  if (const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
1163
    int RHSC = RHS->getSExtValue();
1164
    if (Op->getOpcode() == ISD::SUB)
1165
      RHSC = -RHSC;
1166
    if ((VT == MVT::i16 && RHSC != 2) || (VT == MVT::i8 && RHSC != 1)) {
1167
      return false;
1168
    }
1169

1170
    // FIXME: We temporarily disable post increment load from program memory,
1171
    //        due to bug https://github.com/llvm/llvm-project/issues/59914.
1172
    if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
1173
      if (AVR::isProgramMemoryAccess(LD))
1174
        return false;
1175

1176
    Base = Op->getOperand(0);
1177
    Offset = DAG.getConstant(RHSC, DL, MVT::i8);
1178
    AM = ISD::POST_INC;
1179

1180
    return true;
1181
  }
1182

1183
  return false;
1184
}
1185

1186
bool AVRTargetLowering::isOffsetFoldingLegal(
1187
    const GlobalAddressSDNode *GA) const {
1188
  return true;
1189
}
1190

1191
//===----------------------------------------------------------------------===//
1192
//             Formal Arguments Calling Convention Implementation
1193
//===----------------------------------------------------------------------===//
1194

1195
#include "AVRGenCallingConv.inc"
1196

1197
/// Registers for calling conventions, ordered in reverse as required by ABI.
1198
/// Both arrays must be of the same length.
1199
static const MCPhysReg RegList8AVR[] = {
1200
    AVR::R25, AVR::R24, AVR::R23, AVR::R22, AVR::R21, AVR::R20,
1201
    AVR::R19, AVR::R18, AVR::R17, AVR::R16, AVR::R15, AVR::R14,
1202
    AVR::R13, AVR::R12, AVR::R11, AVR::R10, AVR::R9,  AVR::R8};
1203
static const MCPhysReg RegList8Tiny[] = {AVR::R25, AVR::R24, AVR::R23,
1204
                                         AVR::R22, AVR::R21, AVR::R20};
1205
static const MCPhysReg RegList16AVR[] = {
1206
    AVR::R26R25, AVR::R25R24, AVR::R24R23, AVR::R23R22, AVR::R22R21,
1207
    AVR::R21R20, AVR::R20R19, AVR::R19R18, AVR::R18R17, AVR::R17R16,
1208
    AVR::R16R15, AVR::R15R14, AVR::R14R13, AVR::R13R12, AVR::R12R11,
1209
    AVR::R11R10, AVR::R10R9,  AVR::R9R8};
1210
static const MCPhysReg RegList16Tiny[] = {AVR::R26R25, AVR::R25R24,
1211
                                          AVR::R24R23, AVR::R23R22,
1212
                                          AVR::R22R21, AVR::R21R20};
1213

1214
static_assert(std::size(RegList8AVR) == std::size(RegList16AVR),
1215
              "8-bit and 16-bit register arrays must be of equal length");
1216
static_assert(std::size(RegList8Tiny) == std::size(RegList16Tiny),
1217
              "8-bit and 16-bit register arrays must be of equal length");
1218

1219
/// Analyze incoming and outgoing function arguments. We need custom C++ code
1220
/// to handle special constraints in the ABI.
1221
/// In addition, all pieces of a certain argument have to be passed either
1222
/// using registers or the stack but never mixing both.
1223
template <typename ArgT>
1224
static void analyzeArguments(TargetLowering::CallLoweringInfo *CLI,
1225
                             const Function *F, const DataLayout *TD,
1226
                             const SmallVectorImpl<ArgT> &Args,
1227
                             SmallVectorImpl<CCValAssign> &ArgLocs,
1228
                             CCState &CCInfo, bool Tiny) {
1229
  // Choose the proper register list for argument passing according to the ABI.
1230
  ArrayRef<MCPhysReg> RegList8;
1231
  ArrayRef<MCPhysReg> RegList16;
1232
  if (Tiny) {
1233
    RegList8 = ArrayRef(RegList8Tiny);
1234
    RegList16 = ArrayRef(RegList16Tiny);
1235
  } else {
1236
    RegList8 = ArrayRef(RegList8AVR);
1237
    RegList16 = ArrayRef(RegList16AVR);
1238
  }
1239

1240
  unsigned NumArgs = Args.size();
1241
  // This is the index of the last used register, in RegList*.
1242
  // -1 means R26 (R26 is never actually used in CC).
1243
  int RegLastIdx = -1;
1244
  // Once a value is passed to the stack it will always be used
1245
  bool UseStack = false;
1246
  for (unsigned i = 0; i != NumArgs;) {
1247
    MVT VT = Args[i].VT;
1248
    // We have to count the number of bytes for each function argument, that is
1249
    // those Args with the same OrigArgIndex. This is important in case the
1250
    // function takes an aggregate type.
1251
    // Current argument will be between [i..j).
1252
    unsigned ArgIndex = Args[i].OrigArgIndex;
1253
    unsigned TotalBytes = VT.getStoreSize();
1254
    unsigned j = i + 1;
1255
    for (; j != NumArgs; ++j) {
1256
      if (Args[j].OrigArgIndex != ArgIndex)
1257
        break;
1258
      TotalBytes += Args[j].VT.getStoreSize();
1259
    }
1260
    // Round up to even number of bytes.
1261
    TotalBytes = alignTo(TotalBytes, 2);
1262
    // Skip zero sized arguments
1263
    if (TotalBytes == 0)
1264
      continue;
1265
    // The index of the first register to be used
1266
    unsigned RegIdx = RegLastIdx + TotalBytes;
1267
    RegLastIdx = RegIdx;
1268
    // If there are not enough registers, use the stack
1269
    if (RegIdx >= RegList8.size()) {
1270
      UseStack = true;
1271
    }
1272
    for (; i != j; ++i) {
1273
      MVT VT = Args[i].VT;
1274

1275
      if (UseStack) {
1276
        auto evt = EVT(VT).getTypeForEVT(CCInfo.getContext());
1277
        unsigned Offset = CCInfo.AllocateStack(TD->getTypeAllocSize(evt),
1278
                                               TD->getABITypeAlign(evt));
1279
        CCInfo.addLoc(
1280
            CCValAssign::getMem(i, VT, Offset, VT, CCValAssign::Full));
1281
      } else {
1282
        unsigned Reg;
1283
        if (VT == MVT::i8) {
1284
          Reg = CCInfo.AllocateReg(RegList8[RegIdx]);
1285
        } else if (VT == MVT::i16) {
1286
          Reg = CCInfo.AllocateReg(RegList16[RegIdx]);
1287
        } else {
1288
          llvm_unreachable(
1289
              "calling convention can only manage i8 and i16 types");
1290
        }
1291
        assert(Reg && "register not available in calling convention");
1292
        CCInfo.addLoc(CCValAssign::getReg(i, VT, Reg, VT, CCValAssign::Full));
1293
        // Registers inside a particular argument are sorted in increasing order
1294
        // (remember the array is reversed).
1295
        RegIdx -= VT.getStoreSize();
1296
      }
1297
    }
1298
  }
1299
}
1300

1301
/// Count the total number of bytes needed to pass or return these arguments.
1302
template <typename ArgT>
1303
static unsigned
1304
getTotalArgumentsSizeInBytes(const SmallVectorImpl<ArgT> &Args) {
1305
  unsigned TotalBytes = 0;
1306

1307
  for (const ArgT &Arg : Args) {
1308
    TotalBytes += Arg.VT.getStoreSize();
1309
  }
1310
  return TotalBytes;
1311
}
1312

1313
/// Analyze incoming and outgoing value of returning from a function.
1314
/// The algorithm is similar to analyzeArguments, but there can only be
1315
/// one value, possibly an aggregate, and it is limited to 8 bytes.
1316
template <typename ArgT>
1317
static void analyzeReturnValues(const SmallVectorImpl<ArgT> &Args,
1318
                                CCState &CCInfo, bool Tiny) {
1319
  unsigned NumArgs = Args.size();
1320
  unsigned TotalBytes = getTotalArgumentsSizeInBytes(Args);
1321
  // CanLowerReturn() guarantees this assertion.
1322
  if (Tiny)
1323
    assert(TotalBytes <= 4 &&
1324
           "return values greater than 4 bytes cannot be lowered on AVRTiny");
1325
  else
1326
    assert(TotalBytes <= 8 &&
1327
           "return values greater than 8 bytes cannot be lowered on AVR");
1328

1329
  // Choose the proper register list for argument passing according to the ABI.
1330
  ArrayRef<MCPhysReg> RegList8;
1331
  ArrayRef<MCPhysReg> RegList16;
1332
  if (Tiny) {
1333
    RegList8 = ArrayRef(RegList8Tiny);
1334
    RegList16 = ArrayRef(RegList16Tiny);
1335
  } else {
1336
    RegList8 = ArrayRef(RegList8AVR);
1337
    RegList16 = ArrayRef(RegList16AVR);
1338
  }
1339

1340
  // GCC-ABI says that the size is rounded up to the next even number,
1341
  // but actually once it is more than 4 it will always round up to 8.
1342
  if (TotalBytes > 4) {
1343
    TotalBytes = 8;
1344
  } else {
1345
    TotalBytes = alignTo(TotalBytes, 2);
1346
  }
1347

1348
  // The index of the first register to use.
1349
  int RegIdx = TotalBytes - 1;
1350
  for (unsigned i = 0; i != NumArgs; ++i) {
1351
    MVT VT = Args[i].VT;
1352
    unsigned Reg;
1353
    if (VT == MVT::i8) {
1354
      Reg = CCInfo.AllocateReg(RegList8[RegIdx]);
1355
    } else if (VT == MVT::i16) {
1356
      Reg = CCInfo.AllocateReg(RegList16[RegIdx]);
1357
    } else {
1358
      llvm_unreachable("calling convention can only manage i8 and i16 types");
1359
    }
1360
    assert(Reg && "register not available in calling convention");
1361
    CCInfo.addLoc(CCValAssign::getReg(i, VT, Reg, VT, CCValAssign::Full));
1362
    // Registers sort in increasing order
1363
    RegIdx -= VT.getStoreSize();
1364
  }
1365
}
1366

1367
/// Lowers the incoming (formal) arguments of the current function.
///
/// Every argument piece described by \p Ins is assigned a location; register
/// arguments become live-in copies and stack arguments become loads from
/// fixed frame objects. One value per location is appended to \p InVals. For
/// variadic functions a frame index marking the start of the variable
/// arguments is recorded for the expansion of llvm.va_start.
///
/// \returns the incoming \p Chain, unchanged.
SDValue AVRTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  // Bind by reference: a plain `auto` would copy the whole DataLayout object
  // for every lowered function.
  const DataLayout &DL = DAG.getDataLayout();

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());

  // Variadic functions do not need all the analysis below.
  if (isVarArg) {
    CCInfo.AnalyzeFormalArguments(Ins, ArgCC_AVR_Vararg);
  } else {
    analyzeArguments(nullptr, &MF.getFunction(), &DL, Ins, ArgLocs, CCInfo,
                     Subtarget.hasTinyEncoding());
  }

  for (CCValAssign &VA : ArgLocs) {

    // Arguments stored on registers.
    if (VA.isRegLoc()) {
      EVT RegVT = VA.getLocVT();
      const TargetRegisterClass *RC;
      if (RegVT == MVT::i8) {
        RC = &AVR::GPR8RegClass;
      } else if (RegVT == MVT::i16) {
        RC = &AVR::DREGSRegClass;
      } else {
        llvm_unreachable("Unknown argument type!");
      }

      // Mark the physical register live-in and read it through a copy.
      Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);

      // :NOTE: Clang should not promote any i8 into i16 but for safety the
      // following code will handle zexts or sexts generated by other
      // front ends. Otherwise:
      // If this is an 8 bit value, it is really passed promoted
      // to 16 bits. Insert an assert[sz]ext to capture this, then
      // truncate to the right size.
      switch (VA.getLocInfo()) {
      default:
        llvm_unreachable("Unknown loc info!");
      case CCValAssign::Full:
        break;
      case CCValAssign::BCvt:
        ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
        break;
      case CCValAssign::SExt:
        ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
        break;
      case CCValAssign::ZExt:
        ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
        break;
      }

      InVals.push_back(ArgValue);
    } else {
      // Only arguments passed on the stack should make it here.
      assert(VA.isMemLoc());

      EVT LocVT = VA.getLocVT();

      // Create the frame index object for this incoming parameter.
      int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8,
                                     VA.getLocMemOffset(), true);

      // Create the SelectionDAG nodes corresponding to a load
      // from this parameter.
      SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DL));
      InVals.push_back(DAG.getLoad(LocVT, dl, Chain, FIN,
                                   MachinePointerInfo::getFixedStack(MF, FI)));
    }
  }

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    unsigned StackSize = CCInfo.getStackSize();
    AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();

    // The fixed object sits right after the stack area used by the named
    // arguments.
    AFI->setVarArgsFrameIndex(MFI.CreateFixedObject(2, StackSize, true));
  }

  return Chain;
}
1462

1463
//===----------------------------------------------------------------------===//
1464
//                  Call Calling Convention Implementation
1465
//===----------------------------------------------------------------------===//
1466

1467
/// Lower an outgoing call described by \p CLI into SelectionDAG nodes.
///
/// Register arguments are copied into their assigned physical registers,
/// stack arguments are stored relative to SP, and an AVRISD::CALL node is
/// emitted between CALLSEQ_START and CALLSEQ_END. The values returned by the
/// callee are appended to \p InVals by LowerCallResult.
///
/// \returns the chain produced by LowerCallResult.
SDValue AVRTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                     SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  const CallingConv::ID CallConv = CLI.CallConv;
  const bool isVarArg = CLI.IsVarArg;

  MachineFunction &MF = DAG.getMachineFunction();
  const auto PtrVT = getPointerTy(DAG.getDataLayout());

  // AVR does not yet support tail call optimization, so report back to the
  // caller that a regular call is emitted.
  CLI.IsTailCall = false;

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());

  // Direct calls arrive as GlobalAddress/ExternalSymbol nodes. Rewrite them
  // to their Target* counterparts so that legalization leaves them alone, and
  // remember the callee when it is a known Function.
  const Function *F = nullptr;
  if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = GA->getGlobal();
    F = dyn_cast<Function>(GV);
    Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
  } else if (const auto *Sym = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(Sym->getSymbol(), PtrVT);
  }

  // Variadic calls use the dedicated vararg convention and skip the custom
  // argument analysis.
  if (!isVarArg) {
    analyzeArguments(&CLI, F, &DAG.getDataLayout(), Outs, ArgLocs, CCInfo,
                     Subtarget.hasTinyEncoding());
  } else {
    CCInfo.AnalyzeCallOperands(Outs, ArgCC_AVR_Vararg);
  }

  // Number of bytes that are going to be pushed on the stack.
  const unsigned NumBytes = CCInfo.getStackSize();

  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;

  // Phase one: walk the leading register assignments and record the copies.
  // The walk stops at the first stack argument; ArgIdx is left pointing at it
  // so that phase two can continue from there.
  const unsigned NumArgs = ArgLocs.size();
  unsigned ArgIdx = 0;
  bool HasStackArgs = false;
  for (; ArgIdx != NumArgs; ++ArgIdx) {
    CCValAssign &VA = ArgLocs[ArgIdx];
    const EVT LocVT = VA.getLocVT();
    SDValue Arg = OutVals[ArgIdx];

    // Promote the value if needed. With Clang this should not happen.
    switch (VA.getLocInfo()) {
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, LocVT, Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, LocVT, Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, DL, LocVT, Arg);
      break;
    case CCValAssign::BCvt:
      Arg = DAG.getNode(ISD::BITCAST, DL, LocVT, Arg);
      break;
    default:
      llvm_unreachable("Unknown loc info!");
    }

    if (VA.isMemLoc()) {
      HasStackArgs = true;
      break;
    }

    // Register arguments are kept in RegsToPass until the copies are built.
    RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
  }

  // Phase two: the remaining (stack) arguments have to be walked.
  // Previously this code created chained stores but those chained stores appear
  // to be unchained in the legalization phase. Therefore, do not attempt to
  // chain them here. In fact, chaining them here somehow causes the first and
  // second store to be reversed which is the exact opposite of the intended
  // effect.
  if (HasStackArgs) {
    SmallVector<SDValue, 8> StoreChains;
    for (; ArgIdx != NumArgs; ++ArgIdx) {
      CCValAssign &VA = ArgLocs[ArgIdx];
      SDValue Arg = OutVals[ArgIdx];

      assert(VA.isMemLoc());

      // SP points to one stack slot further so add one to adjust it.
      SDValue PtrOff =
          DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getRegister(AVR::SP, PtrVT),
                      DAG.getIntPtrConstant(VA.getLocMemOffset() + 1, DL));

      StoreChains.push_back(
          DAG.getStore(Chain, DL, Arg, PtrOff,
                       MachinePointerInfo::getStack(MF, VA.getLocMemOffset())));
    }

    if (!StoreChains.empty())
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, StoreChains);
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain and
  // glue operands which copy the outgoing args into registers. The glue is
  // necessary since all emitted instructions must be stuck together.
  SDValue InGlue;
  for (const auto &[PhysReg, Val] : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, DL, PhysReg, Val, InGlue);
    InGlue = Chain.getValue(1);
  }

  // The call node returns a chain and a glue for the retval copies to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SmallVector<SDValue, 8> Ops = {Chain, Callee};

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (const auto &[PhysReg, Val] : RegsToPass)
    Ops.push_back(DAG.getRegister(PhysReg, Val.getValueType()));

  // The zero register (usually R1) must be passed as an implicit register so
  // that this register is correctly zeroed in interrupts.
  Ops.push_back(DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  if (InGlue.getNode())
    Ops.push_back(InGlue);

  Chain = DAG.getNode(AVRISD::CALL, DL, NodeTys, Ops);
  InGlue = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, DL);

  if (!Ins.empty())
    InGlue = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, DL, DAG,
                         InVals);
}
1641

1642
/// Lower the result values of a call into the
1643
/// appropriate copies out of appropriate physical registers.
1644
///
1645
SDValue AVRTargetLowering::LowerCallResult(
1646
    SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
1647
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1648
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1649

1650
  // Assign locations to each value returned by this call.
1651
  SmallVector<CCValAssign, 16> RVLocs;
1652
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1653
                 *DAG.getContext());
1654

1655
  // Handle runtime calling convs.
1656
  if (CallConv == CallingConv::AVR_BUILTIN) {
1657
    CCInfo.AnalyzeCallResult(Ins, RetCC_AVR_BUILTIN);
1658
  } else {
1659
    analyzeReturnValues(Ins, CCInfo, Subtarget.hasTinyEncoding());
1660
  }
1661

1662
  // Copy all of the result registers out of their specified physreg.
1663
  for (CCValAssign const &RVLoc : RVLocs) {
1664
    Chain = DAG.getCopyFromReg(Chain, dl, RVLoc.getLocReg(), RVLoc.getValVT(),
1665
                               InGlue)
1666
                .getValue(1);
1667
    InGlue = Chain.getValue(2);
1668
    InVals.push_back(Chain.getValue(0));
1669
  }
1670

1671
  return Chain;
1672
}
1673

1674
//===----------------------------------------------------------------------===//
1675
//               Return Value Calling Convention Implementation
1676
//===----------------------------------------------------------------------===//
1677

1678
bool AVRTargetLowering::CanLowerReturn(
1679
    CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
1680
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
1681
  if (CallConv == CallingConv::AVR_BUILTIN) {
1682
    SmallVector<CCValAssign, 16> RVLocs;
1683
    CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
1684
    return CCInfo.CheckReturn(Outs, RetCC_AVR_BUILTIN);
1685
  }
1686

1687
  unsigned TotalBytes = getTotalArgumentsSizeInBytes(Outs);
1688
  return TotalBytes <= (unsigned)(Subtarget.hasTinyEncoding() ? 4 : 8);
1689
}
1690

1691
/// Lower a function return: copy the return values into their assigned
/// registers and emit the return node.
///
/// Naked functions get the register copies but no return node. Interrupt and
/// signal handlers return with RETI_GLUE; all other functions return with
/// RET_GLUE and additionally list the zero register as an operand.
SDValue
AVRTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &dl, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();

  // RVLocs receives the location assigned to each return value; CCInfo holds
  // the register/stack state used while assigning them.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());

  // Analyze return values.
  if (CallConv != CallingConv::AVR_BUILTIN) {
    analyzeReturnValues(Outs, CCInfo, Subtarget.hasTinyEncoding());
  } else {
    CCInfo.AnalyzeReturn(Outs, RetCC_AVR_BUILTIN);
  }

  // Operand 0 is the chain; it is refreshed below once all copies exist.
  SmallVector<SDValue, 4> RetOps(1, Chain);
  SDValue Glue;

  // Copy the result values into the output registers.
  for (unsigned Idx = 0, End = RVLocs.size(); Idx != End; ++Idx) {
    CCValAssign &VA = RVLocs[Idx];
    assert(VA.isRegLoc() && "Can only return in registers!");

    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[Idx], Glue);

    // Glue the copies together so that they stay adjacent.
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  // Don't emit the ret/reti instruction when the naked attribute is present in
  // the function being compiled.
  if (MF.getFunction().getAttributes().hasFnAttr(Attribute::Naked))
    return Chain;

  const AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
  const bool IsHandler = AFI->isInterruptOrSignalHandler();

  if (!IsHandler) {
    // The return instruction has an implicit zero register operand: it must
    // contain zero on return.
    // This is not needed in interrupts however, where the zero register is
    // handled specially (only pushed/popped when needed).
    RetOps.push_back(DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8));
  }

  unsigned RetOpc = AVRISD::RET_GLUE;
  if (IsHandler)
    RetOpc = AVRISD::RETI_GLUE;

  RetOps[0] = Chain; // Update chain.

  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(RetOpc, dl, MVT::Other, RetOps);
}
1754

1755
//===----------------------------------------------------------------------===//
1756
//  Custom Inserters
1757
//===----------------------------------------------------------------------===//
1758

1759
/// Expand a shift/rotate pseudo with a register shift amount into a loop that
/// applies the matching single-step instruction once per iteration.
///
/// \param MI   The pseudo (Lsl8/16, Lsr8/16, Asr8/16, Rol8/16, Ror8/16).
///             Operand 0 is the destination, operand 1 the value to shift and
///             operand 2 the register holding the shift amount.
/// \param BB   The block containing \p MI. Everything after \p MI is moved
///             into a new block.
/// \param Tiny Selects ROLBRdR17 instead of ROLBRdR1 for Rol8.
/// \returns the new block holding the instructions that followed \p MI.
MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
                                                  MachineBasicBlock *BB,
                                                  bool Tiny) const {
  unsigned Opc;
  const TargetRegisterClass *RC;
  // Set when the step instruction takes the shifted register twice.
  bool HasRepeatedOperand = false;
  MachineFunction *F = BB->getParent();
  MachineRegisterInfo &RI = F->getRegInfo();
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  DebugLoc dl = MI.getDebugLoc();

  // Pick the single-step instruction and the register class of the value.
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Invalid shift opcode!");
  case AVR::Lsl8:
    Opc = AVR::ADDRdRr; // LSL is an alias of ADD Rd, Rd
    RC = &AVR::GPR8RegClass;
    HasRepeatedOperand = true;
    break;
  case AVR::Lsl16:
    Opc = AVR::LSLWRd;
    RC = &AVR::DREGSRegClass;
    break;
  case AVR::Asr8:
    Opc = AVR::ASRRd;
    RC = &AVR::GPR8RegClass;
    break;
  case AVR::Asr16:
    Opc = AVR::ASRWRd;
    RC = &AVR::DREGSRegClass;
    break;
  case AVR::Lsr8:
    Opc = AVR::LSRRd;
    RC = &AVR::GPR8RegClass;
    break;
  case AVR::Lsr16:
    Opc = AVR::LSRWRd;
    RC = &AVR::DREGSRegClass;
    break;
  case AVR::Rol8:
    Opc = Tiny ? AVR::ROLBRdR17 : AVR::ROLBRdR1;
    RC = &AVR::GPR8RegClass;
    break;
  case AVR::Rol16:
    Opc = AVR::ROLWRd;
    RC = &AVR::DREGSRegClass;
    break;
  case AVR::Ror8:
    Opc = AVR::RORBRd;
    RC = &AVR::GPR8RegClass;
    break;
  case AVR::Ror16:
    Opc = AVR::RORWRd;
    RC = &AVR::DREGSRegClass;
    break;
  }

  const BasicBlock *LLVM_BB = BB->getBasicBlock();

  // The new blocks are inserted directly after BB.
  MachineFunction::iterator I = std::next(BB->getIterator());

  // Create the loop body, the loop check and the remainder block.
  MachineBasicBlock *LoopBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *CheckBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *RemBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, LoopBB);
  F->insert(I, CheckBB);
  F->insert(I, RemBB);

  // Update machine-CFG edges by transferring all successors of the current
  // block to the block containing instructions after shift.
  RemBB->splice(RemBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),
                BB->end());
  RemBB->transferSuccessorsAndUpdatePHIs(BB);

  // Add edges BB => CheckBB, LoopBB => CheckBB, CheckBB => LoopBB and
  // CheckBB => RemBB. The loop is entered through the check.
  BB->addSuccessor(CheckBB);
  LoopBB->addSuccessor(CheckBB);
  CheckBB->addSuccessor(LoopBB);
  CheckBB->addSuccessor(RemBB);

  Register ShiftAmtReg = RI.createVirtualRegister(&AVR::GPR8RegClass);
  Register ShiftAmtReg2 = RI.createVirtualRegister(&AVR::GPR8RegClass);
  Register ShiftReg = RI.createVirtualRegister(RC);
  Register ShiftReg2 = RI.createVirtualRegister(RC);
  Register ShiftAmtSrcReg = MI.getOperand(2).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  Register DstReg = MI.getOperand(0).getReg();

  // BB:
  // rjmp CheckBB
  BuildMI(BB, dl, TII.get(AVR::RJMPk)).addMBB(CheckBB);

  // LoopBB:
  // ShiftReg2 = shift ShiftReg
  auto ShiftMI = BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2).addReg(ShiftReg);
  if (HasRepeatedOperand)
    ShiftMI.addReg(ShiftReg);

  // CheckBB:
  // ShiftReg = phi [%SrcReg, BB], [%ShiftReg2, LoopBB]
  // ShiftAmt = phi [%N,      BB], [%ShiftAmt2, LoopBB]
  // DestReg  = phi [%SrcReg, BB], [%ShiftReg2, LoopBB]
  // ShiftAmt2 = ShiftAmt - 1;
  // if (ShiftAmt2 >= 0) goto LoopBB;
  BuildMI(CheckBB, dl, TII.get(AVR::PHI), ShiftReg)
      .addReg(SrcReg)
      .addMBB(BB)
      .addReg(ShiftReg2)
      .addMBB(LoopBB);
  BuildMI(CheckBB, dl, TII.get(AVR::PHI), ShiftAmtReg)
      .addReg(ShiftAmtSrcReg)
      .addMBB(BB)
      .addReg(ShiftAmtReg2)
      .addMBB(LoopBB);
  BuildMI(CheckBB, dl, TII.get(AVR::PHI), DstReg)
      .addReg(SrcReg)
      .addMBB(BB)
      .addReg(ShiftReg2)
      .addMBB(LoopBB);

  BuildMI(CheckBB, dl, TII.get(AVR::DECRd), ShiftAmtReg2).addReg(ShiftAmtReg);
  BuildMI(CheckBB, dl, TII.get(AVR::BRPLk)).addMBB(LoopBB);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return RemBB;
}
1891

1892
// Do a multibyte AVR shift. Insert shift instructions and put the output
1893
// registers in the Regs array.
1894
// Because AVR does not have a normal shift instruction (only a single bit shift
1895
// instruction), we have to emulate this behavior with other instructions.
1896
// It first tries large steps (moving registers around) and then smaller steps
1897
// like single bit shifts.
1898
// Large shifts actually reduce the number of shifted registers, so the below
1899
// algorithms have to work independently of the number of registers that are
1900
// shifted.
1901
// For more information and background, see this blogpost:
1902
// https://aykevl.nl/2021/02/avr-bitshift
1903
static void insertMultibyteShift(MachineInstr &MI, MachineBasicBlock *BB,
1904
                                 MutableArrayRef<std::pair<Register, int>> Regs,
1905
                                 ISD::NodeType Opc, int64_t ShiftAmt) {
1906
  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
1907
  const AVRSubtarget &STI = BB->getParent()->getSubtarget<AVRSubtarget>();
1908
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
1909
  const DebugLoc &dl = MI.getDebugLoc();
1910

1911
  const bool ShiftLeft = Opc == ISD::SHL;
1912
  const bool ArithmeticShift = Opc == ISD::SRA;
1913

1914
  // Zero a register, for use in later operations.
1915
  Register ZeroReg = MRI.createVirtualRegister(&AVR::GPR8RegClass);
1916
  BuildMI(*BB, MI, dl, TII.get(AVR::COPY), ZeroReg)
1917
      .addReg(STI.getZeroRegister());
1918

1919
  // Do a shift modulo 6 or 7. This is a bit more complicated than most shifts
1920
  // and is hard to compose with the rest, so these are special cased.
1921
  // The basic idea is to shift one or two bits in the opposite direction and
1922
  // then move registers around to get the correct end result.
1923
  if (ShiftLeft && (ShiftAmt % 8) >= 6) {
1924
    // Left shift modulo 6 or 7.
1925

1926
    // Create a slice of the registers we're going to modify, to ease working
1927
    // with them.
1928
    size_t ShiftRegsOffset = ShiftAmt / 8;
1929
    size_t ShiftRegsSize = Regs.size() - ShiftRegsOffset;
1930
    MutableArrayRef<std::pair<Register, int>> ShiftRegs =
1931
        Regs.slice(ShiftRegsOffset, ShiftRegsSize);
1932

1933
    // Shift one to the right, keeping the least significant bit as the carry
1934
    // bit.
1935
    insertMultibyteShift(MI, BB, ShiftRegs, ISD::SRL, 1);
1936

1937
    // Rotate the least significant bit from the carry bit into a new register
1938
    // (that starts out zero).
1939
    Register LowByte = MRI.createVirtualRegister(&AVR::GPR8RegClass);
1940
    BuildMI(*BB, MI, dl, TII.get(AVR::RORRd), LowByte).addReg(ZeroReg);
1941

1942
    // Shift one more to the right if this is a modulo-6 shift.
1943
    if (ShiftAmt % 8 == 6) {
1944
      insertMultibyteShift(MI, BB, ShiftRegs, ISD::SRL, 1);
1945
      Register NewLowByte = MRI.createVirtualRegister(&AVR::GPR8RegClass);
1946
      BuildMI(*BB, MI, dl, TII.get(AVR::RORRd), NewLowByte).addReg(LowByte);
1947
      LowByte = NewLowByte;
1948
    }
1949

1950
    // Move all registers to the left, zeroing the bottom registers as needed.
1951
    for (size_t I = 0; I < Regs.size(); I++) {
1952
      int ShiftRegsIdx = I + 1;
1953
      if (ShiftRegsIdx < (int)ShiftRegs.size()) {
1954
        Regs[I] = ShiftRegs[ShiftRegsIdx];
1955
      } else if (ShiftRegsIdx == (int)ShiftRegs.size()) {
1956
        Regs[I] = std::pair(LowByte, 0);
1957
      } else {
1958
        Regs[I] = std::pair(ZeroReg, 0);
1959
      }
1960
    }
1961

1962
    return;
1963
  }
1964

1965
  // Right shift modulo 6 or 7.
1966
  if (!ShiftLeft && (ShiftAmt % 8) >= 6) {
1967
    // Create a view on the registers we're going to modify, to ease working
1968
    // with them.
1969
    size_t ShiftRegsSize = Regs.size() - (ShiftAmt / 8);
1970
    MutableArrayRef<std::pair<Register, int>> ShiftRegs =
1971
        Regs.slice(0, ShiftRegsSize);
1972

1973
    // Shift one to the left.
1974
    insertMultibyteShift(MI, BB, ShiftRegs, ISD::SHL, 1);
1975

1976
    // Sign or zero extend the most significant register into a new register.
1977
    // The HighByte is the byte that still has one (or two) bits from the
1978
    // original value. The ExtByte is purely a zero/sign extend byte (all bits
1979
    // are either 0 or 1).
1980
    Register HighByte = MRI.createVirtualRegister(&AVR::GPR8RegClass);
1981
    Register ExtByte = 0;
1982
    if (ArithmeticShift) {
1983
      // Sign-extend bit that was shifted out last.
1984
      BuildMI(*BB, MI, dl, TII.get(AVR::SBCRdRr), HighByte)
1985
          .addReg(HighByte, RegState::Undef)
1986
          .addReg(HighByte, RegState::Undef);
1987
      ExtByte = HighByte;
1988
      // The highest bit of the original value is the same as the zero-extend
1989
      // byte, so HighByte and ExtByte are the same.
1990
    } else {
1991
      // Use the zero register for zero extending.
1992
      ExtByte = ZeroReg;
1993
      // Rotate most significant bit into a new register (that starts out zero).
1994
      BuildMI(*BB, MI, dl, TII.get(AVR::ADCRdRr), HighByte)
1995
          .addReg(ExtByte)
1996
          .addReg(ExtByte);
1997
    }
1998

1999
    // Shift one more to the left for modulo 6 shifts.
2000
    if (ShiftAmt % 8 == 6) {
2001
      insertMultibyteShift(MI, BB, ShiftRegs, ISD::SHL, 1);
2002
      // Shift the topmost bit into the HighByte.
2003
      Register NewExt = MRI.createVirtualRegister(&AVR::GPR8RegClass);
2004
      BuildMI(*BB, MI, dl, TII.get(AVR::ADCRdRr), NewExt)
2005
          .addReg(HighByte)
2006
          .addReg(HighByte);
2007
      HighByte = NewExt;
2008
    }
2009

2010
    // Move all to the right, while sign or zero extending.
2011
    for (int I = Regs.size() - 1; I >= 0; I--) {
2012
      int ShiftRegsIdx = I - (Regs.size() - ShiftRegs.size()) - 1;
2013
      if (ShiftRegsIdx >= 0) {
2014
        Regs[I] = ShiftRegs[ShiftRegsIdx];
2015
      } else if (ShiftRegsIdx == -1) {
2016
        Regs[I] = std::pair(HighByte, 0);
2017
      } else {
2018
        Regs[I] = std::pair(ExtByte, 0);
2019
      }
2020
    }
2021

2022
    return;
2023
  }
2024

2025
  // For shift amounts of at least one register, simply rename the registers and
2026
  // zero the bottom registers.
2027
  while (ShiftLeft && ShiftAmt >= 8) {
2028
    // Move all registers one to the left.
2029
    for (size_t I = 0; I < Regs.size() - 1; I++) {
2030
      Regs[I] = Regs[I + 1];
2031
    }
2032

2033
    // Zero the least significant register.
2034
    Regs[Regs.size() - 1] = std::pair(ZeroReg, 0);
2035

2036
    // Continue shifts with the leftover registers.
2037
    Regs = Regs.drop_back(1);
2038

2039
    ShiftAmt -= 8;
2040
  }
2041

2042
  // And again, the same for right shifts.
2043
  Register ShrExtendReg = 0;
2044
  if (!ShiftLeft && ShiftAmt >= 8) {
2045
    if (ArithmeticShift) {
2046
      // Sign extend the most significant register into ShrExtendReg.
2047
      ShrExtendReg = MRI.createVirtualRegister(&AVR::GPR8RegClass);
2048
      Register Tmp = MRI.createVirtualRegister(&AVR::GPR8RegClass);
2049
      BuildMI(*BB, MI, dl, TII.get(AVR::ADDRdRr), Tmp)
2050
          .addReg(Regs[0].first, 0, Regs[0].second)
2051
          .addReg(Regs[0].first, 0, Regs[0].second);
2052
      BuildMI(*BB, MI, dl, TII.get(AVR::SBCRdRr), ShrExtendReg)
2053
          .addReg(Tmp)
2054
          .addReg(Tmp);
2055
    } else {
2056
      ShrExtendReg = ZeroReg;
2057
    }
2058
    for (; ShiftAmt >= 8; ShiftAmt -= 8) {
2059
      // Move all registers one to the right.
2060
      for (size_t I = Regs.size() - 1; I != 0; I--) {
2061
        Regs[I] = Regs[I - 1];
2062
      }
2063

2064
      // Zero or sign extend the most significant register.
2065
      Regs[0] = std::pair(ShrExtendReg, 0);
2066

2067
      // Continue shifts with the leftover registers.
2068
      Regs = Regs.drop_front(1);
2069
    }
2070
  }
2071

2072
  // The bigger shifts are already handled above.
2073
  assert((ShiftAmt < 8) && "Unexpect shift amount");
2074

2075
  // Shift by four bits, using a complicated swap/eor/andi/eor sequence.
2076
  // It only works for logical shifts because the bits shifted in are all
2077
  // zeroes.
2078
  // To shift a single byte right, it produces code like this:
2079
  //   swap r0
2080
  //   andi r0, 0x0f
2081
  // For a two-byte (16-bit) shift, it adds the following instructions to shift
2082
  // the upper byte into the lower byte:
2083
  //   swap r1
2084
  //   eor r0, r1
2085
  //   andi r1, 0x0f
2086
  //   eor r0, r1
2087
  // For bigger shifts, it repeats the above sequence. For example, for a 3-byte
2088
  // (24-bit) shift it adds:
2089
  //   swap r2
2090
  //   eor r1, r2
2091
  //   andi r2, 0x0f
2092
  //   eor r1, r2
2093
  if (!ArithmeticShift && ShiftAmt >= 4) {
2094
    Register Prev = 0;
2095
    for (size_t I = 0; I < Regs.size(); I++) {
2096
      size_t Idx = ShiftLeft ? I : Regs.size() - I - 1;
2097
      Register SwapReg = MRI.createVirtualRegister(&AVR::LD8RegClass);
2098
      BuildMI(*BB, MI, dl, TII.get(AVR::SWAPRd), SwapReg)
2099
          .addReg(Regs[Idx].first, 0, Regs[Idx].second);
2100
      if (I != 0) {
2101
        Register R = MRI.createVirtualRegister(&AVR::GPR8RegClass);
2102
        BuildMI(*BB, MI, dl, TII.get(AVR::EORRdRr), R)
2103
            .addReg(Prev)
2104
            .addReg(SwapReg);
2105
        Prev = R;
2106
      }
2107
      Register AndReg = MRI.createVirtualRegister(&AVR::LD8RegClass);
2108
      BuildMI(*BB, MI, dl, TII.get(AVR::ANDIRdK), AndReg)
2109
          .addReg(SwapReg)
2110
          .addImm(ShiftLeft ? 0xf0 : 0x0f);
2111
      if (I != 0) {
2112
        Register R = MRI.createVirtualRegister(&AVR::GPR8RegClass);
2113
        BuildMI(*BB, MI, dl, TII.get(AVR::EORRdRr), R)
2114
            .addReg(Prev)
2115
            .addReg(AndReg);
2116
        size_t PrevIdx = ShiftLeft ? Idx - 1 : Idx + 1;
2117
        Regs[PrevIdx] = std::pair(R, 0);
2118
      }
2119
      Prev = AndReg;
2120
      Regs[Idx] = std::pair(AndReg, 0);
2121
    }
2122
    ShiftAmt -= 4;
2123
  }
2124

2125
  // Shift by one. This is the fallback that always works, and the shift
2126
  // operation that is used for 1, 2, and 3 bit shifts.
2127
  while (ShiftLeft && ShiftAmt) {
2128
    // Shift one to the left.
2129
    for (ssize_t I = Regs.size() - 1; I >= 0; I--) {
2130
      Register Out = MRI.createVirtualRegister(&AVR::GPR8RegClass);
2131
      Register In = Regs[I].first;
2132
      Register InSubreg = Regs[I].second;
2133
      if (I == (ssize_t)Regs.size() - 1) { // first iteration
2134
        BuildMI(*BB, MI, dl, TII.get(AVR::ADDRdRr), Out)
2135
            .addReg(In, 0, InSubreg)
2136
            .addReg(In, 0, InSubreg);
2137
      } else {
2138
        BuildMI(*BB, MI, dl, TII.get(AVR::ADCRdRr), Out)
2139
            .addReg(In, 0, InSubreg)
2140
            .addReg(In, 0, InSubreg);
2141
      }
2142
      Regs[I] = std::pair(Out, 0);
2143
    }
2144
    ShiftAmt--;
2145
  }
2146
  while (!ShiftLeft && ShiftAmt) {
2147
    // Shift one to the right.
2148
    for (size_t I = 0; I < Regs.size(); I++) {
2149
      Register Out = MRI.createVirtualRegister(&AVR::GPR8RegClass);
2150
      Register In = Regs[I].first;
2151
      Register InSubreg = Regs[I].second;
2152
      if (I == 0) {
2153
        unsigned Opc = ArithmeticShift ? AVR::ASRRd : AVR::LSRRd;
2154
        BuildMI(*BB, MI, dl, TII.get(Opc), Out).addReg(In, 0, InSubreg);
2155
      } else {
2156
        BuildMI(*BB, MI, dl, TII.get(AVR::RORRd), Out).addReg(In, 0, InSubreg);
2157
      }
2158
      Regs[I] = std::pair(Out, 0);
2159
    }
2160
    ShiftAmt--;
2161
  }
2162

2163
  if (ShiftAmt != 0) {
2164
    llvm_unreachable("don't know how to shift!"); // sanity check
2165
  }
2166
}
2167

2168
// Do a wide (32-bit) shift.
2169
MachineBasicBlock *
2170
AVRTargetLowering::insertWideShift(MachineInstr &MI,
2171
                                   MachineBasicBlock *BB) const {
2172
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2173
  const DebugLoc &dl = MI.getDebugLoc();
2174

2175
  // How much to shift to the right (meaning: a negative number indicates a left
2176
  // shift).
2177
  int64_t ShiftAmt = MI.getOperand(4).getImm();
2178
  ISD::NodeType Opc;
2179
  switch (MI.getOpcode()) {
2180
  case AVR::Lsl32:
2181
    Opc = ISD::SHL;
2182
    break;
2183
  case AVR::Lsr32:
2184
    Opc = ISD::SRL;
2185
    break;
2186
  case AVR::Asr32:
2187
    Opc = ISD::SRA;
2188
    break;
2189
  }
2190

2191
  // Read the input registers, with the most significant register at index 0.
2192
  std::array<std::pair<Register, int>, 4> Registers = {
2193
      std::pair(MI.getOperand(3).getReg(), AVR::sub_hi),
2194
      std::pair(MI.getOperand(3).getReg(), AVR::sub_lo),
2195
      std::pair(MI.getOperand(2).getReg(), AVR::sub_hi),
2196
      std::pair(MI.getOperand(2).getReg(), AVR::sub_lo),
2197
  };
2198

2199
  // Do the shift. The registers are modified in-place.
2200
  insertMultibyteShift(MI, BB, Registers, Opc, ShiftAmt);
2201

2202
  // Combine the 8-bit registers into 16-bit register pairs.
2203
  // This done either from LSB to MSB or from MSB to LSB, depending on the
2204
  // shift. It's an optimization so that the register allocator will use the
2205
  // fewest movs possible (which order we use isn't a correctness issue, just an
2206
  // optimization issue).
2207
  //   - lsl prefers starting from the most significant byte (2nd case).
2208
  //   - lshr prefers starting from the least significant byte (1st case).
2209
  //   - for ashr it depends on the number of shifted bytes.
2210
  // Some shift operations still don't get the most optimal mov sequences even
2211
  // with this distinction. TODO: figure out why and try to fix it (but we're
2212
  // already equal to or faster than avr-gcc in all cases except ashr 8).
2213
  if (Opc != ISD::SHL &&
2214
      (Opc != ISD::SRA || (ShiftAmt < 16 || ShiftAmt >= 22))) {
2215
    // Use the resulting registers starting with the least significant byte.
2216
    BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(0).getReg())
2217
        .addReg(Registers[3].first, 0, Registers[3].second)
2218
        .addImm(AVR::sub_lo)
2219
        .addReg(Registers[2].first, 0, Registers[2].second)
2220
        .addImm(AVR::sub_hi);
2221
    BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(1).getReg())
2222
        .addReg(Registers[1].first, 0, Registers[1].second)
2223
        .addImm(AVR::sub_lo)
2224
        .addReg(Registers[0].first, 0, Registers[0].second)
2225
        .addImm(AVR::sub_hi);
2226
  } else {
2227
    // Use the resulting registers starting with the most significant byte.
2228
    BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(1).getReg())
2229
        .addReg(Registers[0].first, 0, Registers[0].second)
2230
        .addImm(AVR::sub_hi)
2231
        .addReg(Registers[1].first, 0, Registers[1].second)
2232
        .addImm(AVR::sub_lo);
2233
    BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(0).getReg())
2234
        .addReg(Registers[2].first, 0, Registers[2].second)
2235
        .addImm(AVR::sub_hi)
2236
        .addReg(Registers[3].first, 0, Registers[3].second)
2237
        .addImm(AVR::sub_lo);
2238
  }
2239

2240
  // Remove the pseudo instruction.
2241
  MI.eraseFromParent();
2242
  return BB;
2243
}
2244

2245
// Returns true when the instruction at I is a COPY whose source operand
// (operand 1) is R0 or R1.
static bool isCopyMulResult(MachineBasicBlock::iterator const &I) {
  if (I->getOpcode() != AVR::COPY)
    return false;

  const Register Source = I->getOperand(1).getReg();
  return Source == AVR::R0 || Source == AVR::R1;
}
2253

2254
// The mul instructions wreak havock on our zero_reg R1. We need to clear it
2255
// after the result has been evacuated. This is probably not the best way to do
2256
// it, but it works for now.
2257
MachineBasicBlock *AVRTargetLowering::insertMul(MachineInstr &MI,
2258
                                                MachineBasicBlock *BB) const {
2259
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2260
  MachineBasicBlock::iterator I(MI);
2261
  ++I; // in any case insert *after* the mul instruction
2262
  if (isCopyMulResult(I))
2263
    ++I;
2264
  if (isCopyMulResult(I))
2265
    ++I;
2266
  BuildMI(*BB, I, MI.getDebugLoc(), TII.get(AVR::EORRdRr), AVR::R1)
2267
      .addReg(AVR::R1)
2268
      .addReg(AVR::R1);
2269
  return BB;
2270
}
2271

2272
// Insert a read from the zero register.
2273
MachineBasicBlock *
2274
AVRTargetLowering::insertCopyZero(MachineInstr &MI,
2275
                                  MachineBasicBlock *BB) const {
2276
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2277
  MachineBasicBlock::iterator I(MI);
2278
  BuildMI(*BB, I, MI.getDebugLoc(), TII.get(AVR::COPY))
2279
      .add(MI.getOperand(0))
2280
      .addReg(Subtarget.getZeroRegister());
2281
  MI.eraseFromParent();
2282
  return BB;
2283
}
2284

2285
// Lower atomicrmw operation to disable interrupts, do operation, and restore
2286
// interrupts. This works because all AVR microcontrollers are single core.
2287
MachineBasicBlock *AVRTargetLowering::insertAtomicArithmeticOp(
2288
    MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode, int Width) const {
2289
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
2290
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2291
  MachineBasicBlock::iterator I(MI);
2292
  DebugLoc dl = MI.getDebugLoc();
2293

2294
  // Example instruction sequence, for an atomic 8-bit add:
2295
  //   ldi r25, 5
2296
  //   in r0, SREG
2297
  //   cli
2298
  //   ld r24, X
2299
  //   add r25, r24
2300
  //   st X, r25
2301
  //   out SREG, r0
2302

2303
  const TargetRegisterClass *RC =
2304
      (Width == 8) ? &AVR::GPR8RegClass : &AVR::DREGSRegClass;
2305
  unsigned LoadOpcode = (Width == 8) ? AVR::LDRdPtr : AVR::LDWRdPtr;
2306
  unsigned StoreOpcode = (Width == 8) ? AVR::STPtrRr : AVR::STWPtrRr;
2307

2308
  // Disable interrupts.
2309
  BuildMI(*BB, I, dl, TII.get(AVR::INRdA), Subtarget.getTmpRegister())
2310
      .addImm(Subtarget.getIORegSREG());
2311
  BuildMI(*BB, I, dl, TII.get(AVR::BCLRs)).addImm(7);
2312

2313
  // Load the original value.
2314
  BuildMI(*BB, I, dl, TII.get(LoadOpcode), MI.getOperand(0).getReg())
2315
      .add(MI.getOperand(1));
2316

2317
  // Do the arithmetic operation.
2318
  Register Result = MRI.createVirtualRegister(RC);
2319
  BuildMI(*BB, I, dl, TII.get(Opcode), Result)
2320
      .addReg(MI.getOperand(0).getReg())
2321
      .add(MI.getOperand(2));
2322

2323
  // Store the result.
2324
  BuildMI(*BB, I, dl, TII.get(StoreOpcode))
2325
      .add(MI.getOperand(1))
2326
      .addReg(Result);
2327

2328
  // Restore interrupts.
2329
  BuildMI(*BB, I, dl, TII.get(AVR::OUTARr))
2330
      .addImm(Subtarget.getIORegSREG())
2331
      .addReg(Subtarget.getTmpRegister());
2332

2333
  // Remove the pseudo instruction.
2334
  MI.eraseFromParent();
2335
  return BB;
2336
}
2337

2338
// Expand a pseudo instruction that requested custom insertion.
//
// Shift, multiply, CopyZero and atomic read-modify-write pseudos are forwarded
// to their dedicated insert* helpers. Any other opcode must be Select8 or
// Select16, which is expanded here into a diamond control-flow pattern whose
// join block starts with a PHI. For a select, the returned block is the join
// block (trueMBB), which then holds the instructions that followed MI.
MachineBasicBlock *
AVRTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                               MachineBasicBlock *MBB) const {
  int Opc = MI.getOpcode();
  const AVRSubtarget &STI = MBB->getParent()->getSubtarget<AVRSubtarget>();

  switch (Opc) {
  // Pseudo shift instructions with a non constant shift amount are expanded
  // into a loop.
  case AVR::Lsl8:
  case AVR::Lsl16:
  case AVR::Lsr8:
  case AVR::Lsr16:
  case AVR::Rol8:
  case AVR::Rol16:
  case AVR::Ror8:
  case AVR::Ror16:
  case AVR::Asr8:
  case AVR::Asr16:
    return insertShift(MI, MBB, STI.hasTinyEncoding());
  // 32-bit shift pseudos.
  case AVR::Lsl32:
  case AVR::Lsr32:
  case AVR::Asr32:
    return insertWideShift(MI, MBB);
  // Multiplications need R1 cleared again afterwards.
  case AVR::MULRdRr:
  case AVR::MULSRdRr:
    return insertMul(MI, MBB);
  case AVR::CopyZero:
    return insertCopyZero(MI, MBB);
  // Atomic read-modify-write pseudos: pass the plain arithmetic opcode and the
  // operand width in bits.
  case AVR::AtomicLoadAdd8:
    return insertAtomicArithmeticOp(MI, MBB, AVR::ADDRdRr, 8);
  case AVR::AtomicLoadAdd16:
    return insertAtomicArithmeticOp(MI, MBB, AVR::ADDWRdRr, 16);
  case AVR::AtomicLoadSub8:
    return insertAtomicArithmeticOp(MI, MBB, AVR::SUBRdRr, 8);
  case AVR::AtomicLoadSub16:
    return insertAtomicArithmeticOp(MI, MBB, AVR::SUBWRdRr, 16);
  case AVR::AtomicLoadAnd8:
    return insertAtomicArithmeticOp(MI, MBB, AVR::ANDRdRr, 8);
  case AVR::AtomicLoadAnd16:
    return insertAtomicArithmeticOp(MI, MBB, AVR::ANDWRdRr, 16);
  case AVR::AtomicLoadOr8:
    return insertAtomicArithmeticOp(MI, MBB, AVR::ORRdRr, 8);
  case AVR::AtomicLoadOr16:
    return insertAtomicArithmeticOp(MI, MBB, AVR::ORWRdRr, 16);
  case AVR::AtomicLoadXor8:
    return insertAtomicArithmeticOp(MI, MBB, AVR::EORRdRr, 8);
  case AVR::AtomicLoadXor16:
    return insertAtomicArithmeticOp(MI, MBB, AVR::EORWRdRr, 16);
  }

  assert((Opc == AVR::Select16 || Opc == AVR::Select8) &&
         "Unexpected instr type to insert");

  // getBrCond() and getCallFrameSizeAt() are called on the AVR-specific
  // instruction info, so downcast the generic interface explicitly.
  const AVRInstrInfo &TII = static_cast<const AVRInstrInfo &>(
      *MI.getParent()->getParent()->getSubtarget().getInstrInfo());
  DebugLoc dl = MI.getDebugLoc();

  // To "insert" a SELECT instruction, we insert the diamond
  // control-flow pattern. The incoming instruction knows the
  // destination vreg to set, the condition code register to branch
  // on, the true/false values to select between, and a branch opcode
  // to use.

  MachineFunction *MF = MBB->getParent();
  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
  MachineBasicBlock *FallThrough = MBB->getFallThrough();

  // If the current basic block falls through to another basic block,
  // we must insert an unconditional branch to the fallthrough destination
  // if we are to insert basic blocks at the prior fallthrough point.
  if (FallThrough != nullptr) {
    BuildMI(MBB, dl, TII.get(AVR::RJMPk)).addMBB(FallThrough);
  }

  MachineBasicBlock *trueMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *falseMBB = MF->CreateMachineBasicBlock(LLVM_BB);

  // Place both new blocks directly behind MBB (trueMBB first, then falseMBB).
  // MBB belongs to MF, so its position is available without walking the
  // function's block list.
  MachineFunction::iterator InsertPos = std::next(MBB->getIterator());
  MF->insert(InsertPos, trueMBB);
  MF->insert(InsertPos, falseMBB);

  // Set the call frame size on entry to the new basic blocks.
  unsigned CallFrameSize = TII.getCallFrameSizeAt(MI);
  trueMBB->setCallFrameSize(CallFrameSize);
  falseMBB->setCallFrameSize(CallFrameSize);

  // Transfer remaining instructions and all successors of the current
  // block to the block which will contain the Phi node for the
  // select.
  trueMBB->splice(trueMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  trueMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // Operand 3 carries the condition code: branch to trueMBB when it holds,
  // otherwise jump to falseMBB.
  AVRCC::CondCodes CC = (AVRCC::CondCodes)MI.getOperand(3).getImm();
  BuildMI(MBB, dl, TII.getBrCond(CC)).addMBB(trueMBB);
  BuildMI(MBB, dl, TII.get(AVR::RJMPk)).addMBB(falseMBB);
  MBB->addSuccessor(falseMBB);
  MBB->addSuccessor(trueMBB);

  // Unconditionally flow back to the true block
  BuildMI(falseMBB, dl, TII.get(AVR::RJMPk)).addMBB(trueMBB);
  falseMBB->addSuccessor(trueMBB);

  // Set up the Phi node to determine where we came from: operand 1 when
  // arriving straight from MBB, operand 2 when arriving via falseMBB.
  BuildMI(*trueMBB, trueMBB->begin(), dl, TII.get(AVR::PHI),
          MI.getOperand(0).getReg())
      .addReg(MI.getOperand(1).getReg())
      .addMBB(MBB)
      .addReg(MI.getOperand(2).getReg())
      .addMBB(falseMBB);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return trueMBB;
}
2459

2460
//===----------------------------------------------------------------------===//
2461
//  Inline Asm Support
2462
//===----------------------------------------------------------------------===//
2463

2464
// Classify an inline assembly constraint. Single-letter AVR constraints are
// mapped to a register class, a specific register, memory or an immediate;
// everything else is left to the generic TargetLowering implementation.
AVRTargetLowering::ConstraintType
AVRTargetLowering::getConstraintType(StringRef Constraint) const {
  // Only one-character constraints are handled here; any other length is
  // represented by a letter that matches none of the cases below.
  const char Letter = Constraint.size() == 1 ? Constraint[0] : '\0';

  // See http://www.nongnu.org/avr-libc/user-manual/inline_asm.html
  switch (Letter) {
  case 'a': // Simple upper registers
  case 'b': // Base pointer registers pairs
  case 'd': // Upper register
  case 'l': // Lower registers
  case 'e': // Pointer register pairs
  case 'q': // Stack pointer register
  case 'r': // Any register
  case 'w': // Special upper register pairs
    return C_RegisterClass;

  case 't': // Temporary register
  case 'x':
  case 'X': // Pointer register pair X
  case 'y':
  case 'Y': // Pointer register pair Y
  case 'z':
  case 'Z': // Pointer register pair Z
    return C_Register;

  case 'Q': // A memory address based on Y or Z pointer with displacement.
    return C_Memory;

  case 'G': // Floating point constant
  case 'I': // 6-bit positive integer constant
  case 'J': // 6-bit negative integer constant
  case 'K': // Integer constant (Range: 2)
  case 'L': // Integer constant (Range: 0)
  case 'M': // 8-bit integer constant
  case 'N': // Integer constant (Range: -1)
  case 'O': // Integer constant (Range: 8, 16, 24)
  case 'P': // Integer constant (Range: 1)
  case 'R': // Integer constant (Range: -6 to 5)
    return C_Immediate;

  default:
    break;
  }

  return TargetLowering::getConstraintType(Constraint);
}
2506

2507
// Map a memory constraint string to its InlineAsm constraint code. Only the
// first character is examined: 'Q' yields ConstraintCode::Q, anything else is
// resolved by the generic TargetLowering implementation.
InlineAsm::ConstraintCode
AVRTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
  // Not sure if this is actually the right thing to do, but we got to do
  // *something* [agnat]
  if (ConstraintCode[0] == 'Q')
    return InlineAsm::ConstraintCode::Q;

  return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}
2517

2518
// Compute how well the operand described by info matches the constraint
// letter that constraint points at. Register letters get a fixed weight;
// immediate letters yield CW_Constant only when the operand is a constant in
// the letter's range and CW_Invalid otherwise. Unknown letters are left to the
// generic TargetLowering implementation.
AVRTargetLowering::ConstraintWeight
AVRTargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  Value *Operand = info.CallOperandVal;

  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  // (this behaviour has been copied from the ARM backend)
  if (!Operand)
    return CW_Default;

  // Null unless the operand is an integer constant; the integer letters below
  // only inspect the value when this is non-null.
  const auto *IntOperand = dyn_cast<ConstantInt>(Operand);
  auto constantIf = [](bool InRange) {
    return InRange ? CW_Constant : CW_Invalid;
  };

  // Look at the constraint type.
  switch (*constraint) {
  case 'd':
  case 'r':
  case 'l':
    return CW_Register;

  case 'a':
  case 'b':
  case 'e':
  case 'q':
  case 't':
  case 'w':
  case 'x':
  case 'X':
  case 'y':
  case 'Y':
  case 'z':
  case 'Z':
    return CW_SpecificReg;

  case 'G': { // floating point zero
    const auto *FPOperand = dyn_cast<ConstantFP>(Operand);
    return constantIf(FPOperand && FPOperand->isZero());
  }
  case 'I': // 0..63
    return constantIf(IntOperand && isUInt<6>(IntOperand->getZExtValue()));
  case 'J': { // -63..0
    if (!IntOperand)
      return CW_Invalid;
    const int64_t Signed = IntOperand->getSExtValue();
    return constantIf(Signed >= -63 && Signed <= 0);
  }
  case 'K': // 2
    return constantIf(IntOperand && IntOperand->getZExtValue() == 2);
  case 'L': // 0
    return constantIf(IntOperand && IntOperand->getZExtValue() == 0);
  case 'M': // 0..255
    return constantIf(IntOperand && isUInt<8>(IntOperand->getZExtValue()));
  case 'N': // -1
    return constantIf(IntOperand && IntOperand->getSExtValue() == -1);
  case 'O': { // 8, 16, 24
    if (!IntOperand)
      return CW_Invalid;
    const uint64_t Unsigned = IntOperand->getZExtValue();
    return constantIf(Unsigned == 8 || Unsigned == 16 || Unsigned == 24);
  }
  case 'P': // 1
    return constantIf(IntOperand && IntOperand->getZExtValue() == 1);
  case 'R': { // -6..5
    if (!IntOperand)
      return CW_Invalid;
    const int64_t Signed = IntOperand->getSExtValue();
    return constantIf(Signed >= -6 && Signed <= 5);
  }

  case 'Q':
    return CW_Memory;

  default:
    return TargetLowering::getSingleConstraintMatchWeight(info, constraint);
  }
}
2633

2634
std::pair<unsigned, const TargetRegisterClass *>
2635
AVRTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
2636
                                                StringRef Constraint,
2637
                                                MVT VT) const {
2638
  if (Constraint.size() == 1) {
2639
    switch (Constraint[0]) {
2640
    case 'a': // Simple upper registers r16..r23.
2641
      if (VT == MVT::i8)
2642
        return std::make_pair(0U, &AVR::LD8loRegClass);
2643
      else if (VT == MVT::i16)
2644
        return std::make_pair(0U, &AVR::DREGSLD8loRegClass);
2645
      break;
2646
    case 'b': // Base pointer registers: y, z.
2647
      if (VT == MVT::i8 || VT == MVT::i16)
2648
        return std::make_pair(0U, &AVR::PTRDISPREGSRegClass);
2649
      break;
2650
    case 'd': // Upper registers r16..r31.
2651
      if (VT == MVT::i8)
2652
        return std::make_pair(0U, &AVR::LD8RegClass);
2653
      else if (VT == MVT::i16)
2654
        return std::make_pair(0U, &AVR::DLDREGSRegClass);
2655
      break;
2656
    case 'l': // Lower registers r0..r15.
2657
      if (VT == MVT::i8)
2658
        return std::make_pair(0U, &AVR::GPR8loRegClass);
2659
      else if (VT == MVT::i16)
2660
        return std::make_pair(0U, &AVR::DREGSloRegClass);
2661
      break;
2662
    case 'e': // Pointer register pairs: x, y, z.
2663
      if (VT == MVT::i8 || VT == MVT::i16)
2664
        return std::make_pair(0U, &AVR::PTRREGSRegClass);
2665
      break;
2666
    case 'q': // Stack pointer register: SPH:SPL.
2667
      return std::make_pair(0U, &AVR::GPRSPRegClass);
2668
    case 'r': // Any register: r0..r31.
2669
      if (VT == MVT::i8)
2670
        return std::make_pair(0U, &AVR::GPR8RegClass);
2671
      else if (VT == MVT::i16)
2672
        return std::make_pair(0U, &AVR::DREGSRegClass);
2673
      break;
2674
    case 't': // Temporary register: r0.
2675
      if (VT == MVT::i8)
2676
        return std::make_pair(unsigned(Subtarget.getTmpRegister()),
2677
                              &AVR::GPR8RegClass);
2678
      break;
2679
    case 'w': // Special upper register pairs: r24, r26, r28, r30.
2680
      if (VT == MVT::i8 || VT == MVT::i16)
2681
        return std::make_pair(0U, &AVR::IWREGSRegClass);
2682
      break;
2683
    case 'x': // Pointer register pair X: r27:r26.
2684
    case 'X':
2685
      if (VT == MVT::i8 || VT == MVT::i16)
2686
        return std::make_pair(unsigned(AVR::R27R26), &AVR::PTRREGSRegClass);
2687
      break;
2688
    case 'y': // Pointer register pair Y: r29:r28.
2689
    case 'Y':
2690
      if (VT == MVT::i8 || VT == MVT::i16)
2691
        return std::make_pair(unsigned(AVR::R29R28), &AVR::PTRREGSRegClass);
2692
      break;
2693
    case 'z': // Pointer register pair Z: r31:r30.
2694
    case 'Z':
2695
      if (VT == MVT::i8 || VT == MVT::i16)
2696
        return std::make_pair(unsigned(AVR::R31R30), &AVR::PTRREGSRegClass);
2697
      break;
2698
    default:
2699
      break;
2700
    }
2701
  }
2702

2703
  return TargetLowering::getRegForInlineAsmConstraint(
2704
      Subtarget.getRegisterInfo(), Constraint, VT);
2705
}
2706

2707
void AVRTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
2708
                                                     StringRef Constraint,
2709
                                                     std::vector<SDValue> &Ops,
2710
                                                     SelectionDAG &DAG) const {
2711
  SDValue Result;
2712
  SDLoc DL(Op);
2713
  EVT Ty = Op.getValueType();
2714

2715
  // Currently only support length 1 constraints.
2716
  if (Constraint.size() != 1) {
2717
    return;
2718
  }
2719

2720
  char ConstraintLetter = Constraint[0];
2721
  switch (ConstraintLetter) {
2722
  default:
2723
    break;
2724
  // Deal with integers first:
2725
  case 'I':
2726
  case 'J':
2727
  case 'K':
2728
  case 'L':
2729
  case 'M':
2730
  case 'N':
2731
  case 'O':
2732
  case 'P':
2733
  case 'R': {
2734
    const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
2735
    if (!C) {
2736
      return;
2737
    }
2738

2739
    int64_t CVal64 = C->getSExtValue();
2740
    uint64_t CUVal64 = C->getZExtValue();
2741
    switch (ConstraintLetter) {
2742
    case 'I': // 0..63
2743
      if (!isUInt<6>(CUVal64))
2744
        return;
2745
      Result = DAG.getTargetConstant(CUVal64, DL, Ty);
2746
      break;
2747
    case 'J': // -63..0
2748
      if (CVal64 < -63 || CVal64 > 0)
2749
        return;
2750
      Result = DAG.getTargetConstant(CVal64, DL, Ty);
2751
      break;
2752
    case 'K': // 2
2753
      if (CUVal64 != 2)
2754
        return;
2755
      Result = DAG.getTargetConstant(CUVal64, DL, Ty);
2756
      break;
2757
    case 'L': // 0
2758
      if (CUVal64 != 0)
2759
        return;
2760
      Result = DAG.getTargetConstant(CUVal64, DL, Ty);
2761
      break;
2762
    case 'M': // 0..255
2763
      if (!isUInt<8>(CUVal64))
2764
        return;
2765
      // i8 type may be printed as a negative number,
2766
      // e.g. 254 would be printed as -2,
2767
      // so we force it to i16 at least.
2768
      if (Ty.getSimpleVT() == MVT::i8) {
2769
        Ty = MVT::i16;
2770
      }
2771
      Result = DAG.getTargetConstant(CUVal64, DL, Ty);
2772
      break;
2773
    case 'N': // -1
2774
      if (CVal64 != -1)
2775
        return;
2776
      Result = DAG.getTargetConstant(CVal64, DL, Ty);
2777
      break;
2778
    case 'O': // 8, 16, 24
2779
      if (CUVal64 != 8 && CUVal64 != 16 && CUVal64 != 24)
2780
        return;
2781
      Result = DAG.getTargetConstant(CUVal64, DL, Ty);
2782
      break;
2783
    case 'P': // 1
2784
      if (CUVal64 != 1)
2785
        return;
2786
      Result = DAG.getTargetConstant(CUVal64, DL, Ty);
2787
      break;
2788
    case 'R': // -6..5
2789
      if (CVal64 < -6 || CVal64 > 5)
2790
        return;
2791
      Result = DAG.getTargetConstant(CVal64, DL, Ty);
2792
      break;
2793
    }
2794

2795
    break;
2796
  }
2797
  case 'G':
2798
    const ConstantFPSDNode *FC = dyn_cast<ConstantFPSDNode>(Op);
2799
    if (!FC || !FC->isZero())
2800
      return;
2801
    // Soften float to i8 0
2802
    Result = DAG.getTargetConstant(0, DL, MVT::i8);
2803
    break;
2804
  }
2805

2806
  if (Result.getNode()) {
2807
    Ops.push_back(Result);
2808
    return;
2809
  }
2810

2811
  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
2812
}
2813

2814
// Translate a register name into a register. For an 8-bit scalar type the
// names "r0" and "r1" are recognised; for every other type "r0" (mapped to the
// R1R0 pair) and "sp" are. Any other name is a fatal error.
Register AVRTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                              const MachineFunction &MF) const {
  const bool IsByte = (VT == LLT::scalar(8));

  // 0 marks "no such register" in both lookup tables.
  const unsigned Found = IsByte ? StringSwitch<unsigned>(RegName)
                                      .Case("r0", AVR::R0)
                                      .Case("r1", AVR::R1)
                                      .Default(0)
                                : StringSwitch<unsigned>(RegName)
                                      .Case("r0", AVR::R1R0)
                                      .Case("sp", AVR::SP)
                                      .Default(0);

  if (Found != 0)
    return Register(Found);

  report_fatal_error(
      Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
}
2836

2837
} // end of namespace llvm
2838

2839
Product

Resources

Company