CoCalc -- AArch64ExpandImm.cpp

GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
³⁵²⁶⁷ views
1
//===- AArch64ExpandImm.h - AArch64 Immediate Expansion -------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements the AArch64ExpandImm stuff.
10
//
11
//===----------------------------------------------------------------------===//
12

13
#include "AArch64.h"
14
#include "AArch64ExpandImm.h"
15
#include "MCTargetDesc/AArch64AddressingModes.h"
16

17
using namespace llvm;
18
using namespace llvm::AArch64_IMM;
19

20
/// Helper function which extracts the specified 16-bit chunk from a
21
/// 64-bit value.
22
static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx) {
23
  assert(ChunkIdx < 4 && "Out of range chunk index specified!");
24

25
  return (Imm >> (ChunkIdx * 16)) & 0xFFFF;
26
}
27

28
/// Check whether the given 16-bit chunk replicated to full 64-bit width
29
/// can be materialized with an ORR instruction.
30
static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) {
31
  Chunk = (Chunk << 48) | (Chunk << 32) | (Chunk << 16) | Chunk;
32

33
  return AArch64_AM::processLogicalImmediate(Chunk, 64, Encoding);
34
}
35

36
/// Check for identical 16-bit chunks within the constant and if so
37
/// materialize them with a single ORR instruction. The remaining one or two
38
/// 16-bit chunks will be materialized with MOVK instructions.
39
///
40
/// This allows us to materialize constants like |A|B|A|A| or |A|B|C|A| (order
41
/// of the chunks doesn't matter), assuming |A|A|A|A| can be materialized with
42
/// an ORR instruction.
43
static bool tryToreplicateChunks(uint64_t UImm,
44
				 SmallVectorImpl<ImmInsnModel> &Insn) {
45
  using CountMap = DenseMap<uint64_t, unsigned>;
46

47
  CountMap Counts;
48

49
  // Scan the constant and count how often every chunk occurs.
50
  for (unsigned Idx = 0; Idx < 4; ++Idx)
51
    ++Counts[getChunk(UImm, Idx)];
52

53
  // Traverse the chunks to find one which occurs more than once.
54
  for (const auto &Chunk : Counts) {
55
    const uint64_t ChunkVal = Chunk.first;
56
    const unsigned Count = Chunk.second;
57

58
    uint64_t Encoding = 0;
59

60
    // We are looking for chunks which have two or three instances and can be
61
    // materialized with an ORR instruction.
62
    if ((Count != 2 && Count != 3) || !canUseOrr(ChunkVal, Encoding))
63
      continue;
64

65
    const bool CountThree = Count == 3;
66

67
    Insn.push_back({ AArch64::ORRXri, 0, Encoding });
68

69
    unsigned ShiftAmt = 0;
70
    uint64_t Imm16 = 0;
71
    // Find the first chunk not materialized with the ORR instruction.
72
    for (; ShiftAmt < 64; ShiftAmt += 16) {
73
      Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
74

75
      if (Imm16 != ChunkVal)
76
        break;
77
    }
78

79
    // Create the first MOVK instruction.
80
    Insn.push_back({ AArch64::MOVKXi, Imm16,
81
		     AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt) });
82

83
    // In case we have three instances the whole constant is now materialized
84
    // and we can exit.
85
    if (CountThree)
86
      return true;
87

88
    // Find the remaining chunk which needs to be materialized.
89
    for (ShiftAmt += 16; ShiftAmt < 64; ShiftAmt += 16) {
90
      Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
91

92
      if (Imm16 != ChunkVal)
93
        break;
94
    }
95
    Insn.push_back({ AArch64::MOVKXi, Imm16,
96
                     AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt) });
97
    return true;
98
  }
99

100
  return false;
101
}
102

103
/// Check whether this chunk matches the pattern '1...0...'. This pattern
104
/// starts a contiguous sequence of ones if we look at the bits from the LSB
105
/// towards the MSB.
106
static bool isStartChunk(uint64_t Chunk) {
107
  if (Chunk == 0 || Chunk == std::numeric_limits<uint64_t>::max())
108
    return false;
109

110
  return isMask_64(~Chunk);
111
}
112

113
/// Check whether this chunk matches the pattern '0...1...' This pattern
114
/// ends a contiguous sequence of ones if we look at the bits from the LSB
115
/// towards the MSB.
116
static bool isEndChunk(uint64_t Chunk) {
117
  if (Chunk == 0 || Chunk == std::numeric_limits<uint64_t>::max())
118
    return false;
119

120
  return isMask_64(Chunk);
121
}
122

123
/// Clear or set all bits in the chunk at the given index.
124
static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) {
125
  const uint64_t Mask = 0xFFFF;
126

127
  if (Clear)
128
    // Clear chunk in the immediate.
129
    Imm &= ~(Mask << (Idx * 16));
130
  else
131
    // Set all bits in the immediate for the particular chunk.
132
    Imm |= Mask << (Idx * 16);
133

134
  return Imm;
135
}
136

137
/// Check whether the constant contains a sequence of contiguous ones,
138
/// which might be interrupted by one or two chunks. If so, materialize the
139
/// sequence of contiguous ones with an ORR instruction.
140
/// Materialize the chunks which are either interrupting the sequence or outside
141
/// of the sequence with a MOVK instruction.
142
///
143
/// Assuming S is a chunk which starts the sequence (1...0...), E is a chunk
144
/// which ends the sequence (0...1...). Then we are looking for constants which
145
/// contain at least one S and E chunk.
146
/// E.g. |E|A|B|S|, |A|E|B|S| or |A|B|E|S|.
147
///
148
/// We are also looking for constants like |S|A|B|E| where the contiguous
149
/// sequence of ones wraps around the MSB into the LSB.
150
static bool trySequenceOfOnes(uint64_t UImm,
151
                              SmallVectorImpl<ImmInsnModel> &Insn) {
152
  const int NotSet = -1;
153
  const uint64_t Mask = 0xFFFF;
154

155
  int StartIdx = NotSet;
156
  int EndIdx = NotSet;
157
  // Try to find the chunks which start/end a contiguous sequence of ones.
158
  for (int Idx = 0; Idx < 4; ++Idx) {
159
    int64_t Chunk = getChunk(UImm, Idx);
160
    // Sign extend the 16-bit chunk to 64-bit.
161
    Chunk = (Chunk << 48) >> 48;
162

163
    if (isStartChunk(Chunk))
164
      StartIdx = Idx;
165
    else if (isEndChunk(Chunk))
166
      EndIdx = Idx;
167
  }
168

169
  // Early exit in case we can't find a start/end chunk.
170
  if (StartIdx == NotSet || EndIdx == NotSet)
171
    return false;
172

173
  // Outside of the contiguous sequence of ones everything needs to be zero.
174
  uint64_t Outside = 0;
175
  // Chunks between the start and end chunk need to have all their bits set.
176
  uint64_t Inside = Mask;
177

178
  // If our contiguous sequence of ones wraps around from the MSB into the LSB,
179
  // just swap indices and pretend we are materializing a contiguous sequence
180
  // of zeros surrounded by a contiguous sequence of ones.
181
  if (StartIdx > EndIdx) {
182
    std::swap(StartIdx, EndIdx);
183
    std::swap(Outside, Inside);
184
  }
185

186
  uint64_t OrrImm = UImm;
187
  int FirstMovkIdx = NotSet;
188
  int SecondMovkIdx = NotSet;
189

190
  // Find out which chunks we need to patch up to obtain a contiguous sequence
191
  // of ones.
192
  for (int Idx = 0; Idx < 4; ++Idx) {
193
    const uint64_t Chunk = getChunk(UImm, Idx);
194

195
    // Check whether we are looking at a chunk which is not part of the
196
    // contiguous sequence of ones.
197
    if ((Idx < StartIdx || EndIdx < Idx) && Chunk != Outside) {
198
      OrrImm = updateImm(OrrImm, Idx, Outside == 0);
199

200
      // Remember the index we need to patch.
201
      if (FirstMovkIdx == NotSet)
202
        FirstMovkIdx = Idx;
203
      else
204
        SecondMovkIdx = Idx;
205

206
      // Check whether we are looking a chunk which is part of the contiguous
207
      // sequence of ones.
208
    } else if (Idx > StartIdx && Idx < EndIdx && Chunk != Inside) {
209
      OrrImm = updateImm(OrrImm, Idx, Inside != Mask);
210

211
      // Remember the index we need to patch.
212
      if (FirstMovkIdx == NotSet)
213
        FirstMovkIdx = Idx;
214
      else
215
        SecondMovkIdx = Idx;
216
    }
217
  }
218
  assert(FirstMovkIdx != NotSet && "Constant materializable with single ORR!");
219

220
  // Create the ORR-immediate instruction.
221
  uint64_t Encoding = 0;
222
  AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding);
223
  Insn.push_back({ AArch64::ORRXri, 0, Encoding });
224

225
  const bool SingleMovk = SecondMovkIdx == NotSet;
226
  Insn.push_back({ AArch64::MOVKXi, getChunk(UImm, FirstMovkIdx),
227
                   AArch64_AM::getShifterImm(AArch64_AM::LSL,
228
                                             FirstMovkIdx * 16) });
229

230
  // Early exit in case we only need to emit a single MOVK instruction.
231
  if (SingleMovk)
232
    return true;
233

234
  // Create the second MOVK instruction.
235
  Insn.push_back({ AArch64::MOVKXi, getChunk(UImm, SecondMovkIdx),
236
	           AArch64_AM::getShifterImm(AArch64_AM::LSL,
237
                                             SecondMovkIdx * 16) });
238

239
  return true;
240
}
241

242
static uint64_t GetRunOfOnesStartingAt(uint64_t V, uint64_t StartPosition) {
243
  uint64_t NumOnes = llvm::countr_one(V >> StartPosition);
244

245
  uint64_t UnshiftedOnes;
246
  if (NumOnes == 64) {
247
    UnshiftedOnes = ~0ULL;
248
  } else {
249
    UnshiftedOnes = (1ULL << NumOnes) - 1;
250
  }
251
  return UnshiftedOnes << StartPosition;
252
}
253

254
static uint64_t MaximallyReplicateSubImmediate(uint64_t V, uint64_t Subset) {
255
  uint64_t Result = Subset;
256

257
  // 64, 32, 16, 8, 4, 2
258
  for (uint64_t i = 0; i < 6; ++i) {
259
    uint64_t Rotation = 1ULL << (6 - i);
260
    uint64_t Closure = Result | llvm::rotl<uint64_t>(Result, Rotation);
261
    if (Closure != (Closure & V)) {
262
      break;
263
    }
264
    Result = Closure;
265
  }
266

267
  return Result;
268
}
269

270
// Find the logical immediate that covers the most bits in RemainingBits,
271
// allowing for additional bits to be set that were set in OriginalBits.
272
static uint64_t maximalLogicalImmWithin(uint64_t RemainingBits,
273
                                        uint64_t OriginalBits) {
274
  // Find the first set bit.
275
  uint32_t Position = llvm::countr_zero(RemainingBits);
276

277
  // Get the first run of set bits.
278
  uint64_t FirstRun = GetRunOfOnesStartingAt(OriginalBits, Position);
279

280
  // Replicate the run as many times as possible, as long as the bits are set in
281
  // RemainingBits.
282
  uint64_t MaximalImm = MaximallyReplicateSubImmediate(OriginalBits, FirstRun);
283

284
  return MaximalImm;
285
}
286

287
static std::optional<std::pair<uint64_t, uint64_t>>
288
decomposeIntoOrrOfLogicalImmediates(uint64_t UImm) {
289
  if (UImm == 0 || ~UImm == 0)
290
    return std::nullopt;
291

292
  // Make sure we don't have a run of ones split around the rotation boundary.
293
  uint32_t InitialTrailingOnes = llvm::countr_one(UImm);
294
  uint64_t RotatedBits = llvm::rotr<uint64_t>(UImm, InitialTrailingOnes);
295

296
  // Find the largest logical immediate that fits within the full immediate.
297
  uint64_t MaximalImm1 = maximalLogicalImmWithin(RotatedBits, RotatedBits);
298

299
  // Remove all bits that are set by this mask.
300
  uint64_t RemainingBits = RotatedBits & ~MaximalImm1;
301

302
  // Find the largest logical immediate covering the remaining bits, allowing
303
  // for additional bits to be set that were also set in the original immediate.
304
  uint64_t MaximalImm2 = maximalLogicalImmWithin(RemainingBits, RotatedBits);
305

306
  // If any bits still haven't been covered, then give up.
307
  if (RemainingBits & ~MaximalImm2)
308
    return std::nullopt;
309

310
  // Make sure to un-rotate the immediates.
311
  return std::make_pair(rotl(MaximalImm1, InitialTrailingOnes),
312
                        rotl(MaximalImm2, InitialTrailingOnes));
313
}
314

315
// Attempt to expand an immediate as the ORR of a pair of logical immediates.
316
static bool tryOrrOfLogicalImmediates(uint64_t UImm,
317
                                      SmallVectorImpl<ImmInsnModel> &Insn) {
318
  auto MaybeDecomposition = decomposeIntoOrrOfLogicalImmediates(UImm);
319
  if (MaybeDecomposition == std::nullopt)
320
    return false;
321
  uint64_t Imm1 = MaybeDecomposition->first;
322
  uint64_t Imm2 = MaybeDecomposition->second;
323

324
  uint64_t Encoding1, Encoding2;
325
  bool Imm1Success = AArch64_AM::processLogicalImmediate(Imm1, 64, Encoding1);
326
  bool Imm2Success = AArch64_AM::processLogicalImmediate(Imm2, 64, Encoding2);
327

328
  if (Imm1Success && Imm2Success) {
329
    // Create the ORR-immediate instructions.
330
    Insn.push_back({AArch64::ORRXri, 0, Encoding1});
331
    Insn.push_back({AArch64::ORRXri, 1, Encoding2});
332
    return true;
333
  }
334

335
  return false;
336
}
337

338
// Attempt to expand an immediate as the AND of a pair of logical immediates.
339
// This is done by applying DeMorgan's law, under which logical immediates
340
// are closed.
341
static bool tryAndOfLogicalImmediates(uint64_t UImm,
342
                                      SmallVectorImpl<ImmInsnModel> &Insn) {
343
  // Apply DeMorgan's law to turn this into an ORR problem.
344
  auto MaybeDecomposition = decomposeIntoOrrOfLogicalImmediates(~UImm);
345
  if (MaybeDecomposition == std::nullopt)
346
    return false;
347
  uint64_t Imm1 = MaybeDecomposition->first;
348
  uint64_t Imm2 = MaybeDecomposition->second;
349

350
  uint64_t Encoding1, Encoding2;
351
  bool Imm1Success = AArch64_AM::processLogicalImmediate(~Imm1, 64, Encoding1);
352
  bool Imm2Success = AArch64_AM::processLogicalImmediate(~Imm2, 64, Encoding2);
353

354
  if (Imm1Success && Imm2Success) {
355
    // Materialize Imm1, the LHS of the AND
356
    Insn.push_back({AArch64::ORRXri, 0, Encoding1});
357
    // AND Imm1 with Imm2
358
    Insn.push_back({AArch64::ANDXri, 1, Encoding2});
359
    return true;
360
  }
361

362
  return false;
363
}
364

365
// Check whether the constant can be represented by exclusive-or of two 64-bit
366
// logical immediates. If so, materialize it with an ORR instruction followed
367
// by an EOR instruction.
368
//
369
// This encoding allows all remaining repeated byte patterns, and many repeated
370
// 16-bit values, to be encoded without needing four instructions. It can also
371
// represent some irregular bitmasks (although those would mostly only need
372
// three instructions otherwise).
373
static bool tryEorOfLogicalImmediates(uint64_t Imm,
374
                                      SmallVectorImpl<ImmInsnModel> &Insn) {
375
  // Determine the larger repetition size of the two possible logical
376
  // immediates, by finding the repetition size of Imm.
377
  unsigned BigSize = 64;
378

379
  do {
380
    BigSize /= 2;
381
    uint64_t Mask = (1ULL << BigSize) - 1;
382

383
    if ((Imm & Mask) != ((Imm >> BigSize) & Mask)) {
384
      BigSize *= 2;
385
      break;
386
    }
387
  } while (BigSize > 2);
388

389
  uint64_t BigMask = ((uint64_t)-1LL) >> (64 - BigSize);
390

391
  // Find the last bit of each run of ones, circularly. For runs which wrap
392
  // around from bit 0 to bit 63, this is the bit before the most-significant
393
  // zero, otherwise it is the least-significant bit in the run of ones.
394
  uint64_t RunStarts = Imm & ~rotl<uint64_t>(Imm, 1);
395

396
  // Find the smaller repetition size of the two possible logical immediates by
397
  // counting the number of runs of one-bits within the BigSize-bit value. Both
398
  // sizes may be the same. The EOR may add one or subtract one from the
399
  // power-of-two count that can be represented by a logical immediate, or it
400
  // may be left unchanged.
401
  int RunsPerBigChunk = popcount(RunStarts & BigMask);
402

403
  static const int8_t BigToSmallSizeTable[32] = {
404
      -1, -1, 0,  1,  2,  2,  -1, 3,  3,  3,  -1, -1, -1, -1, -1, 4,
405
      4,  4,  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 5,
406
  };
407

408
  int BigToSmallShift = BigToSmallSizeTable[RunsPerBigChunk];
409

410
  // Early-exit if the big chunk couldn't be a power-of-two number of runs
411
  // EORed with another single run.
412
  if (BigToSmallShift == -1)
413
    return false;
414

415
  unsigned SmallSize = BigSize >> BigToSmallShift;
416

417
  // 64-bit values with a bit set every (1 << index) bits.
418
  static const uint64_t RepeatedOnesTable[] = {
419
      0xffffffffffffffff, 0x5555555555555555, 0x1111111111111111,
420
      0x0101010101010101, 0x0001000100010001, 0x0000000100000001,
421
      0x0000000000000001,
422
  };
423

424
  // This RepeatedOnesTable lookup is a faster implementation of the division
425
  // 0xffffffffffffffff / ((1 << SmallSize) - 1), and can be thought of as
426
  // dividing the 64-bit value into fields of width SmallSize, and placing a
427
  // one in the least significant bit of each field.
428
  uint64_t SmallOnes = RepeatedOnesTable[countr_zero(SmallSize)];
429

430
  // Now we try to find the number of ones in each of the smaller repetitions,
431
  // by looking at runs of ones in Imm. This can take three attempts, as the
432
  // EOR may have changed the length of the first two runs we find.
433

434
  // Rotate a run of ones so we can count the number of trailing set bits.
435
  int Rotation = countr_zero(RunStarts);
436
  uint64_t RotatedImm = rotr<uint64_t>(Imm, Rotation);
437
  for (int Attempt = 0; Attempt < 3; ++Attempt) {
438
    unsigned RunLength = countr_one(RotatedImm);
439

440
    // Construct candidate values BigImm and SmallImm, such that if these two
441
    // values are encodable, we have a solution. (SmallImm is constructed to be
442
    // encodable, but this isn't guaranteed when RunLength >= SmallSize)
443
    uint64_t SmallImm =
444
        rotl<uint64_t>((SmallOnes << RunLength) - SmallOnes, Rotation);
445
    uint64_t BigImm = Imm ^ SmallImm;
446

447
    uint64_t BigEncoding = 0;
448
    uint64_t SmallEncoding = 0;
449
    if (AArch64_AM::processLogicalImmediate(BigImm, 64, BigEncoding) &&
450
        AArch64_AM::processLogicalImmediate(SmallImm, 64, SmallEncoding)) {
451
      Insn.push_back({AArch64::ORRXri, 0, SmallEncoding});
452
      Insn.push_back({AArch64::EORXri, 1, BigEncoding});
453
      return true;
454
    }
455

456
    // Rotate to the next run of ones
457
    Rotation += countr_zero(rotr<uint64_t>(RunStarts, Rotation) & ~1);
458
    RotatedImm = rotr<uint64_t>(Imm, Rotation);
459
  }
460

461
  return false;
462
}
463

464
/// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to a
465
/// MOVZ or MOVN of width BitSize followed by up to 3 MOVK instructions.
466
static inline void expandMOVImmSimple(uint64_t Imm, unsigned BitSize,
467
				      unsigned OneChunks, unsigned ZeroChunks,
468
				      SmallVectorImpl<ImmInsnModel> &Insn) {
469
  const unsigned Mask = 0xFFFF;
470

471
  // Use a MOVZ or MOVN instruction to set the high bits, followed by one or
472
  // more MOVK instructions to insert additional 16-bit portions into the
473
  // lower bits.
474
  bool isNeg = false;
475

476
  // Use MOVN to materialize the high bits if we have more all one chunks
477
  // than all zero chunks.
478
  if (OneChunks > ZeroChunks) {
479
    isNeg = true;
480
    Imm = ~Imm;
481
  }
482

483
  unsigned FirstOpc;
484
  if (BitSize == 32) {
485
    Imm &= (1LL << 32) - 1;
486
    FirstOpc = (isNeg ? AArch64::MOVNWi : AArch64::MOVZWi);
487
  } else {
488
    FirstOpc = (isNeg ? AArch64::MOVNXi : AArch64::MOVZXi);
489
  }
490
  unsigned Shift = 0;     // LSL amount for high bits with MOVZ/MOVN
491
  unsigned LastShift = 0; // LSL amount for last MOVK
492
  if (Imm != 0) {
493
    unsigned LZ = llvm::countl_zero(Imm);
494
    unsigned TZ = llvm::countr_zero(Imm);
495
    Shift = (TZ / 16) * 16;
496
    LastShift = ((63 - LZ) / 16) * 16;
497
  }
498
  unsigned Imm16 = (Imm >> Shift) & Mask;
499

500
  Insn.push_back({ FirstOpc, Imm16,
501
                   AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift) });
502

503
  if (Shift == LastShift)
504
    return;
505

506
  // If a MOVN was used for the high bits of a negative value, flip the rest
507
  // of the bits back for use with MOVK.
508
  if (isNeg)
509
    Imm = ~Imm;
510

511
  unsigned Opc = (BitSize == 32 ? AArch64::MOVKWi : AArch64::MOVKXi);
512
  while (Shift < LastShift) {
513
    Shift += 16;
514
    Imm16 = (Imm >> Shift) & Mask;
515
    if (Imm16 == (isNeg ? Mask : 0))
516
      continue; // This 16-bit portion is already set correctly.
517

518
    Insn.push_back({ Opc, Imm16,
519
                     AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift) });
520
  }
521

522
  // Now, we get 16-bit divided Imm. If high and low bits are same in
523
  // 32-bit, there is an opportunity to reduce instruction.
524
  if (Insn.size() > 2 && (Imm >> 32) == (Imm & 0xffffffffULL)) {
525
    for (int Size = Insn.size(); Size > 2; Size--)
526
      Insn.pop_back();
527
    Insn.push_back({AArch64::ORRXrs, 0, 32});
528
  }
529
}
530

531
/// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
532
/// real move-immediate instructions to synthesize the immediate.
533
void AArch64_IMM::expandMOVImm(uint64_t Imm, unsigned BitSize,
534
                               SmallVectorImpl<ImmInsnModel> &Insn) {
535
  const unsigned Mask = 0xFFFF;
536

537
  // Scan the immediate and count the number of 16-bit chunks which are either
538
  // all ones or all zeros.
539
  unsigned OneChunks = 0;
540
  unsigned ZeroChunks = 0;
541
  for (unsigned Shift = 0; Shift < BitSize; Shift += 16) {
542
    const unsigned Chunk = (Imm >> Shift) & Mask;
543
    if (Chunk == Mask)
544
      OneChunks++;
545
    else if (Chunk == 0)
546
      ZeroChunks++;
547
  }
548

549
  // Prefer MOVZ/MOVN over ORR because of the rules for the "mov" alias.
550
  if ((BitSize / 16) - OneChunks <= 1 || (BitSize / 16) - ZeroChunks <= 1) {
551
    expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
552
    return;
553
  }
554

555
  // Try a single ORR.
556
  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
557
  uint64_t Encoding;
558
  if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
559
    unsigned Opc = (BitSize == 32 ? AArch64::ORRWri : AArch64::ORRXri);
560
    Insn.push_back({ Opc, 0, Encoding });
561
    return;
562
  }
563

564
  // One to up three instruction sequences.
565
  //
566
  // Prefer MOVZ/MOVN followed by MOVK; it's more readable, and possibly the
567
  // fastest sequence with fast literal generation.
568
  if (OneChunks >= (BitSize / 16) - 2 || ZeroChunks >= (BitSize / 16) - 2) {
569
    expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
570
    return;
571
  }
572

573
  assert(BitSize == 64 && "All 32-bit immediates can be expanded with a"
574
                          "MOVZ/MOVK pair");
575

576
  // Try other two-instruction sequences.
577

578
  // 64-bit ORR followed by MOVK.
579
  // We try to construct the ORR immediate in three different ways: either we
580
  // zero out the chunk which will be replaced, we fill the chunk which will
581
  // be replaced with ones, or we take the bit pattern from the other half of
582
  // the 64-bit immediate. This is comprehensive because of the way ORR
583
  // immediates are constructed.
584
  for (unsigned Shift = 0; Shift < BitSize; Shift += 16) {
585
    uint64_t ShiftedMask = (0xFFFFULL << Shift);
586
    uint64_t ZeroChunk = UImm & ~ShiftedMask;
587
    uint64_t OneChunk = UImm | ShiftedMask;
588
    uint64_t RotatedImm = (UImm << 32) | (UImm >> 32);
589
    uint64_t ReplicateChunk = ZeroChunk | (RotatedImm & ShiftedMask);
590
    if (AArch64_AM::processLogicalImmediate(ZeroChunk, BitSize, Encoding) ||
591
        AArch64_AM::processLogicalImmediate(OneChunk, BitSize, Encoding) ||
592
        AArch64_AM::processLogicalImmediate(ReplicateChunk, BitSize,
593
                                            Encoding)) {
594
      // Create the ORR-immediate instruction.
595
      Insn.push_back({ AArch64::ORRXri, 0, Encoding });
596

597
      // Create the MOVK instruction.
598
      const unsigned Imm16 = getChunk(UImm, Shift / 16);
599
      Insn.push_back({ AArch64::MOVKXi, Imm16,
600
		       AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift) });
601
      return;
602
    }
603
  }
604

605
  // Attempt to use a sequence of two ORR-immediate instructions.
606
  if (tryOrrOfLogicalImmediates(Imm, Insn))
607
    return;
608

609
  // Attempt to use a sequence of ORR-immediate followed by AND-immediate.
610
  if (tryAndOfLogicalImmediates(Imm, Insn))
611
    return;
612

613
  // Attempt to use a sequence of ORR-immediate followed by EOR-immediate.
614
  if (tryEorOfLogicalImmediates(UImm, Insn))
615
    return;
616

617
  // FIXME: Add more two-instruction sequences.
618

619
  // Three instruction sequences.
620
  //
621
  // Prefer MOVZ/MOVN followed by two MOVK; it's more readable, and possibly
622
  // the fastest sequence with fast literal generation. (If neither MOVK is
623
  // part of a fast literal generation pair, it could be slower than the
624
  // four-instruction sequence, but we won't worry about that for now.)
625
  if (OneChunks || ZeroChunks) {
626
    expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
627
    return;
628
  }
629

630
  // Check for identical 16-bit chunks within the constant and if so materialize
631
  // them with a single ORR instruction. The remaining one or two 16-bit chunks
632
  // will be materialized with MOVK instructions.
633
  if (BitSize == 64 && tryToreplicateChunks(UImm, Insn))
634
    return;
635

636
  // Check whether the constant contains a sequence of contiguous ones, which
637
  // might be interrupted by one or two chunks. If so, materialize the sequence
638
  // of contiguous ones with an ORR instruction. Materialize the chunks which
639
  // are either interrupting the sequence or outside of the sequence with a
640
  // MOVK instruction.
641
  if (BitSize == 64 && trySequenceOfOnes(UImm, Insn))
642
    return;
643

644
  // We found no possible two or three instruction sequence; use the general
645
  // four-instruction sequence.
646
  expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
647
}
648

649
Product

Resources

Company