CoCalc -- APInt.cpp

GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Support/APInt.cpp
¹⁰²⁸⁶⁵ views
1
//===-- APInt.cpp - Implement APInt class ---------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements a class to represent arbitrary precision integer
10
// constant values and provide a variety of arithmetic operations on them.
11
//
12
//===----------------------------------------------------------------------===//
13

14
#include "llvm/ADT/APInt.h"
15
#include "llvm/ADT/ArrayRef.h"
16
#include "llvm/ADT/FoldingSet.h"
17
#include "llvm/ADT/Hashing.h"
18
#include "llvm/ADT/SmallString.h"
19
#include "llvm/ADT/StringRef.h"
20
#include "llvm/ADT/bit.h"
21
#include "llvm/Config/llvm-config.h"
22
#include "llvm/Support/Alignment.h"
23
#include "llvm/Support/Debug.h"
24
#include "llvm/Support/ErrorHandling.h"
25
#include "llvm/Support/MathExtras.h"
26
#include "llvm/Support/raw_ostream.h"
27
#include <cmath>
28
#include <optional>
29

30
using namespace llvm;
31

32
#define DEBUG_TYPE "apint"
33

34
/// Allocate an array of \p numWords 64-bit words in which every word is zero.
/// The array comes from new[], so the caller must release it with delete[].
inline static uint64_t* getClearedMemory(unsigned numWords) {
  // The trailing "()" value-initializes the array, i.e. zero-fills it.
  return new uint64_t[numWords]();
}
41

42
/// Allocate an array of \p numWords 64-bit words without initializing it.
/// The array comes from new[], so the caller must release it with delete[]
/// and must write every word before reading it.
inline static uint64_t* getMemory(unsigned numWords) {
  uint64_t *words = new uint64_t[numWords];
  return words;
}
47

48
/// Convert the character \p cdigit to its numeric value in base \p radix.
///
/// For radix 16 and 36 the letters (either case) are accepted in addition to
/// the decimal digits; for every other radix only '0' .. radix-1 are accepted.
/// \returns the digit value, or UINT_MAX when \p cdigit is not a valid digit.
inline static unsigned getDigit(char cdigit, uint8_t radix) {
  // Unsigned wrap-around turns "below the start of the range" into a huge
  // value, so one upper-bound comparison validates both ends of each range.
  const unsigned decimal = cdigit - '0';

  if (radix == 16 || radix == 36) {
    if (decimal <= 9)
      return decimal;

    // Index of the last letter that is a legal digit: 5 ('F') or 25 ('Z').
    const unsigned lastLetter = radix - 11U;

    const unsigned upper = cdigit - 'A';
    if (upper <= lastLetter)
      return upper + 10;

    const unsigned lower = cdigit - 'a';
    if (lower <= lastLetter)
      return lower + 10;

    // Neither a decimal digit nor an accepted letter.
    return UINT_MAX;
  }

  return decimal < radix ? decimal : UINT_MAX;
}
74

75

76
void APInt::initSlowCase(uint64_t val, bool isSigned) {
77
  U.pVal = getClearedMemory(getNumWords());
78
  U.pVal[0] = val;
79
  if (isSigned && int64_t(val) < 0)
80
    for (unsigned i = 1; i < getNumWords(); ++i)
81
      U.pVal[i] = WORDTYPE_MAX;
82
  clearUnusedBits();
83
}
84

85
void APInt::initSlowCase(const APInt& that) {
86
  U.pVal = getMemory(getNumWords());
87
  memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
88
}
89

90
/// Initialize the value from the words in \p bigVal (least significant word
/// first). Words beyond this value's word count are ignored, missing words
/// are left zero, and the unused high bits are cleared afterwards.
void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
  assert(bigVal.data() && "Null pointer detected!");

  if (!isSingleWord()) {
    // Start from zeroed storage so that words not supplied stay zero.
    const unsigned NumWords = getNumWords();
    U.pVal = getClearedMemory(NumWords);

    // Copy only as many words as both sides can hold.
    const unsigned WordsToCopy = std::min<unsigned>(bigVal.size(), NumWords);
    memcpy(U.pVal, bigVal.data(), WordsToCopy * APINT_WORD_SIZE);
  } else {
    U.VAL = bigVal[0];
  }

  // Drop anything above BitWidth.
  clearUnusedBits();
}
105

106
/// Construct a \p numBits wide value from the words in \p bigVal; the actual
/// copying and masking is delegated to initFromArray.
APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
    : BitWidth(numBits) {
  initFromArray(bigVal);
}
109

110
/// Construct a \p numBits wide value from the first \p numWords words of the
/// C array \p bigVal; the array is wrapped in an ArrayRef and handed to
/// initFromArray.
APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
    : BitWidth(numBits) {
  ArrayRef<uint64_t> Words(bigVal, numWords);
  initFromArray(Words);
}
114

115
/// Construct a \p numbits wide value by parsing \p Str in base \p radix; the
/// parsing itself is done by fromString.
APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
    : BitWidth(numbits)
{
  fromString(numbits, Str, radix);
}
119

120
void APInt::reallocate(unsigned NewBitWidth) {
121
  // If the number of words is the same we can just change the width and stop.
122
  if (getNumWords() == getNumWords(NewBitWidth)) {
123
    BitWidth = NewBitWidth;
124
    return;
125
  }
126

127
  // If we have an allocation, delete it.
128
  if (!isSingleWord())
129
    delete [] U.pVal;
130

131
  // Update BitWidth.
132
  BitWidth = NewBitWidth;
133

134
  // If we are supposed to have an allocation, create it.
135
  if (!isSingleWord())
136
    U.pVal = getMemory(getNumWords());
137
}
138

139
void APInt::assignSlowCase(const APInt &RHS) {
140
  // Don't do anything for X = X
141
  if (this == &RHS)
142
    return;
143

144
  // Adjust the bit width and handle allocations as necessary.
145
  reallocate(RHS.getBitWidth());
146

147
  // Copy the data.
148
  if (isSingleWord())
149
    U.VAL = RHS.U.VAL;
150
  else
151
    memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
152
}
153

154
/// This method 'profiles' an APInt for use with FoldingSet.
155
void APInt::Profile(FoldingSetNodeID& ID) const {
156
  ID.AddInteger(BitWidth);
157

158
  if (isSingleWord()) {
159
    ID.AddInteger(U.VAL);
160
    return;
161
  }
162

163
  unsigned NumWords = getNumWords();
164
  for (unsigned i = 0; i < NumWords; ++i)
165
    ID.AddInteger(U.pVal[i]);
166
}
167

168
/// \returns true if this value is zero or has at least Log2(\p A) trailing
/// zero bits.
bool APInt::isAligned(Align A) const {
  // Zero is aligned to everything; otherwise compare trailing-zero counts.
  return isZero() || countr_zero() >= Log2(A);
}
175

176
/// Prefix increment operator. Increments the APInt by one.
177
APInt& APInt::operator++() {
178
  if (isSingleWord())
179
    ++U.VAL;
180
  else
181
    tcIncrement(U.pVal, getNumWords());
182
  return clearUnusedBits();
183
}
184

185
/// Prefix decrement operator. Decrements the APInt by one.
186
APInt& APInt::operator--() {
187
  if (isSingleWord())
188
    --U.VAL;
189
  else
190
    tcDecrement(U.pVal, getNumWords());
191
  return clearUnusedBits();
192
}
193

194
/// Adds the RHS APInt to this APInt.
195
/// @returns this, after addition of RHS.
196
/// Addition assignment operator.
197
APInt& APInt::operator+=(const APInt& RHS) {
198
  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
199
  if (isSingleWord())
200
    U.VAL += RHS.U.VAL;
201
  else
202
    tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
203
  return clearUnusedBits();
204
}
205

206
/// Addition assignment operator taking a 64-bit addend.
/// @returns this, after addition of RHS.
APInt& APInt::operator+=(uint64_t RHS) {
  if (!isSingleWord())
    tcAddPart(U.pVal, RHS, getNumWords());
  else
    U.VAL += RHS;
  return clearUnusedBits();
}
213

214
/// Subtracts the RHS APInt from this APInt
215
/// @returns this, after subtraction
216
/// Subtraction assignment operator.
217
APInt& APInt::operator-=(const APInt& RHS) {
218
  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
219
  if (isSingleWord())
220
    U.VAL -= RHS.U.VAL;
221
  else
222
    tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
223
  return clearUnusedBits();
224
}
225

226
/// Subtraction assignment operator taking a 64-bit subtrahend.
/// @returns this, after subtraction
APInt& APInt::operator-=(uint64_t RHS) {
  if (!isSingleWord())
    tcSubtractPart(U.pVal, RHS, getNumWords());
  else
    U.VAL -= RHS;
  return clearUnusedBits();
}
233

234
/// Multiply this value by \p RHS (same bit width required) and return the
/// product truncated to the bit width.
APInt APInt::operator*(const APInt& RHS) const {
  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");

  if (!isSingleWord()) {
    // Multi-word: multiply into freshly allocated storage owned by Product.
    const unsigned NumWords = getNumWords();
    APInt Product(getMemory(NumWords), getBitWidth());
    tcMultiply(Product.U.pVal, U.pVal, RHS.U.pVal, NumWords);
    Product.clearUnusedBits();
    return Product;
  }

  return APInt(BitWidth, U.VAL * RHS.U.VAL);
}
244

245
void APInt::andAssignSlowCase(const APInt &RHS) {
246
  WordType *dst = U.pVal, *rhs = RHS.U.pVal;
247
  for (size_t i = 0, e = getNumWords(); i != e; ++i)
248
    dst[i] &= rhs[i];
249
}
250

251
void APInt::orAssignSlowCase(const APInt &RHS) {
252
  WordType *dst = U.pVal, *rhs = RHS.U.pVal;
253
  for (size_t i = 0, e = getNumWords(); i != e; ++i)
254
    dst[i] |= rhs[i];
255
}
256

257
void APInt::xorAssignSlowCase(const APInt &RHS) {
258
  WordType *dst = U.pVal, *rhs = RHS.U.pVal;
259
  for (size_t i = 0, e = getNumWords(); i != e; ++i)
260
    dst[i] ^= rhs[i];
261
}
262

263
/// Multiplication assignment operator: replaces this value with the product
/// computed by operator*. \returns *this.
APInt &APInt::operator*=(const APInt &RHS) {
  return *this = *this * RHS;
}
267

268
/// Multiplication assignment operator taking a 64-bit multiplier. The product
/// is truncated to the bit width. \returns *this.
APInt& APInt::operator*=(uint64_t RHS) {
  if (!isSingleWord()) {
    // In-place multi-word multiply; source and destination are the same words.
    const unsigned Parts = getNumWords();
    tcMultiplyPart(U.pVal, U.pVal, RHS, 0, Parts, Parts, false);
  } else {
    U.VAL *= RHS;
  }
  return clearUnusedBits();
}
277

278
bool APInt::equalSlowCase(const APInt &RHS) const {
279
  return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
280
}
281

282
int APInt::compare(const APInt& RHS) const {
283
  assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
284
  if (isSingleWord())
285
    return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
286

287
  return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
288
}
289

290
int APInt::compareSigned(const APInt& RHS) const {
291
  assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
292
  if (isSingleWord()) {
293
    int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
294
    int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
295
    return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
296
  }
297

298
  bool lhsNeg = isNegative();
299
  bool rhsNeg = RHS.isNegative();
300

301
  // If the sign bits don't match, then (LHS < RHS) if LHS is negative
302
  if (lhsNeg != rhsNeg)
303
    return lhsNeg ? -1 : 1;
304

305
  // Otherwise we can just use an unsigned comparison, because even negative
306
  // numbers compare correctly this way if both have the same signed-ness.
307
  return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
308
}
309

310
void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
311
  unsigned loWord = whichWord(loBit);
312
  unsigned hiWord = whichWord(hiBit);
313

314
  // Create an initial mask for the low word with zeros below loBit.
315
  uint64_t loMask = WORDTYPE_MAX << whichBit(loBit);
316

317
  // If hiBit is not aligned, we need a high mask.
318
  unsigned hiShiftAmt = whichBit(hiBit);
319
  if (hiShiftAmt != 0) {
320
    // Create a high mask with zeros above hiBit.
321
    uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
322
    // If loWord and hiWord are equal, then we combine the masks. Otherwise,
323
    // set the bits in hiWord.
324
    if (hiWord == loWord)
325
      loMask &= hiMask;
326
    else
327
      U.pVal[hiWord] |= hiMask;
328
  }
329
  // Apply the mask to the low word.
330
  U.pVal[loWord] |= loMask;
331

332
  // Fill any words between loWord and hiWord with all ones.
333
  for (unsigned word = loWord + 1; word < hiWord; ++word)
334
    U.pVal[word] = WORDTYPE_MAX;
335
}
336

337
// Complement a bignum in-place.
338
static void tcComplement(APInt::WordType *dst, unsigned parts) {
339
  for (unsigned i = 0; i < parts; i++)
340
    dst[i] = ~dst[i];
341
}
342

343
/// Toggle every bit to its opposite value.
344
void APInt::flipAllBitsSlowCase() {
345
  tcComplement(U.pVal, getNumWords());
346
  clearUnusedBits();
347
}
348

349
/// Concatenate the bits from "NewLSB" onto the bottom of *this.  This is
350
/// equivalent to:
351
///   (this->zext(NewWidth) << NewLSB.getBitWidth()) | NewLSB.zext(NewWidth)
352
/// In the slow case, we know the result is large.
353
APInt APInt::concatSlowCase(const APInt &NewLSB) const {
354
  unsigned NewWidth = getBitWidth() + NewLSB.getBitWidth();
355
  APInt Result = NewLSB.zext(NewWidth);
356
  Result.insertBits(*this, NewLSB.getBitWidth());
357
  return Result;
358
}
359

360
/// Toggle a given bit to its opposite value whose position is given
361
/// as "bitPosition".
362
/// Toggles a given bit to its opposite value.
363
void APInt::flipBit(unsigned bitPosition) {
364
  assert(bitPosition < BitWidth && "Out of the bit-width range!");
365
  setBitVal(bitPosition, !(*this)[bitPosition]);
366
}
367

368
void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
369
  unsigned subBitWidth = subBits.getBitWidth();
370
  assert((subBitWidth + bitPosition) <= BitWidth && "Illegal bit insertion");
371

372
  // inserting no bits is a noop.
373
  if (subBitWidth == 0)
374
    return;
375

376
  // Insertion is a direct copy.
377
  if (subBitWidth == BitWidth) {
378
    *this = subBits;
379
    return;
380
  }
381

382
  // Single word result can be done as a direct bitmask.
383
  if (isSingleWord()) {
384
    uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
385
    U.VAL &= ~(mask << bitPosition);
386
    U.VAL |= (subBits.U.VAL << bitPosition);
387
    return;
388
  }
389

390
  unsigned loBit = whichBit(bitPosition);
391
  unsigned loWord = whichWord(bitPosition);
392
  unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
393

394
  // Insertion within a single word can be done as a direct bitmask.
395
  if (loWord == hi1Word) {
396
    uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
397
    U.pVal[loWord] &= ~(mask << loBit);
398
    U.pVal[loWord] |= (subBits.U.VAL << loBit);
399
    return;
400
  }
401

402
  // Insert on word boundaries.
403
  if (loBit == 0) {
404
    // Direct copy whole words.
405
    unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
406
    memcpy(U.pVal + loWord, subBits.getRawData(),
407
           numWholeSubWords * APINT_WORD_SIZE);
408

409
    // Mask+insert remaining bits.
410
    unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
411
    if (remainingBits != 0) {
412
      uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);
413
      U.pVal[hi1Word] &= ~mask;
414
      U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
415
    }
416
    return;
417
  }
418

419
  // General case - set/clear individual bits in dst based on src.
420
  // TODO - there is scope for optimization here, but at the moment this code
421
  // path is barely used so prefer readability over performance.
422
  for (unsigned i = 0; i != subBitWidth; ++i)
423
    setBitVal(bitPosition + i, subBits[i]);
424
}
425

426
void APInt::insertBits(uint64_t subBits, unsigned bitPosition, unsigned numBits) {
427
  uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
428
  subBits &= maskBits;
429
  if (isSingleWord()) {
430
    U.VAL &= ~(maskBits << bitPosition);
431
    U.VAL |= subBits << bitPosition;
432
    return;
433
  }
434

435
  unsigned loBit = whichBit(bitPosition);
436
  unsigned loWord = whichWord(bitPosition);
437
  unsigned hiWord = whichWord(bitPosition + numBits - 1);
438
  if (loWord == hiWord) {
439
    U.pVal[loWord] &= ~(maskBits << loBit);
440
    U.pVal[loWord] |= subBits << loBit;
441
    return;
442
  }
443

444
  static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
445
  unsigned wordBits = 8 * sizeof(WordType);
446
  U.pVal[loWord] &= ~(maskBits << loBit);
447
  U.pVal[loWord] |= subBits << loBit;
448

449
  U.pVal[hiWord] &= ~(maskBits >> (wordBits - loBit));
450
  U.pVal[hiWord] |= subBits >> (wordBits - loBit);
451
}
452

453
/// Return a \p numBits wide value holding the bits
/// [bitPosition, bitPosition + numBits) of this value.
APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
  assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
         "Illegal bit extraction");

  // Extracting no bits yields a zero-width value. Handle it before the word
  // arithmetic below: with numBits == 0, "bitPosition + numBits - 1" wraps or
  // lands in the previous word, and the word-aligned fast path would then
  // index into an empty ArrayRef.
  if (numBits == 0)
    return APInt(numBits, 0);

  if (isSingleWord())
    return APInt(numBits, U.VAL >> bitPosition);

  unsigned loBit = whichBit(bitPosition);
  unsigned loWord = whichWord(bitPosition);
  unsigned hiWord = whichWord(bitPosition + numBits - 1);

  // Single word result extracting bits from a single word source.
  if (loWord == hiWord)
    return APInt(numBits, U.pVal[loWord] >> loBit);

  // Extracting bits that start on a source word boundary can be done
  // as a fast memory copy.
  if (loBit == 0)
    return APInt(numBits, ArrayRef(U.pVal + loWord, 1 + hiWord - loWord));

  // General case - shift + copy source words directly into place.
  APInt Result(numBits, 0);
  unsigned NumSrcWords = getNumWords();
  unsigned NumDstWords = Result.getNumWords();

  uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal;
  for (unsigned word = 0; word < NumDstWords; ++word) {
    // Each destination word combines the top of one source word with the
    // bottom of the next; past the end of the source the next word is zero.
    uint64_t w0 = U.pVal[loWord + word];
    uint64_t w1 =
        (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
    DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
  }

  return Result.clearUnusedBits();
}
488

489
uint64_t APInt::extractBitsAsZExtValue(unsigned numBits,
490
                                       unsigned bitPosition) const {
491
  assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
492
         "Illegal bit extraction");
493
  assert(numBits <= 64 && "Illegal bit extraction");
494

495
  uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
496
  if (isSingleWord())
497
    return (U.VAL >> bitPosition) & maskBits;
498

499
  unsigned loBit = whichBit(bitPosition);
500
  unsigned loWord = whichWord(bitPosition);
501
  unsigned hiWord = whichWord(bitPosition + numBits - 1);
502
  if (loWord == hiWord)
503
    return (U.pVal[loWord] >> loBit) & maskBits;
504

505
  static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
506
  unsigned wordBits = 8 * sizeof(WordType);
507
  uint64_t retBits = U.pVal[loWord] >> loBit;
508
  retBits |= U.pVal[hiWord] << (wordBits - loBit);
509
  retBits &= maskBits;
510
  return retBits;
511
}
512

513
/// Return a bit width that is large enough to hold the number spelled by
/// \p Str in base \p Radix. \p Str must be non-empty and may start with a
/// '+' or '-' sign. Supported radixes are 2, 8, 10, 16 and 36.
unsigned APInt::getSufficientBitsNeeded(StringRef Str, uint8_t Radix) {
  assert(!Str.empty() && "Invalid string length");
  size_t StrLen = Str.size();

  // A leading sign is not a digit; a '-' costs one extra bit in every case.
  const char First = Str[0];
  const unsigned IsNegative = First == '-';
  if (IsNegative || First == '+') {
    --StrLen;
    assert(StrLen && "String is only a sign, needs a value.");
  }

  switch (Radix) {
  // Power-of-two radixes use a fixed number of bits per digit.
  case 2:
    return StrLen + IsNegative;
  case 8:
    return StrLen * 3 + IsNegative;
  case 16:
    return StrLen * 4 + IsNegative;
  // For radix 10 use an estimate that is always large enough but might be too
  // large. This avoids the assertion in the constructor. The estimate doesn't
  // work appropriately for the numbers 0-9, so just use 4 bits in that case.
  case 10:
    return (StrLen == 1 ? 4 : StrLen * 64 / 18) + IsNegative;
  default:
    break;
  }

  // Same idea for radix 36, with 7 bits reserved for a single digit.
  assert(Radix == 36);
  return (StrLen == 1 ? 7 : StrLen * 16 / 3) + IsNegative;
}
544

545
/// Return the number of bits needed to hold the number spelled by \p str in
/// base \p radix. For radixes 2, 8 and 16 this is the value computed by
/// getSufficientBitsNeeded; for the other radixes the string is parsed and
/// the width is derived from the parsed magnitude.
unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
  // Upper bound on the width; always large enough but possibly too large.
  const unsigned sufficient = getSufficientBitsNeeded(str, radix);

  // For bases 2, 8, and 16 the bound is already the answer.
  const bool isPowerOfTwoRadix = radix == 2 || radix == 8 || radix == 16;
  if (isPowerOfTwoRadix)
    return sufficient;

  // This is grossly inefficient but accurate. We could probably do something
  // with a computation of roughly slen*64/20 and then adjust by the value of
  // the first few digits. But, I'm not sure how accurate that could be.
  StringRef::iterator digits = str.begin();
  size_t numDigits = str.size();

  // Strip an optional sign, remembering whether it was a minus.
  const unsigned isNegative = *digits == '-';
  if (isNegative || *digits == '+') {
    ++digits;
    --numDigits;
    assert(numDigits && "String is only a sign, needs a value.");
  }

  // Convert the magnitude to its actual binary value.
  APInt tmp(sufficient, StringRef(digits, numDigits), radix);

  // Compute how many bits are required. If the log is infinite, assume we
  // need just one bit. If the value is negative and its magnitude is a power
  // of two, then the value is MinSignedValue with (log + 1) bits.
  const unsigned log = tmp.logBase2();
  if (log == (unsigned)-1)
    return isNegative + 1;
  if (isNegative && tmp.isPowerOf2())
    return isNegative + log;
  return isNegative + log + 1;
}
585

586
/// Hash an APInt by combining its bit width with its value: the inline word
/// for single-word values, or the hash of all words otherwise.
hash_code llvm::hash_value(const APInt &Arg) {
  if (!Arg.isSingleWord()) {
    const hash_code WordsHash =
        hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords());
    return hash_combine(Arg.BitWidth, WordsHash);
  }

  return hash_combine(Arg.BitWidth, Arg.U.VAL);
}
594

595
/// DenseMap hashing hook: the hash_value of \p Key converted to unsigned.
unsigned DenseMapInfo<APInt, void>::getHashValue(const APInt &Key) {
  const hash_code Hash = hash_value(Key);
  return static_cast<unsigned>(Hash);
}
598

599
bool APInt::isSplat(unsigned SplatSizeInBits) const {
600
  assert(getBitWidth() % SplatSizeInBits == 0 &&
601
         "SplatSizeInBits must divide width!");
602
  // We can check that all parts of an integer are equal by making use of a
603
  // little trick: rotate and check if it's still the same value.
604
  return *this == rotl(SplatSizeInBits);
605
}
606

607
/// This function returns the high "numBits" bits of this APInt.
608
APInt APInt::getHiBits(unsigned numBits) const {
609
  return this->lshr(BitWidth - numBits);
610
}
611

612
/// This function returns the low "numBits" bits of this APInt.
613
APInt APInt::getLoBits(unsigned numBits) const {
614
  APInt Result(getLowBitsSet(BitWidth, numBits));
615
  Result &= *this;
616
  return Result;
617
}
618

619
/// Return a value containing V broadcasted over NewLen bits.
620
APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
621
  assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
622

623
  APInt Val = V.zext(NewLen);
624
  for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
625
    Val |= Val << I;
626

627
  return Val;
628
}
629

630
unsigned APInt::countLeadingZerosSlowCase() const {
631
  unsigned Count = 0;
632
  for (int i = getNumWords()-1; i >= 0; --i) {
633
    uint64_t V = U.pVal[i];
634
    if (V == 0)
635
      Count += APINT_BITS_PER_WORD;
636
    else {
637
      Count += llvm::countl_zero(V);
638
      break;
639
    }
640
  }
641
  // Adjust for unused bits in the most significant word (they are zero).
642
  unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
643
  Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
644
  return Count;
645
}
646

647
unsigned APInt::countLeadingOnesSlowCase() const {
648
  unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
649
  unsigned shift;
650
  if (!highWordBits) {
651
    highWordBits = APINT_BITS_PER_WORD;
652
    shift = 0;
653
  } else {
654
    shift = APINT_BITS_PER_WORD - highWordBits;
655
  }
656
  int i = getNumWords() - 1;
657
  unsigned Count = llvm::countl_one(U.pVal[i] << shift);
658
  if (Count == highWordBits) {
659
    for (i--; i >= 0; --i) {
660
      if (U.pVal[i] == WORDTYPE_MAX)
661
        Count += APINT_BITS_PER_WORD;
662
      else {
663
        Count += llvm::countl_one(U.pVal[i]);
664
        break;
665
      }
666
    }
667
  }
668
  return Count;
669
}
670

671
unsigned APInt::countTrailingZerosSlowCase() const {
672
  unsigned Count = 0;
673
  unsigned i = 0;
674
  for (; i < getNumWords() && U.pVal[i] == 0; ++i)
675
    Count += APINT_BITS_PER_WORD;
676
  if (i < getNumWords())
677
    Count += llvm::countr_zero(U.pVal[i]);
678
  return std::min(Count, BitWidth);
679
}
680

681
unsigned APInt::countTrailingOnesSlowCase() const {
682
  unsigned Count = 0;
683
  unsigned i = 0;
684
  for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i)
685
    Count += APINT_BITS_PER_WORD;
686
  if (i < getNumWords())
687
    Count += llvm::countr_one(U.pVal[i]);
688
  assert(Count <= BitWidth);
689
  return Count;
690
}
691

692
unsigned APInt::countPopulationSlowCase() const {
693
  unsigned Count = 0;
694
  for (unsigned i = 0; i < getNumWords(); ++i)
695
    Count += llvm::popcount(U.pVal[i]);
696
  return Count;
697
}
698

699
bool APInt::intersectsSlowCase(const APInt &RHS) const {
700
  for (unsigned i = 0, e = getNumWords(); i != e; ++i)
701
    if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
702
      return true;
703

704
  return false;
705
}
706

707
bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
708
  for (unsigned i = 0, e = getNumWords(); i != e; ++i)
709
    if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
710
      return false;
711

712
  return true;
713
}
714

715
/// Return this value with the order of its bytes reversed. The bit width
/// must be a multiple of 8 and at least 16.
APInt APInt::byteSwap() const {
  assert(BitWidth >= 16 && BitWidth % 8 == 0 && "Cannot byteswap!");

  // Widths with a native swap.
  switch (BitWidth) {
  case 16:
    return APInt(BitWidth, llvm::byteswap<uint16_t>(U.VAL));
  case 32:
    return APInt(BitWidth, llvm::byteswap<uint32_t>(U.VAL));
  default:
    break;
  }

  // Other single-word widths: swap as 64 bits, then shift the swapped bytes
  // down from the top of the word.
  if (BitWidth <= 64) {
    const uint64_t Swapped = llvm::byteswap<uint64_t>(U.VAL);
    return APInt(BitWidth, Swapped >> (64 - BitWidth));
  }

  // Multi-word: swap each word while reversing the word order, working in a
  // value that spans whole words.
  const unsigned NumWords = getNumWords();
  APInt Result(NumWords * APINT_BITS_PER_WORD, 0);
  for (unsigned I = 0; I != NumWords; ++I)
    Result.U.pVal[I] = llvm::byteswap<uint64_t>(U.pVal[NumWords - I - 1]);

  // If the real width is not a whole number of words, shift the result down
  // and restore the real width.
  const unsigned ExtraBits = Result.BitWidth - BitWidth;
  if (ExtraBits != 0) {
    Result.lshrInPlace(ExtraBits);
    Result.BitWidth = BitWidth;
  }
  return Result;
}
736

737
/// Return this value with the order of its bits reversed.
APInt APInt::reverseBits() const {
  // Widths with a native implementation, plus the trivial zero-width case.
  if (BitWidth == 64)
    return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
  if (BitWidth == 32)
    return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
  if (BitWidth == 16)
    return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
  if (BitWidth == 8)
    return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
  if (BitWidth == 0)
    return *this;

  // Generic path: peel bits off the bottom of a working copy and push them
  // onto the bottom of the result, stopping once no set bits remain.
  APInt Remaining(*this);
  APInt Reversed(BitWidth, 0);
  unsigned BitsLeft = BitWidth;

  while (Remaining != 0) {
    Reversed <<= 1;
    Reversed |= Remaining[0];
    --BitsLeft;
    Remaining.lshrInPlace(1);
  }

  // The bits that were never visited are all zero; account for them with one
  // final shift.
  Reversed <<= BitsLeft;
  return Reversed;
}
766

767
/// Compute the greatest common divisor of the unsigned values \p A and \p B.
/// If either operand is zero, the other operand is returned.
APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
  // Fast-path a common case.
  if (A == B)
    return A;

  // Corner cases: if either operand is zero, the other is the gcd.
  if (!A)
    return B;
  if (!B)
    return A;

  // Keep the power of two common to both operands and strip the surplus
  // power of two from whichever operand has more.
  const unsigned TrailingA = A.countr_zero();
  const unsigned TrailingB = B.countr_zero();
  const unsigned Pow2 = std::min(TrailingA, TrailingB);
  if (TrailingA > TrailingB)
    A.lshrInPlace(TrailingA - TrailingB);
  else if (TrailingB > TrailingA)
    B.lshrInPlace(TrailingB - TrailingA);

  // Both operands are now odd multiples of 2^Pow2:
  //
  //   gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
  //
  // This is a modified version of Stein's algorithm, taking advantage of an
  // efficient trailing-zero count.
  while (A != B) {
    const bool AIsLarger = A.ugt(B);
    APInt &Larger = AIsLarger ? A : B;
    const APInt &Smaller = AIsLarger ? B : A;

    Larger -= Smaller;
    // Shift the difference down until only 2^Pow2 remains as a factor of two.
    Larger.lshrInPlace(Larger.countr_zero() - Pow2);
  }

  return A;
}
809

810
/// Convert \p Double to a \p width bit integer by discarding the fractional
/// part. Values whose magnitude is below 1 yield 0, and so do values whose
/// leading bit would not fit into \p width bits.
APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
  const uint64_t Bits = bit_cast<uint64_t>(Double);

  // The sign is the highest order bit.
  const bool isNeg = Bits >> 63;

  // The 11-bit exponent, with the bias of 1023 removed.
  const int64_t exp = ((Bits >> 52) & 0x7ff) - 1023;

  // A negative exponent means the magnitude is below 1, so the result is 0.
  if (exp < 0)
    return APInt(width, 0u);

  // The low 52 bits are the fraction; put the implicit leading one on top.
  const uint64_t mantissa = (Bits & (~0ULL >> 12)) | 1ULL << 52;

  // The binary point falls inside the mantissa: shift the fraction out.
  if (exp < 52) {
    APInt Magnitude(width, mantissa >> (52 - exp));
    return isNeg ? -Magnitude : Magnitude;
  }

  // If the client didn't provide enough bits for us to shift the mantissa
  // into then the result is undefined, just return 0.
  if (width <= exp - 52)
    return APInt(width, 0);

  // Otherwise the mantissa has to be shifted up to its final position.
  APInt Shifted(width, mantissa);
  Shifted <<= (unsigned)exp - 52;
  return isNeg ? -Shifted : Shifted;
}
841

842
/// This function converts this APInt to a double.
843
/// The layout for double is as following (IEEE Standard 754):
844
///  --------------------------------------
845
/// |  Sign    Exponent    Fraction    Bias |
846
/// |-------------------------------------- |
847
/// |  1[63]   11[62-52]   52[51-00]   1023 |
848
///  --------------------------------------
849
double APInt::roundToDouble(bool isSigned) const {
850

851
  // Handle the simple case where the value is contained in one uint64_t.
852
  // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
853
  if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
854
    if (isSigned) {
855
      int64_t sext = SignExtend64(getWord(0), BitWidth);
856
      return double(sext);
857
    } else
858
      return double(getWord(0));
859
  }
860

861
  // Determine if the value is negative.
862
  bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
863

864
  // Construct the absolute value if we're negative.
865
  APInt Tmp(isNeg ? -(*this) : (*this));
866

867
  // Figure out how many bits we're using.
868
  unsigned n = Tmp.getActiveBits();
869

870
  // The exponent (without bias normalization) is just the number of bits
871
  // we are using. Note that the sign bit is gone since we constructed the
872
  // absolute value.
873
  uint64_t exp = n;
874

875
  // Return infinity for exponent overflow
876
  if (exp > 1023) {
877
    if (!isSigned || !isNeg)
878
      return std::numeric_limits<double>::infinity();
879
    else
880
      return -std::numeric_limits<double>::infinity();
881
  }
882
  exp += 1023; // Increment for 1023 bias
883

884
  // Number of bits in mantissa is 52. To obtain the mantissa value, we must
885
  // extract the high 52 bits from the correct words in pVal.
886
  uint64_t mantissa;
887
  unsigned hiWord = whichWord(n-1);
888
  if (hiWord == 0) {
889
    mantissa = Tmp.U.pVal[0];
890
    if (n > 52)
891
      mantissa >>= n - 52; // shift down, we want the top 52 bits.
892
  } else {
893
    assert(hiWord > 0 && "huh?");
894
    uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
895
    uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
896
    mantissa = hibits | lobits;
897
  }
898

899
  // The leading bit of mantissa is implicit, so get rid of it.
900
  uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
901
  uint64_t I = sign | (exp << 52) | mantissa;
902
  return bit_cast<double>(I);
903
}
904

905
// Truncate to new width.
906
APInt APInt::trunc(unsigned width) const {
907
  assert(width <= BitWidth && "Invalid APInt Truncate request");
908

909
  if (width <= APINT_BITS_PER_WORD)
910
    return APInt(width, getRawData()[0]);
911

912
  if (width == BitWidth)
913
    return *this;
914

915
  APInt Result(getMemory(getNumWords(width)), width);
916

917
  // Copy full words.
918
  unsigned i;
919
  for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
920
    Result.U.pVal[i] = U.pVal[i];
921

922
  // Truncate and copy any partial word.
923
  unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
924
  if (bits != 0)
925
    Result.U.pVal[i] = U.pVal[i] << bits >> bits;
926

927
  return Result;
928
}
929

930
// Truncate to new width with unsigned saturation.
931
APInt APInt::truncUSat(unsigned width) const {
932
  assert(width <= BitWidth && "Invalid APInt Truncate request");
933

934
  // Can we just losslessly truncate it?
935
  if (isIntN(width))
936
    return trunc(width);
937
  // If not, then just return the new limit.
938
  return APInt::getMaxValue(width);
939
}
940

941
// Truncate to new width with signed saturation.
942
APInt APInt::truncSSat(unsigned width) const {
943
  assert(width <= BitWidth && "Invalid APInt Truncate request");
944

945
  // Can we just losslessly truncate it?
946
  if (isSignedIntN(width))
947
    return trunc(width);
948
  // If not, then just return the new limits.
949
  return isNegative() ? APInt::getSignedMinValue(width)
950
                      : APInt::getSignedMaxValue(width);
951
}
952

953
// Sign extend to a new width.
954
APInt APInt::sext(unsigned Width) const {
955
  assert(Width >= BitWidth && "Invalid APInt SignExtend request");
956

957
  if (Width <= APINT_BITS_PER_WORD)
958
    return APInt(Width, SignExtend64(U.VAL, BitWidth));
959

960
  if (Width == BitWidth)
961
    return *this;
962

963
  APInt Result(getMemory(getNumWords(Width)), Width);
964

965
  // Copy words.
966
  std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
967

968
  // Sign extend the last word since there may be unused bits in the input.
969
  Result.U.pVal[getNumWords() - 1] =
970
      SignExtend64(Result.U.pVal[getNumWords() - 1],
971
                   ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
972

973
  // Fill with sign bits.
974
  std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
975
              (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
976
  Result.clearUnusedBits();
977
  return Result;
978
}
979

980
//  Zero extend to a new width.
981
APInt APInt::zext(unsigned width) const {
982
  assert(width >= BitWidth && "Invalid APInt ZeroExtend request");
983

984
  if (width <= APINT_BITS_PER_WORD)
985
    return APInt(width, U.VAL);
986

987
  if (width == BitWidth)
988
    return *this;
989

990
  APInt Result(getMemory(getNumWords(width)), width);
991

992
  // Copy words.
993
  std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
994

995
  // Zero remaining words.
996
  std::memset(Result.U.pVal + getNumWords(), 0,
997
              (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
998

999
  return Result;
1000
}
1001

1002
/// Resize to \p width bits: zero extend when growing, truncate when
/// shrinking, and return an unchanged copy when the width already matches.
APInt APInt::zextOrTrunc(unsigned width) const {
  if (width == BitWidth)
    return *this;
  return width > BitWidth ? zext(width) : trunc(width);
}
1009

1010
/// Resize to \p width bits: sign extend when growing, truncate when
/// shrinking, and return an unchanged copy when the width already matches.
APInt APInt::sextOrTrunc(unsigned width) const {
  if (width == BitWidth)
    return *this;
  return width > BitWidth ? sext(width) : trunc(width);
}
1017

1018
/// Arithmetic right-shift this APInt by shiftAmt.
1019
/// Arithmetic right-shift function.
1020
void APInt::ashrInPlace(const APInt &shiftAmt) {
1021
  ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
1022
}
1023

1024
/// Arithmetic right-shift this APInt by shiftAmt.
1025
/// Arithmetic right-shift function.
1026
void APInt::ashrSlowCase(unsigned ShiftAmt) {
1027
  // Don't bother performing a no-op shift.
1028
  if (!ShiftAmt)
1029
    return;
1030

1031
  // Save the original sign bit for later.
1032
  bool Negative = isNegative();
1033

1034
  // WordShift is the inter-part shift; BitShift is intra-part shift.
1035
  unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
1036
  unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
1037

1038
  unsigned WordsToMove = getNumWords() - WordShift;
1039
  if (WordsToMove != 0) {
1040
    // Sign extend the last word to fill in the unused bits.
1041
    U.pVal[getNumWords() - 1] = SignExtend64(
1042
        U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
1043

1044
    // Fastpath for moving by whole words.
1045
    if (BitShift == 0) {
1046
      std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
1047
    } else {
1048
      // Move the words containing significant bits.
1049
      for (unsigned i = 0; i != WordsToMove - 1; ++i)
1050
        U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
1051
                    (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
1052

1053
      // Handle the last word which has no high bits to copy.
1054
      U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
1055
      // Sign extend one more time.
1056
      U.pVal[WordsToMove - 1] =
1057
          SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
1058
    }
1059
  }
1060

1061
  // Fill in the remainder based on the original sign.
1062
  std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
1063
              WordShift * APINT_WORD_SIZE);
1064
  clearUnusedBits();
1065
}
1066

1067
/// Logical right-shift this APInt by shiftAmt.
1068
/// Logical right-shift function.
1069
void APInt::lshrInPlace(const APInt &shiftAmt) {
1070
  lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
1071
}
1072

1073
/// Logical right-shift this APInt by shiftAmt.
1074
/// Logical right-shift function.
1075
void APInt::lshrSlowCase(unsigned ShiftAmt) {
1076
  tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
1077
}
1078

1079
/// Left-shift this APInt by shiftAmt.
1080
/// Left-shift function.
1081
APInt &APInt::operator<<=(const APInt &shiftAmt) {
1082
  // It's undefined behavior in C to shift by BitWidth or greater.
1083
  *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
1084
  return *this;
1085
}
1086

1087
void APInt::shlSlowCase(unsigned ShiftAmt) {
1088
  tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
1089
  clearUnusedBits();
1090
}
1091

1092
// Calculate the rotate amount modulo the bit width.
1093
static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
1094
  if (LLVM_UNLIKELY(BitWidth == 0))
1095
    return 0;
1096
  unsigned rotBitWidth = rotateAmt.getBitWidth();
1097
  APInt rot = rotateAmt;
1098
  if (rotBitWidth < BitWidth) {
1099
    // Extend the rotate APInt, so that the urem doesn't divide by 0.
1100
    // e.g. APInt(1, 32) would give APInt(1, 0).
1101
    rot = rotateAmt.zext(BitWidth);
1102
  }
1103
  rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
1104
  return rot.getLimitedValue(BitWidth);
1105
}
1106

1107
/// Rotate left by \p rotateAmt, reduced modulo BitWidth by rotateModulo.
APInt APInt::rotl(const APInt &rotateAmt) const {
  const unsigned Amount = rotateModulo(BitWidth, rotateAmt);
  return rotl(Amount);
}
1110

1111
/// Rotate left by \p rotateAmt bits (taken modulo BitWidth). A zero-width
/// value or a zero effective amount returns an unchanged copy.
APInt APInt::rotl(unsigned rotateAmt) const {
  if (LLVM_UNLIKELY(BitWidth == 0))
    return *this;

  const unsigned Amount = rotateAmt % BitWidth;
  if (Amount == 0)
    return *this;

  // Bits pushed off the top re-enter at the bottom.
  return shl(Amount) | lshr(BitWidth - Amount);
}
1119

1120
/// Rotate right by \p rotateAmt, reduced modulo BitWidth by rotateModulo.
APInt APInt::rotr(const APInt &rotateAmt) const {
  const unsigned Amount = rotateModulo(BitWidth, rotateAmt);
  return rotr(Amount);
}
1123

1124
/// Rotate right by \p rotateAmt bits (taken modulo BitWidth). A zero-width
/// value or a zero effective amount returns an unchanged copy.
APInt APInt::rotr(unsigned rotateAmt) const {
  if (BitWidth == 0)
    return *this;

  const unsigned Amount = rotateAmt % BitWidth;
  if (Amount == 0)
    return *this;

  // Bits pushed off the bottom re-enter at the top.
  return lshr(Amount) | shl(BitWidth - Amount);
}
1132

1133
/// \returns the nearest log base 2 of this APInt. Ties round up.
1134
///
1135
/// NOTE: When we have a BitWidth of 1, we define:
1136
///
1137
///   log2(0) = UINT32_MAX
1138
///   log2(1) = 0
1139
///
1140
/// to get around any mathematical concerns resulting from
1141
/// referencing 2 in a space where 2 does no exist.
1142
unsigned APInt::nearestLogBase2() const {
1143
  // Special case when we have a bitwidth of 1. If VAL is 1, then we
1144
  // get 0. If VAL is 0, we get WORDTYPE_MAX which gets truncated to
1145
  // UINT32_MAX.
1146
  if (BitWidth == 1)
1147
    return U.VAL - 1;
1148

1149
  // Handle the zero case.
1150
  if (isZero())
1151
    return UINT32_MAX;
1152

1153
  // The non-zero case is handled by computing:
1154
  //
1155
  //   nearestLogBase2(x) = logBase2(x) + x[logBase2(x)-1].
1156
  //
1157
  // where x[i] is referring to the value of the ith bit of x.
1158
  unsigned lg = logBase2();
1159
  return lg + unsigned((*this)[lg - 1]);
1160
}
1161

1162
// Square Root - this method computes and returns the square root of "this".
1163
// Three mechanisms are used for computation. For small values (<= 5 bits),
1164
// a table lookup is done. This gets some performance for common cases. For
1165
// values using less than 52 bits, the value is converted to double and then
1166
// the libc sqrt function is called. The result is rounded and then converted
1167
// back to a uint64_t which is then used to construct the result. Finally,
1168
// the Babylonian method for computing square roots is used.
1169
APInt APInt::sqrt() const {
1170

1171
  // Determine the magnitude of the value.
1172
  unsigned magnitude = getActiveBits();
1173

1174
  // Use a fast table for some small values. This also gets rid of some
1175
  // rounding errors in libc sqrt for small values.
1176
  if (magnitude <= 5) {
1177
    static const uint8_t results[32] = {
1178
      /*     0 */ 0,
1179
      /*  1- 2 */ 1, 1,
1180
      /*  3- 6 */ 2, 2, 2, 2,
1181
      /*  7-12 */ 3, 3, 3, 3, 3, 3,
1182
      /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1183
      /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1184
      /*    31 */ 6
1185
    };
1186
    return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
1187
  }
1188

1189
  // If the magnitude of the value fits in less than 52 bits (the precision of
1190
  // an IEEE double precision floating point value), then we can use the
1191
  // libc sqrt function which will probably use a hardware sqrt computation.
1192
  // This should be faster than the algorithm below.
1193
  if (magnitude < 52) {
1194
    return APInt(BitWidth,
1195
                 uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
1196
                                                               : U.pVal[0])))));
1197
  }
1198

1199
  // Okay, all the short cuts are exhausted. We must compute it. The following
1200
  // is a classical Babylonian method for computing the square root. This code
1201
  // was adapted to APInt from a wikipedia article on such computations.
1202
  // See http://www.wikipedia.org/ and go to the page named
1203
  // Calculate_an_integer_square_root.
1204
  unsigned nbits = BitWidth, i = 4;
1205
  APInt testy(BitWidth, 16);
1206
  APInt x_old(BitWidth, 1);
1207
  APInt x_new(BitWidth, 0);
1208
  APInt two(BitWidth, 2);
1209

1210
  // Select a good starting value using binary logarithms.
1211
  for (;; i += 2, testy = testy.shl(2))
1212
    if (i >= nbits || this->ule(testy)) {
1213
      x_old = x_old.shl(i / 2);
1214
      break;
1215
    }
1216

1217
  // Use the Babylonian method to arrive at the integer square root:
1218
  for (;;) {
1219
    x_new = (this->udiv(x_old) + x_old).udiv(two);
1220
    if (x_old.ule(x_new))
1221
      break;
1222
    x_old = x_new;
1223
  }
1224

1225
  // Make sure we return the closest approximation
1226
  // NOTE: The rounding calculation below is correct. It will produce an
1227
  // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1228
  // determined to be a rounding issue with pari/gp as it begins to use a
1229
  // floating point representation after 192 bits. There are no discrepancies
1230
  // between this algorithm and pari/gp for bit widths < 192 bits.
1231
  APInt square(x_old * x_old);
1232
  APInt nextSquare((x_old + 1) * (x_old +1));
1233
  if (this->ult(square))
1234
    return x_old;
1235
  assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1236
  APInt midpoint((nextSquare - square).udiv(two));
1237
  APInt offset(*this - square);
1238
  if (offset.ult(midpoint))
1239
    return x_old;
1240
  return x_old + 1;
1241
}
1242

1243
/// \returns the multiplicative inverse of an odd APInt modulo 2^BitWidth.
1244
APInt APInt::multiplicativeInverse() const {
1245
  assert((*this)[0] &&
1246
         "multiplicative inverse is only defined for odd numbers!");
1247

1248
  // Use Newton's method.
1249
  APInt Factor = *this;
1250
  APInt T;
1251
  while (!(T = *this * Factor).isOne())
1252
    Factor *= 2 - std::move(T);
1253
  return Factor;
1254
}
1255

1256
/// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1257
/// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1258
/// variables here have the same names as in the algorithm. Comments explain
1259
/// the algorithm and any deviation from it.
1260
static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
1261
                     unsigned m, unsigned n) {
1262
  assert(u && "Must provide dividend");
1263
  assert(v && "Must provide divisor");
1264
  assert(q && "Must provide quotient");
1265
  assert(u != v && u != q && v != q && "Must use different memory");
1266
  assert(n>1 && "n must be > 1");
1267

1268
  // b denotes the base of the number system. In our case b is 2^32.
1269
  const uint64_t b = uint64_t(1) << 32;
1270

1271
// The DEBUG macros here tend to be spam in the debug output if you're not
1272
// debugging this code. Disable them unless KNUTH_DEBUG is defined.
1273
#ifdef KNUTH_DEBUG
1274
#define DEBUG_KNUTH(X) LLVM_DEBUG(X)
1275
#else
1276
#define DEBUG_KNUTH(X) do {} while(false)
1277
#endif
1278

1279
  DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1280
  DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
1281
  DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1282
  DEBUG_KNUTH(dbgs() << " by");
1283
  DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1284
  DEBUG_KNUTH(dbgs() << '\n');
1285
  // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1286
  // u and v by d. Note that we have taken Knuth's advice here to use a power
1287
  // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1288
  // 2 allows us to shift instead of multiply and it is easy to determine the
1289
  // shift amount from the leading zeros.  We are basically normalizing the u
1290
  // and v so that its high bits are shifted to the top of v's range without
1291
  // overflow. Note that this can require an extra word in u so that u must
1292
  // be of length m+n+1.
1293
  unsigned shift = llvm::countl_zero(v[n - 1]);
1294
  uint32_t v_carry = 0;
1295
  uint32_t u_carry = 0;
1296
  if (shift) {
1297
    for (unsigned i = 0; i < m+n; ++i) {
1298
      uint32_t u_tmp = u[i] >> (32 - shift);
1299
      u[i] = (u[i] << shift) | u_carry;
1300
      u_carry = u_tmp;
1301
    }
1302
    for (unsigned i = 0; i < n; ++i) {
1303
      uint32_t v_tmp = v[i] >> (32 - shift);
1304
      v[i] = (v[i] << shift) | v_carry;
1305
      v_carry = v_tmp;
1306
    }
1307
  }
1308
  u[m+n] = u_carry;
1309

1310
  DEBUG_KNUTH(dbgs() << "KnuthDiv:   normal:");
1311
  DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1312
  DEBUG_KNUTH(dbgs() << " by");
1313
  DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1314
  DEBUG_KNUTH(dbgs() << '\n');
1315

1316
  // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1317
  int j = m;
1318
  do {
1319
    DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1320
    // D3. [Calculate q'.].
1321
    //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1322
    //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1323
    // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1324
    // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1325
    // on v[n-2] determines at high speed most of the cases in which the trial
1326
    // value qp is one too large, and it eliminates all cases where qp is two
1327
    // too large.
1328
    uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
1329
    DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1330
    uint64_t qp = dividend / v[n-1];
1331
    uint64_t rp = dividend % v[n-1];
1332
    if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1333
      qp--;
1334
      rp += v[n-1];
1335
      if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1336
        qp--;
1337
    }
1338
    DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1339

1340
    // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1341
    // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1342
    // consists of a simple multiplication by a one-place number, combined with
1343
    // a subtraction.
1344
    // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1345
    // this step is actually negative, (u[j+n]...u[j]) should be left as the
1346
    // true value plus b**(n+1), namely as the b's complement of
1347
    // the true value, and a "borrow" to the left should be remembered.
1348
    int64_t borrow = 0;
1349
    for (unsigned i = 0; i < n; ++i) {
1350
      uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1351
      int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
1352
      u[j+i] = Lo_32(subres);
1353
      borrow = Hi_32(p) - Hi_32(subres);
1354
      DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
1355
                        << ", borrow = " << borrow << '\n');
1356
    }
1357
    bool isNeg = u[j+n] < borrow;
1358
    u[j+n] -= Lo_32(borrow);
1359

1360
    DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
1361
    DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1362
    DEBUG_KNUTH(dbgs() << '\n');
1363

1364
    // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1365
    // negative, go to step D6; otherwise go on to step D7.
1366
    q[j] = Lo_32(qp);
1367
    if (isNeg) {
1368
      // D6. [Add back]. The probability that this step is necessary is very
1369
      // small, on the order of only 2/b. Make sure that test data accounts for
1370
      // this possibility. Decrease q[j] by 1
1371
      q[j]--;
1372
      // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1373
      // A carry will occur to the left of u[j+n], and it should be ignored
1374
      // since it cancels with the borrow that occurred in D4.
1375
      bool carry = false;
1376
      for (unsigned i = 0; i < n; i++) {
1377
        uint32_t limit = std::min(u[j+i],v[i]);
1378
        u[j+i] += v[i] + carry;
1379
        carry = u[j+i] < limit || (carry && u[j+i] == limit);
1380
      }
1381
      u[j+n] += carry;
1382
    }
1383
    DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
1384
    DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1385
    DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1386

1387
    // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1388
  } while (--j >= 0);
1389

1390
  DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
1391
  DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
1392
  DEBUG_KNUTH(dbgs() << '\n');
1393

1394
  // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1395
  // remainder may be obtained by dividing u[...] by d. If r is non-null we
1396
  // compute the remainder (urem uses this).
1397
  if (r) {
1398
    // The value d is expressed by the "shift" value above since we avoided
1399
    // multiplication by d by using a shift left. So, all we have to do is
1400
    // shift right here.
1401
    if (shift) {
1402
      uint32_t carry = 0;
1403
      DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
1404
      for (int i = n-1; i >= 0; i--) {
1405
        r[i] = (u[i] >> shift) | carry;
1406
        carry = u[i] << (32 - shift);
1407
        DEBUG_KNUTH(dbgs() << " " << r[i]);
1408
      }
1409
    } else {
1410
      for (int i = n-1; i >= 0; i--) {
1411
        r[i] = u[i];
1412
        DEBUG_KNUTH(dbgs() << " " << r[i]);
1413
      }
1414
    }
1415
    DEBUG_KNUTH(dbgs() << '\n');
1416
  }
1417
  DEBUG_KNUTH(dbgs() << '\n');
1418
}
1419

1420
void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
1421
                   unsigned rhsWords, WordType *Quotient, WordType *Remainder) {
1422
  assert(lhsWords >= rhsWords && "Fractional result");
1423

1424
  // First, compose the values into an array of 32-bit words instead of
1425
  // 64-bit words. This is a necessity of both the "short division" algorithm
1426
  // and the Knuth "classical algorithm" which requires there to be native
1427
  // operations for +, -, and * on an m bit value with an m*2 bit result. We
1428
  // can't use 64-bit operands here because we don't have native results of
1429
  // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1430
  // work on large-endian machines.
1431
  unsigned n = rhsWords * 2;
1432
  unsigned m = (lhsWords * 2) - n;
1433

1434
  // Allocate space for the temporary values we need either on the stack, if
1435
  // it will fit, or on the heap if it won't.
1436
  uint32_t SPACE[128];
1437
  uint32_t *U = nullptr;
1438
  uint32_t *V = nullptr;
1439
  uint32_t *Q = nullptr;
1440
  uint32_t *R = nullptr;
1441
  if ((Remainder?4:3)*n+2*m+1 <= 128) {
1442
    U = &SPACE[0];
1443
    V = &SPACE[m+n+1];
1444
    Q = &SPACE[(m+n+1) + n];
1445
    if (Remainder)
1446
      R = &SPACE[(m+n+1) + n + (m+n)];
1447
  } else {
1448
    U = new uint32_t[m + n + 1];
1449
    V = new uint32_t[n];
1450
    Q = new uint32_t[m+n];
1451
    if (Remainder)
1452
      R = new uint32_t[n];
1453
  }
1454

1455
  // Initialize the dividend
1456
  memset(U, 0, (m+n+1)*sizeof(uint32_t));
1457
  for (unsigned i = 0; i < lhsWords; ++i) {
1458
    uint64_t tmp = LHS[i];
1459
    U[i * 2] = Lo_32(tmp);
1460
    U[i * 2 + 1] = Hi_32(tmp);
1461
  }
1462
  U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1463

1464
  // Initialize the divisor
1465
  memset(V, 0, (n)*sizeof(uint32_t));
1466
  for (unsigned i = 0; i < rhsWords; ++i) {
1467
    uint64_t tmp = RHS[i];
1468
    V[i * 2] = Lo_32(tmp);
1469
    V[i * 2 + 1] = Hi_32(tmp);
1470
  }
1471

1472
  // initialize the quotient and remainder
1473
  memset(Q, 0, (m+n) * sizeof(uint32_t));
1474
  if (Remainder)
1475
    memset(R, 0, n * sizeof(uint32_t));
1476

1477
  // Now, adjust m and n for the Knuth division. n is the number of words in
1478
  // the divisor. m is the number of words by which the dividend exceeds the
1479
  // divisor (i.e. m+n is the length of the dividend). These sizes must not
1480
  // contain any zero words or the Knuth algorithm fails.
1481
  for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1482
    n--;
1483
    m++;
1484
  }
1485
  for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1486
    m--;
1487

1488
  // If we're left with only a single word for the divisor, Knuth doesn't work
1489
  // so we implement the short division algorithm here. This is much simpler
1490
  // and faster because we are certain that we can divide a 64-bit quantity
1491
  // by a 32-bit quantity at hardware speed and short division is simply a
1492
  // series of such operations. This is just like doing short division but we
1493
  // are using base 2^32 instead of base 10.
1494
  assert(n != 0 && "Divide by zero?");
1495
  if (n == 1) {
1496
    uint32_t divisor = V[0];
1497
    uint32_t remainder = 0;
1498
    for (int i = m; i >= 0; i--) {
1499
      uint64_t partial_dividend = Make_64(remainder, U[i]);
1500
      if (partial_dividend == 0) {
1501
        Q[i] = 0;
1502
        remainder = 0;
1503
      } else if (partial_dividend < divisor) {
1504
        Q[i] = 0;
1505
        remainder = Lo_32(partial_dividend);
1506
      } else if (partial_dividend == divisor) {
1507
        Q[i] = 1;
1508
        remainder = 0;
1509
      } else {
1510
        Q[i] = Lo_32(partial_dividend / divisor);
1511
        remainder = Lo_32(partial_dividend - (Q[i] * divisor));
1512
      }
1513
    }
1514
    if (R)
1515
      R[0] = remainder;
1516
  } else {
1517
    // Now we're ready to invoke the Knuth classical divide algorithm. In this
1518
    // case n > 1.
1519
    KnuthDiv(U, V, Q, R, m, n);
1520
  }
1521

1522
  // If the caller wants the quotient
1523
  if (Quotient) {
1524
    for (unsigned i = 0; i < lhsWords; ++i)
1525
      Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);
1526
  }
1527

1528
  // If the caller wants the remainder
1529
  if (Remainder) {
1530
    for (unsigned i = 0; i < rhsWords; ++i)
1531
      Remainder[i] = Make_64(R[i*2+1], R[i*2]);
1532
  }
1533

1534
  // Clean up the memory we allocated.
1535
  if (U != &SPACE[0]) {
1536
    delete [] U;
1537
    delete [] V;
1538
    delete [] Q;
1539
    delete [] R;
1540
  }
1541
}
1542

1543
/// Unsigned division of this value by \p RHS; both must have the same bit
/// width and \p RHS must be non-zero.
APInt APInt::udiv(const APInt &RHS) const {
  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");

  // Single-word values use the native divide.
  if (isSingleWord()) {
    assert(RHS.U.VAL != 0 && "Divide by zero?");
    return APInt(BitWidth, U.VAL / RHS.U.VAL);
  }

  // Count the words actually in use on each side.
  const unsigned DividendWords = getNumWords(getActiveBits());
  const unsigned DivisorBits = RHS.getActiveBits();
  const unsigned DivisorWords = getNumWords(DivisorBits);
  assert(DivisorWords && "Divided by zero???");

  // Shortcuts that avoid the full algorithm.
  if (DividendWords == 0)
    return APInt(BitWidth, 0); // 0 / X ===> 0
  if (DivisorBits == 1)
    return *this; // X / 1 ===> X
  if (DividendWords < DivisorWords || this->ult(RHS))
    return APInt(BitWidth, 0); // X / Y ===> 0, iff X < Y
  if (*this == RHS)
    return APInt(BitWidth, 1); // X / X ===> 1
  if (DividendWords == 1)
    // Only the low words are in use on both sides: native divide.
    return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);

  // General case: multi-word division.
  APInt Quotient(BitWidth, 0);
  divide(U.pVal, DividendWords, RHS.U.pVal, DivisorWords, Quotient.U.pVal,
         nullptr);
  return Quotient;
}
1580

1581
/// Unsigned division of this value by the non-zero 64-bit divisor \p RHS.
APInt APInt::udiv(uint64_t RHS) const {
  assert(RHS != 0 && "Divide by zero?");

  // Single-word values use the native divide.
  if (isSingleWord())
    return APInt(BitWidth, U.VAL / RHS);

  // Count the dividend words actually in use.
  const unsigned DividendWords = getNumWords(getActiveBits());

  // Shortcuts that avoid the full algorithm.
  if (DividendWords == 0)
    return APInt(BitWidth, 0); // 0 / X ===> 0
  if (RHS == 1)
    return *this; // X / 1 ===> X
  if (this->ult(RHS))
    return APInt(BitWidth, 0); // X / Y ===> 0, iff X < Y
  if (*this == RHS)
    return APInt(BitWidth, 1); // X / X ===> 1
  if (DividendWords == 1)
    // Only the low word is in use: native divide.
    return APInt(BitWidth, this->U.pVal[0] / RHS);

  // General case: multi-word dividend, one-word divisor.
  APInt Quotient(BitWidth, 0);
  divide(U.pVal, DividendWords, &RHS, 1, Quotient.U.pVal, nullptr);
  return Quotient;
}
1613

1614
/// Signed division by \p RHS, implemented as an unsigned division of the
/// negated-as-needed operands with the result negated when exactly one
/// operand is negative.
APInt APInt::sdiv(const APInt &RHS) const {
  const bool LHSNegative = isNegative();
  const bool RHSNegative = RHS.isNegative();

  if (LHSNegative && RHSNegative)
    return (-(*this)).udiv(-RHS);
  if (LHSNegative)
    return -((-(*this)).udiv(RHS));
  if (RHSNegative)
    return -(this->udiv(-RHS));
  return this->udiv(RHS);
}
1624

1625
/// Signed division by the 64-bit value \p RHS, implemented as an unsigned
/// division of the operand magnitudes with the result negated when exactly
/// one operand is negative.
APInt APInt::sdiv(int64_t RHS) const {
  // Take the divisor's magnitude in unsigned arithmetic: negating INT64_MIN
  // as a signed value overflows (undefined behavior), whereas the unsigned
  // subtraction yields 2^63 as intended.
  const bool RHSNegative = RHS < 0;
  const uint64_t AbsRHS = RHSNegative ? uint64_t(0) - static_cast<uint64_t>(RHS)
                                      : static_cast<uint64_t>(RHS);

  if (isNegative()) {
    if (RHSNegative)
      return (-(*this)).udiv(AbsRHS);
    return -((-(*this)).udiv(AbsRHS));
  }
  if (RHSNegative)
    return -(this->udiv(AbsRHS));
  return this->udiv(AbsRHS);
}
1635

1636
/// Unsigned remainder of this value divided by \p RHS; both must have the
/// same bit width and \p RHS must be non-zero.
APInt APInt::urem(const APInt &RHS) const {
  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");

  // Single-word values use the native remainder.
  if (isSingleWord()) {
    assert(RHS.U.VAL != 0 && "Remainder by zero?");
    return APInt(BitWidth, U.VAL % RHS.U.VAL);
  }

  // Count the words actually in use on each side.
  const unsigned DividendWords = getNumWords(getActiveBits());
  const unsigned DivisorBits = RHS.getActiveBits();
  const unsigned DivisorWords = getNumWords(DivisorBits);
  assert(DivisorWords && "Performing remainder operation by zero ???");

  // Shortcuts that avoid the full algorithm.
  if (DividendWords == 0)
    return APInt(BitWidth, 0); // 0 % Y ===> 0
  if (DivisorBits == 1)
    return APInt(BitWidth, 0); // X % 1 ===> 0
  if (DividendWords < DivisorWords || this->ult(RHS))
    return *this; // X % Y ===> X, iff X < Y
  if (*this == RHS)
    return APInt(BitWidth, 0); // X % X == 0
  if (DividendWords == 1)
    // Only the low words are in use on both sides: native remainder.
    return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);

  // General case: multi-word division, keeping only the remainder.
  APInt Remainder(BitWidth, 0);
  divide(U.pVal, DividendWords, RHS.U.pVal, DivisorWords, nullptr,
         Remainder.U.pVal);
  return Remainder;
}
1673

1674
uint64_t APInt::urem(uint64_t RHS) const {
1675
  assert(RHS != 0 && "Remainder by zero?");
1676

1677
  if (isSingleWord())
1678
    return U.VAL % RHS;
1679

1680
  // Get some facts about the LHS
1681
  unsigned lhsWords = getNumWords(getActiveBits());
1682

1683
  // Check the degenerate cases
1684
  if (lhsWords == 0)
1685
    // 0 % Y ===> 0
1686
    return 0;
1687
  if (RHS == 1)
1688
    // X % 1 ===> 0
1689
    return 0;
1690
  if (this->ult(RHS))
1691
    // X % Y ===> X, iff X < Y
1692
    return getZExtValue();
1693
  if (*this == RHS)
1694
    // X % X == 0;
1695
    return 0;
1696
  if (lhsWords == 1)
1697
    // All high words are zero, just use native remainder
1698
    return U.pVal[0] % RHS;
1699

1700
  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1701
  uint64_t Remainder;
1702
  divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);
1703
  return Remainder;
1704
}
1705

1706
/// Signed remainder by \p RHS, implemented as an unsigned remainder of the
/// negated-as-needed operands; the result is negated when this value is
/// negative.
APInt APInt::srem(const APInt &RHS) const {
  const bool LHSNegative = isNegative();
  const bool RHSNegative = RHS.isNegative();

  if (LHSNegative && RHSNegative)
    return -((-(*this)).urem(-RHS));
  if (LHSNegative)
    return -((-(*this)).urem(RHS));
  if (RHSNegative)
    return this->urem(-RHS);
  return this->urem(RHS);
}
1716

1717
int64_t APInt::srem(int64_t RHS) const {
1718
  if (isNegative()) {
1719
    if (RHS < 0)
1720
      return -((-(*this)).urem(-RHS));
1721
    return -((-(*this)).urem(RHS));
1722
  }
1723
  if (RHS < 0)
1724
    return this->urem(-RHS);
1725
  return this->urem(RHS);
1726
}
1727

1728
void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1729
                    APInt &Quotient, APInt &Remainder) {
1730
  assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1731
  unsigned BitWidth = LHS.BitWidth;
1732

1733
  // First, deal with the easy case
1734
  if (LHS.isSingleWord()) {
1735
    assert(RHS.U.VAL != 0 && "Divide by zero?");
1736
    uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
1737
    uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
1738
    Quotient = APInt(BitWidth, QuotVal);
1739
    Remainder = APInt(BitWidth, RemVal);
1740
    return;
1741
  }
1742

1743
  // Get some size facts about the dividend and divisor
1744
  unsigned lhsWords = getNumWords(LHS.getActiveBits());
1745
  unsigned rhsBits  = RHS.getActiveBits();
1746
  unsigned rhsWords = getNumWords(rhsBits);
1747
  assert(rhsWords && "Performing divrem operation by zero ???");
1748

1749
  // Check the degenerate cases
1750
  if (lhsWords == 0) {
1751
    Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1752
    Remainder = APInt(BitWidth, 0);   // 0 % Y ===> 0
1753
    return;
1754
  }
1755

1756
  if (rhsBits == 1) {
1757
    Quotient = LHS;                   // X / 1 ===> X
1758
    Remainder = APInt(BitWidth, 0);   // X % 1 ===> 0
1759
  }
1760

1761
  if (lhsWords < rhsWords || LHS.ult(RHS)) {
1762
    Remainder = LHS;                  // X % Y ===> X, iff X < Y
1763
    Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1764
    return;
1765
  }
1766

1767
  if (LHS == RHS) {
1768
    Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1769
    Remainder = APInt(BitWidth, 0);   // X % X ===> 0;
1770
    return;
1771
  }
1772

1773
  // Make sure there is enough space to hold the results.
1774
  // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1775
  // change the size. This is necessary if Quotient or Remainder is aliased
1776
  // with LHS or RHS.
1777
  Quotient.reallocate(BitWidth);
1778
  Remainder.reallocate(BitWidth);
1779

1780
  if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1781
    // There is only one word to consider so use the native versions.
1782
    uint64_t lhsValue = LHS.U.pVal[0];
1783
    uint64_t rhsValue = RHS.U.pVal[0];
1784
    Quotient = lhsValue / rhsValue;
1785
    Remainder = lhsValue % rhsValue;
1786
    return;
1787
  }
1788

1789
  // Okay, lets do it the long way
1790
  divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,
1791
         Remainder.U.pVal);
1792
  // Clear the rest of the Quotient and Remainder.
1793
  std::memset(Quotient.U.pVal + lhsWords, 0,
1794
              (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1795
  std::memset(Remainder.U.pVal + rhsWords, 0,
1796
              (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
1797
}
1798

1799
void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1800
                    uint64_t &Remainder) {
1801
  assert(RHS != 0 && "Divide by zero?");
1802
  unsigned BitWidth = LHS.BitWidth;
1803

1804
  // First, deal with the easy case
1805
  if (LHS.isSingleWord()) {
1806
    uint64_t QuotVal = LHS.U.VAL / RHS;
1807
    Remainder = LHS.U.VAL % RHS;
1808
    Quotient = APInt(BitWidth, QuotVal);
1809
    return;
1810
  }
1811

1812
  // Get some size facts about the dividend and divisor
1813
  unsigned lhsWords = getNumWords(LHS.getActiveBits());
1814

1815
  // Check the degenerate cases
1816
  if (lhsWords == 0) {
1817
    Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1818
    Remainder = 0;                    // 0 % Y ===> 0
1819
    return;
1820
  }
1821

1822
  if (RHS == 1) {
1823
    Quotient = LHS;                   // X / 1 ===> X
1824
    Remainder = 0;                    // X % 1 ===> 0
1825
    return;
1826
  }
1827

1828
  if (LHS.ult(RHS)) {
1829
    Remainder = LHS.getZExtValue();   // X % Y ===> X, iff X < Y
1830
    Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1831
    return;
1832
  }
1833

1834
  if (LHS == RHS) {
1835
    Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1836
    Remainder = 0;                    // X % X ===> 0;
1837
    return;
1838
  }
1839

1840
  // Make sure there is enough space to hold the results.
1841
  // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1842
  // change the size. This is necessary if Quotient is aliased with LHS.
1843
  Quotient.reallocate(BitWidth);
1844

1845
  if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1846
    // There is only one word to consider so use the native versions.
1847
    uint64_t lhsValue = LHS.U.pVal[0];
1848
    Quotient = lhsValue / RHS;
1849
    Remainder = lhsValue % RHS;
1850
    return;
1851
  }
1852

1853
  // Okay, lets do it the long way
1854
  divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
1855
  // Clear the rest of the Quotient.
1856
  std::memset(Quotient.U.pVal + lhsWords, 0,
1857
              (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1858
}
1859

1860
void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1861
                    APInt &Quotient, APInt &Remainder) {
1862
  if (LHS.isNegative()) {
1863
    if (RHS.isNegative())
1864
      APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1865
    else {
1866
      APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1867
      Quotient.negate();
1868
    }
1869
    Remainder.negate();
1870
  } else if (RHS.isNegative()) {
1871
    APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1872
    Quotient.negate();
1873
  } else {
1874
    APInt::udivrem(LHS, RHS, Quotient, Remainder);
1875
  }
1876
}
1877

1878
void APInt::sdivrem(const APInt &LHS, int64_t RHS,
1879
                    APInt &Quotient, int64_t &Remainder) {
1880
  uint64_t R = Remainder;
1881
  if (LHS.isNegative()) {
1882
    if (RHS < 0)
1883
      APInt::udivrem(-LHS, -RHS, Quotient, R);
1884
    else {
1885
      APInt::udivrem(-LHS, RHS, Quotient, R);
1886
      Quotient.negate();
1887
    }
1888
    R = -R;
1889
  } else if (RHS < 0) {
1890
    APInt::udivrem(LHS, -RHS, Quotient, R);
1891
    Quotient.negate();
1892
  } else {
1893
    APInt::udivrem(LHS, RHS, Quotient, R);
1894
  }
1895
  Remainder = R;
1896
}
1897

1898
/// Signed addition with overflow detection. Overflow is set when both
/// operands share a sign and the wrapped sum has the opposite one.
APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
  APInt Sum = *this + RHS;
  const bool LHSNonNeg = isNonNegative();
  const bool SameSign = LHSNonNeg == RHS.isNonNegative();
  Overflow = SameSign && (Sum.isNonNegative() != LHSNonNeg);
  return Sum;
}
1904

1905
/// Unsigned addition with overflow detection. Overflow is set when the
/// wrapped sum compares below RHS.
APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
  APInt Sum = *this + RHS;
  Overflow = RHS.ugt(Sum);
  return Sum;
}
1910

1911
/// Signed subtraction with overflow detection. Overflow is set when the
/// operands have different signs and the wrapped difference does not keep
/// the sign of this value.
APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
  APInt Diff = *this - RHS;
  const bool LHSNonNeg = isNonNegative();
  const bool SignsDiffer = LHSNonNeg != RHS.isNonNegative();
  Overflow = SignsDiffer && (Diff.isNonNegative() != LHSNonNeg);
  return Diff;
}
1917

1918
/// Unsigned subtraction with overflow detection. Overflow is set when the
/// wrapped difference compares above this value.
APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
  APInt Diff = *this - RHS;
  Overflow = this->ult(Diff);
  return Diff;
}
1923

1924
/// Signed division with overflow detection. The only case flagged is
/// MININT / -1; the quotient returned is whatever sdiv produces.
APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
  Overflow = false;
  if (isMinSignedValue())
    Overflow = RHS.isAllOnes();
  return sdiv(RHS);
}
1929

1930
/// Signed multiplication with overflow detection. For a non-zero RHS,
/// overflow is flagged when dividing the wrapped product by RHS does not give
/// back this value, or for MININT * -1. A zero RHS never overflows.
APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
  APInt Product = *this * RHS;

  if (RHS == 0) {
    Overflow = false;
    return Product;
  }

  const bool RoundTripFails = Product.sdiv(RHS) != *this;
  Overflow = RoundTripFails || (isMinSignedValue() && RHS.isAllOnes());
  return Product;
}
1940

1941
/// Unsigned multiplication with overflow detection.
APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
  // Too few leading zeros across both operands: report overflow without
  // further checks.
  const unsigned LeadingZeros = countl_zero() + RHS.countl_zero();
  if (LeadingZeros + 2 <= BitWidth) {
    Overflow = true;
    return *this * RHS;
  }

  // Multiply RHS by this value with its low bit dropped. A set top bit in
  // that partial product means the doubling below loses a bit.
  APInt Product = lshr(1) * RHS;
  Overflow = Product.isNegative();
  Product <<= 1;

  // Add RHS back once if the dropped low bit was set, watching for
  // wrap-around.
  if ((*this)[0]) {
    Product += RHS;
    Overflow = Overflow || Product.ult(RHS);
  }
  return Product;
}
1957

1958
/// Signed shift-left with overflow detection, taking the amount as an APInt.
/// The amount is limited to the bit width before forwarding to the unsigned
/// overload.
APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
  const unsigned Amount =
      static_cast<unsigned>(ShAmt.getLimitedValue(getBitWidth()));
  return sshl_ov(Amount, Overflow);
}
1961

1962
/// Signed shift-left with overflow detection. Shifting by the full width or
/// more yields zero with Overflow set. Otherwise Overflow is set when the
/// shift amount reaches the number of leading sign-equal bits.
APInt APInt::sshl_ov(unsigned ShAmt, bool &Overflow) const {
  if (ShAmt >= getBitWidth()) {
    Overflow = true;
    return APInt(BitWidth, 0);
  }

  // Don't allow sign change: count the leading bits that match the sign.
  const unsigned SignRun = isNonNegative() ? countl_zero() : countl_one();
  Overflow = ShAmt >= SignRun;

  return *this << ShAmt;
}
1974

1975
/// Unsigned shift-left with overflow detection, taking the amount as an
/// APInt. The amount is limited to the bit width before forwarding to the
/// unsigned overload.
APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
  const unsigned Amount =
      static_cast<unsigned>(ShAmt.getLimitedValue(getBitWidth()));
  return ushl_ov(Amount, Overflow);
}
1978

1979
/// Unsigned shift-left with overflow detection. Shifting by the full width or
/// more yields zero with Overflow set. Otherwise Overflow is set when the
/// shift amount exceeds the number of leading zero bits.
APInt APInt::ushl_ov(unsigned ShAmt, bool &Overflow) const {
  if (ShAmt >= getBitWidth()) {
    Overflow = true;
    return APInt(BitWidth, 0);
  }

  Overflow = ShAmt > countl_zero();
  return *this << ShAmt;
}
1988

1989
/// Signed division with overflow detection, adjusted one step downward when
/// the division is inexact and the operands have opposite signs. Overflow is
/// whatever sdiv_ov reports.
APInt APInt::sfloordiv_ov(const APInt &RHS, bool &Overflow) const {
  APInt Quot = sdiv_ov(RHS, Overflow);

  const bool Inexact = Quot * RHS != *this;
  const bool SignsDiffer = isNegative() != RHS.isNegative();
  if (Inexact && SignsDiffer)
    return Quot - 1;

  return Quot;
}
1995

1996
/// Saturating signed addition: on overflow the result is the signed minimum
/// when this value is negative, and the signed maximum otherwise.
APInt APInt::sadd_sat(const APInt &RHS) const {
  bool Overflowed;
  APInt Sum = sadd_ov(RHS, Overflowed);
  if (Overflowed)
    return isNegative() ? APInt::getSignedMinValue(BitWidth)
                        : APInt::getSignedMaxValue(BitWidth);
  return Sum;
}
2005

2006
/// Saturating unsigned addition: on overflow the result is the unsigned
/// maximum.
APInt APInt::uadd_sat(const APInt &RHS) const {
  bool Overflowed;
  APInt Sum = uadd_ov(RHS, Overflowed);
  if (Overflowed)
    return APInt::getMaxValue(BitWidth);
  return Sum;
}
2014

2015
/// Saturating signed subtraction: on overflow the result is the signed
/// minimum when this value is negative, and the signed maximum otherwise.
APInt APInt::ssub_sat(const APInt &RHS) const {
  bool Overflowed;
  APInt Diff = ssub_ov(RHS, Overflowed);
  if (Overflowed)
    return isNegative() ? APInt::getSignedMinValue(BitWidth)
                        : APInt::getSignedMaxValue(BitWidth);
  return Diff;
}
2024

2025
/// Saturating unsigned subtraction: on overflow the result is zero.
APInt APInt::usub_sat(const APInt &RHS) const {
  bool Overflowed;
  APInt Diff = usub_ov(RHS, Overflowed);
  if (Overflowed)
    return APInt(BitWidth, 0);
  return Diff;
}
2033

2034
/// Saturating signed multiplication: on overflow the result is the signed
/// minimum when exactly one operand is negative, and the signed maximum
/// otherwise.
APInt APInt::smul_sat(const APInt &RHS) const {
  bool Overflowed;
  APInt Product = smul_ov(RHS, Overflowed);
  if (!Overflowed)
    return Product;

  // The true product is negative iff exactly one input is negative.
  if (isNegative() ^ RHS.isNegative())
    return APInt::getSignedMinValue(BitWidth);
  return APInt::getSignedMaxValue(BitWidth);
}
2046

2047
/// Saturating unsigned multiplication: on overflow the result is the
/// unsigned maximum.
APInt APInt::umul_sat(const APInt &RHS) const {
  bool Overflowed;
  APInt Product = umul_ov(RHS, Overflowed);
  if (Overflowed)
    return APInt::getMaxValue(BitWidth);
  return Product;
}
2055

2056
/// Saturating signed shift-left, taking the amount as an APInt. The amount is
/// limited to the bit width before forwarding to the unsigned overload.
APInt APInt::sshl_sat(const APInt &RHS) const {
  const unsigned Amount =
      static_cast<unsigned>(RHS.getLimitedValue(getBitWidth()));
  return sshl_sat(Amount);
}
2059

2060
/// Saturating signed shift-left: on overflow the result is the signed minimum
/// when this value is negative, and the signed maximum otherwise.
APInt APInt::sshl_sat(unsigned RHS) const {
  bool Overflowed;
  APInt Shifted = sshl_ov(RHS, Overflowed);
  if (Overflowed)
    return isNegative() ? APInt::getSignedMinValue(BitWidth)
                        : APInt::getSignedMaxValue(BitWidth);
  return Shifted;
}
2069

2070
/// Saturating unsigned shift-left, taking the amount as an APInt. The amount
/// is limited to the bit width before forwarding to the unsigned overload.
APInt APInt::ushl_sat(const APInt &RHS) const {
  const unsigned Amount =
      static_cast<unsigned>(RHS.getLimitedValue(getBitWidth()));
  return ushl_sat(Amount);
}
2073

2074
/// Saturating unsigned shift-left: on overflow the result is the unsigned
/// maximum.
APInt APInt::ushl_sat(unsigned RHS) const {
  bool Overflowed;
  APInt Shifted = ushl_ov(RHS, Overflowed);
  if (Overflowed)
    return APInt::getMaxValue(BitWidth);
  return Shifted;
}
2082

2083
void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
2084
  // Check our assumptions here
2085
  assert(!str.empty() && "Invalid string length");
2086
  assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
2087
          radix == 36) &&
2088
         "Radix should be 2, 8, 10, 16, or 36!");
2089

2090
  StringRef::iterator p = str.begin();
2091
  size_t slen = str.size();
2092
  bool isNeg = *p == '-';
2093
  if (*p == '-' || *p == '+') {
2094
    p++;
2095
    slen--;
2096
    assert(slen && "String is only a sign, needs a value.");
2097
  }
2098
  assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2099
  assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2100
  assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2101
  assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
2102
         "Insufficient bit width");
2103

2104
  // Allocate memory if needed
2105
  if (isSingleWord())
2106
    U.VAL = 0;
2107
  else
2108
    U.pVal = getClearedMemory(getNumWords());
2109

2110
  // Figure out if we can shift instead of multiply
2111
  unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2112

2113
  // Enter digit traversal loop
2114
  for (StringRef::iterator e = str.end(); p != e; ++p) {
2115
    unsigned digit = getDigit(*p, radix);
2116
    assert(digit < radix && "Invalid character in digit string");
2117

2118
    // Shift or multiply the value by the radix
2119
    if (slen > 1) {
2120
      if (shift)
2121
        *this <<= shift;
2122
      else
2123
        *this *= radix;
2124
    }
2125

2126
    // Add in the digit we just interpreted
2127
    *this += digit;
2128
  }
2129
  // If its negative, put it in two's complement form
2130
  if (isNeg)
2131
    this->negate();
2132
}
2133

2134
/// Appends the textual form of this value to Str.
///
/// \param Radix            2, 8, 10, 16 or 36.
/// \param Signed           interpret the bits as two's complement and emit a
///                         leading '-' for negative values.
/// \param formatAsCLiteral emit a "0b", "0" or "0x" prefix for radix 2, 8 or
///                         16 (nothing for 10; radix 36 is not allowed).
/// \param UpperCase        use upper-case letters for digits above 9.
/// \param InsertSeparators insert a ' between digit groups (groups of 3 for
///                         radix 8 and 10, groups of 4 otherwise).
void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed,
                     bool formatAsCLiteral, bool UpperCase,
                     bool InsertSeparators) const {
  assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
          Radix == 36) &&
         "Radix should be 2, 8, 10, 16, or 36!");

  const char *Prefix = "";
  if (formatAsCLiteral) {
    switch (Radix) {
      case 2:
        // Binary literals are a non-standard extension added in gcc 4.3:
        // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
        Prefix = "0b";
        break;
      case 8:
        Prefix = "0";
        break;
      case 10:
        break; // No prefix
      case 16:
        Prefix = "0x";
        break;
      default:
        llvm_unreachable("Invalid radix!");
    }
  }

  // Number of digits in a group between separators.
  unsigned Grouping = (Radix == 8 || Radix == 10) ? 3 : 4;

  // First, check for a zero value and just short circuit the logic below.
  if (isZero()) {
    for (; *Prefix; ++Prefix)
      Str.push_back(*Prefix);
    Str.push_back('0');
    return;
  }

  // Lower-case digits occupy [0, 36), upper-case digits [36, 72).
  static const char BothDigits[] = "0123456789abcdefghijklmnopqrstuvwxyz"
                                   "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
  const char *Digits = BothDigits + (UpperCase ? 36 : 0);

  if (isSingleWord()) {
    // Digits are written from the end of the buffer toward the front. The
    // worst case is radix 2 with all 64 bits significant and separators
    // enabled: 64 digits plus 15 separators. Size the buffer for that so the
    // write pointer can never run off the front.
    char Buffer[64 + 15];
    char *BufPtr = std::end(Buffer);

    uint64_t N;
    if (!Signed) {
      N = getZExtValue();
    } else {
      int64_t I = getSExtValue();
      if (I >= 0) {
        N = I;
      } else {
        Str.push_back('-');
        // Negate in unsigned arithmetic so INT64_MIN is handled.
        N = -(uint64_t)I;
      }
    }

    for (; *Prefix; ++Prefix)
      Str.push_back(*Prefix);

    int Pos = 0;
    while (N) {
      if (InsertSeparators && Pos % Grouping == 0 && Pos > 0)
        *--BufPtr = '\'';
      *--BufPtr = Digits[N % Radix];
      N /= Radix;
      Pos++;
    }
    Str.append(BufPtr, std::end(Buffer));
    return;
  }

  APInt Tmp(*this);

  if (Signed && isNegative()) {
    // They want to print the signed version and it is a negative value
    // Flip the bits and add one to turn it into the equivalent positive
    // value and put a '-' in the result.
    Tmp.negate();
    Str.push_back('-');
  }

  for (; *Prefix; ++Prefix)
    Str.push_back(*Prefix);

  // We insert the digits backward, then reverse them to get the right order.
  unsigned StartDig = Str.size();

  // For the 2, 8 and 16 bit cases, we can just shift instead of divide
  // because the number of bits per digit (1, 3 and 4 respectively) divides
  // equally.  We just shift until the value is zero.
  if (Radix == 2 || Radix == 8 || Radix == 16) {
    // Just shift tmp right for each digit width until it becomes zero
    unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
    unsigned MaskAmt = Radix - 1;

    int Pos = 0;
    while (Tmp.getBoolValue()) {
      unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
      if (InsertSeparators && Pos % Grouping == 0 && Pos > 0)
        Str.push_back('\'');

      Str.push_back(Digits[Digit]);
      Tmp.lshrInPlace(ShiftAmt);
      Pos++;
    }
  } else {
    int Pos = 0;
    while (Tmp.getBoolValue()) {
      // Peel off the least significant digit; Tmp becomes the quotient.
      uint64_t Digit;
      udivrem(Tmp, Radix, Tmp, Digit);
      assert(Digit < Radix && "divide failed");
      if (InsertSeparators && Pos % Grouping == 0 && Pos > 0)
        Str.push_back('\'');

      Str.push_back(Digits[Digit]);
      Pos++;
    }
  }

  // Reverse the digits before returning.
  std::reverse(Str.begin()+StartDig, Str.end());
}
2266

2267
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2268
LLVM_DUMP_METHOD void APInt::dump() const {
2269
  SmallString<40> S, U;
2270
  this->toStringUnsigned(U);
2271
  this->toStringSigned(S);
2272
  dbgs() << "APInt(" << BitWidth << "b, "
2273
         << U << "u " << S << "s)\n";
2274
}
2275
#endif
2276

2277
/// Streams this value to OS in decimal, signed or unsigned as requested,
/// without a literal prefix.
void APInt::print(raw_ostream &OS, bool isSigned) const {
  SmallString<40> Text;
  toString(Text, /* Radix = */ 10, isSigned, /* formatAsCLiteral = */ false);
  OS << Text;
}
2282

2283
// This implements a variety of operations on a representation of
2284
// arbitrary precision, two's-complement, bignum integer values.
2285

2286
// Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2287
// and unrestricting assumption.
2288
static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2289
              "Part width must be divisible by 2!");
2290

2291
// Returns the integer part with the least significant BITS set.
2292
// BITS cannot be zero.
2293
static inline APInt::WordType lowBitMask(unsigned bits) {
2294
  assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2295
  return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2296
}
2297

2298
/// Returns the value of the lower half of PART.
2299
static inline APInt::WordType lowHalf(APInt::WordType part) {
2300
  return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2301
}
2302

2303
/// Returns the value of the upper half of PART.
2304
static inline APInt::WordType highHalf(APInt::WordType part) {
2305
  return part >> (APInt::APINT_BITS_PER_WORD / 2);
2306
}
2307

2308
/// Sets the least significant part of a bignum to the input value, and zeroes
2309
/// out higher parts.
2310
void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2311
  assert(parts > 0);
2312
  dst[0] = part;
2313
  for (unsigned i = 1; i < parts; i++)
2314
    dst[i] = 0;
2315
}
2316

2317
/// Assign one bignum to another.
2318
void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2319
  for (unsigned i = 0; i < parts; i++)
2320
    dst[i] = src[i];
2321
}
2322

2323
/// Returns true if a bignum is zero, false otherwise.
2324
bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2325
  for (unsigned i = 0; i < parts; i++)
2326
    if (src[i])
2327
      return false;
2328

2329
  return true;
2330
}
2331

2332
/// Extract the given bit of a bignum; returns 0 or 1.
2333
int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2334
  return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2335
}
2336

2337
/// Set the given bit of a bignum.
2338
void APInt::tcSetBit(WordType *parts, unsigned bit) {
2339
  parts[whichWord(bit)] |= maskBit(bit);
2340
}
2341

2342
/// Clears the given bit of a bignum.
2343
void APInt::tcClearBit(WordType *parts, unsigned bit) {
2344
  parts[whichWord(bit)] &= ~maskBit(bit);
2345
}
2346

2347
/// Returns the bit number of the least significant set bit of a number.  If the
2348
/// input number has no bits set UINT_MAX is returned.
2349
unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2350
  for (unsigned i = 0; i < n; i++) {
2351
    if (parts[i] != 0) {
2352
      unsigned lsb = llvm::countr_zero(parts[i]);
2353
      return lsb + i * APINT_BITS_PER_WORD;
2354
    }
2355
  }
2356

2357
  return UINT_MAX;
2358
}
2359

2360
/// Returns the bit number of the most significant set bit of a number.
2361
/// If the input number has no bits set UINT_MAX is returned.
2362
unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2363
  do {
2364
    --n;
2365

2366
    if (parts[n] != 0) {
2367
      static_assert(sizeof(parts[n]) <= sizeof(uint64_t));
2368
      unsigned msb = llvm::Log2_64(parts[n]);
2369

2370
      return msb + n * APINT_BITS_PER_WORD;
2371
    }
2372
  } while (n);
2373

2374
  return UINT_MAX;
2375
}
2376

2377
/// Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to
2378
/// DST, of dstCOUNT parts, such that the bit srcLSB becomes the least
2379
/// significant bit of DST.  All high bits above srcBITS in DST are zero-filled.
2380
/// */
2381
void
2382
APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2383
                 unsigned srcBits, unsigned srcLSB) {
2384
  unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2385
  assert(dstParts <= dstCount);
2386

2387
  unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2388
  tcAssign(dst, src + firstSrcPart, dstParts);
2389

2390
  unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2391
  tcShiftRight(dst, dstParts, shift);
2392

2393
  // We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2394
  // in DST.  If this is less that srcBits, append the rest, else
2395
  // clear the high bits.
2396
  unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2397
  if (n < srcBits) {
2398
    WordType mask = lowBitMask (srcBits - n);
2399
    dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2400
                          << n % APINT_BITS_PER_WORD);
2401
  } else if (n > srcBits) {
2402
    if (srcBits % APINT_BITS_PER_WORD)
2403
      dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2404
  }
2405

2406
  // Clear high parts.
2407
  while (dstParts < dstCount)
2408
    dst[dstParts++] = 0;
2409
}
2410

2411
//// DST += RHS + C where C is zero or one.  Returns the carry flag.
2412
APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2413
                             WordType c, unsigned parts) {
2414
  assert(c <= 1);
2415

2416
  for (unsigned i = 0; i < parts; i++) {
2417
    WordType l = dst[i];
2418
    if (c) {
2419
      dst[i] += rhs[i] + 1;
2420
      c = (dst[i] <= l);
2421
    } else {
2422
      dst[i] += rhs[i];
2423
      c = (dst[i] < l);
2424
    }
2425
  }
2426

2427
  return c;
2428
}
2429

2430
/// This function adds a single "word" integer, src, to the multiple
2431
/// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2432
/// 1 is returned if there is a carry out, otherwise 0 is returned.
2433
/// @returns the carry of the addition.
2434
APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2435
                                 unsigned parts) {
2436
  for (unsigned i = 0; i < parts; ++i) {
2437
    dst[i] += src;
2438
    if (dst[i] >= src)
2439
      return 0; // No need to carry so exit early.
2440
    src = 1; // Carry one to next digit.
2441
  }
2442

2443
  return 1;
2444
}
2445

2446
/// DST -= RHS + C where C is zero or one.  Returns the carry flag.
2447
APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2448
                                  WordType c, unsigned parts) {
2449
  assert(c <= 1);
2450

2451
  for (unsigned i = 0; i < parts; i++) {
2452
    WordType l = dst[i];
2453
    if (c) {
2454
      dst[i] -= rhs[i] + 1;
2455
      c = (dst[i] >= l);
2456
    } else {
2457
      dst[i] -= rhs[i];
2458
      c = (dst[i] > l);
2459
    }
2460
  }
2461

2462
  return c;
2463
}
2464

2465
/// This function subtracts a single "word" (64-bit word), src, from
2466
/// the multi-word integer array, dst[], propagating the borrowed 1 value until
2467
/// no further borrowing is needed or it runs out of "words" in dst.  The result
2468
/// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2469
/// exhausted. In other words, if src > dst then this function returns 1,
2470
/// otherwise 0.
2471
/// @returns the borrow out of the subtraction
2472
APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2473
                                      unsigned parts) {
2474
  for (unsigned i = 0; i < parts; ++i) {
2475
    WordType Dst = dst[i];
2476
    dst[i] -= src;
2477
    if (src <= Dst)
2478
      return 0; // No need to borrow so exit early.
2479
    src = 1; // We have to "borrow 1" from next "word"
2480
  }
2481

2482
  return 1;
2483
}
2484

2485
/// Negate a bignum in-place.
2486
void APInt::tcNegate(WordType *dst, unsigned parts) {
2487
  tcComplement(dst, parts);
2488
  tcIncrement(dst, parts);
2489
}
2490

2491
/// DST += SRC * MULTIPLIER + CARRY   if add is true
2492
/// DST  = SRC * MULTIPLIER + CARRY   if add is false
2493
/// Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2494
/// they must start at the same point, i.e. DST == SRC.
2495
/// If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2496
/// returned.  Otherwise DST is filled with the least significant
2497
/// DSTPARTS parts of the result, and if all of the omitted higher
2498
/// parts were zero return zero, otherwise overflow occurred and
2499
/// return one.
2500
int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2501
                          WordType multiplier, WordType carry,
2502
                          unsigned srcParts, unsigned dstParts,
2503
                          bool add) {
2504
  // Otherwise our writes of DST kill our later reads of SRC.
2505
  assert(dst <= src || dst >= src + srcParts);
2506
  assert(dstParts <= srcParts + 1);
2507

2508
  // N loops; minimum of dstParts and srcParts.
2509
  unsigned n = std::min(dstParts, srcParts);
2510

2511
  for (unsigned i = 0; i < n; i++) {
2512
    // [LOW, HIGH] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2513
    // This cannot overflow, because:
2514
    //   (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2515
    // which is less than n^2.
2516
    WordType srcPart = src[i];
2517
    WordType low, mid, high;
2518
    if (multiplier == 0 || srcPart == 0) {
2519
      low = carry;
2520
      high = 0;
2521
    } else {
2522
      low = lowHalf(srcPart) * lowHalf(multiplier);
2523
      high = highHalf(srcPart) * highHalf(multiplier);
2524

2525
      mid = lowHalf(srcPart) * highHalf(multiplier);
2526
      high += highHalf(mid);
2527
      mid <<= APINT_BITS_PER_WORD / 2;
2528
      if (low + mid < low)
2529
        high++;
2530
      low += mid;
2531

2532
      mid = highHalf(srcPart) * lowHalf(multiplier);
2533
      high += highHalf(mid);
2534
      mid <<= APINT_BITS_PER_WORD / 2;
2535
      if (low + mid < low)
2536
        high++;
2537
      low += mid;
2538

2539
      // Now add carry.
2540
      if (low + carry < low)
2541
        high++;
2542
      low += carry;
2543
    }
2544

2545
    if (add) {
2546
      // And now DST[i], and store the new low part there.
2547
      if (low + dst[i] < low)
2548
        high++;
2549
      dst[i] += low;
2550
    } else
2551
      dst[i] = low;
2552

2553
    carry = high;
2554
  }
2555

2556
  if (srcParts < dstParts) {
2557
    // Full multiplication, there is no overflow.
2558
    assert(srcParts + 1 == dstParts);
2559
    dst[srcParts] = carry;
2560
    return 0;
2561
  }
2562

2563
  // We overflowed if there is carry.
2564
  if (carry)
2565
    return 1;
2566

2567
  // We would overflow if any significant unwritten parts would be
2568
  // non-zero.  This is true if any remaining src parts are non-zero
2569
  // and the multiplier is non-zero.
2570
  if (multiplier)
2571
    for (unsigned i = dstParts; i < srcParts; i++)
2572
      if (src[i])
2573
        return 1;
2574

2575
  // We fitted in the narrow destination.
2576
  return 0;
2577
}
2578

2579
/// DST = LHS * RHS, where DST has the same width as the operands and
2580
/// is filled with the least significant parts of the result.  Returns
2581
/// one if overflow occurred, otherwise zero.  DST must be disjoint
2582
/// from both operands.
2583
int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2584
                      const WordType *rhs, unsigned parts) {
2585
  assert(dst != lhs && dst != rhs);
2586

2587
  int overflow = 0;
2588

2589
  for (unsigned i = 0; i < parts; i++) {
2590
    // Don't accumulate on the first iteration so we don't need to initalize
2591
    // dst to 0.
2592
    overflow |=
2593
        tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts, parts - i, i != 0);
2594
  }
2595

2596
  return overflow;
2597
}
2598

2599
/// DST = LHS * RHS, where DST has width the sum of the widths of the
2600
/// operands. No overflow occurs. DST must be disjoint from both operands.
2601
void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2602
                           const WordType *rhs, unsigned lhsParts,
2603
                           unsigned rhsParts) {
2604
  // Put the narrower number on the LHS for less loops below.
2605
  if (lhsParts > rhsParts)
2606
    return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2607

2608
  assert(dst != lhs && dst != rhs);
2609

2610
  for (unsigned i = 0; i < lhsParts; i++) {
2611
    // Don't accumulate on the first iteration so we don't need to initalize
2612
    // dst to 0.
2613
    tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, i != 0);
2614
  }
2615
}
2616

2617
// If RHS is zero LHS and REMAINDER are left unchanged, return one.
2618
// Otherwise set LHS to LHS / RHS with the fractional part discarded,
2619
// set REMAINDER to the remainder, return zero.  i.e.
2620
//
2621
//   OLD_LHS = RHS * LHS + REMAINDER
2622
//
2623
// SCRATCH is a bignum of the same size as the operands and result for
2624
// use by the routine; its contents need not be initialized and are
2625
// destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2626
int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2627
                    WordType *remainder, WordType *srhs,
2628
                    unsigned parts) {
2629
  assert(lhs != remainder && lhs != srhs && remainder != srhs);
2630

2631
  unsigned shiftCount = tcMSB(rhs, parts) + 1;
2632
  if (shiftCount == 0)
2633
    return true;
2634

2635
  shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2636
  unsigned n = shiftCount / APINT_BITS_PER_WORD;
2637
  WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2638

2639
  tcAssign(srhs, rhs, parts);
2640
  tcShiftLeft(srhs, parts, shiftCount);
2641
  tcAssign(remainder, lhs, parts);
2642
  tcSet(lhs, 0, parts);
2643

2644
  // Loop, subtracting SRHS if REMAINDER is greater and adding that to the
2645
  // total.
2646
  for (;;) {
2647
    int compare = tcCompare(remainder, srhs, parts);
2648
    if (compare >= 0) {
2649
      tcSubtract(remainder, srhs, 0, parts);
2650
      lhs[n] |= mask;
2651
    }
2652

2653
    if (shiftCount == 0)
2654
      break;
2655
    shiftCount--;
2656
    tcShiftRight(srhs, parts, 1);
2657
    if ((mask >>= 1) == 0) {
2658
      mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2659
      n--;
2660
    }
2661
  }
2662

2663
  return false;
2664
}
2665

2666
/// Shift a bignum left Count bits in-place. Shifted in bits are zero. There are
2667
/// no restrictions on Count.
2668
void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2669
  // Don't bother performing a no-op shift.
2670
  if (!Count)
2671
    return;
2672

2673
  // WordShift is the inter-part shift; BitShift is the intra-part shift.
2674
  unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2675
  unsigned BitShift = Count % APINT_BITS_PER_WORD;
2676

2677
  // Fastpath for moving by whole words.
2678
  if (BitShift == 0) {
2679
    std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2680
  } else {
2681
    while (Words-- > WordShift) {
2682
      Dst[Words] = Dst[Words - WordShift] << BitShift;
2683
      if (Words > WordShift)
2684
        Dst[Words] |=
2685
          Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2686
    }
2687
  }
2688

2689
  // Fill in the remainder with 0s.
2690
  std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2691
}
2692

2693
/// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2694
/// are no restrictions on Count.
2695
void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2696
  // Don't bother performing a no-op shift.
2697
  if (!Count)
2698
    return;
2699

2700
  // WordShift is the inter-part shift; BitShift is the intra-part shift.
2701
  unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2702
  unsigned BitShift = Count % APINT_BITS_PER_WORD;
2703

2704
  unsigned WordsToMove = Words - WordShift;
2705
  // Fastpath for moving by whole words.
2706
  if (BitShift == 0) {
2707
    std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2708
  } else {
2709
    for (unsigned i = 0; i != WordsToMove; ++i) {
2710
      Dst[i] = Dst[i + WordShift] >> BitShift;
2711
      if (i + 1 != WordsToMove)
2712
        Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2713
    }
2714
  }
2715

2716
  // Fill in the remainder with 0s.
2717
  std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2718
}
2719

2720
// Comparison (unsigned) of two bignums.
2721
int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2722
                     unsigned parts) {
2723
  while (parts) {
2724
    parts--;
2725
    if (lhs[parts] != rhs[parts])
2726
      return (lhs[parts] > rhs[parts]) ? 1 : -1;
2727
  }
2728

2729
  return 0;
2730
}
2731

2732
/// Unsigned division of \p A by \p B, rounded according to \p RM.
///
/// DOWN and TOWARD_ZERO both map to plain udiv. UP adds one to the quotient
/// whenever the division leaves a non-zero remainder.
APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
                                   APInt::Rounding RM) {
  switch (RM) {
  case APInt::Rounding::UP: {
    APInt Quo, Rem;
    APInt::udivrem(A, B, Quo, Rem);
    // udivrem currently rounds down, so only an inexact result needs a bump.
    if (!Rem.isZero())
      return Quo + 1;
    return Quo;
  }
  // For unsigned values rounding down and rounding toward zero coincide, and
  // that is what udiv currently does.
  case APInt::Rounding::DOWN:
  case APInt::Rounding::TOWARD_ZERO:
    return A.udiv(B);
  }
  llvm_unreachable("Unknown APInt::Rounding enum");
}
2749

2750
/// Signed division of \p A by \p B, rounded according to \p RM.
///
/// TOWARD_ZERO maps to plain sdiv. For DOWN and UP the quotient produced by
/// sdivrem is adjusted by one when the division is inexact and the requested
/// direction requires it.
APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
                                   APInt::Rounding RM) {
  switch (RM) {
  // Currently sdiv rounds towards zero.
  case APInt::Rounding::TOWARD_ZERO:
    return A.sdiv(B);
  case APInt::Rounding::DOWN:
  case APInt::Rounding::UP: {
    APInt Quo, Rem;
    APInt::sdivrem(A, B, Quo, Rem);
    // An exact division needs no adjustment in either direction.
    if (Rem.isZero())
      return Quo;

    // This deals with whatever rounding sdivrem itself applies: decide
    // whether the non-integer part of the mathematical quotient is negative
    // by comparing the sign of the remainder with the sign of the divisor.
    const bool FractionIsNegative = Rem.isNegative() != B.isNegative();

    if (RM == APInt::Rounding::DOWN) {
      // A negative fraction means Quo sits above the exact value, so step
      // down; otherwise Quo is already rounded down.
      if (FractionIsNegative)
        return Quo - 1;
      return Quo;
    }

    // Rounding up: a negative fraction means Quo is already the ceiling;
    // otherwise step up.
    if (FractionIsNegative)
      return Quo;
    return Quo + 1;
  }
  }
  llvm_unreachable("Unknown APInt::Rounding enum");
}
2779

2780
std::optional<APInt>
2781
llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2782
                                           unsigned RangeWidth) {
2783
  unsigned CoeffWidth = A.getBitWidth();
2784
  assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
2785
  assert(RangeWidth <= CoeffWidth &&
2786
         "Value range width should be less than coefficient width");
2787
  assert(RangeWidth > 1 && "Value range bit width should be > 1");
2788

2789
  LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
2790
                    << "x + " << C << ", rw:" << RangeWidth << '\n');
2791

2792
  // Identify 0 as a (non)solution immediately.
2793
  if (C.sextOrTrunc(RangeWidth).isZero()) {
2794
    LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
2795
    return APInt(CoeffWidth, 0);
2796
  }
2797

2798
  // The result of APInt arithmetic has the same bit width as the operands,
2799
  // so it can actually lose high bits. A product of two n-bit integers needs
2800
  // 2n-1 bits to represent the full value.
2801
  // The operation done below (on quadratic coefficients) that can produce
2802
  // the largest value is the evaluation of the equation during bisection,
2803
  // which needs 3 times the bitwidth of the coefficient, so the total number
2804
  // of required bits is 3n.
2805
  //
2806
  // The purpose of this extension is to simulate the set Z of all integers,
2807
  // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2808
  // and negative numbers (not so much in a modulo arithmetic). The method
2809
  // used to solve the equation is based on the standard formula for real
2810
  // numbers, and uses the concepts of "positive" and "negative" with their
2811
  // usual meanings.
2812
  CoeffWidth *= 3;
2813
  A = A.sext(CoeffWidth);
2814
  B = B.sext(CoeffWidth);
2815
  C = C.sext(CoeffWidth);
2816

2817
  // Make A > 0 for simplicity. Negate cannot overflow at this point because
2818
  // the bit width has increased.
2819
  if (A.isNegative()) {
2820
    A.negate();
2821
    B.negate();
2822
    C.negate();
2823
  }
2824

2825
  // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2826
  // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2827
  // and R = 2^BitWidth.
2828
  // Since we're trying not only to find exact solutions, but also values
2829
  // that "wrap around", such a set will always have a solution, i.e. an x
2830
  // that satisfies at least one of the equations, or such that |q(x)|
2831
  // exceeds kR, while |q(x-1)| for the same k does not.
2832
  //
2833
  // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2834
  // positive solution n (in the above sense), and also such that the n
2835
  // will be the least among all solutions corresponding to k = 0, 1, ...
2836
  // (more precisely, the least element in the set
2837
  //   { n(k) | k is such that a solution n(k) exists }).
2838
  //
2839
  // Consider the parabola (over real numbers) that corresponds to the
2840
  // quadratic equation. Since A > 0, the arms of the parabola will point
2841
  // up. Picking different values of k will shift it up and down by R.
2842
  //
2843
  // We want to shift the parabola in such a way as to reduce the problem
2844
  // of solving q(x) = kR to solving shifted_q(x) = 0.
2845
  // (The interesting solutions are the ceilings of the real number
2846
  // solutions.)
2847
  APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
2848
  APInt TwoA = 2 * A;
2849
  APInt SqrB = B * B;
2850
  bool PickLow;
2851

2852
  auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
2853
    assert(A.isStrictlyPositive());
2854
    APInt T = V.abs().urem(A);
2855
    if (T.isZero())
2856
      return V;
2857
    return V.isNegative() ? V+T : V+(A-T);
2858
  };
2859

2860
  // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2861
  // iff B is positive.
2862
  if (B.isNonNegative()) {
2863
    // If B >= 0, the vertex it at a negative location (or at 0), so in
2864
    // order to have a non-negative solution we need to pick k that makes
2865
    // C-kR negative. To satisfy all the requirements for the solution
2866
    // that we are looking for, it needs to be closest to 0 of all k.
2867
    C = C.srem(R);
2868
    if (C.isStrictlyPositive())
2869
      C -= R;
2870
    // Pick the greater solution.
2871
    PickLow = false;
2872
  } else {
2873
    // If B < 0, the vertex is at a positive location. For any solution
2874
    // to exist, the discriminant must be non-negative. This means that
2875
    // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2876
    // lower bound on values of k: kR >= C - B^2/4A.
2877
    APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
2878
    // Round LowkR up (towards +inf) to the nearest kR.
2879
    LowkR = RoundUp(LowkR, R);
2880

2881
    // If there exists k meeting the condition above, and such that
2882
    // C-kR > 0, there will be two positive real number solutions of
2883
    // q(x) = kR. Out of all such values of k, pick the one that makes
2884
    // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2885
    // In other words, find maximum k such that LowkR <= kR < C.
2886
    if (C.sgt(LowkR)) {
2887
      // If LowkR < C, then such a k is guaranteed to exist because
2888
      // LowkR itself is a multiple of R.
2889
      C -= -RoundUp(-C, R);      // C = C - RoundDown(C, R)
2890
      // Pick the smaller solution.
2891
      PickLow = true;
2892
    } else {
2893
      // If C-kR < 0 for all potential k's, it means that one solution
2894
      // will be negative, while the other will be positive. The positive
2895
      // solution will shift towards 0 if the parabola is moved up.
2896
      // Pick the kR closest to the lower bound (i.e. make C-kR closest
2897
      // to 0, or in other words, out of all parabolas that have solutions,
2898
      // pick the one that is the farthest "up").
2899
      // Since LowkR is itself a multiple of R, simply take C-LowkR.
2900
      C -= LowkR;
2901
      // Pick the greater solution.
2902
      PickLow = false;
2903
    }
2904
  }
2905

2906
  LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
2907
                    << B << "x + " << C << ", rw:" << RangeWidth << '\n');
2908

2909
  APInt D = SqrB - 4*A*C;
2910
  assert(D.isNonNegative() && "Negative discriminant");
2911
  APInt SQ = D.sqrt();
2912

2913
  APInt Q = SQ * SQ;
2914
  bool InexactSQ = Q != D;
2915
  // The calculated SQ may actually be greater than the exact (non-integer)
2916
  // value. If that's the case, decrement SQ to get a value that is lower.
2917
  if (Q.sgt(D))
2918
    SQ -= 1;
2919

2920
  APInt X;
2921
  APInt Rem;
2922

2923
  // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
2924
  // When using the quadratic formula directly, the calculated low root
2925
  // may be greater than the exact one, since we would be subtracting SQ.
2926
  // To make sure that the calculated root is not greater than the exact
2927
  // one, subtract SQ+1 when calculating the low root (for inexact value
2928
  // of SQ).
2929
  if (PickLow)
2930
    APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
2931
  else
2932
    APInt::sdivrem(-B + SQ, TwoA, X, Rem);
2933

2934
  // The updated coefficients should be such that the (exact) solution is
2935
  // positive. Since APInt division rounds towards 0, the calculated one
2936
  // can be 0, but cannot be negative.
2937
  assert(X.isNonNegative() && "Solution should be non-negative");
2938

2939
  if (!InexactSQ && Rem.isZero()) {
2940
    LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
2941
    return X;
2942
  }
2943

2944
  assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
2945
  // The exact value of the square root of D should be between SQ and SQ+1.
2946
  // This implies that the solution should be between that corresponding to
2947
  // SQ (i.e. X) and that corresponding to SQ+1.
2948
  //
2949
  // The calculated X cannot be greater than the exact (real) solution.
2950
  // Actually it must be strictly less than the exact solution, while
2951
  // X+1 will be greater than or equal to it.
2952

2953
  APInt VX = (A*X + B)*X + C;
2954
  APInt VY = VX + TwoA*X + A + B;
2955
  bool SignChange =
2956
      VX.isNegative() != VY.isNegative() || VX.isZero() != VY.isZero();
2957
  // If the sign did not change between X and X+1, X is not a valid solution.
2958
  // This could happen when the actual (exact) roots don't have an integer
2959
  // between them, so they would both be contained between X and X+1.
2960
  if (!SignChange) {
2961
    LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
2962
    return std::nullopt;
2963
  }
2964

2965
  X += 1;
2966
  LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
2967
  return X;
2968
}
2969

2970
std::optional<unsigned>
2971
llvm::APIntOps::GetMostSignificantDifferentBit(const APInt &A, const APInt &B) {
2972
  assert(A.getBitWidth() == B.getBitWidth() && "Must have the same bitwidth");
2973
  if (A == B)
2974
    return std::nullopt;
2975
  return A.getBitWidth() - ((A ^ B).countl_zero() + 1);
2976
}
2977

2978
/// Rescale the bit mask \p A to \p NewBitWidth bits.
///
/// One of the two widths must be a whole multiple of the other. When
/// widening, every set source bit is replicated into a run of Scale result
/// bits. When narrowing, each group of Scale source bits collapses into one
/// result bit, which is set if all bits of the group are set
/// (\p MatchAllBits) or if any bit of the group is set (otherwise).
APInt llvm::APIntOps::ScaleBitMask(const APInt &A, unsigned NewBitWidth,
                                   bool MatchAllBits) {
  const unsigned OldBitWidth = A.getBitWidth();
  assert((((OldBitWidth % NewBitWidth) == 0) ||
          ((NewBitWidth % OldBitWidth) == 0)) &&
         "One size should be a multiple of the other one. "
         "Can't do fractional scaling.");

  // Same width: nothing to rescale.
  if (OldBitWidth == NewBitWidth)
    return A;

  APInt NewA = APInt::getZero(NewBitWidth);

  // An all-zero mask stays all-zero at any width.
  if (A.isZero())
    return NewA;

  if (NewBitWidth > OldBitWidth) {
    // Widening: repeat each set bit Scale times.
    const unsigned Scale = NewBitWidth / OldBitWidth;
    for (unsigned Bit = 0; Bit != OldBitWidth; ++Bit) {
      if (A[Bit])
        NewA.setBits(Bit * Scale, (Bit + 1) * Scale);
    }
    return NewA;
  }

  // Narrowing: fold each group of Scale bits into a single bit.
  const unsigned Scale = OldBitWidth / NewBitWidth;
  for (unsigned Bit = 0; Bit != NewBitWidth; ++Bit) {
    const APInt Group = A.extractBits(Scale, Bit * Scale);
    const bool Selected = MatchAllBits ? Group.isAllOnes() : !Group.isZero();
    if (Selected)
      NewA.setBit(Bit);
  }

  return NewA;
}
3017

3018
/// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
3019
/// with the integer held in IntVal.
3020
void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
3021
                            unsigned StoreBytes) {
3022
  assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
3023
  const uint8_t *Src = (const uint8_t *)IntVal.getRawData();
3024

3025
  if (sys::IsLittleEndianHost) {
3026
    // Little-endian host - the source is ordered from LSB to MSB.  Order the
3027
    // destination from LSB to MSB: Do a straight copy.
3028
    memcpy(Dst, Src, StoreBytes);
3029
  } else {
3030
    // Big-endian host - the source is an array of 64 bit words ordered from
3031
    // LSW to MSW.  Each word is ordered from MSB to LSB.  Order the destination
3032
    // from MSB to LSB: Reverse the word order, but not the bytes in a word.
3033
    while (StoreBytes > sizeof(uint64_t)) {
3034
      StoreBytes -= sizeof(uint64_t);
3035
      // May not be aligned so use memcpy.
3036
      memcpy(Dst + StoreBytes, Src, sizeof(uint64_t));
3037
      Src += sizeof(uint64_t);
3038
    }
3039

3040
    memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes);
3041
  }
3042
}
3043

3044
/// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
3045
/// from Src into IntVal, which is assumed to be wide enough and to hold zero.
3046
void llvm::LoadIntFromMemory(APInt &IntVal, const uint8_t *Src,
3047
                             unsigned LoadBytes) {
3048
  assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!");
3049
  uint8_t *Dst = reinterpret_cast<uint8_t *>(
3050
                   const_cast<uint64_t *>(IntVal.getRawData()));
3051

3052
  if (sys::IsLittleEndianHost)
3053
    // Little-endian host - the destination must be ordered from LSB to MSB.
3054
    // The source is ordered from LSB to MSB: Do a straight copy.
3055
    memcpy(Dst, Src, LoadBytes);
3056
  else {
3057
    // Big-endian - the destination is an array of 64 bit words ordered from
3058
    // LSW to MSW.  Each word must be ordered from MSB to LSB.  The source is
3059
    // ordered from MSB to LSB: Reverse the word order, but not the bytes in
3060
    // a word.
3061
    while (LoadBytes > sizeof(uint64_t)) {
3062
      LoadBytes -= sizeof(uint64_t);
3063
      // May not be aligned so use memcpy.
3064
      memcpy(Dst, Src + LoadBytes, sizeof(uint64_t));
3065
      Dst += sizeof(uint64_t);
3066
    }
3067

3068
    memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes);
3069
  }
3070
}
3071

3072
/// Signed floor((C1 + C2) / 2), computed without forming the sum C1 + C2.
APInt APIntOps::avgFloorS(const APInt &C1, const APInt &C2) {
  // Bits set in both operands contribute in full; bits set in only one
  // operand contribute half, with an arithmetic shift for the signed variant.
  const APInt SharedBits = C1 & C2;
  const APInt HalfOfDifferingBits = (C1 ^ C2).ashr(1);
  return SharedBits + HalfOfDifferingBits;
}
3076

3077
/// Unsigned floor((C1 + C2) / 2), computed without forming the sum C1 + C2.
APInt APIntOps::avgFloorU(const APInt &C1, const APInt &C2) {
  // Bits set in both operands contribute in full; bits set in only one
  // operand contribute half, with a logical shift for the unsigned variant.
  const APInt SharedBits = C1 & C2;
  const APInt HalfOfDifferingBits = (C1 ^ C2).lshr(1);
  return SharedBits + HalfOfDifferingBits;
}
3081

3082
/// Signed ceil((C1 + C2) / 2), computed without forming the sum C1 + C2.
APInt APIntOps::avgCeilS(const APInt &C1, const APInt &C2) {
  // Start from the union of the set bits and take back half of the bits set
  // in only one operand, with an arithmetic shift for the signed variant.
  const APInt AllBits = C1 | C2;
  const APInt HalfOfDifferingBits = (C1 ^ C2).ashr(1);
  return AllBits - HalfOfDifferingBits;
}
3086

3087
/// Unsigned ceil((C1 + C2) / 2), computed without forming the sum C1 + C2.
APInt APIntOps::avgCeilU(const APInt &C1, const APInt &C2) {
  // Start from the union of the set bits and take back half of the bits set
  // in only one operand, with a logical shift for the unsigned variant.
  const APInt AllBits = C1 | C2;
  const APInt HalfOfDifferingBits = (C1 ^ C2).lshr(1);
  return AllBits - HalfOfDifferingBits;
}
3091

3092
/// Signed multiply-high: the upper half of the double-width product of the
/// sign-extended operands. Both operands must have the same bit width, which
/// is also the width of the result.
APInt APIntOps::mulhs(const APInt &C1, const APInt &C2) {
  assert(C1.getBitWidth() == C2.getBitWidth() && "Unequal bitwidths");
  const unsigned Width = C1.getBitWidth();
  const unsigned FullWidth = Width * 2;
  // Multiply at twice the width, then keep the top Width bits.
  const APInt Product = C1.sext(FullWidth) * C2.sext(FullWidth);
  return Product.extractBits(Width, Width);
}
3099

3100
/// Unsigned multiply-high: the upper half of the double-width product of the
/// zero-extended operands. Both operands must have the same bit width, which
/// is also the width of the result.
APInt APIntOps::mulhu(const APInt &C1, const APInt &C2) {
  assert(C1.getBitWidth() == C2.getBitWidth() && "Unequal bitwidths");
  const unsigned Width = C1.getBitWidth();
  const unsigned FullWidth = Width * 2;
  // Multiply at twice the width, then keep the top Width bits.
  const APInt Product = C1.zext(FullWidth) * C2.zext(FullWidth);
  return Product.extractBits(Width, Width);
}
3107

3108
Product

Resources

Company