Path: blob/main/contrib/llvm-project/llvm/lib/Support/APInt.cpp
35232 views
//===-- APInt.cpp - Implement APInt class ---------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file implements a class to represent arbitrary precision integer9// constant values and provide a variety of arithmetic operations on them.10//11//===----------------------------------------------------------------------===//1213#include "llvm/ADT/APInt.h"14#include "llvm/ADT/ArrayRef.h"15#include "llvm/ADT/FoldingSet.h"16#include "llvm/ADT/Hashing.h"17#include "llvm/ADT/SmallString.h"18#include "llvm/ADT/StringRef.h"19#include "llvm/ADT/bit.h"20#include "llvm/Config/llvm-config.h"21#include "llvm/Support/Alignment.h"22#include "llvm/Support/Debug.h"23#include "llvm/Support/ErrorHandling.h"24#include "llvm/Support/MathExtras.h"25#include "llvm/Support/raw_ostream.h"26#include <cmath>27#include <optional>2829using namespace llvm;3031#define DEBUG_TYPE "apint"3233/// A utility function for allocating memory, checking for allocation failures,34/// and ensuring the contents are zeroed.35inline static uint64_t* getClearedMemory(unsigned numWords) {36uint64_t *result = new uint64_t[numWords];37memset(result, 0, numWords * sizeof(uint64_t));38return result;39}4041/// A utility function for allocating memory and checking for allocation42/// failure. The content is not zeroed.43inline static uint64_t* getMemory(unsigned numWords) {44return new uint64_t[numWords];45}4647/// A utility function that converts a character to a digit.48inline static unsigned getDigit(char cdigit, uint8_t radix) {49unsigned r;5051if (radix == 16 || radix == 36) {52r = cdigit - '0';53if (r <= 9)54return r;5556r = cdigit - 'A';57if (r <= radix - 11U)58return r + 10;5960r = cdigit - 'a';61if (r <= radix - 11U)62return r + 10;6364radix = 10;65}6667r = cdigit - '0';68if (r < radix)69return r;7071return UINT_MAX;72}737475void APInt::initSlowCase(uint64_t val, bool isSigned) {76U.pVal = getClearedMemory(getNumWords());77U.pVal[0] = val;78if (isSigned && int64_t(val) < 0)79for (unsigned i = 1; i < getNumWords(); ++i)80U.pVal[i] = WORDTYPE_MAX;81clearUnusedBits();82}8384void APInt::initSlowCase(const APInt& that) {85U.pVal = getMemory(getNumWords());86memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);87}8889void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {90assert(bigVal.data() && "Null pointer detected!");91if (isSingleWord())92U.VAL = bigVal[0];93else {94// Get memory, cleared to 095U.pVal = getClearedMemory(getNumWords());96// Calculate the number of words to copy97unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());98// Copy the words from bigVal to pVal99memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);100}101// Make sure unused high bits are cleared102clearUnusedBits();103}104105APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal) : BitWidth(numBits) {106initFromArray(bigVal);107}108109APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])110: BitWidth(numBits) {111initFromArray(ArrayRef(bigVal, numWords));112}113114APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)115: BitWidth(numbits) {116fromString(numbits, Str, radix);117}118119void APInt::reallocate(unsigned NewBitWidth) {120// If the number of words is the same we can just change the width and stop.121if (getNumWords() == getNumWords(NewBitWidth)) {122BitWidth = NewBitWidth;123return;124}125126// If we have an allocation, delete it.127if (!isSingleWord())128delete [] U.pVal;129130// Update BitWidth.131BitWidth = NewBitWidth;132133// If we are supposed to have an allocation, create it.134if (!isSingleWord())135U.pVal = getMemory(getNumWords());136}137138void APInt::assignSlowCase(const APInt &RHS) {139// Don't do anything for X = X140if (this == &RHS)141return;142143// Adjust the bit width and handle allocations as necessary.144reallocate(RHS.getBitWidth());145146// Copy the data.147if (isSingleWord())148U.VAL = RHS.U.VAL;149else150memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);151}152153/// This method 'profiles' an APInt for use with FoldingSet.154void APInt::Profile(FoldingSetNodeID& ID) const {155ID.AddInteger(BitWidth);156157if (isSingleWord()) {158ID.AddInteger(U.VAL);159return;160}161162unsigned NumWords = getNumWords();163for (unsigned i = 0; i < NumWords; ++i)164ID.AddInteger(U.pVal[i]);165}166167bool APInt::isAligned(Align A) const {168if (isZero())169return true;170const unsigned TrailingZeroes = countr_zero();171const unsigned MinimumTrailingZeroes = Log2(A);172return TrailingZeroes >= MinimumTrailingZeroes;173}174175/// Prefix increment operator. Increments the APInt by one.176APInt& APInt::operator++() {177if (isSingleWord())178++U.VAL;179else180tcIncrement(U.pVal, getNumWords());181return clearUnusedBits();182}183184/// Prefix decrement operator. Decrements the APInt by one.185APInt& APInt::operator--() {186if (isSingleWord())187--U.VAL;188else189tcDecrement(U.pVal, getNumWords());190return clearUnusedBits();191}192193/// Adds the RHS APInt to this APInt.194/// @returns this, after addition of RHS.195/// Addition assignment operator.196APInt& APInt::operator+=(const APInt& RHS) {197assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");198if (isSingleWord())199U.VAL += RHS.U.VAL;200else201tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());202return clearUnusedBits();203}204205APInt& APInt::operator+=(uint64_t RHS) {206if (isSingleWord())207U.VAL += RHS;208else209tcAddPart(U.pVal, RHS, getNumWords());210return clearUnusedBits();211}212213/// Subtracts the RHS APInt from this APInt214/// @returns this, after subtraction215/// Subtraction assignment operator.216APInt& APInt::operator-=(const APInt& RHS) {217assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");218if (isSingleWord())219U.VAL -= RHS.U.VAL;220else221tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());222return clearUnusedBits();223}224225APInt& APInt::operator-=(uint64_t RHS) {226if (isSingleWord())227U.VAL -= RHS;228else229tcSubtractPart(U.pVal, RHS, getNumWords());230return clearUnusedBits();231}232233APInt APInt::operator*(const APInt& RHS) const {234assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");235if (isSingleWord())236return APInt(BitWidth, U.VAL * RHS.U.VAL);237238APInt Result(getMemory(getNumWords()), getBitWidth());239tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());240Result.clearUnusedBits();241return Result;242}243244void APInt::andAssignSlowCase(const APInt &RHS) {245WordType *dst = U.pVal, *rhs = RHS.U.pVal;246for (size_t i = 0, e = getNumWords(); i != e; ++i)247dst[i] &= rhs[i];248}249250void APInt::orAssignSlowCase(const APInt &RHS) {251WordType *dst = U.pVal, *rhs = RHS.U.pVal;252for (size_t i = 0, e = getNumWords(); i != e; ++i)253dst[i] |= rhs[i];254}255256void APInt::xorAssignSlowCase(const APInt &RHS) {257WordType *dst = U.pVal, *rhs = RHS.U.pVal;258for (size_t i = 0, e = getNumWords(); i != e; ++i)259dst[i] ^= rhs[i];260}261262APInt &APInt::operator*=(const APInt &RHS) {263*this = *this * RHS;264return *this;265}266267APInt& APInt::operator*=(uint64_t RHS) {268if (isSingleWord()) {269U.VAL *= RHS;270} else {271unsigned NumWords = getNumWords();272tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false);273}274return clearUnusedBits();275}276277bool APInt::equalSlowCase(const APInt &RHS) const {278return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);279}280281int APInt::compare(const APInt& RHS) const {282assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");283if (isSingleWord())284return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;285286return tcCompare(U.pVal, RHS.U.pVal, getNumWords());287}288289int APInt::compareSigned(const APInt& RHS) const {290assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");291if (isSingleWord()) {292int64_t lhsSext = SignExtend64(U.VAL, BitWidth);293int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);294return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;295}296297bool lhsNeg = isNegative();298bool rhsNeg = RHS.isNegative();299300// If the sign bits don't match, then (LHS < RHS) if LHS is negative301if (lhsNeg != rhsNeg)302return lhsNeg ? -1 : 1;303304// Otherwise we can just use an unsigned comparison, because even negative305// numbers compare correctly this way if both have the same signed-ness.306return tcCompare(U.pVal, RHS.U.pVal, getNumWords());307}308309void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {310unsigned loWord = whichWord(loBit);311unsigned hiWord = whichWord(hiBit);312313// Create an initial mask for the low word with zeros below loBit.314uint64_t loMask = WORDTYPE_MAX << whichBit(loBit);315316// If hiBit is not aligned, we need a high mask.317unsigned hiShiftAmt = whichBit(hiBit);318if (hiShiftAmt != 0) {319// Create a high mask with zeros above hiBit.320uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);321// If loWord and hiWord are equal, then we combine the masks. Otherwise,322// set the bits in hiWord.323if (hiWord == loWord)324loMask &= hiMask;325else326U.pVal[hiWord] |= hiMask;327}328// Apply the mask to the low word.329U.pVal[loWord] |= loMask;330331// Fill any words between loWord and hiWord with all ones.332for (unsigned word = loWord + 1; word < hiWord; ++word)333U.pVal[word] = WORDTYPE_MAX;334}335336// Complement a bignum in-place.337static void tcComplement(APInt::WordType *dst, unsigned parts) {338for (unsigned i = 0; i < parts; i++)339dst[i] = ~dst[i];340}341342/// Toggle every bit to its opposite value.343void APInt::flipAllBitsSlowCase() {344tcComplement(U.pVal, getNumWords());345clearUnusedBits();346}347348/// Concatenate the bits from "NewLSB" onto the bottom of *this. This is349/// equivalent to:350/// (this->zext(NewWidth) << NewLSB.getBitWidth()) | NewLSB.zext(NewWidth)351/// In the slow case, we know the result is large.352APInt APInt::concatSlowCase(const APInt &NewLSB) const {353unsigned NewWidth = getBitWidth() + NewLSB.getBitWidth();354APInt Result = NewLSB.zext(NewWidth);355Result.insertBits(*this, NewLSB.getBitWidth());356return Result;357}358359/// Toggle a given bit to its opposite value whose position is given360/// as "bitPosition".361/// Toggles a given bit to its opposite value.362void APInt::flipBit(unsigned bitPosition) {363assert(bitPosition < BitWidth && "Out of the bit-width range!");364setBitVal(bitPosition, !(*this)[bitPosition]);365}366367void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {368unsigned subBitWidth = subBits.getBitWidth();369assert((subBitWidth + bitPosition) <= BitWidth && "Illegal bit insertion");370371// inserting no bits is a noop.372if (subBitWidth == 0)373return;374375// Insertion is a direct copy.376if (subBitWidth == BitWidth) {377*this = subBits;378return;379}380381// Single word result can be done as a direct bitmask.382if (isSingleWord()) {383uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);384U.VAL &= ~(mask << bitPosition);385U.VAL |= (subBits.U.VAL << bitPosition);386return;387}388389unsigned loBit = whichBit(bitPosition);390unsigned loWord = whichWord(bitPosition);391unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);392393// Insertion within a single word can be done as a direct bitmask.394if (loWord == hi1Word) {395uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);396U.pVal[loWord] &= ~(mask << loBit);397U.pVal[loWord] |= (subBits.U.VAL << loBit);398return;399}400401// Insert on word boundaries.402if (loBit == 0) {403// Direct copy whole words.404unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;405memcpy(U.pVal + loWord, subBits.getRawData(),406numWholeSubWords * APINT_WORD_SIZE);407408// Mask+insert remaining bits.409unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;410if (remainingBits != 0) {411uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);412U.pVal[hi1Word] &= ~mask;413U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);414}415return;416}417418// General case - set/clear individual bits in dst based on src.419// TODO - there is scope for optimization here, but at the moment this code420// path is barely used so prefer readability over performance.421for (unsigned i = 0; i != subBitWidth; ++i)422setBitVal(bitPosition + i, subBits[i]);423}424425void APInt::insertBits(uint64_t subBits, unsigned bitPosition, unsigned numBits) {426uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);427subBits &= maskBits;428if (isSingleWord()) {429U.VAL &= ~(maskBits << bitPosition);430U.VAL |= subBits << bitPosition;431return;432}433434unsigned loBit = whichBit(bitPosition);435unsigned loWord = whichWord(bitPosition);436unsigned hiWord = whichWord(bitPosition + numBits - 1);437if (loWord == hiWord) {438U.pVal[loWord] &= ~(maskBits << loBit);439U.pVal[loWord] |= subBits << loBit;440return;441}442443static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");444unsigned wordBits = 8 * sizeof(WordType);445U.pVal[loWord] &= ~(maskBits << loBit);446U.pVal[loWord] |= subBits << loBit;447448U.pVal[hiWord] &= ~(maskBits >> (wordBits - loBit));449U.pVal[hiWord] |= subBits >> (wordBits - loBit);450}451452APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {453assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&454"Illegal bit extraction");455456if (isSingleWord())457return APInt(numBits, U.VAL >> bitPosition);458459unsigned loBit = whichBit(bitPosition);460unsigned loWord = whichWord(bitPosition);461unsigned hiWord = whichWord(bitPosition + numBits - 1);462463// Single word result extracting bits from a single word source.464if (loWord == hiWord)465return APInt(numBits, U.pVal[loWord] >> loBit);466467// Extracting bits that start on a source word boundary can be done468// as a fast memory copy.469if (loBit == 0)470return APInt(numBits, ArrayRef(U.pVal + loWord, 1 + hiWord - loWord));471472// General case - shift + copy source words directly into place.473APInt Result(numBits, 0);474unsigned NumSrcWords = getNumWords();475unsigned NumDstWords = Result.getNumWords();476477uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal;478for (unsigned word = 0; word < NumDstWords; ++word) {479uint64_t w0 = U.pVal[loWord + word];480uint64_t w1 =481(loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;482DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));483}484485return Result.clearUnusedBits();486}487488uint64_t APInt::extractBitsAsZExtValue(unsigned numBits,489unsigned bitPosition) const {490assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&491"Illegal bit extraction");492assert(numBits <= 64 && "Illegal bit extraction");493494uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);495if (isSingleWord())496return (U.VAL >> bitPosition) & maskBits;497498unsigned loBit = whichBit(bitPosition);499unsigned loWord = whichWord(bitPosition);500unsigned hiWord = whichWord(bitPosition + numBits - 1);501if (loWord == hiWord)502return (U.pVal[loWord] >> loBit) & maskBits;503504static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");505unsigned wordBits = 8 * sizeof(WordType);506uint64_t retBits = U.pVal[loWord] >> loBit;507retBits |= U.pVal[hiWord] << (wordBits - loBit);508retBits &= maskBits;509return retBits;510}511512unsigned APInt::getSufficientBitsNeeded(StringRef Str, uint8_t Radix) {513assert(!Str.empty() && "Invalid string length");514size_t StrLen = Str.size();515516// Each computation below needs to know if it's negative.517unsigned IsNegative = false;518if (Str[0] == '-' || Str[0] == '+') {519IsNegative = Str[0] == '-';520StrLen--;521assert(StrLen && "String is only a sign, needs a value.");522}523524// For radixes of power-of-two values, the bits required is accurately and525// easily computed.526if (Radix == 2)527return StrLen + IsNegative;528if (Radix == 8)529return StrLen * 3 + IsNegative;530if (Radix == 16)531return StrLen * 4 + IsNegative;532533// Compute a sufficient number of bits that is always large enough but might534// be too large. This avoids the assertion in the constructor. This535// calculation doesn't work appropriately for the numbers 0-9, so just use 4536// bits in that case.537if (Radix == 10)538return (StrLen == 1 ? 4 : StrLen * 64 / 18) + IsNegative;539540assert(Radix == 36);541return (StrLen == 1 ? 7 : StrLen * 16 / 3) + IsNegative;542}543544unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {545// Compute a sufficient number of bits that is always large enough but might546// be too large.547unsigned sufficient = getSufficientBitsNeeded(str, radix);548549// For bases 2, 8, and 16, the sufficient number of bits is exact and we can550// return the value directly. For bases 10 and 36, we need to do extra work.551if (radix == 2 || radix == 8 || radix == 16)552return sufficient;553554// This is grossly inefficient but accurate. We could probably do something555// with a computation of roughly slen*64/20 and then adjust by the value of556// the first few digits. But, I'm not sure how accurate that could be.557size_t slen = str.size();558559// Each computation below needs to know if it's negative.560StringRef::iterator p = str.begin();561unsigned isNegative = *p == '-';562if (*p == '-' || *p == '+') {563p++;564slen--;565assert(slen && "String is only a sign, needs a value.");566}567568569// Convert to the actual binary value.570APInt tmp(sufficient, StringRef(p, slen), radix);571572// Compute how many bits are required. If the log is infinite, assume we need573// just bit. If the log is exact and value is negative, then the value is574// MinSignedValue with (log + 1) bits.575unsigned log = tmp.logBase2();576if (log == (unsigned)-1) {577return isNegative + 1;578} else if (isNegative && tmp.isPowerOf2()) {579return isNegative + log;580} else {581return isNegative + log + 1;582}583}584585hash_code llvm::hash_value(const APInt &Arg) {586if (Arg.isSingleWord())587return hash_combine(Arg.BitWidth, Arg.U.VAL);588589return hash_combine(590Arg.BitWidth,591hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords()));592}593594unsigned DenseMapInfo<APInt, void>::getHashValue(const APInt &Key) {595return static_cast<unsigned>(hash_value(Key));596}597598bool APInt::isSplat(unsigned SplatSizeInBits) const {599assert(getBitWidth() % SplatSizeInBits == 0 &&600"SplatSizeInBits must divide width!");601// We can check that all parts of an integer are equal by making use of a602// little trick: rotate and check if it's still the same value.603return *this == rotl(SplatSizeInBits);604}605606/// This function returns the high "numBits" bits of this APInt.607APInt APInt::getHiBits(unsigned numBits) const {608return this->lshr(BitWidth - numBits);609}610611/// This function returns the low "numBits" bits of this APInt.612APInt APInt::getLoBits(unsigned numBits) const {613APInt Result(getLowBitsSet(BitWidth, numBits));614Result &= *this;615return Result;616}617618/// Return a value containing V broadcasted over NewLen bits.619APInt APInt::getSplat(unsigned NewLen, const APInt &V) {620assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");621622APInt Val = V.zext(NewLen);623for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)624Val |= Val << I;625626return Val;627}628629unsigned APInt::countLeadingZerosSlowCase() const {630unsigned Count = 0;631for (int i = getNumWords()-1; i >= 0; --i) {632uint64_t V = U.pVal[i];633if (V == 0)634Count += APINT_BITS_PER_WORD;635else {636Count += llvm::countl_zero(V);637break;638}639}640// Adjust for unused bits in the most significant word (they are zero).641unsigned Mod = BitWidth % APINT_BITS_PER_WORD;642Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;643return Count;644}645646unsigned APInt::countLeadingOnesSlowCase() const {647unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;648unsigned shift;649if (!highWordBits) {650highWordBits = APINT_BITS_PER_WORD;651shift = 0;652} else {653shift = APINT_BITS_PER_WORD - highWordBits;654}655int i = getNumWords() - 1;656unsigned Count = llvm::countl_one(U.pVal[i] << shift);657if (Count == highWordBits) {658for (i--; i >= 0; --i) {659if (U.pVal[i] == WORDTYPE_MAX)660Count += APINT_BITS_PER_WORD;661else {662Count += llvm::countl_one(U.pVal[i]);663break;664}665}666}667return Count;668}669670unsigned APInt::countTrailingZerosSlowCase() const {671unsigned Count = 0;672unsigned i = 0;673for (; i < getNumWords() && U.pVal[i] == 0; ++i)674Count += APINT_BITS_PER_WORD;675if (i < getNumWords())676Count += llvm::countr_zero(U.pVal[i]);677return std::min(Count, BitWidth);678}679680unsigned APInt::countTrailingOnesSlowCase() const {681unsigned Count = 0;682unsigned i = 0;683for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i)684Count += APINT_BITS_PER_WORD;685if (i < getNumWords())686Count += llvm::countr_one(U.pVal[i]);687assert(Count <= BitWidth);688return Count;689}690691unsigned APInt::countPopulationSlowCase() const {692unsigned Count = 0;693for (unsigned i = 0; i < getNumWords(); ++i)694Count += llvm::popcount(U.pVal[i]);695return Count;696}697698bool APInt::intersectsSlowCase(const APInt &RHS) const {699for (unsigned i = 0, e = getNumWords(); i != e; ++i)700if ((U.pVal[i] & RHS.U.pVal[i]) != 0)701return true;702703return false;704}705706bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {707for (unsigned i = 0, e = getNumWords(); i != e; ++i)708if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)709return false;710711return true;712}713714APInt APInt::byteSwap() const {715assert(BitWidth >= 16 && BitWidth % 8 == 0 && "Cannot byteswap!");716if (BitWidth == 16)717return APInt(BitWidth, llvm::byteswap<uint16_t>(U.VAL));718if (BitWidth == 32)719return APInt(BitWidth, llvm::byteswap<uint32_t>(U.VAL));720if (BitWidth <= 64) {721uint64_t Tmp1 = llvm::byteswap<uint64_t>(U.VAL);722Tmp1 >>= (64 - BitWidth);723return APInt(BitWidth, Tmp1);724}725726APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);727for (unsigned I = 0, N = getNumWords(); I != N; ++I)728Result.U.pVal[I] = llvm::byteswap<uint64_t>(U.pVal[N - I - 1]);729if (Result.BitWidth != BitWidth) {730Result.lshrInPlace(Result.BitWidth - BitWidth);731Result.BitWidth = BitWidth;732}733return Result;734}735736APInt APInt::reverseBits() const {737switch (BitWidth) {738case 64:739return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));740case 32:741return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));742case 16:743return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));744case 8:745return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));746case 0:747return *this;748default:749break;750}751752APInt Val(*this);753APInt Reversed(BitWidth, 0);754unsigned S = BitWidth;755756for (; Val != 0; Val.lshrInPlace(1)) {757Reversed <<= 1;758Reversed |= Val[0];759--S;760}761762Reversed <<= S;763return Reversed;764}765766APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {767// Fast-path a common case.768if (A == B) return A;769770// Corner cases: if either operand is zero, the other is the gcd.771if (!A) return B;772if (!B) return A;773774// Count common powers of 2 and remove all other powers of 2.775unsigned Pow2;776{777unsigned Pow2_A = A.countr_zero();778unsigned Pow2_B = B.countr_zero();779if (Pow2_A > Pow2_B) {780A.lshrInPlace(Pow2_A - Pow2_B);781Pow2 = Pow2_B;782} else if (Pow2_B > Pow2_A) {783B.lshrInPlace(Pow2_B - Pow2_A);784Pow2 = Pow2_A;785} else {786Pow2 = Pow2_A;787}788}789790// Both operands are odd multiples of 2^Pow_2:791//792// gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))793//794// This is a modified version of Stein's algorithm, taking advantage of795// efficient countTrailingZeros().796while (A != B) {797if (A.ugt(B)) {798A -= B;799A.lshrInPlace(A.countr_zero() - Pow2);800} else {801B -= A;802B.lshrInPlace(B.countr_zero() - Pow2);803}804}805806return A;807}808809APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {810uint64_t I = bit_cast<uint64_t>(Double);811812// Get the sign bit from the highest order bit813bool isNeg = I >> 63;814815// Get the 11-bit exponent and adjust for the 1023 bit bias816int64_t exp = ((I >> 52) & 0x7ff) - 1023;817818// If the exponent is negative, the value is < 0 so just return 0.819if (exp < 0)820return APInt(width, 0u);821822// Extract the mantissa by clearing the top 12 bits (sign + exponent).823uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52;824825// If the exponent doesn't shift all bits out of the mantissa826if (exp < 52)827return isNeg ? -APInt(width, mantissa >> (52 - exp)) :828APInt(width, mantissa >> (52 - exp));829830// If the client didn't provide enough bits for us to shift the mantissa into831// then the result is undefined, just return 0832if (width <= exp - 52)833return APInt(width, 0);834835// Otherwise, we have to shift the mantissa bits up to the right location836APInt Tmp(width, mantissa);837Tmp <<= (unsigned)exp - 52;838return isNeg ? -Tmp : Tmp;839}840841/// This function converts this APInt to a double.842/// The layout for double is as following (IEEE Standard 754):843/// --------------------------------------844/// | Sign Exponent Fraction Bias |845/// |-------------------------------------- |846/// | 1[63] 11[62-52] 52[51-00] 1023 |847/// --------------------------------------848double APInt::roundToDouble(bool isSigned) const {849850// Handle the simple case where the value is contained in one uint64_t.851// It is wrong to optimize getWord(0) to VAL; there might be more than one word.852if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {853if (isSigned) {854int64_t sext = SignExtend64(getWord(0), BitWidth);855return double(sext);856} else857return double(getWord(0));858}859860// Determine if the value is negative.861bool isNeg = isSigned ? (*this)[BitWidth-1] : false;862863// Construct the absolute value if we're negative.864APInt Tmp(isNeg ? -(*this) : (*this));865866// Figure out how many bits we're using.867unsigned n = Tmp.getActiveBits();868869// The exponent (without bias normalization) is just the number of bits870// we are using. Note that the sign bit is gone since we constructed the871// absolute value.872uint64_t exp = n;873874// Return infinity for exponent overflow875if (exp > 1023) {876if (!isSigned || !isNeg)877return std::numeric_limits<double>::infinity();878else879return -std::numeric_limits<double>::infinity();880}881exp += 1023; // Increment for 1023 bias882883// Number of bits in mantissa is 52. To obtain the mantissa value, we must884// extract the high 52 bits from the correct words in pVal.885uint64_t mantissa;886unsigned hiWord = whichWord(n-1);887if (hiWord == 0) {888mantissa = Tmp.U.pVal[0];889if (n > 52)890mantissa >>= n - 52; // shift down, we want the top 52 bits.891} else {892assert(hiWord > 0 && "huh?");893uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);894uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);895mantissa = hibits | lobits;896}897898// The leading bit of mantissa is implicit, so get rid of it.899uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;900uint64_t I = sign | (exp << 52) | mantissa;901return bit_cast<double>(I);902}903904// Truncate to new width.905APInt APInt::trunc(unsigned width) const {906assert(width <= BitWidth && "Invalid APInt Truncate request");907908if (width <= APINT_BITS_PER_WORD)909return APInt(width, getRawData()[0]);910911if (width == BitWidth)912return *this;913914APInt Result(getMemory(getNumWords(width)), width);915916// Copy full words.917unsigned i;918for (i = 0; i != width / APINT_BITS_PER_WORD; i++)919Result.U.pVal[i] = U.pVal[i];920921// Truncate and copy any partial word.922unsigned bits = (0 - width) % APINT_BITS_PER_WORD;923if (bits != 0)924Result.U.pVal[i] = U.pVal[i] << bits >> bits;925926return Result;927}928929// Truncate to new width with unsigned saturation.930APInt APInt::truncUSat(unsigned width) const {931assert(width <= BitWidth && "Invalid APInt Truncate request");932933// Can we just losslessly truncate it?934if (isIntN(width))935return trunc(width);936// If not, then just return the new limit.937return APInt::getMaxValue(width);938}939940// Truncate to new width with signed saturation.941APInt APInt::truncSSat(unsigned width) const {942assert(width <= BitWidth && "Invalid APInt Truncate request");943944// Can we just losslessly truncate it?945if (isSignedIntN(width))946return trunc(width);947// If not, then just return the new limits.948return isNegative() ? APInt::getSignedMinValue(width)949: APInt::getSignedMaxValue(width);950}951952// Sign extend to a new width.953APInt APInt::sext(unsigned Width) const {954assert(Width >= BitWidth && "Invalid APInt SignExtend request");955956if (Width <= APINT_BITS_PER_WORD)957return APInt(Width, SignExtend64(U.VAL, BitWidth));958959if (Width == BitWidth)960return *this;961962APInt Result(getMemory(getNumWords(Width)), Width);963964// Copy words.965std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);966967// Sign extend the last word since there may be unused bits in the input.968Result.U.pVal[getNumWords() - 1] =969SignExtend64(Result.U.pVal[getNumWords() - 1],970((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);971972// Fill with sign bits.973std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,974(Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);975Result.clearUnusedBits();976return Result;977}978979// Zero extend to a new width.980APInt APInt::zext(unsigned width) const {981assert(width >= BitWidth && "Invalid APInt ZeroExtend request");982983if (width <= APINT_BITS_PER_WORD)984return APInt(width, U.VAL);985986if (width == BitWidth)987return *this;988989APInt Result(getMemory(getNumWords(width)), width);990991// Copy words.992std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);993994// Zero remaining words.995std::memset(Result.U.pVal + getNumWords(), 0,996(Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);997998return Result;999}10001001APInt APInt::zextOrTrunc(unsigned width) const {1002if (BitWidth < width)1003return zext(width);1004if (BitWidth > width)1005return trunc(width);1006return *this;1007}10081009APInt APInt::sextOrTrunc(unsigned width) const {1010if (BitWidth < width)1011return sext(width);1012if (BitWidth > width)1013return trunc(width);1014return *this;1015}10161017/// Arithmetic right-shift this APInt by shiftAmt.1018/// Arithmetic right-shift function.1019void APInt::ashrInPlace(const APInt &shiftAmt) {1020ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));1021}10221023/// Arithmetic right-shift this APInt by shiftAmt.1024/// Arithmetic right-shift function.1025void APInt::ashrSlowCase(unsigned ShiftAmt) {1026// Don't bother performing a no-op shift.1027if (!ShiftAmt)1028return;10291030// Save the original sign bit for later.1031bool Negative = isNegative();10321033// WordShift is the inter-part shift; BitShift is intra-part shift.1034unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;1035unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;10361037unsigned WordsToMove = getNumWords() - WordShift;1038if (WordsToMove != 0) {1039// Sign extend the last word to fill in the unused bits.1040U.pVal[getNumWords() - 1] = SignExtend64(1041U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);10421043// Fastpath for moving by whole words.1044if (BitShift == 0) {1045std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);1046} else {1047// Move the words containing significant bits.1048for (unsigned i = 0; i != WordsToMove - 1; ++i)1049U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |1050(U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));10511052// Handle the last word which has no high bits to copy.1053U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;1054// Sign extend one more time.1055U.pVal[WordsToMove - 1] =1056SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);1057}1058}10591060// Fill in the remainder based on the original sign.1061std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,1062WordShift * APINT_WORD_SIZE);1063clearUnusedBits();1064}10651066/// Logical right-shift this APInt by shiftAmt.1067/// Logical right-shift function.1068void APInt::lshrInPlace(const APInt &shiftAmt) {1069lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));1070}10711072/// Logical right-shift this APInt by shiftAmt.1073/// Logical right-shift function.1074void APInt::lshrSlowCase(unsigned ShiftAmt) {1075tcShiftRight(U.pVal, getNumWords(), ShiftAmt);1076}10771078/// Left-shift this APInt by shiftAmt.1079/// Left-shift function.1080APInt &APInt::operator<<=(const APInt &shiftAmt) {1081// It's undefined behavior in C to shift by BitWidth or greater.1082*this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);1083return *this;1084}10851086void APInt::shlSlowCase(unsigned ShiftAmt) {1087tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);1088clearUnusedBits();1089}10901091// Calculate the rotate amount modulo the bit width.1092static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {1093if (LLVM_UNLIKELY(BitWidth == 0))1094return 0;1095unsigned rotBitWidth = rotateAmt.getBitWidth();1096APInt rot = rotateAmt;1097if (rotBitWidth < BitWidth) {1098// Extend the rotate APInt, so that the urem doesn't divide by 0.1099// e.g. APInt(1, 32) would give APInt(1, 0).1100rot = rotateAmt.zext(BitWidth);1101}1102rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));1103return rot.getLimitedValue(BitWidth);1104}11051106APInt APInt::rotl(const APInt &rotateAmt) const {1107return rotl(rotateModulo(BitWidth, rotateAmt));1108}11091110APInt APInt::rotl(unsigned rotateAmt) const {1111if (LLVM_UNLIKELY(BitWidth == 0))1112return *this;1113rotateAmt %= BitWidth;1114if (rotateAmt == 0)1115return *this;1116return shl(rotateAmt) | lshr(BitWidth - rotateAmt);1117}11181119APInt APInt::rotr(const APInt &rotateAmt) const {1120return rotr(rotateModulo(BitWidth, rotateAmt));1121}11221123APInt APInt::rotr(unsigned rotateAmt) const {1124if (BitWidth == 0)1125return *this;1126rotateAmt %= BitWidth;1127if (rotateAmt == 0)1128return *this;1129return lshr(rotateAmt) | shl(BitWidth - rotateAmt);1130}11311132/// \returns the nearest log base 2 of this APInt. Ties round up.1133///1134/// NOTE: When we have a BitWidth of 1, we define:1135///1136/// log2(0) = UINT32_MAX1137/// log2(1) = 01138///1139/// to get around any mathematical concerns resulting from1140/// referencing 2 in a space where 2 does no exist.1141unsigned APInt::nearestLogBase2() const {1142// Special case when we have a bitwidth of 1. If VAL is 1, then we1143// get 0. If VAL is 0, we get WORDTYPE_MAX which gets truncated to1144// UINT32_MAX.1145if (BitWidth == 1)1146return U.VAL - 1;11471148// Handle the zero case.1149if (isZero())1150return UINT32_MAX;11511152// The non-zero case is handled by computing:1153//1154// nearestLogBase2(x) = logBase2(x) + x[logBase2(x)-1].1155//1156// where x[i] is referring to the value of the ith bit of x.1157unsigned lg = logBase2();1158return lg + unsigned((*this)[lg - 1]);1159}11601161// Square Root - this method computes and returns the square root of "this".1162// Three mechanisms are used for computation. For small values (<= 5 bits),1163// a table lookup is done. This gets some performance for common cases. For1164// values using less than 52 bits, the value is converted to double and then1165// the libc sqrt function is called. The result is rounded and then converted1166// back to a uint64_t which is then used to construct the result. Finally,1167// the Babylonian method for computing square roots is used.1168APInt APInt::sqrt() const {11691170// Determine the magnitude of the value.1171unsigned magnitude = getActiveBits();11721173// Use a fast table for some small values. This also gets rid of some1174// rounding errors in libc sqrt for small values.1175if (magnitude <= 5) {1176static const uint8_t results[32] = {1177/* 0 */ 0,1178/* 1- 2 */ 1, 1,1179/* 3- 6 */ 2, 2, 2, 2,1180/* 7-12 */ 3, 3, 3, 3, 3, 3,1181/* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,1182/* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,1183/* 31 */ 61184};1185return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);1186}11871188// If the magnitude of the value fits in less than 52 bits (the precision of1189// an IEEE double precision floating point value), then we can use the1190// libc sqrt function which will probably use a hardware sqrt computation.1191// This should be faster than the algorithm below.1192if (magnitude < 52) {1193return APInt(BitWidth,1194uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL1195: U.pVal[0])))));1196}11971198// Okay, all the short cuts are exhausted. We must compute it. The following1199// is a classical Babylonian method for computing the square root. This code1200// was adapted to APInt from a wikipedia article on such computations.1201// See http://www.wikipedia.org/ and go to the page named1202// Calculate_an_integer_square_root.1203unsigned nbits = BitWidth, i = 4;1204APInt testy(BitWidth, 16);1205APInt x_old(BitWidth, 1);1206APInt x_new(BitWidth, 0);1207APInt two(BitWidth, 2);12081209// Select a good starting value using binary logarithms.1210for (;; i += 2, testy = testy.shl(2))1211if (i >= nbits || this->ule(testy)) {1212x_old = x_old.shl(i / 2);1213break;1214}12151216// Use the Babylonian method to arrive at the integer square root:1217for (;;) {1218x_new = (this->udiv(x_old) + x_old).udiv(two);1219if (x_old.ule(x_new))1220break;1221x_old = x_new;1222}12231224// Make sure we return the closest approximation1225// NOTE: The rounding calculation below is correct. It will produce an1226// off-by-one discrepancy with results from pari/gp. That discrepancy has been1227// determined to be a rounding issue with pari/gp as it begins to use a1228// floating point representation after 192 bits. There are no discrepancies1229// between this algorithm and pari/gp for bit widths < 192 bits.1230APInt square(x_old * x_old);1231APInt nextSquare((x_old + 1) * (x_old +1));1232if (this->ult(square))1233return x_old;1234assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");1235APInt midpoint((nextSquare - square).udiv(two));1236APInt offset(*this - square);1237if (offset.ult(midpoint))1238return x_old;1239return x_old + 1;1240}12411242/// \returns the multiplicative inverse of an odd APInt modulo 2^BitWidth.1243APInt APInt::multiplicativeInverse() const {1244assert((*this)[0] &&1245"multiplicative inverse is only defined for odd numbers!");12461247// Use Newton's method.1248APInt Factor = *this;1249APInt T;1250while (!(T = *this * Factor).isOne())1251Factor *= 2 - std::move(T);1252return Factor;1253}12541255/// Implementation of Knuth's Algorithm D (Division of nonnegative integers)1256/// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The1257/// variables here have the same names as in the algorithm. Comments explain1258/// the algorithm and any deviation from it.1259static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,1260unsigned m, unsigned n) {1261assert(u && "Must provide dividend");1262assert(v && "Must provide divisor");1263assert(q && "Must provide quotient");1264assert(u != v && u != q && v != q && "Must use different memory");1265assert(n>1 && "n must be > 1");12661267// b denotes the base of the number system. In our case b is 2^32.1268const uint64_t b = uint64_t(1) << 32;12691270// The DEBUG macros here tend to be spam in the debug output if you're not1271// debugging this code. Disable them unless KNUTH_DEBUG is defined.1272#ifdef KNUTH_DEBUG1273#define DEBUG_KNUTH(X) LLVM_DEBUG(X)1274#else1275#define DEBUG_KNUTH(X) do {} while(false)1276#endif12771278DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');1279DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");1280DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);1281DEBUG_KNUTH(dbgs() << " by");1282DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);1283DEBUG_KNUTH(dbgs() << '\n');1284// D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of1285// u and v by d. Note that we have taken Knuth's advice here to use a power1286// of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of1287// 2 allows us to shift instead of multiply and it is easy to determine the1288// shift amount from the leading zeros. We are basically normalizing the u1289// and v so that its high bits are shifted to the top of v's range without1290// overflow. Note that this can require an extra word in u so that u must1291// be of length m+n+1.1292unsigned shift = llvm::countl_zero(v[n - 1]);1293uint32_t v_carry = 0;1294uint32_t u_carry = 0;1295if (shift) {1296for (unsigned i = 0; i < m+n; ++i) {1297uint32_t u_tmp = u[i] >> (32 - shift);1298u[i] = (u[i] << shift) | u_carry;1299u_carry = u_tmp;1300}1301for (unsigned i = 0; i < n; ++i) {1302uint32_t v_tmp = v[i] >> (32 - shift);1303v[i] = (v[i] << shift) | v_carry;1304v_carry = v_tmp;1305}1306}1307u[m+n] = u_carry;13081309DEBUG_KNUTH(dbgs() << "KnuthDiv: normal:");1310DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);1311DEBUG_KNUTH(dbgs() << " by");1312DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);1313DEBUG_KNUTH(dbgs() << '\n');13141315// D2. [Initialize j.] Set j to m. This is the loop counter over the places.1316int j = m;1317do {1318DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');1319// D3. [Calculate q'.].1320// Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')1321// Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')1322// Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease1323// qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test1324// on v[n-2] determines at high speed most of the cases in which the trial1325// value qp is one too large, and it eliminates all cases where qp is two1326// too large.1327uint64_t dividend = Make_64(u[j+n], u[j+n-1]);1328DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');1329uint64_t qp = dividend / v[n-1];1330uint64_t rp = dividend % v[n-1];1331if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {1332qp--;1333rp += v[n-1];1334if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))1335qp--;1336}1337DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');13381339// D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with1340// (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation1341// consists of a simple multiplication by a one-place number, combined with1342// a subtraction.1343// The digits (u[j+n]...u[j]) should be kept positive; if the result of1344// this step is actually negative, (u[j+n]...u[j]) should be left as the1345// true value plus b**(n+1), namely as the b's complement of1346// the true value, and a "borrow" to the left should be remembered.1347int64_t borrow = 0;1348for (unsigned i = 0; i < n; ++i) {1349uint64_t p = uint64_t(qp) * uint64_t(v[i]);1350int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);1351u[j+i] = Lo_32(subres);1352borrow = Hi_32(p) - Hi_32(subres);1353DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]1354<< ", borrow = " << borrow << '\n');1355}1356bool isNeg = u[j+n] < borrow;1357u[j+n] -= Lo_32(borrow);13581359DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");1360DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);1361DEBUG_KNUTH(dbgs() << '\n');13621363// D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was1364// negative, go to step D6; otherwise go on to step D7.1365q[j] = Lo_32(qp);1366if (isNeg) {1367// D6. [Add back]. The probability that this step is necessary is very1368// small, on the order of only 2/b. Make sure that test data accounts for1369// this possibility. Decrease q[j] by 11370q[j]--;1371// and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).1372// A carry will occur to the left of u[j+n], and it should be ignored1373// since it cancels with the borrow that occurred in D4.1374bool carry = false;1375for (unsigned i = 0; i < n; i++) {1376uint32_t limit = std::min(u[j+i],v[i]);1377u[j+i] += v[i] + carry;1378carry = u[j+i] < limit || (carry && u[j+i] == limit);1379}1380u[j+n] += carry;1381}1382DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");1383DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);1384DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');13851386// D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3.1387} while (--j >= 0);13881389DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");1390DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);1391DEBUG_KNUTH(dbgs() << '\n');13921393// D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired1394// remainder may be obtained by dividing u[...] by d. If r is non-null we1395// compute the remainder (urem uses this).1396if (r) {1397// The value d is expressed by the "shift" value above since we avoided1398// multiplication by d by using a shift left. So, all we have to do is1399// shift right here.1400if (shift) {1401uint32_t carry = 0;1402DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");1403for (int i = n-1; i >= 0; i--) {1404r[i] = (u[i] >> shift) | carry;1405carry = u[i] << (32 - shift);1406DEBUG_KNUTH(dbgs() << " " << r[i]);1407}1408} else {1409for (int i = n-1; i >= 0; i--) {1410r[i] = u[i];1411DEBUG_KNUTH(dbgs() << " " << r[i]);1412}1413}1414DEBUG_KNUTH(dbgs() << '\n');1415}1416DEBUG_KNUTH(dbgs() << '\n');1417}14181419void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,1420unsigned rhsWords, WordType *Quotient, WordType *Remainder) {1421assert(lhsWords >= rhsWords && "Fractional result");14221423// First, compose the values into an array of 32-bit words instead of1424// 64-bit words. This is a necessity of both the "short division" algorithm1425// and the Knuth "classical algorithm" which requires there to be native1426// operations for +, -, and * on an m bit value with an m*2 bit result. We1427// can't use 64-bit operands here because we don't have native results of1428// 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't1429// work on large-endian machines.1430unsigned n = rhsWords * 2;1431unsigned m = (lhsWords * 2) - n;14321433// Allocate space for the temporary values we need either on the stack, if1434// it will fit, or on the heap if it won't.1435uint32_t SPACE[128];1436uint32_t *U = nullptr;1437uint32_t *V = nullptr;1438uint32_t *Q = nullptr;1439uint32_t *R = nullptr;1440if ((Remainder?4:3)*n+2*m+1 <= 128) {1441U = &SPACE[0];1442V = &SPACE[m+n+1];1443Q = &SPACE[(m+n+1) + n];1444if (Remainder)1445R = &SPACE[(m+n+1) + n + (m+n)];1446} else {1447U = new uint32_t[m + n + 1];1448V = new uint32_t[n];1449Q = new uint32_t[m+n];1450if (Remainder)1451R = new uint32_t[n];1452}14531454// Initialize the dividend1455memset(U, 0, (m+n+1)*sizeof(uint32_t));1456for (unsigned i = 0; i < lhsWords; ++i) {1457uint64_t tmp = LHS[i];1458U[i * 2] = Lo_32(tmp);1459U[i * 2 + 1] = Hi_32(tmp);1460}1461U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.14621463// Initialize the divisor1464memset(V, 0, (n)*sizeof(uint32_t));1465for (unsigned i = 0; i < rhsWords; ++i) {1466uint64_t tmp = RHS[i];1467V[i * 2] = Lo_32(tmp);1468V[i * 2 + 1] = Hi_32(tmp);1469}14701471// initialize the quotient and remainder1472memset(Q, 0, (m+n) * sizeof(uint32_t));1473if (Remainder)1474memset(R, 0, n * sizeof(uint32_t));14751476// Now, adjust m and n for the Knuth division. n is the number of words in1477// the divisor. m is the number of words by which the dividend exceeds the1478// divisor (i.e. m+n is the length of the dividend). These sizes must not1479// contain any zero words or the Knuth algorithm fails.1480for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {1481n--;1482m++;1483}1484for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)1485m--;14861487// If we're left with only a single word for the divisor, Knuth doesn't work1488// so we implement the short division algorithm here. This is much simpler1489// and faster because we are certain that we can divide a 64-bit quantity1490// by a 32-bit quantity at hardware speed and short division is simply a1491// series of such operations. This is just like doing short division but we1492// are using base 2^32 instead of base 10.1493assert(n != 0 && "Divide by zero?");1494if (n == 1) {1495uint32_t divisor = V[0];1496uint32_t remainder = 0;1497for (int i = m; i >= 0; i--) {1498uint64_t partial_dividend = Make_64(remainder, U[i]);1499if (partial_dividend == 0) {1500Q[i] = 0;1501remainder = 0;1502} else if (partial_dividend < divisor) {1503Q[i] = 0;1504remainder = Lo_32(partial_dividend);1505} else if (partial_dividend == divisor) {1506Q[i] = 1;1507remainder = 0;1508} else {1509Q[i] = Lo_32(partial_dividend / divisor);1510remainder = Lo_32(partial_dividend - (Q[i] * divisor));1511}1512}1513if (R)1514R[0] = remainder;1515} else {1516// Now we're ready to invoke the Knuth classical divide algorithm. In this1517// case n > 1.1518KnuthDiv(U, V, Q, R, m, n);1519}15201521// If the caller wants the quotient1522if (Quotient) {1523for (unsigned i = 0; i < lhsWords; ++i)1524Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);1525}15261527// If the caller wants the remainder1528if (Remainder) {1529for (unsigned i = 0; i < rhsWords; ++i)1530Remainder[i] = Make_64(R[i*2+1], R[i*2]);1531}15321533// Clean up the memory we allocated.1534if (U != &SPACE[0]) {1535delete [] U;1536delete [] V;1537delete [] Q;1538delete [] R;1539}1540}15411542APInt APInt::udiv(const APInt &RHS) const {1543assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");15441545// First, deal with the easy case1546if (isSingleWord()) {1547assert(RHS.U.VAL != 0 && "Divide by zero?");1548return APInt(BitWidth, U.VAL / RHS.U.VAL);1549}15501551// Get some facts about the LHS and RHS number of bits and words1552unsigned lhsWords = getNumWords(getActiveBits());1553unsigned rhsBits = RHS.getActiveBits();1554unsigned rhsWords = getNumWords(rhsBits);1555assert(rhsWords && "Divided by zero???");15561557// Deal with some degenerate cases1558if (!lhsWords)1559// 0 / X ===> 01560return APInt(BitWidth, 0);1561if (rhsBits == 1)1562// X / 1 ===> X1563return *this;1564if (lhsWords < rhsWords || this->ult(RHS))1565// X / Y ===> 0, iff X < Y1566return APInt(BitWidth, 0);1567if (*this == RHS)1568// X / X ===> 11569return APInt(BitWidth, 1);1570if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.1571// All high words are zero, just use native divide1572return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);15731574// We have to compute it the hard way. Invoke the Knuth divide algorithm.1575APInt Quotient(BitWidth, 0); // to hold result.1576divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr);1577return Quotient;1578}15791580APInt APInt::udiv(uint64_t RHS) const {1581assert(RHS != 0 && "Divide by zero?");15821583// First, deal with the easy case1584if (isSingleWord())1585return APInt(BitWidth, U.VAL / RHS);15861587// Get some facts about the LHS words.1588unsigned lhsWords = getNumWords(getActiveBits());15891590// Deal with some degenerate cases1591if (!lhsWords)1592// 0 / X ===> 01593return APInt(BitWidth, 0);1594if (RHS == 1)1595// X / 1 ===> X1596return *this;1597if (this->ult(RHS))1598// X / Y ===> 0, iff X < Y1599return APInt(BitWidth, 0);1600if (*this == RHS)1601// X / X ===> 11602return APInt(BitWidth, 1);1603if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.1604// All high words are zero, just use native divide1605return APInt(BitWidth, this->U.pVal[0] / RHS);16061607// We have to compute it the hard way. Invoke the Knuth divide algorithm.1608APInt Quotient(BitWidth, 0); // to hold result.1609divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr);1610return Quotient;1611}16121613APInt APInt::sdiv(const APInt &RHS) const {1614if (isNegative()) {1615if (RHS.isNegative())1616return (-(*this)).udiv(-RHS);1617return -((-(*this)).udiv(RHS));1618}1619if (RHS.isNegative())1620return -(this->udiv(-RHS));1621return this->udiv(RHS);1622}16231624APInt APInt::sdiv(int64_t RHS) const {1625if (isNegative()) {1626if (RHS < 0)1627return (-(*this)).udiv(-RHS);1628return -((-(*this)).udiv(RHS));1629}1630if (RHS < 0)1631return -(this->udiv(-RHS));1632return this->udiv(RHS);1633}16341635APInt APInt::urem(const APInt &RHS) const {1636assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");1637if (isSingleWord()) {1638assert(RHS.U.VAL != 0 && "Remainder by zero?");1639return APInt(BitWidth, U.VAL % RHS.U.VAL);1640}16411642// Get some facts about the LHS1643unsigned lhsWords = getNumWords(getActiveBits());16441645// Get some facts about the RHS1646unsigned rhsBits = RHS.getActiveBits();1647unsigned rhsWords = getNumWords(rhsBits);1648assert(rhsWords && "Performing remainder operation by zero ???");16491650// Check the degenerate cases1651if (lhsWords == 0)1652// 0 % Y ===> 01653return APInt(BitWidth, 0);1654if (rhsBits == 1)1655// X % 1 ===> 01656return APInt(BitWidth, 0);1657if (lhsWords < rhsWords || this->ult(RHS))1658// X % Y ===> X, iff X < Y1659return *this;1660if (*this == RHS)1661// X % X == 0;1662return APInt(BitWidth, 0);1663if (lhsWords == 1)1664// All high words are zero, just use native remainder1665return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);16661667// We have to compute it the hard way. Invoke the Knuth divide algorithm.1668APInt Remainder(BitWidth, 0);1669divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal);1670return Remainder;1671}16721673uint64_t APInt::urem(uint64_t RHS) const {1674assert(RHS != 0 && "Remainder by zero?");16751676if (isSingleWord())1677return U.VAL % RHS;16781679// Get some facts about the LHS1680unsigned lhsWords = getNumWords(getActiveBits());16811682// Check the degenerate cases1683if (lhsWords == 0)1684// 0 % Y ===> 01685return 0;1686if (RHS == 1)1687// X % 1 ===> 01688return 0;1689if (this->ult(RHS))1690// X % Y ===> X, iff X < Y1691return getZExtValue();1692if (*this == RHS)1693// X % X == 0;1694return 0;1695if (lhsWords == 1)1696// All high words are zero, just use native remainder1697return U.pVal[0] % RHS;16981699// We have to compute it the hard way. Invoke the Knuth divide algorithm.1700uint64_t Remainder;1701divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);1702return Remainder;1703}17041705APInt APInt::srem(const APInt &RHS) const {1706if (isNegative()) {1707if (RHS.isNegative())1708return -((-(*this)).urem(-RHS));1709return -((-(*this)).urem(RHS));1710}1711if (RHS.isNegative())1712return this->urem(-RHS);1713return this->urem(RHS);1714}17151716int64_t APInt::srem(int64_t RHS) const {1717if (isNegative()) {1718if (RHS < 0)1719return -((-(*this)).urem(-RHS));1720return -((-(*this)).urem(RHS));1721}1722if (RHS < 0)1723return this->urem(-RHS);1724return this->urem(RHS);1725}17261727void APInt::udivrem(const APInt &LHS, const APInt &RHS,1728APInt &Quotient, APInt &Remainder) {1729assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");1730unsigned BitWidth = LHS.BitWidth;17311732// First, deal with the easy case1733if (LHS.isSingleWord()) {1734assert(RHS.U.VAL != 0 && "Divide by zero?");1735uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;1736uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;1737Quotient = APInt(BitWidth, QuotVal);1738Remainder = APInt(BitWidth, RemVal);1739return;1740}17411742// Get some size facts about the dividend and divisor1743unsigned lhsWords = getNumWords(LHS.getActiveBits());1744unsigned rhsBits = RHS.getActiveBits();1745unsigned rhsWords = getNumWords(rhsBits);1746assert(rhsWords && "Performing divrem operation by zero ???");17471748// Check the degenerate cases1749if (lhsWords == 0) {1750Quotient = APInt(BitWidth, 0); // 0 / Y ===> 01751Remainder = APInt(BitWidth, 0); // 0 % Y ===> 01752return;1753}17541755if (rhsBits == 1) {1756Quotient = LHS; // X / 1 ===> X1757Remainder = APInt(BitWidth, 0); // X % 1 ===> 01758}17591760if (lhsWords < rhsWords || LHS.ult(RHS)) {1761Remainder = LHS; // X % Y ===> X, iff X < Y1762Quotient = APInt(BitWidth, 0); // X / Y ===> 0, iff X < Y1763return;1764}17651766if (LHS == RHS) {1767Quotient = APInt(BitWidth, 1); // X / X ===> 11768Remainder = APInt(BitWidth, 0); // X % X ===> 0;1769return;1770}17711772// Make sure there is enough space to hold the results.1773// NOTE: This assumes that reallocate won't affect any bits if it doesn't1774// change the size. This is necessary if Quotient or Remainder is aliased1775// with LHS or RHS.1776Quotient.reallocate(BitWidth);1777Remainder.reallocate(BitWidth);17781779if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.1780// There is only one word to consider so use the native versions.1781uint64_t lhsValue = LHS.U.pVal[0];1782uint64_t rhsValue = RHS.U.pVal[0];1783Quotient = lhsValue / rhsValue;1784Remainder = lhsValue % rhsValue;1785return;1786}17871788// Okay, lets do it the long way1789divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,1790Remainder.U.pVal);1791// Clear the rest of the Quotient and Remainder.1792std::memset(Quotient.U.pVal + lhsWords, 0,1793(getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);1794std::memset(Remainder.U.pVal + rhsWords, 0,1795(getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);1796}17971798void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,1799uint64_t &Remainder) {1800assert(RHS != 0 && "Divide by zero?");1801unsigned BitWidth = LHS.BitWidth;18021803// First, deal with the easy case1804if (LHS.isSingleWord()) {1805uint64_t QuotVal = LHS.U.VAL / RHS;1806Remainder = LHS.U.VAL % RHS;1807Quotient = APInt(BitWidth, QuotVal);1808return;1809}18101811// Get some size facts about the dividend and divisor1812unsigned lhsWords = getNumWords(LHS.getActiveBits());18131814// Check the degenerate cases1815if (lhsWords == 0) {1816Quotient = APInt(BitWidth, 0); // 0 / Y ===> 01817Remainder = 0; // 0 % Y ===> 01818return;1819}18201821if (RHS == 1) {1822Quotient = LHS; // X / 1 ===> X1823Remainder = 0; // X % 1 ===> 01824return;1825}18261827if (LHS.ult(RHS)) {1828Remainder = LHS.getZExtValue(); // X % Y ===> X, iff X < Y1829Quotient = APInt(BitWidth, 0); // X / Y ===> 0, iff X < Y1830return;1831}18321833if (LHS == RHS) {1834Quotient = APInt(BitWidth, 1); // X / X ===> 11835Remainder = 0; // X % X ===> 0;1836return;1837}18381839// Make sure there is enough space to hold the results.1840// NOTE: This assumes that reallocate won't affect any bits if it doesn't1841// change the size. This is necessary if Quotient is aliased with LHS.1842Quotient.reallocate(BitWidth);18431844if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.1845// There is only one word to consider so use the native versions.1846uint64_t lhsValue = LHS.U.pVal[0];1847Quotient = lhsValue / RHS;1848Remainder = lhsValue % RHS;1849return;1850}18511852// Okay, lets do it the long way1853divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);1854// Clear the rest of the Quotient.1855std::memset(Quotient.U.pVal + lhsWords, 0,1856(getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);1857}18581859void APInt::sdivrem(const APInt &LHS, const APInt &RHS,1860APInt &Quotient, APInt &Remainder) {1861if (LHS.isNegative()) {1862if (RHS.isNegative())1863APInt::udivrem(-LHS, -RHS, Quotient, Remainder);1864else {1865APInt::udivrem(-LHS, RHS, Quotient, Remainder);1866Quotient.negate();1867}1868Remainder.negate();1869} else if (RHS.isNegative()) {1870APInt::udivrem(LHS, -RHS, Quotient, Remainder);1871Quotient.negate();1872} else {1873APInt::udivrem(LHS, RHS, Quotient, Remainder);1874}1875}18761877void APInt::sdivrem(const APInt &LHS, int64_t RHS,1878APInt &Quotient, int64_t &Remainder) {1879uint64_t R = Remainder;1880if (LHS.isNegative()) {1881if (RHS < 0)1882APInt::udivrem(-LHS, -RHS, Quotient, R);1883else {1884APInt::udivrem(-LHS, RHS, Quotient, R);1885Quotient.negate();1886}1887R = -R;1888} else if (RHS < 0) {1889APInt::udivrem(LHS, -RHS, Quotient, R);1890Quotient.negate();1891} else {1892APInt::udivrem(LHS, RHS, Quotient, R);1893}1894Remainder = R;1895}18961897APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {1898APInt Res = *this+RHS;1899Overflow = isNonNegative() == RHS.isNonNegative() &&1900Res.isNonNegative() != isNonNegative();1901return Res;1902}19031904APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {1905APInt Res = *this+RHS;1906Overflow = Res.ult(RHS);1907return Res;1908}19091910APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {1911APInt Res = *this - RHS;1912Overflow = isNonNegative() != RHS.isNonNegative() &&1913Res.isNonNegative() != isNonNegative();1914return Res;1915}19161917APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {1918APInt Res = *this-RHS;1919Overflow = Res.ugt(*this);1920return Res;1921}19221923APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {1924// MININT/-1 --> overflow.1925Overflow = isMinSignedValue() && RHS.isAllOnes();1926return sdiv(RHS);1927}19281929APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {1930APInt Res = *this * RHS;19311932if (RHS != 0)1933Overflow = Res.sdiv(RHS) != *this ||1934(isMinSignedValue() && RHS.isAllOnes());1935else1936Overflow = false;1937return Res;1938}19391940APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {1941if (countl_zero() + RHS.countl_zero() + 2 <= BitWidth) {1942Overflow = true;1943return *this * RHS;1944}19451946APInt Res = lshr(1) * RHS;1947Overflow = Res.isNegative();1948Res <<= 1;1949if ((*this)[0]) {1950Res += RHS;1951if (Res.ult(RHS))1952Overflow = true;1953}1954return Res;1955}19561957APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {1958return sshl_ov(ShAmt.getLimitedValue(getBitWidth()), Overflow);1959}19601961APInt APInt::sshl_ov(unsigned ShAmt, bool &Overflow) const {1962Overflow = ShAmt >= getBitWidth();1963if (Overflow)1964return APInt(BitWidth, 0);19651966if (isNonNegative()) // Don't allow sign change.1967Overflow = ShAmt >= countl_zero();1968else1969Overflow = ShAmt >= countl_one();19701971return *this << ShAmt;1972}19731974APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {1975return ushl_ov(ShAmt.getLimitedValue(getBitWidth()), Overflow);1976}19771978APInt APInt::ushl_ov(unsigned ShAmt, bool &Overflow) const {1979Overflow = ShAmt >= getBitWidth();1980if (Overflow)1981return APInt(BitWidth, 0);19821983Overflow = ShAmt > countl_zero();19841985return *this << ShAmt;1986}19871988APInt APInt::sfloordiv_ov(const APInt &RHS, bool &Overflow) const {1989APInt quotient = sdiv_ov(RHS, Overflow);1990if ((quotient * RHS != *this) && (isNegative() != RHS.isNegative()))1991return quotient - 1;1992return quotient;1993}19941995APInt APInt::sadd_sat(const APInt &RHS) const {1996bool Overflow;1997APInt Res = sadd_ov(RHS, Overflow);1998if (!Overflow)1999return Res;20002001return isNegative() ? APInt::getSignedMinValue(BitWidth)2002: APInt::getSignedMaxValue(BitWidth);2003}20042005APInt APInt::uadd_sat(const APInt &RHS) const {2006bool Overflow;2007APInt Res = uadd_ov(RHS, Overflow);2008if (!Overflow)2009return Res;20102011return APInt::getMaxValue(BitWidth);2012}20132014APInt APInt::ssub_sat(const APInt &RHS) const {2015bool Overflow;2016APInt Res = ssub_ov(RHS, Overflow);2017if (!Overflow)2018return Res;20192020return isNegative() ? APInt::getSignedMinValue(BitWidth)2021: APInt::getSignedMaxValue(BitWidth);2022}20232024APInt APInt::usub_sat(const APInt &RHS) const {2025bool Overflow;2026APInt Res = usub_ov(RHS, Overflow);2027if (!Overflow)2028return Res;20292030return APInt(BitWidth, 0);2031}20322033APInt APInt::smul_sat(const APInt &RHS) const {2034bool Overflow;2035APInt Res = smul_ov(RHS, Overflow);2036if (!Overflow)2037return Res;20382039// The result is negative if one and only one of inputs is negative.2040bool ResIsNegative = isNegative() ^ RHS.isNegative();20412042return ResIsNegative ? APInt::getSignedMinValue(BitWidth)2043: APInt::getSignedMaxValue(BitWidth);2044}20452046APInt APInt::umul_sat(const APInt &RHS) const {2047bool Overflow;2048APInt Res = umul_ov(RHS, Overflow);2049if (!Overflow)2050return Res;20512052return APInt::getMaxValue(BitWidth);2053}20542055APInt APInt::sshl_sat(const APInt &RHS) const {2056return sshl_sat(RHS.getLimitedValue(getBitWidth()));2057}20582059APInt APInt::sshl_sat(unsigned RHS) const {2060bool Overflow;2061APInt Res = sshl_ov(RHS, Overflow);2062if (!Overflow)2063return Res;20642065return isNegative() ? APInt::getSignedMinValue(BitWidth)2066: APInt::getSignedMaxValue(BitWidth);2067}20682069APInt APInt::ushl_sat(const APInt &RHS) const {2070return ushl_sat(RHS.getLimitedValue(getBitWidth()));2071}20722073APInt APInt::ushl_sat(unsigned RHS) const {2074bool Overflow;2075APInt Res = ushl_ov(RHS, Overflow);2076if (!Overflow)2077return Res;20782079return APInt::getMaxValue(BitWidth);2080}20812082void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {2083// Check our assumptions here2084assert(!str.empty() && "Invalid string length");2085assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||2086radix == 36) &&2087"Radix should be 2, 8, 10, 16, or 36!");20882089StringRef::iterator p = str.begin();2090size_t slen = str.size();2091bool isNeg = *p == '-';2092if (*p == '-' || *p == '+') {2093p++;2094slen--;2095assert(slen && "String is only a sign, needs a value.");2096}2097assert((slen <= numbits || radix != 2) && "Insufficient bit width");2098assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");2099assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");2100assert((((slen-1)*64)/22 <= numbits || radix != 10) &&2101"Insufficient bit width");21022103// Allocate memory if needed2104if (isSingleWord())2105U.VAL = 0;2106else2107U.pVal = getClearedMemory(getNumWords());21082109// Figure out if we can shift instead of multiply2110unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);21112112// Enter digit traversal loop2113for (StringRef::iterator e = str.end(); p != e; ++p) {2114unsigned digit = getDigit(*p, radix);2115assert(digit < radix && "Invalid character in digit string");21162117// Shift or multiply the value by the radix2118if (slen > 1) {2119if (shift)2120*this <<= shift;2121else2122*this *= radix;2123}21242125// Add in the digit we just interpreted2126*this += digit;2127}2128// If its negative, put it in two's complement form2129if (isNeg)2130this->negate();2131}21322133void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed,2134bool formatAsCLiteral, bool UpperCase,2135bool InsertSeparators) const {2136assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||2137Radix == 36) &&2138"Radix should be 2, 8, 10, 16, or 36!");21392140const char *Prefix = "";2141if (formatAsCLiteral) {2142switch (Radix) {2143case 2:2144// Binary literals are a non-standard extension added in gcc 4.3:2145// http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html2146Prefix = "0b";2147break;2148case 8:2149Prefix = "0";2150break;2151case 10:2152break; // No prefix2153case 16:2154Prefix = "0x";2155break;2156default:2157llvm_unreachable("Invalid radix!");2158}2159}21602161// Number of digits in a group between separators.2162unsigned Grouping = (Radix == 8 || Radix == 10) ? 3 : 4;21632164// First, check for a zero value and just short circuit the logic below.2165if (isZero()) {2166while (*Prefix) {2167Str.push_back(*Prefix);2168++Prefix;2169};2170Str.push_back('0');2171return;2172}21732174static const char BothDigits[] = "0123456789abcdefghijklmnopqrstuvwxyz"2175"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";2176const char *Digits = BothDigits + (UpperCase ? 36 : 0);21772178if (isSingleWord()) {2179char Buffer[65];2180char *BufPtr = std::end(Buffer);21812182uint64_t N;2183if (!Signed) {2184N = getZExtValue();2185} else {2186int64_t I = getSExtValue();2187if (I >= 0) {2188N = I;2189} else {2190Str.push_back('-');2191N = -(uint64_t)I;2192}2193}21942195while (*Prefix) {2196Str.push_back(*Prefix);2197++Prefix;2198};21992200int Pos = 0;2201while (N) {2202if (InsertSeparators && Pos % Grouping == 0 && Pos > 0)2203*--BufPtr = '\'';2204*--BufPtr = Digits[N % Radix];2205N /= Radix;2206Pos++;2207}2208Str.append(BufPtr, std::end(Buffer));2209return;2210}22112212APInt Tmp(*this);22132214if (Signed && isNegative()) {2215// They want to print the signed version and it is a negative value2216// Flip the bits and add one to turn it into the equivalent positive2217// value and put a '-' in the result.2218Tmp.negate();2219Str.push_back('-');2220}22212222while (*Prefix) {2223Str.push_back(*Prefix);2224++Prefix;2225};22262227// We insert the digits backward, then reverse them to get the right order.2228unsigned StartDig = Str.size();22292230// For the 2, 8 and 16 bit cases, we can just shift instead of divide2231// because the number of bits per digit (1, 3 and 4 respectively) divides2232// equally. We just shift until the value is zero.2233if (Radix == 2 || Radix == 8 || Radix == 16) {2234// Just shift tmp right for each digit width until it becomes zero2235unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));2236unsigned MaskAmt = Radix - 1;22372238int Pos = 0;2239while (Tmp.getBoolValue()) {2240unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;2241if (InsertSeparators && Pos % Grouping == 0 && Pos > 0)2242Str.push_back('\'');22432244Str.push_back(Digits[Digit]);2245Tmp.lshrInPlace(ShiftAmt);2246Pos++;2247}2248} else {2249int Pos = 0;2250while (Tmp.getBoolValue()) {2251uint64_t Digit;2252udivrem(Tmp, Radix, Tmp, Digit);2253assert(Digit < Radix && "divide failed");2254if (InsertSeparators && Pos % Grouping == 0 && Pos > 0)2255Str.push_back('\'');22562257Str.push_back(Digits[Digit]);2258Pos++;2259}2260}22612262// Reverse the digits before returning.2263std::reverse(Str.begin()+StartDig, Str.end());2264}22652266#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)2267LLVM_DUMP_METHOD void APInt::dump() const {2268SmallString<40> S, U;2269this->toStringUnsigned(U);2270this->toStringSigned(S);2271dbgs() << "APInt(" << BitWidth << "b, "2272<< U << "u " << S << "s)\n";2273}2274#endif22752276void APInt::print(raw_ostream &OS, bool isSigned) const {2277SmallString<40> S;2278this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);2279OS << S;2280}22812282// This implements a variety of operations on a representation of2283// arbitrary precision, two's-complement, bignum integer values.22842285// Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe2286// and unrestricting assumption.2287static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,2288"Part width must be divisible by 2!");22892290// Returns the integer part with the least significant BITS set.2291// BITS cannot be zero.2292static inline APInt::WordType lowBitMask(unsigned bits) {2293assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);2294return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);2295}22962297/// Returns the value of the lower half of PART.2298static inline APInt::WordType lowHalf(APInt::WordType part) {2299return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);2300}23012302/// Returns the value of the upper half of PART.2303static inline APInt::WordType highHalf(APInt::WordType part) {2304return part >> (APInt::APINT_BITS_PER_WORD / 2);2305}23062307/// Sets the least significant part of a bignum to the input value, and zeroes2308/// out higher parts.2309void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {2310assert(parts > 0);2311dst[0] = part;2312for (unsigned i = 1; i < parts; i++)2313dst[i] = 0;2314}23152316/// Assign one bignum to another.2317void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {2318for (unsigned i = 0; i < parts; i++)2319dst[i] = src[i];2320}23212322/// Returns true if a bignum is zero, false otherwise.2323bool APInt::tcIsZero(const WordType *src, unsigned parts) {2324for (unsigned i = 0; i < parts; i++)2325if (src[i])2326return false;23272328return true;2329}23302331/// Extract the given bit of a bignum; returns 0 or 1.2332int APInt::tcExtractBit(const WordType *parts, unsigned bit) {2333return (parts[whichWord(bit)] & maskBit(bit)) != 0;2334}23352336/// Set the given bit of a bignum.2337void APInt::tcSetBit(WordType *parts, unsigned bit) {2338parts[whichWord(bit)] |= maskBit(bit);2339}23402341/// Clears the given bit of a bignum.2342void APInt::tcClearBit(WordType *parts, unsigned bit) {2343parts[whichWord(bit)] &= ~maskBit(bit);2344}23452346/// Returns the bit number of the least significant set bit of a number. If the2347/// input number has no bits set UINT_MAX is returned.2348unsigned APInt::tcLSB(const WordType *parts, unsigned n) {2349for (unsigned i = 0; i < n; i++) {2350if (parts[i] != 0) {2351unsigned lsb = llvm::countr_zero(parts[i]);2352return lsb + i * APINT_BITS_PER_WORD;2353}2354}23552356return UINT_MAX;2357}23582359/// Returns the bit number of the most significant set bit of a number.2360/// If the input number has no bits set UINT_MAX is returned.2361unsigned APInt::tcMSB(const WordType *parts, unsigned n) {2362do {2363--n;23642365if (parts[n] != 0) {2366static_assert(sizeof(parts[n]) <= sizeof(uint64_t));2367unsigned msb = llvm::Log2_64(parts[n]);23682369return msb + n * APINT_BITS_PER_WORD;2370}2371} while (n);23722373return UINT_MAX;2374}23752376/// Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to2377/// DST, of dstCOUNT parts, such that the bit srcLSB becomes the least2378/// significant bit of DST. All high bits above srcBITS in DST are zero-filled.2379/// */2380void2381APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,2382unsigned srcBits, unsigned srcLSB) {2383unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;2384assert(dstParts <= dstCount);23852386unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;2387tcAssign(dst, src + firstSrcPart, dstParts);23882389unsigned shift = srcLSB % APINT_BITS_PER_WORD;2390tcShiftRight(dst, dstParts, shift);23912392// We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC2393// in DST. If this is less that srcBits, append the rest, else2394// clear the high bits.2395unsigned n = dstParts * APINT_BITS_PER_WORD - shift;2396if (n < srcBits) {2397WordType mask = lowBitMask (srcBits - n);2398dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)2399<< n % APINT_BITS_PER_WORD);2400} else if (n > srcBits) {2401if (srcBits % APINT_BITS_PER_WORD)2402dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);2403}24042405// Clear high parts.2406while (dstParts < dstCount)2407dst[dstParts++] = 0;2408}24092410//// DST += RHS + C where C is zero or one. Returns the carry flag.2411APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,2412WordType c, unsigned parts) {2413assert(c <= 1);24142415for (unsigned i = 0; i < parts; i++) {2416WordType l = dst[i];2417if (c) {2418dst[i] += rhs[i] + 1;2419c = (dst[i] <= l);2420} else {2421dst[i] += rhs[i];2422c = (dst[i] < l);2423}2424}24252426return c;2427}24282429/// This function adds a single "word" integer, src, to the multiple2430/// "word" integer array, dst[]. dst[] is modified to reflect the addition and2431/// 1 is returned if there is a carry out, otherwise 0 is returned.2432/// @returns the carry of the addition.2433APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,2434unsigned parts) {2435for (unsigned i = 0; i < parts; ++i) {2436dst[i] += src;2437if (dst[i] >= src)2438return 0; // No need to carry so exit early.2439src = 1; // Carry one to next digit.2440}24412442return 1;2443}24442445/// DST -= RHS + C where C is zero or one. Returns the carry flag.2446APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,2447WordType c, unsigned parts) {2448assert(c <= 1);24492450for (unsigned i = 0; i < parts; i++) {2451WordType l = dst[i];2452if (c) {2453dst[i] -= rhs[i] + 1;2454c = (dst[i] >= l);2455} else {2456dst[i] -= rhs[i];2457c = (dst[i] > l);2458}2459}24602461return c;2462}24632464/// This function subtracts a single "word" (64-bit word), src, from2465/// the multi-word integer array, dst[], propagating the borrowed 1 value until2466/// no further borrowing is needed or it runs out of "words" in dst. The result2467/// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not2468/// exhausted. In other words, if src > dst then this function returns 1,2469/// otherwise 0.2470/// @returns the borrow out of the subtraction2471APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,2472unsigned parts) {2473for (unsigned i = 0; i < parts; ++i) {2474WordType Dst = dst[i];2475dst[i] -= src;2476if (src <= Dst)2477return 0; // No need to borrow so exit early.2478src = 1; // We have to "borrow 1" from next "word"2479}24802481return 1;2482}24832484/// Negate a bignum in-place.2485void APInt::tcNegate(WordType *dst, unsigned parts) {2486tcComplement(dst, parts);2487tcIncrement(dst, parts);2488}24892490/// DST += SRC * MULTIPLIER + CARRY if add is true2491/// DST = SRC * MULTIPLIER + CARRY if add is false2492/// Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC2493/// they must start at the same point, i.e. DST == SRC.2494/// If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is2495/// returned. Otherwise DST is filled with the least significant2496/// DSTPARTS parts of the result, and if all of the omitted higher2497/// parts were zero return zero, otherwise overflow occurred and2498/// return one.2499int APInt::tcMultiplyPart(WordType *dst, const WordType *src,2500WordType multiplier, WordType carry,2501unsigned srcParts, unsigned dstParts,2502bool add) {2503// Otherwise our writes of DST kill our later reads of SRC.2504assert(dst <= src || dst >= src + srcParts);2505assert(dstParts <= srcParts + 1);25062507// N loops; minimum of dstParts and srcParts.2508unsigned n = std::min(dstParts, srcParts);25092510for (unsigned i = 0; i < n; i++) {2511// [LOW, HIGH] = MULTIPLIER * SRC[i] + DST[i] + CARRY.2512// This cannot overflow, because:2513// (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)2514// which is less than n^2.2515WordType srcPart = src[i];2516WordType low, mid, high;2517if (multiplier == 0 || srcPart == 0) {2518low = carry;2519high = 0;2520} else {2521low = lowHalf(srcPart) * lowHalf(multiplier);2522high = highHalf(srcPart) * highHalf(multiplier);25232524mid = lowHalf(srcPart) * highHalf(multiplier);2525high += highHalf(mid);2526mid <<= APINT_BITS_PER_WORD / 2;2527if (low + mid < low)2528high++;2529low += mid;25302531mid = highHalf(srcPart) * lowHalf(multiplier);2532high += highHalf(mid);2533mid <<= APINT_BITS_PER_WORD / 2;2534if (low + mid < low)2535high++;2536low += mid;25372538// Now add carry.2539if (low + carry < low)2540high++;2541low += carry;2542}25432544if (add) {2545// And now DST[i], and store the new low part there.2546if (low + dst[i] < low)2547high++;2548dst[i] += low;2549} else2550dst[i] = low;25512552carry = high;2553}25542555if (srcParts < dstParts) {2556// Full multiplication, there is no overflow.2557assert(srcParts + 1 == dstParts);2558dst[srcParts] = carry;2559return 0;2560}25612562// We overflowed if there is carry.2563if (carry)2564return 1;25652566// We would overflow if any significant unwritten parts would be2567// non-zero. This is true if any remaining src parts are non-zero2568// and the multiplier is non-zero.2569if (multiplier)2570for (unsigned i = dstParts; i < srcParts; i++)2571if (src[i])2572return 1;25732574// We fitted in the narrow destination.2575return 0;2576}25772578/// DST = LHS * RHS, where DST has the same width as the operands and2579/// is filled with the least significant parts of the result. Returns2580/// one if overflow occurred, otherwise zero. DST must be disjoint2581/// from both operands.2582int APInt::tcMultiply(WordType *dst, const WordType *lhs,2583const WordType *rhs, unsigned parts) {2584assert(dst != lhs && dst != rhs);25852586int overflow = 0;25872588for (unsigned i = 0; i < parts; i++) {2589// Don't accumulate on the first iteration so we don't need to initalize2590// dst to 0.2591overflow |=2592tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts, parts - i, i != 0);2593}25942595return overflow;2596}25972598/// DST = LHS * RHS, where DST has width the sum of the widths of the2599/// operands. No overflow occurs. DST must be disjoint from both operands.2600void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,2601const WordType *rhs, unsigned lhsParts,2602unsigned rhsParts) {2603// Put the narrower number on the LHS for less loops below.2604if (lhsParts > rhsParts)2605return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);26062607assert(dst != lhs && dst != rhs);26082609for (unsigned i = 0; i < lhsParts; i++) {2610// Don't accumulate on the first iteration so we don't need to initalize2611// dst to 0.2612tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, i != 0);2613}2614}26152616// If RHS is zero LHS and REMAINDER are left unchanged, return one.2617// Otherwise set LHS to LHS / RHS with the fractional part discarded,2618// set REMAINDER to the remainder, return zero. i.e.2619//2620// OLD_LHS = RHS * LHS + REMAINDER2621//2622// SCRATCH is a bignum of the same size as the operands and result for2623// use by the routine; its contents need not be initialized and are2624// destroyed. LHS, REMAINDER and SCRATCH must be distinct.2625int APInt::tcDivide(WordType *lhs, const WordType *rhs,2626WordType *remainder, WordType *srhs,2627unsigned parts) {2628assert(lhs != remainder && lhs != srhs && remainder != srhs);26292630unsigned shiftCount = tcMSB(rhs, parts) + 1;2631if (shiftCount == 0)2632return true;26332634shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;2635unsigned n = shiftCount / APINT_BITS_PER_WORD;2636WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);26372638tcAssign(srhs, rhs, parts);2639tcShiftLeft(srhs, parts, shiftCount);2640tcAssign(remainder, lhs, parts);2641tcSet(lhs, 0, parts);26422643// Loop, subtracting SRHS if REMAINDER is greater and adding that to the2644// total.2645for (;;) {2646int compare = tcCompare(remainder, srhs, parts);2647if (compare >= 0) {2648tcSubtract(remainder, srhs, 0, parts);2649lhs[n] |= mask;2650}26512652if (shiftCount == 0)2653break;2654shiftCount--;2655tcShiftRight(srhs, parts, 1);2656if ((mask >>= 1) == 0) {2657mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);2658n--;2659}2660}26612662return false;2663}26642665/// Shift a bignum left Count bits in-place. Shifted in bits are zero. There are2666/// no restrictions on Count.2667void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {2668// Don't bother performing a no-op shift.2669if (!Count)2670return;26712672// WordShift is the inter-part shift; BitShift is the intra-part shift.2673unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);2674unsigned BitShift = Count % APINT_BITS_PER_WORD;26752676// Fastpath for moving by whole words.2677if (BitShift == 0) {2678std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);2679} else {2680while (Words-- > WordShift) {2681Dst[Words] = Dst[Words - WordShift] << BitShift;2682if (Words > WordShift)2683Dst[Words] |=2684Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);2685}2686}26872688// Fill in the remainder with 0s.2689std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);2690}26912692/// Shift a bignum right Count bits in-place. Shifted in bits are zero. There2693/// are no restrictions on Count.2694void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {2695// Don't bother performing a no-op shift.2696if (!Count)2697return;26982699// WordShift is the inter-part shift; BitShift is the intra-part shift.2700unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);2701unsigned BitShift = Count % APINT_BITS_PER_WORD;27022703unsigned WordsToMove = Words - WordShift;2704// Fastpath for moving by whole words.2705if (BitShift == 0) {2706std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);2707} else {2708for (unsigned i = 0; i != WordsToMove; ++i) {2709Dst[i] = Dst[i + WordShift] >> BitShift;2710if (i + 1 != WordsToMove)2711Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);2712}2713}27142715// Fill in the remainder with 0s.2716std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);2717}27182719// Comparison (unsigned) of two bignums.2720int APInt::tcCompare(const WordType *lhs, const WordType *rhs,2721unsigned parts) {2722while (parts) {2723parts--;2724if (lhs[parts] != rhs[parts])2725return (lhs[parts] > rhs[parts]) ? 1 : -1;2726}27272728return 0;2729}27302731APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,2732APInt::Rounding RM) {2733// Currently udivrem always rounds down.2734switch (RM) {2735case APInt::Rounding::DOWN:2736case APInt::Rounding::TOWARD_ZERO:2737return A.udiv(B);2738case APInt::Rounding::UP: {2739APInt Quo, Rem;2740APInt::udivrem(A, B, Quo, Rem);2741if (Rem.isZero())2742return Quo;2743return Quo + 1;2744}2745}2746llvm_unreachable("Unknown APInt::Rounding enum");2747}27482749APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,2750APInt::Rounding RM) {2751switch (RM) {2752case APInt::Rounding::DOWN:2753case APInt::Rounding::UP: {2754APInt Quo, Rem;2755APInt::sdivrem(A, B, Quo, Rem);2756if (Rem.isZero())2757return Quo;2758// This algorithm deals with arbitrary rounding mode used by sdivrem.2759// We want to check whether the non-integer part of the mathematical value2760// is negative or not. If the non-integer part is negative, we need to round2761// down from Quo; otherwise, if it's positive or 0, we return Quo, as it's2762// already rounded down.2763if (RM == APInt::Rounding::DOWN) {2764if (Rem.isNegative() != B.isNegative())2765return Quo - 1;2766return Quo;2767}2768if (Rem.isNegative() != B.isNegative())2769return Quo;2770return Quo + 1;2771}2772// Currently sdiv rounds towards zero.2773case APInt::Rounding::TOWARD_ZERO:2774return A.sdiv(B);2775}2776llvm_unreachable("Unknown APInt::Rounding enum");2777}27782779std::optional<APInt>2780llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,2781unsigned RangeWidth) {2782unsigned CoeffWidth = A.getBitWidth();2783assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());2784assert(RangeWidth <= CoeffWidth &&2785"Value range width should be less than coefficient width");2786assert(RangeWidth > 1 && "Value range bit width should be > 1");27872788LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B2789<< "x + " << C << ", rw:" << RangeWidth << '\n');27902791// Identify 0 as a (non)solution immediately.2792if (C.sextOrTrunc(RangeWidth).isZero()) {2793LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");2794return APInt(CoeffWidth, 0);2795}27962797// The result of APInt arithmetic has the same bit width as the operands,2798// so it can actually lose high bits. A product of two n-bit integers needs2799// 2n-1 bits to represent the full value.2800// The operation done below (on quadratic coefficients) that can produce2801// the largest value is the evaluation of the equation during bisection,2802// which needs 3 times the bitwidth of the coefficient, so the total number2803// of required bits is 3n.2804//2805// The purpose of this extension is to simulate the set Z of all integers,2806// where n+1 > n for all n in Z. In Z it makes sense to talk about positive2807// and negative numbers (not so much in a modulo arithmetic). The method2808// used to solve the equation is based on the standard formula for real2809// numbers, and uses the concepts of "positive" and "negative" with their2810// usual meanings.2811CoeffWidth *= 3;2812A = A.sext(CoeffWidth);2813B = B.sext(CoeffWidth);2814C = C.sext(CoeffWidth);28152816// Make A > 0 for simplicity. Negate cannot overflow at this point because2817// the bit width has increased.2818if (A.isNegative()) {2819A.negate();2820B.negate();2821C.negate();2822}28232824// Solving an equation q(x) = 0 with coefficients in modular arithmetic2825// is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,2826// and R = 2^BitWidth.2827// Since we're trying not only to find exact solutions, but also values2828// that "wrap around", such a set will always have a solution, i.e. an x2829// that satisfies at least one of the equations, or such that |q(x)|2830// exceeds kR, while |q(x-1)| for the same k does not.2831//2832// We need to find a value k, such that Ax^2 + Bx + C = kR will have a2833// positive solution n (in the above sense), and also such that the n2834// will be the least among all solutions corresponding to k = 0, 1, ...2835// (more precisely, the least element in the set2836// { n(k) | k is such that a solution n(k) exists }).2837//2838// Consider the parabola (over real numbers) that corresponds to the2839// quadratic equation. Since A > 0, the arms of the parabola will point2840// up. Picking different values of k will shift it up and down by R.2841//2842// We want to shift the parabola in such a way as to reduce the problem2843// of solving q(x) = kR to solving shifted_q(x) = 0.2844// (The interesting solutions are the ceilings of the real number2845// solutions.)2846APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);2847APInt TwoA = 2 * A;2848APInt SqrB = B * B;2849bool PickLow;28502851auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {2852assert(A.isStrictlyPositive());2853APInt T = V.abs().urem(A);2854if (T.isZero())2855return V;2856return V.isNegative() ? V+T : V+(A-T);2857};28582859// The vertex of the parabola is at -B/2A, but since A > 0, it's negative2860// iff B is positive.2861if (B.isNonNegative()) {2862// If B >= 0, the vertex it at a negative location (or at 0), so in2863// order to have a non-negative solution we need to pick k that makes2864// C-kR negative. To satisfy all the requirements for the solution2865// that we are looking for, it needs to be closest to 0 of all k.2866C = C.srem(R);2867if (C.isStrictlyPositive())2868C -= R;2869// Pick the greater solution.2870PickLow = false;2871} else {2872// If B < 0, the vertex is at a positive location. For any solution2873// to exist, the discriminant must be non-negative. This means that2874// C-kR <= B^2/4A is a necessary condition for k, i.e. there is a2875// lower bound on values of k: kR >= C - B^2/4A.2876APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.2877// Round LowkR up (towards +inf) to the nearest kR.2878LowkR = RoundUp(LowkR, R);28792880// If there exists k meeting the condition above, and such that2881// C-kR > 0, there will be two positive real number solutions of2882// q(x) = kR. Out of all such values of k, pick the one that makes2883// C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).2884// In other words, find maximum k such that LowkR <= kR < C.2885if (C.sgt(LowkR)) {2886// If LowkR < C, then such a k is guaranteed to exist because2887// LowkR itself is a multiple of R.2888C -= -RoundUp(-C, R); // C = C - RoundDown(C, R)2889// Pick the smaller solution.2890PickLow = true;2891} else {2892// If C-kR < 0 for all potential k's, it means that one solution2893// will be negative, while the other will be positive. The positive2894// solution will shift towards 0 if the parabola is moved up.2895// Pick the kR closest to the lower bound (i.e. make C-kR closest2896// to 0, or in other words, out of all parabolas that have solutions,2897// pick the one that is the farthest "up").2898// Since LowkR is itself a multiple of R, simply take C-LowkR.2899C -= LowkR;2900// Pick the greater solution.2901PickLow = false;2902}2903}29042905LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "2906<< B << "x + " << C << ", rw:" << RangeWidth << '\n');29072908APInt D = SqrB - 4*A*C;2909assert(D.isNonNegative() && "Negative discriminant");2910APInt SQ = D.sqrt();29112912APInt Q = SQ * SQ;2913bool InexactSQ = Q != D;2914// The calculated SQ may actually be greater than the exact (non-integer)2915// value. If that's the case, decrement SQ to get a value that is lower.2916if (Q.sgt(D))2917SQ -= 1;29182919APInt X;2920APInt Rem;29212922// SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.2923// When using the quadratic formula directly, the calculated low root2924// may be greater than the exact one, since we would be subtracting SQ.2925// To make sure that the calculated root is not greater than the exact2926// one, subtract SQ+1 when calculating the low root (for inexact value2927// of SQ).2928if (PickLow)2929APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);2930else2931APInt::sdivrem(-B + SQ, TwoA, X, Rem);29322933// The updated coefficients should be such that the (exact) solution is2934// positive. Since APInt division rounds towards 0, the calculated one2935// can be 0, but cannot be negative.2936assert(X.isNonNegative() && "Solution should be non-negative");29372938if (!InexactSQ && Rem.isZero()) {2939LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');2940return X;2941}29422943assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");2944// The exact value of the square root of D should be between SQ and SQ+1.2945// This implies that the solution should be between that corresponding to2946// SQ (i.e. X) and that corresponding to SQ+1.2947//2948// The calculated X cannot be greater than the exact (real) solution.2949// Actually it must be strictly less than the exact solution, while2950// X+1 will be greater than or equal to it.29512952APInt VX = (A*X + B)*X + C;2953APInt VY = VX + TwoA*X + A + B;2954bool SignChange =2955VX.isNegative() != VY.isNegative() || VX.isZero() != VY.isZero();2956// If the sign did not change between X and X+1, X is not a valid solution.2957// This could happen when the actual (exact) roots don't have an integer2958// between them, so they would both be contained between X and X+1.2959if (!SignChange) {2960LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");2961return std::nullopt;2962}29632964X += 1;2965LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');2966return X;2967}29682969std::optional<unsigned>2970llvm::APIntOps::GetMostSignificantDifferentBit(const APInt &A, const APInt &B) {2971assert(A.getBitWidth() == B.getBitWidth() && "Must have the same bitwidth");2972if (A == B)2973return std::nullopt;2974return A.getBitWidth() - ((A ^ B).countl_zero() + 1);2975}29762977APInt llvm::APIntOps::ScaleBitMask(const APInt &A, unsigned NewBitWidth,2978bool MatchAllBits) {2979unsigned OldBitWidth = A.getBitWidth();2980assert((((OldBitWidth % NewBitWidth) == 0) ||2981((NewBitWidth % OldBitWidth) == 0)) &&2982"One size should be a multiple of the other one. "2983"Can't do fractional scaling.");29842985// Check for matching bitwidths.2986if (OldBitWidth == NewBitWidth)2987return A;29882989APInt NewA = APInt::getZero(NewBitWidth);29902991// Check for null input.2992if (A.isZero())2993return NewA;29942995if (NewBitWidth > OldBitWidth) {2996// Repeat bits.2997unsigned Scale = NewBitWidth / OldBitWidth;2998for (unsigned i = 0; i != OldBitWidth; ++i)2999if (A[i])3000NewA.setBits(i * Scale, (i + 1) * Scale);3001} else {3002unsigned Scale = OldBitWidth / NewBitWidth;3003for (unsigned i = 0; i != NewBitWidth; ++i) {3004if (MatchAllBits) {3005if (A.extractBits(Scale, i * Scale).isAllOnes())3006NewA.setBit(i);3007} else {3008if (!A.extractBits(Scale, i * Scale).isZero())3009NewA.setBit(i);3010}3011}3012}30133014return NewA;3015}30163017/// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst3018/// with the integer held in IntVal.3019void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,3020unsigned StoreBytes) {3021assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");3022const uint8_t *Src = (const uint8_t *)IntVal.getRawData();30233024if (sys::IsLittleEndianHost) {3025// Little-endian host - the source is ordered from LSB to MSB. Order the3026// destination from LSB to MSB: Do a straight copy.3027memcpy(Dst, Src, StoreBytes);3028} else {3029// Big-endian host - the source is an array of 64 bit words ordered from3030// LSW to MSW. Each word is ordered from MSB to LSB. Order the destination3031// from MSB to LSB: Reverse the word order, but not the bytes in a word.3032while (StoreBytes > sizeof(uint64_t)) {3033StoreBytes -= sizeof(uint64_t);3034// May not be aligned so use memcpy.3035memcpy(Dst + StoreBytes, Src, sizeof(uint64_t));3036Src += sizeof(uint64_t);3037}30383039memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes);3040}3041}30423043/// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting3044/// from Src into IntVal, which is assumed to be wide enough and to hold zero.3045void llvm::LoadIntFromMemory(APInt &IntVal, const uint8_t *Src,3046unsigned LoadBytes) {3047assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!");3048uint8_t *Dst = reinterpret_cast<uint8_t *>(3049const_cast<uint64_t *>(IntVal.getRawData()));30503051if (sys::IsLittleEndianHost)3052// Little-endian host - the destination must be ordered from LSB to MSB.3053// The source is ordered from LSB to MSB: Do a straight copy.3054memcpy(Dst, Src, LoadBytes);3055else {3056// Big-endian - the destination is an array of 64 bit words ordered from3057// LSW to MSW. Each word must be ordered from MSB to LSB. The source is3058// ordered from MSB to LSB: Reverse the word order, but not the bytes in3059// a word.3060while (LoadBytes > sizeof(uint64_t)) {3061LoadBytes -= sizeof(uint64_t);3062// May not be aligned so use memcpy.3063memcpy(Dst, Src + LoadBytes, sizeof(uint64_t));3064Dst += sizeof(uint64_t);3065}30663067memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes);3068}3069}30703071APInt APIntOps::avgFloorS(const APInt &C1, const APInt &C2) {3072// Return floor((C1 + C2) / 2)3073return (C1 & C2) + (C1 ^ C2).ashr(1);3074}30753076APInt APIntOps::avgFloorU(const APInt &C1, const APInt &C2) {3077// Return floor((C1 + C2) / 2)3078return (C1 & C2) + (C1 ^ C2).lshr(1);3079}30803081APInt APIntOps::avgCeilS(const APInt &C1, const APInt &C2) {3082// Return ceil((C1 + C2) / 2)3083return (C1 | C2) - (C1 ^ C2).ashr(1);3084}30853086APInt APIntOps::avgCeilU(const APInt &C1, const APInt &C2) {3087// Return ceil((C1 + C2) / 2)3088return (C1 | C2) - (C1 ^ C2).lshr(1);3089}30903091APInt APIntOps::mulhs(const APInt &C1, const APInt &C2) {3092assert(C1.getBitWidth() == C2.getBitWidth() && "Unequal bitwidths");3093unsigned FullWidth = C1.getBitWidth() * 2;3094APInt C1Ext = C1.sext(FullWidth);3095APInt C2Ext = C2.sext(FullWidth);3096return (C1Ext * C2Ext).extractBits(C1.getBitWidth(), C1.getBitWidth());3097}30983099APInt APIntOps::mulhu(const APInt &C1, const APInt &C2) {3100assert(C1.getBitWidth() == C2.getBitWidth() && "Unequal bitwidths");3101unsigned FullWidth = C1.getBitWidth() * 2;3102APInt C1Ext = C1.zext(FullWidth);3103APInt C2Ext = C2.zext(FullWidth);3104return (C1Ext * C2Ext).extractBits(C1.getBitWidth(), C1.getBitWidth());3105}310631073108