Path: blob/main/contrib/llvm-project/llvm/lib/Support/APFloat.cpp
35232 views
//===-- APFloat.cpp - Implement APFloat class -----------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file implements a class to represent arbitrary precision floating9// point values and provide a variety of arithmetic operations on them.10//11//===----------------------------------------------------------------------===//1213#include "llvm/ADT/APFloat.h"14#include "llvm/ADT/APSInt.h"15#include "llvm/ADT/ArrayRef.h"16#include "llvm/ADT/FloatingPointMode.h"17#include "llvm/ADT/FoldingSet.h"18#include "llvm/ADT/Hashing.h"19#include "llvm/ADT/STLExtras.h"20#include "llvm/ADT/StringExtras.h"21#include "llvm/ADT/StringRef.h"22#include "llvm/Config/llvm-config.h"23#include "llvm/Support/Debug.h"24#include "llvm/Support/Error.h"25#include "llvm/Support/MathExtras.h"26#include "llvm/Support/raw_ostream.h"27#include <cstring>28#include <limits.h>2930#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \31do { \32if (usesLayout<IEEEFloat>(getSemantics())) \33return U.IEEE.METHOD_CALL; \34if (usesLayout<DoubleAPFloat>(getSemantics())) \35return U.Double.METHOD_CALL; \36llvm_unreachable("Unexpected semantics"); \37} while (false)3839using namespace llvm;4041/// A macro used to combine two fcCategory enums into one key which can be used42/// in a switch statement to classify how the interaction of two APFloat's43/// categories affects an operation.44///45/// TODO: If clang source code is ever allowed to use constexpr in its own46/// codebase, change this into a static inline function.47#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))4849/* Assumed in hexadecimal significand parsing, and conversion to50hexadecimal strings. 
*/51static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");5253namespace llvm {5455// How the nonfinite values Inf and NaN are represented.56enum class fltNonfiniteBehavior {57// Represents standard IEEE 754 behavior. A value is nonfinite if the58// exponent field is all 1s. In such cases, a value is Inf if the59// significand bits are all zero, and NaN otherwise60IEEE754,6162// This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,63// Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). There is no64// representation for Inf, and operations that would ordinarily produce Inf65// produce NaN instead.66// The details of the NaN representation(s) in this form are determined by the67// `fltNanEncoding` enum. We treat all NaNs as quiet, as the available68// encodings do not distinguish between signalling and quiet NaN.69NanOnly,7071// This behavior is present in Float6E3M2FN, Float6E2M3FN, and72// Float4E2M1FN types, which do not support Inf or NaN values.73FiniteOnly,74};7576// How NaN values are represented. This is curently only used in combination77// with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE78// while having IEEE non-finite behavior is liable to lead to unexpected79// results.80enum class fltNanEncoding {81// Represents the standard IEEE behavior where a value is NaN if its82// exponent is all 1s and the significand is non-zero.83IEEE,8485// Represents the behavior in the Float8E4M3FN floating point type where NaN86// is represented by having the exponent and mantissa set to all 1s.87// This behavior matches the FP8 E4M3 type described in88// https://arxiv.org/abs/2209.05433. 
We treat both signed and unsigned NaNs89// as non-signalling, although the paper does not state whether the NaN90// values are signalling or not.91AllOnes,9293// Represents the behavior in Float8E{5,4}E{2,3}FNUZ floating point types94// where NaN is represented by a sign bit of 1 and all 0s in the exponent95// and mantissa (i.e. the negative zero encoding in a IEEE float). Since96// there is only one NaN value, it is treated as quiet NaN. This matches the97// behavior described in https://arxiv.org/abs/2206.02915 .98NegativeZero,99};100101/* Represents floating point arithmetic semantics. */102struct fltSemantics {103/* The largest E such that 2^E is representable; this matches the104definition of IEEE 754. */105APFloatBase::ExponentType maxExponent;106107/* The smallest E such that 2^E is a normalized number; this108matches the definition of IEEE 754. */109APFloatBase::ExponentType minExponent;110111/* Number of bits in the significand. This includes the integer112bit. */113unsigned int precision;114115/* Number of bits actually used in the semantics. */116unsigned int sizeInBits;117118fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754;119120fltNanEncoding nanEncoding = fltNanEncoding::IEEE;121// Returns true if any number described by this semantics can be precisely122// represented by the specified semantics. 
Does not take into account123// the value of fltNonfiniteBehavior.124bool isRepresentableBy(const fltSemantics &S) const {125return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&126precision <= S.precision;127}128};129130static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16};131static constexpr fltSemantics semBFloat = {127, -126, 8, 16};132static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32};133static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64};134static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128};135static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8};136static constexpr fltSemantics semFloat8E5M2FNUZ = {13715, -15, 3, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};138static constexpr fltSemantics semFloat8E4M3 = {7, -6, 4, 8};139static constexpr fltSemantics semFloat8E4M3FN = {1408, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes};141static constexpr fltSemantics semFloat8E4M3FNUZ = {1427, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};143static constexpr fltSemantics semFloat8E4M3B11FNUZ = {1444, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};145static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19};146static constexpr fltSemantics semFloat6E3M2FN = {1474, -2, 3, 6, fltNonfiniteBehavior::FiniteOnly};148static constexpr fltSemantics semFloat6E2M3FN = {1492, 0, 4, 6, fltNonfiniteBehavior::FiniteOnly};150static constexpr fltSemantics semFloat4E2M1FN = {1512, 0, 2, 4, fltNonfiniteBehavior::FiniteOnly};152static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};153static constexpr fltSemantics semBogus = {0, 0, 0, 0};154155/* The IBM double-double semantics. Such a number consists of a pair of IEEE15664-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,157(double)(Hi + Lo) == Hi. 
The numeric value it's modeling is Hi + Lo.158Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent159to each other, and two 11-bit exponents.160161Note: we need to make the value different from semBogus as otherwise162an unsafe optimization may collapse both values to a single address,163and we heavily rely on them having distinct addresses. */164static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};165166/* These are legacy semantics for the fallback, inaccrurate implementation of167IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the168operation. It's equivalent to having an IEEE number with consecutive 106169bits of mantissa and 11 bits of exponent.170171It's not equivalent to IBM double-double. For example, a legit IBM172double-double, 1 + epsilon:1731741 + epsilon = 1 + (1 >> 1076)175176is not representable by a consecutive 106 bits of mantissa.177178Currently, these semantics are used in the following way:179180semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->181(64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->182semPPCDoubleDoubleLegacy -> IEEE operations183184We use bitcastToAPInt() to get the bit representation (in APInt) of the185underlying IEEEdouble, then use the APInt constructor to construct the186legacy IEEE float.187188TODO: Implement all operations in semPPCDoubleDouble, and delete these189semantics. 
*/190static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,19153 + 53, 128};192193const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {194switch (S) {195case S_IEEEhalf:196return IEEEhalf();197case S_BFloat:198return BFloat();199case S_IEEEsingle:200return IEEEsingle();201case S_IEEEdouble:202return IEEEdouble();203case S_IEEEquad:204return IEEEquad();205case S_PPCDoubleDouble:206return PPCDoubleDouble();207case S_Float8E5M2:208return Float8E5M2();209case S_Float8E5M2FNUZ:210return Float8E5M2FNUZ();211case S_Float8E4M3:212return Float8E4M3();213case S_Float8E4M3FN:214return Float8E4M3FN();215case S_Float8E4M3FNUZ:216return Float8E4M3FNUZ();217case S_Float8E4M3B11FNUZ:218return Float8E4M3B11FNUZ();219case S_FloatTF32:220return FloatTF32();221case S_Float6E3M2FN:222return Float6E3M2FN();223case S_Float6E2M3FN:224return Float6E2M3FN();225case S_Float4E2M1FN:226return Float4E2M1FN();227case S_x87DoubleExtended:228return x87DoubleExtended();229}230llvm_unreachable("Unrecognised floating semantics");231}232233APFloatBase::Semantics234APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {235if (&Sem == &llvm::APFloat::IEEEhalf())236return S_IEEEhalf;237else if (&Sem == &llvm::APFloat::BFloat())238return S_BFloat;239else if (&Sem == &llvm::APFloat::IEEEsingle())240return S_IEEEsingle;241else if (&Sem == &llvm::APFloat::IEEEdouble())242return S_IEEEdouble;243else if (&Sem == &llvm::APFloat::IEEEquad())244return S_IEEEquad;245else if (&Sem == &llvm::APFloat::PPCDoubleDouble())246return S_PPCDoubleDouble;247else if (&Sem == &llvm::APFloat::Float8E5M2())248return S_Float8E5M2;249else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ())250return S_Float8E5M2FNUZ;251else if (&Sem == &llvm::APFloat::Float8E4M3())252return S_Float8E4M3;253else if (&Sem == &llvm::APFloat::Float8E4M3FN())254return S_Float8E4M3FN;255else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ())256return S_Float8E4M3FNUZ;257else if (&Sem == 
&llvm::APFloat::Float8E4M3B11FNUZ())258return S_Float8E4M3B11FNUZ;259else if (&Sem == &llvm::APFloat::FloatTF32())260return S_FloatTF32;261else if (&Sem == &llvm::APFloat::Float6E3M2FN())262return S_Float6E3M2FN;263else if (&Sem == &llvm::APFloat::Float6E2M3FN())264return S_Float6E2M3FN;265else if (&Sem == &llvm::APFloat::Float4E2M1FN())266return S_Float4E2M1FN;267else if (&Sem == &llvm::APFloat::x87DoubleExtended())268return S_x87DoubleExtended;269else270llvm_unreachable("Unknown floating semantics");271}272273const fltSemantics &APFloatBase::IEEEhalf() { return semIEEEhalf; }274const fltSemantics &APFloatBase::BFloat() { return semBFloat; }275const fltSemantics &APFloatBase::IEEEsingle() { return semIEEEsingle; }276const fltSemantics &APFloatBase::IEEEdouble() { return semIEEEdouble; }277const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; }278const fltSemantics &APFloatBase::PPCDoubleDouble() {279return semPPCDoubleDouble;280}281const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; }282const fltSemantics &APFloatBase::Float8E5M2FNUZ() { return semFloat8E5M2FNUZ; }283const fltSemantics &APFloatBase::Float8E4M3() { return semFloat8E4M3; }284const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; }285const fltSemantics &APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; }286const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() {287return semFloat8E4M3B11FNUZ;288}289const fltSemantics &APFloatBase::FloatTF32() { return semFloatTF32; }290const fltSemantics &APFloatBase::Float6E3M2FN() { return semFloat6E3M2FN; }291const fltSemantics &APFloatBase::Float6E2M3FN() { return semFloat6E2M3FN; }292const fltSemantics &APFloatBase::Float4E2M1FN() { return semFloat4E2M1FN; }293const fltSemantics &APFloatBase::x87DoubleExtended() {294return semX87DoubleExtended;295}296const fltSemantics &APFloatBase::Bogus() { return semBogus; }297298constexpr RoundingMode APFloatBase::rmNearestTiesToEven;299constexpr RoundingMode 
APFloatBase::rmTowardPositive;300constexpr RoundingMode APFloatBase::rmTowardNegative;301constexpr RoundingMode APFloatBase::rmTowardZero;302constexpr RoundingMode APFloatBase::rmNearestTiesToAway;303304/* A tight upper bound on number of parts required to hold the value305pow(5, power) is306307power * 815 / (351 * integerPartWidth) + 1308309However, whilst the result may require only this many parts,310because we are multiplying two values to get it, the311multiplication may require an extra part with the excess part312being zero (consider the trivial case of 1 * 1, tcFullMultiply313requires two parts to hold the single-part result). So we add an314extra one to guarantee enough space whilst multiplying. */315const unsigned int maxExponent = 16383;316const unsigned int maxPrecision = 113;317const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;318const unsigned int maxPowerOfFiveParts =3192 +320((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));321322unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {323return semantics.precision;324}325APFloatBase::ExponentType326APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {327return semantics.maxExponent;328}329APFloatBase::ExponentType330APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {331return semantics.minExponent;332}333unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {334return semantics.sizeInBits;335}336unsigned int APFloatBase::semanticsIntSizeInBits(const fltSemantics &semantics,337bool isSigned) {338// The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need339// at least one more bit than the MaxExponent to hold the max FP value.340unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1;341// Extra sign bit needed.342if (isSigned)343++MinBitWidth;344return MinBitWidth;345}346347bool APFloatBase::isRepresentableAsNormalIn(const fltSemantics &Src,348const 
fltSemantics &Dst) {349// Exponent range must be larger.350if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent)351return false;352353// If the mantissa is long enough, the result value could still be denormal354// with a larger exponent range.355//356// FIXME: This condition is probably not accurate but also shouldn't be a357// practical concern with existing types.358return Dst.precision >= Src.precision;359}360361unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {362return Sem.sizeInBits;363}364365static constexpr APFloatBase::ExponentType366exponentZero(const fltSemantics &semantics) {367return semantics.minExponent - 1;368}369370static constexpr APFloatBase::ExponentType371exponentInf(const fltSemantics &semantics) {372return semantics.maxExponent + 1;373}374375static constexpr APFloatBase::ExponentType376exponentNaN(const fltSemantics &semantics) {377if (semantics.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {378if (semantics.nanEncoding == fltNanEncoding::NegativeZero)379return exponentZero(semantics);380return semantics.maxExponent;381}382return semantics.maxExponent + 1;383}384385/* A bunch of private, handy routines. */386387static inline Error createError(const Twine &Err) {388return make_error<StringError>(Err, inconvertibleErrorCode());389}390391static constexpr inline unsigned int partCountForBits(unsigned int bits) {392return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth;393}394395/* Returns 0U-9U. Return values >= 10U are not digits. */396static inline unsigned int397decDigitValue(unsigned int c)398{399return c - '0';400}401402/* Return the value of a decimal exponent of the form403[+-]ddddddd.404405If the exponent overflows, returns a large exponent with the406appropriate sign. */407static Expected<int> readExponent(StringRef::iterator begin,408StringRef::iterator end) {409bool isNegative;410unsigned int absExponent;411const unsigned int overlargeExponent = 24000; /* FIXME. 
*/412StringRef::iterator p = begin;413414// Treat no exponent as 0 to match binutils415if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {416return 0;417}418419isNegative = (*p == '-');420if (*p == '-' || *p == '+') {421p++;422if (p == end)423return createError("Exponent has no digits");424}425426absExponent = decDigitValue(*p++);427if (absExponent >= 10U)428return createError("Invalid character in exponent");429430for (; p != end; ++p) {431unsigned int value;432433value = decDigitValue(*p);434if (value >= 10U)435return createError("Invalid character in exponent");436437absExponent = absExponent * 10U + value;438if (absExponent >= overlargeExponent) {439absExponent = overlargeExponent;440break;441}442}443444if (isNegative)445return -(int) absExponent;446else447return (int) absExponent;448}449450/* This is ugly and needs cleaning up, but I don't immediately see451how whilst remaining safe. */452static Expected<int> totalExponent(StringRef::iterator p,453StringRef::iterator end,454int exponentAdjustment) {455int unsignedExponent;456bool negative, overflow;457int exponent = 0;458459if (p == end)460return createError("Exponent has no digits");461462negative = *p == '-';463if (*p == '-' || *p == '+') {464p++;465if (p == end)466return createError("Exponent has no digits");467}468469unsignedExponent = 0;470overflow = false;471for (; p != end; ++p) {472unsigned int value;473474value = decDigitValue(*p);475if (value >= 10U)476return createError("Invalid character in exponent");477478unsignedExponent = unsignedExponent * 10 + value;479if (unsignedExponent > 32767) {480overflow = true;481break;482}483}484485if (exponentAdjustment > 32767 || exponentAdjustment < -32768)486overflow = true;487488if (!overflow) {489exponent = unsignedExponent;490if (negative)491exponent = -exponent;492exponent += exponentAdjustment;493if (exponent > 32767 || exponent < -32768)494overflow = true;495}496497if (overflow)498exponent = negative ? 
-32768: 32767;499500return exponent;501}502503static Expected<StringRef::iterator>504skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,505StringRef::iterator *dot) {506StringRef::iterator p = begin;507*dot = end;508while (p != end && *p == '0')509p++;510511if (p != end && *p == '.') {512*dot = p++;513514if (end - begin == 1)515return createError("Significand has no digits");516517while (p != end && *p == '0')518p++;519}520521return p;522}523524/* Given a normal decimal floating point number of the form525526dddd.dddd[eE][+-]ddd527528where the decimal point and exponent are optional, fill out the529structure D. Exponent is appropriate if the significand is530treated as an integer, and normalizedExponent if the significand531is taken to have the decimal point after a single leading532non-zero digit.533534If the value is zero, V->firstSigDigit points to a non-digit, and535the return exponent is zero.536*/537struct decimalInfo {538const char *firstSigDigit;539const char *lastSigDigit;540int exponent;541int normalizedExponent;542};543544static Error interpretDecimal(StringRef::iterator begin,545StringRef::iterator end, decimalInfo *D) {546StringRef::iterator dot = end;547548auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);549if (!PtrOrErr)550return PtrOrErr.takeError();551StringRef::iterator p = *PtrOrErr;552553D->firstSigDigit = p;554D->exponent = 0;555D->normalizedExponent = 0;556557for (; p != end; ++p) {558if (*p == '.') {559if (dot != end)560return createError("String contains multiple dots");561dot = p++;562if (p == end)563break;564}565if (decDigitValue(*p) >= 10U)566break;567}568569if (p != end) {570if (*p != 'e' && *p != 'E')571return createError("Invalid character in significand");572if (p == begin)573return createError("Significand has no digits");574if (dot != end && p - begin == 1)575return createError("Significand has no digits");576577/* p points to the first non-digit in the string */578auto ExpOrErr = 
readExponent(p + 1, end);579if (!ExpOrErr)580return ExpOrErr.takeError();581D->exponent = *ExpOrErr;582583/* Implied decimal point? */584if (dot == end)585dot = p;586}587588/* If number is all zeroes accept any exponent. */589if (p != D->firstSigDigit) {590/* Drop insignificant trailing zeroes. */591if (p != begin) {592do593do594p--;595while (p != begin && *p == '0');596while (p != begin && *p == '.');597}598599/* Adjust the exponents for any decimal point. */600D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));601D->normalizedExponent = (D->exponent +602static_cast<APFloat::ExponentType>((p - D->firstSigDigit)603- (dot > D->firstSigDigit && dot < p)));604}605606D->lastSigDigit = p;607return Error::success();608}609610/* Return the trailing fraction of a hexadecimal number.611DIGITVALUE is the first hex digit of the fraction, P points to612the next digit. */613static Expected<lostFraction>614trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,615unsigned int digitValue) {616unsigned int hexDigit;617618/* If the first trailing digit isn't 0 or 8 we can work out the619fraction immediately. */620if (digitValue > 8)621return lfMoreThanHalf;622else if (digitValue < 8 && digitValue > 0)623return lfLessThanHalf;624625// Otherwise we need to find the first non-zero digit.626while (p != end && (*p == '0' || *p == '.'))627p++;628629if (p == end)630return createError("Invalid trailing hexadecimal fraction!");631632hexDigit = hexDigitValue(*p);633634/* If we ran off the end it is exactly zero or one-half, otherwise635a little more. */636if (hexDigit == UINT_MAX)637return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;638else639return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;640}641642/* Return the fraction lost were a bignum truncated losing the least643significant BITS bits. 
*/644static lostFraction645lostFractionThroughTruncation(const APFloatBase::integerPart *parts,646unsigned int partCount,647unsigned int bits)648{649unsigned int lsb;650651lsb = APInt::tcLSB(parts, partCount);652653/* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. */654if (bits <= lsb)655return lfExactlyZero;656if (bits == lsb + 1)657return lfExactlyHalf;658if (bits <= partCount * APFloatBase::integerPartWidth &&659APInt::tcExtractBit(parts, bits - 1))660return lfMoreThanHalf;661662return lfLessThanHalf;663}664665/* Shift DST right BITS bits noting lost fraction. */666static lostFraction667shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)668{669lostFraction lost_fraction;670671lost_fraction = lostFractionThroughTruncation(dst, parts, bits);672673APInt::tcShiftRight(dst, parts, bits);674675return lost_fraction;676}677678/* Combine the effect of two lost fractions. */679static lostFraction680combineLostFractions(lostFraction moreSignificant,681lostFraction lessSignificant)682{683if (lessSignificant != lfExactlyZero) {684if (moreSignificant == lfExactlyZero)685moreSignificant = lfLessThanHalf;686else if (moreSignificant == lfExactlyHalf)687moreSignificant = lfMoreThanHalf;688}689690return moreSignificant;691}692693/* The error from the true value, in half-ulps, on multiplying two694floating point numbers, which differ from the value they695approximate by at most HUE1 and HUE2 half-ulps, is strictly less696than the returned value.697698See "How to Read Floating Point Numbers Accurately" by William D699Clinger. */700static unsigned int701HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)702{703assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));704705if (HUerr1 + HUerr2 == 0)706return inexactMultiply * 2; /* <= inexactMultiply half-ulps. 
*/707else708return inexactMultiply + 2 * (HUerr1 + HUerr2);709}710711/* The number of ulps from the boundary (zero, or half if ISNEAREST)712when the least significant BITS are truncated. BITS cannot be713zero. */714static APFloatBase::integerPart715ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,716bool isNearest) {717unsigned int count, partBits;718APFloatBase::integerPart part, boundary;719720assert(bits != 0);721722bits--;723count = bits / APFloatBase::integerPartWidth;724partBits = bits % APFloatBase::integerPartWidth + 1;725726part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));727728if (isNearest)729boundary = (APFloatBase::integerPart) 1 << (partBits - 1);730else731boundary = 0;732733if (count == 0) {734if (part - boundary <= boundary - part)735return part - boundary;736else737return boundary - part;738}739740if (part == boundary) {741while (--count)742if (parts[count])743return ~(APFloatBase::integerPart) 0; /* A lot. */744745return parts[0];746} else if (part == boundary - 1) {747while (--count)748if (~parts[count])749return ~(APFloatBase::integerPart) 0; /* A lot. */750751return -parts[0];752}753754return ~(APFloatBase::integerPart) 0; /* A lot. */755}756757/* Place pow(5, power) in DST, and return the number of parts used.758DST must be at least one part larger than size of the answer. 
*/
static unsigned int
powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
  static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
  // Successive squarings of 5^8 are stored back to back in pow5s; the first
  // entry is 5^8 itself.
  APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
  pow5s[0] = 78125 * 5;

  // Number of parts occupied by the power currently pointed to by pow5.
  unsigned int partsCount = 1;
  APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
  unsigned int result;
  assert(power <= maxExponent);

  p1 = dst;
  p2 = scratch;

  // The low three bits of the exponent come straight from the small table.
  *p1 = firstEightPowers[power & 7];
  power >>= 3;

  result = 1;
  pow5 = pow5s;

  for (unsigned int n = 0; power; power >>= 1, n++) {
    /* Calculate pow(5,pow(2,n+3)) if we haven't yet.  */
    if (n != 0) {
      // Square the previous entry, which sits immediately before pow5.
      APInt::tcFullMultiply(pow5, pow5 - partsCount, pow5 - partsCount,
                            partsCount, partsCount);
      partsCount *= 2;
      if (pow5[partsCount - 1] == 0)
        partsCount--;
    }

    if (power & 1) {
      APFloatBase::integerPart *tmp;

      APInt::tcFullMultiply(p2, p1, pow5, result, partsCount);
      result += partsCount;
      if (p2[result - 1] == 0)
        result--;

      /* Now result is in p1 with partsCount parts and p2 is scratch
         space.  */
      tmp = p1;
      p1 = p2;
      p2 = tmp;
    }

    pow5 += partsCount;
  }

  // The running product ping-pongs between dst and scratch; copy it back if
  // it ended up in scratch.
  if (p1 != dst)
    APInt::tcAssign(dst, p1, result);

  return result;
}

/* Zero at the end to avoid modular arithmetic when adding one; used
   when rounding up during hexadecimal output.  */
static const char hexDigitsLower[] = "0123456789abcdef0";
static const char hexDigitsUpper[] = "0123456789ABCDEF0";
static const char infinityL[] = "infinity";
static const char infinityU[] = "INFINITY";
static const char NaNL[] = "nan";
static const char NaNU[] = "NAN";

/* Write out an integerPart in hexadecimal, starting with the most
   significant nibble.  Write out exactly COUNT hexdigits, return
   COUNT.  
*/825static unsigned int826partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,827const char *hexDigitChars)828{829unsigned int result = count;830831assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);832833part >>= (APFloatBase::integerPartWidth - 4 * count);834while (count--) {835dst[count] = hexDigitChars[part & 0xf];836part >>= 4;837}838839return result;840}841842/* Write out an unsigned decimal integer. */843static char *844writeUnsignedDecimal (char *dst, unsigned int n)845{846char buff[40], *p;847848p = buff;849do850*p++ = '0' + n % 10;851while (n /= 10);852853do854*dst++ = *--p;855while (p != buff);856857return dst;858}859860/* Write out a signed decimal integer. */861static char *862writeSignedDecimal (char *dst, int value)863{864if (value < 0) {865*dst++ = '-';866dst = writeUnsignedDecimal(dst, -(unsigned) value);867} else868dst = writeUnsignedDecimal(dst, value);869870return dst;871}872873namespace detail {874/* Constructors. */875void IEEEFloat::initialize(const fltSemantics *ourSemantics) {876unsigned int count;877878semantics = ourSemantics;879count = partCount();880if (count > 1)881significand.parts = new integerPart[count];882}883884void IEEEFloat::freeSignificand() {885if (needsCleanup())886delete [] significand.parts;887}888889void IEEEFloat::assign(const IEEEFloat &rhs) {890assert(semantics == rhs.semantics);891892sign = rhs.sign;893category = rhs.category;894exponent = rhs.exponent;895if (isFiniteNonZero() || category == fcNaN)896copySignificand(rhs);897}898899void IEEEFloat::copySignificand(const IEEEFloat &rhs) {900assert(isFiniteNonZero() || category == fcNaN);901assert(rhs.partCount() >= partCount());902903APInt::tcAssign(significandParts(), rhs.significandParts(),904partCount());905}906907/* Make this number a NaN, with an arbitrary but deterministic value908for the significand. If double or longer, this is a signalling NaN,909which may not be ideal. If float, this is QNaN(0). 
*/910void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {911if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)912llvm_unreachable("This floating point format does not support NaN");913914category = fcNaN;915sign = Negative;916exponent = exponentNaN();917918integerPart *significand = significandParts();919unsigned numParts = partCount();920921APInt fill_storage;922if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {923// Finite-only types do not distinguish signalling and quiet NaN, so924// make them all signalling.925SNaN = false;926if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {927sign = true;928fill_storage = APInt::getZero(semantics->precision - 1);929} else {930fill_storage = APInt::getAllOnes(semantics->precision - 1);931}932fill = &fill_storage;933}934935// Set the significand bits to the fill.936if (!fill || fill->getNumWords() < numParts)937APInt::tcSet(significand, 0, numParts);938if (fill) {939APInt::tcAssign(significand, fill->getRawData(),940std::min(fill->getNumWords(), numParts));941942// Zero out the excess bits of the significand.943unsigned bitsToPreserve = semantics->precision - 1;944unsigned part = bitsToPreserve / 64;945bitsToPreserve %= 64;946significand[part] &= ((1ULL << bitsToPreserve) - 1);947for (part++; part != numParts; ++part)948significand[part] = 0;949}950951unsigned QNaNBit = semantics->precision - 2;952953if (SNaN) {954// We always have to clear the QNaN bit to make it an SNaN.955APInt::tcClearBit(significand, QNaNBit);956957// If there are no bits set in the payload, we have to set958// *something* to make it a NaN instead of an infinity;959// conventionally, this is the next bit down from the QNaN bit.960if (APInt::tcIsZero(significand, numParts))961APInt::tcSetBit(significand, QNaNBit - 1);962} else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {963// The only NaN is a quiet NaN, and it has no bits sets in the significand.964// Do nothing.965} else 
{966// We always have to set the QNaN bit to make it a QNaN.967APInt::tcSetBit(significand, QNaNBit);968}969970// For x87 extended precision, we want to make a NaN, not a971// pseudo-NaN. Maybe we should expose the ability to make972// pseudo-NaNs?973if (semantics == &semX87DoubleExtended)974APInt::tcSetBit(significand, QNaNBit + 1);975}976977IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) {978if (this != &rhs) {979if (semantics != rhs.semantics) {980freeSignificand();981initialize(rhs.semantics);982}983assign(rhs);984}985986return *this;987}988989IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {990freeSignificand();991992semantics = rhs.semantics;993significand = rhs.significand;994exponent = rhs.exponent;995category = rhs.category;996sign = rhs.sign;997998rhs.semantics = &semBogus;999return *this;1000}10011002bool IEEEFloat::isDenormal() const {1003return isFiniteNonZero() && (exponent == semantics->minExponent) &&1004(APInt::tcExtractBit(significandParts(),1005semantics->precision - 1) == 0);1006}10071008bool IEEEFloat::isSmallest() const {1009// The smallest number by magnitude in our format will be the smallest1010// denormal, i.e. the floating point number with exponent being minimum1011// exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).1012return isFiniteNonZero() && exponent == semantics->minExponent &&1013significandMSB() == 0;1014}10151016bool IEEEFloat::isSmallestNormalized() const {1017return getCategory() == fcNormal && exponent == semantics->minExponent &&1018isSignificandAllZerosExceptMSB();1019}10201021bool IEEEFloat::isSignificandAllOnes() const {1022// Test if the significand excluding the integral bit is all ones. 
This allows1023// us to test for binade boundaries.1024const integerPart *Parts = significandParts();1025const unsigned PartCount = partCountForBits(semantics->precision);1026for (unsigned i = 0; i < PartCount - 1; i++)1027if (~Parts[i])1028return false;10291030// Set the unused high bits to all ones when we compare.1031const unsigned NumHighBits =1032PartCount*integerPartWidth - semantics->precision + 1;1033assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&1034"Can not have more high bits to fill than integerPartWidth");1035const integerPart HighBitFill =1036~integerPart(0) << (integerPartWidth - NumHighBits);1037if (~(Parts[PartCount - 1] | HighBitFill))1038return false;10391040return true;1041}10421043bool IEEEFloat::isSignificandAllOnesExceptLSB() const {1044// Test if the significand excluding the integral bit is all ones except for1045// the least significant bit.1046const integerPart *Parts = significandParts();10471048if (Parts[0] & 1)1049return false;10501051const unsigned PartCount = partCountForBits(semantics->precision);1052for (unsigned i = 0; i < PartCount - 1; i++) {1053if (~Parts[i] & ~unsigned{!i})1054return false;1055}10561057// Set the unused high bits to all ones when we compare.1058const unsigned NumHighBits =1059PartCount * integerPartWidth - semantics->precision + 1;1060assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&1061"Can not have more high bits to fill than integerPartWidth");1062const integerPart HighBitFill = ~integerPart(0)1063<< (integerPartWidth - NumHighBits);1064if (~(Parts[PartCount - 1] | HighBitFill | 0x1))1065return false;10661067return true;1068}10691070bool IEEEFloat::isSignificandAllZeros() const {1071// Test if the significand excluding the integral bit is all zeros. 
This1072// allows us to test for binade boundaries.1073const integerPart *Parts = significandParts();1074const unsigned PartCount = partCountForBits(semantics->precision);10751076for (unsigned i = 0; i < PartCount - 1; i++)1077if (Parts[i])1078return false;10791080// Compute how many bits are used in the final word.1081const unsigned NumHighBits =1082PartCount*integerPartWidth - semantics->precision + 1;1083assert(NumHighBits < integerPartWidth && "Can not have more high bits to "1084"clear than integerPartWidth");1085const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;10861087if (Parts[PartCount - 1] & HighBitMask)1088return false;10891090return true;1091}10921093bool IEEEFloat::isSignificandAllZerosExceptMSB() const {1094const integerPart *Parts = significandParts();1095const unsigned PartCount = partCountForBits(semantics->precision);10961097for (unsigned i = 0; i < PartCount - 1; i++) {1098if (Parts[i])1099return false;1100}11011102const unsigned NumHighBits =1103PartCount * integerPartWidth - semantics->precision + 1;1104return Parts[PartCount - 1] == integerPart(1)1105<< (integerPartWidth - NumHighBits);1106}11071108bool IEEEFloat::isLargest() const {1109if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&1110semantics->nanEncoding == fltNanEncoding::AllOnes) {1111// The largest number by magnitude in our format will be the floating point1112// number with maximum exponent and with significand that is all ones except1113// the LSB.1114return isFiniteNonZero() && exponent == semantics->maxExponent &&1115isSignificandAllOnesExceptLSB();1116} else {1117// The largest number by magnitude in our format will be the floating point1118// number with maximum exponent and with significand that is all ones.1119return isFiniteNonZero() && exponent == semantics->maxExponent &&1120isSignificandAllOnes();1121}1122}11231124bool IEEEFloat::isInteger() const {1125// This could be made more efficient; I'm going for obviously correct.1126if 
(!isFinite()) return false;1127IEEEFloat truncated = *this;1128truncated.roundToIntegral(rmTowardZero);1129return compare(truncated) == cmpEqual;1130}11311132bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {1133if (this == &rhs)1134return true;1135if (semantics != rhs.semantics ||1136category != rhs.category ||1137sign != rhs.sign)1138return false;1139if (category==fcZero || category==fcInfinity)1140return true;11411142if (isFiniteNonZero() && exponent != rhs.exponent)1143return false;11441145return std::equal(significandParts(), significandParts() + partCount(),1146rhs.significandParts());1147}11481149IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {1150initialize(&ourSemantics);1151sign = 0;1152category = fcNormal;1153zeroSignificand();1154exponent = ourSemantics.precision - 1;1155significandParts()[0] = value;1156normalize(rmNearestTiesToEven, lfExactlyZero);1157}11581159IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {1160initialize(&ourSemantics);1161makeZero(false);1162}11631164// Delegate to the previous constructor, because later copy constructor may1165// actually inspects category, which can't be garbage.1166IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag)1167: IEEEFloat(ourSemantics) {}11681169IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {1170initialize(rhs.semantics);1171assign(rhs);1172}11731174IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {1175*this = std::move(rhs);1176}11771178IEEEFloat::~IEEEFloat() { freeSignificand(); }11791180unsigned int IEEEFloat::partCount() const {1181return partCountForBits(semantics->precision + 1);1182}11831184const IEEEFloat::integerPart *IEEEFloat::significandParts() const {1185return const_cast<IEEEFloat *>(this)->significandParts();1186}11871188IEEEFloat::integerPart *IEEEFloat::significandParts() {1189if (partCount() > 1)1190return significand.parts;1191else1192return &significand.part;1193}11941195void IEEEFloat::zeroSignificand() 
{1196APInt::tcSet(significandParts(), 0, partCount());1197}11981199/* Increment an fcNormal floating point number's significand. */1200void IEEEFloat::incrementSignificand() {1201integerPart carry;12021203carry = APInt::tcIncrement(significandParts(), partCount());12041205/* Our callers should never cause us to overflow. */1206assert(carry == 0);1207(void)carry;1208}12091210/* Add the significand of the RHS. Returns the carry flag. */1211IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {1212integerPart *parts;12131214parts = significandParts();12151216assert(semantics == rhs.semantics);1217assert(exponent == rhs.exponent);12181219return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());1220}12211222/* Subtract the significand of the RHS with a borrow flag. Returns1223the borrow flag. */1224IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,1225integerPart borrow) {1226integerPart *parts;12271228parts = significandParts();12291230assert(semantics == rhs.semantics);1231assert(exponent == rhs.exponent);12321233return APInt::tcSubtract(parts, rhs.significandParts(), borrow,1234partCount());1235}12361237/* Multiply the significand of the RHS. If ADDEND is non-NULL, add it1238on to the full-precision result of the multiplication. Returns the1239lost fraction. 
*/1240lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,1241IEEEFloat addend) {1242unsigned int omsb; // One, not zero, based MSB.1243unsigned int partsCount, newPartsCount, precision;1244integerPart *lhsSignificand;1245integerPart scratch[4];1246integerPart *fullSignificand;1247lostFraction lost_fraction;1248bool ignored;12491250assert(semantics == rhs.semantics);12511252precision = semantics->precision;12531254// Allocate space for twice as many bits as the original significand, plus one1255// extra bit for the addition to overflow into.1256newPartsCount = partCountForBits(precision * 2 + 1);12571258if (newPartsCount > 4)1259fullSignificand = new integerPart[newPartsCount];1260else1261fullSignificand = scratch;12621263lhsSignificand = significandParts();1264partsCount = partCount();12651266APInt::tcFullMultiply(fullSignificand, lhsSignificand,1267rhs.significandParts(), partsCount, partsCount);12681269lost_fraction = lfExactlyZero;1270omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;1271exponent += rhs.exponent;12721273// Assume the operands involved in the multiplication are single-precision1274// FP, and the two multiplicants are:1275// *this = a23 . a22 ... a0 * 2^e11276// rhs = b23 . b22 ... b0 * 2^e21277// the result of multiplication is:1278// *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)1279// Note that there are three significant bits at the left-hand side of the1280// radix point: two for the multiplication, and an overflow bit for the1281// addition (that will always be zero at this point). 
Move the radix point1282// toward left by two bits, and adjust exponent accordingly.1283exponent += 2;12841285if (addend.isNonZero()) {1286// The intermediate result of the multiplication has "2 * precision"1287// signicant bit; adjust the addend to be consistent with mul result.1288//1289Significand savedSignificand = significand;1290const fltSemantics *savedSemantics = semantics;1291fltSemantics extendedSemantics;1292opStatus status;1293unsigned int extendedPrecision;12941295// Normalize our MSB to one below the top bit to allow for overflow.1296extendedPrecision = 2 * precision + 1;1297if (omsb != extendedPrecision - 1) {1298assert(extendedPrecision > omsb);1299APInt::tcShiftLeft(fullSignificand, newPartsCount,1300(extendedPrecision - 1) - omsb);1301exponent -= (extendedPrecision - 1) - omsb;1302}13031304/* Create new semantics. */1305extendedSemantics = *semantics;1306extendedSemantics.precision = extendedPrecision;13071308if (newPartsCount == 1)1309significand.part = fullSignificand[0];1310else1311significand.parts = fullSignificand;1312semantics = &extendedSemantics;13131314// Make a copy so we can convert it to the extended semantics.1315// Note that we cannot convert the addend directly, as the extendedSemantics1316// is a local variable (which we take a reference to).1317IEEEFloat extendedAddend(addend);1318status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);1319assert(status == opOK);1320(void)status;13211322// Shift the significand of the addend right by one bit. This guarantees1323// that the high bit of the significand is zero (same as fullSignificand),1324// so the addition will overflow (if it does overflow at all) into the top bit.1325lost_fraction = extendedAddend.shiftSignificandRight(1);1326assert(lost_fraction == lfExactlyZero &&1327"Lost precision while shifting addend for fused-multiply-add.");13281329lost_fraction = addOrSubtractSignificand(extendedAddend, false);13301331/* Restore our state. 
*/1332if (newPartsCount == 1)1333fullSignificand[0] = significand.part;1334significand = savedSignificand;1335semantics = savedSemantics;13361337omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;1338}13391340// Convert the result having "2 * precision" significant-bits back to the one1341// having "precision" significant-bits. First, move the radix point from1342// poision "2*precision - 1" to "precision - 1". The exponent need to be1343// adjusted by "2*precision - 1" - "precision - 1" = "precision".1344exponent -= precision + 1;13451346// In case MSB resides at the left-hand side of radix point, shift the1347// mantissa right by some amount to make sure the MSB reside right before1348// the radix point (i.e. "MSB . rest-significant-bits").1349//1350// Note that the result is not normalized when "omsb < precision". So, the1351// caller needs to call IEEEFloat::normalize() if normalized value is1352// expected.1353if (omsb > precision) {1354unsigned int bits, significantParts;1355lostFraction lf;13561357bits = omsb - precision;1358significantParts = partCountForBits(omsb);1359lf = shiftRight(fullSignificand, significantParts, bits);1360lost_fraction = combineLostFractions(lf, lost_fraction);1361exponent += bits;1362}13631364APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);13651366if (newPartsCount > 4)1367delete [] fullSignificand;13681369return lost_fraction;1370}13711372lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {1373return multiplySignificand(rhs, IEEEFloat(*semantics));1374}13751376/* Multiply the significands of LHS and RHS to DST. 
*/1377lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {1378unsigned int bit, i, partsCount;1379const integerPart *rhsSignificand;1380integerPart *lhsSignificand, *dividend, *divisor;1381integerPart scratch[4];1382lostFraction lost_fraction;13831384assert(semantics == rhs.semantics);13851386lhsSignificand = significandParts();1387rhsSignificand = rhs.significandParts();1388partsCount = partCount();13891390if (partsCount > 2)1391dividend = new integerPart[partsCount * 2];1392else1393dividend = scratch;13941395divisor = dividend + partsCount;13961397/* Copy the dividend and divisor as they will be modified in-place. */1398for (i = 0; i < partsCount; i++) {1399dividend[i] = lhsSignificand[i];1400divisor[i] = rhsSignificand[i];1401lhsSignificand[i] = 0;1402}14031404exponent -= rhs.exponent;14051406unsigned int precision = semantics->precision;14071408/* Normalize the divisor. */1409bit = precision - APInt::tcMSB(divisor, partsCount) - 1;1410if (bit) {1411exponent += bit;1412APInt::tcShiftLeft(divisor, partsCount, bit);1413}14141415/* Normalize the dividend. */1416bit = precision - APInt::tcMSB(dividend, partsCount) - 1;1417if (bit) {1418exponent -= bit;1419APInt::tcShiftLeft(dividend, partsCount, bit);1420}14211422/* Ensure the dividend >= divisor initially for the loop below.1423Incidentally, this means that the division loop below is1424guaranteed to set the integer bit to one. */1425if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {1426exponent--;1427APInt::tcShiftLeft(dividend, partsCount, 1);1428assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);1429}14301431/* Long division. */1432for (bit = precision; bit; bit -= 1) {1433if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {1434APInt::tcSubtract(dividend, divisor, 0, partsCount);1435APInt::tcSetBit(lhsSignificand, bit - 1);1436}14371438APInt::tcShiftLeft(dividend, partsCount, 1);1439}14401441/* Figure out the lost fraction. 
*/1442int cmp = APInt::tcCompare(dividend, divisor, partsCount);14431444if (cmp > 0)1445lost_fraction = lfMoreThanHalf;1446else if (cmp == 0)1447lost_fraction = lfExactlyHalf;1448else if (APInt::tcIsZero(dividend, partsCount))1449lost_fraction = lfExactlyZero;1450else1451lost_fraction = lfLessThanHalf;14521453if (partsCount > 2)1454delete [] dividend;14551456return lost_fraction;1457}14581459unsigned int IEEEFloat::significandMSB() const {1460return APInt::tcMSB(significandParts(), partCount());1461}14621463unsigned int IEEEFloat::significandLSB() const {1464return APInt::tcLSB(significandParts(), partCount());1465}14661467/* Note that a zero result is NOT normalized to fcZero. */1468lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {1469/* Our exponent should not overflow. */1470assert((ExponentType) (exponent + bits) >= exponent);14711472exponent += bits;14731474return shiftRight(significandParts(), partCount(), bits);1475}14761477/* Shift the significand left BITS bits, subtract BITS from its exponent. */1478void IEEEFloat::shiftSignificandLeft(unsigned int bits) {1479assert(bits < semantics->precision);14801481if (bits) {1482unsigned int partsCount = partCount();14831484APInt::tcShiftLeft(significandParts(), partsCount, bits);1485exponent -= bits;14861487assert(!APInt::tcIsZero(significandParts(), partsCount));1488}1489}14901491IEEEFloat::cmpResult1492IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {1493int compare;14941495assert(semantics == rhs.semantics);1496assert(isFiniteNonZero());1497assert(rhs.isFiniteNonZero());14981499compare = exponent - rhs.exponent;15001501/* If exponents are equal, do an unsigned bignum comparison of the1502significands. 
*/1503if (compare == 0)1504compare = APInt::tcCompare(significandParts(), rhs.significandParts(),1505partCount());15061507if (compare > 0)1508return cmpGreaterThan;1509else if (compare < 0)1510return cmpLessThan;1511else1512return cmpEqual;1513}15141515/* Set the least significant BITS bits of a bignum, clear the1516rest. */1517static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,1518unsigned bits) {1519unsigned i = 0;1520while (bits > APInt::APINT_BITS_PER_WORD) {1521dst[i++] = ~(APInt::WordType)0;1522bits -= APInt::APINT_BITS_PER_WORD;1523}15241525if (bits)1526dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);15271528while (i < parts)1529dst[i++] = 0;1530}15311532/* Handle overflow. Sign is preserved. We either become infinity or1533the largest finite number. */1534IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {1535if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::FiniteOnly) {1536/* Infinity? */1537if (rounding_mode == rmNearestTiesToEven ||1538rounding_mode == rmNearestTiesToAway ||1539(rounding_mode == rmTowardPositive && !sign) ||1540(rounding_mode == rmTowardNegative && sign)) {1541if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)1542makeNaN(false, sign);1543else1544category = fcInfinity;1545return static_cast<opStatus>(opOverflow | opInexact);1546}1547}15481549/* Otherwise we become the largest finite number. 
*/1550category = fcNormal;1551exponent = semantics->maxExponent;1552tcSetLeastSignificantBits(significandParts(), partCount(),1553semantics->precision);1554if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&1555semantics->nanEncoding == fltNanEncoding::AllOnes)1556APInt::tcClearBit(significandParts(), 0);15571558return opInexact;1559}15601561/* Returns TRUE if, when truncating the current number, with BIT the1562new LSB, with the given lost fraction and rounding mode, the result1563would need to be rounded away from zero (i.e., by increasing the1564signficand). This routine must work for fcZero of both signs, and1565fcNormal numbers. */1566bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,1567lostFraction lost_fraction,1568unsigned int bit) const {1569/* NaNs and infinities should not have lost fractions. */1570assert(isFiniteNonZero() || category == fcZero);15711572/* Current callers never pass this so we don't handle it. */1573assert(lost_fraction != lfExactlyZero);15741575switch (rounding_mode) {1576case rmNearestTiesToAway:1577return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;15781579case rmNearestTiesToEven:1580if (lost_fraction == lfMoreThanHalf)1581return true;15821583/* Our zeroes don't have a significand to test. */1584if (lost_fraction == lfExactlyHalf && category != fcZero)1585return APInt::tcExtractBit(significandParts(), bit);15861587return false;15881589case rmTowardZero:1590return false;15911592case rmTowardPositive:1593return !sign;15941595case rmTowardNegative:1596return sign;15971598default:1599break;1600}1601llvm_unreachable("Invalid rounding mode found");1602}16031604IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,1605lostFraction lost_fraction) {1606unsigned int omsb; /* One, not zero, based MSB. */1607int exponentChange;16081609if (!isFiniteNonZero())1610return opOK;16111612/* Before rounding normalize the exponent of fcNormal numbers. 
*/1613omsb = significandMSB() + 1;16141615if (omsb) {1616/* OMSB is numbered from 1. We want to place it in the integer1617bit numbered PRECISION if possible, with a compensating change in1618the exponent. */1619exponentChange = omsb - semantics->precision;16201621/* If the resulting exponent is too high, overflow according to1622the rounding mode. */1623if (exponent + exponentChange > semantics->maxExponent)1624return handleOverflow(rounding_mode);16251626/* Subnormal numbers have exponent minExponent, and their MSB1627is forced based on that. */1628if (exponent + exponentChange < semantics->minExponent)1629exponentChange = semantics->minExponent - exponent;16301631/* Shifting left is easy as we don't lose precision. */1632if (exponentChange < 0) {1633assert(lost_fraction == lfExactlyZero);16341635shiftSignificandLeft(-exponentChange);16361637return opOK;1638}16391640if (exponentChange > 0) {1641lostFraction lf;16421643/* Shift right and capture any new lost fraction. */1644lf = shiftSignificandRight(exponentChange);16451646lost_fraction = combineLostFractions(lf, lost_fraction);16471648/* Keep OMSB up-to-date. */1649if (omsb > (unsigned) exponentChange)1650omsb -= exponentChange;1651else1652omsb = 0;1653}1654}16551656// The all-ones values is an overflow if NaN is all ones. If NaN is1657// represented by negative zero, then it is a valid finite value.1658if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&1659semantics->nanEncoding == fltNanEncoding::AllOnes &&1660exponent == semantics->maxExponent && isSignificandAllOnes())1661return handleOverflow(rounding_mode);16621663/* Now round the number according to rounding_mode given the lost1664fraction. */16651666/* As specified in IEEE 754, since we do not trap we do not report1667underflow for exact results. */1668if (lost_fraction == lfExactlyZero) {1669/* Canonicalize zeroes. 
*/1670if (omsb == 0) {1671category = fcZero;1672if (semantics->nanEncoding == fltNanEncoding::NegativeZero)1673sign = false;1674}16751676return opOK;1677}16781679/* Increment the significand if we're rounding away from zero. */1680if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {1681if (omsb == 0)1682exponent = semantics->minExponent;16831684incrementSignificand();1685omsb = significandMSB() + 1;16861687/* Did the significand increment overflow? */1688if (omsb == (unsigned) semantics->precision + 1) {1689/* Renormalize by incrementing the exponent and shifting our1690significand right one. However if we already have the1691maximum exponent we overflow to infinity. */1692if (exponent == semantics->maxExponent)1693// Invoke overflow handling with a rounding mode that will guarantee1694// that the result gets turned into the correct infinity representation.1695// This is needed instead of just setting the category to infinity to1696// account for 8-bit floating point types that have no inf, only NaN.1697return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);16981699shiftSignificandRight(1);17001701return opInexact;1702}17031704// The all-ones values is an overflow if NaN is all ones. If NaN is1705// represented by negative zero, then it is a valid finite value.1706if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&1707semantics->nanEncoding == fltNanEncoding::AllOnes &&1708exponent == semantics->maxExponent && isSignificandAllOnes())1709return handleOverflow(rounding_mode);1710}17111712/* The normal case - we were and are not denormal, and any1713significand increment above didn't overflow. */1714if (omsb == semantics->precision)1715return opInexact;17161717/* We have a non-zero denormal. */1718assert(omsb < semantics->precision);17191720/* Canonicalize zeroes. 
*/1721if (omsb == 0) {1722category = fcZero;1723if (semantics->nanEncoding == fltNanEncoding::NegativeZero)1724sign = false;1725}17261727/* The fcZero case is a denormal that underflowed to zero. */1728return (opStatus) (opUnderflow | opInexact);1729}17301731IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,1732bool subtract) {1733switch (PackCategoriesIntoKey(category, rhs.category)) {1734default:1735llvm_unreachable(nullptr);17361737case PackCategoriesIntoKey(fcZero, fcNaN):1738case PackCategoriesIntoKey(fcNormal, fcNaN):1739case PackCategoriesIntoKey(fcInfinity, fcNaN):1740assign(rhs);1741[[fallthrough]];1742case PackCategoriesIntoKey(fcNaN, fcZero):1743case PackCategoriesIntoKey(fcNaN, fcNormal):1744case PackCategoriesIntoKey(fcNaN, fcInfinity):1745case PackCategoriesIntoKey(fcNaN, fcNaN):1746if (isSignaling()) {1747makeQuiet();1748return opInvalidOp;1749}1750return rhs.isSignaling() ? opInvalidOp : opOK;17511752case PackCategoriesIntoKey(fcNormal, fcZero):1753case PackCategoriesIntoKey(fcInfinity, fcNormal):1754case PackCategoriesIntoKey(fcInfinity, fcZero):1755return opOK;17561757case PackCategoriesIntoKey(fcNormal, fcInfinity):1758case PackCategoriesIntoKey(fcZero, fcInfinity):1759category = fcInfinity;1760sign = rhs.sign ^ subtract;1761return opOK;17621763case PackCategoriesIntoKey(fcZero, fcNormal):1764assign(rhs);1765sign = rhs.sign ^ subtract;1766return opOK;17671768case PackCategoriesIntoKey(fcZero, fcZero):1769/* Sign depends on rounding mode; handled by caller. */1770return opOK;17711772case PackCategoriesIntoKey(fcInfinity, fcInfinity):1773/* Differently signed infinities can only be validly1774subtracted. */1775if (((sign ^ rhs.sign)!=0) != subtract) {1776makeNaN();1777return opInvalidOp;1778}17791780return opOK;17811782case PackCategoriesIntoKey(fcNormal, fcNormal):1783return opDivByZero;1784}1785}17861787/* Add or subtract two normal numbers. 
*/1788lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,1789bool subtract) {1790integerPart carry;1791lostFraction lost_fraction;1792int bits;17931794/* Determine if the operation on the absolute values is effectively1795an addition or subtraction. */1796subtract ^= static_cast<bool>(sign ^ rhs.sign);17971798/* Are we bigger exponent-wise than the RHS? */1799bits = exponent - rhs.exponent;18001801/* Subtraction is more subtle than one might naively expect. */1802if (subtract) {1803IEEEFloat temp_rhs(rhs);18041805if (bits == 0)1806lost_fraction = lfExactlyZero;1807else if (bits > 0) {1808lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);1809shiftSignificandLeft(1);1810} else {1811lost_fraction = shiftSignificandRight(-bits - 1);1812temp_rhs.shiftSignificandLeft(1);1813}18141815// Should we reverse the subtraction.1816if (compareAbsoluteValue(temp_rhs) == cmpLessThan) {1817carry = temp_rhs.subtractSignificand1818(*this, lost_fraction != lfExactlyZero);1819copySignificand(temp_rhs);1820sign = !sign;1821} else {1822carry = subtractSignificand1823(temp_rhs, lost_fraction != lfExactlyZero);1824}18251826/* Invert the lost fraction - it was on the RHS and1827subtracted. */1828if (lost_fraction == lfLessThanHalf)1829lost_fraction = lfMoreThanHalf;1830else if (lost_fraction == lfMoreThanHalf)1831lost_fraction = lfLessThanHalf;18321833/* The code above is intended to ensure that no borrow is1834necessary. */1835assert(!carry);1836(void)carry;1837} else {1838if (bits > 0) {1839IEEEFloat temp_rhs(rhs);18401841lost_fraction = temp_rhs.shiftSignificandRight(bits);1842carry = addSignificand(temp_rhs);1843} else {1844lost_fraction = shiftSignificandRight(-bits);1845carry = addSignificand(rhs);1846}18471848/* We have a guard bit; generating a carry cannot happen. 
*/1849assert(!carry);1850(void)carry;1851}18521853return lost_fraction;1854}18551856IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {1857switch (PackCategoriesIntoKey(category, rhs.category)) {1858default:1859llvm_unreachable(nullptr);18601861case PackCategoriesIntoKey(fcZero, fcNaN):1862case PackCategoriesIntoKey(fcNormal, fcNaN):1863case PackCategoriesIntoKey(fcInfinity, fcNaN):1864assign(rhs);1865sign = false;1866[[fallthrough]];1867case PackCategoriesIntoKey(fcNaN, fcZero):1868case PackCategoriesIntoKey(fcNaN, fcNormal):1869case PackCategoriesIntoKey(fcNaN, fcInfinity):1870case PackCategoriesIntoKey(fcNaN, fcNaN):1871sign ^= rhs.sign; // restore the original sign1872if (isSignaling()) {1873makeQuiet();1874return opInvalidOp;1875}1876return rhs.isSignaling() ? opInvalidOp : opOK;18771878case PackCategoriesIntoKey(fcNormal, fcInfinity):1879case PackCategoriesIntoKey(fcInfinity, fcNormal):1880case PackCategoriesIntoKey(fcInfinity, fcInfinity):1881category = fcInfinity;1882return opOK;18831884case PackCategoriesIntoKey(fcZero, fcNormal):1885case PackCategoriesIntoKey(fcNormal, fcZero):1886case PackCategoriesIntoKey(fcZero, fcZero):1887category = fcZero;1888return opOK;18891890case PackCategoriesIntoKey(fcZero, fcInfinity):1891case PackCategoriesIntoKey(fcInfinity, fcZero):1892makeNaN();1893return opInvalidOp;18941895case PackCategoriesIntoKey(fcNormal, fcNormal):1896return opOK;1897}1898}18991900IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {1901switch (PackCategoriesIntoKey(category, rhs.category)) {1902default:1903llvm_unreachable(nullptr);19041905case PackCategoriesIntoKey(fcZero, fcNaN):1906case PackCategoriesIntoKey(fcNormal, fcNaN):1907case PackCategoriesIntoKey(fcInfinity, fcNaN):1908assign(rhs);1909sign = false;1910[[fallthrough]];1911case PackCategoriesIntoKey(fcNaN, fcZero):1912case PackCategoriesIntoKey(fcNaN, fcNormal):1913case PackCategoriesIntoKey(fcNaN, fcInfinity):1914case PackCategoriesIntoKey(fcNaN, 
fcNaN):1915sign ^= rhs.sign; // restore the original sign1916if (isSignaling()) {1917makeQuiet();1918return opInvalidOp;1919}1920return rhs.isSignaling() ? opInvalidOp : opOK;19211922case PackCategoriesIntoKey(fcInfinity, fcZero):1923case PackCategoriesIntoKey(fcInfinity, fcNormal):1924case PackCategoriesIntoKey(fcZero, fcInfinity):1925case PackCategoriesIntoKey(fcZero, fcNormal):1926return opOK;19271928case PackCategoriesIntoKey(fcNormal, fcInfinity):1929category = fcZero;1930return opOK;19311932case PackCategoriesIntoKey(fcNormal, fcZero):1933if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)1934makeNaN(false, sign);1935else1936category = fcInfinity;1937return opDivByZero;19381939case PackCategoriesIntoKey(fcInfinity, fcInfinity):1940case PackCategoriesIntoKey(fcZero, fcZero):1941makeNaN();1942return opInvalidOp;19431944case PackCategoriesIntoKey(fcNormal, fcNormal):1945return opOK;1946}1947}19481949IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {1950switch (PackCategoriesIntoKey(category, rhs.category)) {1951default:1952llvm_unreachable(nullptr);19531954case PackCategoriesIntoKey(fcZero, fcNaN):1955case PackCategoriesIntoKey(fcNormal, fcNaN):1956case PackCategoriesIntoKey(fcInfinity, fcNaN):1957assign(rhs);1958[[fallthrough]];1959case PackCategoriesIntoKey(fcNaN, fcZero):1960case PackCategoriesIntoKey(fcNaN, fcNormal):1961case PackCategoriesIntoKey(fcNaN, fcInfinity):1962case PackCategoriesIntoKey(fcNaN, fcNaN):1963if (isSignaling()) {1964makeQuiet();1965return opInvalidOp;1966}1967return rhs.isSignaling() ? 
opInvalidOp : opOK;19681969case PackCategoriesIntoKey(fcZero, fcInfinity):1970case PackCategoriesIntoKey(fcZero, fcNormal):1971case PackCategoriesIntoKey(fcNormal, fcInfinity):1972return opOK;19731974case PackCategoriesIntoKey(fcNormal, fcZero):1975case PackCategoriesIntoKey(fcInfinity, fcZero):1976case PackCategoriesIntoKey(fcInfinity, fcNormal):1977case PackCategoriesIntoKey(fcInfinity, fcInfinity):1978case PackCategoriesIntoKey(fcZero, fcZero):1979makeNaN();1980return opInvalidOp;19811982case PackCategoriesIntoKey(fcNormal, fcNormal):1983return opOK;1984}1985}19861987IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {1988switch (PackCategoriesIntoKey(category, rhs.category)) {1989default:1990llvm_unreachable(nullptr);19911992case PackCategoriesIntoKey(fcZero, fcNaN):1993case PackCategoriesIntoKey(fcNormal, fcNaN):1994case PackCategoriesIntoKey(fcInfinity, fcNaN):1995assign(rhs);1996[[fallthrough]];1997case PackCategoriesIntoKey(fcNaN, fcZero):1998case PackCategoriesIntoKey(fcNaN, fcNormal):1999case PackCategoriesIntoKey(fcNaN, fcInfinity):2000case PackCategoriesIntoKey(fcNaN, fcNaN):2001if (isSignaling()) {2002makeQuiet();2003return opInvalidOp;2004}2005return rhs.isSignaling() ? opInvalidOp : opOK;20062007case PackCategoriesIntoKey(fcZero, fcInfinity):2008case PackCategoriesIntoKey(fcZero, fcNormal):2009case PackCategoriesIntoKey(fcNormal, fcInfinity):2010return opOK;20112012case PackCategoriesIntoKey(fcNormal, fcZero):2013case PackCategoriesIntoKey(fcInfinity, fcZero):2014case PackCategoriesIntoKey(fcInfinity, fcNormal):2015case PackCategoriesIntoKey(fcInfinity, fcInfinity):2016case PackCategoriesIntoKey(fcZero, fcZero):2017makeNaN();2018return opInvalidOp;20192020case PackCategoriesIntoKey(fcNormal, fcNormal):2021return opDivByZero; // fake status, indicating this is not a special case2022}2023}20242025/* Change sign. 
*/2026void IEEEFloat::changeSign() {2027// With NaN-as-negative-zero, neither NaN or negative zero can change2028// their signs.2029if (semantics->nanEncoding == fltNanEncoding::NegativeZero &&2030(isZero() || isNaN()))2031return;2032/* Look mummy, this one's easy. */2033sign = !sign;2034}20352036/* Normalized addition or subtraction. */2037IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,2038roundingMode rounding_mode,2039bool subtract) {2040opStatus fs;20412042fs = addOrSubtractSpecials(rhs, subtract);20432044/* This return code means it was not a simple case. */2045if (fs == opDivByZero) {2046lostFraction lost_fraction;20472048lost_fraction = addOrSubtractSignificand(rhs, subtract);2049fs = normalize(rounding_mode, lost_fraction);20502051/* Can only be zero if we lost no fraction. */2052assert(category != fcZero || lost_fraction == lfExactlyZero);2053}20542055/* If two numbers add (exactly) to zero, IEEE 754 decrees it is a2056positive zero unless rounding to minus infinity, except that2057adding two like-signed zeroes gives that zero. */2058if (category == fcZero) {2059if (rhs.category != fcZero || (sign == rhs.sign) == subtract)2060sign = (rounding_mode == rmTowardNegative);2061// NaN-in-negative-zero means zeros need to be normalized to +0.2062if (semantics->nanEncoding == fltNanEncoding::NegativeZero)2063sign = false;2064}20652066return fs;2067}20682069/* Normalized addition. */2070IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,2071roundingMode rounding_mode) {2072return addOrSubtract(rhs, rounding_mode, false);2073}20742075/* Normalized subtraction. */2076IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs,2077roundingMode rounding_mode) {2078return addOrSubtract(rhs, rounding_mode, true);2079}20802081/* Normalized multiply. 
*/2082IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,2083roundingMode rounding_mode) {2084opStatus fs;20852086sign ^= rhs.sign;2087fs = multiplySpecials(rhs);20882089if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)2090sign = false;2091if (isFiniteNonZero()) {2092lostFraction lost_fraction = multiplySignificand(rhs);2093fs = normalize(rounding_mode, lost_fraction);2094if (lost_fraction != lfExactlyZero)2095fs = (opStatus) (fs | opInexact);2096}20972098return fs;2099}21002101/* Normalized divide. */2102IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,2103roundingMode rounding_mode) {2104opStatus fs;21052106sign ^= rhs.sign;2107fs = divideSpecials(rhs);21082109if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)2110sign = false;2111if (isFiniteNonZero()) {2112lostFraction lost_fraction = divideSignificand(rhs);2113fs = normalize(rounding_mode, lost_fraction);2114if (lost_fraction != lfExactlyZero)2115fs = (opStatus) (fs | opInexact);2116}21172118return fs;2119}21202121/* Normalized remainder. */2122IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {2123opStatus fs;2124unsigned int origSign = sign;21252126// First handle the special cases.2127fs = remainderSpecials(rhs);2128if (fs != opDivByZero)2129return fs;21302131fs = opOK;21322133// Make sure the current value is less than twice the denom. 
If the addition2134// did not succeed (an overflow has happened), which means that the finite2135// value we currently posses must be less than twice the denom (as we are2136// using the same semantics).2137IEEEFloat P2 = rhs;2138if (P2.add(rhs, rmNearestTiesToEven) == opOK) {2139fs = mod(P2);2140assert(fs == opOK);2141}21422143// Lets work with absolute numbers.2144IEEEFloat P = rhs;2145P.sign = false;2146sign = false;21472148//2149// To calculate the remainder we use the following scheme.2150//2151// The remainder is defained as follows:2152//2153// remainder = numer - rquot * denom = x - r * p2154//2155// Where r is the result of: x/p, rounded toward the nearest integral value2156// (with halfway cases rounded toward the even number).2157//2158// Currently, (after x mod 2p):2159// r is the number of 2p's present inside x, which is inherently, an even2160// number of p's.2161//2162// We may split the remaining calculation into 4 options:2163// - if x < 0.5p then we round to the nearest number with is 0, and are done.2164// - if x == 0.5p then we round to the nearest even number which is 0, and we2165// are done as well.2166// - if 0.5p < x < p then we round to nearest number which is 1, and we have2167// to subtract 1p at least once.2168// - if x >= p then we must subtract p at least once, as x must be a2169// remainder.2170//2171// By now, we were done, or we added 1 to r, which in turn, now an odd number.2172//2173// We can now split the remaining calculation to the following 3 options:2174// - if x < 0.5p then we round to the nearest number with is 0, and are done.2175// - if x == 0.5p then we round to the nearest even number. As r is odd, we2176// must round up to the next even number. 
so we must subtract p once more.2177// - if x > 0.5p (and inherently x < p) then we must round r up to the next2178// integral, and subtract p once more.2179//21802181// Extend the semantics to prevent an overflow/underflow or inexact result.2182bool losesInfo;2183fltSemantics extendedSemantics = *semantics;2184extendedSemantics.maxExponent++;2185extendedSemantics.minExponent--;2186extendedSemantics.precision += 2;21872188IEEEFloat VEx = *this;2189fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);2190assert(fs == opOK && !losesInfo);2191IEEEFloat PEx = P;2192fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);2193assert(fs == opOK && !losesInfo);21942195// It is simpler to work with 2x instead of 0.5p, and we do not need to lose2196// any fraction.2197fs = VEx.add(VEx, rmNearestTiesToEven);2198assert(fs == opOK);21992200if (VEx.compare(PEx) == cmpGreaterThan) {2201fs = subtract(P, rmNearestTiesToEven);2202assert(fs == opOK);22032204// Make VEx = this.add(this), but because we have different semantics, we do2205// not want to `convert` again, so we just subtract PEx twice (which equals2206// to the desired value).2207fs = VEx.subtract(PEx, rmNearestTiesToEven);2208assert(fs == opOK);2209fs = VEx.subtract(PEx, rmNearestTiesToEven);2210assert(fs == opOK);22112212cmpResult result = VEx.compare(PEx);2213if (result == cmpGreaterThan || result == cmpEqual) {2214fs = subtract(P, rmNearestTiesToEven);2215assert(fs == opOK);2216}2217}22182219if (isZero()) {2220sign = origSign; // IEEE754 requires this2221if (semantics->nanEncoding == fltNanEncoding::NegativeZero)2222// But some 8-bit floats only have positive 0.2223sign = false;2224}22252226else2227sign ^= origSign;2228return fs;2229}22302231/* Normalized llvm frem (C fmod). 
*/2232IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {2233opStatus fs;2234fs = modSpecials(rhs);2235unsigned int origSign = sign;22362237while (isFiniteNonZero() && rhs.isFiniteNonZero() &&2238compareAbsoluteValue(rhs) != cmpLessThan) {2239int Exp = ilogb(*this) - ilogb(rhs);2240IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven);2241// V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly2242// check for it.2243if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan)2244V = scalbn(rhs, Exp - 1, rmNearestTiesToEven);2245V.sign = sign;22462247fs = subtract(V, rmNearestTiesToEven);2248assert(fs==opOK);2249}2250if (isZero()) {2251sign = origSign; // fmod requires this2252if (semantics->nanEncoding == fltNanEncoding::NegativeZero)2253sign = false;2254}2255return fs;2256}22572258/* Normalized fused-multiply-add. */2259IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,2260const IEEEFloat &addend,2261roundingMode rounding_mode) {2262opStatus fs;22632264/* Post-multiplication sign, before addition. */2265sign ^= multiplicand.sign;22662267/* If and only if all arguments are normal do we need to do an2268extended-precision calculation. */2269if (isFiniteNonZero() &&2270multiplicand.isFiniteNonZero() &&2271addend.isFinite()) {2272lostFraction lost_fraction;22732274lost_fraction = multiplySignificand(multiplicand, addend);2275fs = normalize(rounding_mode, lost_fraction);2276if (lost_fraction != lfExactlyZero)2277fs = (opStatus) (fs | opInexact);22782279/* If two numbers add (exactly) to zero, IEEE 754 decrees it is a2280positive zero unless rounding to minus infinity, except that2281adding two like-signed zeroes gives that zero. 
*/2282if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {2283sign = (rounding_mode == rmTowardNegative);2284if (semantics->nanEncoding == fltNanEncoding::NegativeZero)2285sign = false;2286}2287} else {2288fs = multiplySpecials(multiplicand);22892290/* FS can only be opOK or opInvalidOp. There is no more work2291to do in the latter case. The IEEE-754R standard says it is2292implementation-defined in this case whether, if ADDEND is a2293quiet NaN, we raise invalid op; this implementation does so.22942295If we need to do the addition we can do so with normal2296precision. */2297if (fs == opOK)2298fs = addOrSubtract(addend, rounding_mode, false);2299}23002301return fs;2302}23032304/* Rounding-mode correct round to integral value. */2305IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {2306opStatus fs;23072308if (isInfinity())2309// [IEEE Std 754-2008 6.1]:2310// The behavior of infinity in floating-point arithmetic is derived from the2311// limiting cases of real arithmetic with operands of arbitrarily2312// large magnitude, when such a limit exists.2313// ...2314// Operations on infinite operands are usually exact and therefore signal no2315// exceptions ...2316return opOK;23172318if (isNaN()) {2319if (isSignaling()) {2320// [IEEE Std 754-2008 6.2]:2321// Under default exception handling, any operation signaling an invalid2322// operation exception and for which a floating-point result is to be2323// delivered shall deliver a quiet NaN.2324makeQuiet();2325// [IEEE Std 754-2008 6.2]:2326// Signaling NaNs shall be reserved operands that, under default exception2327// handling, signal the invalid operation exception(see 7.2) for every2328// general-computational and signaling-computational operation except for2329// the conversions described in 5.12.2330return opInvalidOp;2331} else {2332// [IEEE Std 754-2008 6.2]:2333// For an operation with quiet NaN inputs, other than maximum and minimum2334// operations, if a 
floating-point result is to be delivered the result2335// shall be a quiet NaN which should be one of the input NaNs.2336// ...2337// Every general-computational and quiet-computational operation involving2338// one or more input NaNs, none of them signaling, shall signal no2339// exception, except fusedMultiplyAdd might signal the invalid operation2340// exception(see 7.2).2341return opOK;2342}2343}23442345if (isZero()) {2346// [IEEE Std 754-2008 6.3]:2347// ... the sign of the result of conversions, the quantize operation, the2348// roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is2349// the sign of the first or only operand.2350return opOK;2351}23522353// If the exponent is large enough, we know that this value is already2354// integral, and the arithmetic below would potentially cause it to saturate2355// to +/-Inf. Bail out early instead.2356if (exponent+1 >= (int)semanticsPrecision(*semantics))2357return opOK;23582359// The algorithm here is quite simple: we add 2^(p-1), where p is the2360// precision of our format, and then subtract it back off again. 
The choice2361// of rounding modes for the addition/subtraction determines the rounding mode2362// for our integral rounding as well.2363// NOTE: When the input value is negative, we do subtraction followed by2364// addition instead.2365APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);2366IntegerConstant <<= semanticsPrecision(*semantics)-1;2367IEEEFloat MagicConstant(*semantics);2368fs = MagicConstant.convertFromAPInt(IntegerConstant, false,2369rmNearestTiesToEven);2370assert(fs == opOK);2371MagicConstant.sign = sign;23722373// Preserve the input sign so that we can handle the case of zero result2374// correctly.2375bool inputSign = isNegative();23762377fs = add(MagicConstant, rounding_mode);23782379// Current value and 'MagicConstant' are both integers, so the result of the2380// subtraction is always exact according to Sterbenz' lemma.2381subtract(MagicConstant, rounding_mode);23822383// Restore the input sign.2384if (inputSign != isNegative())2385changeSign();23862387return fs;2388}238923902391/* Comparison requires normalized numbers. 
*/2392IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {2393cmpResult result;23942395assert(semantics == rhs.semantics);23962397switch (PackCategoriesIntoKey(category, rhs.category)) {2398default:2399llvm_unreachable(nullptr);24002401case PackCategoriesIntoKey(fcNaN, fcZero):2402case PackCategoriesIntoKey(fcNaN, fcNormal):2403case PackCategoriesIntoKey(fcNaN, fcInfinity):2404case PackCategoriesIntoKey(fcNaN, fcNaN):2405case PackCategoriesIntoKey(fcZero, fcNaN):2406case PackCategoriesIntoKey(fcNormal, fcNaN):2407case PackCategoriesIntoKey(fcInfinity, fcNaN):2408return cmpUnordered;24092410case PackCategoriesIntoKey(fcInfinity, fcNormal):2411case PackCategoriesIntoKey(fcInfinity, fcZero):2412case PackCategoriesIntoKey(fcNormal, fcZero):2413if (sign)2414return cmpLessThan;2415else2416return cmpGreaterThan;24172418case PackCategoriesIntoKey(fcNormal, fcInfinity):2419case PackCategoriesIntoKey(fcZero, fcInfinity):2420case PackCategoriesIntoKey(fcZero, fcNormal):2421if (rhs.sign)2422return cmpGreaterThan;2423else2424return cmpLessThan;24252426case PackCategoriesIntoKey(fcInfinity, fcInfinity):2427if (sign == rhs.sign)2428return cmpEqual;2429else if (sign)2430return cmpLessThan;2431else2432return cmpGreaterThan;24332434case PackCategoriesIntoKey(fcZero, fcZero):2435return cmpEqual;24362437case PackCategoriesIntoKey(fcNormal, fcNormal):2438break;2439}24402441/* Two normal numbers. Do they have the same sign? */2442if (sign != rhs.sign) {2443if (sign)2444result = cmpLessThan;2445else2446result = cmpGreaterThan;2447} else {2448/* Compare absolute values; invert result if negative. */2449result = compareAbsoluteValue(rhs);24502451if (sign) {2452if (result == cmpLessThan)2453result = cmpGreaterThan;2454else if (result == cmpGreaterThan)2455result = cmpLessThan;2456}2457}24582459return result;2460}24612462/// IEEEFloat::convert - convert a value of one floating point type to another.2463/// The return value corresponds to the IEEE754 exceptions. 
*losesInfo
/// records whether the transformation lost information, i.e. whether
/// converting the result back to the original type will produce the
/// original value (this is almost the same as return value==opOK, but there
/// are edge cases where this is not so).
///
/// The value is converted in place: the significand storage is resized for
/// \p toSemantics and the semantics pointer is switched before the final
/// normalization.  \p losesInfo is written on every path and must not be
/// null.

IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
                                       roundingMode rounding_mode,
                                       bool *losesInfo) {
  lostFraction lostFraction;
  unsigned int newPartCount, oldPartCount;
  opStatus fs;
  int shift;
  const fltSemantics &fromSemantics = *semantics;
  // Captured up front: the significand is shifted and re-homed below.
  bool is_signaling = isSignaling();

  lostFraction = lfExactlyZero;
  newPartCount = partCountForBits(toSemantics.precision + 1);
  oldPartCount = partCount();
  // Positive for an extension, negative for a truncation.
  shift = toSemantics.precision - fromSemantics.precision;

  bool X86SpecialNan = false;
  if (&fromSemantics == &semX87DoubleExtended &&
      &toSemantics != &semX87DoubleExtended && category == fcNaN &&
      (!(*significandParts() & 0x8000000000000000ULL) ||
       !(*significandParts() & 0x4000000000000000ULL))) {
    // x86 has some unusual NaNs which cannot be represented in any other
    // format; note them here.
    X86SpecialNan = true;
  }

  // If this is a truncation of a denormal number, and the target semantics
  // has larger exponent range than the source semantics (this can happen
  // when truncating from PowerPC double-double to double format), the
  // right shift could lose result mantissa bits.  Adjust exponent instead
  // of performing excessive shift.
  // Also do a similar trick in case shifting denormal would produce zero
  // significand as this case isn't handled correctly by normalize.
  if (shift < 0 && isFiniteNonZero()) {
    int omsb = significandMSB() + 1;
    int exponentChange = omsb - fromSemantics.precision;
    if (exponent + exponentChange < toSemantics.minExponent)
      exponentChange = toSemantics.minExponent - exponent;
    if (exponentChange < shift)
      exponentChange = shift;
    if (exponentChange < 0) {
      shift -= exponentChange;
      exponent += exponentChange;
    } else if (omsb <= -shift) {
      exponentChange = omsb + shift - 1; // leave at least one bit set
      shift -= exponentChange;
      exponent += exponentChange;
    }
  }

  // If this is a truncation, perform the shift before we narrow the storage.
  if (shift < 0 && (isFiniteNonZero() ||
                    (category == fcNaN && semantics->nonFiniteBehavior !=
                                              fltNonfiniteBehavior::NanOnly)))
    lostFraction = shiftRight(significandParts(), oldPartCount, -shift);

  // Fix the storage so it can hold the new value.
  if (newPartCount > oldPartCount) {
    // The new type requires more storage; make it available.
    integerPart *newParts;
    newParts = new integerPart[newPartCount];
    APInt::tcSet(newParts, 0, newPartCount);
    if (isFiniteNonZero() || category==fcNaN)
      APInt::tcAssign(newParts, significandParts(), oldPartCount);
    freeSignificand();
    significand.parts = newParts;
  } else if (newPartCount == 1 && oldPartCount != 1) {
    // Switch to built-in storage for a single part.
    integerPart newPart = 0;
    if (isFiniteNonZero() || category==fcNaN)
      newPart = significandParts()[0];
    freeSignificand();
    significand.part = newPart;
  }

  // Now that we have the right storage, switch the semantics.
  semantics = &toSemantics;

  // If this is an extension, perform the shift now that the storage is
  // available.
  if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
    APInt::tcShiftLeft(significandParts(), newPartCount, shift);

  if (isFiniteNonZero()) {
    fs = normalize(rounding_mode, lostFraction);
    *losesInfo = (fs != opOK);
  } else if (category == fcNaN) {
    if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
      *losesInfo =
          fromSemantics.nonFiniteBehavior != fltNonfiniteBehavior::NanOnly;
      makeNaN(false, sign);
      return is_signaling ? opInvalidOp : opOK;
    }

    // If NaN is negative zero, we need to create a new NaN to avoid converting
    // NaN to -Inf.
    if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
        semantics->nanEncoding != fltNanEncoding::NegativeZero)
      makeNaN(false, false);

    *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;

    // For x87 extended precision, we want to make a NaN, not a special NaN if
    // the input wasn't special either.
    if (!X86SpecialNan && semantics == &semX87DoubleExtended)
      APInt::tcSetBit(significandParts(), semantics->precision - 1);

    // Convert of sNaN creates qNaN and raises an exception (invalid op).
    // This also guarantees that a sNaN does not become Inf on a truncation
    // that loses all payload bits.
    if (is_signaling) {
      makeQuiet();
      fs = opInvalidOp;
    } else {
      fs = opOK;
    }
  } else if (category == fcInfinity &&
             semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
    // The target has no infinity: the value becomes a NaN.
    makeNaN(false, sign);
    *losesInfo = true;
    fs = opInexact;
  } else if (category == fcZero &&
             semantics->nanEncoding == fltNanEncoding::NegativeZero) {
    // Negative zero loses info, but positive zero doesn't.
    *losesInfo =
        fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
    fs = *losesInfo ? opInexact : opOK;
    // NaN is negative zero means -0 -> +0, which can lose information
    sign = false;
  } else {
    *losesInfo = false;
    fs = opOK;
  }

  return fs;
}

/* Convert a floating point number to an integer according to the
   rounding mode.  If the rounded integer value is out of range this
   returns an invalid operation exception and the contents of the
   destination parts are unspecified.  If the rounded value is in
   range but the floating point number is not the exact integer, the C
   standard doesn't require an inexact exception to be raised.  IEEE
   854 does require it so we do that.

   Note that for conversions to integer type the C standard requires
   round-to-zero to always be used.

   *ISEXACT is cleared on entry and set only when the conversion is exact;
   a negative zero input is reported as not exact.  */
IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
    MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
    roundingMode rounding_mode, bool *isExact) const {
  lostFraction lost_fraction;
  const integerPart *src;
  unsigned int dstPartsCount, truncatedBits;

  *isExact = false;

  /* Handle the three special cases first.  */
  if (category == fcInfinity || category == fcNaN)
    return opInvalidOp;

  dstPartsCount = partCountForBits(width);
  assert(dstPartsCount <= parts.size() && "Integer too big");

  if (category == fcZero) {
    APInt::tcSet(parts.data(), 0, dstPartsCount);
    // Negative zero can't be represented as an int.
    *isExact = !sign;
    return opOK;
  }

  src = significandParts();

  /* Step 1: place our absolute value, with any fraction truncated, in
     the destination.  */
  if (exponent < 0) {
    /* Our absolute value is less than one; truncate everything.  */
    APInt::tcSet(parts.data(), 0, dstPartsCount);
    /* For exponent -1 the integer bit represents .5, look at that.
       For smaller exponents leftmost truncated bit is 0. */
    truncatedBits = semantics->precision -1U - exponent;
  } else {
    /* We want the most significant (exponent + 1) bits; the rest are
       truncated.  */
    unsigned int bits = exponent + 1U;

    /* Hopelessly large in magnitude?  */
    if (bits > width)
      return opInvalidOp;

    if (bits < semantics->precision) {
      /* We truncate (semantics->precision - bits) bits.  */
      truncatedBits = semantics->precision - bits;
      APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
    } else {
      /* We want at least as many bits as are available.  */
      APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
                       0);
      APInt::tcShiftLeft(parts.data(), dstPartsCount,
                         bits - semantics->precision);
      truncatedBits = 0;
    }
  }

  /* Step 2: work out any lost fraction, and increment the absolute
     value if we would round away from zero.  */
  if (truncatedBits) {
    lost_fraction = lostFractionThroughTruncation(src, partCount(),
                                                  truncatedBits);
    if (lost_fraction != lfExactlyZero &&
        roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
      if (APInt::tcIncrement(parts.data(), dstPartsCount))
        return opInvalidOp;     /* Overflow.  */
    }
  } else {
    lost_fraction = lfExactlyZero;
  }

  /* Step 3: check if we fit in the destination.  */
  unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;

  if (sign) {
    if (!isSigned) {
      /* Negative numbers cannot be represented as unsigned.  */
      if (omsb != 0)
        return opInvalidOp;
    } else {
      /* It takes omsb bits to represent the unsigned integer value.
         We lose a bit for the sign, but care is needed as the
         maximally negative integer is a special case.  */
      if (omsb == width &&
          APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
        return opInvalidOp;

      /* This case can happen because of rounding.  */
      if (omsb > width)
        return opInvalidOp;
    }

    APInt::tcNegate (parts.data(), dstPartsCount);
  } else {
    /* A signed destination loses one bit to the sign.  */
    if (omsb >= width + !isSigned)
      return opInvalidOp;
  }

  if (lost_fraction == lfExactlyZero) {
    *isExact = true;
    return opOK;
  } else
    return opInexact;
}

/* Same as convertToSignExtendedInteger, except we provide
   deterministic values in case of an invalid operation exception,
   namely zero for NaNs and the minimal or maximal value respectively
   for underflow or overflow.
   The *isExact output tells whether the result is exact, in the sense
   that converting it back to the original floating point type produces
   the original value.  This is almost equivalent to result==opOK,
   except for negative zeroes.
*/
IEEEFloat::opStatus
IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
                            unsigned int width, bool isSigned,
                            roundingMode rounding_mode, bool *isExact) const {
  opStatus fs;

  fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
                                    isExact);

  if (fs == opInvalidOp) {
    unsigned int bits, dstPartsCount;

    dstPartsCount = partCountForBits(width);
    assert(dstPartsCount <= parts.size() && "Integer too big");

    /* BITS is the number of low-order one bits to write: none for a NaN,
       one for a negative signed result (shifted to the top below),
       none for a negative unsigned result, and every non-sign bit
       otherwise.  */
    if (category == fcNaN)
      bits = 0;
    else if (sign)
      bits = isSigned;
    else
      bits = width - isSigned;

    tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
    /* Move the single set bit to the sign position.  */
    if (sign && isSigned)
      APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
  }

  return fs;
}

/* Convert an unsigned integer SRC to a floating point number,
   rounding according to ROUNDING_MODE.  The sign of the floating
   point number is not modified. 
*/
IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts(
    const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
  unsigned int omsb, precision, dstCount;
  integerPart *dst;
  lostFraction lost_fraction;

  category = fcNormal;
  /* One-based position of the most significant set bit of SRC.  */
  omsb = APInt::tcMSB(src, srcCount) + 1;
  dst = significandParts();
  dstCount = partCount();
  precision = semantics->precision;

  /* We want the most significant PRECISION bits of SRC.  There may not
     be that many; extract what we can.  */
  if (precision <= omsb) {
    exponent = omsb - 1;
    lost_fraction = lostFractionThroughTruncation(src, srcCount,
                                                  omsb - precision);
    APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
  } else {
    exponent = precision - 1;
    lost_fraction = lfExactlyZero;
    APInt::tcExtract(dst, dstCount, src, omsb, 0);
  }

  return normalize(rounding_mode, lost_fraction);
}

/* Convert VAL to a floating point number, rounding according to
   ROUNDING_MODE.  If ISSIGNED is true and VAL is negative, the sign is
   set and the negated value is converted; otherwise VAL is converted as
   an unsigned quantity with the sign cleared.  */
IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
                                                roundingMode rounding_mode) {
  unsigned int partCount = Val.getNumWords();
  APInt api = Val;

  sign = false;
  if (isSigned && api.isNegative()) {
    sign = true;
    api = -api;
  }

  return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
}

/* Convert a two's complement integer SRC to a floating point number,
   rounding according to ROUNDING_MODE.  ISSIGNED is true if the
   integer is signed, in which case it must be sign-extended.  */
IEEEFloat::opStatus
IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
                                          unsigned int srcCount, bool isSigned,
                                          roundingMode rounding_mode) {
  opStatus status;

  /* The sign is read from the top bit of the last part.  */
  if (isSigned &&
      APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
    integerPart *copy;

    /* If we're signed and negative negate a copy.  */
    sign = true;
    copy = new integerPart[srcCount];
    APInt::tcAssign(copy, src, srcCount);
    APInt::tcNegate(copy, srcCount);
    status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
    delete [] copy;
  } else {
    sign = false;
    status = convertFromUnsignedParts(src, srcCount, rounding_mode);
  }

  return status;
}

/* Convert the WIDTH-bit integer in PARTS to a floating point number,
   rounding according to ROUNDING_MODE.  If ISSIGNED is true, bit
   WIDTH - 1 is the sign bit and a negative value is negated before
   conversion.  */
/* FIXME: should this just take a const APInt reference?  */
IEEEFloat::opStatus
IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
                                          unsigned int width, bool isSigned,
                                          roundingMode rounding_mode) {
  unsigned int partCount = partCountForBits(width);
  APInt api = APInt(width, ArrayRef(parts, partCount));

  sign = false;
  if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
    sign = true;
    api = -api;
  }

  return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
}

/* Parse the hexadecimal significand and binary exponent in S into *this,
   rounding according to ROUNDING_MODE.  Returns an error for malformed
   input: more than one dot, a missing exponent, an invalid character in
   the significand, or a significand with no digits.  */
Expected<IEEEFloat::opStatus>
IEEEFloat::convertFromHexadecimalString(StringRef s,
                                        roundingMode rounding_mode) {
  lostFraction lost_fraction = lfExactlyZero;

  category = fcNormal;
  zeroSignificand();
  exponent = 0;

  integerPart *significand = significandParts();
  unsigned partsCount = partCount();
  /* Digits are written from the most significant nibble downwards.  */
  unsigned bitPos = partsCount * integerPartWidth;
  bool computedTrailingFraction = false;

  // Skip leading zeroes and any (hexa)decimal point.
  StringRef::iterator begin = s.begin();
  StringRef::iterator end = s.end();
  StringRef::iterator dot;
  auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
  if (!PtrOrErr)
    return PtrOrErr.takeError();
  StringRef::iterator p = *PtrOrErr;
  StringRef::iterator firstSignificantDigit = p;

  while (p != end) {
    integerPart hex_value;

    if (*p == '.') {
      if (dot != end)
        return createError("String contains multiple dots");
      dot = p++;
      continue;
    }

    /* Stop at the first character that is not a hex digit.  */
    hex_value = hexDigitValue(*p);
    if (hex_value == UINT_MAX)
      break;

    p++;

    // Store the number while we have space.
    if (bitPos) {
      bitPos -= 4;
      hex_value <<= bitPos % integerPartWidth;
      significand[bitPos / integerPartWidth] |= hex_value;
    } else if (!computedTrailingFraction) {
      /* Out of space: the remaining digits only determine the lost
         fraction, which is computed once.  */
      auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
      if (!FractOrErr)
        return FractOrErr.takeError();
      lost_fraction = *FractOrErr;
      computedTrailingFraction = true;
    }
  }

  /* Hex floats require an exponent but not a hexadecimal point.  */
  if (p == end)
    return createError("Hex strings require an exponent");
  if (*p != 'p' && *p != 'P')
    return createError("Invalid character in significand");
  if (p == begin)
    return createError("Significand has no digits");
  if (dot != end && p - begin == 1)
    return createError("Significand has no digits");

  /* Ignore the exponent if we are zero.  */
  if (p != firstSignificantDigit) {
    int expAdjustment;

    /* Implicit hexadecimal point?  */
    if (dot == end)
      dot = p;

    /* Calculate the exponent adjustment implicit in the number of
       significant digits.  */
    expAdjustment = static_cast<int>(dot - firstSignificantDigit);
    if (expAdjustment < 0)
      expAdjustment++;
    expAdjustment = expAdjustment * 4 - 1;

    /* Adjust for writing the significand starting at the most
       significant nibble.  */
    expAdjustment += semantics->precision;
    expAdjustment -= partsCount * integerPartWidth;

    /* Adjust for the given exponent.  */
    auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
    if (!ExpOrErr)
      return ExpOrErr.takeError();
    exponent = *ExpOrErr;
  }

  return normalize(rounding_mode, lost_fraction);
}

/* Set *this to the decimal significand in DECSIGPARTS (SIGPARTCOUNT parts)
   times 10^EXP, rounded according to ROUNDING_MODE.  The computation is
   retried with twice as many parts until the accumulated error bound
   shows that truncating the intermediate result rounds correctly.  */
IEEEFloat::opStatus
IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
                                        unsigned sigPartCount, int exp,
                                        roundingMode rounding_mode) {
  unsigned int parts, pow5PartCount;
  fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
  integerPart pow5Parts[maxPowerOfFiveParts];
  bool isNearest;

  isNearest = (rounding_mode == rmNearestTiesToEven ||
               rounding_mode == rmNearestTiesToAway);

  parts = partCountForBits(semantics->precision + 11);

  /* Calculate pow(5, abs(exp)).  */
  pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);

  for (;; parts *= 2) {
    opStatus sigStatus, powStatus;
    unsigned int excessPrecision, truncatedBits;

    calcSemantics.precision = parts * integerPartWidth - 1;
    excessPrecision = calcSemantics.precision - semantics->precision;
    truncatedBits = excessPrecision;

    IEEEFloat decSig(calcSemantics, uninitialized);
    decSig.makeZero(sign);
    IEEEFloat pow5(calcSemantics);

    sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
                                                rmNearestTiesToEven);
    powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
                                              rmNearestTiesToEven);
    /* Add exp, as 10^n = 5^n * 2^n.  */
    decSig.exponent += exp;

    lostFraction calcLostFraction;
    integerPart HUerr, HUdistance;
    unsigned int powHUerr;

    if (exp >= 0) {
      /* multiplySignificand leaves the precision-th bit set to 1.  */
      calcLostFraction = decSig.multiplySignificand(pow5);
      powHUerr = powStatus != opOK;
    } else {
      calcLostFraction = decSig.divideSignificand(pow5);
      /* Denormal numbers have less precision.  */
      if (decSig.exponent < semantics->minExponent) {
        excessPrecision += (semantics->minExponent - decSig.exponent);
        truncatedBits = excessPrecision;
        if (excessPrecision > calcSemantics.precision)
          excessPrecision = calcSemantics.precision;
      }
      /* Extra half-ulp lost in reciprocal of exponent.  */
      powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
    }

    /* Both multiplySignificand and divideSignificand return the
       result with the integer bit set.  */
    assert(APInt::tcExtractBit
           (decSig.significandParts(), calcSemantics.precision - 1) == 1);

    HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
                       powHUerr);
    HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
                                      excessPrecision, isNearest);

    /* Are we guaranteed to round correctly if we truncate?  */
    if (HUdistance >= HUerr) {
      APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
                       calcSemantics.precision - excessPrecision,
                       excessPrecision);
      /* Take the exponent of decSig.  If we tcExtract-ed less bits
         above we must adjust our exponent to compensate for the
         implicit right shift.  */
      exponent = (decSig.exponent + semantics->precision
                  - (calcSemantics.precision - excessPrecision));
      calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
                                                       decSig.partCount(),
                                                       truncatedBits);
      return normalize(rounding_mode, calcLostFraction);
    }
  }
}

Expected<IEEEFloat::opStatus>
IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
  decimalInfo D;
  opStatus fs;

  /* Scan the text.  */
  StringRef::iterator p = str.begin();
  if (Error Err = interpretDecimal(p, str.end(), &D))
    return std::move(Err);

  /* Handle the quick cases.  First the case of no significant digits,
     i.e. zero, and then exponents that are obviously too large or too
     small. 
Writing L for log 10 / log 2, a number d.ddddd*10^exp3040definitely overflows if30413042(exp - 1) * L >= maxExponent30433044and definitely underflows to zero where30453046(exp + 1) * L <= minExponent - precision30473048With integer arithmetic the tightest bounds for L are3049305093/28 < L < 196/59 [ numerator <= 256 ]305142039/12655 < L < 28738/8651 [ numerator <= 65536 ]3052*/30533054// Test if we have a zero number allowing for strings with no null terminators3055// and zero decimals with non-zero exponents.3056//3057// We computed firstSigDigit by ignoring all zeros and dots. Thus if3058// D->firstSigDigit equals str.end(), every digit must be a zero and there can3059// be at most one dot. On the other hand, if we have a zero with a non-zero3060// exponent, then we know that D.firstSigDigit will be non-numeric.3061if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {3062category = fcZero;3063fs = opOK;3064if (semantics->nanEncoding == fltNanEncoding::NegativeZero)3065sign = false;30663067/* Check whether the normalized exponent is high enough to overflow3068max during the log-rebasing in the max-exponent check below. */3069} else if (D.normalizedExponent - 1 > INT_MAX / 42039) {3070fs = handleOverflow(rounding_mode);30713072/* If it wasn't, then it also wasn't high enough to overflow max3073during the log-rebasing in the min-exponent check. Check that it3074won't overflow min in either check, then perform the min-exponent3075check. */3076} else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||3077(D.normalizedExponent + 1) * 28738 <=30788651 * (semantics->minExponent - (int) semantics->precision)) {3079/* Underflow to zero and round. */3080category = fcNormal;3081zeroSignificand();3082fs = normalize(rounding_mode, lfLessThanHalf);30833084/* We can finally safely perform the max-exponent check. */3085} else if ((D.normalizedExponent - 1) * 420393086>= 12655 * semantics->maxExponent) {3087/* Overflow and round. 
*/3088fs = handleOverflow(rounding_mode);3089} else {3090integerPart *decSignificand;3091unsigned int partCount;30923093/* A tight upper bound on number of bits required to hold an3094N-digit decimal integer is N * 196 / 59. Allocate enough space3095to hold the full significand, and an extra part required by3096tcMultiplyPart. */3097partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;3098partCount = partCountForBits(1 + 196 * partCount / 59);3099decSignificand = new integerPart[partCount + 1];3100partCount = 0;31013102/* Convert to binary efficiently - we do almost all multiplication3103in an integerPart. When this would overflow do we do a single3104bignum multiplication, and then revert again to multiplication3105in an integerPart. */3106do {3107integerPart decValue, val, multiplier;31083109val = 0;3110multiplier = 1;31113112do {3113if (*p == '.') {3114p++;3115if (p == str.end()) {3116break;3117}3118}3119decValue = decDigitValue(*p++);3120if (decValue >= 10U) {3121delete[] decSignificand;3122return createError("Invalid character in significand");3123}3124multiplier *= 10;3125val = val * 10 + decValue;3126/* The maximum number that can be multiplied by ten with any3127digit added without overflowing an integerPart. */3128} while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);31293130/* Multiply out the current part. */3131APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,3132partCount, partCount + 1, false);31333134/* If we used another part (likely but not guaranteed), increase3135the count. 
*/3136if (decSignificand[partCount])3137partCount++;3138} while (p <= D.lastSigDigit);31393140category = fcNormal;3141fs = roundSignificandWithExponent(decSignificand, partCount,3142D.exponent, rounding_mode);31433144delete [] decSignificand;3145}31463147return fs;3148}31493150bool IEEEFloat::convertFromStringSpecials(StringRef str) {3151const size_t MIN_NAME_SIZE = 3;31523153if (str.size() < MIN_NAME_SIZE)3154return false;31553156if (str == "inf" || str == "INFINITY" || str == "+Inf") {3157makeInf(false);3158return true;3159}31603161bool IsNegative = str.front() == '-';3162if (IsNegative) {3163str = str.drop_front();3164if (str.size() < MIN_NAME_SIZE)3165return false;31663167if (str == "inf" || str == "INFINITY" || str == "Inf") {3168makeInf(true);3169return true;3170}3171}31723173// If we have a 's' (or 'S') prefix, then this is a Signaling NaN.3174bool IsSignaling = str.front() == 's' || str.front() == 'S';3175if (IsSignaling) {3176str = str.drop_front();3177if (str.size() < MIN_NAME_SIZE)3178return false;3179}31803181if (str.starts_with("nan") || str.starts_with("NaN")) {3182str = str.drop_front(3);31833184// A NaN without payload.3185if (str.empty()) {3186makeNaN(IsSignaling, IsNegative);3187return true;3188}31893190// Allow the payload to be inside parentheses.3191if (str.front() == '(') {3192// Parentheses should be balanced (and not empty).3193if (str.size() <= 2 || str.back() != ')')3194return false;31953196str = str.slice(1, str.size() - 1);3197}31983199// Determine the payload number's radix.3200unsigned Radix = 10;3201if (str[0] == '0') {3202if (str.size() > 1 && tolower(str[1]) == 'x') {3203str = str.drop_front(2);3204Radix = 16;3205} else3206Radix = 8;3207}32083209// Parse the payload and make the NaN.3210APInt Payload;3211if (!str.getAsInteger(Radix, Payload)) {3212makeNaN(IsSignaling, IsNegative, &Payload);3213return true;3214}3215}32163217return false;3218}32193220Expected<IEEEFloat::opStatus>3221IEEEFloat::convertFromString(StringRef str, 
roundingMode rounding_mode) {3222if (str.empty())3223return createError("Invalid string length");32243225// Handle special cases.3226if (convertFromStringSpecials(str))3227return opOK;32283229/* Handle a leading minus sign. */3230StringRef::iterator p = str.begin();3231size_t slen = str.size();3232sign = *p == '-' ? 1 : 0;3233if (*p == '-' || *p == '+') {3234p++;3235slen--;3236if (!slen)3237return createError("String has no digits");3238}32393240if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {3241if (slen == 2)3242return createError("Invalid string");3243return convertFromHexadecimalString(StringRef(p + 2, slen - 2),3244rounding_mode);3245}32463247return convertFromDecimalString(StringRef(p, slen), rounding_mode);3248}32493250/* Write out a hexadecimal representation of the floating point value3251to DST, which must be of sufficient size, in the C99 form3252[-]0xh.hhhhp[+-]d. Return the number of characters written,3253excluding the terminating NUL.32543255If UPPERCASE, the output is in upper case, otherwise in lower case.32563257HEXDIGITS digits appear altogether, rounding the value if3258necessary. If HEXDIGITS is 0, the minimal precision to display the3259number precisely is used instead. If nothing would appear after3260the decimal point it is suppressed.32613262The decimal exponent is always printed and has at least one digit.3263Zero values display an exponent of zero. Infinities and NaNs3264appear as "infinity" or "nan" respectively.32653266The above rules are as specified by C99. There is ambiguity about3267what the leading hexadecimal digit should be. This implementation3268uses whatever is necessary so that the exponent is displayed as3269stored. 
This implies the exponent will fall within the IEEE format
   range, and the leading hexadecimal digit will be 0 (for denormals),
   1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
   any other digits zero).
*/
unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
                                           bool upperCase,
                                           roundingMode rounding_mode) const {
  char *p;

  /* Remember where the output starts so the length can be computed.  */
  p = dst;
  if (sign)
    *dst++ = '-';

  switch (category) {
  case fcInfinity:
    /* Copy the name without its terminating NUL.  NOTE(review): the copy
       length uses infinityU and the advance uses infinityL; both are
       defined elsewhere and presumably have the same length - confirm.  */
    memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
    dst += sizeof infinityL - 1;
    break;

  case fcNaN:
    memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
    dst += sizeof NaNU - 1;
    break;

  case fcZero:
    /* "0x0", then '.' and hexDigits - 1 zeroes if more than one digit
       was requested, then the exponent "p0".  */
    *dst++ = '0';
    *dst++ = upperCase ? 'X': 'x';
    *dst++ = '0';
    if (hexDigits > 1) {
      *dst++ = '.';
      memset (dst, '0', hexDigits - 1);
      dst += hexDigits - 1;
    }
    *dst++ = upperCase ? 'P': 'p';
    *dst++ = '0';
    break;

  case fcNormal:
    dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
    break;
  }

  /* Terminate the string; the NUL is not included in the returned count.  */
  *dst = 0;

  return static_cast<unsigned int>(dst - p);
}

/* Does the hard work of outputting the correctly rounded hexadecimal
   form of a normal floating point number with the specified number of
   hexadecimal digits.  If HEXDIGITS is zero the minimum number of
   digits necessary to print the value precisely is output.

   Writes "0x"/"0X", the digits, and the 'p'/'P' exponent starting at
   DST, and returns whatever writeSignedDecimal returns for the
   exponent (presumably the position just past the last character
   written - confirm against its definition).  No NUL is written
   here.  */
char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
                                          bool upperCase,
                                          roundingMode rounding_mode) const {
  unsigned int count, valueBits, shift, partsCount, outputDigits;
  const char *hexDigitChars;
  const integerPart *significand;
  char *p;
  bool roundUp;

  *dst++ = '0';
  *dst++ = upperCase ? 'X': 'x';

  roundUp = false;
  hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;

  significand = significandParts();
  partsCount = partCount();

  /* +3 because the first digit only uses the single integer bit, so
     we have 3 virtual zero most-significant-bits.  */
  valueBits = semantics->precision + 3;
  shift = integerPartWidth - valueBits % integerPartWidth;

  /* The natural number of digits required ignoring trailing
     insignificant zeroes.  */
  outputDigits = (valueBits - significandLSB () + 3) / 4;

  /* hexDigits of zero means use the required number for the
     precision.  Otherwise, see if we are truncating.  If we are,
     find out if we need to round away from zero.  */
  if (hexDigits) {
    if (hexDigits < outputDigits) {
      /* We are dropping non-zero bits, so need to check how to round.
         "bits" is the number of dropped bits.  */
      unsigned int bits;
      lostFraction fraction;

      bits = valueBits - hexDigits * 4;
      fraction = lostFractionThroughTruncation (significand, partsCount, bits);
      roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
    }
    outputDigits = hexDigits;
  }

  /* Write the digits consecutively, and start writing in the location
     of the hexadecimal point.  We move the most significant digit
     left and add the hexadecimal point later.  */
  p = ++dst;

  /* Number of integerPart-sized chunks needed to cover valueBits.  */
  count = (valueBits + integerPartWidth - 1) / integerPartWidth;

  while (outputDigits && count) {
    integerPart part;

    /* Put the most significant integerPartWidth bits in "part".  */
    if (--count == partsCount)
      part = 0;  /* An imaginary higher zero part.  */
    else
      part = significand[count] << shift;

    /* Pull in the bits that straddle the boundary with the next lower
       part.  */
    if (count && shift)
      part |= significand[count - 1] >> (integerPartWidth - shift);

    /* Convert as much of "part" to hexdigits as we can.  */
    unsigned int curDigits = integerPartWidth / 4;

    if (curDigits > outputDigits)
      curDigits = outputDigits;
    dst += partAsHex (dst, part, curDigits, hexDigitChars);
    outputDigits -= curDigits;
  }

  if (roundUp) {
    char *q = dst;

    /* Increment the written digits from the least significant end,
       continuing while a digit wraps round to '0'.  */
    /* Note that hexDigitChars has a trailing '0'.  */
    do {
      q--;
      *q = hexDigitChars[hexDigitValue (*q) + 1];
    } while (*q == '0');
    assert(q >= p);
  } else {
    /* Add trailing zeroes.  */
    memset (dst, '0', outputDigits);
    dst += outputDigits;
  }

  /* Move the most significant digit to before the point, and if there
     is something after the decimal point add it.  This must come
     after rounding above.  */
  p[-1] = p[0];
  if (dst -1 == p)
    dst--;
  else
    p[0] = '.';

  /* Finally output the exponent.  */
  *dst++ = upperCase ? 'P': 'p';

  return writeSignedDecimal (dst, exponent);
}

// Hash an IEEEFloat.  When isFiniteNonZero() is false only the category, the
// sign and the precision are combined; otherwise the exponent and every
// significand part are hashed as well.
hash_code hash_value(const IEEEFloat &Arg) {
  if (!Arg.isFiniteNonZero())
    return hash_combine((uint8_t)Arg.category,
                        // NaN has no sign, fix it at zero.
                        Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
                        Arg.semantics->precision);

  // Normal floats need their exponent and significand hashed.
  return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
                      Arg.semantics->precision, Arg.exponent,
                      hash_combine_range(
                        Arg.significandParts(),
                        Arg.significandParts() + Arg.partCount()));
}

// Conversion from APFloat to/from host float/double.  It may eventually be
// possible to eliminate these and have everybody deal with APFloats, but that
// will take a while.  This approach will not easily extend to long double.
// Current implementation requires integerPartWidth==64, which is correct at
// the moment but could be made more general.

// Denormals have exponent minExponent in APFloat, but minExponent-1 in
// the actual IEEE representations.
We compensate for that here.34463447APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {3448assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);3449assert(partCount()==2);34503451uint64_t myexponent, mysignificand;34523453if (isFiniteNonZero()) {3454myexponent = exponent+16383; //bias3455mysignificand = significandParts()[0];3456if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))3457myexponent = 0; // denormal3458} else if (category==fcZero) {3459myexponent = 0;3460mysignificand = 0;3461} else if (category==fcInfinity) {3462myexponent = 0x7fff;3463mysignificand = 0x8000000000000000ULL;3464} else {3465assert(category == fcNaN && "Unknown category");3466myexponent = 0x7fff;3467mysignificand = significandParts()[0];3468}34693470uint64_t words[2];3471words[0] = mysignificand;3472words[1] = ((uint64_t)(sign & 1) << 15) |3473(myexponent & 0x7fffLL);3474return APInt(80, words);3475}34763477APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {3478assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);3479assert(partCount()==2);34803481uint64_t words[2];3482opStatus fs;3483bool losesInfo;34843485// Convert number to double. To avoid spurious underflows, we re-3486// normalize against the "double" minExponent first, and only *then*3487// truncate the mantissa. 
The result of that second conversion3488// may be inexact, but should never underflow.3489// Declare fltSemantics before APFloat that uses it (and3490// saves pointer to it) to ensure correct destruction order.3491fltSemantics extendedSemantics = *semantics;3492extendedSemantics.minExponent = semIEEEdouble.minExponent;3493IEEEFloat extended(*this);3494fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);3495assert(fs == opOK && !losesInfo);3496(void)fs;34973498IEEEFloat u(extended);3499fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);3500assert(fs == opOK || fs == opInexact);3501(void)fs;3502words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();35033504// If conversion was exact or resulted in a special case, we're done;3505// just set the second double to zero. Otherwise, re-convert back to3506// the extended format and compute the difference. This now should3507// convert exactly to double.3508if (u.isFiniteNonZero() && losesInfo) {3509fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);3510assert(fs == opOK && !losesInfo);3511(void)fs;35123513IEEEFloat v(extended);3514v.subtract(u, rmNearestTiesToEven);3515fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);3516assert(fs == opOK && !losesInfo);3517(void)fs;3518words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();3519} else {3520words[1] = 0;3521}35223523return APInt(128, words);3524}35253526template <const fltSemantics &S>3527APInt IEEEFloat::convertIEEEFloatToAPInt() const {3528assert(semantics == &S);35293530constexpr int bias = -(S.minExponent - 1);3531constexpr unsigned int trailing_significand_bits = S.precision - 1;3532constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;3533constexpr integerPart integer_bit =3534integerPart{1} << (trailing_significand_bits % integerPartWidth);3535constexpr uint64_t significand_mask = integer_bit - 1;3536constexpr unsigned int exponent_bits =3537S.sizeInBits - 1 - 
trailing_significand_bits;3538static_assert(exponent_bits < 64);3539constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;35403541uint64_t myexponent;3542std::array<integerPart, partCountForBits(trailing_significand_bits)>3543mysignificand;35443545if (isFiniteNonZero()) {3546myexponent = exponent + bias;3547std::copy_n(significandParts(), mysignificand.size(),3548mysignificand.begin());3549if (myexponent == 1 &&3550!(significandParts()[integer_bit_part] & integer_bit))3551myexponent = 0; // denormal3552} else if (category == fcZero) {3553myexponent = ::exponentZero(S) + bias;3554mysignificand.fill(0);3555} else if (category == fcInfinity) {3556if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||3557S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)3558llvm_unreachable("semantics don't support inf!");3559myexponent = ::exponentInf(S) + bias;3560mysignificand.fill(0);3561} else {3562assert(category == fcNaN && "Unknown category!");3563if (S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)3564llvm_unreachable("semantics don't support NaN!");3565myexponent = ::exponentNaN(S) + bias;3566std::copy_n(significandParts(), mysignificand.size(),3567mysignificand.begin());3568}3569std::array<uint64_t, (S.sizeInBits + 63) / 64> words;3570auto words_iter =3571std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin());3572if constexpr (significand_mask != 0) {3573// Clear the integer bit.3574words[mysignificand.size() - 1] &= significand_mask;3575}3576std::fill(words_iter, words.end(), uint64_t{0});3577constexpr size_t last_word = words.size() - 1;3578uint64_t shifted_sign = static_cast<uint64_t>(sign & 1)3579<< ((S.sizeInBits - 1) % 64);3580words[last_word] |= shifted_sign;3581uint64_t shifted_exponent = (myexponent & exponent_mask)3582<< (trailing_significand_bits % 64);3583words[last_word] |= shifted_exponent;3584if constexpr (last_word == 0) {3585return APInt(S.sizeInBits, words[0]);3586}3587return APInt(S.sizeInBits, 
words);3588}35893590APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {3591assert(partCount() == 2);3592return convertIEEEFloatToAPInt<semIEEEquad>();3593}35943595APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {3596assert(partCount()==1);3597return convertIEEEFloatToAPInt<semIEEEdouble>();3598}35993600APInt IEEEFloat::convertFloatAPFloatToAPInt() const {3601assert(partCount()==1);3602return convertIEEEFloatToAPInt<semIEEEsingle>();3603}36043605APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {3606assert(partCount() == 1);3607return convertIEEEFloatToAPInt<semBFloat>();3608}36093610APInt IEEEFloat::convertHalfAPFloatToAPInt() const {3611assert(partCount()==1);3612return convertIEEEFloatToAPInt<semIEEEhalf>();3613}36143615APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {3616assert(partCount() == 1);3617return convertIEEEFloatToAPInt<semFloat8E5M2>();3618}36193620APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {3621assert(partCount() == 1);3622return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>();3623}36243625APInt IEEEFloat::convertFloat8E4M3APFloatToAPInt() const {3626assert(partCount() == 1);3627return convertIEEEFloatToAPInt<semFloat8E4M3>();3628}36293630APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {3631assert(partCount() == 1);3632return convertIEEEFloatToAPInt<semFloat8E4M3FN>();3633}36343635APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {3636assert(partCount() == 1);3637return convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>();3638}36393640APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {3641assert(partCount() == 1);3642return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>();3643}36443645APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {3646assert(partCount() == 1);3647return convertIEEEFloatToAPInt<semFloatTF32>();3648}36493650APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const {3651assert(partCount() == 1);3652return 
convertIEEEFloatToAPInt<semFloat6E3M2FN>();3653}36543655APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const {3656assert(partCount() == 1);3657return convertIEEEFloatToAPInt<semFloat6E2M3FN>();3658}36593660APInt IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const {3661assert(partCount() == 1);3662return convertIEEEFloatToAPInt<semFloat4E2M1FN>();3663}36643665// This function creates an APInt that is just a bit map of the floating3666// point constant as it would appear in memory. It is not a conversion,3667// and treating the result as a normal integer is unlikely to be useful.36683669APInt IEEEFloat::bitcastToAPInt() const {3670if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)3671return convertHalfAPFloatToAPInt();36723673if (semantics == (const llvm::fltSemantics *)&semBFloat)3674return convertBFloatAPFloatToAPInt();36753676if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)3677return convertFloatAPFloatToAPInt();36783679if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)3680return convertDoubleAPFloatToAPInt();36813682if (semantics == (const llvm::fltSemantics*)&semIEEEquad)3683return convertQuadrupleAPFloatToAPInt();36843685if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)3686return convertPPCDoubleDoubleAPFloatToAPInt();36873688if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2)3689return convertFloat8E5M2APFloatToAPInt();36903691if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ)3692return convertFloat8E5M2FNUZAPFloatToAPInt();36933694if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3)3695return convertFloat8E4M3APFloatToAPInt();36963697if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN)3698return convertFloat8E4M3FNAPFloatToAPInt();36993700if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ)3701return convertFloat8E4M3FNUZAPFloatToAPInt();37023703if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ)3704return 
convertFloat8E4M3B11FNUZAPFloatToAPInt();37053706if (semantics == (const llvm::fltSemantics *)&semFloatTF32)3707return convertFloatTF32APFloatToAPInt();37083709if (semantics == (const llvm::fltSemantics *)&semFloat6E3M2FN)3710return convertFloat6E3M2FNAPFloatToAPInt();37113712if (semantics == (const llvm::fltSemantics *)&semFloat6E2M3FN)3713return convertFloat6E2M3FNAPFloatToAPInt();37143715if (semantics == (const llvm::fltSemantics *)&semFloat4E2M1FN)3716return convertFloat4E2M1FNAPFloatToAPInt();37173718assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&3719"unknown format!");3720return convertF80LongDoubleAPFloatToAPInt();3721}37223723float IEEEFloat::convertToFloat() const {3724assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&3725"Float semantics are not IEEEsingle");3726APInt api = bitcastToAPInt();3727return api.bitsToFloat();3728}37293730double IEEEFloat::convertToDouble() const {3731assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&3732"Float semantics are not IEEEdouble");3733APInt api = bitcastToAPInt();3734return api.bitsToDouble();3735}37363737#ifdef HAS_IEE754_FLOAT1283738float128 IEEEFloat::convertToQuad() const {3739assert(semantics == (const llvm::fltSemantics *)&semIEEEquad &&3740"Float semantics are not IEEEquads");3741APInt api = bitcastToAPInt();3742return api.bitsToQuad();3743}3744#endif37453746/// Integer bit is explicit in this format. 
Intel hardware (387 and later)3747/// does not support these bit patterns:3748/// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")3749/// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")3750/// exponent!=0 nor all 1's, integer bit 0 ("unnormal")3751/// exponent = 0, integer bit 1 ("pseudodenormal")3752/// At the moment, the first three are treated as NaNs, the last one as Normal.3753void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {3754uint64_t i1 = api.getRawData()[0];3755uint64_t i2 = api.getRawData()[1];3756uint64_t myexponent = (i2 & 0x7fff);3757uint64_t mysignificand = i1;3758uint8_t myintegerbit = mysignificand >> 63;37593760initialize(&semX87DoubleExtended);3761assert(partCount()==2);37623763sign = static_cast<unsigned int>(i2>>15);3764if (myexponent == 0 && mysignificand == 0) {3765makeZero(sign);3766} else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {3767makeInf(sign);3768} else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||3769(myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {3770category = fcNaN;3771exponent = exponentNaN();3772significandParts()[0] = mysignificand;3773significandParts()[1] = 0;3774} else {3775category = fcNormal;3776exponent = myexponent - 16383;3777significandParts()[0] = mysignificand;3778significandParts()[1] = 0;3779if (myexponent==0) // denormal3780exponent = -16382;3781}3782}37833784void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {3785uint64_t i1 = api.getRawData()[0];3786uint64_t i2 = api.getRawData()[1];3787opStatus fs;3788bool losesInfo;37893790// Get the first double and convert to our format.3791initFromDoubleAPInt(APInt(64, i1));3792fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);3793assert(fs == opOK && !losesInfo);3794(void)fs;37953796// Unless we have a special case, add in second double.3797if (isFiniteNonZero()) {3798IEEEFloat v(semIEEEdouble, APInt(64, i2));3799fs = 
v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);3800assert(fs == opOK && !losesInfo);3801(void)fs;38023803add(v, rmNearestTiesToEven);3804}3805}38063807template <const fltSemantics &S>3808void IEEEFloat::initFromIEEEAPInt(const APInt &api) {3809assert(api.getBitWidth() == S.sizeInBits);3810constexpr integerPart integer_bit = integerPart{1}3811<< ((S.precision - 1) % integerPartWidth);3812constexpr uint64_t significand_mask = integer_bit - 1;3813constexpr unsigned int trailing_significand_bits = S.precision - 1;3814constexpr unsigned int stored_significand_parts =3815partCountForBits(trailing_significand_bits);3816constexpr unsigned int exponent_bits =3817S.sizeInBits - 1 - trailing_significand_bits;3818static_assert(exponent_bits < 64);3819constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;3820constexpr int bias = -(S.minExponent - 1);38213822// Copy the bits of the significand. We need to clear out the exponent and3823// sign bit in the last word.3824std::array<integerPart, stored_significand_parts> mysignificand;3825std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin());3826if constexpr (significand_mask != 0) {3827mysignificand[mysignificand.size() - 1] &= significand_mask;3828}38293830// We assume the last word holds the sign bit, the exponent, and potentially3831// some of the trailing significand field.3832uint64_t last_word = api.getRawData()[api.getNumWords() - 1];3833uint64_t myexponent =3834(last_word >> (trailing_significand_bits % 64)) & exponent_mask;38353836initialize(&S);3837assert(partCount() == mysignificand.size());38383839sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64));38403841bool all_zero_significand =3842llvm::all_of(mysignificand, [](integerPart bits) { return bits == 0; });38433844bool is_zero = myexponent == 0 && all_zero_significand;38453846if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {3847if (myexponent - bias == ::exponentInf(S) && 
all_zero_significand) {3848makeInf(sign);3849return;3850}3851}38523853bool is_nan = false;38543855if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {3856is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand;3857} else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {3858bool all_ones_significand =3859std::all_of(mysignificand.begin(), mysignificand.end() - 1,3860[](integerPart bits) { return bits == ~integerPart{0}; }) &&3861(!significand_mask ||3862mysignificand[mysignificand.size() - 1] == significand_mask);3863is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand;3864} else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {3865is_nan = is_zero && sign;3866}38673868if (is_nan) {3869category = fcNaN;3870exponent = ::exponentNaN(S);3871std::copy_n(mysignificand.begin(), mysignificand.size(),3872significandParts());3873return;3874}38753876if (is_zero) {3877makeZero(sign);3878return;3879}38803881category = fcNormal;3882exponent = myexponent - bias;3883std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts());3884if (myexponent == 0) // denormal3885exponent = S.minExponent;3886else3887significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit3888}38893890void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {3891initFromIEEEAPInt<semIEEEquad>(api);3892}38933894void IEEEFloat::initFromDoubleAPInt(const APInt &api) {3895initFromIEEEAPInt<semIEEEdouble>(api);3896}38973898void IEEEFloat::initFromFloatAPInt(const APInt &api) {3899initFromIEEEAPInt<semIEEEsingle>(api);3900}39013902void IEEEFloat::initFromBFloatAPInt(const APInt &api) {3903initFromIEEEAPInt<semBFloat>(api);3904}39053906void IEEEFloat::initFromHalfAPInt(const APInt &api) {3907initFromIEEEAPInt<semIEEEhalf>(api);3908}39093910void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {3911initFromIEEEAPInt<semFloat8E5M2>(api);3912}39133914void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) 
{3915initFromIEEEAPInt<semFloat8E5M2FNUZ>(api);3916}39173918void IEEEFloat::initFromFloat8E4M3APInt(const APInt &api) {3919initFromIEEEAPInt<semFloat8E4M3>(api);3920}39213922void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {3923initFromIEEEAPInt<semFloat8E4M3FN>(api);3924}39253926void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {3927initFromIEEEAPInt<semFloat8E4M3FNUZ>(api);3928}39293930void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {3931initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api);3932}39333934void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {3935initFromIEEEAPInt<semFloatTF32>(api);3936}39373938void IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt &api) {3939initFromIEEEAPInt<semFloat6E3M2FN>(api);3940}39413942void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) {3943initFromIEEEAPInt<semFloat6E2M3FN>(api);3944}39453946void IEEEFloat::initFromFloat4E2M1FNAPInt(const APInt &api) {3947initFromIEEEAPInt<semFloat4E2M1FN>(api);3948}39493950/// Treat api as containing the bits of a floating point number.3951void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {3952assert(api.getBitWidth() == Sem->sizeInBits);3953if (Sem == &semIEEEhalf)3954return initFromHalfAPInt(api);3955if (Sem == &semBFloat)3956return initFromBFloatAPInt(api);3957if (Sem == &semIEEEsingle)3958return initFromFloatAPInt(api);3959if (Sem == &semIEEEdouble)3960return initFromDoubleAPInt(api);3961if (Sem == &semX87DoubleExtended)3962return initFromF80LongDoubleAPInt(api);3963if (Sem == &semIEEEquad)3964return initFromQuadrupleAPInt(api);3965if (Sem == &semPPCDoubleDoubleLegacy)3966return initFromPPCDoubleDoubleAPInt(api);3967if (Sem == &semFloat8E5M2)3968return initFromFloat8E5M2APInt(api);3969if (Sem == &semFloat8E5M2FNUZ)3970return initFromFloat8E5M2FNUZAPInt(api);3971if (Sem == &semFloat8E4M3)3972return initFromFloat8E4M3APInt(api);3973if (Sem == &semFloat8E4M3FN)3974return initFromFloat8E4M3FNAPInt(api);3975if (Sem == 
&semFloat8E4M3FNUZ)
    return initFromFloat8E4M3FNUZAPInt(api);
  if (Sem == &semFloat8E4M3B11FNUZ)
    return initFromFloat8E4M3B11FNUZAPInt(api);
  if (Sem == &semFloatTF32)
    return initFromFloatTF32APInt(api);
  if (Sem == &semFloat6E3M2FN)
    return initFromFloat6E3M2FNAPInt(api);
  if (Sem == &semFloat6E2M3FN)
    return initFromFloat6E2M3FNAPInt(api);
  if (Sem == &semFloat4E2M1FN)
    return initFromFloat4E2M1FNAPInt(api);

  // No known semantics matched.
  llvm_unreachable(nullptr);
}

/// Make this number the largest magnitude normal number in the given
/// semantics.
void IEEEFloat::makeLargest(bool Negative) {
  // We want (in interchange format):
  //   sign = {Negative}
  //   exponent = 1..10
  //   significand = 1..1
  category = fcNormal;
  sign = Negative;
  exponent = semantics->maxExponent;

  // Use memset to set all but the highest integerPart to all ones.
  integerPart *significand = significandParts();
  unsigned PartCount = partCount();
  memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));

  // Set the high integerPart especially setting all unused top bits for
  // internal consistency.
  const unsigned NumUnusedHighBits =
    PartCount*integerPartWidth - semantics->precision;
  // The guard avoids shifting by a full integerPartWidth.
  significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
                                   ?
(~integerPart(0) >> NumUnusedHighBits)
                                   : 0;

  // For NanOnly formats that encode NaN as all ones, clear the lowest
  // significand bit of the result.
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
      semantics->nanEncoding == fltNanEncoding::AllOnes)
    significand[0] &= ~integerPart(1);
}

/// Make this number the smallest magnitude denormal number in the given
/// semantics.
void IEEEFloat::makeSmallest(bool Negative) {
  // We want (in interchange format):
  //   sign = {Negative}
  //   exponent = 0..0
  //   significand = 0..01
  category = fcNormal;
  sign = Negative;
  exponent = semantics->minExponent;
  APInt::tcSet(significandParts(), 1, partCount());
}

/// Make this number the value with the minimum exponent and only the integer
/// bit (bit precision - 1) of the significand set.
void IEEEFloat::makeSmallestNormalized(bool Negative) {
  // We want (in interchange format):
  //   sign = {Negative}
  //   exponent = 0..0
  //   significand = 10..0

  category = fcNormal;
  zeroSignificand();
  sign = Negative;
  exponent = semantics->minExponent;
  APInt::tcSetBit(significandParts(), semantics->precision - 1);
}

/// Construct from the raw bits \p API interpreted with semantics \p Sem.
IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
  initFromAPInt(&Sem, API);
}

/// Construct a semIEEEsingle value from the bits of \p f.
IEEEFloat::IEEEFloat(float f) {
  initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
}

/// Construct a semIEEEdouble value from the bits of \p d.
IEEEFloat::IEEEFloat(double d) {
  initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
}

namespace {
  /// Append the characters of \p Str to \p Buffer.
  void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
    Buffer.append(Str.begin(), Str.end());
  }

  /// Removes data from the given significand until it is no more
  /// precise than is required for the desired precision.
  ///
  /// \p exp is increased by the number of decimal digits divided out.
  void AdjustToPrecision(APInt &significand,
                         int &exp, unsigned FormatPrecision) {
    unsigned bits = significand.getActiveBits();

    // 196/59 is a very slight overestimate of lg_2(10).
    unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;

    if (bits <= bitsRequired) return;

    unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
    if (!tensRemovable) return;

    exp += tensRemovable;

    APInt
divisor(significand.getBitWidth(), 1);
    APInt powten(significand.getBitWidth(), 10);
    // Build divisor = 10^tensRemovable by repeated squaring of powten.
    while (true) {
      if (tensRemovable & 1)
        divisor *= powten;
      tensRemovable >>= 1;
      if (!tensRemovable) break;
      powten *= powten;
    }

    significand = significand.udiv(divisor);

    // Truncate the significand down to its active bit count.
    significand = significand.trunc(significand.getActiveBits());
  }


  /// Reduce the decimal digit buffer to at most \p FormatPrecision digits,
  /// rounding and adding the number of removed low-order digits to \p exp.
  /// The buffer stores the least significant digit first.
  void AdjustToPrecision(SmallVectorImpl<char> &buffer,
                         int &exp, unsigned FormatPrecision) {
    unsigned N = buffer.size();
    if (N <= FormatPrecision) return;

    // The most significant figures are the last ones in the buffer.
    unsigned FirstSignificant = N - FormatPrecision;

    // Round.
    // FIXME: this probably shouldn't use 'round half up'.

    // Rounding down is just a truncation, except we also want to drop
    // trailing zeros from the new result.
    if (buffer[FirstSignificant - 1] < '5') {
      while (FirstSignificant < N && buffer[FirstSignificant] == '0')
        FirstSignificant++;

      exp += FirstSignificant;
      buffer.erase(&buffer[0], &buffer[FirstSignificant]);
      return;
    }

    // Rounding up requires a decimal add-with-carry.
// If we continue
    // the carry, the newly-introduced zeros will just be truncated.
    for (unsigned I = FirstSignificant; I != N; ++I) {
      if (buffer[I] == '9') {
        FirstSignificant++;
      } else {
        buffer[I]++;
        break;
      }
    }

    // If we carried through, we have exactly one digit of precision.
    if (FirstSignificant == N) {
      exp += FirstSignificant;
      buffer.clear();
      buffer.push_back('1');
      return;
    }

    exp += FirstSignificant;
    buffer.erase(&buffer[0], &buffer[FirstSignificant]);
  }

  /// Append the decimal rendering of (-1)^isNeg * significand * 2^exp to
  /// \p Str.
  ///
  /// \param FormatPrecision  maximum number of decimal digits; 0 selects a
  ///        digit count derived from the bit width of \p significand.
  /// \param FormatMaxPadding  limit on padding zeros before switching to
  ///        scientific notation; 0 forces scientific notation.
  /// \param TruncateZero  selects the 'E' exponent form without zero fill;
  ///        otherwise digits are zero-filled up to FormatPrecision and a
  ///        lower-case 'e' with an exponent of at least two digits is used.
  void toStringImpl(SmallVectorImpl<char> &Str, const bool isNeg, int exp,
                    APInt significand, unsigned FormatPrecision,
                    unsigned FormatMaxPadding, bool TruncateZero) {
    const int semanticsPrecision = significand.getBitWidth();

    if (isNeg)
      Str.push_back('-');

    // Set FormatPrecision if zero.  We want to do this before we
    // truncate trailing zeros, as those are part of the precision.
    if (!FormatPrecision) {
      // We use enough digits so the number can be round-tripped back to an
      // APFloat. The formula comes from "How to Print Floating-Point Numbers
      // Accurately" by Steele and White.
      // FIXME: Using a formula based purely on the precision is conservative;
      // we can print fewer digits depending on the actual value being printed.

      // FormatPrecision = 2 + floor(significandBits / lg_2(10))
      FormatPrecision = 2 + semanticsPrecision * 59 / 196;
    }

    // Ignore trailing binary zeros.
    int trailingZeros = significand.countr_zero();
    exp += trailingZeros;
    significand.lshrInPlace(trailingZeros);

    // Change the exponent from 2^e to 10^e.
    if (exp == 0) {
      // Nothing to do.
    } else if (exp > 0) {
      // Just shift left.
      significand = significand.zext(semanticsPrecision + exp);
      significand <<= exp;
      exp = 0;
    } else { /* exp < 0 */
      int texp = -exp;

      // We transform this using the identity:
      //   (N)(2^-e) == (N)(5^e)(10^-e)
      // This means we have to multiply N (the significand) by 5^e.
      // To avoid overflow, we have to operate on numbers large
      // enough to store N * 5^e:
      //   log2(N * 5^e) == log2(N) + e * log2(5)
      //                 <= semantics->precision + e * 137 / 59
      //   (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)

      unsigned precision = semanticsPrecision + (137 * texp + 136) / 59;

      // Multiply significand by 5^e.
      //   N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
      significand = significand.zext(precision);
      APInt five_to_the_i(precision, 5);
      while (true) {
        if (texp & 1)
          significand *= five_to_the_i;

        texp >>= 1;
        if (!texp)
          break;
        five_to_the_i *= five_to_the_i;
      }
    }

    AdjustToPrecision(significand, exp, FormatPrecision);

    SmallVector<char, 256> buffer;

    // Fill the buffer.
    unsigned precision = significand.getBitWidth();
    if (precision < 4) {
      // We need enough precision to store the value 10.
      precision = 4;
      significand = significand.zext(precision);
    }
    APInt ten(precision, 10);
    APInt digit(precision, 0);

    // Digits are produced least significant first, so buffer[0] is the
    // lowest-order retained digit.
    bool inTrail = true;
    while (significand != 0) {
      // digit <- significand % 10
      // significand <- significand / 10
      APInt::udivrem(significand, ten, significand, digit);

      unsigned d = digit.getZExtValue();

      // Drop trailing zeros.
      if (inTrail && !d)
        exp++;
      else {
        buffer.push_back((char) ('0' + d));
        inTrail = false;
      }
    }

    assert(!buffer.empty() && "no characters in buffer!");

    // Drop down to FormatPrecision.
    // TODO: don't do more precise calculations above than are required.
    AdjustToPrecision(buffer, exp, FormatPrecision);

    unsigned NDigits = buffer.size();

    // Check whether we should use scientific notation.
    bool FormatScientific;
    if (!FormatMaxPadding)
      FormatScientific = true;
    else {
      if (exp >= 0) {
        // 765e3 --> 765000
        //              ^^^
        // But we shouldn't make the number look more precise than it is.
        FormatScientific = ((unsigned) exp > FormatMaxPadding ||
                            NDigits + (unsigned) exp > FormatPrecision);
      } else {
        // Power of the most significant digit.
        int MSD = exp + (int) (NDigits - 1);
        if (MSD >= 0) {
          // 765e-2 == 7.65
          FormatScientific = false;
        } else {
          // 765e-5 == 0.00765
          //           ^ ^^
          FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
        }
      }
    }

    // Scientific formatting is pretty straightforward.
    if (FormatScientific) {
      // exp now refers to the power of the leading digit.
      exp += (NDigits - 1);

      Str.push_back(buffer[NDigits-1]);
      Str.push_back('.');
      if (NDigits == 1 && TruncateZero)
        Str.push_back('0');
      else
        for (unsigned I = 1; I != NDigits; ++I)
          Str.push_back(buffer[NDigits-1-I]);
      // Fill with zeros up to FormatPrecision.
      if (!TruncateZero && FormatPrecision > NDigits - 1)
        Str.append(FormatPrecision - NDigits + 1, '0');
      // For !TruncateZero we use lower 'e'.
      Str.push_back(TruncateZero ? 'E' : 'e');

      Str.push_back(exp >= 0 ? '+' : '-');
      if (exp < 0)
        exp = -exp;
      // Exponent digits are collected least significant first, then emitted
      // in reverse.
      SmallVector<char, 6> expbuf;
      do {
        expbuf.push_back((char) ('0' + (exp % 10)));
        exp /= 10;
      } while (exp);
      // Exponent always at least two digits if we do not truncate zeros.
      if (!TruncateZero && expbuf.size() < 2)
        expbuf.push_back('0');
      for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
        Str.push_back(expbuf[E-1-I]);
      return;
    }

    // Non-scientific, positive exponents.
    if (exp >= 0) {
      for (unsigned I = 0; I != NDigits; ++I)
        Str.push_back(buffer[NDigits-1-I]);
      for (unsigned I = 0; I != (unsigned) exp; ++I)
        Str.push_back('0');
      return;
    }

    // Non-scientific, negative exponents.

    // The number of digits to the left of the decimal point.
    int NWholeDigits = exp + (int) NDigits;

    unsigned I = 0;
    if (NWholeDigits > 0) {
      for (; I != (unsigned) NWholeDigits; ++I)
        Str.push_back(buffer[NDigits-I-1]);
      Str.push_back('.');
    } else {
      // "0." followed by -NWholeDigits zeros before the first digit.
      unsigned NZeros = 1 + (unsigned) -NWholeDigits;

      Str.push_back('0');
      Str.push_back('.');
      for (unsigned Z = 1; Z != NZeros; ++Z)
        Str.push_back('0');
    }

    for (; I != NDigits; ++I)
      Str.push_back(buffer[NDigits-I-1]);

  }
} // namespace

/// Append a decimal rendering of this value to \p Str.
///
/// Infinities are written as "+Inf"/"-Inf" and NaNs as "NaN". Zero is
/// written directly here; finite non-zero values go through toStringImpl.
void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
                         unsigned FormatMaxPadding, bool TruncateZero) const {
  switch (category) {
  case fcInfinity:
    if (isNegative())
      return append(Str, "-Inf");
    else
      return append(Str, "+Inf");

  case fcNaN: return append(Str, "NaN");

  case fcZero:
    if (isNegative())
      Str.push_back('-');

    // FormatMaxPadding == 0 requests scientific notation.
    if (!FormatMaxPadding) {
      if (TruncateZero)
        append(Str, "0.0E+0");
      else {
        append(Str, "0.0");
        if (FormatPrecision > 1)
          Str.append(FormatPrecision - 1, '0');
        append(Str, "e+00");
      }
    } else
      Str.push_back('0');
    return;

  case fcNormal:
    break;
  }

  // Decompose the number into an APInt and an exponent.
  int exp = exponent
- ((int) semantics->precision - 1);
  APInt significand(
      semantics->precision,
      ArrayRef(significandParts(), partCountForBits(semantics->precision)));

  toStringImpl(Str, isNegative(), exp, significand, FormatPrecision,
               FormatMaxPadding, TruncateZero);

}

/// Return true if this value has an exact reciprocal that is not a denormal,
/// storing it in \p inv when \p inv is non-null. \p inv is not written when
/// false is returned.
bool IEEEFloat::getExactInverse(APFloat *inv) const {
  // Special floats and denormals have no exact inverse.
  if (!isFiniteNonZero())
    return false;

  // Check that the number is a power of two by making sure that only the
  // integer bit is set in the significand.
  if (significandLSB() != semantics->precision - 1)
    return false;

  // Get the inverse.
  IEEEFloat reciprocal(*semantics, 1ULL);
  if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
    return false;

  // Avoid multiplication with a denormal, it is not safe on all platforms and
  // may be slower than a normal division.
  if (reciprocal.isDenormal())
    return false;

  assert(reciprocal.isFiniteNonZero() &&
         reciprocal.significandLSB() == reciprocal.semantics->precision - 1);

  if (inv)
    *inv = APFloat(reciprocal, *semantics);

  return true;
}

/// Return the exponent when exactly one significand bit is set (sign is not
/// consulted), or INT_MIN for non-finite values, zero, and significands with
/// more than one bit set.
int IEEEFloat::getExactLog2Abs() const {
  if (!isFinite() || isZero())
    return INT_MIN;

  const integerPart *Parts = significandParts();
  const int PartCount = partCountForBits(semantics->precision);

  // Bail out as soon as a second set bit is seen.
  int PopCount = 0;
  for (int i = 0; i < PartCount; ++i) {
    PopCount += llvm::popcount(Parts[i]);
    if (PopCount > 1)
      return INT_MIN;
  }

  if (exponent != semantics->minExponent)
    return exponent;

  // At the minimum exponent the set bit may sit below the integer bit, so
  // locate it and adjust the exponent by its position.
  int CountrParts = 0;
  for (int i = 0; i < PartCount;
       ++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
    if (Parts[i] != 0) {
      return exponent - semantics->precision + CountrParts +
             llvm::countr_zero(Parts[i]) + 1;
    }
  }

  llvm_unreachable("didn't find the set bit");
}

/// Return true if this value is a signaling NaN. Always false for formats
/// whose non-finite behavior is NanOnly or FiniteOnly.
bool IEEEFloat::isSignaling() const {
  if (!isNaN())
    return false;
  if
(semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
      semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
    return false;

  // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
  // first bit of the trailing significand being 0.
  return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
}

/// IEEE-754R 2008 5.3.1: nextUp/nextDown.
///
/// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
/// appropriate sign switching before/after the computation.
///
/// Returns opInvalidOp when the input is a signaling NaN and opOK otherwise.
IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
  // If we are performing nextDown, swap sign so we have -x.
  if (nextDown)
    changeSign();

  // Compute nextUp(x)
  opStatus result = opOK;

  // Handle each float category separately.
  switch (category) {
  case fcInfinity:
    // nextUp(+inf) = +inf
    if (!isNegative())
      break;
    // nextUp(-inf) = -getLargest()
    makeLargest(true);
    break;
  case fcNaN:
    // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
    // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
    //                     change the payload.
    if (isSignaling()) {
      result = opInvalidOp;
      // For consistency, propagate the sign of the sNaN to the qNaN.
      makeNaN(false, isNegative(), nullptr);
    }
    break;
  case fcZero:
    // nextUp(pm 0) = +getSmallest()
    makeSmallest(false);
    break;
  case fcNormal:
    // nextUp(-getSmallest()) = -0
    if (isSmallest() && isNegative()) {
      APInt::tcSet(significandParts(), 0, partCount());
      category = fcZero;
      exponent = 0;
      // With the NegativeZero NaN encoding the result is made +0 instead.
      if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
        sign = false;
      break;
    }

    if (isLargest() && !isNegative()) {
      if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
        // nextUp(getLargest()) == NAN
        makeNaN();
        break;
      } else if (semantics->nonFiniteBehavior ==
                 fltNonfiniteBehavior::FiniteOnly) {
        // nextUp(getLargest()) == getLargest()
        break;
      } else {
        // nextUp(getLargest()) == INFINITY
        APInt::tcSet(significandParts(), 0, partCount());
        category = fcInfinity;
        exponent = semantics->maxExponent + 1;
        break;
      }
    }

    // nextUp(normal) == normal + inc.
    if (isNegative()) {
      // If we are negative, we need to decrement the significand.

      // We only cross a binade boundary that requires adjusting the exponent
      // if:
      //   1. exponent != semantics->minExponent. This implies we are not in the
      //   smallest binade or are dealing with denormals.
      //   2. Our significand excluding the integral bit is all zeros.
      bool WillCrossBinadeBoundary =
        exponent != semantics->minExponent && isSignificandAllZeros();

      // Decrement the significand.
      //
      // We always do this since:
      //   1. If we are dealing with a non-binade decrement, by definition we
      //   just decrement the significand.
      //   2. If we are dealing with a normal -> normal binade decrement, since
      //   we have an explicit integral bit the fact that all bits but the
      //   integral bit are zero implies that subtracting one will yield a
      //   significand with 0 integral bit and 1 in all other spots. Thus we
      //   must just adjust the exponent and set the integral bit to 1.
      //   3. If we are dealing with a normal -> denormal binade decrement,
      //   since we set the integral bit to 0 when we represent denormals, we
      //   just decrement the significand.
      integerPart *Parts = significandParts();
      APInt::tcDecrement(Parts, partCount());

      if (WillCrossBinadeBoundary) {
        // Our result is a normal number. Do the following:
        // 1. Set the integral bit to 1.
        // 2. Decrement the exponent.
        APInt::tcSetBit(Parts, semantics->precision - 1);
        exponent--;
      }
    } else {
      // If we are positive, we need to increment the significand.

      // We only cross a binade boundary that requires adjusting the exponent if
      // the input is not a denormal and all of said input's significand bits
      // are set. If all of said conditions are true: clear the significand, set
      // the integral bit to 1, and increment the exponent. If we have a
      // denormal always increment since moving denormals and the numbers in the
      // smallest normal binade have the same exponent in our representation.
      bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();

      if (WillCrossBinadeBoundary) {
        integerPart *Parts = significandParts();
        APInt::tcSet(Parts, 0, partCount());
        APInt::tcSetBit(Parts, semantics->precision - 1);
        assert(exponent != semantics->maxExponent &&
               "We can not increment an exponent beyond the maxExponent allowed"
               " by the given floating point semantics.");
        exponent++;
      } else {
        incrementSignificand();
      }
    }
    break;
  }

  // If we are performing nextDown, swap sign so we have -nextUp(-x)
  if (nextDown)
    changeSign();

  return result;
}

/// Exponent value used for NaN in this value's semantics.
APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
  return ::exponentNaN(*semantics);
}

/// Exponent value used for infinity in this value's semantics.
APFloatBase::ExponentType IEEEFloat::exponentInf() const {
  return ::exponentInf(*semantics);
}

/// Exponent value used for zero in this value's semantics.
APFloatBase::ExponentType IEEEFloat::exponentZero() const {
  return ::exponentZero(*semantics);
}

/// Make this value an infinity with the given sign. For NanOnly formats a NaN
/// is produced instead; calling this on a FiniteOnly format is unreachable.
void IEEEFloat::makeInf(bool Negative) {
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
    llvm_unreachable("This floating point format does not support Inf");

  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
    // There is no Inf, so make NaN instead.
    makeNaN(false, Negative);
    return;
  }
  category = fcInfinity;
  sign = Negative;
  exponent = exponentInf();
  APInt::tcSet(significandParts(), 0, partCount());
}

/// Make this value a zero with the given sign. The sign is forced positive
/// for formats using the NegativeZero NaN encoding.
void IEEEFloat::makeZero(bool Negative) {
  category = fcZero;
  sign = Negative;
  if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
    // Merge negative zero to positive because 0b10000...000 is used for NaN
    sign = false;
  }
  exponent = exponentZero();
  APInt::tcSet(significandParts(), 0, partCount());
}

/// Set the quiet bit (bit precision - 2) of a NaN. Requires isNaN(); does
/// nothing for NanOnly formats.
void IEEEFloat::makeQuiet()
{
  assert(isNaN());
  if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
    APInt::tcSetBit(significandParts(), semantics->precision - 2);
}

/// Return the exponent of \p Arg, or one of IEK_NaN / IEK_Zero / IEK_Inf for
/// the corresponding special values. Denormals are normalized first so the
/// result reflects the position of their leading bit.
int ilogb(const IEEEFloat &Arg) {
  if (Arg.isNaN())
    return IEEEFloat::IEK_NaN;
  if (Arg.isZero())
    return IEEEFloat::IEK_Zero;
  if (Arg.isInfinity())
    return IEEEFloat::IEK_Inf;
  if (!Arg.isDenormal())
    return Arg.exponent;

  // Work on a copy: raise the exponent by the number of fraction bits so that
  // normalize() can shift the significand, then undo the bias.
  IEEEFloat Normalized(Arg);
  int SignificandBits = Arg.getSemantics().precision - 1;

  Normalized.exponent += SignificandBits;
  Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
  return Normalized.exponent - SignificandBits;
}

/// Return \p X with \p Exp added to its exponent, renormalized using
/// \p RoundingMode. A NaN result is made quiet.
IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
  auto MaxExp = X.getSemantics().maxExponent;
  auto MinExp = X.getSemantics().minExponent;

  // If Exp is wildly out-of-scale, simply adding it to X.exponent will
  // overflow; clamp it to a safe range before adding, but ensure that the range
  // is large enough that the clamp does not change the result.
// The range we
  // need to support is the difference between the largest possible exponent and
  // the normalized exponent of half the smallest denormal.

  int SignificandBits = X.getSemantics().precision - 1;
  int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;

  // Clamp to one past the range ends to let normalize handle overflow.
  X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement);
  X.normalize(RoundingMode, lfExactlyZero);
  if (X.isNaN())
    X.makeQuiet();
  return X;
}

/// Split \p Val into a fraction and a power of two.
///
/// \p Exp receives ilogb(Val) + 1 (0 for zero; for NaN and infinity it keeps
/// the IEK_NaN / IEK_Inf value returned by ilogb). NaNs are returned quieted
/// and infinities unchanged; otherwise the result is scalbn(Val, -Exp, RM).
IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
  Exp = ilogb(Val);

  // Quiet signalling nans.
  if (Exp == IEEEFloat::IEK_NaN) {
    IEEEFloat Quiet(Val);
    Quiet.makeQuiet();
    return Quiet;
  }

  if (Exp == IEEEFloat::IEK_Inf)
    return Val;

  // 1 is added because frexp is defined to return a normalized fraction in
  // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
  Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
  return scalbn(Val, -Exp, RM);
}

/// Construct with both components default-constructed as semIEEEdouble
/// values. \p S must be semPPCDoubleDouble.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
    : Semantics(&S),
      Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
  assert(Semantics == &semPPCDoubleDouble);
}

/// Construct with both components built using the `uninitialized` tag.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
    : Semantics(&S),
      Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
                            APFloat(semIEEEdouble, uninitialized)}) {
  assert(Semantics == &semPPCDoubleDouble);
}

/// Construct with the first component built from \p I and the second
/// default-constructed.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
    : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
                                           APFloat(semIEEEdouble)}) {
  assert(Semantics == &semPPCDoubleDouble);
}

/// Construct from raw bits: word 0 of \p I becomes the first double and
/// word 1 the second.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
    : Semantics(&S),
      Floats(new APFloat[2]{
          APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
          APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
  assert(Semantics ==
&semPPCDoubleDouble);
}

/// Construct by taking ownership of two semIEEEdouble components.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
                             APFloat &&Second)
    : Semantics(&S),
      Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
  assert(Semantics == &semPPCDoubleDouble);
  assert(&Floats[0].getSemantics() == &semIEEEdouble);
  assert(&Floats[1].getSemantics() == &semIEEEdouble);
}

/// Copy constructor. A source without a Floats array yields a copy without
/// one.
DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
    : Semantics(RHS.Semantics),
      Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
                                         APFloat(RHS.Floats[1])}
                        : nullptr) {
  assert(Semantics == &semPPCDoubleDouble);
}

/// Move constructor. The source is left with semBogus semantics.
DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
    : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
  RHS.Semantics = &semBogus;
  assert(Semantics == &semPPCDoubleDouble);
}

/// Copy assignment. When semantics match and the source has components they
/// are assigned element-wise; otherwise the object is destroyed and
/// copy-constructed in place (skipped for self-assignment).
DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
  if (Semantics == RHS.Semantics && RHS.Floats) {
    Floats[0] = RHS.Floats[0];
    Floats[1] = RHS.Floats[1];
  } else if (this != &RHS) {
    this->~DoubleAPFloat();
    new (this) DoubleAPFloat(RHS);
  }
  return *this;
}

// Implement addition, subtraction, multiplication and division based on:
// "Software for Doubled-Precision Floating-Point Computations",
// by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
//
// addImpl stores (a, aa) + (c, cc) into Floats[0] / Floats[1] and returns the
// OR of the statuses of the component operations.
APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
                                         const APFloat &c, const APFloat &cc,
                                         roundingMode RM) {
  int Status = opOK;
  APFloat z = a;
  Status |= z.add(c, RM);
  if (!z.isFinite()) {
    // A non-finite sum that is not an infinity is stored as-is with a zero
    // low part.
    if (!z.isInfinity()) {
      Floats[0] = std::move(z);
      Floats[1].makeZero(/* Neg = */ false);
      return (opStatus)Status;
    }
    // a + c gave an infinity: discard that status and redo the sum starting
    // from the low parts.
    Status = opOK;
    auto AComparedToC = a.compareAbsoluteValue(c);
    z = cc;
    Status |= z.add(aa, RM);
    if (AComparedToC == APFloat::cmpGreaterThan) {
      // z = cc + aa + c + a;
      Status |= z.add(c, RM);
      Status |= z.add(a, RM);
    } else {
      // z = cc + aa + a +
// c;
      Status |= z.add(a, RM);
      Status |= z.add(c, RM);
    }
    if (!z.isFinite()) {
      Floats[0] = std::move(z);
      Floats[1].makeZero(/* Neg = */ false);
      return (opStatus)Status;
    }
    Floats[0] = z;
    APFloat zz = aa;
    Status |= zz.add(cc, RM);
    if (AComparedToC == APFloat::cmpGreaterThan) {
      // Floats[1] = a - z + c + zz;
      Floats[1] = a;
      Status |= Floats[1].subtract(z, RM);
      Status |= Floats[1].add(c, RM);
      Status |= Floats[1].add(zz, RM);
    } else {
      // Floats[1] = c - z + a + zz;
      Floats[1] = c;
      Status |= Floats[1].subtract(z, RM);
      Status |= Floats[1].add(a, RM);
      Status |= Floats[1].add(zz, RM);
    }
  } else {
    // q = a - z;
    APFloat q = a;
    Status |= q.subtract(z, RM);

    // zz = q + c + (a - (q + z)) + aa + cc;
    // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
    auto zz = q;
    Status |= zz.add(c, RM);
    Status |= q.add(z, RM);
    Status |= q.subtract(a, RM);
    q.changeSign();
    Status |= zz.add(q, RM);
    Status |= zz.add(aa, RM);
    Status |= zz.add(cc, RM);
    // A +0 correction term leaves z as the result; opOK is returned here
    // regardless of the accumulated status.
    if (zz.isZero() && !zz.isNegative()) {
      Floats[0] = std::move(z);
      Floats[1].makeZero(/* Neg = */ false);
      return opOK;
    }
    Floats[0] = z;
    Status |= Floats[0].add(zz, RM);
    if (!Floats[0].isFinite()) {
      Floats[1].makeZero(/* Neg = */ false);
      return (opStatus)Status;
    }
    // Floats[1] = z - Floats[0] + zz
    Floats[1] = std::move(z);
    Status |= Floats[1].subtract(Floats[0], RM);
    Status |= Floats[1].add(zz, RM);
  }
  return (opStatus)Status;
}

/// Add \p LHS and \p RHS into \p Out, handling NaN, zero and infinity
/// operands before delegating the finite case to addImpl.
APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
                                                const DoubleAPFloat &RHS,
                                                DoubleAPFloat &Out,
                                                roundingMode RM) {
  // A NaN operand is returned as-is, LHS taking priority.
  if (LHS.getCategory() == fcNaN) {
    Out = LHS;
    return opOK;
  }
  if (RHS.getCategory() == fcNaN) {
    Out = RHS;
    return opOK;
  }
  // Adding zero yields the other operand.
  if (LHS.getCategory() == fcZero) {
    Out = RHS;
    return opOK;
  }
  if (RHS.getCategory() == fcZero) {
    Out = LHS;
    return opOK;
  }
  if (LHS.getCategory() == fcInfinity &&
RHS.getCategory() == fcInfinity &&
      LHS.isNegative() != RHS.isNegative()) {
    // Inf + -Inf is invalid: produce a quiet NaN.
    Out.makeNaN(false, Out.isNegative(), nullptr);
    return opInvalidOp;
  }
  if (LHS.getCategory() == fcInfinity) {
    Out = LHS;
    return opOK;
  }
  if (RHS.getCategory() == fcInfinity) {
    Out = RHS;
    return opOK;
  }
  assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);

  // Copy the components first; Out may be the same object as LHS (see add()).
  APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
      CC(RHS.Floats[1]);
  assert(&A.getSemantics() == &semIEEEdouble);
  assert(&AA.getSemantics() == &semIEEEdouble);
  assert(&C.getSemantics() == &semIEEEdouble);
  assert(&CC.getSemantics() == &semIEEEdouble);
  assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
  assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
  return Out.addImpl(A, AA, C, CC, RM);
}

/// Add \p RHS to this value in place.
APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
                                     roundingMode RM) {
  return addWithSpecial(*this, RHS, *this, RM);
}

/// Subtract \p RHS from this value in place, computed as -((-this) + RHS).
APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
                                          roundingMode RM) {
  changeSign();
  auto Ret = add(RHS, RM);
  changeSign();
  return Ret;
}

/// Multiply this value by \p RHS in place. Special categories are resolved
/// first; finite operands use a multiply/fused-multiply-add sequence on the
/// component doubles.
APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
                                          APFloat::roundingMode RM) {
  const auto &LHS = *this;
  auto &Out = *this;
  /* Interesting observation: For special categories, finding the lowest
     common ancestor of the following layered graph gives the correct
     return category:

        NaN
       /   \
     Zero  Inf
       \   /
       Normal

     e.g.
NaN * NaN = NaN
          Zero * Inf = NaN
          Normal * Zero = Zero
          Normal * Inf = Inf
  */
  if (LHS.getCategory() == fcNaN) {
    Out = LHS;
    return opOK;
  }
  if (RHS.getCategory() == fcNaN) {
    Out = RHS;
    return opOK;
  }
  if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
      (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
    // NOTE(review): a NaN is produced here but opOK is returned, whereas
    // addWithSpecial reports opInvalidOp for Inf + -Inf. Confirm whether the
    // difference is intentional.
    Out.makeNaN(false, false, nullptr);
    return opOK;
  }
  // NOTE(review): the operand is copied unchanged in the two cases below, so
  // the sign of the other operand is not applied here. Confirm this is the
  // intended result for e.g. Inf * negative.
  if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
    Out = LHS;
    return opOK;
  }
  if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
    Out = RHS;
    return opOK;
  }
  assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
         "Special cases not handled exhaustively");

  int Status = opOK;
  // (A, B) are this value's high/low parts and (C, D) are RHS's.
  APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
  // t = a * c
  APFloat T = A;
  Status |= T.multiply(C, RM);
  if (!T.isFiniteNonZero()) {
    Floats[0] = T;
    Floats[1].makeZero(/* Neg = */ false);
    return (opStatus)Status;
  }

  // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
  APFloat Tau = A;
  T.changeSign();
  Status |= Tau.fusedMultiplyAdd(C, T, RM);
  T.changeSign();
  {
    // v = a * d
    APFloat V = A;
    Status |= V.multiply(D, RM);
    // w = b * c
    APFloat W = B;
    Status |= W.multiply(C, RM);
    Status |= V.add(W, RM);
    // tau += v + w
    Status |= Tau.add(V, RM);
  }
  // u = t + tau
  APFloat U = T;
  Status |= U.add(Tau, RM);

  Floats[0] = U;
  if (!U.isFinite()) {
    Floats[1].makeZero(/* Neg = */ false);
  } else {
    // Floats[1] = (t - u) + tau
    Status |= T.subtract(U, RM);
    Status |= T.add(Tau, RM);
    Floats[1] = T;
  }
  return (opStatus)Status;
}

/// Divide this value by \p RHS in place by round-tripping both operands
/// through semPPCDoubleDoubleLegacy APFloats.
APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
                                        APFloat::roundingMode RM) {
  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
  APFloat Tmp(semPPCDoubleDoubleLegacy,
bitcastToAPInt());4974auto Ret =4975Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);4976*this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());4977return Ret;4978}49794980APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {4981assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");4982APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());4983auto Ret =4984Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));4985*this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());4986return Ret;4987}49884989APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {4990assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");4991APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());4992auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));4993*this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());4994return Ret;4995}49964997APFloat::opStatus4998DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,4999const DoubleAPFloat &Addend,5000APFloat::roundingMode RM) {5001assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");5002APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());5003auto Ret = Tmp.fusedMultiplyAdd(5004APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()),5005APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM);5006*this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());5007return Ret;5008}50095010APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {5011assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");5012APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());5013auto Ret = Tmp.roundToIntegral(RM);5014*this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());5015return Ret;5016}50175018void DoubleAPFloat::changeSign() 
{
  Floats[0].changeSign();
  Floats[1].changeSign();
}

/// Compare the magnitudes of this value and \p RHS.
///
/// The high parts decide unless their magnitudes are equal. On a tie the low
/// parts are compared, taking into account whether each low part has the
/// opposite sign to its high part (and so reduces the magnitude).
APFloat::cmpResult
DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const {
  auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
  if (Result != cmpEqual)
    return Result;
  Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
  if (Result == cmpLessThan || Result == cmpGreaterThan) {
    // "Against" means the low part's sign differs from the high part's.
    auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
    auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
    if (Against && !RHSAgainst)
      return cmpLessThan;
    if (!Against && RHSAgainst)
      return cmpGreaterThan;
    if (!Against && !RHSAgainst)
      return Result;
    // Both low parts subtract from the magnitude, so the ordering flips.
    if (Against && RHSAgainst)
      return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
  }
  return Result;
}

/// The category is that of the high part.
APFloat::fltCategory DoubleAPFloat::getCategory() const {
  return Floats[0].getCategory();
}

/// The sign is that of the high part.
bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }

/// Make this value an infinity with the given sign; the low part becomes +0.
void DoubleAPFloat::makeInf(bool Neg) {
  Floats[0].makeInf(Neg);
  Floats[1].makeZero(/* Neg = */ false);
}

/// Make this value a zero with the given sign; the low part becomes +0.
void DoubleAPFloat::makeZero(bool Neg) {
  Floats[0].makeZero(Neg);
  Floats[1].makeZero(/* Neg = */ false);
}

/// Set this value to the largest-magnitude value, negated when \p Neg.
void DoubleAPFloat::makeLargest(bool Neg) {
  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
  // Fixed bit patterns for the high and low doubles.
  Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
  Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
  if (Neg)
    changeSign();
}

/// Set the high part to the smallest double of the given sign and the low
/// part to +0.
void DoubleAPFloat::makeSmallest(bool Neg) {
  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
  Floats[0].makeSmallest(Neg);
  Floats[1].makeZero(/* Neg = */ false);
}

/// Set the high part to a fixed bit pattern, negated when \p Neg, and the
/// low part to +0.
void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
  Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
  if
(Neg)5078Floats[0].changeSign();5079Floats[1].makeZero(/* Neg = */ false);5080}50815082void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {5083Floats[0].makeNaN(SNaN, Neg, fill);5084Floats[1].makeZero(/* Neg = */ false);5085}50865087APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const {5088auto Result = Floats[0].compare(RHS.Floats[0]);5089// |Float[0]| > |Float[1]|5090if (Result == APFloat::cmpEqual)5091return Floats[1].compare(RHS.Floats[1]);5092return Result;5093}50945095bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const {5096return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&5097Floats[1].bitwiseIsEqual(RHS.Floats[1]);5098}50995100hash_code hash_value(const DoubleAPFloat &Arg) {5101if (Arg.Floats)5102return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));5103return hash_combine(Arg.Semantics);5104}51055106APInt DoubleAPFloat::bitcastToAPInt() const {5107assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");5108uint64_t Data[] = {5109Floats[0].bitcastToAPInt().getRawData()[0],5110Floats[1].bitcastToAPInt().getRawData()[0],5111};5112return APInt(128, 2, Data);5113}51145115Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S,5116roundingMode RM) {5117assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");5118APFloat Tmp(semPPCDoubleDoubleLegacy);5119auto Ret = Tmp.convertFromString(S, RM);5120*this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());5121return Ret;5122}51235124APFloat::opStatus DoubleAPFloat::next(bool nextDown) {5125assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");5126APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());5127auto Ret = Tmp.next(nextDown);5128*this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());5129return Ret;5130}51315132APFloat::opStatus5133DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,5134unsigned int Width, bool IsSigned,5135roundingMode RM, bool *IsExact) 
const {5136assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");5137return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())5138.convertToInteger(Input, Width, IsSigned, RM, IsExact);5139}51405141APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,5142bool IsSigned,5143roundingMode RM) {5144assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");5145APFloat Tmp(semPPCDoubleDoubleLegacy);5146auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);5147*this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());5148return Ret;5149}51505151APFloat::opStatus5152DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input,5153unsigned int InputSize,5154bool IsSigned, roundingMode RM) {5155assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");5156APFloat Tmp(semPPCDoubleDoubleLegacy);5157auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);5158*this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());5159return Ret;5160}51615162APFloat::opStatus5163DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input,5164unsigned int InputSize,5165bool IsSigned, roundingMode RM) {5166assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");5167APFloat Tmp(semPPCDoubleDoubleLegacy);5168auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);5169*this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());5170return Ret;5171}51725173unsigned int DoubleAPFloat::convertToHexString(char *DST,5174unsigned int HexDigits,5175bool UpperCase,5176roundingMode RM) const {5177assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");5178return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())5179.convertToHexString(DST, HexDigits, UpperCase, RM);5180}51815182bool DoubleAPFloat::isDenormal() const {5183return getCategory() == fcNormal &&5184(Floats[0].isDenormal() || Floats[1].isDenormal() ||5185// (double)(Hi + Lo) == Hi defines a normal 
number.5186Floats[0] != Floats[0] + Floats[1]);5187}51885189bool DoubleAPFloat::isSmallest() const {5190if (getCategory() != fcNormal)5191return false;5192DoubleAPFloat Tmp(*this);5193Tmp.makeSmallest(this->isNegative());5194return Tmp.compare(*this) == cmpEqual;5195}51965197bool DoubleAPFloat::isSmallestNormalized() const {5198if (getCategory() != fcNormal)5199return false;52005201DoubleAPFloat Tmp(*this);5202Tmp.makeSmallestNormalized(this->isNegative());5203return Tmp.compare(*this) == cmpEqual;5204}52055206bool DoubleAPFloat::isLargest() const {5207if (getCategory() != fcNormal)5208return false;5209DoubleAPFloat Tmp(*this);5210Tmp.makeLargest(this->isNegative());5211return Tmp.compare(*this) == cmpEqual;5212}52135214bool DoubleAPFloat::isInteger() const {5215assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");5216return Floats[0].isInteger() && Floats[1].isInteger();5217}52185219void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,5220unsigned FormatPrecision,5221unsigned FormatMaxPadding,5222bool TruncateZero) const {5223assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");5224APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())5225.toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);5226}52275228bool DoubleAPFloat::getExactInverse(APFloat *inv) const {5229assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");5230APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());5231if (!inv)5232return Tmp.getExactInverse(nullptr);5233APFloat Inv(semPPCDoubleDoubleLegacy);5234auto Ret = Tmp.getExactInverse(&Inv);5235*inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt());5236return Ret;5237}52385239int DoubleAPFloat::getExactLog2() const {5240// TODO: Implement me5241return INT_MIN;5242}52435244int DoubleAPFloat::getExactLog2Abs() const {5245// TODO: Implement me5246return INT_MIN;5247}52485249DoubleAPFloat scalbn(const DoubleAPFloat &Arg, int Exp,5250APFloat::roundingMode RM) {5251assert(Arg.Semantics == 
&semPPCDoubleDouble && "Unexpected Semantics");5252return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM),5253scalbn(Arg.Floats[1], Exp, RM));5254}52555256DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,5257APFloat::roundingMode RM) {5258assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");5259APFloat First = frexp(Arg.Floats[0], Exp, RM);5260APFloat Second = Arg.Floats[1];5261if (Arg.getCategory() == APFloat::fcNormal)5262Second = scalbn(Second, -Exp, RM);5263return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));5264}52655266} // namespace detail52675268APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {5269if (usesLayout<IEEEFloat>(Semantics)) {5270new (&IEEE) IEEEFloat(std::move(F));5271return;5272}5273if (usesLayout<DoubleAPFloat>(Semantics)) {5274const fltSemantics& S = F.getSemantics();5275new (&Double)5276DoubleAPFloat(Semantics, APFloat(std::move(F), S),5277APFloat(semIEEEdouble));5278return;5279}5280llvm_unreachable("Unexpected semantics");5281}52825283Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str,5284roundingMode RM) {5285APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM));5286}52875288hash_code hash_value(const APFloat &Arg) {5289if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))5290return hash_value(Arg.U.IEEE);5291if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))5292return hash_value(Arg.U.Double);5293llvm_unreachable("Unexpected semantics");5294}52955296APFloat::APFloat(const fltSemantics &Semantics, StringRef S)5297: APFloat(Semantics) {5298auto StatusOrErr = convertFromString(S, rmNearestTiesToEven);5299assert(StatusOrErr && "Invalid floating point representation");5300consumeError(StatusOrErr.takeError());5301}53025303FPClassTest APFloat::classify() const {5304if (isZero())5305return isNegative() ? fcNegZero : fcPosZero;5306if (isNormal())5307return isNegative() ? 
fcNegNormal : fcPosNormal;5308if (isDenormal())5309return isNegative() ? fcNegSubnormal : fcPosSubnormal;5310if (isInfinity())5311return isNegative() ? fcNegInf : fcPosInf;5312assert(isNaN() && "Other class of FP constant");5313return isSignaling() ? fcSNan : fcQNan;5314}53155316APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,5317roundingMode RM, bool *losesInfo) {5318if (&getSemantics() == &ToSemantics) {5319*losesInfo = false;5320return opOK;5321}5322if (usesLayout<IEEEFloat>(getSemantics()) &&5323usesLayout<IEEEFloat>(ToSemantics))5324return U.IEEE.convert(ToSemantics, RM, losesInfo);5325if (usesLayout<IEEEFloat>(getSemantics()) &&5326usesLayout<DoubleAPFloat>(ToSemantics)) {5327assert(&ToSemantics == &semPPCDoubleDouble);5328auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);5329*this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());5330return Ret;5331}5332if (usesLayout<DoubleAPFloat>(getSemantics()) &&5333usesLayout<IEEEFloat>(ToSemantics)) {5334auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);5335*this = APFloat(std::move(getIEEE()), ToSemantics);5336return Ret;5337}5338llvm_unreachable("Unexpected semantics");5339}53405341APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) {5342return APFloat(Semantics, APInt::getAllOnes(Semantics.sizeInBits));5343}53445345void APFloat::print(raw_ostream &OS) const {5346SmallVector<char, 16> Buffer;5347toString(Buffer);5348OS << Buffer << "\n";5349}53505351#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)5352LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); }5353#endif53545355void APFloat::Profile(FoldingSetNodeID &NID) const {5356NID.Add(bitcastToAPInt());5357}53585359/* Same as convertToInteger(integerPart*, ...), except the result is returned in5360an APSInt, whose initial bit-width and signed-ness are used to determine the5361precision of the conversion.5362*/5363APFloat::opStatus APFloat::convertToInteger(APSInt &result,5364roundingMode 
rounding_mode,5365bool *isExact) const {5366unsigned bitWidth = result.getBitWidth();5367SmallVector<uint64_t, 4> parts(result.getNumWords());5368opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),5369rounding_mode, isExact);5370// Keeps the original signed-ness.5371result = APInt(bitWidth, parts);5372return status;5373}53745375double APFloat::convertToDouble() const {5376if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble)5377return getIEEE().convertToDouble();5378assert(getSemantics().isRepresentableBy(semIEEEdouble) &&5379"Float semantics is not representable by IEEEdouble");5380APFloat Temp = *this;5381bool LosesInfo;5382opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo);5383assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");5384(void)St;5385return Temp.getIEEE().convertToDouble();5386}53875388#ifdef HAS_IEE754_FLOAT1285389float128 APFloat::convertToQuad() const {5390if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEquad)5391return getIEEE().convertToQuad();5392assert(getSemantics().isRepresentableBy(semIEEEquad) &&5393"Float semantics is not representable by IEEEquad");5394APFloat Temp = *this;5395bool LosesInfo;5396opStatus St = Temp.convert(semIEEEquad, rmNearestTiesToEven, &LosesInfo);5397assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");5398(void)St;5399return Temp.getIEEE().convertToQuad();5400}5401#endif54025403float APFloat::convertToFloat() const {5404if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle)5405return getIEEE().convertToFloat();5406assert(getSemantics().isRepresentableBy(semIEEEsingle) &&5407"Float semantics is not representable by IEEEsingle");5408APFloat Temp = *this;5409bool LosesInfo;5410opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo);5411assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");5412(void)St;5413return Temp.getIEEE().convertToFloat();5414}54155416} // namespace 
llvm54175418#undef APFLOAT_DISPATCH_ON_SEMANTICS541954205421