Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Support/APFloat.cpp
35232 views
1
//===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements a class to represent arbitrary precision floating
10
// point values and provide a variety of arithmetic operations on them.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "llvm/ADT/APFloat.h"
15
#include "llvm/ADT/APSInt.h"
16
#include "llvm/ADT/ArrayRef.h"
17
#include "llvm/ADT/FloatingPointMode.h"
18
#include "llvm/ADT/FoldingSet.h"
19
#include "llvm/ADT/Hashing.h"
20
#include "llvm/ADT/STLExtras.h"
21
#include "llvm/ADT/StringExtras.h"
22
#include "llvm/ADT/StringRef.h"
23
#include "llvm/Config/llvm-config.h"
24
#include "llvm/Support/Debug.h"
25
#include "llvm/Support/Error.h"
26
#include "llvm/Support/MathExtras.h"
27
#include "llvm/Support/raw_ostream.h"
28
#include <cstring>
29
#include <limits.h>
30
31
// Expands to a statement that forwards METHOD_CALL to the member of the union
// U whose layout matches getSemantics() and returns that call's result from
// the enclosing function. Reaching the end means the semantics use neither
// layout, which is treated as unreachable.
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)                             \
  do {                                                                         \
    if (usesLayout<IEEEFloat>(getSemantics()))                                 \
      return U.IEEE.METHOD_CALL;                                               \
    if (usesLayout<DoubleAPFloat>(getSemantics()))                             \
      return U.Double.METHOD_CALL;                                             \
    llvm_unreachable("Unexpected semantics");                                  \
  } while (false)
39
40
using namespace llvm;
41
42
/// Packs two fcCategory values into a single integer key (_lhs * 4 + _rhs) so
/// that a switch statement can dispatch on the pair of operand categories of
/// a binary operation.
///
/// TODO: If clang source code is ever allowed to use constexpr in its own
/// codebase, change this into a static inline function.
#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
49
50
/* Assumed in hexadecimal significand parsing, and conversion to
51
hexadecimal strings. */
52
static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
53
54
namespace llvm {
55
56
// How the nonfinite values Inf and NaN are represented.
57
enum class fltNonfiniteBehavior {
  /// Standard IEEE 754 behavior. A value is nonfinite when its exponent field
  /// is all 1s; it is Inf when the significand bits are all zero, and NaN
  /// otherwise.
  IEEE754,

  /// Behavior of the Float8ExMyFN* types (Float8E4M3FN, Float8E5M2FNUZ,
  /// Float8E4M3FNUZ, and Float8E4M3B11FNUZ). Inf has no representation, and
  /// operations that would ordinarily produce Inf produce NaN instead.
  /// How NaN itself is encoded is selected by the `fltNanEncoding` enum. All
  /// NaNs are treated as quiet, because the available encodings do not
  /// distinguish between signalling and quiet NaN.
  NanOnly,

  /// Behavior of the Float6E3M2FN, Float6E2M3FN, and Float4E2M1FN types,
  /// which support neither Inf nor NaN values.
  FiniteOnly,
};
76
77
// How NaN values are represented. This is currently only used in combination
78
// with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE
79
// while having IEEE non-finite behavior is liable to lead to unexpected
80
// results.
81
enum class fltNanEncoding {
  /// Standard IEEE behavior: a value is NaN when its exponent is all 1s and
  /// its significand is non-zero.
  IEEE,

  /// Behavior of the Float8E4M3FN type, where NaN is the encoding with the
  /// exponent and mantissa both set to all 1s. This matches the FP8 E4M3 type
  /// described in https://arxiv.org/abs/2209.05433. Both signed and unsigned
  /// NaNs are treated as non-signalling, although the paper does not state
  /// whether the NaN values are signalling or not.
  AllOnes,

  /// Behavior of the Float8E{5,4}M{2,3}FNUZ types, where NaN is the encoding
  /// with a sign bit of 1 and all 0s in the exponent and mantissa (i.e. the
  /// negative zero encoding of an IEEE float). Since there is only one NaN
  /// value, it is treated as a quiet NaN. This matches the behavior described
  /// in https://arxiv.org/abs/2206.02915 .
  NegativeZero,
};
101
102
/* Represents floating point arithmetic semantics. */
103
struct fltSemantics {
104
/* The largest E such that 2^E is representable; this matches the
105
definition of IEEE 754. */
106
APFloatBase::ExponentType maxExponent;
107
108
/* The smallest E such that 2^E is a normalized number; this
109
matches the definition of IEEE 754. */
110
APFloatBase::ExponentType minExponent;
111
112
/* Number of bits in the significand. This includes the integer
113
bit. */
114
unsigned int precision;
115
116
/* Number of bits actually used in the semantics. */
117
unsigned int sizeInBits;
118
119
fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754;
120
121
fltNanEncoding nanEncoding = fltNanEncoding::IEEE;
122
// Returns true if any number described by this semantics can be precisely
123
// represented by the specified semantics. Does not take into account
124
// the value of fltNonfiniteBehavior.
125
bool isRepresentableBy(const fltSemantics &S) const {
126
return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&
127
precision <= S.precision;
128
}
129
};
130
131
static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16};
132
static constexpr fltSemantics semBFloat = {127, -126, 8, 16};
133
static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32};
134
static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
135
static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128};
136
static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
137
static constexpr fltSemantics semFloat8E5M2FNUZ = {
138
15, -15, 3, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
139
static constexpr fltSemantics semFloat8E4M3 = {7, -6, 4, 8};
140
static constexpr fltSemantics semFloat8E4M3FN = {
141
8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes};
142
static constexpr fltSemantics semFloat8E4M3FNUZ = {
143
7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
144
static constexpr fltSemantics semFloat8E4M3B11FNUZ = {
145
4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
146
static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19};
147
static constexpr fltSemantics semFloat6E3M2FN = {
148
4, -2, 3, 6, fltNonfiniteBehavior::FiniteOnly};
149
static constexpr fltSemantics semFloat6E2M3FN = {
150
2, 0, 4, 6, fltNonfiniteBehavior::FiniteOnly};
151
static constexpr fltSemantics semFloat4E2M1FN = {
152
2, 0, 2, 4, fltNonfiniteBehavior::FiniteOnly};
153
static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
154
static constexpr fltSemantics semBogus = {0, 0, 0, 0};
155
156
/* The IBM double-double semantics. Such a number consists of a pair of IEEE
157
64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
158
(double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
159
Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
160
to each other, and two 11-bit exponents.
161
162
Note: we need to make the value different from semBogus as otherwise
163
an unsafe optimization may collapse both values to a single address,
164
and we heavily rely on them having distinct addresses. */
165
static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};
166
167
/* These are legacy semantics for the fallback, inaccurate implementation of
168
IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
169
operation. It's equivalent to having an IEEE number with consecutive 106
170
bits of mantissa and 11 bits of exponent.
171
172
It's not equivalent to IBM double-double. For example, a legit IBM
173
double-double, 1 + epsilon:
174
175
1 + epsilon = 1 + (1 >> 1076)
176
177
is not representable by a consecutive 106 bits of mantissa.
178
179
Currently, these semantics are used in the following way:
180
181
semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
182
(64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
183
semPPCDoubleDoubleLegacy -> IEEE operations
184
185
We use bitcastToAPInt() to get the bit representation (in APInt) of the
186
underlying IEEEdouble, then use the APInt constructor to construct the
187
legacy IEEE float.
188
189
TODO: Implement all operations in semPPCDoubleDouble, and delete these
190
semantics. */
191
static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
192
53 + 53, 128};
193
194
// Maps a Semantics enumerator to the fltSemantics object returned by the
// accessor of the same name. The switch deliberately has no default case; an
// enumerator that is not listed falls through to llvm_unreachable.
const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
  switch (S) {
  case S_IEEEhalf:
    return IEEEhalf();
  case S_BFloat:
    return BFloat();
  case S_IEEEsingle:
    return IEEEsingle();
  case S_IEEEdouble:
    return IEEEdouble();
  case S_IEEEquad:
    return IEEEquad();
  case S_PPCDoubleDouble:
    return PPCDoubleDouble();
  case S_Float8E5M2:
    return Float8E5M2();
  case S_Float8E5M2FNUZ:
    return Float8E5M2FNUZ();
  case S_Float8E4M3:
    return Float8E4M3();
  case S_Float8E4M3FN:
    return Float8E4M3FN();
  case S_Float8E4M3FNUZ:
    return Float8E4M3FNUZ();
  case S_Float8E4M3B11FNUZ:
    return Float8E4M3B11FNUZ();
  case S_FloatTF32:
    return FloatTF32();
  case S_Float6E3M2FN:
    return Float6E3M2FN();
  case S_Float6E2M3FN:
    return Float6E2M3FN();
  case S_Float4E2M1FN:
    return Float4E2M1FN();
  case S_x87DoubleExtended:
    return x87DoubleExtended();
  }
  llvm_unreachable("Unrecognised floating semantics");
}
233
234
// Inverse of EnumToSemantics. Semantics objects are identified by address,
// so Sem is compared against the object returned by each accessor in turn.
APFloatBase::Semantics
APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
  const llvm::fltSemantics *const Ptr = &Sem;

  if (Ptr == &llvm::APFloat::IEEEhalf())
    return S_IEEEhalf;
  if (Ptr == &llvm::APFloat::BFloat())
    return S_BFloat;
  if (Ptr == &llvm::APFloat::IEEEsingle())
    return S_IEEEsingle;
  if (Ptr == &llvm::APFloat::IEEEdouble())
    return S_IEEEdouble;
  if (Ptr == &llvm::APFloat::IEEEquad())
    return S_IEEEquad;
  if (Ptr == &llvm::APFloat::PPCDoubleDouble())
    return S_PPCDoubleDouble;
  if (Ptr == &llvm::APFloat::Float8E5M2())
    return S_Float8E5M2;
  if (Ptr == &llvm::APFloat::Float8E5M2FNUZ())
    return S_Float8E5M2FNUZ;
  if (Ptr == &llvm::APFloat::Float8E4M3())
    return S_Float8E4M3;
  if (Ptr == &llvm::APFloat::Float8E4M3FN())
    return S_Float8E4M3FN;
  if (Ptr == &llvm::APFloat::Float8E4M3FNUZ())
    return S_Float8E4M3FNUZ;
  if (Ptr == &llvm::APFloat::Float8E4M3B11FNUZ())
    return S_Float8E4M3B11FNUZ;
  if (Ptr == &llvm::APFloat::FloatTF32())
    return S_FloatTF32;
  if (Ptr == &llvm::APFloat::Float6E3M2FN())
    return S_Float6E3M2FN;
  if (Ptr == &llvm::APFloat::Float6E2M3FN())
    return S_Float6E2M3FN;
  if (Ptr == &llvm::APFloat::Float4E2M1FN())
    return S_Float4E2M1FN;
  if (Ptr == &llvm::APFloat::x87DoubleExtended())
    return S_x87DoubleExtended;

  llvm_unreachable("Unknown floating semantics");
}
273
274
// Accessors for the file-local semantics constants. Each returns a reference
// to one static object; callers in this file compare those addresses to tell
// semantics apart.
const fltSemantics &APFloatBase::IEEEhalf() {
  return semIEEEhalf;
}
const fltSemantics &APFloatBase::BFloat() {
  return semBFloat;
}
const fltSemantics &APFloatBase::IEEEsingle() {
  return semIEEEsingle;
}
const fltSemantics &APFloatBase::IEEEdouble() {
  return semIEEEdouble;
}
const fltSemantics &APFloatBase::IEEEquad() {
  return semIEEEquad;
}
const fltSemantics &APFloatBase::PPCDoubleDouble() {
  return semPPCDoubleDouble;
}
const fltSemantics &APFloatBase::Float8E5M2() {
  return semFloat8E5M2;
}
const fltSemantics &APFloatBase::Float8E5M2FNUZ() {
  return semFloat8E5M2FNUZ;
}
const fltSemantics &APFloatBase::Float8E4M3() {
  return semFloat8E4M3;
}
const fltSemantics &APFloatBase::Float8E4M3FN() {
  return semFloat8E4M3FN;
}
const fltSemantics &APFloatBase::Float8E4M3FNUZ() {
  return semFloat8E4M3FNUZ;
}
const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() {
  return semFloat8E4M3B11FNUZ;
}
const fltSemantics &APFloatBase::FloatTF32() {
  return semFloatTF32;
}
const fltSemantics &APFloatBase::Float6E3M2FN() {
  return semFloat6E3M2FN;
}
const fltSemantics &APFloatBase::Float6E2M3FN() {
  return semFloat6E2M3FN;
}
const fltSemantics &APFloatBase::Float4E2M1FN() {
  return semFloat4E2M1FN;
}
const fltSemantics &APFloatBase::x87DoubleExtended() {
  return semX87DoubleExtended;
}
const fltSemantics &APFloatBase::Bogus() {
  return semBogus;
}
298
299
constexpr RoundingMode APFloatBase::rmNearestTiesToEven;
300
constexpr RoundingMode APFloatBase::rmTowardPositive;
301
constexpr RoundingMode APFloatBase::rmTowardNegative;
302
constexpr RoundingMode APFloatBase::rmTowardZero;
303
constexpr RoundingMode APFloatBase::rmNearestTiesToAway;
304
305
/* A tight upper bound on number of parts required to hold the value
306
pow(5, power) is
307
308
power * 815 / (351 * integerPartWidth) + 1
309
310
However, whilst the result may require only this many parts,
311
because we are multiplying two values to get it, the
312
multiplication may require an extra part with the excess part
313
being zero (consider the trivial case of 1 * 1, tcFullMultiply
314
requires two parts to hold the single-part result). So we add an
315
extra one to guarantee enough space whilst multiplying. */
316
const unsigned int maxExponent = 16383;
317
const unsigned int maxPrecision = 113;
318
const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
319
const unsigned int maxPowerOfFiveParts =
320
2 +
321
((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));
322
323
unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
324
return semantics.precision;
325
}
326
APFloatBase::ExponentType
327
APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
328
return semantics.maxExponent;
329
}
330
APFloatBase::ExponentType
331
APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
332
return semantics.minExponent;
333
}
334
unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
335
return semantics.sizeInBits;
336
}
337
unsigned int APFloatBase::semanticsIntSizeInBits(const fltSemantics &semantics,
338
bool isSigned) {
339
// The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
340
// at least one more bit than the MaxExponent to hold the max FP value.
341
unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1;
342
// Extra sign bit needed.
343
if (isSigned)
344
++MinBitWidth;
345
return MinBitWidth;
346
}
347
348
bool APFloatBase::isRepresentableAsNormalIn(const fltSemantics &Src,
349
const fltSemantics &Dst) {
350
// Exponent range must be larger.
351
if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent)
352
return false;
353
354
// If the mantissa is long enough, the result value could still be denormal
355
// with a larger exponent range.
356
//
357
// FIXME: This condition is probably not accurate but also shouldn't be a
358
// practical concern with existing types.
359
return Dst.precision >= Src.precision;
360
}
361
362
unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
363
return Sem.sizeInBits;
364
}
365
366
/* Exponent value used for zero: one below the smallest normal exponent. */
static constexpr APFloatBase::ExponentType
exponentZero(const fltSemantics &semantics) {
  return semantics.minExponent - 1;
}
370
371
/* Exponent value used for infinity: one above the largest finite exponent. */
static constexpr APFloatBase::ExponentType
exponentInf(const fltSemantics &semantics) {
  return semantics.maxExponent + 1;
}
375
376
/* Exponent value used for NaN. Formats with IEEE-style (or finite-only)
   non-finite behavior use maxExponent + 1. NanOnly formats place NaN either
   at the zero exponent (NegativeZero encoding) or at maxExponent itself. */
static constexpr APFloatBase::ExponentType
exponentNaN(const fltSemantics &semantics) {
  if (semantics.nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
    return semantics.maxExponent + 1;

  return semantics.nanEncoding == fltNanEncoding::NegativeZero
             ? exponentZero(semantics)
             : semantics.maxExponent;
}
385
386
/* A bunch of private, handy routines. */
387
388
/* Wrap the message ERR in a StringError carrying inconvertibleErrorCode(). */
static inline Error createError(const Twine &Err) {
  return make_error<StringError>(Err, inconvertibleErrorCode());
}
391
392
static constexpr inline unsigned int partCountForBits(unsigned int bits) {
393
return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth;
394
}
395
396
/* Map the character code C to its decimal digit value. Digits yield 0U-9U;
   for anything else the unsigned subtraction wraps or overshoots, so the
   result is >= 10U and callers treat it as "not a digit". */
static inline unsigned int decDigitValue(unsigned int c) {
  const unsigned int zero = '0';
  return c - zero;
}
402
403
/* Return the value of a decimal exponent of the form
404
[+-]ddddddd.
405
406
If the exponent overflows, returns a large exponent with the
407
appropriate sign. */
408
static Expected<int> readExponent(StringRef::iterator begin,
409
StringRef::iterator end) {
410
bool isNegative;
411
unsigned int absExponent;
412
const unsigned int overlargeExponent = 24000; /* FIXME. */
413
StringRef::iterator p = begin;
414
415
// Treat no exponent as 0 to match binutils
416
if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {
417
return 0;
418
}
419
420
isNegative = (*p == '-');
421
if (*p == '-' || *p == '+') {
422
p++;
423
if (p == end)
424
return createError("Exponent has no digits");
425
}
426
427
absExponent = decDigitValue(*p++);
428
if (absExponent >= 10U)
429
return createError("Invalid character in exponent");
430
431
for (; p != end; ++p) {
432
unsigned int value;
433
434
value = decDigitValue(*p);
435
if (value >= 10U)
436
return createError("Invalid character in exponent");
437
438
absExponent = absExponent * 10U + value;
439
if (absExponent >= overlargeExponent) {
440
absExponent = overlargeExponent;
441
break;
442
}
443
}
444
445
if (isNegative)
446
return -(int) absExponent;
447
else
448
return (int) absExponent;
449
}
450
451
/* This is ugly and needs cleaning up, but I don't immediately see
452
how whilst remaining safe. */
453
static Expected<int> totalExponent(StringRef::iterator p,
454
StringRef::iterator end,
455
int exponentAdjustment) {
456
int unsignedExponent;
457
bool negative, overflow;
458
int exponent = 0;
459
460
if (p == end)
461
return createError("Exponent has no digits");
462
463
negative = *p == '-';
464
if (*p == '-' || *p == '+') {
465
p++;
466
if (p == end)
467
return createError("Exponent has no digits");
468
}
469
470
unsignedExponent = 0;
471
overflow = false;
472
for (; p != end; ++p) {
473
unsigned int value;
474
475
value = decDigitValue(*p);
476
if (value >= 10U)
477
return createError("Invalid character in exponent");
478
479
unsignedExponent = unsignedExponent * 10 + value;
480
if (unsignedExponent > 32767) {
481
overflow = true;
482
break;
483
}
484
}
485
486
if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
487
overflow = true;
488
489
if (!overflow) {
490
exponent = unsignedExponent;
491
if (negative)
492
exponent = -exponent;
493
exponent += exponentAdjustment;
494
if (exponent > 32767 || exponent < -32768)
495
overflow = true;
496
}
497
498
if (overflow)
499
exponent = negative ? -32768: 32767;
500
501
return exponent;
502
}
503
504
/* Skip leading '0' characters in [BEGIN, END), then at most one '.', then
   any further '0' characters, and return the resulting position. *DOT is set
   to the position of the skipped '.', or to END if none was skipped. A string
   consisting solely of "." is rejected.  */
static Expected<StringRef::iterator>
skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
                           StringRef::iterator *dot) {
  const auto skipZeroes = [end](StringRef::iterator it) {
    while (it != end && *it == '0')
      ++it;
    return it;
  };

  *dot = end;
  StringRef::iterator cursor = skipZeroes(begin);

  /* No decimal point directly after the zeroes: nothing more to skip. */
  if (cursor == end || *cursor != '.')
    return cursor;

  *dot = cursor++;

  if (end - begin == 1)
    return createError("Significand has no digits");

  return skipZeroes(cursor);
}
524
525
/* Result of interpretDecimal. Given a normal decimal floating point number
   of the form

     dddd.dddd[eE][+-]ddd

   where the decimal point and exponent are optional, interpretDecimal fills
   out a structure D of this type. D->exponent is the exponent appropriate
   when the significand is treated as an integer; D->normalizedExponent is the
   exponent when the significand is taken to have the decimal point after a
   single leading non-zero digit.

   If the value is zero, D->firstSigDigit points to a non-digit, and
   the returned exponent is zero.
*/
struct decimalInfo {
  const char *firstSigDigit; /* First significant digit.  */
  const char *lastSigDigit;  /* Last significant digit.  */
  int exponent;              /* Exponent for an integer significand.  */
  int normalizedExponent;    /* Exponent for a d.ddd significand.  */
};
544
545
/* Scan the decimal number in [BEGIN, END) and fill out *D with the positions
   of its first and last significant digits and with its exponents. Returns
   an error for multiple decimal points, a non-digit other than an exponent
   marker in the significand, a significand without digits, or a malformed
   exponent.  */
static Error interpretDecimal(StringRef::iterator begin,
                              StringRef::iterator end, decimalInfo *D) {
  StringRef::iterator dot = end;

  auto FirstOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
  if (!FirstOrErr)
    return FirstOrErr.takeError();
  StringRef::iterator cur = *FirstOrErr;

  D->firstSigDigit = cur;
  D->exponent = 0;
  D->normalizedExponent = 0;

  /* Advance over the digits, allowing a single decimal point overall. */
  while (cur != end) {
    if (*cur == '.') {
      if (dot != end)
        return createError("String contains multiple dots");
      dot = cur++;
      if (cur == end)
        break;
    }
    if (decDigitValue(*cur) >= 10U)
      break;
    ++cur;
  }

  if (cur != end) {
    /* cur points to the first non-digit in the string; only an exponent
       marker is acceptable there. */
    const bool isExponentMarker = (*cur == 'e' || *cur == 'E');
    if (!isExponentMarker)
      return createError("Invalid character in significand");
    if (cur == begin)
      return createError("Significand has no digits");
    if (dot != end && cur - begin == 1)
      return createError("Significand has no digits");

    auto ExpOrErr = readExponent(cur + 1, end);
    if (!ExpOrErr)
      return ExpOrErr.takeError();
    D->exponent = *ExpOrErr;

    /* Implied decimal point?  */
    if (dot == end)
      dot = cur;
  }

  /* If number is all zeroes accept any exponent.  */
  if (cur != D->firstSigDigit) {
    /* Drop insignificant trailing zeroes: step back over zeroes, and if that
       lands on the decimal point, step over it and repeat. */
    if (cur != begin) {
      do {
        do {
          --cur;
        } while (cur != begin && *cur == '0');
      } while (cur != begin && *cur == '.');
    }

    /* Adjust the exponents for any decimal point.  */
    const auto dotShift = (dot - cur) - (dot > cur);
    D->exponent += static_cast<APFloat::ExponentType>(dotShift);

    const auto digitSpan = (cur - D->firstSigDigit) -
                           (dot > D->firstSigDigit && dot < cur);
    D->normalizedExponent =
        (D->exponent + static_cast<APFloat::ExponentType>(digitSpan));
  }

  D->lastSigDigit = cur;
  return Error::success();
}
610
611
/* Return the trailing fraction of a hexadecimal number.
612
DIGITVALUE is the first hex digit of the fraction, P points to
613
the next digit. */
614
static Expected<lostFraction>
615
trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
616
unsigned int digitValue) {
617
unsigned int hexDigit;
618
619
/* If the first trailing digit isn't 0 or 8 we can work out the
620
fraction immediately. */
621
if (digitValue > 8)
622
return lfMoreThanHalf;
623
else if (digitValue < 8 && digitValue > 0)
624
return lfLessThanHalf;
625
626
// Otherwise we need to find the first non-zero digit.
627
while (p != end && (*p == '0' || *p == '.'))
628
p++;
629
630
if (p == end)
631
return createError("Invalid trailing hexadecimal fraction!");
632
633
hexDigit = hexDigitValue(*p);
634
635
/* If we ran off the end it is exactly zero or one-half, otherwise
636
a little more. */
637
if (hexDigit == UINT_MAX)
638
return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
639
else
640
return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
641
}
642
643
/* Return the fraction lost were a bignum truncated losing the least
644
significant BITS bits. */
645
static lostFraction
646
lostFractionThroughTruncation(const APFloatBase::integerPart *parts,
647
unsigned int partCount,
648
unsigned int bits)
649
{
650
unsigned int lsb;
651
652
lsb = APInt::tcLSB(parts, partCount);
653
654
/* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. */
655
if (bits <= lsb)
656
return lfExactlyZero;
657
if (bits == lsb + 1)
658
return lfExactlyHalf;
659
if (bits <= partCount * APFloatBase::integerPartWidth &&
660
APInt::tcExtractBit(parts, bits - 1))
661
return lfMoreThanHalf;
662
663
return lfLessThanHalf;
664
}
665
666
/* Shift DST right BITS bits noting lost fraction. */
667
static lostFraction
668
shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
669
{
670
lostFraction lost_fraction;
671
672
lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
673
674
APInt::tcShiftRight(dst, parts, bits);
675
676
return lost_fraction;
677
}
678
679
/* Combine the effect of two lost fractions. */
680
static lostFraction
681
combineLostFractions(lostFraction moreSignificant,
682
lostFraction lessSignificant)
683
{
684
if (lessSignificant != lfExactlyZero) {
685
if (moreSignificant == lfExactlyZero)
686
moreSignificant = lfLessThanHalf;
687
else if (moreSignificant == lfExactlyHalf)
688
moreSignificant = lfMoreThanHalf;
689
}
690
691
return moreSignificant;
692
}
693
694
/* Bound on the error, in half-ulps, of the product of two floating point
   numbers that differ from the values they approximate by at most HUERR1
   and HUERR2 half-ulps. The true error is strictly less than the returned
   value. INEXACTMULTIPLY says whether the multiplication itself was
   inexact.

   See "How to Read Floating Point Numbers Accurately" by William D
   Clinger.  */
static unsigned int
HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
{
  assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));

  const unsigned int operandError = HUerr1 + HUerr2;
  const unsigned int multiplyError = inexactMultiply ? 1U : 0U;

  if (operandError != 0)
    return multiplyError + 2 * operandError;

  return multiplyError * 2;  /* <= inexactMultiply half-ulps.  */
}
711
712
/* The number of ulps from the boundary (zero, or half if ISNEAREST)
713
when the least significant BITS are truncated. BITS cannot be
714
zero. */
715
static APFloatBase::integerPart
716
ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
717
bool isNearest) {
718
unsigned int count, partBits;
719
APFloatBase::integerPart part, boundary;
720
721
assert(bits != 0);
722
723
bits--;
724
count = bits / APFloatBase::integerPartWidth;
725
partBits = bits % APFloatBase::integerPartWidth + 1;
726
727
part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));
728
729
if (isNearest)
730
boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
731
else
732
boundary = 0;
733
734
if (count == 0) {
735
if (part - boundary <= boundary - part)
736
return part - boundary;
737
else
738
return boundary - part;
739
}
740
741
if (part == boundary) {
742
while (--count)
743
if (parts[count])
744
return ~(APFloatBase::integerPart) 0; /* A lot. */
745
746
return parts[0];
747
} else if (part == boundary - 1) {
748
while (--count)
749
if (~parts[count])
750
return ~(APFloatBase::integerPart) 0; /* A lot. */
751
752
return -parts[0];
753
}
754
755
return ~(APFloatBase::integerPart) 0; /* A lot. */
756
}
757
758
/* Place pow(5, power) in DST, and return the number of parts used.
759
DST must be at least one part larger than size of the answer. */
760
static unsigned int
761
powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
762
static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
763
APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
764
pow5s[0] = 78125 * 5;
765
766
unsigned int partsCount = 1;
767
APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
768
unsigned int result;
769
assert(power <= maxExponent);
770
771
p1 = dst;
772
p2 = scratch;
773
774
*p1 = firstEightPowers[power & 7];
775
power >>= 3;
776
777
result = 1;
778
pow5 = pow5s;
779
780
for (unsigned int n = 0; power; power >>= 1, n++) {
781
/* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
782
if (n != 0) {
783
APInt::tcFullMultiply(pow5, pow5 - partsCount, pow5 - partsCount,
784
partsCount, partsCount);
785
partsCount *= 2;
786
if (pow5[partsCount - 1] == 0)
787
partsCount--;
788
}
789
790
if (power & 1) {
791
APFloatBase::integerPart *tmp;
792
793
APInt::tcFullMultiply(p2, p1, pow5, result, partsCount);
794
result += partsCount;
795
if (p2[result - 1] == 0)
796
result--;
797
798
/* Now result is in p1 with partsCount parts and p2 is scratch
799
space. */
800
tmp = p1;
801
p1 = p2;
802
p2 = tmp;
803
}
804
805
pow5 += partsCount;
806
}
807
808
if (p1 != dst)
809
APInt::tcAssign(dst, p1, result);
810
811
return result;
812
}
813
814
/* Zero at the end to avoid modular arithmetic when adding one; used
815
when rounding up during hexadecimal output. */
816
static const char hexDigitsLower[] = "0123456789abcdef0";
817
static const char hexDigitsUpper[] = "0123456789ABCDEF0";
818
static const char infinityL[] = "infinity";
819
static const char infinityU[] = "INFINITY";
820
static const char NaNL[] = "nan";
821
static const char NaNU[] = "NAN";
822
823
/* Write out an integerPart in hexadecimal, starting with the most
824
significant nibble. Write out exactly COUNT hexdigits, return
825
COUNT. */
826
static unsigned int
827
partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
828
const char *hexDigitChars)
829
{
830
unsigned int result = count;
831
832
assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);
833
834
part >>= (APFloatBase::integerPartWidth - 4 * count);
835
while (count--) {
836
dst[count] = hexDigitChars[part & 0xf];
837
part >>= 4;
838
}
839
840
return result;
841
}
842
843
/* Write out N as an unsigned decimal integer at DST, without a terminator,
   and return a pointer just past the last digit written.  */
static char *
writeUnsignedDecimal (char *dst, unsigned int n)
{
  char digits[40];
  char *const digitsEnd = digits + sizeof(digits);
  char *cursor = digitsEnd;

  /* Produce the digits least significant first, filling the buffer from
     the back so that they end up in reading order.  */
  do {
    *--cursor = '0' + n % 10;
    n /= 10;
  } while (n != 0);

  while (cursor != digitsEnd)
    *dst++ = *cursor++;

  return dst;
}
860
861
/* Write out a signed decimal integer. */
862
static char *
863
writeSignedDecimal (char *dst, int value)
864
{
865
if (value < 0) {
866
*dst++ = '-';
867
dst = writeUnsignedDecimal(dst, -(unsigned) value);
868
} else
869
dst = writeUnsignedDecimal(dst, value);
870
871
return dst;
872
}
873
874
namespace detail {
875
/* Constructors. */
876
void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
877
unsigned int count;
878
879
semantics = ourSemantics;
880
count = partCount();
881
if (count > 1)
882
significand.parts = new integerPart[count];
883
}
884
885
void IEEEFloat::freeSignificand() {
886
if (needsCleanup())
887
delete [] significand.parts;
888
}
889
890
void IEEEFloat::assign(const IEEEFloat &rhs) {
891
assert(semantics == rhs.semantics);
892
893
sign = rhs.sign;
894
category = rhs.category;
895
exponent = rhs.exponent;
896
if (isFiniteNonZero() || category == fcNaN)
897
copySignificand(rhs);
898
}
899
900
void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
901
assert(isFiniteNonZero() || category == fcNaN);
902
assert(rhs.partCount() >= partCount());
903
904
APInt::tcAssign(significandParts(), rhs.significandParts(),
905
partCount());
906
}
907
908
/* Make this number a NaN, with an arbitrary but deterministic value
909
for the significand. If double or longer, this is a signalling NaN,
910
which may not be ideal. If float, this is QNaN(0). */
911
void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
912
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
913
llvm_unreachable("This floating point format does not support NaN");
914
915
category = fcNaN;
916
sign = Negative;
917
exponent = exponentNaN();
918
919
integerPart *significand = significandParts();
920
unsigned numParts = partCount();
921
922
APInt fill_storage;
923
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
924
// Finite-only types do not distinguish signalling and quiet NaN, so
925
// make them all signalling.
926
SNaN = false;
927
if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
928
sign = true;
929
fill_storage = APInt::getZero(semantics->precision - 1);
930
} else {
931
fill_storage = APInt::getAllOnes(semantics->precision - 1);
932
}
933
fill = &fill_storage;
934
}
935
936
// Set the significand bits to the fill.
937
if (!fill || fill->getNumWords() < numParts)
938
APInt::tcSet(significand, 0, numParts);
939
if (fill) {
940
APInt::tcAssign(significand, fill->getRawData(),
941
std::min(fill->getNumWords(), numParts));
942
943
// Zero out the excess bits of the significand.
944
unsigned bitsToPreserve = semantics->precision - 1;
945
unsigned part = bitsToPreserve / 64;
946
bitsToPreserve %= 64;
947
significand[part] &= ((1ULL << bitsToPreserve) - 1);
948
for (part++; part != numParts; ++part)
949
significand[part] = 0;
950
}
951
952
unsigned QNaNBit = semantics->precision - 2;
953
954
if (SNaN) {
955
// We always have to clear the QNaN bit to make it an SNaN.
956
APInt::tcClearBit(significand, QNaNBit);
957
958
// If there are no bits set in the payload, we have to set
959
// *something* to make it a NaN instead of an infinity;
960
// conventionally, this is the next bit down from the QNaN bit.
961
if (APInt::tcIsZero(significand, numParts))
962
APInt::tcSetBit(significand, QNaNBit - 1);
963
} else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
964
// The only NaN is a quiet NaN, and it has no bits sets in the significand.
965
// Do nothing.
966
} else {
967
// We always have to set the QNaN bit to make it a QNaN.
968
APInt::tcSetBit(significand, QNaNBit);
969
}
970
971
// For x87 extended precision, we want to make a NaN, not a
972
// pseudo-NaN. Maybe we should expose the ability to make
973
// pseudo-NaNs?
974
if (semantics == &semX87DoubleExtended)
975
APInt::tcSetBit(significand, QNaNBit + 1);
976
}
977
978
/// Copy assignment. Self-assignment is a no-op. When the semantics differ,
/// the current significand storage is released and this object is
/// re-initialized for the semantics of \p rhs before the value is copied.
IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) {
  if (this == &rhs)
    return *this;

  if (semantics != rhs.semantics) {
    freeSignificand();
    initialize(rhs.semantics);
  }
  assign(rhs);

  return *this;
}
989
990
/// Move assignment. Releases this object's significand storage, takes over
/// every field of \p rhs (including its significand), and then points
/// \p rhs at semBogus.
IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
  // Must run while `semantics` still describes the storage being released.
  freeSignificand();

  sign = rhs.sign;
  category = rhs.category;
  exponent = rhs.exponent;
  significand = rhs.significand;
  semantics = rhs.semantics;

  rhs.semantics = &semBogus;
  return *this;
}
1002
1003
bool IEEEFloat::isDenormal() const {
1004
return isFiniteNonZero() && (exponent == semantics->minExponent) &&
1005
(APInt::tcExtractBit(significandParts(),
1006
semantics->precision - 1) == 0);
1007
}
1008
1009
bool IEEEFloat::isSmallest() const {
1010
// The smallest number by magnitude in our format will be the smallest
1011
// denormal, i.e. the floating point number with exponent being minimum
1012
// exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
1013
return isFiniteNonZero() && exponent == semantics->minExponent &&
1014
significandMSB() == 0;
1015
}
1016
1017
bool IEEEFloat::isSmallestNormalized() const {
1018
return getCategory() == fcNormal && exponent == semantics->minExponent &&
1019
isSignificandAllZerosExceptMSB();
1020
}
1021
1022
bool IEEEFloat::isSignificandAllOnes() const {
1023
// Test if the significand excluding the integral bit is all ones. This allows
1024
// us to test for binade boundaries.
1025
const integerPart *Parts = significandParts();
1026
const unsigned PartCount = partCountForBits(semantics->precision);
1027
for (unsigned i = 0; i < PartCount - 1; i++)
1028
if (~Parts[i])
1029
return false;
1030
1031
// Set the unused high bits to all ones when we compare.
1032
const unsigned NumHighBits =
1033
PartCount*integerPartWidth - semantics->precision + 1;
1034
assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1035
"Can not have more high bits to fill than integerPartWidth");
1036
const integerPart HighBitFill =
1037
~integerPart(0) << (integerPartWidth - NumHighBits);
1038
if (~(Parts[PartCount - 1] | HighBitFill))
1039
return false;
1040
1041
return true;
1042
}
1043
1044
bool IEEEFloat::isSignificandAllOnesExceptLSB() const {
1045
// Test if the significand excluding the integral bit is all ones except for
1046
// the least significant bit.
1047
const integerPart *Parts = significandParts();
1048
1049
if (Parts[0] & 1)
1050
return false;
1051
1052
const unsigned PartCount = partCountForBits(semantics->precision);
1053
for (unsigned i = 0; i < PartCount - 1; i++) {
1054
if (~Parts[i] & ~unsigned{!i})
1055
return false;
1056
}
1057
1058
// Set the unused high bits to all ones when we compare.
1059
const unsigned NumHighBits =
1060
PartCount * integerPartWidth - semantics->precision + 1;
1061
assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1062
"Can not have more high bits to fill than integerPartWidth");
1063
const integerPart HighBitFill = ~integerPart(0)
1064
<< (integerPartWidth - NumHighBits);
1065
if (~(Parts[PartCount - 1] | HighBitFill | 0x1))
1066
return false;
1067
1068
return true;
1069
}
1070
1071
bool IEEEFloat::isSignificandAllZeros() const {
1072
// Test if the significand excluding the integral bit is all zeros. This
1073
// allows us to test for binade boundaries.
1074
const integerPart *Parts = significandParts();
1075
const unsigned PartCount = partCountForBits(semantics->precision);
1076
1077
for (unsigned i = 0; i < PartCount - 1; i++)
1078
if (Parts[i])
1079
return false;
1080
1081
// Compute how many bits are used in the final word.
1082
const unsigned NumHighBits =
1083
PartCount*integerPartWidth - semantics->precision + 1;
1084
assert(NumHighBits < integerPartWidth && "Can not have more high bits to "
1085
"clear than integerPartWidth");
1086
const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
1087
1088
if (Parts[PartCount - 1] & HighBitMask)
1089
return false;
1090
1091
return true;
1092
}
1093
1094
bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
1095
const integerPart *Parts = significandParts();
1096
const unsigned PartCount = partCountForBits(semantics->precision);
1097
1098
for (unsigned i = 0; i < PartCount - 1; i++) {
1099
if (Parts[i])
1100
return false;
1101
}
1102
1103
const unsigned NumHighBits =
1104
PartCount * integerPartWidth - semantics->precision + 1;
1105
return Parts[PartCount - 1] == integerPart(1)
1106
<< (integerPartWidth - NumHighBits);
1107
}
1108
1109
bool IEEEFloat::isLargest() const {
1110
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1111
semantics->nanEncoding == fltNanEncoding::AllOnes) {
1112
// The largest number by magnitude in our format will be the floating point
1113
// number with maximum exponent and with significand that is all ones except
1114
// the LSB.
1115
return isFiniteNonZero() && exponent == semantics->maxExponent &&
1116
isSignificandAllOnesExceptLSB();
1117
} else {
1118
// The largest number by magnitude in our format will be the floating point
1119
// number with maximum exponent and with significand that is all ones.
1120
return isFiniteNonZero() && exponent == semantics->maxExponent &&
1121
isSignificandAllOnes();
1122
}
1123
}
1124
1125
bool IEEEFloat::isInteger() const {
1126
// This could be made more efficient; I'm going for obviously correct.
1127
if (!isFinite()) return false;
1128
IEEEFloat truncated = *this;
1129
truncated.roundToIntegral(rmTowardZero);
1130
return compare(truncated) == cmpEqual;
1131
}
1132
1133
bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
1134
if (this == &rhs)
1135
return true;
1136
if (semantics != rhs.semantics ||
1137
category != rhs.category ||
1138
sign != rhs.sign)
1139
return false;
1140
if (category==fcZero || category==fcInfinity)
1141
return true;
1142
1143
if (isFiniteNonZero() && exponent != rhs.exponent)
1144
return false;
1145
1146
return std::equal(significandParts(), significandParts() + partCount(),
1147
rhs.significandParts());
1148
}
1149
1150
/// Build a value from the integer \p value: the integer is placed in the
/// lowest significand word with exponent (precision - 1), and the result is
/// then normalized using round-to-nearest-even.
IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
  initialize(&ourSemantics);

  sign = 0;
  category = fcNormal;
  exponent = ourSemantics.precision - 1;

  zeroSignificand();
  significandParts()[0] = value;

  normalize(rmNearestTiesToEven, lfExactlyZero);
}
1159
1160
/// Build a value of the given semantics, initialized via makeZero(false).
IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {
  initialize(&ourSemantics);
  makeZero(/*Neg=*/false);
}
1164
1165
// Delegate to the previous constructor, because later copy constructor may
1166
// actually inspects category, which can't be garbage.
1167
IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
1168
: IEEEFloat(ourSemantics) {}
1169
1170
/// Copy constructor: set up storage for the semantics of \p rhs, then copy
/// its value.
IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
  const fltSemantics *const SourceSemantics = rhs.semantics;
  initialize(SourceSemantics);
  assign(rhs);
}
1174
1175
/// Move constructor: start out with semBogus semantics and then reuse move
/// assignment to take over the contents of \p rhs.
IEEEFloat::IEEEFloat(IEEEFloat &&rhs)
    : semantics(&semBogus) {
  *this = std::move(rhs);
}
1178
1179
/// Destructor: releases the significand storage if this value owns any.
IEEEFloat::~IEEEFloat() {
  freeSignificand();
}
1180
1181
unsigned int IEEEFloat::partCount() const {
1182
return partCountForBits(semantics->precision + 1);
1183
}
1184
1185
/// Read-only access to the significand words; forwards to the non-const
/// overload.
const IEEEFloat::integerPart *IEEEFloat::significandParts() const {
  IEEEFloat *const Self = const_cast<IEEEFloat *>(this);
  return Self->significandParts();
}
1188
1189
/// Pointer to the significand words: the `parts` array when more than one
/// word is needed, otherwise the address of the single inline `part`.
IEEEFloat::integerPart *IEEEFloat::significandParts() {
  return partCount() > 1 ? significand.parts : &significand.part;
}
1195
1196
void IEEEFloat::zeroSignificand() {
1197
APInt::tcSet(significandParts(), 0, partCount());
1198
}
1199
1200
/* Increment an fcNormal floating point number's significand. */
1201
void IEEEFloat::incrementSignificand() {
1202
integerPart carry;
1203
1204
carry = APInt::tcIncrement(significandParts(), partCount());
1205
1206
/* Our callers should never cause us to overflow. */
1207
assert(carry == 0);
1208
(void)carry;
1209
}
1210
1211
/* Add the significand of the RHS. Returns the carry flag. */
1212
IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
1213
integerPart *parts;
1214
1215
parts = significandParts();
1216
1217
assert(semantics == rhs.semantics);
1218
assert(exponent == rhs.exponent);
1219
1220
return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
1221
}
1222
1223
/* Subtract the significand of the RHS with a borrow flag. Returns
1224
the borrow flag. */
1225
IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
1226
integerPart borrow) {
1227
integerPart *parts;
1228
1229
parts = significandParts();
1230
1231
assert(semantics == rhs.semantics);
1232
assert(exponent == rhs.exponent);
1233
1234
return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
1235
partCount());
1236
}
1237
1238
/* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
1239
on to the full-precision result of the multiplication. Returns the
1240
lost fraction. */
1241
lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
1242
IEEEFloat addend) {
1243
unsigned int omsb; // One, not zero, based MSB.
1244
unsigned int partsCount, newPartsCount, precision;
1245
integerPart *lhsSignificand;
1246
integerPart scratch[4];
1247
integerPart *fullSignificand;
1248
lostFraction lost_fraction;
1249
bool ignored;
1250
1251
assert(semantics == rhs.semantics);
1252
1253
precision = semantics->precision;
1254
1255
// Allocate space for twice as many bits as the original significand, plus one
1256
// extra bit for the addition to overflow into.
1257
newPartsCount = partCountForBits(precision * 2 + 1);
1258
1259
if (newPartsCount > 4)
1260
fullSignificand = new integerPart[newPartsCount];
1261
else
1262
fullSignificand = scratch;
1263
1264
lhsSignificand = significandParts();
1265
partsCount = partCount();
1266
1267
APInt::tcFullMultiply(fullSignificand, lhsSignificand,
1268
rhs.significandParts(), partsCount, partsCount);
1269
1270
lost_fraction = lfExactlyZero;
1271
omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1272
exponent += rhs.exponent;
1273
1274
// Assume the operands involved in the multiplication are single-precision
1275
// FP, and the two multiplicants are:
1276
// *this = a23 . a22 ... a0 * 2^e1
1277
// rhs = b23 . b22 ... b0 * 2^e2
1278
// the result of multiplication is:
1279
// *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1280
// Note that there are three significant bits at the left-hand side of the
1281
// radix point: two for the multiplication, and an overflow bit for the
1282
// addition (that will always be zero at this point). Move the radix point
1283
// toward left by two bits, and adjust exponent accordingly.
1284
exponent += 2;
1285
1286
if (addend.isNonZero()) {
1287
// The intermediate result of the multiplication has "2 * precision"
1288
// signicant bit; adjust the addend to be consistent with mul result.
1289
//
1290
Significand savedSignificand = significand;
1291
const fltSemantics *savedSemantics = semantics;
1292
fltSemantics extendedSemantics;
1293
opStatus status;
1294
unsigned int extendedPrecision;
1295
1296
// Normalize our MSB to one below the top bit to allow for overflow.
1297
extendedPrecision = 2 * precision + 1;
1298
if (omsb != extendedPrecision - 1) {
1299
assert(extendedPrecision > omsb);
1300
APInt::tcShiftLeft(fullSignificand, newPartsCount,
1301
(extendedPrecision - 1) - omsb);
1302
exponent -= (extendedPrecision - 1) - omsb;
1303
}
1304
1305
/* Create new semantics. */
1306
extendedSemantics = *semantics;
1307
extendedSemantics.precision = extendedPrecision;
1308
1309
if (newPartsCount == 1)
1310
significand.part = fullSignificand[0];
1311
else
1312
significand.parts = fullSignificand;
1313
semantics = &extendedSemantics;
1314
1315
// Make a copy so we can convert it to the extended semantics.
1316
// Note that we cannot convert the addend directly, as the extendedSemantics
1317
// is a local variable (which we take a reference to).
1318
IEEEFloat extendedAddend(addend);
1319
status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
1320
assert(status == opOK);
1321
(void)status;
1322
1323
// Shift the significand of the addend right by one bit. This guarantees
1324
// that the high bit of the significand is zero (same as fullSignificand),
1325
// so the addition will overflow (if it does overflow at all) into the top bit.
1326
lost_fraction = extendedAddend.shiftSignificandRight(1);
1327
assert(lost_fraction == lfExactlyZero &&
1328
"Lost precision while shifting addend for fused-multiply-add.");
1329
1330
lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1331
1332
/* Restore our state. */
1333
if (newPartsCount == 1)
1334
fullSignificand[0] = significand.part;
1335
significand = savedSignificand;
1336
semantics = savedSemantics;
1337
1338
omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1339
}
1340
1341
// Convert the result having "2 * precision" significant-bits back to the one
1342
// having "precision" significant-bits. First, move the radix point from
1343
// poision "2*precision - 1" to "precision - 1". The exponent need to be
1344
// adjusted by "2*precision - 1" - "precision - 1" = "precision".
1345
exponent -= precision + 1;
1346
1347
// In case MSB resides at the left-hand side of radix point, shift the
1348
// mantissa right by some amount to make sure the MSB reside right before
1349
// the radix point (i.e. "MSB . rest-significant-bits").
1350
//
1351
// Note that the result is not normalized when "omsb < precision". So, the
1352
// caller needs to call IEEEFloat::normalize() if normalized value is
1353
// expected.
1354
if (omsb > precision) {
1355
unsigned int bits, significantParts;
1356
lostFraction lf;
1357
1358
bits = omsb - precision;
1359
significantParts = partCountForBits(omsb);
1360
lf = shiftRight(fullSignificand, significantParts, bits);
1361
lost_fraction = combineLostFractions(lf, lost_fraction);
1362
exponent += bits;
1363
}
1364
1365
APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1366
1367
if (newPartsCount > 4)
1368
delete [] fullSignificand;
1369
1370
return lost_fraction;
1371
}
1372
1373
/// Plain multiplication of significands: forwards to the fused form with a
/// freshly constructed zero addend of this value's semantics.
lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
  return multiplySignificand(rhs, /*addend=*/IEEEFloat(*semantics));
}
1376
1377
/* Multiply the significands of LHS and RHS to DST. */
1378
lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1379
unsigned int bit, i, partsCount;
1380
const integerPart *rhsSignificand;
1381
integerPart *lhsSignificand, *dividend, *divisor;
1382
integerPart scratch[4];
1383
lostFraction lost_fraction;
1384
1385
assert(semantics == rhs.semantics);
1386
1387
lhsSignificand = significandParts();
1388
rhsSignificand = rhs.significandParts();
1389
partsCount = partCount();
1390
1391
if (partsCount > 2)
1392
dividend = new integerPart[partsCount * 2];
1393
else
1394
dividend = scratch;
1395
1396
divisor = dividend + partsCount;
1397
1398
/* Copy the dividend and divisor as they will be modified in-place. */
1399
for (i = 0; i < partsCount; i++) {
1400
dividend[i] = lhsSignificand[i];
1401
divisor[i] = rhsSignificand[i];
1402
lhsSignificand[i] = 0;
1403
}
1404
1405
exponent -= rhs.exponent;
1406
1407
unsigned int precision = semantics->precision;
1408
1409
/* Normalize the divisor. */
1410
bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1411
if (bit) {
1412
exponent += bit;
1413
APInt::tcShiftLeft(divisor, partsCount, bit);
1414
}
1415
1416
/* Normalize the dividend. */
1417
bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1418
if (bit) {
1419
exponent -= bit;
1420
APInt::tcShiftLeft(dividend, partsCount, bit);
1421
}
1422
1423
/* Ensure the dividend >= divisor initially for the loop below.
1424
Incidentally, this means that the division loop below is
1425
guaranteed to set the integer bit to one. */
1426
if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1427
exponent--;
1428
APInt::tcShiftLeft(dividend, partsCount, 1);
1429
assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1430
}
1431
1432
/* Long division. */
1433
for (bit = precision; bit; bit -= 1) {
1434
if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1435
APInt::tcSubtract(dividend, divisor, 0, partsCount);
1436
APInt::tcSetBit(lhsSignificand, bit - 1);
1437
}
1438
1439
APInt::tcShiftLeft(dividend, partsCount, 1);
1440
}
1441
1442
/* Figure out the lost fraction. */
1443
int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1444
1445
if (cmp > 0)
1446
lost_fraction = lfMoreThanHalf;
1447
else if (cmp == 0)
1448
lost_fraction = lfExactlyHalf;
1449
else if (APInt::tcIsZero(dividend, partsCount))
1450
lost_fraction = lfExactlyZero;
1451
else
1452
lost_fraction = lfLessThanHalf;
1453
1454
if (partsCount > 2)
1455
delete [] dividend;
1456
1457
return lost_fraction;
1458
}
1459
1460
unsigned int IEEEFloat::significandMSB() const {
1461
return APInt::tcMSB(significandParts(), partCount());
1462
}
1463
1464
unsigned int IEEEFloat::significandLSB() const {
1465
return APInt::tcLSB(significandParts(), partCount());
1466
}
1467
1468
/* Note that a zero result is NOT normalized to fcZero. */
1469
lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1470
/* Our exponent should not overflow. */
1471
assert((ExponentType) (exponent + bits) >= exponent);
1472
1473
exponent += bits;
1474
1475
return shiftRight(significandParts(), partCount(), bits);
1476
}
1477
1478
/* Shift the significand left BITS bits, subtract BITS from its exponent. */
1479
void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1480
assert(bits < semantics->precision);
1481
1482
if (bits) {
1483
unsigned int partsCount = partCount();
1484
1485
APInt::tcShiftLeft(significandParts(), partsCount, bits);
1486
exponent -= bits;
1487
1488
assert(!APInt::tcIsZero(significandParts(), partsCount));
1489
}
1490
}
1491
1492
/// Compare the magnitudes of two finite non-zero values of the same
/// semantics: first by exponent, then, if the exponents are equal, by an
/// unsigned bignum comparison of the significands.
IEEEFloat::cmpResult
IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
  assert(semantics == rhs.semantics);
  assert(isFiniteNonZero());
  assert(rhs.isFiniteNonZero());

  int order = exponent - rhs.exponent;

  // Equal exponents: the significands decide.
  if (order == 0)
    order = APInt::tcCompare(significandParts(), rhs.significandParts(),
                             partCount());

  if (order == 0)
    return cmpEqual;
  return order > 0 ? cmpGreaterThan : cmpLessThan;
}
1515
1516
/* Set the least significant BITS bits of a bignum, clear the
1517
rest. */
1518
static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
1519
unsigned bits) {
1520
unsigned i = 0;
1521
while (bits > APInt::APINT_BITS_PER_WORD) {
1522
dst[i++] = ~(APInt::WordType)0;
1523
bits -= APInt::APINT_BITS_PER_WORD;
1524
}
1525
1526
if (bits)
1527
dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);
1528
1529
while (i < parts)
1530
dst[i++] = 0;
1531
}
1532
1533
/* Handle overflow. Sign is preserved. We either become infinity or
1534
the largest finite number. */
1535
IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1536
if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::FiniteOnly) {
1537
/* Infinity? */
1538
if (rounding_mode == rmNearestTiesToEven ||
1539
rounding_mode == rmNearestTiesToAway ||
1540
(rounding_mode == rmTowardPositive && !sign) ||
1541
(rounding_mode == rmTowardNegative && sign)) {
1542
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1543
makeNaN(false, sign);
1544
else
1545
category = fcInfinity;
1546
return static_cast<opStatus>(opOverflow | opInexact);
1547
}
1548
}
1549
1550
/* Otherwise we become the largest finite number. */
1551
category = fcNormal;
1552
exponent = semantics->maxExponent;
1553
tcSetLeastSignificantBits(significandParts(), partCount(),
1554
semantics->precision);
1555
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1556
semantics->nanEncoding == fltNanEncoding::AllOnes)
1557
APInt::tcClearBit(significandParts(), 0);
1558
1559
return opInexact;
1560
}
1561
1562
/* Returns TRUE if, when truncating the current number, with BIT the
1563
new LSB, with the given lost fraction and rounding mode, the result
1564
would need to be rounded away from zero (i.e., by increasing the
1565
signficand). This routine must work for fcZero of both signs, and
1566
fcNormal numbers. */
1567
bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1568
lostFraction lost_fraction,
1569
unsigned int bit) const {
1570
/* NaNs and infinities should not have lost fractions. */
1571
assert(isFiniteNonZero() || category == fcZero);
1572
1573
/* Current callers never pass this so we don't handle it. */
1574
assert(lost_fraction != lfExactlyZero);
1575
1576
switch (rounding_mode) {
1577
case rmNearestTiesToAway:
1578
return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1579
1580
case rmNearestTiesToEven:
1581
if (lost_fraction == lfMoreThanHalf)
1582
return true;
1583
1584
/* Our zeroes don't have a significand to test. */
1585
if (lost_fraction == lfExactlyHalf && category != fcZero)
1586
return APInt::tcExtractBit(significandParts(), bit);
1587
1588
return false;
1589
1590
case rmTowardZero:
1591
return false;
1592
1593
case rmTowardPositive:
1594
return !sign;
1595
1596
case rmTowardNegative:
1597
return sign;
1598
1599
default:
1600
break;
1601
}
1602
llvm_unreachable("Invalid rounding mode found");
1603
}
1604
1605
/// Normalize a finite non-zero value and round it according to
/// \p rounding_mode, given the fraction \p lost_fraction already lost by the
/// caller. Values of any other category are returned unchanged with opOK.
///
/// \returns opOK for exact results, opInexact for rounded normal results,
/// opUnderflow | opInexact for rounded denormal or zero results, or whatever
/// handleOverflow() reports when the value overflows.
IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
                                         lostFraction lost_fraction) {
  if (!isFiniteNonZero())
    return opOK;

  // The all-ones value is an overflow if NaN is all ones. If NaN is
  // represented by negative zero, then it is a valid finite value.
  const auto collidesWithAllOnesNaN = [this]() {
    return semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
           semantics->nanEncoding == fltNanEncoding::AllOnes &&
           exponent == semantics->maxExponent && isSignificandAllOnes();
  };

  // Canonicalize a zero result; formats that use negative zero to encode NaN
  // only have a positive zero.
  const auto becomeZero = [this]() {
    category = fcZero;
    if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
      sign = false;
  };

  // Before rounding normalize the exponent of fcNormal numbers.
  // OMSB is the one-based (not zero-based) index of the MSB.
  unsigned int omsb = significandMSB() + 1;

  if (omsb != 0) {
    // OMSB is numbered from 1. We want to place it in the integer bit
    // numbered PRECISION if possible, with a compensating change in the
    // exponent.
    int exponentChange = omsb - semantics->precision;

    // If the resulting exponent is too high, overflow according to the
    // rounding mode.
    if (exponent + exponentChange > semantics->maxExponent)
      return handleOverflow(rounding_mode);

    // Subnormal numbers have exponent minExponent, and their MSB is forced
    // based on that.
    if (exponent + exponentChange < semantics->minExponent)
      exponentChange = semantics->minExponent - exponent;

    // Shifting left is easy as we don't lose precision.
    if (exponentChange < 0) {
      assert(lost_fraction == lfExactlyZero);

      shiftSignificandLeft(-exponentChange);

      return opOK;
    }

    if (exponentChange > 0) {
      // Shift right and capture any new lost fraction.
      const lostFraction lf = shiftSignificandRight(exponentChange);
      lost_fraction = combineLostFractions(lf, lost_fraction);

      // Keep OMSB up-to-date.
      const unsigned int shifted = static_cast<unsigned int>(exponentChange);
      omsb = omsb > shifted ? omsb - shifted : 0;
    }
  }

  if (collidesWithAllOnesNaN())
    return handleOverflow(rounding_mode);

  // Now round the number according to rounding_mode given the lost fraction.

  // As specified in IEEE 754, since we do not trap we do not report
  // underflow for exact results.
  if (lost_fraction == lfExactlyZero) {
    if (omsb == 0)
      becomeZero();

    return opOK;
  }

  // Increment the significand if we're rounding away from zero.
  if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
    if (omsb == 0)
      exponent = semantics->minExponent;

    incrementSignificand();
    omsb = significandMSB() + 1;

    // Did the significand increment overflow?
    if (omsb == (unsigned)semantics->precision + 1) {
      // Renormalize by incrementing the exponent and shifting our significand
      // right one. However if we already have the maximum exponent we
      // overflow to infinity.
      //
      // Overflow handling is invoked with a rounding mode that guarantees the
      // result is turned into the correct infinity representation. This is
      // needed instead of just setting the category to infinity to account
      // for 8-bit floating point types that have no inf, only NaN.
      if (exponent == semantics->maxExponent)
        return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);

      shiftSignificandRight(1);

      return opInexact;
    }

    if (collidesWithAllOnesNaN())
      return handleOverflow(rounding_mode);
  }

  // The normal case - we were and are not denormal, and any significand
  // increment above didn't overflow.
  if (omsb == semantics->precision)
    return opInexact;

  // Otherwise the result is denormal (or has underflowed to zero).
  assert(omsb < semantics->precision);

  if (omsb == 0)
    becomeZero();

  // The fcZero case is a denormal that underflowed to zero.
  return (opStatus)(opUnderflow | opInexact);
}
1731
1732
/// Handle the category combinations of an addition (or, when \p subtract is
/// true, a subtraction) that need no significand arithmetic.
///
/// For two fcNormal operands nothing is done and opDivByZero is returned;
/// every other combination returns opOK or opInvalidOp.
IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
                                                     bool subtract) {
  switch (PackCategoriesIntoKey(category, rhs.category)) {
  // NaN on the right only: the result takes the right-hand NaN, then shares
  // the signalling handling below.
  case PackCategoriesIntoKey(fcZero, fcNaN):
  case PackCategoriesIntoKey(fcNormal, fcNaN):
  case PackCategoriesIntoKey(fcInfinity, fcNaN):
    assign(rhs);
    [[fallthrough]];
  // NaN result: quiet a signalling NaN and report an invalid operation if
  // either the result or the right operand is signalling.
  case PackCategoriesIntoKey(fcNaN, fcZero):
  case PackCategoriesIntoKey(fcNaN, fcNormal):
  case PackCategoriesIntoKey(fcNaN, fcInfinity):
  case PackCategoriesIntoKey(fcNaN, fcNaN):
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;

  // The left operand already is the result.
  case PackCategoriesIntoKey(fcNormal, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcInfinity, fcZero):
    return opOK;

  // An infinity on the right wins; its sign flips for subtraction.
  case PackCategoriesIntoKey(fcNormal, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcInfinity):
    category = fcInfinity;
    sign = rhs.sign ^ subtract;
    return opOK;

  // Zero on the left: the result is the right operand, with its sign flipped
  // for subtraction.
  case PackCategoriesIntoKey(fcZero, fcNormal):
    assign(rhs);
    sign = rhs.sign ^ subtract;
    return opOK;

  case PackCategoriesIntoKey(fcZero, fcZero):
    // Sign depends on rounding mode; handled by caller.
    return opOK;

  case PackCategoriesIntoKey(fcInfinity, fcInfinity): {
    // Differently signed infinities can only be validly subtracted.
    const bool signsDiffer = (sign ^ rhs.sign) != 0;
    if (signsDiffer != subtract) {
      makeNaN();
      return opInvalidOp;
    }
    return opOK;
  }

  // Two normal numbers: nothing is done here beyond returning opDivByZero.
  case PackCategoriesIntoKey(fcNormal, fcNormal):
    return opDivByZero;

  default:
    llvm_unreachable(nullptr);
  }
}
1787
1788
/* Add or subtract two normal numbers. */
1789
lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1790
bool subtract) {
1791
integerPart carry;
1792
lostFraction lost_fraction;
1793
int bits;
1794
1795
/* Determine if the operation on the absolute values is effectively
1796
an addition or subtraction. */
1797
subtract ^= static_cast<bool>(sign ^ rhs.sign);
1798
1799
/* Are we bigger exponent-wise than the RHS? */
1800
bits = exponent - rhs.exponent;
1801
1802
/* Subtraction is more subtle than one might naively expect. */
1803
if (subtract) {
1804
IEEEFloat temp_rhs(rhs);
1805
1806
if (bits == 0)
1807
lost_fraction = lfExactlyZero;
1808
else if (bits > 0) {
1809
lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1810
shiftSignificandLeft(1);
1811
} else {
1812
lost_fraction = shiftSignificandRight(-bits - 1);
1813
temp_rhs.shiftSignificandLeft(1);
1814
}
1815
1816
// Should we reverse the subtraction.
1817
if (compareAbsoluteValue(temp_rhs) == cmpLessThan) {
1818
carry = temp_rhs.subtractSignificand
1819
(*this, lost_fraction != lfExactlyZero);
1820
copySignificand(temp_rhs);
1821
sign = !sign;
1822
} else {
1823
carry = subtractSignificand
1824
(temp_rhs, lost_fraction != lfExactlyZero);
1825
}
1826
1827
/* Invert the lost fraction - it was on the RHS and
1828
subtracted. */
1829
if (lost_fraction == lfLessThanHalf)
1830
lost_fraction = lfMoreThanHalf;
1831
else if (lost_fraction == lfMoreThanHalf)
1832
lost_fraction = lfLessThanHalf;
1833
1834
/* The code above is intended to ensure that no borrow is
1835
necessary. */
1836
assert(!carry);
1837
(void)carry;
1838
} else {
1839
if (bits > 0) {
1840
IEEEFloat temp_rhs(rhs);
1841
1842
lost_fraction = temp_rhs.shiftSignificandRight(bits);
1843
carry = addSignificand(temp_rhs);
1844
} else {
1845
lost_fraction = shiftSignificandRight(-bits);
1846
carry = addSignificand(rhs);
1847
}
1848
1849
/* We have a guard bit; generating a carry cannot happen. */
1850
assert(!carry);
1851
(void)carry;
1852
}
1853
1854
return lost_fraction;
1855
}
1856
1857
/// Handle every multiplication in which at least one operand is not a normal
/// number. Returns opOK or opInvalidOp; when both operands are normal nothing
/// is modified and opOK is returned.
IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
  const bool lhsIsNaN = category == fcNaN;

  if (lhsIsNaN || rhs.category == fcNaN) {
    if (!lhsIsNaN) {
      // Only the right operand is a NaN: it becomes the result.
      assign(rhs);
      sign = false;
    }
    // restore the original sign
    sign ^= rhs.sign;
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;
  }

  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  // Anything non-zero times infinity is infinity.
  case PackCategoriesIntoKey(fcNormal, fcInfinity):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
    category = fcInfinity;
    return opOK;

  // Anything finite times zero is zero.
  case PackCategoriesIntoKey(fcZero, fcNormal):
  case PackCategoriesIntoKey(fcNormal, fcZero):
  case PackCategoriesIntoKey(fcZero, fcZero):
    category = fcZero;
    return opOK;

  // Zero times infinity is invalid.
  case PackCategoriesIntoKey(fcZero, fcInfinity):
  case PackCategoriesIntoKey(fcInfinity, fcZero):
    makeNaN();
    return opInvalidOp;

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    return opOK;
  }
}
1900
1901
/// Handle every division in which at least one operand is not a normal
/// number. Returns opOK, opInvalidOp, or opDivByZero for a normal value
/// divided by zero; when both operands are normal nothing is modified and
/// opOK is returned.
IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
  const bool lhsIsNaN = category == fcNaN;

  if (lhsIsNaN || rhs.category == fcNaN) {
    if (!lhsIsNaN) {
      // Only the divisor is a NaN: it becomes the result.
      assign(rhs);
      sign = false;
    }
    // restore the original sign
    sign ^= rhs.sign;
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;
  }

  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  // The dividend's category is already the category of the result.
  case PackCategoriesIntoKey(fcInfinity, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcZero, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcNormal):
  case PackCategoriesIntoKey(fcNormal, fcNormal):
    return opOK;

  case PackCategoriesIntoKey(fcNormal, fcInfinity):
    category = fcZero;
    return opOK;

  case PackCategoriesIntoKey(fcNormal, fcZero):
    // Formats whose only non-finite values are NaNs yield a NaN here;
    // all others yield an infinity.
    if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
      category = fcInfinity;
    else
      makeNaN(false, sign);
    return opDivByZero;

  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcZero):
    makeNaN();
    return opInvalidOp;
  }
}
1949
1950
/// Handle every mod operation in which at least one operand is not a normal
/// number. Returns opOK or opInvalidOp; when both operands are normal nothing
/// is modified and opOK is returned.
IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
  const bool lhsIsNaN = category == fcNaN;

  if (lhsIsNaN || rhs.category == fcNaN) {
    // When only the right operand is a NaN, it becomes the result.
    if (!lhsIsNaN)
      assign(rhs);
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;
  }

  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  // The left operand is left untouched in these cases.
  case PackCategoriesIntoKey(fcZero, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcNormal):
  case PackCategoriesIntoKey(fcNormal, fcInfinity):
  case PackCategoriesIntoKey(fcNormal, fcNormal):
    return opOK;

  // An infinite left operand or a zero right operand is invalid.
  case PackCategoriesIntoKey(fcNormal, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcZero):
    makeNaN();
    return opInvalidOp;
  }
}
1987
1988
/// Handle every remainder operation in which at least one operand is not a
/// normal number. Returns opOK or opInvalidOp for those; when both operands
/// are normal it returns opDivByZero as a fake status meaning "not a special
/// case".
IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
  const bool lhsIsNaN = category == fcNaN;

  if (lhsIsNaN || rhs.category == fcNaN) {
    // When only the right operand is a NaN, it becomes the result.
    if (!lhsIsNaN)
      assign(rhs);
    if (isSignaling()) {
      makeQuiet();
      return opInvalidOp;
    }
    return rhs.isSignaling() ? opInvalidOp : opOK;
  }

  switch (PackCategoriesIntoKey(category, rhs.category)) {
  default:
    llvm_unreachable(nullptr);

  // The left operand is left untouched in these cases.
  case PackCategoriesIntoKey(fcZero, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcNormal):
  case PackCategoriesIntoKey(fcNormal, fcInfinity):
    return opOK;

  // An infinite left operand or a zero right operand is invalid.
  case PackCategoriesIntoKey(fcNormal, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcZero):
  case PackCategoriesIntoKey(fcInfinity, fcNormal):
  case PackCategoriesIntoKey(fcInfinity, fcInfinity):
  case PackCategoriesIntoKey(fcZero, fcZero):
    makeNaN();
    return opInvalidOp;

  case PackCategoriesIntoKey(fcNormal, fcNormal):
    return opDivByZero; // fake status, indicating this is not a special case
  }
}
2025
2026
/* Change sign. */
2027
void IEEEFloat::changeSign() {
2028
// With NaN-as-negative-zero, neither NaN or negative zero can change
2029
// their signs.
2030
if (semantics->nanEncoding == fltNanEncoding::NegativeZero &&
2031
(isZero() || isNaN()))
2032
return;
2033
/* Look mummy, this one's easy. */
2034
sign = !sign;
2035
}
2036
2037
/* Normalized addition or subtraction. */
2038
IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
2039
roundingMode rounding_mode,
2040
bool subtract) {
2041
opStatus fs;
2042
2043
fs = addOrSubtractSpecials(rhs, subtract);
2044
2045
/* This return code means it was not a simple case. */
2046
if (fs == opDivByZero) {
2047
lostFraction lost_fraction;
2048
2049
lost_fraction = addOrSubtractSignificand(rhs, subtract);
2050
fs = normalize(rounding_mode, lost_fraction);
2051
2052
/* Can only be zero if we lost no fraction. */
2053
assert(category != fcZero || lost_fraction == lfExactlyZero);
2054
}
2055
2056
/* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2057
positive zero unless rounding to minus infinity, except that
2058
adding two like-signed zeroes gives that zero. */
2059
if (category == fcZero) {
2060
if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
2061
sign = (rounding_mode == rmTowardNegative);
2062
// NaN-in-negative-zero means zeros need to be normalized to +0.
2063
if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2064
sign = false;
2065
}
2066
2067
return fs;
2068
}
2069
2070
/* Normalized addition. */
2071
IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,
2072
roundingMode rounding_mode) {
2073
return addOrSubtract(rhs, rounding_mode, false);
2074
}
2075
2076
/* Normalized subtraction. */
2077
IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs,
2078
roundingMode rounding_mode) {
2079
return addOrSubtract(rhs, rounding_mode, true);
2080
}
2081
2082
/* Normalized multiply. */
2083
IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
2084
roundingMode rounding_mode) {
2085
opStatus fs;
2086
2087
sign ^= rhs.sign;
2088
fs = multiplySpecials(rhs);
2089
2090
if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2091
sign = false;
2092
if (isFiniteNonZero()) {
2093
lostFraction lost_fraction = multiplySignificand(rhs);
2094
fs = normalize(rounding_mode, lost_fraction);
2095
if (lost_fraction != lfExactlyZero)
2096
fs = (opStatus) (fs | opInexact);
2097
}
2098
2099
return fs;
2100
}
2101
2102
/* Normalized divide. */
2103
IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
2104
roundingMode rounding_mode) {
2105
opStatus fs;
2106
2107
sign ^= rhs.sign;
2108
fs = divideSpecials(rhs);
2109
2110
if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2111
sign = false;
2112
if (isFiniteNonZero()) {
2113
lostFraction lost_fraction = divideSignificand(rhs);
2114
fs = normalize(rounding_mode, lost_fraction);
2115
if (lost_fraction != lfExactlyZero)
2116
fs = (opStatus) (fs | opInexact);
2117
}
2118
2119
return fs;
2120
}
2121
2122
/* Normalized remainder. */
2123
IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
2124
opStatus fs;
2125
unsigned int origSign = sign;
2126
2127
// First handle the special cases.
2128
fs = remainderSpecials(rhs);
2129
if (fs != opDivByZero)
2130
return fs;
2131
2132
fs = opOK;
2133
2134
// Make sure the current value is less than twice the denom. If the addition
2135
// did not succeed (an overflow has happened), which means that the finite
2136
// value we currently posses must be less than twice the denom (as we are
2137
// using the same semantics).
2138
IEEEFloat P2 = rhs;
2139
if (P2.add(rhs, rmNearestTiesToEven) == opOK) {
2140
fs = mod(P2);
2141
assert(fs == opOK);
2142
}
2143
2144
// Lets work with absolute numbers.
2145
IEEEFloat P = rhs;
2146
P.sign = false;
2147
sign = false;
2148
2149
//
2150
// To calculate the remainder we use the following scheme.
2151
//
2152
// The remainder is defained as follows:
2153
//
2154
// remainder = numer - rquot * denom = x - r * p
2155
//
2156
// Where r is the result of: x/p, rounded toward the nearest integral value
2157
// (with halfway cases rounded toward the even number).
2158
//
2159
// Currently, (after x mod 2p):
2160
// r is the number of 2p's present inside x, which is inherently, an even
2161
// number of p's.
2162
//
2163
// We may split the remaining calculation into 4 options:
2164
// - if x < 0.5p then we round to the nearest number with is 0, and are done.
2165
// - if x == 0.5p then we round to the nearest even number which is 0, and we
2166
// are done as well.
2167
// - if 0.5p < x < p then we round to nearest number which is 1, and we have
2168
// to subtract 1p at least once.
2169
// - if x >= p then we must subtract p at least once, as x must be a
2170
// remainder.
2171
//
2172
// By now, we were done, or we added 1 to r, which in turn, now an odd number.
2173
//
2174
// We can now split the remaining calculation to the following 3 options:
2175
// - if x < 0.5p then we round to the nearest number with is 0, and are done.
2176
// - if x == 0.5p then we round to the nearest even number. As r is odd, we
2177
// must round up to the next even number. so we must subtract p once more.
2178
// - if x > 0.5p (and inherently x < p) then we must round r up to the next
2179
// integral, and subtract p once more.
2180
//
2181
2182
// Extend the semantics to prevent an overflow/underflow or inexact result.
2183
bool losesInfo;
2184
fltSemantics extendedSemantics = *semantics;
2185
extendedSemantics.maxExponent++;
2186
extendedSemantics.minExponent--;
2187
extendedSemantics.precision += 2;
2188
2189
IEEEFloat VEx = *this;
2190
fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2191
assert(fs == opOK && !losesInfo);
2192
IEEEFloat PEx = P;
2193
fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2194
assert(fs == opOK && !losesInfo);
2195
2196
// It is simpler to work with 2x instead of 0.5p, and we do not need to lose
2197
// any fraction.
2198
fs = VEx.add(VEx, rmNearestTiesToEven);
2199
assert(fs == opOK);
2200
2201
if (VEx.compare(PEx) == cmpGreaterThan) {
2202
fs = subtract(P, rmNearestTiesToEven);
2203
assert(fs == opOK);
2204
2205
// Make VEx = this.add(this), but because we have different semantics, we do
2206
// not want to `convert` again, so we just subtract PEx twice (which equals
2207
// to the desired value).
2208
fs = VEx.subtract(PEx, rmNearestTiesToEven);
2209
assert(fs == opOK);
2210
fs = VEx.subtract(PEx, rmNearestTiesToEven);
2211
assert(fs == opOK);
2212
2213
cmpResult result = VEx.compare(PEx);
2214
if (result == cmpGreaterThan || result == cmpEqual) {
2215
fs = subtract(P, rmNearestTiesToEven);
2216
assert(fs == opOK);
2217
}
2218
}
2219
2220
if (isZero()) {
2221
sign = origSign; // IEEE754 requires this
2222
if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2223
// But some 8-bit floats only have positive 0.
2224
sign = false;
2225
}
2226
2227
else
2228
sign ^= origSign;
2229
return fs;
2230
}
2231
2232
/* Normalized llvm frem (C fmod). */
2233
IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
2234
opStatus fs;
2235
fs = modSpecials(rhs);
2236
unsigned int origSign = sign;
2237
2238
while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
2239
compareAbsoluteValue(rhs) != cmpLessThan) {
2240
int Exp = ilogb(*this) - ilogb(rhs);
2241
IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven);
2242
// V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
2243
// check for it.
2244
if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan)
2245
V = scalbn(rhs, Exp - 1, rmNearestTiesToEven);
2246
V.sign = sign;
2247
2248
fs = subtract(V, rmNearestTiesToEven);
2249
assert(fs==opOK);
2250
}
2251
if (isZero()) {
2252
sign = origSign; // fmod requires this
2253
if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2254
sign = false;
2255
}
2256
return fs;
2257
}
2258
2259
/* Normalized fused-multiply-add. */
2260
IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
2261
const IEEEFloat &addend,
2262
roundingMode rounding_mode) {
2263
opStatus fs;
2264
2265
/* Post-multiplication sign, before addition. */
2266
sign ^= multiplicand.sign;
2267
2268
/* If and only if all arguments are normal do we need to do an
2269
extended-precision calculation. */
2270
if (isFiniteNonZero() &&
2271
multiplicand.isFiniteNonZero() &&
2272
addend.isFinite()) {
2273
lostFraction lost_fraction;
2274
2275
lost_fraction = multiplySignificand(multiplicand, addend);
2276
fs = normalize(rounding_mode, lost_fraction);
2277
if (lost_fraction != lfExactlyZero)
2278
fs = (opStatus) (fs | opInexact);
2279
2280
/* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2281
positive zero unless rounding to minus infinity, except that
2282
adding two like-signed zeroes gives that zero. */
2283
if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {
2284
sign = (rounding_mode == rmTowardNegative);
2285
if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2286
sign = false;
2287
}
2288
} else {
2289
fs = multiplySpecials(multiplicand);
2290
2291
/* FS can only be opOK or opInvalidOp. There is no more work
2292
to do in the latter case. The IEEE-754R standard says it is
2293
implementation-defined in this case whether, if ADDEND is a
2294
quiet NaN, we raise invalid op; this implementation does so.
2295
2296
If we need to do the addition we can do so with normal
2297
precision. */
2298
if (fs == opOK)
2299
fs = addOrSubtract(addend, rounding_mode, false);
2300
}
2301
2302
return fs;
2303
}
2304
2305
/* Rounding-mode correct round to integral value. */
2306
IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
2307
opStatus fs;
2308
2309
if (isInfinity())
2310
// [IEEE Std 754-2008 6.1]:
2311
// The behavior of infinity in floating-point arithmetic is derived from the
2312
// limiting cases of real arithmetic with operands of arbitrarily
2313
// large magnitude, when such a limit exists.
2314
// ...
2315
// Operations on infinite operands are usually exact and therefore signal no
2316
// exceptions ...
2317
return opOK;
2318
2319
if (isNaN()) {
2320
if (isSignaling()) {
2321
// [IEEE Std 754-2008 6.2]:
2322
// Under default exception handling, any operation signaling an invalid
2323
// operation exception and for which a floating-point result is to be
2324
// delivered shall deliver a quiet NaN.
2325
makeQuiet();
2326
// [IEEE Std 754-2008 6.2]:
2327
// Signaling NaNs shall be reserved operands that, under default exception
2328
// handling, signal the invalid operation exception(see 7.2) for every
2329
// general-computational and signaling-computational operation except for
2330
// the conversions described in 5.12.
2331
return opInvalidOp;
2332
} else {
2333
// [IEEE Std 754-2008 6.2]:
2334
// For an operation with quiet NaN inputs, other than maximum and minimum
2335
// operations, if a floating-point result is to be delivered the result
2336
// shall be a quiet NaN which should be one of the input NaNs.
2337
// ...
2338
// Every general-computational and quiet-computational operation involving
2339
// one or more input NaNs, none of them signaling, shall signal no
2340
// exception, except fusedMultiplyAdd might signal the invalid operation
2341
// exception(see 7.2).
2342
return opOK;
2343
}
2344
}
2345
2346
if (isZero()) {
2347
// [IEEE Std 754-2008 6.3]:
2348
// ... the sign of the result of conversions, the quantize operation, the
2349
// roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2350
// the sign of the first or only operand.
2351
return opOK;
2352
}
2353
2354
// If the exponent is large enough, we know that this value is already
2355
// integral, and the arithmetic below would potentially cause it to saturate
2356
// to +/-Inf. Bail out early instead.
2357
if (exponent+1 >= (int)semanticsPrecision(*semantics))
2358
return opOK;
2359
2360
// The algorithm here is quite simple: we add 2^(p-1), where p is the
2361
// precision of our format, and then subtract it back off again. The choice
2362
// of rounding modes for the addition/subtraction determines the rounding mode
2363
// for our integral rounding as well.
2364
// NOTE: When the input value is negative, we do subtraction followed by
2365
// addition instead.
2366
APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
2367
IntegerConstant <<= semanticsPrecision(*semantics)-1;
2368
IEEEFloat MagicConstant(*semantics);
2369
fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
2370
rmNearestTiesToEven);
2371
assert(fs == opOK);
2372
MagicConstant.sign = sign;
2373
2374
// Preserve the input sign so that we can handle the case of zero result
2375
// correctly.
2376
bool inputSign = isNegative();
2377
2378
fs = add(MagicConstant, rounding_mode);
2379
2380
// Current value and 'MagicConstant' are both integers, so the result of the
2381
// subtraction is always exact according to Sterbenz' lemma.
2382
subtract(MagicConstant, rounding_mode);
2383
2384
// Restore the input sign.
2385
if (inputSign != isNegative())
2386
changeSign();
2387
2388
return fs;
2389
}
2390
2391
2392
/* Comparison requires normalized numbers. */
2393
IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
2394
cmpResult result;
2395
2396
assert(semantics == rhs.semantics);
2397
2398
switch (PackCategoriesIntoKey(category, rhs.category)) {
2399
default:
2400
llvm_unreachable(nullptr);
2401
2402
case PackCategoriesIntoKey(fcNaN, fcZero):
2403
case PackCategoriesIntoKey(fcNaN, fcNormal):
2404
case PackCategoriesIntoKey(fcNaN, fcInfinity):
2405
case PackCategoriesIntoKey(fcNaN, fcNaN):
2406
case PackCategoriesIntoKey(fcZero, fcNaN):
2407
case PackCategoriesIntoKey(fcNormal, fcNaN):
2408
case PackCategoriesIntoKey(fcInfinity, fcNaN):
2409
return cmpUnordered;
2410
2411
case PackCategoriesIntoKey(fcInfinity, fcNormal):
2412
case PackCategoriesIntoKey(fcInfinity, fcZero):
2413
case PackCategoriesIntoKey(fcNormal, fcZero):
2414
if (sign)
2415
return cmpLessThan;
2416
else
2417
return cmpGreaterThan;
2418
2419
case PackCategoriesIntoKey(fcNormal, fcInfinity):
2420
case PackCategoriesIntoKey(fcZero, fcInfinity):
2421
case PackCategoriesIntoKey(fcZero, fcNormal):
2422
if (rhs.sign)
2423
return cmpGreaterThan;
2424
else
2425
return cmpLessThan;
2426
2427
case PackCategoriesIntoKey(fcInfinity, fcInfinity):
2428
if (sign == rhs.sign)
2429
return cmpEqual;
2430
else if (sign)
2431
return cmpLessThan;
2432
else
2433
return cmpGreaterThan;
2434
2435
case PackCategoriesIntoKey(fcZero, fcZero):
2436
return cmpEqual;
2437
2438
case PackCategoriesIntoKey(fcNormal, fcNormal):
2439
break;
2440
}
2441
2442
/* Two normal numbers. Do they have the same sign? */
2443
if (sign != rhs.sign) {
2444
if (sign)
2445
result = cmpLessThan;
2446
else
2447
result = cmpGreaterThan;
2448
} else {
2449
/* Compare absolute values; invert result if negative. */
2450
result = compareAbsoluteValue(rhs);
2451
2452
if (sign) {
2453
if (result == cmpLessThan)
2454
result = cmpGreaterThan;
2455
else if (result == cmpGreaterThan)
2456
result = cmpLessThan;
2457
}
2458
}
2459
2460
return result;
2461
}
2462
2463
/// IEEEFloat::convert - convert a value of one floating point type to another.
2464
/// The return value corresponds to the IEEE754 exceptions. *losesInfo
2465
/// records whether the transformation lost information, i.e. whether
2466
/// converting the result back to the original type will produce the
2467
/// original value (this is almost the same as return value==fsOK, but there
2468
/// are edge cases where this is not so).
2469
2470
IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
2471
roundingMode rounding_mode,
2472
bool *losesInfo) {
2473
lostFraction lostFraction;
2474
unsigned int newPartCount, oldPartCount;
2475
opStatus fs;
2476
int shift;
2477
const fltSemantics &fromSemantics = *semantics;
2478
bool is_signaling = isSignaling();
2479
2480
lostFraction = lfExactlyZero;
2481
newPartCount = partCountForBits(toSemantics.precision + 1);
2482
oldPartCount = partCount();
2483
shift = toSemantics.precision - fromSemantics.precision;
2484
2485
bool X86SpecialNan = false;
2486
if (&fromSemantics == &semX87DoubleExtended &&
2487
&toSemantics != &semX87DoubleExtended && category == fcNaN &&
2488
(!(*significandParts() & 0x8000000000000000ULL) ||
2489
!(*significandParts() & 0x4000000000000000ULL))) {
2490
// x86 has some unusual NaNs which cannot be represented in any other
2491
// format; note them here.
2492
X86SpecialNan = true;
2493
}
2494
2495
// If this is a truncation of a denormal number, and the target semantics
2496
// has larger exponent range than the source semantics (this can happen
2497
// when truncating from PowerPC double-double to double format), the
2498
// right shift could lose result mantissa bits. Adjust exponent instead
2499
// of performing excessive shift.
2500
// Also do a similar trick in case shifting denormal would produce zero
2501
// significand as this case isn't handled correctly by normalize.
2502
if (shift < 0 && isFiniteNonZero()) {
2503
int omsb = significandMSB() + 1;
2504
int exponentChange = omsb - fromSemantics.precision;
2505
if (exponent + exponentChange < toSemantics.minExponent)
2506
exponentChange = toSemantics.minExponent - exponent;
2507
if (exponentChange < shift)
2508
exponentChange = shift;
2509
if (exponentChange < 0) {
2510
shift -= exponentChange;
2511
exponent += exponentChange;
2512
} else if (omsb <= -shift) {
2513
exponentChange = omsb + shift - 1; // leave at least one bit set
2514
shift -= exponentChange;
2515
exponent += exponentChange;
2516
}
2517
}
2518
2519
// If this is a truncation, perform the shift before we narrow the storage.
2520
if (shift < 0 && (isFiniteNonZero() ||
2521
(category == fcNaN && semantics->nonFiniteBehavior !=
2522
fltNonfiniteBehavior::NanOnly)))
2523
lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2524
2525
// Fix the storage so it can hold to new value.
2526
if (newPartCount > oldPartCount) {
2527
// The new type requires more storage; make it available.
2528
integerPart *newParts;
2529
newParts = new integerPart[newPartCount];
2530
APInt::tcSet(newParts, 0, newPartCount);
2531
if (isFiniteNonZero() || category==fcNaN)
2532
APInt::tcAssign(newParts, significandParts(), oldPartCount);
2533
freeSignificand();
2534
significand.parts = newParts;
2535
} else if (newPartCount == 1 && oldPartCount != 1) {
2536
// Switch to built-in storage for a single part.
2537
integerPart newPart = 0;
2538
if (isFiniteNonZero() || category==fcNaN)
2539
newPart = significandParts()[0];
2540
freeSignificand();
2541
significand.part = newPart;
2542
}
2543
2544
// Now that we have the right storage, switch the semantics.
2545
semantics = &toSemantics;
2546
2547
// If this is an extension, perform the shift now that the storage is
2548
// available.
2549
if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2550
APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2551
2552
if (isFiniteNonZero()) {
2553
fs = normalize(rounding_mode, lostFraction);
2554
*losesInfo = (fs != opOK);
2555
} else if (category == fcNaN) {
2556
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2557
*losesInfo =
2558
fromSemantics.nonFiniteBehavior != fltNonfiniteBehavior::NanOnly;
2559
makeNaN(false, sign);
2560
return is_signaling ? opInvalidOp : opOK;
2561
}
2562
2563
// If NaN is negative zero, we need to create a new NaN to avoid converting
2564
// NaN to -Inf.
2565
if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
2566
semantics->nanEncoding != fltNanEncoding::NegativeZero)
2567
makeNaN(false, false);
2568
2569
*losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2570
2571
// For x87 extended precision, we want to make a NaN, not a special NaN if
2572
// the input wasn't special either.
2573
if (!X86SpecialNan && semantics == &semX87DoubleExtended)
2574
APInt::tcSetBit(significandParts(), semantics->precision - 1);
2575
2576
// Convert of sNaN creates qNaN and raises an exception (invalid op).
2577
// This also guarantees that a sNaN does not become Inf on a truncation
2578
// that loses all payload bits.
2579
if (is_signaling) {
2580
makeQuiet();
2581
fs = opInvalidOp;
2582
} else {
2583
fs = opOK;
2584
}
2585
} else if (category == fcInfinity &&
2586
semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2587
makeNaN(false, sign);
2588
*losesInfo = true;
2589
fs = opInexact;
2590
} else if (category == fcZero &&
2591
semantics->nanEncoding == fltNanEncoding::NegativeZero) {
2592
// Negative zero loses info, but positive zero doesn't.
2593
*losesInfo =
2594
fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
2595
fs = *losesInfo ? opInexact : opOK;
2596
// NaN is negative zero means -0 -> +0, which can lose information
2597
sign = false;
2598
} else {
2599
*losesInfo = false;
2600
fs = opOK;
2601
}
2602
2603
return fs;
2604
}
2605
2606
/* Convert a floating point number to an integer according to the
2607
rounding mode. If the rounded integer value is out of range this
2608
returns an invalid operation exception and the contents of the
2609
destination parts are unspecified. If the rounded value is in
2610
range but the floating point number is not the exact integer, the C
2611
standard doesn't require an inexact exception to be raised. IEEE
2612
854 does require it so we do that.
2613
2614
Note that for conversions to integer type the C standard requires
2615
round-to-zero to always be used. */
2616
IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2617
MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2618
roundingMode rounding_mode, bool *isExact) const {
2619
lostFraction lost_fraction;
2620
const integerPart *src;
2621
unsigned int dstPartsCount, truncatedBits;
2622
2623
*isExact = false;
2624
2625
/* Handle the three special cases first. */
2626
if (category == fcInfinity || category == fcNaN)
2627
return opInvalidOp;
2628
2629
dstPartsCount = partCountForBits(width);
2630
assert(dstPartsCount <= parts.size() && "Integer too big");
2631
2632
if (category == fcZero) {
2633
APInt::tcSet(parts.data(), 0, dstPartsCount);
2634
// Negative zero can't be represented as an int.
2635
*isExact = !sign;
2636
return opOK;
2637
}
2638
2639
src = significandParts();
2640
2641
/* Step 1: place our absolute value, with any fraction truncated, in
2642
the destination. */
2643
if (exponent < 0) {
2644
/* Our absolute value is less than one; truncate everything. */
2645
APInt::tcSet(parts.data(), 0, dstPartsCount);
2646
/* For exponent -1 the integer bit represents .5, look at that.
2647
For smaller exponents leftmost truncated bit is 0. */
2648
truncatedBits = semantics->precision -1U - exponent;
2649
} else {
2650
/* We want the most significant (exponent + 1) bits; the rest are
2651
truncated. */
2652
unsigned int bits = exponent + 1U;
2653
2654
/* Hopelessly large in magnitude? */
2655
if (bits > width)
2656
return opInvalidOp;
2657
2658
if (bits < semantics->precision) {
2659
/* We truncate (semantics->precision - bits) bits. */
2660
truncatedBits = semantics->precision - bits;
2661
APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2662
} else {
2663
/* We want at least as many bits as are available. */
2664
APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2665
0);
2666
APInt::tcShiftLeft(parts.data(), dstPartsCount,
2667
bits - semantics->precision);
2668
truncatedBits = 0;
2669
}
2670
}
2671
2672
/* Step 2: work out any lost fraction, and increment the absolute
2673
value if we would round away from zero. */
2674
if (truncatedBits) {
2675
lost_fraction = lostFractionThroughTruncation(src, partCount(),
2676
truncatedBits);
2677
if (lost_fraction != lfExactlyZero &&
2678
roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2679
if (APInt::tcIncrement(parts.data(), dstPartsCount))
2680
return opInvalidOp; /* Overflow. */
2681
}
2682
} else {
2683
lost_fraction = lfExactlyZero;
2684
}
2685
2686
/* Step 3: check if we fit in the destination. */
2687
unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2688
2689
if (sign) {
2690
if (!isSigned) {
2691
/* Negative numbers cannot be represented as unsigned. */
2692
if (omsb != 0)
2693
return opInvalidOp;
2694
} else {
2695
/* It takes omsb bits to represent the unsigned integer value.
2696
We lose a bit for the sign, but care is needed as the
2697
maximally negative integer is a special case. */
2698
if (omsb == width &&
2699
APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2700
return opInvalidOp;
2701
2702
/* This case can happen because of rounding. */
2703
if (omsb > width)
2704
return opInvalidOp;
2705
}
2706
2707
APInt::tcNegate (parts.data(), dstPartsCount);
2708
} else {
2709
if (omsb >= width + !isSigned)
2710
return opInvalidOp;
2711
}
2712
2713
if (lost_fraction == lfExactlyZero) {
2714
*isExact = true;
2715
return opOK;
2716
} else
2717
return opInexact;
2718
}
2719
2720
/* Same as convertToSignExtendedInteger, except we provide
2721
deterministic values in case of an invalid operation exception,
2722
namely zero for NaNs and the minimal or maximal value respectively
2723
for underflow or overflow.
2724
The *isExact output tells whether the result is exact, in the sense
2725
that converting it back to the original floating point type produces
2726
the original value. This is almost equivalent to result==opOK,
2727
except for negative zeroes.
2728
*/
2729
IEEEFloat::opStatus
2730
IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
2731
unsigned int width, bool isSigned,
2732
roundingMode rounding_mode, bool *isExact) const {
2733
opStatus fs;
2734
2735
fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2736
isExact);
2737
2738
if (fs == opInvalidOp) {
2739
unsigned int bits, dstPartsCount;
2740
2741
dstPartsCount = partCountForBits(width);
2742
assert(dstPartsCount <= parts.size() && "Integer too big");
2743
2744
if (category == fcNaN)
2745
bits = 0;
2746
else if (sign)
2747
bits = isSigned;
2748
else
2749
bits = width - isSigned;
2750
2751
tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
2752
if (sign && isSigned)
2753
APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2754
}
2755
2756
return fs;
2757
}
2758
2759
/* Convert an unsigned integer SRC to a floating point number,
2760
rounding according to ROUNDING_MODE. The sign of the floating
2761
point number is not modified. */
2762
IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2763
const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2764
unsigned int omsb, precision, dstCount;
2765
integerPart *dst;
2766
lostFraction lost_fraction;
2767
2768
category = fcNormal;
2769
omsb = APInt::tcMSB(src, srcCount) + 1;
2770
dst = significandParts();
2771
dstCount = partCount();
2772
precision = semantics->precision;
2773
2774
/* We want the most significant PRECISION bits of SRC. There may not
2775
be that many; extract what we can. */
2776
if (precision <= omsb) {
2777
exponent = omsb - 1;
2778
lost_fraction = lostFractionThroughTruncation(src, srcCount,
2779
omsb - precision);
2780
APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2781
} else {
2782
exponent = precision - 1;
2783
lost_fraction = lfExactlyZero;
2784
APInt::tcExtract(dst, dstCount, src, omsb, 0);
2785
}
2786
2787
return normalize(rounding_mode, lost_fraction);
2788
}
2789
2790
/* Set this value from the integer VAL, rounding according to
   ROUNDING_MODE.  When ISSIGNED is true and VAL is negative, the sign is
   set and the negated value is converted; otherwise the sign is cleared
   and VAL is converted as an unsigned quantity.  */
IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
                                                roundingMode rounding_mode) {
  const bool negative = isSigned && Val.isNegative();
  sign = negative;

  /* Work on a copy so the caller's integer is never modified.  */
  APInt magnitude(Val);
  if (negative)
    magnitude = -magnitude;

  return convertFromUnsignedParts(magnitude.getRawData(), Val.getNumWords(),
                                  rounding_mode);
}
2803
2804
/* Convert a two's complement integer SRC to a floating point number,
2805
rounding according to ROUNDING_MODE. ISSIGNED is true if the
2806
integer is signed, in which case it must be sign-extended. */
2807
IEEEFloat::opStatus
2808
IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
2809
unsigned int srcCount, bool isSigned,
2810
roundingMode rounding_mode) {
2811
opStatus status;
2812
2813
if (isSigned &&
2814
APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2815
integerPart *copy;
2816
2817
/* If we're signed and negative negate a copy. */
2818
sign = true;
2819
copy = new integerPart[srcCount];
2820
APInt::tcAssign(copy, src, srcCount);
2821
APInt::tcNegate(copy, srcCount);
2822
status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2823
delete [] copy;
2824
} else {
2825
sign = false;
2826
status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2827
}
2828
2829
return status;
2830
}
2831
2832
/* FIXME: should this just take a const APInt reference? */
2833
IEEEFloat::opStatus
2834
IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2835
unsigned int width, bool isSigned,
2836
roundingMode rounding_mode) {
2837
unsigned int partCount = partCountForBits(width);
2838
APInt api = APInt(width, ArrayRef(parts, partCount));
2839
2840
sign = false;
2841
if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2842
sign = true;
2843
api = -api;
2844
}
2845
2846
return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2847
}
2848
2849
/// Parse a hexadecimal significand followed by a mandatory 'p'/'P' binary
/// exponent from \p s and store the value, rounded per \p rounding_mode.
///
/// Hex digits are packed one nibble at a time from the top of the
/// significand downwards. Once the significand is full, the remaining
/// digits are summarised once as a lost fraction. Returns the status of the
/// final normalize(), or an Error describing why the text was rejected.
Expected<IEEEFloat::opStatus>
IEEEFloat::convertFromHexadecimalString(StringRef s,
                                        roundingMode rounding_mode) {
  category = fcNormal;
  zeroSignificand();
  exponent = 0;

  integerPart *const sigWords = significandParts();
  const unsigned sigWordCount = partCount();
  // Position just above the next nibble to be written; counts down to zero.
  unsigned bitPos = sigWordCount * integerPartWidth;
  lostFraction discarded = lfExactlyZero;
  bool sawTrailingFraction = false;

  // Skip leading zeroes and any (hexa)decimal point.
  const StringRef::iterator begin = s.begin();
  const StringRef::iterator end = s.end();
  StringRef::iterator dot;
  auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
  if (!PtrOrErr)
    return PtrOrErr.takeError();
  StringRef::iterator p = *PtrOrErr;
  const StringRef::iterator firstSignificantDigit = p;

  while (p != end) {
    if (*p == '.') {
      if (dot != end)
        return createError("String contains multiple dots");
      dot = p;
      ++p;
      continue;
    }

    // Anything that is not a hex digit ends the significand.
    integerPart nibble = hexDigitValue(*p);
    if (nibble == UINT_MAX)
      break;
    ++p;

    if (bitPos != 0) {
      // Store the digit while we have space.
      bitPos -= 4;
      sigWords[bitPos / integerPartWidth] |= nibble
                                             << (bitPos % integerPartWidth);
    } else if (!sawTrailingFraction) {
      // Out of room: classify everything from this digit onwards, once.
      auto FractOrErr = trailingHexadecimalFraction(p, end, nibble);
      if (!FractOrErr)
        return FractOrErr.takeError();
      discarded = *FractOrErr;
      sawTrailingFraction = true;
    }
  }

  /* Hex floats require an exponent but not a hexadecimal point.  */
  if (p == end)
    return createError("Hex strings require an exponent");
  if (*p != 'p' && *p != 'P')
    return createError("Invalid character in significand");
  if (p == begin || (dot != end && p - begin == 1))
    return createError("Significand has no digits");

  /* Ignore the exponent if we are zero.  */
  if (p != firstSignificantDigit) {
    /* Implicit hexadecimal point?  */
    if (dot == end)
      dot = p;

    /* Calculate the exponent adjustment implicit in the number of
       significant digits.  */
    int expAdjustment = static_cast<int>(dot - firstSignificantDigit);
    if (expAdjustment < 0)
      expAdjustment++;
    expAdjustment = expAdjustment * 4 - 1;

    /* Adjust for writing the significand starting at the most
       significant nibble.  */
    expAdjustment += semantics->precision;
    expAdjustment -= sigWordCount * integerPartWidth;

    /* Adjust for the given exponent.  */
    auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
    if (!ExpOrErr)
      return ExpOrErr.takeError();
    exponent = *ExpOrErr;
  }

  return normalize(rounding_mode, discarded);
}
2942
2943
/// Set this value to the decimal significand in \p decSigParts
/// (\p sigPartCount words) scaled by 10^\p exp, rounded per \p rounding_mode.
///
/// The product (or quotient) with 5^|exp| is computed in a scratch format
/// whose precision doubles on every pass. A pass is accepted once the
/// accumulated half-ulp error bound no longer exceeds the distance of the
/// scratch result from a rounding boundary. Only then are the bits copied
/// into this object and normalize() called.
IEEEFloat::opStatus
IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
                                        unsigned sigPartCount, int exp,
                                        roundingMode rounding_mode) {
  const bool isNearest = (rounding_mode == rmNearestTiesToEven ||
                          rounding_mode == rmNearestTiesToAway);

  /* Calculate pow(5, abs(exp)).  */
  integerPart pow5Parts[maxPowerOfFiveParts];
  const unsigned int pow5PartCount =
      powerOf5(pow5Parts, exp >= 0 ? exp : -exp);

  /* Scratch semantics; only the precision changes between passes.  */
  fltSemantics calcSemantics = { 32767, -32767, 0, 0 };

  for (unsigned int parts = partCountForBits(semantics->precision + 11);;
       parts *= 2) {
    calcSemantics.precision = parts * integerPartWidth - 1;
    unsigned int excessPrecision =
        calcSemantics.precision - semantics->precision;
    unsigned int truncatedBits = excessPrecision;

    IEEEFloat decSig(calcSemantics, uninitialized);
    decSig.makeZero(sign);
    IEEEFloat pow5(calcSemantics);

    const opStatus sigStatus = decSig.convertFromUnsignedParts(
        decSigParts, sigPartCount, rmNearestTiesToEven);
    const opStatus powStatus = pow5.convertFromUnsignedParts(
        pow5Parts, pow5PartCount, rmNearestTiesToEven);
    /* Add exp, as 10^n = 5^n * 2^n.  */
    decSig.exponent += exp;

    lostFraction calcLostFraction;
    unsigned int powHUerr;

    if (exp < 0) {
      calcLostFraction = decSig.divideSignificand(pow5);
      /* Denormal numbers have less precision.  */
      if (decSig.exponent < semantics->minExponent) {
        excessPrecision += (semantics->minExponent - decSig.exponent);
        truncatedBits = excessPrecision;
        if (excessPrecision > calcSemantics.precision)
          excessPrecision = calcSemantics.precision;
      }
      /* Extra half-ulp lost in reciprocal of exponent.  */
      powHUerr =
          (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0 : 2;
    } else {
      /* multiplySignificand leaves the precision-th bit set to 1.  */
      calcLostFraction = decSig.multiplySignificand(pow5);
      powHUerr = powStatus != opOK;
    }

    /* Both multiplySignificand and divideSignificand return the
       result with the integer bit set.  */
    assert(APInt::tcExtractBit
           (decSig.significandParts(), calcSemantics.precision - 1) == 1);

    const integerPart HUerr = HUerrBound(calcLostFraction != lfExactlyZero,
                                         sigStatus != opOK, powHUerr);
    const integerPart HUdistance =
        2 * ulpsFromBoundary(decSig.significandParts(), excessPrecision,
                             isNearest);

    /* Not yet guaranteed to round correctly if we truncate: retry with
       twice the working precision.  */
    if (HUdistance < HUerr)
      continue;

    APInt::tcExtract(significandParts(), partCount(),
                     decSig.significandParts(),
                     calcSemantics.precision - excessPrecision,
                     excessPrecision);
    /* Take the exponent of decSig.  If we tcExtract-ed less bits
       above we must adjust our exponent to compensate for the
       implicit right shift.  */
    exponent = (decSig.exponent + semantics->precision
                - (calcSemantics.precision - excessPrecision));
    calcLostFraction = lostFractionThroughTruncation(
        decSig.significandParts(), decSig.partCount(), truncatedBits);
    return normalize(rounding_mode, calcLostFraction);
  }
}
3027
3028
/// Parse the decimal floating point literal in \p str and store it in this
/// object, rounded per \p rounding_mode.
///
/// Zero, certain overflow and certain underflow are settled with cheap
/// integer bounds. Otherwise the significant digits are accumulated into a
/// multi-word integer and handed to roundSignificandWithExponent. Returns
/// the resulting status, or an Error for malformed input.
Expected<IEEEFloat::opStatus>
IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
  /* Scan the text.  */
  decimalInfo D;
  StringRef::iterator p = str.begin();
  if (Error Err = interpretDecimal(p, str.end(), &D))
    return std::move(Err);

  /* Handle the quick cases.  First the case of no significant digits,
     i.e. zero, and then exponents that are obviously too large or too
     small.  Writing L for log 10 / log 2, a number d.ddddd*10^exp
     definitely overflows if

           (exp - 1) * L >= maxExponent

     and definitely underflows to zero where

           (exp + 1) * L <= minExponent - precision

     With integer arithmetic the tightest bounds for L are

           93/28 < L < 196/59            [ numerator <= 256 ]
           42039/12655 < L < 28738/8651  [ numerator <= 65536 ]
  */

  // Test if we have a zero number allowing for strings with no null
  // terminators and zero decimals with non-zero exponents.
  //
  // firstSigDigit was computed by ignoring all zeros and dots. Thus if
  // D.firstSigDigit equals str.end(), every digit must be a zero and there
  // can be at most one dot. On the other hand, if we have a zero with a
  // non-zero exponent, then D.firstSigDigit will be non-numeric.
  if (D.firstSigDigit == str.end() ||
      decDigitValue(*D.firstSigDigit) >= 10U) {
    category = fcZero;
    if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
      sign = false;
    return opOK;
  }

  /* Is the normalized exponent high enough that the multiplication in the
     max-exponent check further down would itself overflow?  Then the
     value certainly overflows.  */
  if (D.normalizedExponent - 1 > INT_MAX / 42039)
    return handleOverflow(rounding_mode);

  /* The previous test also rules out positive overflow in the
     min-exponent product.  Guard against negative overflow of either
     product, then perform the min-exponent check.  */
  if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
      (D.normalizedExponent + 1) * 28738 <=
          8651 * (semantics->minExponent -
                  static_cast<int>(semantics->precision))) {
    /* Underflow to zero and round.  */
    category = fcNormal;
    zeroSignificand();
    return normalize(rounding_mode, lfLessThanHalf);
  }

  /* We can finally safely perform the max-exponent check.  */
  if ((D.normalizedExponent - 1) * 42039 >= 12655 * semantics->maxExponent) {
    /* Overflow and round.  */
    return handleOverflow(rounding_mode);
  }

  /* A tight upper bound on number of bits required to hold an
     N-digit decimal integer is N * 196 / 59.  Allocate enough space
     to hold the full significand, and an extra part required by
     tcMultiplyPart.  */
  const unsigned int digitSpan =
      static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
  const unsigned int capacity = partCountForBits(1 + 196 * digitSpan / 59);
  integerPart *const decSignificand = new integerPart[capacity + 1];
  unsigned int usedParts = 0;

  /* The maximum multiplier that can still be multiplied by ten, with any
     digit added, without overflowing an integerPart.  */
  const integerPart maxMultiplier = (~ (integerPart) 0 - 9) / 10;

  /* Convert to binary efficiently - we do almost all multiplication
     in an integerPart.  When this would overflow do we do a single
     bignum multiplication, and then revert again to multiplication
     in an integerPart.  */
  do {
    integerPart chunk = 0;
    integerPart scale = 1;

    do {
      if (*p == '.') {
        p++;
        if (p == str.end())
          break;
      }
      const integerPart digit = decDigitValue(*p++);
      if (digit >= 10U) {
        delete[] decSignificand;
        return createError("Invalid character in significand");
      }
      scale *= 10;
      chunk = chunk * 10 + digit;
    } while (p <= D.lastSigDigit && scale <= maxMultiplier);

    /* Multiply out the current part.  */
    APInt::tcMultiplyPart(decSignificand, decSignificand, scale, chunk,
                          usedParts, usedParts + 1, false);

    /* If we used another part (likely but not guaranteed), increase
       the count.  */
    if (decSignificand[usedParts])
      usedParts++;
  } while (p <= D.lastSigDigit);

  category = fcNormal;
  const opStatus fs = roundSignificandWithExponent(decSignificand, usedParts,
                                                   D.exponent, rounding_mode);

  delete[] decSignificand;
  return fs;
}
3150
3151
/// Try to interpret \p str as one of the textual special values and, on
/// success, store it in this object.
///
/// Accepted spellings:
///   - "inf", "INFINITY", "+Inf", and "-" followed by "inf", "INFINITY" or
///     "Inf";
///   - an optional "-", an optional 's'/'S' (signaling), then "nan" or
///     "NaN", optionally followed by an integer payload that may be wrapped
///     in parentheses. A "0x"/"0X" prefix selects radix 16, any other
///     leading '0' selects radix 8, otherwise the payload is decimal.
///
/// \returns true if \p str was recognised and this object was updated;
/// false otherwise.
bool IEEEFloat::convertFromStringSpecials(StringRef str) {
  const size_t MIN_NAME_SIZE = 3;

  if (str.size() < MIN_NAME_SIZE)
    return false;

  if (str == "inf" || str == "INFINITY" || str == "+Inf") {
    makeInf(false);
    return true;
  }

  bool IsNegative = str.front() == '-';
  if (IsNegative) {
    str = str.drop_front();
    if (str.size() < MIN_NAME_SIZE)
      return false;

    if (str == "inf" || str == "INFINITY" || str == "Inf") {
      makeInf(true);
      return true;
    }
  }

  // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
  bool IsSignaling = str.front() == 's' || str.front() == 'S';
  if (IsSignaling) {
    str = str.drop_front();
    if (str.size() < MIN_NAME_SIZE)
      return false;
  }

  if (str.starts_with("nan") || str.starts_with("NaN")) {
    str = str.drop_front(3);

    // A NaN without payload.
    if (str.empty()) {
      makeNaN(IsSignaling, IsNegative);
      return true;
    }

    // Allow the payload to be inside parentheses.
    if (str.front() == '(') {
      // Parentheses should be balanced (and not empty).
      if (str.size() <= 2 || str.back() != ')')
        return false;

      str = str.slice(1, str.size() - 1);
    }

    // Determine the payload number's radix. The 'x' is matched with explicit
    // character comparisons rather than tolower(): passing a plain char that
    // may be negative to tolower() is undefined, and the explicit form is
    // what convertFromString uses for the same prefix.
    unsigned Radix = 10;
    if (str[0] == '0') {
      if (str.size() > 1 && (str[1] == 'x' || str[1] == 'X')) {
        str = str.drop_front(2);
        Radix = 16;
      } else
        Radix = 8;
    }

    // Parse the payload and make the NaN. getAsInteger returns true on
    // failure, in which case we fall through and report "not a special".
    APInt Payload;
    if (!str.getAsInteger(Radix, Payload)) {
      makeNaN(IsSignaling, IsNegative, &Payload);
      return true;
    }
  }

  return false;
}
3220
3221
/// Parse \p str into this object, rounding per \p rounding_mode.
///
/// Special names (handled by convertFromStringSpecials) are tried first.
/// Otherwise an optional leading '+' or '-' is consumed and the remainder is
/// dispatched to the hexadecimal parser when it starts with "0x"/"0X", or to
/// the decimal parser in every other case. Returns the parser's status, or an
/// Error for an empty string, a lone sign, or a bare "0x".
Expected<IEEEFloat::opStatus>
IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) {
  if (str.empty())
    return createError("Invalid string length");

  // Handle special cases.
  if (convertFromStringSpecials(str))
    return opOK;

  /* Handle a leading sign; only '-' sets the sign bit.  */
  const char lead = str.front();
  sign = lead == '-' ? 1 : 0;

  StringRef body = str;
  if (lead == '-' || lead == '+') {
    body = body.drop_front();
    if (body.empty())
      return createError("String has no digits");
  }

  const bool hasHexPrefix = body.size() >= 2 && body[0] == '0' &&
                            (body[1] == 'x' || body[1] == 'X');
  if (!hasHexPrefix)
    return convertFromDecimalString(body, rounding_mode);

  /* "0x" with nothing after it is not a number.  */
  if (body.size() == 2)
    return createError("Invalid string");
  return convertFromHexadecimalString(body.drop_front(2), rounding_mode);
}
3250
3251
/* Write out a hexadecimal representation of the floating point value
3252
to DST, which must be of sufficient size, in the C99 form
3253
[-]0xh.hhhhp[+-]d. Return the number of characters written,
3254
excluding the terminating NUL.
3255
3256
If UPPERCASE, the output is in upper case, otherwise in lower case.
3257
3258
HEXDIGITS digits appear altogether, rounding the value if
3259
necessary. If HEXDIGITS is 0, the minimal precision to display the
3260
number precisely is used instead. If nothing would appear after
3261
the decimal point it is suppressed.
3262
3263
The decimal exponent is always printed and has at least one digit.
3264
Zero values display an exponent of zero. Infinities and NaNs
3265
appear as "infinity" or "nan" respectively.
3266
3267
The above rules are as specified by C99. There is ambiguity about
3268
what the leading hexadecimal digit should be. This implementation
3269
uses whatever is necessary so that the exponent is displayed as
3270
stored. This implies the exponent will fall within the IEEE format
3271
range, and the leading hexadecimal digit will be 0 (for denormals),
3272
1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
3273
any other digits zero).
3274
*/
3275
unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
3276
bool upperCase,
3277
roundingMode rounding_mode) const {
3278
char *p;
3279
3280
p = dst;
3281
if (sign)
3282
*dst++ = '-';
3283
3284
switch (category) {
3285
case fcInfinity:
3286
memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
3287
dst += sizeof infinityL - 1;
3288
break;
3289
3290
case fcNaN:
3291
memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
3292
dst += sizeof NaNU - 1;
3293
break;
3294
3295
case fcZero:
3296
*dst++ = '0';
3297
*dst++ = upperCase ? 'X': 'x';
3298
*dst++ = '0';
3299
if (hexDigits > 1) {
3300
*dst++ = '.';
3301
memset (dst, '0', hexDigits - 1);
3302
dst += hexDigits - 1;
3303
}
3304
*dst++ = upperCase ? 'P': 'p';
3305
*dst++ = '0';
3306
break;
3307
3308
case fcNormal:
3309
dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
3310
break;
3311
}
3312
3313
*dst = 0;
3314
3315
return static_cast<unsigned int>(dst - p);
3316
}
3317
3318
/* Does the hard work of outputting the correctly rounded hexadecimal
3319
form of a normal floating point number with the specified number of
3320
hexadecimal digits. If HEXDIGITS is zero the minimum number of
3321
digits necessary to print the value precisely is output. */
3322
char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
3323
bool upperCase,
3324
roundingMode rounding_mode) const {
3325
unsigned int count, valueBits, shift, partsCount, outputDigits;
3326
const char *hexDigitChars;
3327
const integerPart *significand;
3328
char *p;
3329
bool roundUp;
3330
3331
*dst++ = '0';
3332
*dst++ = upperCase ? 'X': 'x';
3333
3334
roundUp = false;
3335
hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
3336
3337
significand = significandParts();
3338
partsCount = partCount();
3339
3340
/* +3 because the first digit only uses the single integer bit, so
3341
we have 3 virtual zero most-significant-bits. */
3342
valueBits = semantics->precision + 3;
3343
shift = integerPartWidth - valueBits % integerPartWidth;
3344
3345
/* The natural number of digits required ignoring trailing
3346
insignificant zeroes. */
3347
outputDigits = (valueBits - significandLSB () + 3) / 4;
3348
3349
/* hexDigits of zero means use the required number for the
3350
precision. Otherwise, see if we are truncating. If we are,
3351
find out if we need to round away from zero. */
3352
if (hexDigits) {
3353
if (hexDigits < outputDigits) {
3354
/* We are dropping non-zero bits, so need to check how to round.
3355
"bits" is the number of dropped bits. */
3356
unsigned int bits;
3357
lostFraction fraction;
3358
3359
bits = valueBits - hexDigits * 4;
3360
fraction = lostFractionThroughTruncation (significand, partsCount, bits);
3361
roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
3362
}
3363
outputDigits = hexDigits;
3364
}
3365
3366
/* Write the digits consecutively, and start writing in the location
3367
of the hexadecimal point. We move the most significant digit
3368
left and add the hexadecimal point later. */
3369
p = ++dst;
3370
3371
count = (valueBits + integerPartWidth - 1) / integerPartWidth;
3372
3373
while (outputDigits && count) {
3374
integerPart part;
3375
3376
/* Put the most significant integerPartWidth bits in "part". */
3377
if (--count == partsCount)
3378
part = 0; /* An imaginary higher zero part. */
3379
else
3380
part = significand[count] << shift;
3381
3382
if (count && shift)
3383
part |= significand[count - 1] >> (integerPartWidth - shift);
3384
3385
/* Convert as much of "part" to hexdigits as we can. */
3386
unsigned int curDigits = integerPartWidth / 4;
3387
3388
if (curDigits > outputDigits)
3389
curDigits = outputDigits;
3390
dst += partAsHex (dst, part, curDigits, hexDigitChars);
3391
outputDigits -= curDigits;
3392
}
3393
3394
if (roundUp) {
3395
char *q = dst;
3396
3397
/* Note that hexDigitChars has a trailing '0'. */
3398
do {
3399
q--;
3400
*q = hexDigitChars[hexDigitValue (*q) + 1];
3401
} while (*q == '0');
3402
assert(q >= p);
3403
} else {
3404
/* Add trailing zeroes. */
3405
memset (dst, '0', outputDigits);
3406
dst += outputDigits;
3407
}
3408
3409
/* Move the most significant digit to before the point, and if there
3410
is something after the decimal point add it. This must come
3411
after rounding above. */
3412
p[-1] = p[0];
3413
if (dst -1 == p)
3414
dst--;
3415
else
3416
p[0] = '.';
3417
3418
/* Finally output the exponent. */
3419
*dst++ = upperCase ? 'P': 'p';
3420
3421
return writeSignedDecimal (dst, exponent);
3422
}
3423
3424
/// Hash an IEEEFloat. Zero, infinity and NaN hash only their category, sign
/// and the semantics' precision; finite non-zero values additionally hash
/// the exponent and every significand part.
hash_code hash_value(const IEEEFloat &Arg) {
  const uint8_t categoryByte = static_cast<uint8_t>(Arg.category);

  if (Arg.isFiniteNonZero()) {
    // Normal floats need their exponent and significand hashed.
    const uint8_t signByte = static_cast<uint8_t>(Arg.sign);
    const integerPart *const sigBegin = Arg.significandParts();
    const integerPart *const sigEnd = sigBegin + Arg.partCount();
    return hash_combine(categoryByte, signByte, Arg.semantics->precision,
                        Arg.exponent, hash_combine_range(sigBegin, sigEnd));
  }

  // NaN has no sign, fix it at zero.
  const uint8_t signByte =
      Arg.isNaN() ? static_cast<uint8_t>(0) : static_cast<uint8_t>(Arg.sign);
  return hash_combine(categoryByte, signByte, Arg.semantics->precision);
}
3438
3439
// Conversion from APFloat to/from host float/double. It may eventually be
3440
// possible to eliminate these and have everybody deal with APFloats, but that
3441
// will take a while. This approach will not easily extend to long double.
3442
// Current implementation requires integerPartWidth==64, which is correct at
3443
// the moment but could be made more general.
3444
3445
// Denormals have exponent minExponent in APFloat, but minExponent-1 in
// the actual IEEE representations.  We compensate for that here.
3447
3448
/// Build the 80-bit pattern for a semX87DoubleExtended value: word 0 holds
/// the 64-bit significand, word 1 holds the 15-bit biased exponent in its low
/// bits with the sign at bit 15.
APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
  assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
  assert(partCount()==2);

  constexpr uint64_t integerBit = 0x8000000000000000ULL;
  constexpr uint64_t exponentMask = 0x7fff;

  uint64_t biasedExponent;
  uint64_t sig;

  if (isFiniteNonZero()) {
    biasedExponent = exponent + 16383; // bias
    sig = significandParts()[0];
    // Biased exponent 1 without the integer bit set is encoded as 0.
    if (biasedExponent == 1 && !(sig & integerBit))
      biasedExponent = 0; // denormal
  } else if (category == fcZero) {
    biasedExponent = 0;
    sig = 0;
  } else if (category == fcInfinity) {
    biasedExponent = exponentMask;
    sig = integerBit;
  } else {
    assert(category == fcNaN && "Unknown category");
    biasedExponent = exponentMask;
    sig = significandParts()[0];
  }

  uint64_t words[2];
  words[0] = sig;
  words[1] = (static_cast<uint64_t>(sign & 1) << 15) |
             (biasedExponent & exponentMask);
  return APInt(80, words);
}
3477
3478
/// Build the 128-bit pattern for a semPPCDoubleDoubleLegacy value as two
/// IEEE doubles: word 0 is this value converted to double, word 1 is the
/// remainder (value minus word 0) converted to double, or zero when the
/// first conversion lost nothing or produced a special value.
APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
  assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
  assert(partCount()==2);

  // Convert number to double.  To avoid spurious underflows, we re-
  // normalize against the "double" minExponent first, and only *then*
  // truncate the mantissa.  The result of that second conversion
  // may be inexact, but should never underflow.
  // The fltSemantics is declared before the IEEEFloat objects that use it
  // (and save a pointer to it) to ensure correct destruction order.
  fltSemantics widenedSemantics = *semantics;
  widenedSemantics.minExponent = semIEEEdouble.minExponent;

  bool losesInfo;
  IEEEFloat widened(*this);
  opStatus fs =
      widened.convert(widenedSemantics, rmNearestTiesToEven, &losesInfo);
  assert(fs == opOK && !losesInfo);
  (void)fs;

  IEEEFloat high(widened);
  fs = high.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
  assert(fs == opOK || fs == opInexact);
  (void)fs;

  uint64_t words[2];
  words[0] = *high.convertDoubleAPFloatToAPInt().getRawData();
  words[1] = 0;

  // If conversion was exact or resulted in a special case, we're done;
  // the second double stays zero.  Otherwise, re-convert back to
  // the extended format and compute the difference.  This now should
  // convert exactly to double.
  if (high.isFiniteNonZero() && losesInfo) {
    fs = high.convert(widenedSemantics, rmNearestTiesToEven, &losesInfo);
    assert(fs == opOK && !losesInfo);
    (void)fs;

    IEEEFloat low(widened);
    low.subtract(high, rmNearestTiesToEven);
    fs = low.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
    assert(fs == opOK && !losesInfo);
    (void)fs;
    words[1] = *low.convertDoubleAPFloatToAPInt().getRawData();
  }

  return APInt(128, words);
}
3526
3527
/// Build the bit pattern of this value for semantics \p S.
///
/// The trailing significand occupies the low words with the integer bit
/// masked off. In the last word, the biased exponent is placed at bit
/// (precision - 1) % 64 and the sign at bit (sizeInBits - 1) % 64.
template <const fltSemantics &S>
APInt IEEEFloat::convertIEEEFloatToAPInt() const {
  assert(semantics == &S);

  constexpr int bias = -(S.minExponent - 1);
  constexpr unsigned int fractionBits = S.precision - 1;
  constexpr int integerBitWord = fractionBits / integerPartWidth;
  constexpr integerPart integerBit = integerPart{1}
                                     << (fractionBits % integerPartWidth);
  constexpr uint64_t fractionMask = integerBit - 1;
  constexpr unsigned int exponentBits = S.sizeInBits - 1 - fractionBits;
  static_assert(exponentBits < 64);
  constexpr uint64_t exponentMask = (uint64_t{1} << exponentBits) - 1;

  uint64_t biasedExponent;
  std::array<integerPart, partCountForBits(fractionBits)> fraction;

  if (isFiniteNonZero()) {
    biasedExponent = exponent + bias;
    std::copy_n(significandParts(), fraction.size(), fraction.begin());
    // Biased exponent 1 without the integer bit set is encoded as 0.
    if (biasedExponent == 1 &&
        !(significandParts()[integerBitWord] & integerBit))
      biasedExponent = 0; // denormal
  } else if (category == fcZero) {
    biasedExponent = ::exponentZero(S) + bias;
    fraction.fill(0);
  } else if (category == fcInfinity) {
    if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
        S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
      llvm_unreachable("semantics don't support inf!");
    biasedExponent = ::exponentInf(S) + bias;
    fraction.fill(0);
  } else {
    assert(category == fcNaN && "Unknown category!");
    if (S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
      llvm_unreachable("semantics don't support NaN!");
    biasedExponent = ::exponentNaN(S) + bias;
    std::copy_n(significandParts(), fraction.size(), fraction.begin());
  }

  // Low words: the significand, then zero fill up to the full size.
  std::array<uint64_t, (S.sizeInBits + 63) / 64> words;
  auto fillFrom = std::copy_n(fraction.begin(), fraction.size(), words.begin());
  if constexpr (fractionMask != 0) {
    // Clear the integer bit.
    words[fraction.size() - 1] &= fractionMask;
  }
  std::fill(fillFrom, words.end(), uint64_t{0});

  // Last word: merge in the sign and exponent fields.
  constexpr size_t lastWord = words.size() - 1;
  const uint64_t shiftedSign = static_cast<uint64_t>(sign & 1)
                               << ((S.sizeInBits - 1) % 64);
  const uint64_t shiftedExponent = (biasedExponent & exponentMask)
                                   << (fractionBits % 64);
  words[lastWord] |= shiftedSign;
  words[lastWord] |= shiftedExponent;

  if constexpr (lastWord == 0) {
    return APInt(S.sizeInBits, words[0]);
  }
  return APInt(S.sizeInBits, words);
}
3590
3591
/// Bit pattern of this value under semIEEEquad; asserts a two-part
/// significand and forwards to convertIEEEFloatToAPInt.
APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
  assert(partCount() == 2);
  return convertIEEEFloatToAPInt<semIEEEquad>();
}
3595
3596
/// Bit pattern of this value under semIEEEdouble; asserts a one-part
/// significand and forwards to convertIEEEFloatToAPInt.
APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semIEEEdouble>();
}
3600
3601
/// Bit pattern of this value under semIEEEsingle; asserts a one-part
/// significand and forwards to convertIEEEFloatToAPInt.
APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semIEEEsingle>();
}
3605
3606
/// Bit pattern of this value under semBFloat; asserts a one-part
/// significand and forwards to convertIEEEFloatToAPInt.
APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semBFloat>();
}
3610
3611
/// Bit pattern of this value under semIEEEhalf; asserts a one-part
/// significand and forwards to convertIEEEFloatToAPInt.
APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semIEEEhalf>();
}
3615
3616
/// Bit pattern of this value under semFloat8E5M2; asserts a one-part
/// significand and forwards to convertIEEEFloatToAPInt.
APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E5M2>();
}
3620
3621
/// Bit pattern of this value under semFloat8E5M2FNUZ; asserts a one-part
/// significand and forwards to convertIEEEFloatToAPInt.
APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>();
}
3625
3626
/// Bit pattern of this value under semFloat8E4M3; asserts a one-part
/// significand and forwards to convertIEEEFloatToAPInt.
APInt IEEEFloat::convertFloat8E4M3APFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E4M3>();
}
3630
3631
/// Bit pattern of this value under semFloat8E4M3FN; asserts a one-part
/// significand and forwards to convertIEEEFloatToAPInt.
APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E4M3FN>();
}
3635
3636
/// Bit pattern of this value under semFloat8E4M3FNUZ; asserts a one-part
/// significand and forwards to convertIEEEFloatToAPInt.
APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>();
}
3640
3641
/// Bit pattern of this value under semFloat8E4M3B11FNUZ; asserts a one-part
/// significand and forwards to convertIEEEFloatToAPInt.
APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>();
}
3645
3646
/// Bit pattern of this value under semFloatTF32; asserts a one-part
/// significand and forwards to convertIEEEFloatToAPInt.
APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloatTF32>();
}
3650
3651
/// Bit pattern of this value under semFloat6E3M2FN; asserts a one-part
/// significand and forwards to convertIEEEFloatToAPInt.
APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat6E3M2FN>();
}
3655
3656
/// Bit pattern of this value under semFloat6E2M3FN; asserts a one-part
/// significand and forwards to convertIEEEFloatToAPInt.
APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat6E2M3FN>();
}
3660
3661
/// Bit pattern of this value under semFloat4E2M1FN; asserts a one-part
/// significand and forwards to convertIEEEFloatToAPInt.
APInt IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const {
  assert(partCount() == 1);
  return convertIEEEFloatToAPInt<semFloat4E2M1FN>();
}
3665
3666
// This function creates an APInt that is just a bit map of the floating
3667
// point constant as it would appear in memory. It is not a conversion,
3668
// and treating the result as a normal integer is unlikely to be useful.
3669
3670
APInt IEEEFloat::bitcastToAPInt() const {
3671
if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
3672
return convertHalfAPFloatToAPInt();
3673
3674
if (semantics == (const llvm::fltSemantics *)&semBFloat)
3675
return convertBFloatAPFloatToAPInt();
3676
3677
if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
3678
return convertFloatAPFloatToAPInt();
3679
3680
if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3681
return convertDoubleAPFloatToAPInt();
3682
3683
if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3684
return convertQuadrupleAPFloatToAPInt();
3685
3686
if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
3687
return convertPPCDoubleDoubleAPFloatToAPInt();
3688
3689
if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2)
3690
return convertFloat8E5M2APFloatToAPInt();
3691
3692
if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ)
3693
return convertFloat8E5M2FNUZAPFloatToAPInt();
3694
3695
if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3)
3696
return convertFloat8E4M3APFloatToAPInt();
3697
3698
if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN)
3699
return convertFloat8E4M3FNAPFloatToAPInt();
3700
3701
if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ)
3702
return convertFloat8E4M3FNUZAPFloatToAPInt();
3703
3704
if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ)
3705
return convertFloat8E4M3B11FNUZAPFloatToAPInt();
3706
3707
if (semantics == (const llvm::fltSemantics *)&semFloatTF32)
3708
return convertFloatTF32APFloatToAPInt();
3709
3710
if (semantics == (const llvm::fltSemantics *)&semFloat6E3M2FN)
3711
return convertFloat6E3M2FNAPFloatToAPInt();
3712
3713
if (semantics == (const llvm::fltSemantics *)&semFloat6E2M3FN)
3714
return convertFloat6E2M3FNAPFloatToAPInt();
3715
3716
if (semantics == (const llvm::fltSemantics *)&semFloat4E2M1FN)
3717
return convertFloat4E2M1FNAPFloatToAPInt();
3718
3719
assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3720
"unknown format!");
3721
return convertF80LongDoubleAPFloatToAPInt();
3722
}
3723
3724
float IEEEFloat::convertToFloat() const {
3725
assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3726
"Float semantics are not IEEEsingle");
3727
APInt api = bitcastToAPInt();
3728
return api.bitsToFloat();
3729
}
3730
3731
double IEEEFloat::convertToDouble() const {
3732
assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3733
"Float semantics are not IEEEdouble");
3734
APInt api = bitcastToAPInt();
3735
return api.bitsToDouble();
3736
}
3737
3738
#ifdef HAS_IEE754_FLOAT128
/// Returns this value as a host `float128` by reinterpreting its bit image.
/// Only valid when the semantics are semIEEEquad (asserted). Compiled only
/// when HAS_IEE754_FLOAT128 is defined.
float128 IEEEFloat::convertToQuad() const {
  assert(semantics == &semIEEEquad && "Float semantics are not IEEEquads");
  return bitcastToAPInt().bitsToQuad();
}
#endif
3746
3747
/// Integer bit is explicit in this format. Intel hardware (387 and later)
3748
/// does not support these bit patterns:
3749
/// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3750
/// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3751
/// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3752
/// exponent = 0, integer bit 1 ("pseudodenormal")
3753
/// At the moment, the first three are treated as NaNs, the last one as Normal.
3754
void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3755
uint64_t i1 = api.getRawData()[0];
3756
uint64_t i2 = api.getRawData()[1];
3757
uint64_t myexponent = (i2 & 0x7fff);
3758
uint64_t mysignificand = i1;
3759
uint8_t myintegerbit = mysignificand >> 63;
3760
3761
initialize(&semX87DoubleExtended);
3762
assert(partCount()==2);
3763
3764
sign = static_cast<unsigned int>(i2>>15);
3765
if (myexponent == 0 && mysignificand == 0) {
3766
makeZero(sign);
3767
} else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3768
makeInf(sign);
3769
} else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3770
(myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3771
category = fcNaN;
3772
exponent = exponentNaN();
3773
significandParts()[0] = mysignificand;
3774
significandParts()[1] = 0;
3775
} else {
3776
category = fcNormal;
3777
exponent = myexponent - 16383;
3778
significandParts()[0] = mysignificand;
3779
significandParts()[1] = 0;
3780
if (myexponent==0) // denormal
3781
exponent = -16382;
3782
}
3783
}
3784
3785
void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
3786
uint64_t i1 = api.getRawData()[0];
3787
uint64_t i2 = api.getRawData()[1];
3788
opStatus fs;
3789
bool losesInfo;
3790
3791
// Get the first double and convert to our format.
3792
initFromDoubleAPInt(APInt(64, i1));
3793
fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3794
assert(fs == opOK && !losesInfo);
3795
(void)fs;
3796
3797
// Unless we have a special case, add in second double.
3798
if (isFiniteNonZero()) {
3799
IEEEFloat v(semIEEEdouble, APInt(64, i2));
3800
fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3801
assert(fs == opOK && !losesInfo);
3802
(void)fs;
3803
3804
add(v, rmNearestTiesToEven);
3805
}
3806
}
3807
3808
/// Decodes \p api as a bit image laid out for the semantics \p S and
/// initializes this object from it.
///
/// The layout assumed is: trailing significand in the low bits, the biased
/// exponent above it, and the sign in the top bit. Infinity, NaN and zero
/// encodings are recognised according to S.nonFiniteBehavior and
/// S.nanEncoding. Everything else becomes fcNormal; denormals are stored with
/// exponent S.minExponent and without the integer bit.
template <const fltSemantics &S>
void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
  assert(api.getBitWidth() == S.sizeInBits);

  // Compile-time description of the encoding.
  constexpr unsigned int TrailingBits = S.precision - 1;
  constexpr unsigned int NumParts = partCountForBits(TrailingBits);
  constexpr unsigned int TopPart = NumParts - 1;
  constexpr integerPart IntegerBit = integerPart{1}
                                     << (TrailingBits % integerPartWidth);
  constexpr uint64_t TopPartMask = IntegerBit - 1;
  constexpr unsigned int ExponentBits = S.sizeInBits - 1 - TrailingBits;
  static_assert(ExponentBits < 64);
  constexpr uint64_t ExponentMask = (uint64_t{1} << ExponentBits) - 1;
  constexpr int Bias = -(S.minExponent - 1);

  // Copy the significand words, then strip the exponent and sign bits that
  // share the last copied word.
  const uint64_t *RawWords = api.getRawData();
  std::array<integerPart, NumParts> Fraction;
  std::copy_n(RawWords, Fraction.size(), Fraction.begin());
  if constexpr (TopPartMask != 0) {
    Fraction[TopPart] &= TopPartMask;
  }

  // The last word is assumed to hold the sign bit, the exponent, and possibly
  // some of the trailing significand field.
  const uint64_t TopWord = RawWords[api.getNumWords() - 1];
  const uint64_t BiasedExponent =
      (TopWord >> (TrailingBits % 64)) & ExponentMask;
  // Kept as unsigned 64-bit so the sentinel comparisons below use the same
  // arithmetic as the final exponent store.
  const uint64_t UnbiasedExponent = BiasedExponent - Bias;

  initialize(&S);
  assert(partCount() == Fraction.size());

  sign = static_cast<unsigned int>(TopWord >> ((S.sizeInBits - 1) % 64));

  const bool FractionIsZero =
      llvm::all_of(Fraction, [](integerPart Part) { return Part == 0; });
  const bool EncodesZero = BiasedExponent == 0 && FractionIsZero;

  const auto StoreFraction = [&] {
    std::copy_n(Fraction.begin(), Fraction.size(), significandParts());
  };

  // Infinity exists only for formats with IEEE-754 non-finite behaviour.
  if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {
    if (UnbiasedExponent == ::exponentInf(S) && FractionIsZero) {
      makeInf(sign);
      return;
    }
  }

  // How a NaN is recognised depends on the format's NaN encoding.
  bool EncodesNaN = false;
  if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {
    EncodesNaN = UnbiasedExponent == ::exponentNaN(S) && !FractionIsZero;
  } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {
    const bool FractionIsAllOnes =
        std::all_of(Fraction.begin(), Fraction.end() - 1,
                    [](integerPart Part) { return Part == ~integerPart{0}; }) &&
        (!TopPartMask || Fraction[TopPart] == TopPartMask);
    EncodesNaN = UnbiasedExponent == ::exponentNaN(S) && FractionIsAllOnes;
  } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {
    EncodesNaN = EncodesZero && sign;
  }

  if (EncodesNaN) {
    category = fcNaN;
    exponent = ::exponentNaN(S);
    StoreFraction();
    return;
  }

  if (EncodesZero) {
    makeZero(sign);
    return;
  }

  category = fcNormal;
  StoreFraction();
  if (BiasedExponent == 0) {
    // Denormal: fixed minimum exponent, no implicit integer bit.
    exponent = S.minExponent;
  } else {
    exponent = UnbiasedExponent;
    significandParts()[TopPart] |= IntegerBit; // integer bit
  }
}
3890
3891
void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3892
initFromIEEEAPInt<semIEEEquad>(api);
3893
}
3894
3895
void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3896
initFromIEEEAPInt<semIEEEdouble>(api);
3897
}
3898
3899
void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3900
initFromIEEEAPInt<semIEEEsingle>(api);
3901
}
3902
3903
void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
3904
initFromIEEEAPInt<semBFloat>(api);
3905
}
3906
3907
void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3908
initFromIEEEAPInt<semIEEEhalf>(api);
3909
}
3910
3911
void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
3912
initFromIEEEAPInt<semFloat8E5M2>(api);
3913
}
3914
3915
void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
3916
initFromIEEEAPInt<semFloat8E5M2FNUZ>(api);
3917
}
3918
3919
void IEEEFloat::initFromFloat8E4M3APInt(const APInt &api) {
3920
initFromIEEEAPInt<semFloat8E4M3>(api);
3921
}
3922
3923
void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
3924
initFromIEEEAPInt<semFloat8E4M3FN>(api);
3925
}
3926
3927
void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
3928
initFromIEEEAPInt<semFloat8E4M3FNUZ>(api);
3929
}
3930
3931
void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
3932
initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api);
3933
}
3934
3935
void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
3936
initFromIEEEAPInt<semFloatTF32>(api);
3937
}
3938
3939
void IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt &api) {
3940
initFromIEEEAPInt<semFloat6E3M2FN>(api);
3941
}
3942
3943
void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) {
3944
initFromIEEEAPInt<semFloat6E2M3FN>(api);
3945
}
3946
3947
void IEEEFloat::initFromFloat4E2M1FNAPInt(const APInt &api) {
3948
initFromIEEEAPInt<semFloat4E2M1FN>(api);
3949
}
3950
3951
/// Treat api as containing the bits of a floating point number.
3952
void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3953
assert(api.getBitWidth() == Sem->sizeInBits);
3954
if (Sem == &semIEEEhalf)
3955
return initFromHalfAPInt(api);
3956
if (Sem == &semBFloat)
3957
return initFromBFloatAPInt(api);
3958
if (Sem == &semIEEEsingle)
3959
return initFromFloatAPInt(api);
3960
if (Sem == &semIEEEdouble)
3961
return initFromDoubleAPInt(api);
3962
if (Sem == &semX87DoubleExtended)
3963
return initFromF80LongDoubleAPInt(api);
3964
if (Sem == &semIEEEquad)
3965
return initFromQuadrupleAPInt(api);
3966
if (Sem == &semPPCDoubleDoubleLegacy)
3967
return initFromPPCDoubleDoubleAPInt(api);
3968
if (Sem == &semFloat8E5M2)
3969
return initFromFloat8E5M2APInt(api);
3970
if (Sem == &semFloat8E5M2FNUZ)
3971
return initFromFloat8E5M2FNUZAPInt(api);
3972
if (Sem == &semFloat8E4M3)
3973
return initFromFloat8E4M3APInt(api);
3974
if (Sem == &semFloat8E4M3FN)
3975
return initFromFloat8E4M3FNAPInt(api);
3976
if (Sem == &semFloat8E4M3FNUZ)
3977
return initFromFloat8E4M3FNUZAPInt(api);
3978
if (Sem == &semFloat8E4M3B11FNUZ)
3979
return initFromFloat8E4M3B11FNUZAPInt(api);
3980
if (Sem == &semFloatTF32)
3981
return initFromFloatTF32APInt(api);
3982
if (Sem == &semFloat6E3M2FN)
3983
return initFromFloat6E3M2FNAPInt(api);
3984
if (Sem == &semFloat6E2M3FN)
3985
return initFromFloat6E2M3FNAPInt(api);
3986
if (Sem == &semFloat4E2M1FN)
3987
return initFromFloat4E2M1FNAPInt(api);
3988
3989
llvm_unreachable(nullptr);
3990
}
3991
3992
/// Make this number the largest magnitude normal number in the given
3993
/// semantics.
3994
void IEEEFloat::makeLargest(bool Negative) {
3995
// We want (in interchange format):
3996
// sign = {Negative}
3997
// exponent = 1..10
3998
// significand = 1..1
3999
category = fcNormal;
4000
sign = Negative;
4001
exponent = semantics->maxExponent;
4002
4003
// Use memset to set all but the highest integerPart to all ones.
4004
integerPart *significand = significandParts();
4005
unsigned PartCount = partCount();
4006
memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
4007
4008
// Set the high integerPart especially setting all unused top bits for
4009
// internal consistency.
4010
const unsigned NumUnusedHighBits =
4011
PartCount*integerPartWidth - semantics->precision;
4012
significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
4013
? (~integerPart(0) >> NumUnusedHighBits)
4014
: 0;
4015
4016
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
4017
semantics->nanEncoding == fltNanEncoding::AllOnes)
4018
significand[0] &= ~integerPart(1);
4019
}
4020
4021
/// Make this number the smallest magnitude denormal number in the given
4022
/// semantics.
4023
void IEEEFloat::makeSmallest(bool Negative) {
4024
// We want (in interchange format):
4025
// sign = {Negative}
4026
// exponent = 0..0
4027
// significand = 0..01
4028
category = fcNormal;
4029
sign = Negative;
4030
exponent = semantics->minExponent;
4031
APInt::tcSet(significandParts(), 1, partCount());
4032
}
4033
4034
void IEEEFloat::makeSmallestNormalized(bool Negative) {
4035
// We want (in interchange format):
4036
// sign = {Negative}
4037
// exponent = 0..0
4038
// significand = 10..0
4039
4040
category = fcNormal;
4041
zeroSignificand();
4042
sign = Negative;
4043
exponent = semantics->minExponent;
4044
APInt::tcSetBit(significandParts(), semantics->precision - 1);
4045
}
4046
4047
/// Constructs a value by interpreting \p API as the bit image of a number in
/// the format \p Sem.
IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
  initFromAPInt(&Sem, API);
}
4050
4051
/// Constructs a semIEEEsingle value from the bits of the host float \p f.
IEEEFloat::IEEEFloat(float f) {
  initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
}
4054
4055
/// Constructs a semIEEEdouble value from the bits of the host double \p d.
IEEEFloat::IEEEFloat(double d) {
  initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
}
4058
4059
namespace {
4060
/// Appends every character of \p Str to the end of \p Buffer.
void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
  Buffer.append(Str.begin(), Str.end());
}
4063
4064
/// Removes data from the given significand until it is no more
4065
/// precise than is required for the desired precision.
4066
void AdjustToPrecision(APInt &significand,
4067
int &exp, unsigned FormatPrecision) {
4068
unsigned bits = significand.getActiveBits();
4069
4070
// 196/59 is a very slight overestimate of lg_2(10).
4071
unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
4072
4073
if (bits <= bitsRequired) return;
4074
4075
unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
4076
if (!tensRemovable) return;
4077
4078
exp += tensRemovable;
4079
4080
APInt divisor(significand.getBitWidth(), 1);
4081
APInt powten(significand.getBitWidth(), 10);
4082
while (true) {
4083
if (tensRemovable & 1)
4084
divisor *= powten;
4085
tensRemovable >>= 1;
4086
if (!tensRemovable) break;
4087
powten *= powten;
4088
}
4089
4090
significand = significand.udiv(divisor);
4091
4092
// Truncate the significand down to its active bit count.
4093
significand = significand.trunc(significand.getActiveBits());
4094
}
4095
4096
4097
void AdjustToPrecision(SmallVectorImpl<char> &buffer,
4098
int &exp, unsigned FormatPrecision) {
4099
unsigned N = buffer.size();
4100
if (N <= FormatPrecision) return;
4101
4102
// The most significant figures are the last ones in the buffer.
4103
unsigned FirstSignificant = N - FormatPrecision;
4104
4105
// Round.
4106
// FIXME: this probably shouldn't use 'round half up'.
4107
4108
// Rounding down is just a truncation, except we also want to drop
4109
// trailing zeros from the new result.
4110
if (buffer[FirstSignificant - 1] < '5') {
4111
while (FirstSignificant < N && buffer[FirstSignificant] == '0')
4112
FirstSignificant++;
4113
4114
exp += FirstSignificant;
4115
buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4116
return;
4117
}
4118
4119
// Rounding up requires a decimal add-with-carry. If we continue
4120
// the carry, the newly-introduced zeros will just be truncated.
4121
for (unsigned I = FirstSignificant; I != N; ++I) {
4122
if (buffer[I] == '9') {
4123
FirstSignificant++;
4124
} else {
4125
buffer[I]++;
4126
break;
4127
}
4128
}
4129
4130
// If we carried through, we have exactly one digit of precision.
4131
if (FirstSignificant == N) {
4132
exp += FirstSignificant;
4133
buffer.clear();
4134
buffer.push_back('1');
4135
return;
4136
}
4137
4138
exp += FirstSignificant;
4139
buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4140
}
4141
4142
void toStringImpl(SmallVectorImpl<char> &Str, const bool isNeg, int exp,
4143
APInt significand, unsigned FormatPrecision,
4144
unsigned FormatMaxPadding, bool TruncateZero) {
4145
const int semanticsPrecision = significand.getBitWidth();
4146
4147
if (isNeg)
4148
Str.push_back('-');
4149
4150
// Set FormatPrecision if zero. We want to do this before we
4151
// truncate trailing zeros, as those are part of the precision.
4152
if (!FormatPrecision) {
4153
// We use enough digits so the number can be round-tripped back to an
4154
// APFloat. The formula comes from "How to Print Floating-Point Numbers
4155
// Accurately" by Steele and White.
4156
// FIXME: Using a formula based purely on the precision is conservative;
4157
// we can print fewer digits depending on the actual value being printed.
4158
4159
// FormatPrecision = 2 + floor(significandBits / lg_2(10))
4160
FormatPrecision = 2 + semanticsPrecision * 59 / 196;
4161
}
4162
4163
// Ignore trailing binary zeros.
4164
int trailingZeros = significand.countr_zero();
4165
exp += trailingZeros;
4166
significand.lshrInPlace(trailingZeros);
4167
4168
// Change the exponent from 2^e to 10^e.
4169
if (exp == 0) {
4170
// Nothing to do.
4171
} else if (exp > 0) {
4172
// Just shift left.
4173
significand = significand.zext(semanticsPrecision + exp);
4174
significand <<= exp;
4175
exp = 0;
4176
} else { /* exp < 0 */
4177
int texp = -exp;
4178
4179
// We transform this using the identity:
4180
// (N)(2^-e) == (N)(5^e)(10^-e)
4181
// This means we have to multiply N (the significand) by 5^e.
4182
// To avoid overflow, we have to operate on numbers large
4183
// enough to store N * 5^e:
4184
// log2(N * 5^e) == log2(N) + e * log2(5)
4185
// <= semantics->precision + e * 137 / 59
4186
// (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
4187
4188
unsigned precision = semanticsPrecision + (137 * texp + 136) / 59;
4189
4190
// Multiply significand by 5^e.
4191
// N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
4192
significand = significand.zext(precision);
4193
APInt five_to_the_i(precision, 5);
4194
while (true) {
4195
if (texp & 1)
4196
significand *= five_to_the_i;
4197
4198
texp >>= 1;
4199
if (!texp)
4200
break;
4201
five_to_the_i *= five_to_the_i;
4202
}
4203
}
4204
4205
AdjustToPrecision(significand, exp, FormatPrecision);
4206
4207
SmallVector<char, 256> buffer;
4208
4209
// Fill the buffer.
4210
unsigned precision = significand.getBitWidth();
4211
if (precision < 4) {
4212
// We need enough precision to store the value 10.
4213
precision = 4;
4214
significand = significand.zext(precision);
4215
}
4216
APInt ten(precision, 10);
4217
APInt digit(precision, 0);
4218
4219
bool inTrail = true;
4220
while (significand != 0) {
4221
// digit <- significand % 10
4222
// significand <- significand / 10
4223
APInt::udivrem(significand, ten, significand, digit);
4224
4225
unsigned d = digit.getZExtValue();
4226
4227
// Drop trailing zeros.
4228
if (inTrail && !d)
4229
exp++;
4230
else {
4231
buffer.push_back((char) ('0' + d));
4232
inTrail = false;
4233
}
4234
}
4235
4236
assert(!buffer.empty() && "no characters in buffer!");
4237
4238
// Drop down to FormatPrecision.
4239
// TODO: don't do more precise calculations above than are required.
4240
AdjustToPrecision(buffer, exp, FormatPrecision);
4241
4242
unsigned NDigits = buffer.size();
4243
4244
// Check whether we should use scientific notation.
4245
bool FormatScientific;
4246
if (!FormatMaxPadding)
4247
FormatScientific = true;
4248
else {
4249
if (exp >= 0) {
4250
// 765e3 --> 765000
4251
// ^^^
4252
// But we shouldn't make the number look more precise than it is.
4253
FormatScientific = ((unsigned) exp > FormatMaxPadding ||
4254
NDigits + (unsigned) exp > FormatPrecision);
4255
} else {
4256
// Power of the most significant digit.
4257
int MSD = exp + (int) (NDigits - 1);
4258
if (MSD >= 0) {
4259
// 765e-2 == 7.65
4260
FormatScientific = false;
4261
} else {
4262
// 765e-5 == 0.00765
4263
// ^ ^^
4264
FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
4265
}
4266
}
4267
}
4268
4269
// Scientific formatting is pretty straightforward.
4270
if (FormatScientific) {
4271
exp += (NDigits - 1);
4272
4273
Str.push_back(buffer[NDigits-1]);
4274
Str.push_back('.');
4275
if (NDigits == 1 && TruncateZero)
4276
Str.push_back('0');
4277
else
4278
for (unsigned I = 1; I != NDigits; ++I)
4279
Str.push_back(buffer[NDigits-1-I]);
4280
// Fill with zeros up to FormatPrecision.
4281
if (!TruncateZero && FormatPrecision > NDigits - 1)
4282
Str.append(FormatPrecision - NDigits + 1, '0');
4283
// For !TruncateZero we use lower 'e'.
4284
Str.push_back(TruncateZero ? 'E' : 'e');
4285
4286
Str.push_back(exp >= 0 ? '+' : '-');
4287
if (exp < 0)
4288
exp = -exp;
4289
SmallVector<char, 6> expbuf;
4290
do {
4291
expbuf.push_back((char) ('0' + (exp % 10)));
4292
exp /= 10;
4293
} while (exp);
4294
// Exponent always at least two digits if we do not truncate zeros.
4295
if (!TruncateZero && expbuf.size() < 2)
4296
expbuf.push_back('0');
4297
for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
4298
Str.push_back(expbuf[E-1-I]);
4299
return;
4300
}
4301
4302
// Non-scientific, positive exponents.
4303
if (exp >= 0) {
4304
for (unsigned I = 0; I != NDigits; ++I)
4305
Str.push_back(buffer[NDigits-1-I]);
4306
for (unsigned I = 0; I != (unsigned) exp; ++I)
4307
Str.push_back('0');
4308
return;
4309
}
4310
4311
// Non-scientific, negative exponents.
4312
4313
// The number of digits to the left of the decimal point.
4314
int NWholeDigits = exp + (int) NDigits;
4315
4316
unsigned I = 0;
4317
if (NWholeDigits > 0) {
4318
for (; I != (unsigned) NWholeDigits; ++I)
4319
Str.push_back(buffer[NDigits-I-1]);
4320
Str.push_back('.');
4321
} else {
4322
unsigned NZeros = 1 + (unsigned) -NWholeDigits;
4323
4324
Str.push_back('0');
4325
Str.push_back('.');
4326
for (unsigned Z = 1; Z != NZeros; ++Z)
4327
Str.push_back('0');
4328
}
4329
4330
for (; I != NDigits; ++I)
4331
Str.push_back(buffer[NDigits-I-1]);
4332
4333
}
4334
} // namespace
4335
4336
/// Appends a decimal rendering of this value to \p Str.
///
/// Infinities are written as "+Inf"/"-Inf" and NaNs as "NaN". Zero is written
/// here directly, in scientific form when \p FormatMaxPadding is 0 and as a
/// plain "0" otherwise. All other values are split into an integer
/// significand and a binary exponent and handed to toStringImpl together with
/// the three formatting parameters.
void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
                         unsigned FormatMaxPadding, bool TruncateZero) const {
  switch (category) {
  case fcInfinity:
    append(Str, isNegative() ? "-Inf" : "+Inf");
    return;

  case fcNaN:
    append(Str, "NaN");
    return;

  case fcZero: {
    if (isNegative())
      Str.push_back('-');

    if (FormatMaxPadding) {
      Str.push_back('0');
      return;
    }

    // Scientific notation was requested.
    if (TruncateZero) {
      append(Str, "0.0E+0");
      return;
    }

    append(Str, "0.0");
    if (FormatPrecision > 1)
      Str.append(FormatPrecision - 1, '0');
    append(Str, "e+00");
    return;
  }

  case fcNormal:
    break;
  }

  // Decompose the number into an APInt and an exponent.
  const unsigned Precision = semantics->precision;
  const int BinaryExp = exponent - (static_cast<int>(Precision) - 1);
  APInt Significand(
      Precision, ArrayRef(significandParts(), partCountForBits(Precision)));

  toStringImpl(Str, isNegative(), BinaryExp, Significand, FormatPrecision,
               FormatMaxPadding, TruncateZero);
}
4378
4379
/// Reports whether this value has a reciprocal that is exactly representable
/// as a normal number, and stores it in \p inv when \p inv is non-null.
///
/// \returns true only when the value is finite, non-zero and a power of two,
/// and the division 1/x is exact and does not produce a denormal.
bool IEEEFloat::getExactInverse(APFloat *inv) const {
  // Special floats and denormals have no exact inverse.
  if (!isFiniteNonZero())
    return false;

  // Check that the number is a power of two by making sure that only the
  // integer bit is set in the significand.
  const unsigned IntegerBitIdx = semantics->precision - 1;
  if (significandLSB() != IntegerBitIdx)
    return false;

  // Get the inverse.
  IEEEFloat Reciprocal(*semantics, 1ULL);
  const opStatus DivStatus = Reciprocal.divide(*this, rmNearestTiesToEven);
  if (DivStatus != opOK)
    return false;

  // Avoid multiplication with a denormal, it is not safe on all platforms and
  // may be slower than a normal division.
  if (Reciprocal.isDenormal())
    return false;

  assert(Reciprocal.isFiniteNonZero() &&
         Reciprocal.significandLSB() == Reciprocal.semantics->precision - 1);

  if (inv)
    *inv = APFloat(Reciprocal, *semantics);

  return true;
}
4407
4408
int IEEEFloat::getExactLog2Abs() const {
4409
if (!isFinite() || isZero())
4410
return INT_MIN;
4411
4412
const integerPart *Parts = significandParts();
4413
const int PartCount = partCountForBits(semantics->precision);
4414
4415
int PopCount = 0;
4416
for (int i = 0; i < PartCount; ++i) {
4417
PopCount += llvm::popcount(Parts[i]);
4418
if (PopCount > 1)
4419
return INT_MIN;
4420
}
4421
4422
if (exponent != semantics->minExponent)
4423
return exponent;
4424
4425
int CountrParts = 0;
4426
for (int i = 0; i < PartCount;
4427
++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
4428
if (Parts[i] != 0) {
4429
return exponent - semantics->precision + CountrParts +
4430
llvm::countr_zero(Parts[i]) + 1;
4431
}
4432
}
4433
4434
llvm_unreachable("didn't find the set bit");
4435
}
4436
4437
bool IEEEFloat::isSignaling() const {
4438
if (!isNaN())
4439
return false;
4440
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
4441
semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4442
return false;
4443
4444
// IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4445
// first bit of the trailing significand being 0.
4446
return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
4447
}
4448
4449
/// IEEE-754R 2008 5.3.1: nextUp/nextDown.
4450
///
4451
/// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
4452
/// appropriate sign switching before/after the computation.
4453
IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
4454
// If we are performing nextDown, swap sign so we have -x.
4455
if (nextDown)
4456
changeSign();
4457
4458
// Compute nextUp(x)
4459
opStatus result = opOK;
4460
4461
// Handle each float category separately.
4462
switch (category) {
4463
case fcInfinity:
4464
// nextUp(+inf) = +inf
4465
if (!isNegative())
4466
break;
4467
// nextUp(-inf) = -getLargest()
4468
makeLargest(true);
4469
break;
4470
case fcNaN:
4471
// IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
4472
// IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
4473
// change the payload.
4474
if (isSignaling()) {
4475
result = opInvalidOp;
4476
// For consistency, propagate the sign of the sNaN to the qNaN.
4477
makeNaN(false, isNegative(), nullptr);
4478
}
4479
break;
4480
case fcZero:
4481
// nextUp(pm 0) = +getSmallest()
4482
makeSmallest(false);
4483
break;
4484
case fcNormal:
4485
// nextUp(-getSmallest()) = -0
4486
if (isSmallest() && isNegative()) {
4487
APInt::tcSet(significandParts(), 0, partCount());
4488
category = fcZero;
4489
exponent = 0;
4490
if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
4491
sign = false;
4492
break;
4493
}
4494
4495
if (isLargest() && !isNegative()) {
4496
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4497
// nextUp(getLargest()) == NAN
4498
makeNaN();
4499
break;
4500
} else if (semantics->nonFiniteBehavior ==
4501
fltNonfiniteBehavior::FiniteOnly) {
4502
// nextUp(getLargest()) == getLargest()
4503
break;
4504
} else {
4505
// nextUp(getLargest()) == INFINITY
4506
APInt::tcSet(significandParts(), 0, partCount());
4507
category = fcInfinity;
4508
exponent = semantics->maxExponent + 1;
4509
break;
4510
}
4511
}
4512
4513
// nextUp(normal) == normal + inc.
4514
if (isNegative()) {
4515
// If we are negative, we need to decrement the significand.
4516
4517
// We only cross a binade boundary that requires adjusting the exponent
4518
// if:
4519
// 1. exponent != semantics->minExponent. This implies we are not in the
4520
// smallest binade or are dealing with denormals.
4521
// 2. Our significand excluding the integral bit is all zeros.
4522
bool WillCrossBinadeBoundary =
4523
exponent != semantics->minExponent && isSignificandAllZeros();
4524
4525
// Decrement the significand.
4526
//
4527
// We always do this since:
4528
// 1. If we are dealing with a non-binade decrement, by definition we
4529
// just decrement the significand.
4530
// 2. If we are dealing with a normal -> normal binade decrement, since
4531
// we have an explicit integral bit the fact that all bits but the
4532
// integral bit are zero implies that subtracting one will yield a
4533
// significand with 0 integral bit and 1 in all other spots. Thus we
4534
// must just adjust the exponent and set the integral bit to 1.
4535
// 3. If we are dealing with a normal -> denormal binade decrement,
4536
// since we set the integral bit to 0 when we represent denormals, we
4537
// just decrement the significand.
4538
integerPart *Parts = significandParts();
4539
APInt::tcDecrement(Parts, partCount());
4540
4541
if (WillCrossBinadeBoundary) {
4542
// Our result is a normal number. Do the following:
4543
// 1. Set the integral bit to 1.
4544
// 2. Decrement the exponent.
4545
APInt::tcSetBit(Parts, semantics->precision - 1);
4546
exponent--;
4547
}
4548
} else {
4549
// If we are positive, we need to increment the significand.
4550
4551
// We only cross a binade boundary that requires adjusting the exponent if
4552
// the input is not a denormal and all of said input's significand bits
4553
// are set. If all of said conditions are true: clear the significand, set
4554
// the integral bit to 1, and increment the exponent. If we have a
4555
// denormal always increment since moving denormals and the numbers in the
4556
// smallest normal binade have the same exponent in our representation.
4557
bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
4558
4559
if (WillCrossBinadeBoundary) {
4560
integerPart *Parts = significandParts();
4561
APInt::tcSet(Parts, 0, partCount());
4562
APInt::tcSetBit(Parts, semantics->precision - 1);
4563
assert(exponent != semantics->maxExponent &&
4564
"We can not increment an exponent beyond the maxExponent allowed"
4565
" by the given floating point semantics.");
4566
exponent++;
4567
} else {
4568
incrementSignificand();
4569
}
4570
}
4571
break;
4572
}
4573
4574
// If we are performing nextDown, swap sign so we have -nextUp(-x)
4575
if (nextDown)
4576
changeSign();
4577
4578
return result;
4579
}
4580
4581
/// Returns the value the file-level ::exponentNaN helper yields for this
/// object's semantics.
APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
  return ::exponentNaN(*semantics);
}
4584
4585
/// Returns the value the file-level ::exponentInf helper yields for this
/// object's semantics.
APFloatBase::ExponentType IEEEFloat::exponentInf() const {
  return ::exponentInf(*semantics);
}
4588
4589
/// Returns the value the file-level ::exponentZero helper yields for this
/// object's semantics.
APFloatBase::ExponentType IEEEFloat::exponentZero() const {
  return ::exponentZero(*semantics);
}
4592
4593
void IEEEFloat::makeInf(bool Negative) {
4594
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4595
llvm_unreachable("This floating point format does not support Inf");
4596
4597
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4598
// There is no Inf, so make NaN instead.
4599
makeNaN(false, Negative);
4600
return;
4601
}
4602
category = fcInfinity;
4603
sign = Negative;
4604
exponent = exponentInf();
4605
APInt::tcSet(significandParts(), 0, partCount());
4606
}
4607
4608
void IEEEFloat::makeZero(bool Negative) {
4609
category = fcZero;
4610
sign = Negative;
4611
if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
4612
// Merge negative zero to positive because 0b10000...000 is used for NaN
4613
sign = false;
4614
}
4615
exponent = exponentZero();
4616
APInt::tcSet(significandParts(), 0, partCount());
4617
}
4618
4619
void IEEEFloat::makeQuiet() {
4620
assert(isNaN());
4621
if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
4622
APInt::tcSetBit(significandParts(), semantics->precision - 2);
4623
}
4624
4625
int ilogb(const IEEEFloat &Arg) {
4626
if (Arg.isNaN())
4627
return IEEEFloat::IEK_NaN;
4628
if (Arg.isZero())
4629
return IEEEFloat::IEK_Zero;
4630
if (Arg.isInfinity())
4631
return IEEEFloat::IEK_Inf;
4632
if (!Arg.isDenormal())
4633
return Arg.exponent;
4634
4635
IEEEFloat Normalized(Arg);
4636
int SignificandBits = Arg.getSemantics().precision - 1;
4637
4638
Normalized.exponent += SignificandBits;
4639
Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
4640
return Normalized.exponent - SignificandBits;
4641
}
4642
4643
/// Return \p X scaled by 2^\p Exp, rounded with \p RoundingMode. A NaN result
/// is quieted.
IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
  const fltSemantics &Sem = X.getSemantics();
  auto MaxExp = Sem.maxExponent;
  auto MinExp = Sem.minExponent;

  // Adding a wildly out-of-scale Exp straight to X.exponent would overflow.
  // Clamp it first, to a range wide enough that clamping cannot change the
  // result: the distance between the largest possible exponent and the
  // normalized exponent of half the smallest denormal.
  int SignificandBits = Sem.precision - 1;
  int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;

  // The bounds sit one past the range ends so normalize() handles overflow.
  const int Delta = std::clamp(Exp, -MaxIncrement - 1, MaxIncrement);
  X.exponent += Delta;
  X.normalize(RoundingMode, lfExactlyZero);

  if (X.isNaN())
    X.makeQuiet();
  return X;
}
4663
4664
/// Split \p Val into a fraction in +/-[0.5, 1.0) (returned) and a power of
/// two (stored in \p Exp). For NaN and infinity \p Exp keeps the IEK_*
/// sentinel produced by ilogb; a NaN input is returned quieted and an
/// infinity is returned unchanged. For zero, \p Exp is set to 0.
IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
  Exp = ilogb(Val);

  if (Exp == IEEEFloat::IEK_Inf)
    return Val;

  if (Exp == IEEEFloat::IEK_NaN) {
    // Quiet signalling NaNs.
    IEEEFloat Quiet(Val);
    Quiet.makeQuiet();
    return Quiet;
  }

  // ilogb describes a significand in +/-[1.0, 2.0); frexp wants
  // +/-[0.5, 1.0), hence the extra 1 for non-zero values.
  if (Exp == IEEEFloat::IEK_Zero)
    Exp = 0;
  else
    Exp = Exp + 1;

  return scalbn(Val, -Exp, RM);
}
4682
4683
/// Construct a double-double whose two components are default-constructed
/// IEEE doubles. \p S must be semPPCDoubleDouble.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
    : Semantics(&S),
      Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
  assert(&semPPCDoubleDouble == Semantics);
}
4688
4689
/// Construct a double-double whose two IEEE-double components are created
/// with the `uninitialized` tag. \p S must be semPPCDoubleDouble.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
    : Semantics(&S),
      Floats(new APFloat[2]{
          APFloat(semIEEEdouble, uninitialized),
          APFloat(semIEEEdouble, uninitialized),
      }) {
  assert(&semPPCDoubleDouble == Semantics);
}
4695
4696
/// Construct a double-double whose first component is built from \p I and
/// whose second component is a default-constructed IEEE double. \p S must be
/// semPPCDoubleDouble.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
    : Semantics(&S),
      Floats(new APFloat[2]{
          APFloat(semIEEEdouble, I),
          APFloat(semIEEEdouble),
      }) {
  assert(&semPPCDoubleDouble == Semantics);
}
4701
4702
/// Construct a double-double from raw bits: word 0 of \p I becomes the first
/// IEEE double and word 1 the second. \p S must be semPPCDoubleDouble.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
    : Semantics(&S),
      Floats(new APFloat[2]{
          APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
          APFloat(semIEEEdouble, APInt(64, I.getRawData()[1])),
      }) {
  assert(&semPPCDoubleDouble == Semantics);
}
4709
4710
/// Construct a double-double by taking ownership of two existing components.
/// \p S must be semPPCDoubleDouble and both components must use
/// semIEEEdouble.
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
                             APFloat &&Second)
    : Semantics(&S),
      Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
  assert(&semPPCDoubleDouble == Semantics);
  // Check the stored components rather than the (now moved-from) arguments.
  assert(&semIEEEdouble == &Floats[0].getSemantics());
  assert(&semIEEEdouble == &Floats[1].getSemantics());
}
4718
4719
DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
4720
: Semantics(RHS.Semantics),
4721
Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
4722
APFloat(RHS.Floats[1])}
4723
: nullptr) {
4724
assert(Semantics == &semPPCDoubleDouble);
4725
}
4726
4727
/// Move constructor. Takes over the component storage of \p RHS and marks
/// \p RHS with semBogus.
DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
    : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
  assert(&semPPCDoubleDouble == Semantics);
  RHS.Semantics = &semBogus;
}
4732
4733
/// Copy assignment. When both sides share semantics and \p RHS has storage,
/// the two components are assigned in place; otherwise (and unless this is a
/// self-assignment) the object is destroyed and copy-constructed anew.
DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
  const bool AssignInPlace = Semantics == RHS.Semantics && RHS.Floats;
  if (AssignInPlace) {
    for (unsigned I = 0; I != 2; ++I)
      Floats[I] = RHS.Floats[I];
    return *this;
  }

  if (this == &RHS)
    return *this;

  this->~DoubleAPFloat();
  new (this) DoubleAPFloat(RHS);
  return *this;
}
4743
4744
// Implement addition, subtraction, multiplication and division based on:
4745
// "Software for Doubled-Precision Floating-Point Computations",
4746
// by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
4747
/// Core of double-double addition: compute (a + aa) + (c + cc) and store the
/// result in Floats[0] (high part) and Floats[1] (low part).
///
/// \returns the OR of the statuses of the individual component operations
/// that contributed to the stored result.
APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
                                         const APFloat &c, const APFloat &cc,
                                         roundingMode RM) {
  int Status = opOK;

  // z = a + c
  APFloat z = a;
  Status |= z.add(c, RM);

  if (z.isFinite()) {
    // q = a - z;
    APFloat q = a;
    Status |= q.subtract(z, RM);

    // zz = q + c + (a - (q + z)) + aa + cc;
    // a - (q + z) is formed as -((q + z) - a) so q can be reused in place.
    auto zz = q;
    Status |= zz.add(c, RM);
    Status |= q.add(z, RM);
    Status |= q.subtract(a, RM);
    q.changeSign();
    Status |= zz.add(q, RM);
    Status |= zz.add(aa, RM);
    Status |= zz.add(cc, RM);

    // A positive-zero correction means z is already the whole answer.
    if (zz.isZero() && !zz.isNegative()) {
      Floats[0] = std::move(z);
      Floats[1].makeZero(/* Neg = */ false);
      return opOK;
    }

    Floats[0] = z;
    Status |= Floats[0].add(zz, RM);
    if (!Floats[0].isFinite()) {
      Floats[1].makeZero(/* Neg = */ false);
      return static_cast<opStatus>(Status);
    }

    // Floats[1] = (z - Floats[0]) + zz
    Floats[1] = std::move(z);
    Status |= Floats[1].subtract(Floats[0], RM);
    Status |= Floats[1].add(zz, RM);
    return static_cast<opStatus>(Status);
  }

  // z is not finite. Anything other than an infinity is stored as is.
  if (!z.isInfinity()) {
    Floats[0] = std::move(z);
    Floats[1].makeZero(/* Neg = */ false);
    return static_cast<opStatus>(Status);
  }

  // a + c became infinite. Start over and accumulate the low parts first,
  // then the high part of smaller magnitude, then the larger one.
  Status = opOK;
  const bool AIsLarger =
      a.compareAbsoluteValue(c) == APFloat::cmpGreaterThan;
  const APFloat &Larger = AIsLarger ? a : c;
  const APFloat &Smaller = AIsLarger ? c : a;

  // z = cc + aa + Smaller + Larger;
  z = cc;
  Status |= z.add(aa, RM);
  Status |= z.add(Smaller, RM);
  Status |= z.add(Larger, RM);
  if (!z.isFinite()) {
    Floats[0] = std::move(z);
    Floats[1].makeZero(/* Neg = */ false);
    return static_cast<opStatus>(Status);
  }

  Floats[0] = z;
  APFloat zz = aa;
  Status |= zz.add(cc, RM);

  // Floats[1] = Larger - z + Smaller + zz;
  Floats[1] = Larger;
  Status |= Floats[1].subtract(z, RM);
  Status |= Floats[1].add(Smaller, RM);
  Status |= Floats[1].add(zz, RM);
  return static_cast<opStatus>(Status);
}
4825
4826
/// Add \p LHS and \p RHS into \p Out, resolving NaN, zero and infinity
/// operands here and delegating the normal + normal case to addImpl.
///
/// \returns opInvalidOp for the sum of opposite-signed infinities, opOK for
/// every other special case, and addImpl's status otherwise.
APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
                                                const DoubleAPFloat &RHS,
                                                DoubleAPFloat &Out,
                                                roundingMode RM) {
  // Out may be the same object as an operand; read the categories before
  // anything is written.
  const auto LHSCategory = LHS.getCategory();
  const auto RHSCategory = RHS.getCategory();

  // A NaN operand is passed through; LHS takes precedence.
  if (LHSCategory == fcNaN) {
    Out = LHS;
    return opOK;
  }
  if (RHSCategory == fcNaN) {
    Out = RHS;
    return opOK;
  }

  // Adding a zero yields the other operand.
  if (LHSCategory == fcZero) {
    Out = RHS;
    return opOK;
  }
  if (RHSCategory == fcZero) {
    Out = LHS;
    return opOK;
  }

  // Inf + (-Inf) is invalid.
  const bool BothInfinite =
      LHSCategory == fcInfinity && RHSCategory == fcInfinity;
  if (BothInfinite && LHS.isNegative() != RHS.isNegative()) {
    Out.makeNaN(false, Out.isNegative(), nullptr);
    return opInvalidOp;
  }

  // Any remaining infinity wins outright.
  if (LHSCategory == fcInfinity) {
    Out = LHS;
    return opOK;
  }
  if (RHSCategory == fcInfinity) {
    Out = RHS;
    return opOK;
  }
  assert(LHSCategory == fcNormal && RHSCategory == fcNormal);

  // Copy the components so addImpl can overwrite Out safely.
  APFloat A(LHS.Floats[0]);
  APFloat AA(LHS.Floats[1]);
  APFloat C(RHS.Floats[0]);
  APFloat CC(RHS.Floats[1]);
  assert(&A.getSemantics() == &semIEEEdouble);
  assert(&AA.getSemantics() == &semIEEEdouble);
  assert(&C.getSemantics() == &semIEEEdouble);
  assert(&CC.getSemantics() == &semIEEEdouble);
  assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
  assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
  return Out.addImpl(A, AA, C, CC, RM);
}
4871
4872
/// In-place addition: *this += RHS, rounded according to \p RM.
APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
                                     roundingMode RM) {
  DoubleAPFloat &Self = *this;
  return addWithSpecial(Self, RHS, Self, RM);
}
4876
4877
/// In-place subtraction: *this -= RHS, rounded according to \p RM.
///
/// Implemented as *this + (-RHS) on a negated copy of \p RHS. Negating
/// *this around the addition instead (computing -((-*this) + RHS)) is wrong
/// in three situations:
///   * when \p RHS is the same object as *this, negating *this also negates
///     RHS and the result is 2 * *this rather than zero;
///   * an exactly cancelling difference comes out as -0 instead of +0 under
///     round-to-nearest;
///   * directed rounding modes are applied to the negated sum and therefore
///     round in the opposite direction.
///
/// \returns the status reported by add().
APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
                                          roundingMode RM) {
  // Copy first so *this is never modified before the addition reads it.
  DoubleAPFloat NegatedRHS(RHS);
  NegatedRHS.changeSign();
  return add(NegatedRHS, RM);
}
4884
4885
/// In-place multiplication: *this *= RHS, rounded according to \p RM.
///
/// Special categories are resolved first (see the graph below); two normal
/// operands go through the double-double product built from the component
/// doubles.
///
/// \returns opInvalidOp for Zero * Inf (the result is a NaN), opOK for the
/// other special cases, and the OR of the component operation statuses for
/// normal operands.
APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
                                          APFloat::roundingMode RM) {
  const auto &LHS = *this;
  auto &Out = *this;
  /* Interesting observation: For special categories, finding the lowest
     common ancestor of the following layered graph gives the correct
     return category:

        NaN
       /   \
     Zero  Inf
       \   /
       Normal

     e.g. NaN * NaN = NaN
          Zero * Inf = NaN
          Normal * Zero = Zero
          Normal * Inf = Inf
  */
  // Out is the same object as LHS, so read both categories before any write.
  const auto LHSCategory = LHS.getCategory();
  const auto RHSCategory = RHS.getCategory();

  if (LHSCategory == fcNaN) {
    // The result is the LHS NaN, which is already stored in Out.
    return opOK;
  }
  if (RHSCategory == fcNaN) {
    Out = RHS;
    return opOK;
  }
  if ((LHSCategory == fcZero && RHSCategory == fcInfinity) ||
      (LHSCategory == fcInfinity && RHSCategory == fcZero)) {
    // Zero * Inf is an invalid operation, reported the same way
    // addWithSpecial reports Inf - Inf.
    Out.makeNaN(false, false, nullptr);
    return opInvalidOp;
  }
  if (LHSCategory == fcZero || LHSCategory == fcInfinity) {
    // The magnitude is that of LHS; the sign of a product is the XOR of the
    // operand signs, so flip the high part when RHS is negative. RHS is read
    // before the flip, which keeps x.multiply(x) correct.
    if (RHS.isNegative())
      Floats[0].changeSign();
    return opOK;
  }
  if (RHSCategory == fcZero || RHSCategory == fcInfinity) {
    // The magnitude is that of RHS; apply the LHS sign on top of it. The LHS
    // sign must be captured before Out (== LHS) is overwritten.
    const bool LHSNegative = LHS.isNegative();
    Out = RHS;
    if (LHSNegative)
      Floats[0].changeSign();
    return opOK;
  }
  assert(LHSCategory == fcNormal && RHSCategory == fcNormal &&
         "Special cases not handled exhaustively");

  int Status = opOK;
  APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
  // t = a * c
  APFloat T = A;
  Status |= T.multiply(C, RM);
  if (!T.isFiniteNonZero()) {
    // The leading product is zero or non-finite; it is the whole result.
    Floats[0] = T;
    Floats[1].makeZero(/* Neg = */ false);
    return static_cast<opStatus>(Status);
  }

  // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
  APFloat Tau = A;
  T.changeSign();
  Status |= Tau.fusedMultiplyAdd(C, T, RM);
  T.changeSign();
  {
    // v = a * d
    APFloat V = A;
    Status |= V.multiply(D, RM);
    // w = b * c
    APFloat W = B;
    Status |= W.multiply(C, RM);
    Status |= V.add(W, RM);
    // tau += v + w
    Status |= Tau.add(V, RM);
  }
  // u = t + tau
  APFloat U = T;
  Status |= U.add(Tau, RM);

  Floats[0] = U;
  if (!U.isFinite()) {
    Floats[1].makeZero(/* Neg = */ false);
  } else {
    // Floats[1] = (t - u) + tau
    Status |= T.subtract(U, RM);
    Status |= T.add(Tau, RM);
    Floats[1] = T;
  }
  return static_cast<opStatus>(Status);
}
4970
4971
/// In-place division, carried out on semPPCDoubleDoubleLegacy values built
/// from the raw bits of both operands; the quotient's bits are then stored
/// back into *this.
APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
                                        APFloat::roundingMode RM) {
  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
  APFloat Quotient(semPPCDoubleDoubleLegacy, bitcastToAPInt());
  const APFloat Divisor(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt());
  const auto Status = Quotient.divide(Divisor, RM);
  *this = DoubleAPFloat(semPPCDoubleDouble, Quotient.bitcastToAPInt());
  return Status;
}
4980
4981
/// In-place remainder, carried out on semPPCDoubleDoubleLegacy values built
/// from the raw bits of both operands; the result's bits are then stored
/// back into *this.
APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
  APFloat Result(semPPCDoubleDoubleLegacy, bitcastToAPInt());
  const APFloat Divisor(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt());
  const auto Status = Result.remainder(Divisor);
  *this = DoubleAPFloat(semPPCDoubleDouble, Result.bitcastToAPInt());
  return Status;
}
4989
4990
/// In-place mod, carried out on semPPCDoubleDoubleLegacy values built from
/// the raw bits of both operands; the result's bits are then stored back
/// into *this.
APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
  APFloat Result(semPPCDoubleDoubleLegacy, bitcastToAPInt());
  const APFloat Divisor(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt());
  const auto Status = Result.mod(Divisor);
  *this = DoubleAPFloat(semPPCDoubleDouble, Result.bitcastToAPInt());
  return Status;
}
4997
4998
/// In-place fused multiply-add, carried out on semPPCDoubleDoubleLegacy
/// values built from the raw bits of all three operands; the result's bits
/// are then stored back into *this.
APFloat::opStatus
DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
                                const DoubleAPFloat &Addend,
                                APFloat::roundingMode RM) {
  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
  APFloat Result(semPPCDoubleDoubleLegacy, bitcastToAPInt());
  const APFloat LegacyMultiplicand(semPPCDoubleDoubleLegacy,
                                   Multiplicand.bitcastToAPInt());
  const APFloat LegacyAddend(semPPCDoubleDoubleLegacy,
                             Addend.bitcastToAPInt());
  const auto Status =
      Result.fusedMultiplyAdd(LegacyMultiplicand, LegacyAddend, RM);
  *this = DoubleAPFloat(semPPCDoubleDouble, Result.bitcastToAPInt());
  return Status;
}
5010
5011
/// Round to an integral value in place, carried out on a
/// semPPCDoubleDoubleLegacy value built from this object's raw bits.
APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
  APFloat Legacy(semPPCDoubleDoubleLegacy, bitcastToAPInt());
  const auto Status = Legacy.roundToIntegral(RM);
  *this = DoubleAPFloat(semPPCDoubleDouble, Legacy.bitcastToAPInt());
  return Status;
}
5018
5019
void DoubleAPFloat::changeSign() {
5020
Floats[0].changeSign();
5021
Floats[1].changeSign();
5022
}
5023
5024
/// Compare the magnitudes of *this and \p RHS.
///
/// The high parts decide unless their magnitudes are equal. In that case the
/// low-part magnitudes are compared, taking into account whether each low
/// part has the opposite sign of its high part.
APFloat::cmpResult
DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const {
  const auto HighOrder = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
  if (HighOrder != cmpEqual)
    return HighOrder;

  const auto LowOrder = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
  if (LowOrder != cmpLessThan && LowOrder != cmpGreaterThan)
    return LowOrder;

  // "Opposes" means the low part's sign differs from the high part's sign.
  const bool LowOpposes = Floats[0].isNegative() != Floats[1].isNegative();
  const bool RHSLowOpposes =
      RHS.Floats[0].isNegative() != RHS.Floats[1].isNegative();

  // Only one side has an opposing low part: that side is the smaller one.
  if (LowOpposes != RHSLowOpposes)
    return LowOpposes ? cmpLessThan : cmpGreaterThan;

  // Neither low part opposes: the low-part ordering carries over.
  if (!LowOpposes)
    return LowOrder;

  // Both low parts oppose: the low-part ordering is reversed.
  return LowOrder == cmpLessThan ? cmpGreaterThan : cmpLessThan;
}
5044
5045
/// The category of a double-double is the category of its first component.
APFloat::fltCategory DoubleAPFloat::getCategory() const {
  const APFloat &High = Floats[0];
  return High.getCategory();
}
5048
5049
bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
5050
5051
void DoubleAPFloat::makeInf(bool Neg) {
5052
Floats[0].makeInf(Neg);
5053
Floats[1].makeZero(/* Neg = */ false);
5054
}
5055
5056
void DoubleAPFloat::makeZero(bool Neg) {
5057
Floats[0].makeZero(Neg);
5058
Floats[1].makeZero(/* Neg = */ false);
5059
}
5060
5061
void DoubleAPFloat::makeLargest(bool Neg) {
5062
assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5063
Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
5064
Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
5065
if (Neg)
5066
changeSign();
5067
}
5068
5069
void DoubleAPFloat::makeSmallest(bool Neg) {
5070
assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5071
Floats[0].makeSmallest(Neg);
5072
Floats[1].makeZero(/* Neg = */ false);
5073
}
5074
5075
void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
5076
assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5077
Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
5078
if (Neg)
5079
Floats[0].changeSign();
5080
Floats[1].makeZero(/* Neg = */ false);
5081
}
5082
5083
void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
5084
Floats[0].makeNaN(SNaN, Neg, fill);
5085
Floats[1].makeZero(/* Neg = */ false);
5086
}
5087
5088
/// Compare *this with \p RHS: the first components decide unless they
/// compare equal, in which case the second components decide.
APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const {
  const auto HighOrder = Floats[0].compare(RHS.Floats[0]);
  if (HighOrder != APFloat::cmpEqual)
    return HighOrder;
  // Equal high parts: fall through to the low parts.
  return Floats[1].compare(RHS.Floats[1]);
}
5095
5096
bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const {
5097
return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
5098
Floats[1].bitwiseIsEqual(RHS.Floats[1]);
5099
}
5100
5101
/// Hash a double-double: combines the hashes of both components, or hashes
/// the semantics pointer when the object has no component storage.
hash_code hash_value(const DoubleAPFloat &Arg) {
  if (!Arg.Floats)
    return hash_combine(Arg.Semantics);
  return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
}
5106
5107
/// Return the 128-bit raw representation: word 0 holds the bits of the first
/// component and word 1 the bits of the second.
APInt DoubleAPFloat::bitcastToAPInt() const {
  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
  const uint64_t HighWord = Floats[0].bitcastToAPInt().getRawData()[0];
  const uint64_t LowWord = Floats[1].bitcastToAPInt().getRawData()[0];
  const uint64_t Words[2] = {HighWord, LowWord};
  return APInt(128, 2, Words);
}
5115
5116
/// Parse \p S into a semPPCDoubleDoubleLegacy value and store the resulting
/// bits in *this. Returns whatever the legacy conversion reports.
Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S,
                                                             roundingMode RM) {
  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
  APFloat Legacy(semPPCDoubleDoubleLegacy);
  auto StatusOrErr = Legacy.convertFromString(S, RM);
  *this = DoubleAPFloat(semPPCDoubleDouble, Legacy.bitcastToAPInt());
  return StatusOrErr;
}
5124
5125
/// Step to the adjacent value (downwards when \p nextDown is set), carried
/// out on a semPPCDoubleDoubleLegacy value built from this object's bits.
APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
  APFloat Legacy(semPPCDoubleDoubleLegacy, bitcastToAPInt());
  const auto Status = Legacy.next(nextDown);
  *this = DoubleAPFloat(semPPCDoubleDouble, Legacy.bitcastToAPInt());
  return Status;
}
5132
5133
/// Convert to an integer by delegating to a semPPCDoubleDoubleLegacy value
/// built from this object's bits. All arguments are forwarded unchanged.
APFloat::opStatus
DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,
                                unsigned int Width, bool IsSigned,
                                roundingMode RM, bool *IsExact) const {
  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
  const APFloat Legacy(semPPCDoubleDoubleLegacy, bitcastToAPInt());
  return Legacy.convertToInteger(Input, Width, IsSigned, RM, IsExact);
}
5141
5142
/// Convert \p Input into a semPPCDoubleDoubleLegacy value and store the
/// resulting bits in *this.
APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
                                                  bool IsSigned,
                                                  roundingMode RM) {
  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
  APFloat Legacy(semPPCDoubleDoubleLegacy);
  const auto Status = Legacy.convertFromAPInt(Input, IsSigned, RM);
  *this = DoubleAPFloat(semPPCDoubleDouble, Legacy.bitcastToAPInt());
  return Status;
}
5151
5152
/// Convert a sign-extended integer into a semPPCDoubleDoubleLegacy value and
/// store the resulting bits in *this. All arguments are forwarded unchanged.
APFloat::opStatus
DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input,
                                              unsigned int InputSize,
                                              bool IsSigned, roundingMode RM) {
  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
  APFloat Legacy(semPPCDoubleDoubleLegacy);
  const auto Status =
      Legacy.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
  *this = DoubleAPFloat(semPPCDoubleDouble, Legacy.bitcastToAPInt());
  return Status;
}
5162
5163
/// Convert a zero-extended integer into a semPPCDoubleDoubleLegacy value and
/// store the resulting bits in *this. All arguments are forwarded unchanged.
APFloat::opStatus
DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input,
                                              unsigned int InputSize,
                                              bool IsSigned, roundingMode RM) {
  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
  APFloat Legacy(semPPCDoubleDoubleLegacy);
  const auto Status =
      Legacy.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
  *this = DoubleAPFloat(semPPCDoubleDouble, Legacy.bitcastToAPInt());
  return Status;
}
5173
5174
unsigned int DoubleAPFloat::convertToHexString(char *DST,
5175
unsigned int HexDigits,
5176
bool UpperCase,
5177
roundingMode RM) const {
5178
assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5179
return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5180
.convertToHexString(DST, HexDigits, UpperCase, RM);
5181
}
5182
5183
bool DoubleAPFloat::isDenormal() const {
5184
return getCategory() == fcNormal &&
5185
(Floats[0].isDenormal() || Floats[1].isDenormal() ||
5186
// (double)(Hi + Lo) == Hi defines a normal number.
5187
Floats[0] != Floats[0] + Floats[1]);
5188
}
5189
5190
bool DoubleAPFloat::isSmallest() const {
5191
if (getCategory() != fcNormal)
5192
return false;
5193
DoubleAPFloat Tmp(*this);
5194
Tmp.makeSmallest(this->isNegative());
5195
return Tmp.compare(*this) == cmpEqual;
5196
}
5197
5198
bool DoubleAPFloat::isSmallestNormalized() const {
5199
if (getCategory() != fcNormal)
5200
return false;
5201
5202
DoubleAPFloat Tmp(*this);
5203
Tmp.makeSmallestNormalized(this->isNegative());
5204
return Tmp.compare(*this) == cmpEqual;
5205
}
5206
5207
bool DoubleAPFloat::isLargest() const {
5208
if (getCategory() != fcNormal)
5209
return false;
5210
DoubleAPFloat Tmp(*this);
5211
Tmp.makeLargest(this->isNegative());
5212
return Tmp.compare(*this) == cmpEqual;
5213
}
5214
5215
bool DoubleAPFloat::isInteger() const {
5216
assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5217
return Floats[0].isInteger() && Floats[1].isInteger();
5218
}
5219
5220
void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
5221
unsigned FormatPrecision,
5222
unsigned FormatMaxPadding,
5223
bool TruncateZero) const {
5224
assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5225
APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5226
.toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
5227
}
5228
5229
/// Ask a semPPCDoubleDoubleLegacy copy of this value for its exact inverse.
/// When \p inv is non-null it is overwritten with a semPPCDoubleDouble value
/// built from the bits the legacy query produced. Returns the legacy
/// query's result.
bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
  APFloat Legacy(semPPCDoubleDoubleLegacy, bitcastToAPInt());

  // Query only: the caller does not want the inverse itself.
  if (inv == nullptr)
    return Legacy.getExactInverse(nullptr);

  APFloat LegacyInverse(semPPCDoubleDoubleLegacy);
  const auto HasInverse = Legacy.getExactInverse(&LegacyInverse);
  *inv = APFloat(semPPCDoubleDouble, LegacyInverse.bitcastToAPInt());
  return HasInverse;
}
5239
5240
/// Not implemented for double-double values yet; always returns INT_MIN.
int DoubleAPFloat::getExactLog2() const {
  // TODO: Implement me
  const int NotImplemented = INT_MIN;
  return NotImplemented;
}
5244
5245
/// Not implemented for double-double values yet; always returns INT_MIN.
int DoubleAPFloat::getExactLog2Abs() const {
  // TODO: Implement me
  const int NotImplemented = INT_MIN;
  return NotImplemented;
}
5249
5250
/// Scale a double-double by 2^\p Exp by scaling each component separately.
DoubleAPFloat scalbn(const DoubleAPFloat &Arg, int Exp,
                     APFloat::roundingMode RM) {
  assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
  APFloat High = scalbn(Arg.Floats[0], Exp, RM);
  APFloat Low = scalbn(Arg.Floats[1], Exp, RM);
  return DoubleAPFloat(semPPCDoubleDouble, std::move(High), std::move(Low));
}
5256
5257
/// frexp for double-double values. The first component is split with the
/// component-level frexp, which also sets \p Exp. For arguments of category
/// fcNormal the second component is scaled by the same power of two;
/// otherwise it is copied unchanged.
DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
                    APFloat::roundingMode RM) {
  assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
  APFloat High = frexp(Arg.Floats[0], Exp, RM);
  APFloat Low = Arg.Floats[1];
  const bool IsNormalCategory = Arg.getCategory() == APFloat::fcNormal;
  if (IsNormalCategory)
    Low = scalbn(Low, -Exp, RM);
  return DoubleAPFloat(semPPCDoubleDouble, std::move(High), std::move(Low));
}
5266
5267
} // namespace detail
5268
5269
/// Construct the storage from an IEEEFloat, choosing the layout that
/// \p Semantics calls for. In the DoubleAPFloat layout \p F becomes the
/// first component and the second is a default-constructed IEEE double.
APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
  if (usesLayout<IEEEFloat>(Semantics)) {
    new (&IEEE) IEEEFloat(std::move(F));
    return;
  }

  if (usesLayout<DoubleAPFloat>(Semantics)) {
    // Fetch the semantics reference before F is moved from.
    const fltSemantics &FirstSemantics = F.getSemantics();
    APFloat First(std::move(F), FirstSemantics);
    APFloat Second(semIEEEdouble);
    new (&Double)
        DoubleAPFloat(Semantics, std::move(First), std::move(Second));
    return;
  }

  llvm_unreachable("Unexpected semantics");
}
5283
5284
/// Parse \p Str into this value, forwarding to whichever layout the current
/// semantics use.
Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str,
                                                       roundingMode RM) {
  const fltSemantics &Sem = getSemantics();
  if (usesLayout<IEEEFloat>(Sem))
    return U.IEEE.convertFromString(Str, RM);
  if (usesLayout<DoubleAPFloat>(Sem))
    return U.Double.convertFromString(Str, RM);
  llvm_unreachable("Unexpected semantics");
}
5288
5289
/// Hash an APFloat by hashing whichever layout its semantics use.
hash_code hash_value(const APFloat &Arg) {
  const fltSemantics &Sem = Arg.getSemantics();
  if (APFloat::usesLayout<detail::IEEEFloat>(Sem))
    return hash_value(Arg.U.IEEE);
  if (APFloat::usesLayout<detail::DoubleAPFloat>(Sem))
    return hash_value(Arg.U.Double);
  llvm_unreachable("Unexpected semantics");
}
5296
5297
/// Construct a value with the given semantics by parsing \p S with
/// round-to-nearest-ties-to-even. A parse failure trips the assertion in
/// asserting builds; the error is consumed either way.
APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
    : APFloat(Semantics) {
  auto Parsed = convertFromString(S, rmNearestTiesToEven);
  assert(Parsed && "Invalid floating point representation");
  consumeError(Parsed.takeError());
}
5303
5304
/// Map this value to the FPClassTest bit describing it: zero, normal,
/// subnormal or infinity (each split by sign), or signalling/quiet NaN.
FPClassTest APFloat::classify() const {
  const bool Negative = isNegative();

  if (isZero())
    return Negative ? fcNegZero : fcPosZero;
  if (isNormal())
    return Negative ? fcNegNormal : fcPosNormal;
  if (isDenormal())
    return Negative ? fcNegSubnormal : fcPosSubnormal;
  if (isInfinity())
    return Negative ? fcNegInf : fcPosInf;

  assert(isNaN() && "Other class of FP constant");
  if (isSignaling())
    return fcSNan;
  return fcQNan;
}
5316
5317
/// Convert this value in place to \p ToSemantics.
///
/// Converting to the current semantics is a no-op that reports no loss.
/// IEEE -> IEEE is handled by IEEEFloat. IEEE -> double-double converts to
/// the legacy double-double format first and rebuilds the value from the
/// resulting bits. Double-double -> IEEE converts the value returned by
/// getIEEE() and rebuilds the APFloat around it.
APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
                                   roundingMode RM, bool *losesInfo) {
  const fltSemantics &FromSemantics = getSemantics();
  if (&FromSemantics == &ToSemantics) {
    *losesInfo = false;
    return opOK;
  }

  const bool FromIEEE = usesLayout<IEEEFloat>(FromSemantics);
  const bool ToIEEE = usesLayout<IEEEFloat>(ToSemantics);

  if (FromIEEE && ToIEEE)
    return U.IEEE.convert(ToSemantics, RM, losesInfo);

  if (FromIEEE && usesLayout<DoubleAPFloat>(ToSemantics)) {
    assert(&ToSemantics == &semPPCDoubleDouble);
    const auto Status =
        U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
    *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
    return Status;
  }

  if (usesLayout<DoubleAPFloat>(FromSemantics) && ToIEEE) {
    const auto Status = getIEEE().convert(ToSemantics, RM, losesInfo);
    *this = APFloat(std::move(getIEEE()), ToSemantics);
    return Status;
  }

  llvm_unreachable("Unexpected semantics");
}
5341
5342
/// Build the value whose raw representation, sized by the semantics'
/// sizeInBits, has every bit set.
APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) {
  const APInt AllOnes = APInt::getAllOnes(Semantics.sizeInBits);
  return APFloat(Semantics, AllOnes);
}
5345
5346
/// Write the toString() form of this value to \p OS, followed by a newline.
void APFloat::print(raw_ostream &OS) const {
  SmallVector<char, 16> Text;
  toString(Text);
  OS << Text;
  OS << "\n";
}
5351
5352
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5353
LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); }
5354
#endif
5355
5356
/// Add this value's raw bit pattern to the folding-set profile \p NID.
void APFloat::Profile(FoldingSetNodeID &NID) const {
  const APInt Bits = bitcastToAPInt();
  NID.Add(Bits);
}
5359
5360
/* Same as convertToInteger(integerPart*, ...), except the result is returned in
5361
an APSInt, whose initial bit-width and signed-ness are used to determine the
5362
precision of the conversion.
5363
*/
5364
APFloat::opStatus APFloat::convertToInteger(APSInt &result,
5365
roundingMode rounding_mode,
5366
bool *isExact) const {
5367
unsigned bitWidth = result.getBitWidth();
5368
SmallVector<uint64_t, 4> parts(result.getNumWords());
5369
opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
5370
rounding_mode, isExact);
5371
// Keeps the original signed-ness.
5372
result = APInt(bitWidth, parts);
5373
return status;
5374
}
5375
5376
double APFloat::convertToDouble() const {
5377
if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble)
5378
return getIEEE().convertToDouble();
5379
assert(getSemantics().isRepresentableBy(semIEEEdouble) &&
5380
"Float semantics is not representable by IEEEdouble");
5381
APFloat Temp = *this;
5382
bool LosesInfo;
5383
opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo);
5384
assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5385
(void)St;
5386
return Temp.getIEEE().convertToDouble();
5387
}
5388
5389
#ifdef HAS_IEE754_FLOAT128
/// Return this value as a host float128. Values that are not already
/// IEEEquad are first converted on a copy; the semantics must be
/// representable by IEEEquad, so that conversion is expected to be exact.
float128 APFloat::convertToQuad() const {
  const bool AlreadyQuad =
      &getSemantics() == (const llvm::fltSemantics *)&semIEEEquad;
  if (AlreadyQuad)
    return getIEEE().convertToQuad();

  assert(getSemantics().isRepresentableBy(semIEEEquad) &&
         "Float semantics is not representable by IEEEquad");
  APFloat Converted = *this;
  bool LosesInfo;
  opStatus Status =
      Converted.convert(semIEEEquad, rmNearestTiesToEven, &LosesInfo);
  assert(!(Status & opInexact) && !LosesInfo && "Unexpected imprecision");
  (void)Status; // Only read by the assertion above.
  return Converted.getIEEE().convertToQuad();
}
#endif
5403
5404
float APFloat::convertToFloat() const {
5405
if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle)
5406
return getIEEE().convertToFloat();
5407
assert(getSemantics().isRepresentableBy(semIEEEsingle) &&
5408
"Float semantics is not representable by IEEEsingle");
5409
APFloat Temp = *this;
5410
bool LosesInfo;
5411
opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo);
5412
assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5413
(void)St;
5414
return Temp.getIEEE().convertToFloat();
5415
}
5416
5417
} // namespace llvm
5418
5419
#undef APFLOAT_DISPATCH_ON_SEMANTICS
5420
5421