Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
stenzek
GitHub Repository: stenzek/duckstation
Path: blob/master/dep/vixl/src/utils-vixl.cc
4253 views
1
// Copyright 2015, VIXL authors
2
// All rights reserved.
3
//
4
// Redistribution and use in source and binary forms, with or without
5
// modification, are permitted provided that the following conditions are met:
6
//
7
// * Redistributions of source code must retain the above copyright notice,
8
// this list of conditions and the following disclaimer.
9
// * Redistributions in binary form must reproduce the above copyright notice,
10
// this list of conditions and the following disclaimer in the documentation
11
// and/or other materials provided with the distribution.
12
// * Neither the name of ARM Limited nor the names of its contributors may be
13
// used to endorse or promote products derived from this software without
14
// specific prior written permission.
15
//
16
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27
#include "utils-vixl.h"
28
29
#include <cstdio>
30
31
namespace vixl {
32
33
// The default NaN values (for FPCR.DN=1).
34
const double kFP64DefaultNaN = RawbitsToDouble(UINT64_C(0x7ff8000000000000));
35
const float kFP32DefaultNaN = RawbitsToFloat(0x7fc00000);
36
const Float16 kFP16DefaultNaN = RawbitsToFloat16(0x7e00);
37
38
// Floating-point zero values.
39
const Float16 kFP16PositiveZero = RawbitsToFloat16(0x0);
40
const Float16 kFP16NegativeZero = RawbitsToFloat16(0x8000);
41
42
// Floating-point infinity values.
43
const Float16 kFP16PositiveInfinity = RawbitsToFloat16(0x7c00);
44
const Float16 kFP16NegativeInfinity = RawbitsToFloat16(0xfc00);
45
const float kFP32PositiveInfinity = RawbitsToFloat(0x7f800000);
46
const float kFP32NegativeInfinity = RawbitsToFloat(0xff800000);
47
const double kFP64PositiveInfinity =
48
RawbitsToDouble(UINT64_C(0x7ff0000000000000));
49
const double kFP64NegativeInfinity =
50
RawbitsToDouble(UINT64_C(0xfff0000000000000));
51
52
// Returns true when the raw encoding of `value` matches that of
// kFP16PositiveZero or kFP16NegativeZero, i.e. the value is a zero of either
// sign.
bool IsZero(Float16 value) {
  const uint16_t raw = Float16ToRawbits(value);
  if (raw == Float16ToRawbits(kFP16PositiveZero)) {
    return true;
  }
  return raw == Float16ToRawbits(kFP16NegativeZero);
}
57
58
// Returns the 16-bit encoding held in `value.rawbits_`, unmodified.
uint16_t Float16ToRawbits(Float16 value) {
  return value.rawbits_;
}
59
60
// Returns the object representation of `value` as a 32-bit unsigned integer.
// The bytes are transferred with memcpy rather than through a pointer cast.
uint32_t FloatToRawbits(float value) {
  uint32_t raw = 0;
  // sizeof(raw) replaces the hard-coded byte count of 4.
  memcpy(&raw, &value, sizeof(raw));
  return raw;
}
65
66
67
// Returns the object representation of `value` as a 64-bit unsigned integer.
// The bytes are transferred with memcpy rather than through a pointer cast.
uint64_t DoubleToRawbits(double value) {
  uint64_t raw = 0;
  // sizeof(raw) replaces the hard-coded byte count of 8.
  memcpy(&raw, &value, sizeof(raw));
  return raw;
}
72
73
74
// Builds a Float16 whose stored encoding (`rawbits_`) is exactly `bits`.
// No conversion or rounding is performed.
Float16 RawbitsToFloat16(uint16_t bits) {
  Float16 result;
  result.rawbits_ = bits;
  return result;
}
79
80
81
// Returns the float whose object representation is the 32-bit pattern `bits`.
// The bytes are transferred with memcpy rather than through a pointer cast.
float RawbitsToFloat(uint32_t bits) {
  float result = 0.0;
  // sizeof(bits) replaces the hard-coded byte count of 4.
  memcpy(&result, &bits, sizeof(bits));
  return result;
}
86
87
88
// Returns the double whose object representation is the 64-bit pattern
// `bits`. The bytes are transferred with memcpy rather than through a pointer
// cast.
double RawbitsToDouble(uint64_t bits) {
  double result = 0.0;
  // sizeof(bits) replaces the hard-coded byte count of 8.
  memcpy(&result, &bits, sizeof(bits));
  return result;
}
93
94
95
// Returns the sign field of `val`: the bit field [15:15] of its raw encoding,
// as selected by ExtractUnsignedBitfield32.
uint32_t Float16Sign(internal::SimFloat16 val) {
  const uint16_t encoding = Float16ToRawbits(val);
  return ExtractUnsignedBitfield32(15, 15, encoding);
}
99
100
101
// Returns the (still biased) exponent field of `val`: the bit field [14:10]
// of its raw encoding, as selected by ExtractUnsignedBitfield32.
uint32_t Float16Exp(internal::SimFloat16 val) {
  const uint16_t encoding = Float16ToRawbits(val);
  return ExtractUnsignedBitfield32(14, 10, encoding);
}
105
106
// Returns the mantissa field of `val`: the bit field [9:0] of its raw
// encoding, as selected by ExtractUnsignedBitfield32. No implicit leading bit
// is added.
uint32_t Float16Mantissa(internal::SimFloat16 val) {
  const uint16_t encoding = Float16ToRawbits(val);
  return ExtractUnsignedBitfield32(9, 0, encoding);
}
110
111
112
uint32_t FloatSign(float val) {
113
uint32_t rawbits = FloatToRawbits(val);
114
return ExtractUnsignedBitfield32(31, 31, rawbits);
115
}
116
117
118
uint32_t FloatExp(float val) {
119
uint32_t rawbits = FloatToRawbits(val);
120
return ExtractUnsignedBitfield32(30, 23, rawbits);
121
}
122
123
124
uint32_t FloatMantissa(float val) {
125
uint32_t rawbits = FloatToRawbits(val);
126
return ExtractUnsignedBitfield32(22, 0, rawbits);
127
}
128
129
130
uint32_t DoubleSign(double val) {
131
uint64_t rawbits = DoubleToRawbits(val);
132
return static_cast<uint32_t>(ExtractUnsignedBitfield64(63, 63, rawbits));
133
}
134
135
136
uint32_t DoubleExp(double val) {
137
uint64_t rawbits = DoubleToRawbits(val);
138
return static_cast<uint32_t>(ExtractUnsignedBitfield64(62, 52, rawbits));
139
}
140
141
142
uint64_t DoubleMantissa(double val) {
143
uint64_t rawbits = DoubleToRawbits(val);
144
return ExtractUnsignedBitfield64(51, 0, rawbits);
145
}
146
147
148
// Assembles a half-precision value from its three fields:
//   sign     -> bit 15
//   exp      -> bits starting at 10 (biased exponent)
//   mantissa -> low bits
// The fields are OR-ed together without masking, so a field wider than its
// slot overlaps its neighbours; callers must pass in-range values.
internal::SimFloat16 Float16Pack(uint16_t sign,
                                 uint16_t exp,
                                 uint16_t mantissa) {
  // The operands are promoted to int for the shifts; the explicit cast makes
  // the truncation back to 16 bits visible.
  const uint16_t bits =
      static_cast<uint16_t>((sign << 15) | (exp << 10) | mantissa);
  return RawbitsToFloat16(bits);
}
154
155
156
float FloatPack(uint32_t sign, uint32_t exp, uint32_t mantissa) {
157
uint32_t bits = (sign << 31) | (exp << 23) | mantissa;
158
return RawbitsToFloat(bits);
159
}
160
161
162
double DoublePack(uint64_t sign, uint64_t exp, uint64_t mantissa) {
163
uint64_t bits = (sign << 63) | (exp << 52) | mantissa;
164
return RawbitsToDouble(bits);
165
}
166
167
168
// Classifies a half-precision value from its raw encoding, returning one of
// FP_ZERO, FP_SUBNORMAL, FP_INFINITE, FP_NAN or FP_NORMAL:
//   exponent == 0,   mantissa == 0  -> FP_ZERO
//   exponent == 0,   mantissa != 0  -> FP_SUBNORMAL
//   exponent == max, mantissa == 0  -> FP_INFINITE
//   exponent == max, mantissa != 0  -> FP_NAN
//   anything else                   -> FP_NORMAL
// The sign bit is ignored.
int Float16Classify(Float16 value) {
  const uint16_t bits = Float16ToRawbits(value);
  const uint16_t exponent_max = (1 << 5) - 1;    // 5-bit exponent field.
  const uint16_t mantissa_mask = (1 << 10) - 1;  // 10-bit mantissa field.

  const uint16_t exponent = (bits >> 10) & exponent_max;
  const uint16_t mantissa = bits & mantissa_mask;

  if (exponent == 0) {
    return (mantissa == 0) ? FP_ZERO : FP_SUBNORMAL;
  }
  if (exponent == exponent_max) {
    return (mantissa == 0) ? FP_INFINITE : FP_NAN;
  }
  return FP_NORMAL;
}
189
190
191
unsigned CountClearHalfWords(uint64_t imm, unsigned reg_size) {
192
VIXL_ASSERT((reg_size % 8) == 0);
193
int count = 0;
194
for (unsigned i = 0; i < (reg_size / 16); i++) {
195
if ((imm & 0xffff) == 0) {
196
count++;
197
}
198
imm >>= 16;
199
}
200
return count;
201
}
202
203
204
// Thin wrapper: forwards `value` to CountSetBits and returns its result as an
// int.
int BitCount(uint64_t value) {
  return CountSetBits(value);
}
205
206
// Float16 definitions.
207
208
// Constructs a Float16 from a double. The value is converted with
// FPToFloat16 using FPTieEven rounding and kIgnoreDefaultNaN, and the
// resulting encoding is stored in rawbits_.
Float16::Float16(double dvalue) {
  const Float16 converted = FPToFloat16(dvalue, FPTieEven, kIgnoreDefaultNaN);
  rawbits_ = Float16ToRawbits(converted);
}
212
213
namespace internal {
214
215
SimFloat16 SimFloat16::operator-() const {
216
return RawbitsToFloat16(rawbits_ ^ 0x8000);
217
}
218
219
// SimFloat16 definitions.
220
SimFloat16 SimFloat16::operator+(SimFloat16 rhs) const {
221
return static_cast<double>(*this) + static_cast<double>(rhs);
222
}
223
224
SimFloat16 SimFloat16::operator-(SimFloat16 rhs) const {
225
return static_cast<double>(*this) - static_cast<double>(rhs);
226
}
227
228
SimFloat16 SimFloat16::operator*(SimFloat16 rhs) const {
229
return static_cast<double>(*this) * static_cast<double>(rhs);
230
}
231
232
SimFloat16 SimFloat16::operator/(SimFloat16 rhs) const {
233
return static_cast<double>(*this) / static_cast<double>(rhs);
234
}
235
236
bool SimFloat16::operator<(SimFloat16 rhs) const {
237
return static_cast<double>(*this) < static_cast<double>(rhs);
238
}
239
240
bool SimFloat16::operator>(SimFloat16 rhs) const {
241
return static_cast<double>(*this) > static_cast<double>(rhs);
242
}
243
244
bool SimFloat16::operator==(SimFloat16 rhs) const {
245
if (IsNaN(*this) || IsNaN(rhs)) {
246
return false;
247
} else if (IsZero(rhs) && IsZero(*this)) {
248
// +0 and -0 should be treated as equal.
249
return true;
250
}
251
return this->rawbits_ == rhs.rawbits_;
252
}
253
254
bool SimFloat16::operator!=(SimFloat16 rhs) const { return !(*this == rhs); }
255
256
bool SimFloat16::operator==(double rhs) const {
257
return static_cast<double>(*this) == static_cast<double>(rhs);
258
}
259
260
SimFloat16::operator double() const {
261
return FPToDouble(*this, kIgnoreDefaultNaN);
262
}
263
264
Int64 BitCount(Uint32 value) { return CountSetBits(value.Get()); }
265
266
} // namespace internal
267
268
// Converts a half-precision value to single precision.
//
//   value     - the half-precision input.
//   DN        - when kUseDefaultNaN, every NaN input yields kFP32DefaultNaN;
//               otherwise the NaN's sign and payload are carried over and the
//               result is made quiet.
//   exception - if non-NULL, set to true when the input is a signalling NaN.
//               It is never written otherwise, so callers must initialise it.
float FPToFloat(Float16 value, UseDefaultNaN DN, bool* exception) {
  const uint16_t bits = Float16ToRawbits(value);
  const uint32_t sign = bits >> 15;
  uint32_t exponent =
      ExtractUnsignedBitfield32(kFloat16MantissaBits + kFloat16ExponentBits - 1,
                                kFloat16MantissaBits,
                                bits);
  uint32_t mantissa =
      ExtractUnsignedBitfield32(kFloat16MantissaBits - 1, 0, bits);

  switch (Float16Classify(value)) {
    case FP_ZERO:
      return (sign == 0) ? 0.0f : -0.0f;

    case FP_INFINITE:
      return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;

    case FP_SUBNORMAL: {
      // Calculate shift required to put mantissa into the most-significant
      // bits of the destination mantissa.
      int shift = CountLeadingZeros(mantissa << (32 - 10));

      // Shift mantissa and discard implicit '1'.
      mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
      mantissa &= (1 << kFloatMantissaBits) - 1;

      // Adjust the exponent for the shift applied, and rebias.
      exponent = exponent - shift + (-15 + 127);
      break;
    }

    case FP_NAN:
      if (IsSignallingNaN(value) && (exception != NULL)) {
        *exception = true;
      }
      if (DN == kUseDefaultNaN) return kFP32DefaultNaN;

      // Convert NaNs as the processor would:
      //  - The sign is propagated.
      //  - The payload (mantissa) is transferred entirely, except that the
      //    top bit is forced to '1', making the result a quiet NaN. The
      //    unused (low-order) payload bits are set to 0.
      exponent = (1 << kFloatExponentBits) - 1;

      // Increase bits in mantissa, making low-order bits 0.
      mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
      mantissa |= 1 << 22;  // Force a quiet NaN.
      break;

    case FP_NORMAL:
      // Increase bits in mantissa, making low-order bits 0.
      mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);

      // Change exponent bias.
      exponent += (-15 + 127);
      break;

    default:
      VIXL_UNREACHABLE();
  }

  // Reassemble the single-precision encoding from the converted fields.
  return RawbitsToFloat((sign << 31) | (exponent << kFloatMantissaBits) |
                        mantissa);
}
333
334
335
float FPToFloat(double value,
336
FPRounding round_mode,
337
UseDefaultNaN DN,
338
bool* exception) {
339
// Only the FPTieEven rounding mode is implemented.
340
VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
341
USE(round_mode);
342
343
switch (std::fpclassify(value)) {
344
case FP_NAN: {
345
if (IsSignallingNaN(value)) {
346
if (exception != NULL) {
347
*exception = true;
348
}
349
}
350
if (DN == kUseDefaultNaN) return kFP32DefaultNaN;
351
352
// Convert NaNs as the processor would:
353
// - The sign is propagated.
354
// - The payload (mantissa) is transferred as much as possible, except
355
// that the top bit is forced to '1', making the result a quiet NaN.
356
uint64_t raw = DoubleToRawbits(value);
357
358
uint32_t sign = raw >> 63;
359
uint32_t exponent = (1 << 8) - 1;
360
uint32_t payload =
361
static_cast<uint32_t>(ExtractUnsignedBitfield64(50, 52 - 23, raw));
362
payload |= (1 << 22); // Force a quiet NaN.
363
364
return RawbitsToFloat((sign << 31) | (exponent << 23) | payload);
365
}
366
367
case FP_ZERO:
368
case FP_INFINITE: {
369
// In a C++ cast, any value representable in the target type will be
370
// unchanged. This is always the case for +/-0.0 and infinities.
371
return static_cast<float>(value);
372
}
373
374
case FP_NORMAL:
375
case FP_SUBNORMAL: {
376
// Convert double-to-float as the processor would, assuming that FPCR.FZ
377
// (flush-to-zero) is not set.
378
uint64_t raw = DoubleToRawbits(value);
379
// Extract the IEEE-754 double components.
380
uint32_t sign = raw >> 63;
381
// Extract the exponent and remove the IEEE-754 encoding bias.
382
int32_t exponent =
383
static_cast<int32_t>(ExtractUnsignedBitfield64(62, 52, raw)) - 1023;
384
// Extract the mantissa and add the implicit '1' bit.
385
uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw);
386
if (std::fpclassify(value) == FP_NORMAL) {
387
mantissa |= (UINT64_C(1) << 52);
388
}
389
return FPRoundToFloat(sign, exponent, mantissa, round_mode);
390
}
391
}
392
393
VIXL_UNREACHABLE();
394
return static_cast<float>(value);
395
}
396
397
// TODO: We should consider implementing a full FPToDouble(Float16)
398
// conversion function (for performance reasons).
399
double FPToDouble(Float16 value, UseDefaultNaN DN, bool* exception) {
400
// We can rely on implicit float to double conversion here.
401
return FPToFloat(value, DN, exception);
402
}
403
404
405
double FPToDouble(float value, UseDefaultNaN DN, bool* exception) {
406
switch (std::fpclassify(value)) {
407
case FP_NAN: {
408
if (IsSignallingNaN(value)) {
409
if (exception != NULL) {
410
*exception = true;
411
}
412
}
413
if (DN == kUseDefaultNaN) return kFP64DefaultNaN;
414
415
// Convert NaNs as the processor would:
416
// - The sign is propagated.
417
// - The payload (mantissa) is transferred entirely, except that the top
418
// bit is forced to '1', making the result a quiet NaN. The unused
419
// (low-order) payload bits are set to 0.
420
uint32_t raw = FloatToRawbits(value);
421
422
uint64_t sign = raw >> 31;
423
uint64_t exponent = (1 << 11) - 1;
424
uint64_t payload = ExtractUnsignedBitfield64(21, 0, raw);
425
payload <<= (52 - 23); // The unused low-order bits should be 0.
426
payload |= (UINT64_C(1) << 51); // Force a quiet NaN.
427
428
return RawbitsToDouble((sign << 63) | (exponent << 52) | payload);
429
}
430
431
case FP_ZERO:
432
case FP_NORMAL:
433
case FP_SUBNORMAL:
434
case FP_INFINITE: {
435
// All other inputs are preserved in a standard cast, because every value
436
// representable using an IEEE-754 float is also representable using an
437
// IEEE-754 double.
438
return static_cast<double>(value);
439
}
440
}
441
442
VIXL_UNREACHABLE();
443
return static_cast<double>(value);
444
}
445
446
447
// Converts a single-precision value to half precision.
//
//   value      - the single-precision input.
//   round_mode - must be FPTieEven (asserted); forwarded to FPRoundToFloat16.
//   DN         - when kUseDefaultNaN, every NaN input yields kFP16DefaultNaN;
//                otherwise the NaN's sign and as much payload as fits are
//                carried over and the result is made quiet.
//   exception  - if non-NULL, set to true when the input is a signalling NaN.
//                It is never written otherwise.
Float16 FPToFloat16(float value,
                    FPRounding round_mode,
                    UseDefaultNaN DN,
                    bool* exception) {
  // Only the FPTieEven rounding mode is implemented.
  VIXL_ASSERT(round_mode == FPTieEven);
  USE(round_mode);

  const uint32_t raw = FloatToRawbits(value);
  int32_t sign = raw >> 31;
  // Convert to a signed type before removing the bias so the subtraction is
  // carried out in signed arithmetic.
  int32_t exponent =
      static_cast<int32_t>(ExtractUnsignedBitfield32(30, 23, raw)) - 127;
  uint32_t mantissa = ExtractUnsignedBitfield32(22, 0, raw);

  switch (std::fpclassify(value)) {
    case FP_NAN: {
      if (IsSignallingNaN(value) && (exception != NULL)) {
        *exception = true;
      }
      if (DN == kUseDefaultNaN) return kFP16DefaultNaN;

      // Convert NaNs as the processor would:
      //  - The sign is propagated.
      //  - The payload (mantissa) is transferred as much as possible, except
      //    that the top bit is forced to '1', making the result a quiet NaN.
      //
      // Starting from the infinity encoding of the right sign supplies the
      // sign bit and an all-ones exponent field.
      uint16_t result = (sign == 0) ? Float16ToRawbits(kFP16PositiveInfinity)
                                    : Float16ToRawbits(kFP16NegativeInfinity);
      result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
      result |= (1 << 9);  // Force a quiet NaN;
      return RawbitsToFloat16(result);
    }

    case FP_ZERO:
      return (sign == 0) ? kFP16PositiveZero : kFP16NegativeZero;

    case FP_INFINITE:
      return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;

    case FP_NORMAL:
    case FP_SUBNORMAL: {
      // Convert float-to-half as the processor would, assuming that FPCR.FZ
      // (flush-to-zero) is not set.

      // Add the implicit '1' bit to the mantissa.
      mantissa += (1 << 23);
      return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
    }
  }

  VIXL_UNREACHABLE();
  return kFP16PositiveZero;
}
500
501
502
// Converts a double-precision value to half precision.
//
//   value      - the double-precision input.
//   round_mode - must be FPTieEven (asserted); forwarded to FPRoundToFloat16.
//   DN         - when kUseDefaultNaN, every NaN input yields kFP16DefaultNaN;
//                otherwise the NaN's sign and as much payload as fits are
//                carried over and the result is made quiet.
//   exception  - if non-NULL, set to true when the input is a signalling NaN.
//                It is never written otherwise.
Float16 FPToFloat16(double value,
                    FPRounding round_mode,
                    UseDefaultNaN DN,
                    bool* exception) {
  // Only the FPTieEven rounding mode is implemented.
  VIXL_ASSERT(round_mode == FPTieEven);
  USE(round_mode);

  const uint64_t raw = DoubleToRawbits(value);
  int32_t sign = raw >> 63;
  // Convert to a signed type before removing the bias so the subtraction is
  // carried out in signed arithmetic.
  int64_t exponent =
      static_cast<int64_t>(ExtractUnsignedBitfield64(62, 52, raw)) - 1023;
  uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw);

  switch (std::fpclassify(value)) {
    case FP_NAN: {
      if (IsSignallingNaN(value) && (exception != NULL)) {
        *exception = true;
      }
      if (DN == kUseDefaultNaN) return kFP16DefaultNaN;

      // Convert NaNs as the processor would:
      //  - The sign is propagated.
      //  - The payload (mantissa) is transferred as much as possible, except
      //    that the top bit is forced to '1', making the result a quiet NaN.
      //
      // Starting from the infinity encoding of the right sign supplies the
      // sign bit and an all-ones exponent field.
      uint16_t result = (sign == 0) ? Float16ToRawbits(kFP16PositiveInfinity)
                                    : Float16ToRawbits(kFP16NegativeInfinity);
      result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
      result |= (1 << 9);  // Force a quiet NaN;
      return RawbitsToFloat16(result);
    }

    case FP_ZERO:
      return (sign == 0) ? kFP16PositiveZero : kFP16NegativeZero;

    case FP_INFINITE:
      return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;

    case FP_NORMAL:
    case FP_SUBNORMAL: {
      // Convert double-to-half as the processor would, assuming that FPCR.FZ
      // (flush-to-zero) is not set.

      // Add the implicit '1' bit to the mantissa.
      mantissa += (UINT64_C(1) << 52);
      return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
    }
  }

  VIXL_UNREACHABLE();
  return kFP16PositiveZero;
}
554
555
} // namespace vixl
556
557