CoCalc -- utils-vixl.cc

GitHub Repository: stenzek/duckstation
Path: blob/master/dep/vixl/src/utils-vixl.cc
⁴²⁵³ views
1
// Copyright 2015, VIXL authors
2
// All rights reserved.
3
//
4
// Redistribution and use in source and binary forms, with or without
5
// modification, are permitted provided that the following conditions are met:
6
//
7
//   * Redistributions of source code must retain the above copyright notice,
8
//     this list of conditions and the following disclaimer.
9
//   * Redistributions in binary form must reproduce the above copyright notice,
10
//     this list of conditions and the following disclaimer in the documentation
11
//     and/or other materials provided with the distribution.
12
//   * Neither the name of ARM Limited nor the names of its contributors may be
13
//     used to endorse or promote products derived from this software without
14
//     specific prior written permission.
15
//
16
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26

27
#include "utils-vixl.h"
28

29
#include <cstdio>
30

31
namespace vixl {
32

33
// The default NaN values (for FPCR.DN=1).
34
const double kFP64DefaultNaN = RawbitsToDouble(UINT64_C(0x7ff8000000000000));
35
const float kFP32DefaultNaN = RawbitsToFloat(0x7fc00000);
36
const Float16 kFP16DefaultNaN = RawbitsToFloat16(0x7e00);
37

38
// Floating-point zero values.
39
const Float16 kFP16PositiveZero = RawbitsToFloat16(0x0);
40
const Float16 kFP16NegativeZero = RawbitsToFloat16(0x8000);
41

42
// Floating-point infinity values.
43
const Float16 kFP16PositiveInfinity = RawbitsToFloat16(0x7c00);
44
const Float16 kFP16NegativeInfinity = RawbitsToFloat16(0xfc00);
45
const float kFP32PositiveInfinity = RawbitsToFloat(0x7f800000);
46
const float kFP32NegativeInfinity = RawbitsToFloat(0xff800000);
47
const double kFP64PositiveInfinity =
48
    RawbitsToDouble(UINT64_C(0x7ff0000000000000));
49
const double kFP64NegativeInfinity =
50
    RawbitsToDouble(UINT64_C(0xfff0000000000000));
51

52
// Returns true when `value` has the bit pattern of kFP16PositiveZero or
// kFP16NegativeZero; any other encoding (subnormals included) yields false.
bool IsZero(Float16 value) {
  const uint16_t raw = Float16ToRawbits(value);
  const uint16_t positive_zero_bits = Float16ToRawbits(kFP16PositiveZero);
  const uint16_t negative_zero_bits = Float16ToRawbits(kFP16NegativeZero);
  if (raw == positive_zero_bits) {
    return true;
  }
  return raw == negative_zero_bits;
}
57

58
// Exposes the 16-bit encoding stored inside a Float16, unmodified.
uint16_t Float16ToRawbits(Float16 value) {
  const uint16_t raw = value.rawbits_;
  return raw;
}
59

60
// Reinterprets the object representation of a float as a 32-bit integer.
// The bytes are copied with memcpy rather than read through a cast pointer,
// so no aliasing rules are violated.
uint32_t FloatToRawbits(float value) {
  uint32_t raw = 0;
  memcpy(&raw, &value, sizeof(raw));
  return raw;
}
65

66

67
// Reinterprets the object representation of a double as a 64-bit integer.
// The bytes are copied with memcpy rather than read through a cast pointer,
// so no aliasing rules are violated.
uint64_t DoubleToRawbits(double value) {
  uint64_t raw = 0;
  memcpy(&raw, &value, sizeof(raw));
  return raw;
}
72

73

74
// Builds a Float16 whose stored encoding is exactly `bits`; no rounding or
// validation is applied.
Float16 RawbitsToFloat16(uint16_t bits) {
  Float16 result;
  result.rawbits_ = bits;
  return result;
}
79

80

81
// Reinterprets a 32-bit pattern as a float by copying its bytes with memcpy.
float RawbitsToFloat(uint32_t bits) {
  float result = 0.0;
  memcpy(&result, &bits, sizeof(bits));
  return result;
}
86

87

88
// Reinterprets a 64-bit pattern as a double by copying its bytes with memcpy.
double RawbitsToDouble(uint64_t bits) {
  double result = 0.0;
  memcpy(&result, &bits, sizeof(bits));
  return result;
}
93

94

95
// Extracts the sign field (bit 15) of a half-precision encoding.
uint32_t Float16Sign(internal::SimFloat16 val) {
  const uint16_t encoding = Float16ToRawbits(val);
  const uint32_t sign_bit = ExtractUnsignedBitfield32(15, 15, encoding);
  return sign_bit;
}
99

100

101
// Extracts the raw exponent field (bits 14:10) of a half-precision encoding.
// The field is returned as stored; no bias is removed.
uint32_t Float16Exp(internal::SimFloat16 val) {
  const uint16_t encoding = Float16ToRawbits(val);
  const uint32_t exponent_field = ExtractUnsignedBitfield32(14, 10, encoding);
  return exponent_field;
}
105

106
// Extracts the mantissa field (bits 9:0) of a half-precision encoding. The
// implicit leading bit is not added.
uint32_t Float16Mantissa(internal::SimFloat16 val) {
  const uint16_t encoding = Float16ToRawbits(val);
  const uint32_t mantissa_field = ExtractUnsignedBitfield32(9, 0, encoding);
  return mantissa_field;
}
110

111

112
uint32_t FloatSign(float val) {
113
  uint32_t rawbits = FloatToRawbits(val);
114
  return ExtractUnsignedBitfield32(31, 31, rawbits);
115
}
116

117

118
uint32_t FloatExp(float val) {
119
  uint32_t rawbits = FloatToRawbits(val);
120
  return ExtractUnsignedBitfield32(30, 23, rawbits);
121
}
122

123

124
uint32_t FloatMantissa(float val) {
125
  uint32_t rawbits = FloatToRawbits(val);
126
  return ExtractUnsignedBitfield32(22, 0, rawbits);
127
}
128

129

130
uint32_t DoubleSign(double val) {
131
  uint64_t rawbits = DoubleToRawbits(val);
132
  return static_cast<uint32_t>(ExtractUnsignedBitfield64(63, 63, rawbits));
133
}
134

135

136
uint32_t DoubleExp(double val) {
137
  uint64_t rawbits = DoubleToRawbits(val);
138
  return static_cast<uint32_t>(ExtractUnsignedBitfield64(62, 52, rawbits));
139
}
140

141

142
uint64_t DoubleMantissa(double val) {
143
  uint64_t rawbits = DoubleToRawbits(val);
144
  return ExtractUnsignedBitfield64(51, 0, rawbits);
145
}
146

147

148
// Assembles a half-precision value from its three fields: `sign` is placed at
// bit 15, `exp` at bit 10 and `mantissa` at bit 0. The fields are OR-ed
// together without masking, so out-of-range inputs spill into neighbouring
// fields; the result is truncated to 16 bits.
internal::SimFloat16 Float16Pack(uint16_t sign,
                                 uint16_t exp,
                                 uint16_t mantissa) {
  const int sign_field = sign << 15;
  const int exponent_field = exp << 10;
  const uint16_t encoding =
      static_cast<uint16_t>(sign_field | exponent_field | mantissa);
  return RawbitsToFloat16(encoding);
}
154

155

156
float FloatPack(uint32_t sign, uint32_t exp, uint32_t mantissa) {
157
  uint32_t bits = (sign << 31) | (exp << 23) | mantissa;
158
  return RawbitsToFloat(bits);
159
}
160

161

162
double DoublePack(uint64_t sign, uint64_t exp, uint64_t mantissa) {
163
  uint64_t bits = (sign << 63) | (exp << 52) | mantissa;
164
  return RawbitsToDouble(bits);
165
}
166

167

168
// Classifies a half-precision value, returning one of the <cmath> FP_*
// category constants (FP_ZERO, FP_SUBNORMAL, FP_INFINITE, FP_NAN, FP_NORMAL).
// The decision is made purely from the 5-bit exponent field (bits 14:10) and
// the 10-bit mantissa field (bits 9:0); the sign bit is ignored.
int Float16Classify(Float16 value) {
  const uint16_t raw = Float16ToRawbits(value);
  const uint16_t kExponentAllOnes = 0x1f;
  const uint16_t kMantissaMask = 0x3ff;

  const uint16_t exponent = (raw >> 10) & kExponentAllOnes;
  const bool mantissa_is_zero = ((raw & kMantissaMask) == 0);

  // An all-zero exponent encodes zero or a subnormal.
  if (exponent == 0) {
    return mantissa_is_zero ? FP_ZERO : FP_SUBNORMAL;
  }
  // An all-ones exponent encodes infinity or a NaN.
  if (exponent == kExponentAllOnes) {
    return mantissa_is_zero ? FP_INFINITE : FP_NAN;
  }
  return FP_NORMAL;
}
189

190

191
unsigned CountClearHalfWords(uint64_t imm, unsigned reg_size) {
192
  VIXL_ASSERT((reg_size % 8) == 0);
193
  int count = 0;
194
  for (unsigned i = 0; i < (reg_size / 16); i++) {
195
    if ((imm & 0xffff) == 0) {
196
      count++;
197
    }
198
    imm >>= 16;
199
  }
200
  return count;
201
}
202

203

204
// Returns the result of CountSetBits for `value`, as an int.
int BitCount(uint64_t value) {
  const int set_bits = CountSetBits(value);
  return set_bits;
}
205

206
// Float16 definitions.
207

208
// Constructs a Float16 from a double by converting with FPToFloat16 using
// FPTieEven rounding and kIgnoreDefaultNaN, then storing the resulting
// encoding.
Float16::Float16(double dvalue) {
  const Float16 converted = FPToFloat16(dvalue, FPTieEven, kIgnoreDefaultNaN);
  rawbits_ = Float16ToRawbits(converted);
}
212

213
namespace internal {
214

215
// Unary negation: flips the sign bit (bit 15) of the encoding and leaves all
// other bits untouched.
SimFloat16 SimFloat16::operator-() const {
  const uint16_t kSignBit = 0x8000;
  return RawbitsToFloat16(rawbits_ ^ kSignBit);
}
218

219
// SimFloat16 definitions.
220
// Addition: both operands are converted to double, added, and the double sum
// is converted back to SimFloat16 by the return statement.
SimFloat16 SimFloat16::operator+(SimFloat16 rhs) const {
  const double left = static_cast<double>(*this);
  const double right = static_cast<double>(rhs);
  return left + right;
}
223

224
// Subtraction: both operands are converted to double, subtracted, and the
// double difference is converted back to SimFloat16 by the return statement.
SimFloat16 SimFloat16::operator-(SimFloat16 rhs) const {
  const double left = static_cast<double>(*this);
  const double right = static_cast<double>(rhs);
  return left - right;
}
227

228
// Multiplication: both operands are converted to double, multiplied, and the
// double product is converted back to SimFloat16 by the return statement.
SimFloat16 SimFloat16::operator*(SimFloat16 rhs) const {
  const double left = static_cast<double>(*this);
  const double right = static_cast<double>(rhs);
  return left * right;
}
231

232
// Division: both operands are converted to double, divided, and the double
// quotient is converted back to SimFloat16 by the return statement.
SimFloat16 SimFloat16::operator/(SimFloat16 rhs) const {
  const double left = static_cast<double>(*this);
  const double right = static_cast<double>(rhs);
  return left / right;
}
235

236
// Less-than, evaluated on the double conversions of both operands.
bool SimFloat16::operator<(SimFloat16 rhs) const {
  const double left = static_cast<double>(*this);
  const double right = static_cast<double>(rhs);
  return left < right;
}
239

240
// Greater-than, evaluated on the double conversions of both operands.
bool SimFloat16::operator>(SimFloat16 rhs) const {
  const double left = static_cast<double>(*this);
  const double right = static_cast<double>(rhs);
  return left > right;
}
243

244
// Equality between two SimFloat16 values:
//  - if either operand is a NaN the result is false;
//  - two zeroes compare equal regardless of sign;
//  - otherwise the encodings must match bit for bit.
bool SimFloat16::operator==(SimFloat16 rhs) const {
  if (IsNaN(*this) || IsNaN(rhs)) {
    return false;
  }
  // +0 and -0 have different encodings but must be treated as equal.
  if (IsZero(rhs) && IsZero(*this)) {
    return true;
  }
  return rawbits_ == rhs.rawbits_;
}
253

254
// Inequality: the exact negation of operator==(SimFloat16).
bool SimFloat16::operator!=(SimFloat16 rhs) const {
  const bool are_equal = (*this == rhs);
  return !are_equal;
}
255

256
// Equality against a double: this value is converted to double and compared
// with `rhs` using the built-in double comparison.
bool SimFloat16::operator==(double rhs) const {
  const double self_as_double = static_cast<double>(*this);
  return self_as_double == rhs;
}
259

260
// Conversion to double, delegated to FPToDouble with kIgnoreDefaultNaN.
SimFloat16::operator double() const {
  const double widened = FPToDouble(*this, kIgnoreDefaultNaN);
  return widened;
}
263

264
// Returns the result of CountSetBits for the value wrapped by `value`
// (obtained via Get()), converted to Int64 by the return statement.
Int64 BitCount(Uint32 value) {
  const auto set_bits = CountSetBits(value.Get());
  return set_bits;
}
265

266
}  // namespace internal
267

268
// Widens a half-precision value to single precision.
//
//  - DN:        when equal to kUseDefaultNaN, every NaN input produces
//               kFP32DefaultNaN instead of a converted payload.
//  - exception: if non-NULL, set to true when the input is a signalling NaN.
//               It is never written in any other case.
float FPToFloat(Float16 value, UseDefaultNaN DN, bool* exception) {
  const uint16_t raw = Float16ToRawbits(value);
  const uint32_t sign = raw >> 15;
  uint32_t exponent =
      ExtractUnsignedBitfield32(kFloat16MantissaBits + kFloat16ExponentBits - 1,
                                kFloat16MantissaBits,
                                raw);
  uint32_t mantissa =
      ExtractUnsignedBitfield32(kFloat16MantissaBits - 1, 0, raw);

  const int fp_class = Float16Classify(value);

  // Zeroes and infinities map directly onto their single-precision
  // counterparts with the same sign.
  if (fp_class == FP_ZERO) {
    return (sign == 0) ? 0.0f : -0.0f;
  }
  if (fp_class == FP_INFINITE) {
    return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
  }

  if (fp_class == FP_SUBNORMAL) {
    // Number of positions the mantissa must move so that its leading set bit
    // reaches the top of the 10-bit field.
    int shift = CountLeadingZeros(mantissa << (32 - 10));

    // Move the mantissa to the top of the destination field; the extra '+ 1'
    // pushes the leading '1' out, where the mask then discards it because it
    // becomes the implicit bit.
    mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
    mantissa &= (1 << kFloatMantissaBits) - 1;

    // Compensate the exponent for the normalisation shift and switch from
    // the half-precision bias (15) to the single-precision bias (127).
    exponent = exponent - shift + (-15 + 127);
  } else if (fp_class == FP_NAN) {
    if (IsSignallingNaN(value) && (exception != NULL)) {
      *exception = true;
    }
    if (DN == kUseDefaultNaN) return kFP32DefaultNaN;

    // Convert NaNs as the processor would:
    //  - the sign is kept;
    //  - the whole payload is moved to the top of the wider mantissa, with
    //    zeroes filling the new low-order bits;
    //  - the top mantissa bit is forced to '1' so the result is quiet.
    exponent = (1 << kFloatExponentBits) - 1;
    mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
    mantissa |= 1 << 22;
  } else if (fp_class == FP_NORMAL) {
    // Widen the mantissa (new low-order bits are zero) and rebias the
    // exponent from 15 to 127.
    mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
    exponent += (-15 + 127);
  } else {
    VIXL_UNREACHABLE();
  }

  return RawbitsToFloat((sign << 31) | (exponent << kFloatMantissaBits) |
                        mantissa);
}
333

334

335
float FPToFloat(double value,
336
                FPRounding round_mode,
337
                UseDefaultNaN DN,
338
                bool* exception) {
339
  // Only the FPTieEven rounding mode is implemented.
340
  VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
341
  USE(round_mode);
342

343
  switch (std::fpclassify(value)) {
344
    case FP_NAN: {
345
      if (IsSignallingNaN(value)) {
346
        if (exception != NULL) {
347
          *exception = true;
348
        }
349
      }
350
      if (DN == kUseDefaultNaN) return kFP32DefaultNaN;
351

352
      // Convert NaNs as the processor would:
353
      //  - The sign is propagated.
354
      //  - The payload (mantissa) is transferred as much as possible, except
355
      //    that the top bit is forced to '1', making the result a quiet NaN.
356
      uint64_t raw = DoubleToRawbits(value);
357

358
      uint32_t sign = raw >> 63;
359
      uint32_t exponent = (1 << 8) - 1;
360
      uint32_t payload =
361
          static_cast<uint32_t>(ExtractUnsignedBitfield64(50, 52 - 23, raw));
362
      payload |= (1 << 22);  // Force a quiet NaN.
363

364
      return RawbitsToFloat((sign << 31) | (exponent << 23) | payload);
365
    }
366

367
    case FP_ZERO:
368
    case FP_INFINITE: {
369
      // In a C++ cast, any value representable in the target type will be
370
      // unchanged. This is always the case for +/-0.0 and infinities.
371
      return static_cast<float>(value);
372
    }
373

374
    case FP_NORMAL:
375
    case FP_SUBNORMAL: {
376
      // Convert double-to-float as the processor would, assuming that FPCR.FZ
377
      // (flush-to-zero) is not set.
378
      uint64_t raw = DoubleToRawbits(value);
379
      // Extract the IEEE-754 double components.
380
      uint32_t sign = raw >> 63;
381
      // Extract the exponent and remove the IEEE-754 encoding bias.
382
      int32_t exponent =
383
          static_cast<int32_t>(ExtractUnsignedBitfield64(62, 52, raw)) - 1023;
384
      // Extract the mantissa and add the implicit '1' bit.
385
      uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw);
386
      if (std::fpclassify(value) == FP_NORMAL) {
387
        mantissa |= (UINT64_C(1) << 52);
388
      }
389
      return FPRoundToFloat(sign, exponent, mantissa, round_mode);
390
    }
391
  }
392

393
  VIXL_UNREACHABLE();
394
  return static_cast<float>(value);
395
}
396

397
// TODO: We should consider implementing a full FPToDouble(Float16)
398
// conversion function (for performance reasons).
399
double FPToDouble(Float16 value, UseDefaultNaN DN, bool* exception) {
400
  // We can rely on implicit float to double conversion here.
401
  return FPToFloat(value, DN, exception);
402
}
403

404

405
double FPToDouble(float value, UseDefaultNaN DN, bool* exception) {
406
  switch (std::fpclassify(value)) {
407
    case FP_NAN: {
408
      if (IsSignallingNaN(value)) {
409
        if (exception != NULL) {
410
          *exception = true;
411
        }
412
      }
413
      if (DN == kUseDefaultNaN) return kFP64DefaultNaN;
414

415
      // Convert NaNs as the processor would:
416
      //  - The sign is propagated.
417
      //  - The payload (mantissa) is transferred entirely, except that the top
418
      //    bit is forced to '1', making the result a quiet NaN. The unused
419
      //    (low-order) payload bits are set to 0.
420
      uint32_t raw = FloatToRawbits(value);
421

422
      uint64_t sign = raw >> 31;
423
      uint64_t exponent = (1 << 11) - 1;
424
      uint64_t payload = ExtractUnsignedBitfield64(21, 0, raw);
425
      payload <<= (52 - 23);           // The unused low-order bits should be 0.
426
      payload |= (UINT64_C(1) << 51);  // Force a quiet NaN.
427

428
      return RawbitsToDouble((sign << 63) | (exponent << 52) | payload);
429
    }
430

431
    case FP_ZERO:
432
    case FP_NORMAL:
433
    case FP_SUBNORMAL:
434
    case FP_INFINITE: {
435
      // All other inputs are preserved in a standard cast, because every value
436
      // representable using an IEEE-754 float is also representable using an
437
      // IEEE-754 double.
438
      return static_cast<double>(value);
439
    }
440
  }
441

442
  VIXL_UNREACHABLE();
443
  return static_cast<double>(value);
444
}
445

446

447
// Narrows a float to half precision.
//
//  - round_mode: asserted to be FPTieEven, the only mode implemented here; it
//                is passed on to FPRoundToFloat16.
//  - DN:         when equal to kUseDefaultNaN, every NaN input produces
//                kFP16DefaultNaN instead of a converted payload.
//  - exception:  if non-NULL, set to true when the input is a signalling NaN.
//                It is never written in any other case.
Float16 FPToFloat16(float value,
                    FPRounding round_mode,
                    UseDefaultNaN DN,
                    bool* exception) {
  VIXL_ASSERT(round_mode == FPTieEven);
  USE(round_mode);

  // Split the single-precision encoding; the exponent has the bias (127)
  // removed.
  uint32_t raw = FloatToRawbits(value);
  int32_t sign = raw >> 31;
  int32_t exponent = ExtractUnsignedBitfield32(30, 23, raw) - 127;
  uint32_t mantissa = ExtractUnsignedBitfield32(22, 0, raw);

  const int fp_class = std::fpclassify(value);

  if (fp_class == FP_NAN) {
    if (IsSignallingNaN(value) && (exception != NULL)) {
      *exception = true;
    }
    if (DN == kUseDefaultNaN) return kFP16DefaultNaN;

    // Convert NaNs as the processor would:
    //  - the sign is kept;
    //  - as much of the payload as fits is kept (the upper mantissa bits);
    //  - the top mantissa bit is forced to '1' so the result is quiet.
    // Starting from the infinity encoding supplies the sign bit and the
    // all-ones exponent.
    uint16_t nan_bits = Float16ToRawbits(
        (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity);
    nan_bits |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
    nan_bits |= (1 << 9);
    return RawbitsToFloat16(nan_bits);
  }

  if (fp_class == FP_ZERO) {
    return (sign == 0) ? kFP16PositiveZero : kFP16NegativeZero;
  }

  if (fp_class == FP_INFINITE) {
    return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
  }

  if ((fp_class == FP_NORMAL) || (fp_class == FP_SUBNORMAL)) {
    // Convert float-to-half as the processor would, assuming that FPCR.FZ
    // (flush-to-zero) is not set.

    // Add the implicit '1' bit to the mantissa before rounding.
    mantissa += (1 << 23);
    return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
  }

  VIXL_UNREACHABLE();
  return kFP16PositiveZero;
}
500

501

502
// Narrows a double to half precision.
//
//  - round_mode: asserted to be FPTieEven, the only mode implemented here; it
//                is passed on to FPRoundToFloat16.
//  - DN:         when equal to kUseDefaultNaN, every NaN input produces
//                kFP16DefaultNaN instead of a converted payload.
//  - exception:  if non-NULL, set to true when the input is a signalling NaN.
//                It is never written in any other case.
Float16 FPToFloat16(double value,
                    FPRounding round_mode,
                    UseDefaultNaN DN,
                    bool* exception) {
  VIXL_ASSERT(round_mode == FPTieEven);
  USE(round_mode);

  // Split the double-precision encoding; the exponent has the bias (1023)
  // removed.
  uint64_t raw = DoubleToRawbits(value);
  int32_t sign = raw >> 63;
  int64_t exponent = ExtractUnsignedBitfield64(62, 52, raw) - 1023;
  uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw);

  const int fp_class = std::fpclassify(value);

  if (fp_class == FP_NAN) {
    if (IsSignallingNaN(value) && (exception != NULL)) {
      *exception = true;
    }
    if (DN == kUseDefaultNaN) return kFP16DefaultNaN;

    // Convert NaNs as the processor would:
    //  - the sign is kept;
    //  - as much of the payload as fits is kept (the upper mantissa bits);
    //  - the top mantissa bit is forced to '1' so the result is quiet.
    // Starting from the infinity encoding supplies the sign bit and the
    // all-ones exponent.
    uint16_t nan_bits = Float16ToRawbits(
        (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity);
    nan_bits |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
    nan_bits |= (1 << 9);
    return RawbitsToFloat16(nan_bits);
  }

  if (fp_class == FP_ZERO) {
    return (sign == 0) ? kFP16PositiveZero : kFP16NegativeZero;
  }

  if (fp_class == FP_INFINITE) {
    return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
  }

  if ((fp_class == FP_NORMAL) || (fp_class == FP_SUBNORMAL)) {
    // Convert double-to-half as the processor would, assuming that FPCR.FZ
    // (flush-to-zero) is not set.

    // Add the implicit '1' bit to the mantissa before rounding.
    mantissa += (UINT64_C(1) << 52);
    return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
  }

  VIXL_UNREACHABLE();
  return kFP16PositiveZero;
}
554

555
}  // namespace vixl
556

557
Product

Resources

Company