Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/astcenc/astcenc_mathlib_softfloat.cpp
9896 views
1
// SPDX-License-Identifier: Apache-2.0
2
// ----------------------------------------------------------------------------
3
// Copyright 2011-2021 Arm Limited
4
//
5
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
6
// use this file except in compliance with the License. You may obtain a copy
7
// of the License at:
8
//
9
// http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing, software
12
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14
// License for the specific language governing permissions and limitations
15
// under the License.
16
// ----------------------------------------------------------------------------
17
18
/**
19
* @brief Soft-float library for IEEE-754.
20
*/
21
#if (ASTCENC_F16C == 0) && (ASTCENC_NEON == 0)
22
23
#include "astcenc_mathlib.h"
24
25
/* sized soft-float types. These are mapped to the sized integer
26
types of C99, instead of C's floating-point types; this is because
27
the library needs to maintain exact, bit-level control on all
28
operations on these data types. */
29
typedef uint16_t sf16;
30
typedef uint32_t sf32;
31
32
/******************************************
33
helper functions and their lookup tables
34
******************************************/
35
/* count leading zeros functions. Only used when the input is nonzero. */
36
37
#if defined(__GNUC__) && (defined(__i386) || defined(__amd64))
38
#elif defined(__arm__) && defined(__ARMCC_VERSION)
39
#elif defined(__arm__) && defined(__GNUC__)
40
#else
41
/* table used for the slow default versions. */
42
static const uint8_t clz_table[256] =
43
{
44
8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
45
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
46
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
47
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
48
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
49
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
50
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
51
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
52
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
53
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
54
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
55
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
56
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
57
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
58
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
59
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
60
};
61
#endif
62
63
/*
64
32-bit count-leading-zeros function: use the Assembly instruction whenever possible. */
65
static uint32_t clz32(uint32_t inp)
66
{
67
#if defined(__GNUC__) && (defined(__i386) || defined(__amd64))
68
uint32_t bsr;
69
__asm__("bsrl %1, %0": "=r"(bsr):"r"(inp | 1));
70
return 31 - bsr;
71
#else
72
#if defined(__arm__) && defined(__ARMCC_VERSION)
73
return __clz(inp); /* armcc builtin */
74
#else
75
#if defined(__arm__) && defined(__GNUC__)
76
uint32_t lz;
77
__asm__("clz %0, %1": "=r"(lz):"r"(inp));
78
return lz;
79
#else
80
/* slow default version */
81
uint32_t summa = 24;
82
if (inp >= UINT32_C(0x10000))
83
{
84
inp >>= 16;
85
summa -= 16;
86
}
87
if (inp >= UINT32_C(0x100))
88
{
89
inp >>= 8;
90
summa -= 8;
91
}
92
return summa + clz_table[inp];
93
#endif
94
#endif
95
#endif
96
}
97
98
/* the five rounding modes that IEEE-754r defines */
99
typedef enum
100
{
101
SF_UP = 0, /* round towards positive infinity */
102
SF_DOWN = 1, /* round towards negative infinity */
103
SF_TOZERO = 2, /* round towards zero */
104
SF_NEARESTEVEN = 3, /* round toward nearest value; if mid-between, round to even value */
105
SF_NEARESTAWAY = 4 /* round toward nearest value; if mid-between, round away from zero */
106
} roundmode;
107
108
109
static uint32_t rtne_shift32(uint32_t inp, uint32_t shamt)
110
{
111
uint32_t vl1 = UINT32_C(1) << shamt;
112
uint32_t inp2 = inp + (vl1 >> 1); /* added 0.5 ULP */
113
uint32_t msk = (inp | UINT32_C(1)) & vl1; /* nonzero if odd. '| 1' forces it to 1 if the shamt is 0. */
114
msk--; /* negative if even, nonnegative if odd. */
115
inp2 -= (msk >> 31); /* subtract epsilon before shift if even. */
116
inp2 >>= shamt;
117
return inp2;
118
}
119
120
static uint32_t rtna_shift32(uint32_t inp, uint32_t shamt)
121
{
122
uint32_t vl1 = (UINT32_C(1) << shamt) >> 1;
123
inp += vl1;
124
inp >>= shamt;
125
return inp;
126
}
127
128
static uint32_t rtup_shift32(uint32_t inp, uint32_t shamt)
129
{
130
uint32_t vl1 = UINT32_C(1) << shamt;
131
inp += vl1;
132
inp--;
133
inp >>= shamt;
134
return inp;
135
}
136
137
/* convert from FP16 to FP32. */
138
static sf32 sf16_to_sf32(sf16 inp)
139
{
140
uint32_t inpx = inp;
141
142
/*
143
This table contains, for every FP16 sign/exponent value combination,
144
the difference between the input FP16 value and the value obtained
145
by shifting the correct FP32 result right by 13 bits.
146
This table allows us to handle every case except denormals and NaN
147
with just 1 table lookup, 2 shifts and 1 add.
148
*/
149
150
#define WITH_MSB(a) (UINT32_C(a) | (1u << 31))
151
static const uint32_t tbl[64] =
152
{
153
WITH_MSB(0x00000), 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000,
154
0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000,
155
0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000,
156
0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, WITH_MSB(0x38000),
157
WITH_MSB(0x38000), 0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000,
158
0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000,
159
0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000,
160
0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000, WITH_MSB(0x70000)
161
};
162
163
uint32_t res = tbl[inpx >> 10];
164
res += inpx;
165
166
/* Normal cases: MSB of 'res' not set. */
167
if ((res & WITH_MSB(0)) == 0)
168
{
169
return res << 13;
170
}
171
172
/* Infinity and Zero: 10 LSB of 'res' not set. */
173
if ((res & 0x3FF) == 0)
174
{
175
return res << 13;
176
}
177
178
/* NaN: the exponent field of 'inp' is non-zero. */
179
if ((inpx & 0x7C00) != 0)
180
{
181
/* All NaNs are quietened. */
182
return (res << 13) | 0x400000;
183
}
184
185
/* Denormal cases */
186
uint32_t sign = (inpx & 0x8000) << 16;
187
uint32_t mskval = inpx & 0x7FFF;
188
uint32_t leadingzeroes = clz32(mskval);
189
mskval <<= leadingzeroes;
190
return (mskval >> 8) + ((0x85 - leadingzeroes) << 23) + sign;
191
}
192
193
/* Conversion routine that converts from FP32 to FP16. It supports denormals and all rounding modes. If a NaN is given as input, it is quietened. */
194
static sf16 sf32_to_sf16(sf32 inp, roundmode rmode)
195
{
196
/* for each possible sign/exponent combination, store a case index. This gives a 512-byte table */
197
static const uint8_t tab[512] {
198
0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
199
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
200
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
201
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
202
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
203
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
204
10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
205
20, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
206
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 40,
207
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
208
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
209
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
210
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
211
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
212
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
213
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 50,
214
215
5, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
216
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
217
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
218
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
219
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
220
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
221
15, 15, 15, 15, 15, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
222
25, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
223
35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 45,
224
45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
225
45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
226
45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
227
45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
228
45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
229
45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
230
45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 55,
231
};
232
233
/* many of the cases below use a case-dependent magic constant. So we look up a magic constant before actually performing the switch. This table allows us to group cases, thereby minimizing code
234
size. */
235
static const uint32_t tabx[60] {
236
UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0x8000), UINT32_C(0x80000000), UINT32_C(0x8000), UINT32_C(0x8000), UINT32_C(0x8000),
237
UINT32_C(1), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0x8000), UINT32_C(0x8001), UINT32_C(0x8000), UINT32_C(0x8000), UINT32_C(0x8000),
238
UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0x8000), UINT32_C(0x8000), UINT32_C(0x8000), UINT32_C(0x8000), UINT32_C(0x8000),
239
UINT32_C(0xC8001FFF), UINT32_C(0xC8000000), UINT32_C(0xC8000000), UINT32_C(0xC8000FFF), UINT32_C(0xC8001000),
240
UINT32_C(0x58000000), UINT32_C(0x38001FFF), UINT32_C(0x58000000), UINT32_C(0x58000FFF), UINT32_C(0x58001000),
241
UINT32_C(0x7C00), UINT32_C(0x7BFF), UINT32_C(0x7BFF), UINT32_C(0x7C00), UINT32_C(0x7C00),
242
UINT32_C(0xFBFF), UINT32_C(0xFC00), UINT32_C(0xFBFF), UINT32_C(0xFC00), UINT32_C(0xFC00),
243
UINT32_C(0x90000000), UINT32_C(0x90000000), UINT32_C(0x90000000), UINT32_C(0x90000000), UINT32_C(0x90000000),
244
UINT32_C(0x20000000), UINT32_C(0x20000000), UINT32_C(0x20000000), UINT32_C(0x20000000), UINT32_C(0x20000000)
245
};
246
247
uint32_t p;
248
uint32_t idx = rmode + tab[inp >> 23];
249
uint32_t vlx = tabx[idx];
250
switch (idx)
251
{
252
/*
253
Positive number which may be Infinity or NaN.
254
We need to check whether it is NaN; if it is, quieten it by setting the top bit of the mantissa.
255
(If we don't do this quieting, then a NaN that is distinguished only by having
256
its low-order bits set, would be turned into an INF. */
257
case 50:
258
case 51:
259
case 52:
260
case 53:
261
case 54:
262
case 55:
263
case 56:
264
case 57:
265
case 58:
266
case 59:
267
/*
268
the input value is 0x7F800000 or 0xFF800000 if it is INF.
269
By subtracting 1, we get 7F7FFFFF or FF7FFFFF, that is, bit 23 becomes zero.
270
For NaNs, however, this operation will keep bit 23 with the value 1.
271
We can then extract bit 23, and logical-OR bit 9 of the result with this
272
bit in order to quieten the NaN (a Quiet NaN is a NaN where the top bit
273
of the mantissa is set.)
274
*/
275
p = (inp - 1) & UINT32_C(0x800000); /* zero if INF, nonzero if NaN. */
276
return static_cast<sf16>(((inp + vlx) >> 13) | (p >> 14));
277
/*
278
positive, exponent = 0, round-mode == UP; need to check whether number actually is 0.
279
If it is, then return 0, else return 1 (the smallest representable nonzero number)
280
*/
281
case 0:
282
/*
283
-inp will set the MSB if the input number is nonzero.
284
Thus (-inp) >> 31 will turn into 0 if the input number is 0 and 1 otherwise.
285
*/
286
return static_cast<sf16>(static_cast<uint32_t>((-static_cast<int32_t>(inp))) >> 31);
287
288
/*
289
negative, exponent = , round-mode == DOWN, need to check whether number is
290
actually 0. If it is, return 0x8000 ( float -0.0 )
291
Else return the smallest negative number ( 0x8001 ) */
292
case 6:
293
/*
294
in this case 'vlx' is 0x80000000. By subtracting the input value from it,
295
we obtain a value that is 0 if the input value is in fact zero and has
296
the MSB set if it isn't. We then right-shift the value by 31 places to
297
get a value that is 0 if the input is -0.0 and 1 otherwise.
298
*/
299
return static_cast<sf16>(((vlx - inp) >> 31) + UINT32_C(0x8000));
300
301
/*
302
for all other cases involving underflow/overflow, we don't need to
303
do actual tests; we just return 'vlx'.
304
*/
305
case 1:
306
case 2:
307
case 3:
308
case 4:
309
case 5:
310
case 7:
311
case 8:
312
case 9:
313
case 10:
314
case 11:
315
case 12:
316
case 13:
317
case 14:
318
case 15:
319
case 16:
320
case 17:
321
case 18:
322
case 19:
323
case 40:
324
case 41:
325
case 42:
326
case 43:
327
case 44:
328
case 45:
329
case 46:
330
case 47:
331
case 48:
332
case 49:
333
return static_cast<sf16>(vlx);
334
335
/*
336
for normal numbers, 'vlx' is the difference between the FP32 value of a number and the
337
FP16 representation of the same number left-shifted by 13 places. In addition, a rounding constant is
338
baked into 'vlx': for rounding-away-from zero, the constant is 2^13 - 1, causing roundoff away
339
from zero. for round-to-nearest away, the constant is 2^12, causing roundoff away from zero.
340
for round-to-nearest-even, the constant is 2^12 - 1. This causes correct round-to-nearest-even
341
except for odd input numbers. For odd input numbers, we need to add 1 to the constant. */
342
343
/* normal number, all rounding modes except round-to-nearest-even: */
344
case 30:
345
case 31:
346
case 32:
347
case 34:
348
case 35:
349
case 36:
350
case 37:
351
case 39:
352
return static_cast<sf16>((inp + vlx) >> 13);
353
354
/* normal number, round-to-nearest-even. */
355
case 33:
356
case 38:
357
p = inp + vlx;
358
p += (inp >> 13) & 1;
359
return static_cast<sf16>(p >> 13);
360
361
/*
362
the various denormal cases. These are not expected to be common, so their performance is a bit
363
less important. For each of these cases, we need to extract an exponent and a mantissa
364
(including the implicit '1'!), and then right-shift the mantissa by a shift-amount that
365
depends on the exponent. The shift must apply the correct rounding mode. 'vlx' is used to supply the
366
sign of the resulting denormal number.
367
*/
368
case 21:
369
case 22:
370
case 25:
371
case 27:
372
/* denormal, round towards zero. */
373
p = 126 - ((inp >> 23) & 0xFF);
374
return static_cast<sf16>((((inp & UINT32_C(0x7FFFFF)) + UINT32_C(0x800000)) >> p) | vlx);
375
case 20:
376
case 26:
377
/* denormal, round away from zero. */
378
p = 126 - ((inp >> 23) & 0xFF);
379
return static_cast<sf16>(rtup_shift32((inp & UINT32_C(0x7FFFFF)) + UINT32_C(0x800000), p) | vlx);
380
case 24:
381
case 29:
382
/* denormal, round to nearest-away */
383
p = 126 - ((inp >> 23) & 0xFF);
384
return static_cast<sf16>(rtna_shift32((inp & UINT32_C(0x7FFFFF)) + UINT32_C(0x800000), p) | vlx);
385
case 23:
386
case 28:
387
/* denormal, round to nearest-even. */
388
p = 126 - ((inp >> 23) & 0xFF);
389
return static_cast<sf16>(rtne_shift32((inp & UINT32_C(0x7FFFFF)) + UINT32_C(0x800000), p) | vlx);
390
}
391
392
return 0;
393
}
394
395
/* convert from soft-float to native-float */
396
float sf16_to_float(uint16_t p)
397
{
398
if32 i;
399
i.u = sf16_to_sf32(p);
400
return i.f;
401
}
402
403
/* convert from native-float to soft-float */
404
uint16_t float_to_sf16(float p)
405
{
406
if32 i;
407
i.f = p;
408
return sf32_to_sf16(i.u, SF_NEARESTEVEN);
409
}
410
411
#endif
412
413