CoCalc -- astcenc_mathlib

GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/astcenc/astcenc_mathlib_softfloat.cpp
⁹⁸⁹⁶ views
1
// SPDX-License-Identifier: Apache-2.0
2
// ----------------------------------------------------------------------------
3
// Copyright 2011-2021 Arm Limited
4
//
5
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
6
// use this file except in compliance with the License. You may obtain a copy
7
// of the License at:
8
//
9
//     http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing, software
12
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14
// License for the specific language governing permissions and limitations
15
// under the License.
16
// ----------------------------------------------------------------------------
17

18
/**
19
 * @brief Soft-float library for IEEE-754.
20
 */
21
#if (ASTCENC_F16C == 0) && (ASTCENC_NEON == 0)
22

23
#include "astcenc_mathlib.h"
24

25
/*	sized soft-float types. These are mapped to the sized integer
26
    types of C99, instead of C's floating-point types; this is because
27
    the library needs to maintain exact, bit-level control on all
28
    operations on these data types. */
29
typedef uint16_t sf16;
30
typedef uint32_t sf32;
31

32
/******************************************
33
  helper functions and their lookup tables
34
 ******************************************/
35
/* count leading zeros functions. Only used when the input is nonzero. */
36

37
#if defined(__GNUC__) && (defined(__i386) || defined(__amd64))
38
#elif defined(__arm__) && defined(__ARMCC_VERSION)
39
#elif defined(__arm__) && defined(__GNUC__)
40
#else
41
	/* table used for the slow default versions. */
42
	static const uint8_t clz_table[256] =
43
	{
44
		8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
45
		3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
46
		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
47
		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
48
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
49
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
50
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
51
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
52
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
53
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
54
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
55
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
56
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
57
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
58
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
59
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
60
	};
61
#endif
62

63
/*
64
   32-bit count-leading-zeros function: use the Assembly instruction whenever possible. */
65
static uint32_t clz32(uint32_t inp)
66
{
67
	#if defined(__GNUC__) && (defined(__i386) || defined(__amd64))
68
		uint32_t bsr;
69
		__asm__("bsrl %1, %0": "=r"(bsr):"r"(inp | 1));
70
		return 31 - bsr;
71
	#else
72
		#if defined(__arm__) && defined(__ARMCC_VERSION)
73
			return __clz(inp);			/* armcc builtin */
74
		#else
75
			#if defined(__arm__) && defined(__GNUC__)
76
				uint32_t lz;
77
				__asm__("clz %0, %1": "=r"(lz):"r"(inp));
78
				return lz;
79
			#else
80
				/* slow default version */
81
				uint32_t summa = 24;
82
				if (inp >= UINT32_C(0x10000))
83
				{
84
					inp >>= 16;
85
					summa -= 16;
86
				}
87
				if (inp >= UINT32_C(0x100))
88
				{
89
					inp >>= 8;
90
					summa -= 8;
91
				}
92
				return summa + clz_table[inp];
93
			#endif
94
		#endif
95
	#endif
96
}
97

98
/* the five rounding modes that IEEE-754r defines */
99
typedef enum
100
{
101
	SF_UP = 0,				/* round towards positive infinity */
102
	SF_DOWN = 1,			/* round towards negative infinity */
103
	SF_TOZERO = 2,			/* round towards zero */
104
	SF_NEARESTEVEN = 3,		/* round toward nearest value; if mid-between, round to even value */
105
	SF_NEARESTAWAY = 4		/* round toward nearest value; if mid-between, round away from zero */
106
} roundmode;
107

108

109
static uint32_t rtne_shift32(uint32_t inp, uint32_t shamt)
110
{
111
	uint32_t vl1 = UINT32_C(1) << shamt;
112
	uint32_t inp2 = inp + (vl1 >> 1);	/* added 0.5 ULP */
113
	uint32_t msk = (inp | UINT32_C(1)) & vl1;	/* nonzero if odd. '| 1' forces it to 1 if the shamt is 0. */
114
	msk--;						/* negative if even, nonnegative if odd. */
115
	inp2 -= (msk >> 31);		/* subtract epsilon before shift if even. */
116
	inp2 >>= shamt;
117
	return inp2;
118
}
119

120
static uint32_t rtna_shift32(uint32_t inp, uint32_t shamt)
121
{
122
	uint32_t vl1 = (UINT32_C(1) << shamt) >> 1;
123
	inp += vl1;
124
	inp >>= shamt;
125
	return inp;
126
}
127

128
static uint32_t rtup_shift32(uint32_t inp, uint32_t shamt)
129
{
130
	uint32_t vl1 = UINT32_C(1) << shamt;
131
	inp += vl1;
132
	inp--;
133
	inp >>= shamt;
134
	return inp;
135
}
136

137
/* convert from FP16 to FP32. */
138
static sf32 sf16_to_sf32(sf16 inp)
139
{
140
	uint32_t inpx = inp;
141

142
	/*
143
		This table contains, for every FP16 sign/exponent value combination,
144
		the difference between the input FP16 value and the value obtained
145
		by shifting the correct FP32 result right by 13 bits.
146
		This table allows us to handle every case except denormals and NaN
147
		with just 1 table lookup, 2 shifts and 1 add.
148
	*/
149

150
	#define WITH_MSB(a) (UINT32_C(a) | (1u << 31))
151
	static const uint32_t tbl[64] =
152
	{
153
		WITH_MSB(0x00000), 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000,          0x1C000,
154
		         0x1C000,  0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000,          0x1C000,
155
		         0x1C000,  0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000,          0x1C000,
156
		         0x1C000,  0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, WITH_MSB(0x38000),
157
		WITH_MSB(0x38000), 0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000,          0x54000,
158
		         0x54000,  0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000,          0x54000,
159
		         0x54000,  0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000,          0x54000,
160
		         0x54000,  0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000, WITH_MSB(0x70000)
161
	};
162

163
	uint32_t res = tbl[inpx >> 10];
164
	res += inpx;
165

166
	/* Normal cases: MSB of 'res' not set. */
167
	if ((res & WITH_MSB(0)) == 0)
168
	{
169
		return res << 13;
170
	}
171

172
	/* Infinity and Zero: 10 LSB of 'res' not set. */
173
	if ((res & 0x3FF) == 0)
174
	{
175
		return res << 13;
176
	}
177

178
	/* NaN: the exponent field of 'inp' is non-zero. */
179
	if ((inpx & 0x7C00) != 0)
180
	{
181
		/* All NaNs are quietened. */
182
		return (res << 13) | 0x400000;
183
	}
184

185
	/* Denormal cases */
186
	uint32_t sign = (inpx & 0x8000) << 16;
187
	uint32_t mskval = inpx & 0x7FFF;
188
	uint32_t leadingzeroes = clz32(mskval);
189
	mskval <<= leadingzeroes;
190
	return (mskval >> 8) + ((0x85 - leadingzeroes) << 23) + sign;
191
}
192

193
/* Conversion routine that converts from FP32 to FP16. It supports denormals and all rounding modes. If a NaN is given as input, it is quietened. */
194
static sf16 sf32_to_sf16(sf32 inp, roundmode rmode)
195
{
196
	/* for each possible sign/exponent combination, store a case index. This gives a 512-byte table */
197
	static const uint8_t tab[512] {
198
		0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
199
		10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
200
		10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
201
		10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
202
		10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
203
		10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
204
		10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
205
		20, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
206
		30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 40,
207
		40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
208
		40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
209
		40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
210
		40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
211
		40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
212
		40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
213
		40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 50,
214

215
		5, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
216
		15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
217
		15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
218
		15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
219
		15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
220
		15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
221
		15, 15, 15, 15, 15, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
222
		25, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
223
		35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 45,
224
		45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
225
		45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
226
		45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
227
		45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
228
		45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
229
		45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
230
		45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 55,
231
	};
232

233
	/* many of the cases below use a case-dependent magic constant. So we look up a magic constant before actually performing the switch. This table allows us to group cases, thereby minimizing code
234
	   size. */
235
	static const uint32_t tabx[60] {
236
		UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0x8000), UINT32_C(0x80000000), UINT32_C(0x8000), UINT32_C(0x8000), UINT32_C(0x8000),
237
		UINT32_C(1), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0x8000), UINT32_C(0x8001), UINT32_C(0x8000), UINT32_C(0x8000), UINT32_C(0x8000),
238
		UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0x8000), UINT32_C(0x8000), UINT32_C(0x8000), UINT32_C(0x8000), UINT32_C(0x8000),
239
		UINT32_C(0xC8001FFF), UINT32_C(0xC8000000), UINT32_C(0xC8000000), UINT32_C(0xC8000FFF), UINT32_C(0xC8001000),
240
		UINT32_C(0x58000000), UINT32_C(0x38001FFF), UINT32_C(0x58000000), UINT32_C(0x58000FFF), UINT32_C(0x58001000),
241
		UINT32_C(0x7C00), UINT32_C(0x7BFF), UINT32_C(0x7BFF), UINT32_C(0x7C00), UINT32_C(0x7C00),
242
		UINT32_C(0xFBFF), UINT32_C(0xFC00), UINT32_C(0xFBFF), UINT32_C(0xFC00), UINT32_C(0xFC00),
243
		UINT32_C(0x90000000), UINT32_C(0x90000000), UINT32_C(0x90000000), UINT32_C(0x90000000), UINT32_C(0x90000000),
244
		UINT32_C(0x20000000), UINT32_C(0x20000000), UINT32_C(0x20000000), UINT32_C(0x20000000), UINT32_C(0x20000000)
245
	};
246

247
	uint32_t p;
248
	uint32_t idx = rmode + tab[inp >> 23];
249
	uint32_t vlx = tabx[idx];
250
	switch (idx)
251
	{
252
		/*
253
			Positive number which may be Infinity or NaN.
254
			We need to check whether it is NaN; if it is, quieten it by setting the top bit of the mantissa.
255
			(If we don't do this quieting, then a NaN  that is distinguished only by having
256
			its low-order bits set, would be turned into an INF. */
257
	case 50:
258
	case 51:
259
	case 52:
260
	case 53:
261
	case 54:
262
	case 55:
263
	case 56:
264
	case 57:
265
	case 58:
266
	case 59:
267
		/*
268
			the input value is 0x7F800000 or 0xFF800000 if it is INF.
269
			By subtracting 1, we get 7F7FFFFF or FF7FFFFF, that is, bit 23 becomes zero.
270
			For NaNs, however, this operation will keep bit 23 with the value 1.
271
			We can then extract bit 23, and logical-OR bit 9 of the result with this
272
			bit in order to quieten the NaN (a Quiet NaN is a NaN where the top bit
273
			of the mantissa is set.)
274
		*/
275
		p = (inp - 1) & UINT32_C(0x800000);	/* zero if INF, nonzero if NaN. */
276
		return static_cast<sf16>(((inp + vlx) >> 13) | (p >> 14));
277
		/*
278
			positive, exponent = 0, round-mode == UP; need to check whether number actually is 0.
279
			If it is, then return 0, else return 1 (the smallest representable nonzero number)
280
		*/
281
	case 0:
282
		/*
283
			-inp will set the MSB if the input number is nonzero.
284
			Thus (-inp) >> 31 will turn into 0 if the input number is 0 and 1 otherwise.
285
		*/
286
		return static_cast<sf16>(static_cast<uint32_t>((-static_cast<int32_t>(inp))) >> 31);
287

288
		/*
289
			negative, exponent = , round-mode == DOWN, need to check whether number is
290
			actually 0. If it is, return 0x8000 ( float -0.0 )
291
			Else return the smallest negative number ( 0x8001 ) */
292
	case 6:
293
		/*
294
			in this case 'vlx' is 0x80000000. By subtracting the input value from it,
295
			we obtain a value that is 0 if the input value is in fact zero and has
296
			the MSB set if it isn't. We then right-shift the value by 31 places to
297
			get a value that is 0 if the input is -0.0 and 1 otherwise.
298
		*/
299
		return static_cast<sf16>(((vlx - inp) >> 31) + UINT32_C(0x8000));
300

301
		/*
302
			for all other cases involving underflow/overflow, we don't need to
303
			do actual tests; we just return 'vlx'.
304
		*/
305
	case 1:
306
	case 2:
307
	case 3:
308
	case 4:
309
	case 5:
310
	case 7:
311
	case 8:
312
	case 9:
313
	case 10:
314
	case 11:
315
	case 12:
316
	case 13:
317
	case 14:
318
	case 15:
319
	case 16:
320
	case 17:
321
	case 18:
322
	case 19:
323
	case 40:
324
	case 41:
325
	case 42:
326
	case 43:
327
	case 44:
328
	case 45:
329
	case 46:
330
	case 47:
331
	case 48:
332
	case 49:
333
		return static_cast<sf16>(vlx);
334

335
		/*
336
			for normal numbers, 'vlx' is the difference between the FP32 value of a number and the
337
			FP16 representation of the same number left-shifted by 13 places. In addition, a rounding constant is
338
			baked into 'vlx': for rounding-away-from zero, the constant is 2^13 - 1, causing roundoff away
339
			from zero. for round-to-nearest away, the constant is 2^12, causing roundoff away from zero.
340
			for round-to-nearest-even, the constant is 2^12 - 1. This causes correct round-to-nearest-even
341
			except for odd input numbers. For odd input numbers, we need to add 1 to the constant. */
342

343
		/* normal number, all rounding modes except round-to-nearest-even: */
344
	case 30:
345
	case 31:
346
	case 32:
347
	case 34:
348
	case 35:
349
	case 36:
350
	case 37:
351
	case 39:
352
		return static_cast<sf16>((inp + vlx) >> 13);
353

354
		/* normal number, round-to-nearest-even. */
355
	case 33:
356
	case 38:
357
		p = inp + vlx;
358
		p += (inp >> 13) & 1;
359
		return static_cast<sf16>(p >> 13);
360

361
		/*
362
			the various denormal cases. These are not expected to be common, so their performance is a bit
363
			less important. For each of these cases, we need to extract an exponent and a mantissa
364
			(including the implicit '1'!), and then right-shift the mantissa by a shift-amount that
365
			depends on the exponent. The shift must apply the correct rounding mode. 'vlx' is used to supply the
366
			sign of the resulting denormal number.
367
		*/
368
	case 21:
369
	case 22:
370
	case 25:
371
	case 27:
372
		/* denormal, round towards zero. */
373
		p = 126 - ((inp >> 23) & 0xFF);
374
		return static_cast<sf16>((((inp & UINT32_C(0x7FFFFF)) + UINT32_C(0x800000)) >> p) | vlx);
375
	case 20:
376
	case 26:
377
		/* denormal, round away from zero. */
378
		p = 126 - ((inp >> 23) & 0xFF);
379
		return static_cast<sf16>(rtup_shift32((inp & UINT32_C(0x7FFFFF)) + UINT32_C(0x800000), p) | vlx);
380
	case 24:
381
	case 29:
382
		/* denormal, round to nearest-away */
383
		p = 126 - ((inp >> 23) & 0xFF);
384
		return static_cast<sf16>(rtna_shift32((inp & UINT32_C(0x7FFFFF)) + UINT32_C(0x800000), p) | vlx);
385
	case 23:
386
	case 28:
387
		/* denormal, round to nearest-even. */
388
		p = 126 - ((inp >> 23) & 0xFF);
389
		return static_cast<sf16>(rtne_shift32((inp & UINT32_C(0x7FFFFF)) + UINT32_C(0x800000), p) | vlx);
390
	}
391

392
	return 0;
393
}
394

395
/* convert from soft-float to native-float */
396
float sf16_to_float(uint16_t p)
397
{
398
	if32 i;
399
	i.u = sf16_to_sf32(p);
400
	return i.f;
401
}
402

403
/* convert from native-float to soft-float */
404
uint16_t float_to_sf16(float p)
405
{
406
	if32 i;
407
	i.f = p;
408
	return sf32_to_sf16(i.u, SF_NEARESTEVEN);
409
}
410

411
#endif
412

413
Product

Resources

Company