// Source: GitHub repository godotengine/godot, path thirdparty/astcenc/astcenc_mathlib.h (master branch)
// SPDX-License-Identifier: Apache-2.0
2
// ----------------------------------------------------------------------------
3
// Copyright 2011-2025 Arm Limited
4
//
5
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
6
// use this file except in compliance with the License. You may obtain a copy
7
// of the License at:
8
//
9
// http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing, software
12
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14
// License for the specific language governing permissions and limitations
15
// under the License.
16
// ----------------------------------------------------------------------------
17
18
/*
19
* This module implements a variety of mathematical data types and library
20
* functions used by the codec.
21
*/
22
23
#ifndef ASTC_MATHLIB_H_INCLUDED
24
#define ASTC_MATHLIB_H_INCLUDED
25
26
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>
29
30
// POPCNT selection: a value already supplied by the build is left untouched;
// otherwise it is set to 1 when the compiler defines __POPCNT__, else to 0.
#if !defined(ASTCENC_POPCNT)
  #if defined(__POPCNT__)
    #define ASTCENC_POPCNT 1
  #else
    #define ASTCENC_POPCNT 0
  #endif
#endif
37
38
// F16C selection: a value already supplied by the build is left untouched;
// otherwise it is set to 1 when the compiler defines __F16C__, else to 0.
#if !defined(ASTCENC_F16C)
  #if defined(__F16C__)
    #define ASTCENC_F16C 1
  #else
    #define ASTCENC_F16C 0
  #endif
#endif
45
46
// SSE level selection: a value already supplied by the build is left
// untouched. Otherwise the highest level advertised by the compiler wins:
// 42 for __SSE4_2__, 41 for __SSE4_1__, 20 for __SSE2__ (or _M_AMD64 when
// _M_ARM64EC is not defined), and 0 when none of these are present.
#if !defined(ASTCENC_SSE)
  #if defined(__SSE4_2__)
    #define ASTCENC_SSE 42
  #elif defined(__SSE4_1__)
    #define ASTCENC_SSE 41
  #elif defined(__SSE2__) || (defined(_M_AMD64) && !defined(_M_ARM64EC))
    #define ASTCENC_SSE 20
  #else
    #define ASTCENC_SSE 0
  #endif
#endif
57
58
// AVX level selection: a value already supplied by the build is left
// untouched. Otherwise it is 2 for __AVX2__, 1 for __AVX__, and 0 when neither
// is defined. Note that ASTCENC_X86_GATHERS is only defined here on the
// auto-detected AVX/AVX2 paths; it is not touched when ASTCENC_AVX was
// provided externally.
#if !defined(ASTCENC_AVX)
  #if defined(__AVX2__)
    #define ASTCENC_AVX 2
    #define ASTCENC_X86_GATHERS 1
  #elif defined(__AVX__)
    #define ASTCENC_AVX 1
    #define ASTCENC_X86_GATHERS 1
  #else
    #define ASTCENC_AVX 0
  #endif
#endif
69
70
// NEON selection: a value already supplied by the build is left untouched;
// otherwise it is set to 1 when any of __aarch64__, _M_ARM64, or _M_ARM64EC
// is defined, else to 0.
#if !defined(ASTCENC_NEON)
  #if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
    #define ASTCENC_NEON 1
  #else
    #define ASTCENC_NEON 0
  #endif
#endif
77
78
// SVE width selection: a value already supplied by the build is left
// untouched. Without __ARM_FEATURE_SVE the value is 0. With it, the value is 8
// only when __ARM_FEATURE_SVE_BITS is defined and equal to 256, otherwise 4.
#if !defined(ASTCENC_SVE)
  #if !defined(__ARM_FEATURE_SVE)
    #define ASTCENC_SVE 0
  #elif defined(__ARM_FEATURE_SVE_BITS) && __ARM_FEATURE_SVE_BITS == 256
    #define ASTCENC_SVE 8
  #else
    // Auto-detected SVE can only assume that a vector width of 4 is
    // available, but must also allow for the hardware being longer, so all
    // use of intrinsics must explicitly use predicate masks to limit
    // operations to 4-wide.
    #define ASTCENC_SVE 4
  #endif
#endif
92
93
// Alignment (in bytes) matching the widest enabled SIMD vector:
//   32 when AVX is enabled or SVE is configured as 8-wide,
//   16 when SSE, NEON, or 4-wide SVE is enabled,
//    0 for non-SIMD builds, meaning "use the default alignment".
#if (ASTCENC_AVX != 0) || (ASTCENC_SVE == 8)
  #define ASTCENC_VECALIGN 32
#elif (ASTCENC_SSE != 0) || (ASTCENC_NEON != 0) || (ASTCENC_SVE == 4)
  #define ASTCENC_VECALIGN 16
#else
  #define ASTCENC_VECALIGN 0
#endif
102
103
// ASTCENC_ALIGNAS expands to alignas(ASTCENC_VECALIGN) when an alignment is
// requested, and to nothing otherwise. C++11 says alignas(0) must be ignored,
// but some GCC versions do not honour that, so alignas(0) is never emitted.
#if ASTCENC_VECALIGN <= 0
  #define ASTCENC_ALIGNAS
#else
  #define ASTCENC_ALIGNAS alignas(ASTCENC_VECALIGN)
#endif
110
111
#if ASTCENC_SSE != 0 || ASTCENC_AVX != 0 || ASTCENC_POPCNT != 0
112
#include <immintrin.h>
113
#endif
114
115
/* ============================================================================
116
Fast math library; note that many of the higher-order functions in this set
117
use approximations which are less accurate, but faster, than <cmath> standard
118
library equivalents.
119
120
Note: Many of these are not necessarily faster than simple C versions when
121
used on a single scalar value, but are included for testing purposes as most
122
have an option based on SSE intrinsics and therefore provide an obvious route
123
to future vectorization.
124
============================================================================ */
125
126
/**
 * @brief Union overlaying an unsigned integer, a signed integer, and a float
 * in the same storage, for manipulation of float bit patterns.
 */
union if32
{
	/** @brief The storage viewed as an unsigned 32-bit integer. */
	uint32_t u;

	/** @brief The storage viewed as a signed 32-bit integer. */
	int32_t s;

	/** @brief The storage viewed as a float. */
	float f;
};
133
134
// These are namespaced to avoid colliding with C standard library functions.
135
namespace astc
136
{
137
138
/** @brief Single-precision value of pi; usable in constant expressions. */
static constexpr float PI = 3.14159265358979323846f;

/** @brief Single-precision value of pi / 2; usable in constant expressions. */
static constexpr float PI_OVER_TWO = 1.57079632679489661923f;
140
141
/**
 * @brief Absolute value of a single-precision float.
 *
 * Thin wrapper that forwards to @c std::fabs.
 *
 * @param v   The input value.
 *
 * @return The magnitude of @c v.
 */
static inline float fabs(float v)
{
	const float magnitude = std::fabs(v);
	return magnitude;
}
152
153
/**
 * @brief Test whether a float value is a NaN.
 *
 * Relies on NaN being the only value that does not compare equal to itself.
 *
 * @param v   The value to test.
 *
 * @return @c true if @c v is a NaN, @c false otherwise.
 */
static inline bool isnan(float v)
{
	const bool equals_itself = (v == v);
	return !equals_itself;
}
164
165
/**
 * @brief Return the smaller of two values.
 *
 * For floats, a comparison involving a NaN is false, so @c q is returned
 * whenever either input is a NaN (a NaN in @c p is replaced by @c q; a NaN in
 * @c q is returned as-is).
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 *
 * @return @c p if it is strictly less than @c q, otherwise @c q.
 */
template<typename T>
static inline T min(T p, T q)
{
	if (p < q)
	{
		return p;
	}

	return q;
}

/**
 * @brief Return the smallest of three values.
 *
 * Evaluated as the two-value minimum of @c min(p, q) and @c r, so for floats
 * @c r is returned whenever that intermediate result or @c r is a NaN.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 * @param r   The third value to compare.
 *
 * @return The smallest value.
 */
template<typename T>
static inline T min(T p, T q, T r)
{
	const T smallest_pq = min(p, q);
	return min(smallest_pq, r);
}

/**
 * @brief Return the smallest of four values.
 *
 * Evaluated as the two-value minimum of @c min(p, q) and @c min(r, s), so for
 * floats @c min(r, s) is returned whenever either pairwise result is a NaN.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 * @param r   The third value to compare.
 * @param s   The fourth value to compare.
 *
 * @return The smallest value.
 */
template<typename T>
static inline T min(T p, T q, T r, T s)
{
	const T smallest_pq = min(p, q);
	const T smallest_rs = min(r, s);
	return min(smallest_pq, smallest_rs);
}
215
216
/**
 * @brief Return the larger of two values.
 *
 * For floats, a comparison involving a NaN is false, so @c q is returned
 * whenever either input is a NaN (a NaN in @c p is replaced by @c q; a NaN in
 * @c q is returned as-is).
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 *
 * @return @c p if it is strictly greater than @c q, otherwise @c q.
 */
template<typename T>
static inline T max(T p, T q)
{
	if (p > q)
	{
		return p;
	}

	return q;
}

/**
 * @brief Return the largest of three values.
 *
 * Evaluated as the two-value maximum of @c max(p, q) and @c r, so for floats
 * @c r is returned whenever that intermediate result or @c r is a NaN.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 * @param r   The third value to compare.
 *
 * @return The largest value.
 */
template<typename T>
static inline T max(T p, T q, T r)
{
	const T largest_pq = max(p, q);
	return max(largest_pq, r);
}

/**
 * @brief Return the largest of four values.
 *
 * Evaluated as the two-value maximum of @c max(p, q) and @c max(r, s), so for
 * floats @c max(r, s) is returned whenever either pairwise result is a NaN.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 * @param r   The third value to compare.
 * @param s   The fourth value to compare.
 *
 * @return The largest value.
 */
template<typename T>
static inline T max(T p, T q, T r, T s)
{
	const T largest_pq = max(p, q);
	const T largest_rs = max(r, s);
	return max(largest_pq, largest_rs);
}
266
267
/**
 * @brief Clamp a value to the inclusive range [@c mn, @c mx].
 *
 * For floats, a NaN input is turned into @c mn.
 *
 * @param v    The value to clamp.
 * @param mn   The minimum value (inclusive).
 * @param mx   The maximum value (inclusive).
 *
 * @return The clamped value.
 */
template<typename T>
inline T clamp(T v, T mn, T mx)
{
	// The comparison order is load-bearing: every comparison against a NaN is
	// false, so a NaN input fails both tests and lands on the final "mn" arm.
	return (v > mx) ? mx
	     : (v > mn) ? v
	     : mn;
}
287
288
/**
289
* @brief Clamp a float value between 0.0f and 1.0f.
290
*
291
* NaNs are turned into 0.0f.
292
*
293
* @param v The value to clamp.
294
*
295
* @return The clamped value.
296
*/
297
static inline float clamp1f(float v)
298
{
299
return astc::clamp(v, 0.0f, 1.0f);
300
}
301
302
/**
303
* @brief Clamp a float value between 0.0f and 255.0f.
304
*
305
* NaNs are turned into 0.0f.
306
*
307
* @param v The value to clamp.
308
*
309
* @return The clamped value.
310
*/
311
static inline float clamp255f(float v)
312
{
313
return astc::clamp(v, 0.0f, 255.0f);
314
}
315
316
/**
 * @brief Round a single-precision float down.
 *
 * Thin wrapper that forwards to @c std::floor.
 *
 * @param v   The value to round.
 *
 * @return The result of @c std::floor(v), still as a float.
 */
static inline float flt_rd(float v)
{
	const float floored = std::floor(v);
	return floored;
}
327
328
/**
 * @brief Convert a single-precision float to an integer, rounding to nearest.
 *
 * Implemented as "add one half, then convert". The conversion truncates
 * toward zero, so the result is the nearest integer only for non-negative
 * inputs; for example -1.2f yields 0.
 *
 * @param v   The value to round.
 *
 * @return The converted value.
 */
static inline int flt2int_rtn(float v)
{
	// Kept as a single expression so the addition and the conversion are
	// evaluated exactly as a bias-then-truncate step.
	return static_cast<int>(0.5f + v);
}
340
341
/**
 * @brief Convert a single-precision float to an integer, discarding the
 * fractional part.
 *
 * The conversion truncates toward zero, so it only rounds down for
 * non-negative inputs; for example -1.75f yields -1.
 *
 * @param v   The value to convert.
 *
 * @return The converted value.
 */
static inline int flt2int_rd(float v)
{
	const int truncated = static_cast<int>(v);
	return truncated;
}
352
353
/**
 * @brief Reinterpret the bits of a single-precision float as an integer.
 *
 * The object representation is copied with @c std::memcpy rather than read
 * through a union, because reading an inactive union member is undefined
 * behaviour in C++. No numeric conversion takes place.
 *
 * @param v   The value to bitcast.
 *
 * @return The integer that has the same bit pattern as @c v.
 */
static inline int float_as_int(float v)
{
	static_assert(sizeof(int) == sizeof(float),
	              "float_as_int requires int and float to have the same size");

	int bits;
	std::memcpy(&bits, &v, sizeof(bits));
	return bits;
}
366
367
/**
 * @brief Reinterpret the bits of an integer as a single-precision float.
 *
 * The object representation is copied with @c std::memcpy rather than read
 * through a union, because reading an inactive union member is undefined
 * behaviour in C++. No numeric conversion takes place.
 *
 * @param v   The value to bitcast.
 *
 * @return The float that has the same bit pattern as @c v.
 */
static inline float int_as_float(int v)
{
	static_assert(sizeof(float) == sizeof(int),
	              "int_as_float requires int and float to have the same size");

	float value;
	std::memcpy(&value, &v, sizeof(value));
	return value;
}
380
381
/**
 * @brief Reciprocal square root, 1.0 / sqrt(v).
 *
 * Computed as a single-precision division by @c std::sqrt(v); no
 * approximation is applied here.
 *
 * @param v   The input value.
 *
 * @return The reciprocal of the square root of @c v.
 */
static inline float rsqrt(float v)
{
	const float numerator = 1.0f;
	return numerator / std::sqrt(v);
}
392
393
/**
 * @brief Single-precision square root.
 *
 * Thin wrapper that forwards to @c std::sqrt; no approximation is applied
 * here.
 *
 * @param v   The input value.
 *
 * @return The square root of @c v.
 */
static inline float sqrt(float v)
{
	const float root = std::sqrt(v);
	return root;
}
404
405
/**
 * @brief Split a float into a mantissa and a power-of-two exponent.
 *
 * Works directly on the bit pattern: the 8-bit biased exponent field is
 * extracted and rebased by 126, and the exponent field of the returned value
 * is overwritten so that its magnitude lies in [0.5, 1.0) while the sign and
 * fraction bits of @c v are kept.
 *
 * No input is special-cased. Inputs whose exponent field is all zeros or all
 * ones (zero, denormals, infinities, NaNs) are processed by the same bit
 * manipulation and therefore do not follow @c std::frexp semantics; for
 * example an input of 0.0f returns 0.5f with an exponent of -126.
 *
 * The bits are accessed with @c std::memcpy instead of a union, because
 * reading an inactive union member is undefined behaviour in C++.
 *
 * @param      v      The input value.
 * @param[out] expo   The output exponent.
 *
 * @return The mantissa.
 */
static inline float frexp(float v, int* expo)
{
	static_assert(sizeof(uint32_t) == sizeof(float),
	              "frexp requires a 32-bit float");

	uint32_t bits;
	std::memcpy(&bits, &v, sizeof(bits));

	// Convert the exponent field to a signed value before rebasing, so that
	// small exponents do not rely on unsigned wrap-around.
	const int biased_exponent = static_cast<int>((bits >> 23) & 0xFFu);
	*expo = biased_exponent - 126;

	// Keep sign and fraction; force the exponent field to that of 0.5f.
	bits = (bits & 0x807FFFFFu) | 0x3F000000u;

	float mantissa;
	std::memcpy(&mantissa, &bits, sizeof(mantissa));
	return mantissa;
}
421
422
/**
 * @brief Initialize the state of a random number generator.
 *
 * Important note: the codec needs sets of random numbers, but it also needs
 * the same seed across instances and threads so that image output is stable
 * across compressor runs and across platforms. Every PRNG initialized by this
 * call will therefore return the same sequence of values.
 *
 * @param state   The two-word state structure to initialize.
 */
void rand_init(uint64_t state[2]);
434
435
/**
 * @brief Return the next random number from the generator.
 *
 * This RNG is an implementation of the "xoroshiro128+ 1.0" PRNG, based on the
 * public-domain implementation given by David Blackman & Sebastiano Vigna at
 * http://vigna.di.unimi.it/xorshift/xoroshiro128plus.c
 *
 * @param state   The two-word state structure to use and update.
 *
 * @return The next value in the sequence.
 */
uint64_t rand(uint64_t state[2]);
445
446
}
447
448
/* ============================================================================
449
Softfloat library with fp32 and fp16 conversion functionality.
450
============================================================================ */
451
// These conversion routines are only declared when neither F16C nor NEON is
// enabled (presumably because those builds convert in hardware - confirm
// against the vector library).
#if !((ASTCENC_F16C != 0) || (ASTCENC_NEON != 0))

/**
 * @brief Narrowing conversion of a single-precision float to a 16-bit value.
 *
 * @param val   The float value to convert.
 *
 * @return The converted value, held in a @c uint16_t.
 */
uint16_t float_to_sf16(float val);

/**
 * @brief Conversion of a 16-bit value, held in a @c uint16_t, to a
 * single-precision float.
 *
 * @param val   The 16-bit value to convert.
 *
 * @return The converted float value.
 */
float sf16_to_float(uint16_t val);

#endif
456
457
/*********************************
458
Vector library
459
*********************************/
460
#include "astcenc_vecmathlib.h"
461
462
/*********************************
463
Declaration of line types
464
*********************************/
465
// parametric line, 2D: The line is given by line = a + b * t.
466
467
struct line2
468
{
469
vfloat4 a;
470
vfloat4 b;
471
};
472
473
// parametric line, 3D
474
struct line3
475
{
476
vfloat4 a;
477
vfloat4 b;
478
};
479
480
struct line4
481
{
482
vfloat4 a;
483
vfloat4 b;
484
};
485
486
487
struct processed_line2
488
{
489
vfloat4 amod;
490
vfloat4 bs;
491
};
492
493
struct processed_line3
494
{
495
vfloat4 amod;
496
vfloat4 bs;
497
};
498
499
struct processed_line4
500
{
501
vfloat4 amod;
502
vfloat4 bs;
503
};
504
505
#endif
506
507