CoCalc -- astcenc

GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/astcenc/astcenc_mathlib.h
⁹⁸⁹⁶ views
1
// SPDX-License-Identifier: Apache-2.0
2
// ----------------------------------------------------------------------------
3
// Copyright 2011-2025 Arm Limited
4
//
5
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
6
// use this file except in compliance with the License. You may obtain a copy
7
// of the License at:
8
//
9
//     http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing, software
12
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14
// License for the specific language governing permissions and limitations
15
// under the License.
16
// ----------------------------------------------------------------------------
17

18
/*
19
 * This module implements a variety of mathematical data types and library
20
 * functions used by the codec.
21
 */
22

23
#ifndef ASTC_MATHLIB_H_INCLUDED
24
#define ASTC_MATHLIB_H_INCLUDED
25

26
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>
29

30
// Auto-detect POPCNT support unless the build system already chose a value.
#if !defined(ASTCENC_POPCNT)
  #ifdef __POPCNT__
    #define ASTCENC_POPCNT 1
  #else
    #define ASTCENC_POPCNT 0
  #endif
#endif // !defined(ASTCENC_POPCNT)
37

38
// Auto-detect F16C support unless the build system already chose a value.
#if !defined(ASTCENC_F16C)
  #ifdef __F16C__
    #define ASTCENC_F16C 1
  #else
    #define ASTCENC_F16C 0
  #endif
#endif // !defined(ASTCENC_F16C)
45

46
// Auto-detect the SSE level unless the build system already chose a value.
// The value encodes the version: 42 = SSE4.2, 41 = SSE4.1, 20 = SSE2, 0 = none.
#if !defined(ASTCENC_SSE)
  #if defined(__SSE4_2__)
    #define ASTCENC_SSE 42
  #elif defined(__SSE4_1__)
    #define ASTCENC_SSE 41
  #elif defined(__SSE2__) || (defined(_M_AMD64) && !defined(_M_ARM64EC))
    // _M_AMD64 without _M_ARM64EC is also accepted as an SSE2 target
    #define ASTCENC_SSE 20
  #else
    #define ASTCENC_SSE 0
  #endif
#endif // !defined(ASTCENC_SSE)
57

58
// Auto-detect the AVX level unless the build system already chose a value.
// ASTCENC_X86_GATHERS is only defined here when an AVX level is auto-detected.
#if !defined(ASTCENC_AVX)
  #if defined(__AVX2__)
    #define ASTCENC_AVX 2
    #define ASTCENC_X86_GATHERS 1
  #elif defined(__AVX__)
    #define ASTCENC_AVX 1
    #define ASTCENC_X86_GATHERS 1
  #else
    #define ASTCENC_AVX 0
  #endif
#endif // !defined(ASTCENC_AVX)
69

70
// Auto-detect NEON (any 64-bit Arm target) unless the build system already
// chose a value.
#if !defined(ASTCENC_NEON)
  #if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
    #define ASTCENC_NEON 1
  #else
    #define ASTCENC_NEON 0
  #endif
#endif // !defined(ASTCENC_NEON)
77

78
// Auto-detect the SVE configuration unless the build system already chose a
// value.
#if !defined(ASTCENC_SVE)
  #if defined(__ARM_FEATURE_SVE)
    #if defined(__ARM_FEATURE_SVE_BITS) && __ARM_FEATURE_SVE_BITS == 256
      // Vector length fixed at 256 bits at compile time
      #define ASTCENC_SVE 8
    #else
      // Auto-detected SVE can only assume vector width of 4 is available, but
      // must also allow for hardware being longer and so all use of intrinsics
      // must explicitly use predicate masks to limit to 4-wide.
      #define ASTCENC_SVE 4
    #endif
  #else
    #define ASTCENC_SVE 0
  #endif
#endif // !defined(ASTCENC_SVE)
92

93
// Alignment, in bytes, required by the widest enabled SIMD vector type:
//   32 for AVX or 8-wide SVE, 16 for SSE, NEON or 4-wide SVE.
// A value of 0 means "use the default alignment" (non-SIMD builds).
#if (ASTCENC_AVX != 0) || (ASTCENC_SVE == 8)
  #define ASTCENC_VECALIGN 32
#elif (ASTCENC_SSE != 0) || (ASTCENC_NEON != 0) || (ASTCENC_SVE == 4)
  #define ASTCENC_VECALIGN 16
#else
  #define ASTCENC_VECALIGN 0
#endif
102

103
// C++11 states that alignas(0) should be ignored, but some GCC versions do not
// honor this. Work around it by expanding to nothing instead of alignas(0)
// when no vector alignment is required.
#if ASTCENC_VECALIGN <= 0
	#define ASTCENC_ALIGNAS
#else
	#define ASTCENC_ALIGNAS alignas(ASTCENC_VECALIGN)
#endif
110

111
#if ASTCENC_SSE != 0 || ASTCENC_AVX != 0 || ASTCENC_POPCNT != 0
112
	#include <immintrin.h>
113
#endif
114

115
/* ============================================================================
116
  Fast math library; note that many of the higher-order functions in this set
117
  use approximations which are less accurate, but faster, than <cmath> standard
118
  library equivalents.
119

120
  Note: Many of these are not necessarily faster than simple C versions when
121
  used on a single scalar value, but are included for testing purposes as most
122
  have an option based on SSE intrinsics and therefore provide an obvious route
123
  to future vectorization.
124
============================================================================ */
125

126
/**
 * @brief Union overlaying the three 32-bit views of a float bit pattern.
 */
union if32
{
	/** @brief The bits viewed as an unsigned integer. */
	uint32_t u;

	/** @brief The bits viewed as a signed integer. */
	int32_t s;

	/** @brief The bits viewed as a single-precision float. */
	float f;
};
133

134
// These are namespaced to avoid colliding with C standard library functions.
namespace astc
{

static const float PI          = 3.14159265358979323846f;
static const float PI_OVER_TWO = 1.57079632679489661923f;

/**
 * @brief SP float absolute value.
 *
 * @param v   The value to make absolute.
 *
 * @return The absolute value.
 */
static inline float fabs(float v)
{
	return std::fabs(v);
}

/**
 * @brief Test if a float value is a NaN.
 *
 * @param v   The value to test.
 *
 * @return @c true if @c v is a NaN, @c false otherwise.
 */
static inline bool isnan(float v)
{
	// NaN is the only value that compares unequal to itself
	return v != v;
}

/**
 * @brief Return the minimum of two values.
 *
 * For floats, any comparison involving a NaN is false, so @c q is returned
 * whenever either input is a NaN (which is itself a NaN if @c q is the NaN).
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 *
 * @return The smallest value.
 */
template<typename T>
static inline T min(T p, T q)
{
	return p < q ? p : q;
}

/**
 * @brief Return the minimum of three values.
 *
 * Evaluated as min(min(p, q), r); for floats, @c r is returned if either
 * min(p, q) or @c r is a NaN.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 * @param r   The third value to compare.
 *
 * @return The smallest value.
 */
template<typename T>
static inline T min(T p, T q, T r)
{
	return min(min(p, q), r);
}

/**
 * @brief Return the minimum of four values.
 *
 * Evaluated as min(min(p, q), min(r, s)); for floats, min(r, s) is returned
 * if either intermediate result is a NaN.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 * @param r   The third value to compare.
 * @param s   The fourth value to compare.
 *
 * @return The smallest value.
 */
template<typename T>
static inline T min(T p, T q, T r, T s)
{
	return min(min(p, q), min(r, s));
}

/**
 * @brief Return the maximum of two values.
 *
 * For floats, any comparison involving a NaN is false, so @c q is returned
 * whenever either input is a NaN (which is itself a NaN if @c q is the NaN).
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 *
 * @return The largest value.
 */
template<typename T>
static inline T max(T p, T q)
{
	return p > q ? p : q;
}

/**
 * @brief Return the maximum of three values.
 *
 * Evaluated as max(max(p, q), r); for floats, @c r is returned if either
 * max(p, q) or @c r is a NaN.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 * @param r   The third value to compare.
 *
 * @return The largest value.
 */
template<typename T>
static inline T max(T p, T q, T r)
{
	return max(max(p, q), r);
}

/**
 * @brief Return the maximum of four values.
 *
 * Evaluated as max(max(p, q), max(r, s)); for floats, max(r, s) is returned
 * if either intermediate result is a NaN.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 * @param r   The third value to compare.
 * @param s   The fourth value to compare.
 *
 * @return The largest value.
 */
template<typename T>
static inline T max(T p, T q, T r, T s)
{
	return max(max(p, q), max(r, s));
}

/**
 * @brief Clamp a value between @c mn and @c mx.
 *
 * For floats, NaNs are turned into @c mn.
 *
 * @param v      The value to clamp.
 * @param mn     The min value (inclusive).
 * @param mx     The max value (inclusive).
 *
 * @return The clamped value.
 */
template<typename T>
inline T clamp(T v, T mn, T mx)
{
	// Do not reorder; correct NaN handling relies on the fact that comparison
	// with NaN returns false and will fall-through to the "min" value.
	if (v > mx) return mx;
	if (v > mn) return v;
	return mn;
}

/**
 * @brief Clamp a float value between 0.0f and 1.0f.
 *
 * NaNs are turned into 0.0f.
 *
 * @param v   The value to clamp.
 *
 * @return The clamped value.
 */
static inline float clamp1f(float v)
{
	return astc::clamp(v, 0.0f, 1.0f);
}

/**
 * @brief Clamp a float value between 0.0f and 255.0f.
 *
 * NaNs are turned into 0.0f.
 *
 * @param v   The value to clamp.
 *
 * @return The clamped value.
 */
static inline float clamp255f(float v)
{
	return astc::clamp(v, 0.0f, 255.0f);
}

/**
 * @brief SP float round-down.
 *
 * @param v   The value to round.
 *
 * @return The rounded value.
 */
static inline float flt_rd(float v)
{
	return std::floor(v);
}

/**
 * @brief SP float round-to-nearest and convert to integer.
 *
 * Implemented as a truncating conversion of @c v + 0.5f, so the result is
 * only a round-to-nearest for non-negative inputs.
 *
 * @param v   The value to round.
 *
 * @return The rounded value.
 */
static inline int flt2int_rtn(float v)
{
	return static_cast<int>(v + 0.5f);
}

/**
 * @brief SP float round down and convert to integer.
 *
 * Implemented as a truncating conversion, which rounds towards zero; the
 * result is only a round-down for non-negative inputs.
 *
 * @param v   The value to round.
 *
 * @return The rounded value.
 */
static inline int flt2int_rd(float v)
{
	return static_cast<int>(v);
}

/**
 * @brief SP float bit-interpreted as an integer.
 *
 * @param v   The value to bitcast.
 *
 * @return The converted value.
 */
static inline int float_as_int(float v)
{
	static_assert(sizeof(int) == sizeof(float), "int and float must be the same size");

	// memcpy is the well-defined way to reinterpret bits in C++; reading an
	// inactive union member is undefined behavior
	int bits;
	std::memcpy(&bits, &v, sizeof(bits));
	return bits;
}

/**
 * @brief Integer bit-interpreted as an SP float.
 *
 * @param v   The value to bitcast.
 *
 * @return The converted value.
 */
static inline float int_as_float(int v)
{
	static_assert(sizeof(int) == sizeof(float), "int and float must be the same size");

	// memcpy is the well-defined way to reinterpret bits in C++; reading an
	// inactive union member is undefined behavior
	float value;
	std::memcpy(&value, &v, sizeof(value));
	return value;
}

/**
 * @brief Compute 1.0 / sqrt(val).
 *
 * @param v   The input value.
 *
 * @return The result.
 */
static inline float rsqrt(float v)
{
	return 1.0f / std::sqrt(v);
}

/**
 * @brief Compute sqrt(val).
 *
 * @param v   The input value.
 *
 * @return The result.
 */
static inline float sqrt(float v)
{
	return std::sqrt(v);
}

/**
 * @brief Extract mantissa and exponent of a float value.
 *
 * The bit fields are manipulated directly: the sign and fraction bits are
 * kept and the exponent field is forced so that the returned magnitude lies
 * in [0.5, 1.0). There is no special handling for zero, subnormal, infinite,
 * or NaN inputs.
 *
 * @param      v      The input value.
 * @param[out] expo   The output exponent.
 *
 * @return The mantissa.
 */
static inline float frexp(float v, int* expo)
{
	static_assert(sizeof(uint32_t) == sizeof(float), "float must be 32 bits");

	uint32_t bits;
	std::memcpy(&bits, &v, sizeof(bits));

	// Rebase the biased exponent field; convert to int before subtracting so
	// small exponents do not rely on unsigned wrap-around
	*expo = static_cast<int>((bits >> 23) & 0xFF) - 126;

	// Keep sign and fraction bits, force the exponent field to 126 (2^-1)
	bits = (bits & 0x807fffff) | 0x3f000000;

	float mantissa;
	std::memcpy(&mantissa, &bits, sizeof(mantissa));
	return mantissa;
}

/**
 * @brief Initialize the seed structure for a random number generator.
 *
 * Important note: For the purposes of ASTC we want sets of random numbers for
 * use by the codec, but we want the same seed value across instances and
 * threads to ensure that image output is stable across compressor runs and
 * across platforms. Every PRNG created by this call will therefore return the
 * same sequence of values ...
 *
 * @param state The state structure to initialize.
 */
void rand_init(uint64_t state[2]);

/**
 * @brief Return the next random number from the generator.
 *
 * This RNG is an implementation of the "xoroshiro128+ 1.0" PRNG, based on the
 * public-domain implementation given by David Blackman & Sebastiano Vigna at
 * http://vigna.di.unimi.it/xorshift/xoroshiro128plus.c
 *
 * @param state The state structure to use/update.
 *
 * @return The next random number in the sequence.
 */
uint64_t rand(uint64_t state[2]);

}
447

448
/* ============================================================================
449
  Softfloat library with fp32 and fp16 conversion functionality.
450
============================================================================ */
451
// Software conversions are only declared when neither F16C nor NEON is enabled
#if (ASTCENC_NEON == 0) && (ASTCENC_F16C == 0)
	/** @brief Convert an FP32 value to an FP16 bit pattern (narrowing). */
	uint16_t float_to_sf16(float val);

	/** @brief Convert an FP16 bit pattern to an FP32 value (widening). */
	float sf16_to_float(uint16_t val);
#endif // no F16C and no NEON
456

457
/*********************************
458
  Vector library
459
*********************************/
460
#include "astcenc_vecmathlib.h"
461

462
/*********************************
463
  Declaration of line types
464
*********************************/
465
// parametric line, 2D: The line is given by line = a + b * t.
466

467
struct line2
468
{
469
	vfloat4 a;
470
	vfloat4 b;
471
};
472

473
// parametric line, 3D
474
struct line3
475
{
476
	vfloat4 a;
477
	vfloat4 b;
478
};
479

480
struct line4
481
{
482
	vfloat4 a;
483
	vfloat4 b;
484
};
485

486

487
struct processed_line2
488
{
489
	vfloat4 amod;
490
	vfloat4 bs;
491
};
492

493
struct processed_line3
494
{
495
	vfloat4 amod;
496
	vfloat4 bs;
497
};
498

499
struct processed_line4
500
{
501
	vfloat4 amod;
502
	vfloat4 bs;
503
};
504

505
#endif
506

507
Product

Resources

Company