// Source: GitHub repository godotengine/godot
// Path: blob/master/thirdparty/astcenc/astcenc_vecmathlib_common_4.h
// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2020-2025 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
// ----------------------------------------------------------------------------
17
18
/**
 * @brief Generic 4x32-bit vector functions.
 *
 * This module implements generic 4-wide vector functions that are valid for
 * all instruction sets, typically implemented using lower level 4-wide
 * operations that are ISA-specific.
 */
25
26
#ifndef ASTC_VECMATHLIB_COMMON_4_H_INCLUDED
27
#define ASTC_VECMATHLIB_COMMON_4_H_INCLUDED
28
29
#ifndef ASTCENC_SIMD_INLINE
30
#error "Include astcenc_vecmathlib.h, do not include directly"
31
#endif
32
33
#include <cassert>
#include <cstdio>
#include <cstring>
#include <limits>
35
36
// ============================================================================
// vint4 operators and functions
// ============================================================================
39
40
/**
41
* @brief Overload: vector by scalar addition.
42
*/
43
ASTCENC_SIMD_INLINE vint4 operator+(vint4 a, int b)
44
{
45
return a + vint4(b);
46
}
47
48
/**
49
* @brief Overload: vector by vector incremental addition.
50
*/
51
ASTCENC_SIMD_INLINE vint4& operator+=(vint4& a, const vint4& b)
52
{
53
a = a + b;
54
return a;
55
}
56
57
/**
58
* @brief Overload: vector by scalar subtraction.
59
*/
60
ASTCENC_SIMD_INLINE vint4 operator-(vint4 a, int b)
61
{
62
return a - vint4(b);
63
}
64
65
/**
66
* @brief Overload: vector by scalar multiplication.
67
*/
68
ASTCENC_SIMD_INLINE vint4 operator*(vint4 a, int b)
69
{
70
return a * vint4(b);
71
}
72
73
/**
74
* @brief Overload: vector by scalar bitwise or.
75
*/
76
ASTCENC_SIMD_INLINE vint4 operator|(vint4 a, int b)
77
{
78
return a | vint4(b);
79
}
80
81
/**
82
* @brief Overload: vector by scalar bitwise and.
83
*/
84
ASTCENC_SIMD_INLINE vint4 operator&(vint4 a, int b)
85
{
86
return a & vint4(b);
87
}
88
89
/**
90
* @brief Overload: vector by scalar bitwise xor.
91
*/
92
ASTCENC_SIMD_INLINE vint4 operator^(vint4 a, int b)
93
{
94
return a ^ vint4(b);
95
}
96
97
/**
98
* @brief Return the clamped value between min and max.
99
*/
100
ASTCENC_SIMD_INLINE vint4 clamp(int minv, int maxv, vint4 a)
101
{
102
return min(max(a, vint4(minv)), vint4(maxv));
103
}
104
105
/**
106
* @brief Return the horizontal sum of RGB vector lanes as a scalar.
107
*/
108
ASTCENC_SIMD_INLINE int hadd_rgb_s(vint4 a)
109
{
110
return a.lane<0>() + a.lane<1>() + a.lane<2>();
111
}
112
113
/**
114
* @brief Return the horizontal minimum of a vector.
115
*/
116
ASTCENC_SIMD_INLINE int hmin_s(vint4 a)
117
{
118
return hmin(a).lane<0>();
119
}
120
121
/**
122
* @brief Generate a vint4 from a size_t.
123
*/
124
ASTCENC_SIMD_INLINE vint4 vint4_from_size(size_t a)
125
{
126
assert(a <= std::numeric_limits<int>::max());
127
return vint4(static_cast<int>(a));
128
}
129
130
/**
131
* @brief Return the horizontal maximum of a vector.
132
*/
133
ASTCENC_SIMD_INLINE int hmax_s(vint4 a)
134
{
135
return hmax(a).lane<0>();
136
}
137
138
// ============================================================================
// vfloat4 operators and functions
// ============================================================================
141
142
/**
143
* @brief Overload: vector by vector incremental addition.
144
*/
145
ASTCENC_SIMD_INLINE vfloat4& operator+=(vfloat4& a, const vfloat4& b)
146
{
147
a = a + b;
148
return a;
149
}
150
151
/**
152
* @brief Overload: vector by scalar addition.
153
*/
154
ASTCENC_SIMD_INLINE vfloat4 operator+(vfloat4 a, float b)
155
{
156
return a + vfloat4(b);
157
}
158
159
/**
160
* @brief Overload: vector by scalar subtraction.
161
*/
162
ASTCENC_SIMD_INLINE vfloat4 operator-(vfloat4 a, float b)
163
{
164
return a - vfloat4(b);
165
}
166
167
/**
168
* @brief Overload: vector by scalar multiplication.
169
*/
170
ASTCENC_SIMD_INLINE vfloat4 operator*(vfloat4 a, float b)
171
{
172
return a * vfloat4(b);
173
}
174
175
/**
176
* @brief Overload: scalar by vector multiplication.
177
*/
178
ASTCENC_SIMD_INLINE vfloat4 operator*(float a, vfloat4 b)
179
{
180
return vfloat4(a) * b;
181
}
182
183
/**
184
* @brief Overload: vector by scalar division.
185
*/
186
ASTCENC_SIMD_INLINE vfloat4 operator/(vfloat4 a, float b)
187
{
188
return a / vfloat4(b);
189
}
190
191
/**
192
* @brief Overload: scalar by vector division.
193
*/
194
ASTCENC_SIMD_INLINE vfloat4 operator/(float a, vfloat4 b)
195
{
196
return vfloat4(a) / b;
197
}
198
199
/**
200
* @brief Return the min vector of a vector and a scalar.
201
*
202
* If either lane value is NaN, @c b will be returned for that lane.
203
*/
204
ASTCENC_SIMD_INLINE vfloat4 min(vfloat4 a, float b)
205
{
206
return min(a, vfloat4(b));
207
}
208
209
/**
210
* @brief Return the max vector of a vector and a scalar.
211
*
212
* If either lane value is NaN, @c b will be returned for that lane.
213
*/
214
ASTCENC_SIMD_INLINE vfloat4 max(vfloat4 a, float b)
215
{
216
return max(a, vfloat4(b));
217
}
218
219
/**
220
* @brief Return the clamped value between min and max.
221
*
222
* It is assumed that neither @c min nor @c max are NaN values. If @c a is NaN
223
* then @c min will be returned for that lane.
224
*/
225
ASTCENC_SIMD_INLINE vfloat4 clamp(float minv, float maxv, vfloat4 a)
226
{
227
// Do not reorder - second operand will return if either is NaN
228
return min(max(a, minv), maxv);
229
}
230
231
/**
232
* @brief Return the clamped value between 0.0f and 1.0f.
233
*
234
* If @c a is NaN then zero will be returned for that lane.
235
*/
236
ASTCENC_SIMD_INLINE vfloat4 clampzo(vfloat4 a)
237
{
238
// Do not reorder - second operand will return if either is NaN
239
return min(max(a, vfloat4::zero()), 1.0f);
240
}
241
242
/**
243
* @brief Return the horizontal minimum of a vector.
244
*/
245
ASTCENC_SIMD_INLINE float hmin_s(vfloat4 a)
246
{
247
return hmin(a).lane<0>();
248
}
249
250
/**
251
* @brief Return the horizontal min of RGB vector lanes as a scalar.
252
*/
253
ASTCENC_SIMD_INLINE float hmin_rgb_s(vfloat4 a)
254
{
255
a.set_lane<3>(a.lane<0>());
256
return hmin_s(a);
257
}
258
259
/**
260
* @brief Return the horizontal maximum of a vector.
261
*/
262
ASTCENC_SIMD_INLINE float hmax_s(vfloat4 a)
263
{
264
return hmax(a).lane<0>();
265
}
266
267
/**
268
* @brief Accumulate lane-wise sums for a vector.
269
*/
270
ASTCENC_SIMD_INLINE void haccumulate(vfloat4& accum, vfloat4 a)
271
{
272
accum = accum + a;
273
}
274
275
/**
276
* @brief Accumulate lane-wise sums for a masked vector.
277
*/
278
ASTCENC_SIMD_INLINE void haccumulate(vfloat4& accum, vfloat4 a, vmask4 m)
279
{
280
a = select(vfloat4::zero(), a, m);
281
haccumulate(accum, a);
282
}
283
284
/**
285
* @brief Return the horizontal sum of RGB vector lanes as a scalar.
286
*/
287
ASTCENC_SIMD_INLINE float hadd_rgb_s(vfloat4 a)
288
{
289
return a.lane<0>() + a.lane<1>() + a.lane<2>();
290
}
291
292
#if !defined(ASTCENC_USE_NATIVE_DOT_PRODUCT)
293
294
/**
295
* @brief Return the dot product for the full 4 lanes, returning scalar.
296
*/
297
ASTCENC_SIMD_INLINE float dot_s(vfloat4 a, vfloat4 b)
298
{
299
vfloat4 m = a * b;
300
return hadd_s(m);
301
}
302
303
/**
304
* @brief Return the dot product for the full 4 lanes, returning vector.
305
*/
306
ASTCENC_SIMD_INLINE vfloat4 dot(vfloat4 a, vfloat4 b)
307
{
308
vfloat4 m = a * b;
309
return vfloat4(hadd_s(m));
310
}
311
312
/**
313
* @brief Return the dot product for the bottom 3 lanes, returning scalar.
314
*/
315
ASTCENC_SIMD_INLINE float dot3_s(vfloat4 a, vfloat4 b)
316
{
317
vfloat4 m = a * b;
318
return hadd_rgb_s(m);
319
}
320
321
/**
322
* @brief Return the dot product for the bottom 3 lanes, returning vector.
323
*/
324
ASTCENC_SIMD_INLINE vfloat4 dot3(vfloat4 a, vfloat4 b)
325
{
326
vfloat4 m = a * b;
327
float d3 = hadd_rgb_s(m);
328
return vfloat4(d3, d3, d3, 0.0f);
329
}
330
331
#endif
332
333
#if !defined(ASTCENC_USE_NATIVE_POPCOUNT)
334
335
/**
 * @brief Population bit count.
 *
 * Portable bit-parallel implementation: counts are accumulated in
 * progressively wider fields of the 64-bit word, with no loops or branches.
 *
 * @param v   The value to population count.
 *
 * @return The number of 1 bits.
 */
static inline int popcount(uint64_t v)
{
	const uint64_t mask1 = 0x5555555555555555ULL;
	const uint64_t mask2 = 0x3333333333333333ULL;
	const uint64_t mask3 = 0x0F0F0F0F0F0F0F0FULL;

	// Each 2-bit field now holds the count of set bits in that field
	v -= (v >> 1) & mask1;
	// Each 4-bit field now holds the count of set bits in that field
	v = (v & mask2) + ((v >> 2) & mask2);
	// Each byte now holds the count of set bits in that byte
	v += v >> 4;
	v &= mask3;
	// The multiply sums all byte counts into the top byte
	v *= 0x0101010101010101ULL;
	v >>= 56;
	return static_cast<int>(v);
}
355
356
#endif
357
358
/**
359
* @brief Apply signed bit transfer.
360
*
361
* @param input0 The first encoded endpoint.
362
* @param input1 The second encoded endpoint.
363
*/
364
static ASTCENC_SIMD_INLINE void bit_transfer_signed(
365
vint4& input0,
366
vint4& input1
367
) {
368
input1 = lsr<1>(input1) | (input0 & 0x80);
369
input0 = lsr<1>(input0) & 0x3F;
370
371
vmask4 mask = (input0 & 0x20) != vint4::zero();
372
input0 = select(input0, input0 - 0x40, mask);
373
}
374
375
/**
376
* @brief Debug function to print a vector of ints.
377
*/
378
ASTCENC_SIMD_INLINE void print(vint4 a)
379
{
380
ASTCENC_ALIGNAS int v[4];
381
storea(a, v);
382
printf("v4_i32:\n %8d %8d %8d %8d\n",
383
v[0], v[1], v[2], v[3]);
384
}
385
386
/**
387
* @brief Debug function to print a vector of ints.
388
*/
389
ASTCENC_SIMD_INLINE void printx(vint4 a)
390
{
391
ASTCENC_ALIGNAS int v[4];
392
storea(a, v);
393
394
unsigned int uv[4];
395
std::memcpy(uv, v, sizeof(int) * 4);
396
397
printf("v4_i32:\n %08x %08x %08x %08x\n",
398
uv[0], uv[1], uv[2], uv[3]);
399
}
400
401
/**
402
* @brief Debug function to print a vector of floats.
403
*/
404
ASTCENC_SIMD_INLINE void print(vfloat4 a)
405
{
406
ASTCENC_ALIGNAS float v[4];
407
storea(a, v);
408
printf("v4_f32:\n %0.4f %0.4f %0.4f %0.4f\n",
409
static_cast<double>(v[0]), static_cast<double>(v[1]),
410
static_cast<double>(v[2]), static_cast<double>(v[3]));
411
}
412
413
/**
414
* @brief Debug function to print a vector of masks.
415
*/
416
ASTCENC_SIMD_INLINE void print(vmask4 a)
417
{
418
print(select(vint4(0), vint4(1), a));
419
}
420
421
#endif // #ifndef ASTC_VECMATHLIB_COMMON_4_H_INCLUDED
422
423