Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/swr/rasterizer/common/simdlib_interface.hpp
4574 views
1
/****************************************************************************
2
* Copyright (C) 2017 Intel Corporation. All Rights Reserved.
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
13
* Software.
14
*
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
* IN THE SOFTWARE.
22
****************************************************************************/
23
#pragma once
24
#if 0
25
//===========================================================================
26
// Placeholder name representing either SIMD4, SIMD256, or SIMD16 structures.
27
//===========================================================================
28
struct SIMD256 // or SIMD4 or SIMD16
{
    // Interface sketch only: every member below is a declaration without a
    // body, and the enclosing #if 0 keeps this struct out of the build.
    // The trailing comment on each declaration describes the per-element
    // result the operation is expected to produce.

    //=======================================================================
    // SIMD Types
    //
    // These aliases show the SIMD256 case. The SIMD4 and SIMD16
    // implementations use different base types with this same naming.
    using Float   = __m256;  // Packed single-precision float vector
    using Double  = __m256d; // Packed double-precision float vector
    using Integer = __m256i; // Packed integer vector (mutable element widths)
    using Mask    = uint8_t; // Integer representing mask bits

    //=======================================================================
    // Standard interface
    // (available in both SIMD256 and SIMD16 widths)
    //=======================================================================

    //-----------------------------------------------------------------------
    // Single precision floating point arithmetic operations
    //-----------------------------------------------------------------------
    static Float add_ps(Float a, Float b);            // return a + b
    static Float div_ps(Float a, Float b);            // return a / b
    static Float fmadd_ps(Float a, Float b, Float c); // return (a * b) + c
    static Float fmsub_ps(Float a, Float b, Float c); // return (a * b) - c
    static Float max_ps(Float a, Float b);            // return (a > b) ? a : b
    static Float min_ps(Float a, Float b);            // return (a < b) ? a : b
    static Float mul_ps(Float a, Float b);            // return a * b
    static Float rcp_ps(Float a);                     // return 1.0f / a
    static Float rsqrt_ps(Float a);                   // return 1.0f / sqrt(a)
    static Float sub_ps(Float a, Float b);            // return a - b

    // Rounding selector for round_ps. The first group picks the rounding
    // direction, the second group picks the exception behavior, and the
    // named combinations below OR one value from each group together.
    enum class RoundMode
    {
        TO_NEAREST_INT = 0x00, // Round to nearest integer == TRUNCATE(value + (signof(value))0.5)
        TO_NEG_INF     = 0x01, // Round to negative infinity
        TO_POS_INF     = 0x02, // Round to positive infinity
        TO_ZERO        = 0x03, // Round to 0 a.k.a. truncate
        CUR_DIRECTION  = 0x04, // Round in direction set in MXCSR register

        RAISE_EXC = 0x00, // Raise exception on overflow
        NO_EXC    = 0x08, // Suppress exceptions

        NINT        = static_cast<int>(TO_NEAREST_INT) | static_cast<int>(RAISE_EXC),
        NINT_NOEXC  = static_cast<int>(TO_NEAREST_INT) | static_cast<int>(NO_EXC),
        FLOOR       = static_cast<int>(TO_NEG_INF) | static_cast<int>(RAISE_EXC),
        FLOOR_NOEXC = static_cast<int>(TO_NEG_INF) | static_cast<int>(NO_EXC),
        CEIL        = static_cast<int>(TO_POS_INF) | static_cast<int>(RAISE_EXC),
        CEIL_NOEXC  = static_cast<int>(TO_POS_INF) | static_cast<int>(NO_EXC),
        TRUNC       = static_cast<int>(TO_ZERO) | static_cast<int>(RAISE_EXC),
        TRUNC_NOEXC = static_cast<int>(TO_ZERO) | static_cast<int>(NO_EXC),
        RINT        = static_cast<int>(CUR_DIRECTION) | static_cast<int>(RAISE_EXC),
        NEARBYINT   = static_cast<int>(CUR_DIRECTION) | static_cast<int>(NO_EXC),
    };

    // return round_func(a)
    //
    // round_func is chosen on the RMT template parameter. See the documentation
    // for the RoundMode enumeration above.
    template <RoundMode RMT>
    static Float round_ps(Float a); // return round(a)

    //-----------------------------------------------------------------------
    // Integer (various width) arithmetic operations
    //-----------------------------------------------------------------------
    static Integer abs_epi32(Integer a);            // return absolute_value(a) (int32)
    static Integer add_epi32(Integer a, Integer b); // return a + b (int32)
    static Integer add_epi8(Integer a, Integer b);  // return a + b (int8)
    static Integer adds_epu8(Integer a, Integer b); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
    static Integer max_epi32(Integer a, Integer b); // return (a > b) ? a : b (int32)
    static Integer max_epu32(Integer a, Integer b); // return (a > b) ? a : b (uint32)
    static Integer min_epi32(Integer a, Integer b); // return (a < b) ? a : b (int32)
    static Integer min_epu32(Integer a, Integer b); // return (a < b) ? a : b (uint32)
    static Integer mul_epi32(Integer a, Integer b); // return a * b (int32)

    // return (a * b) & 0xFFFFFFFF
    //
    // Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers,
    // and store the low 32 bits of the intermediate integers in dst.
    //
    // The result is packed int32, so it is returned as Integer.
    static Integer mullo_epi32(Integer a, Integer b);

    static Integer sub_epi32(Integer a, Integer b); // return a - b (int32)
    static Integer sub_epi64(Integer a, Integer b); // return a - b (int64)
    static Integer subs_epu8(Integer a, Integer b); // return (b > a) ? 0 : (a - b) (uint8)

    //-----------------------------------------------------------------------
    // Logical operations
    //-----------------------------------------------------------------------
    static Float   and_ps(Float a, Float b);        // return a & b (float treated as int)
    static Integer and_si(Integer a, Integer b);    // return a & b (int)
    static Float   andnot_ps(Float a, Float b);     // return (~a) & b (float treated as int)
    static Integer andnot_si(Integer a, Integer b); // return (~a) & b (int)
    static Float   or_ps(Float a, Float b);         // return a | b (float treated as int)
    static Integer or_si(Integer a, Integer b);     // return a | b (int)
    static Float   xor_ps(Float a, Float b);        // return a ^ b (float treated as int)
    static Integer xor_si(Integer a, Integer b);    // return a ^ b (int)

    //-----------------------------------------------------------------------
    // Shift operations
    //-----------------------------------------------------------------------
    template <int ImmT>
    static Integer slli_epi32(Integer a);            // return a << ImmT
    static Integer sllv_epi32(Integer a, Integer b); // return a << b
    template <int ImmT>
    static Integer srai_epi32(Integer a);            // return a >> ImmT (int32)
    template <int ImmT>
    static Integer srli_epi32(Integer a);            // return a >> ImmT (uint32)
    template <int ImmT>                              // for each 128-bit lane:
    static Integer srli_si(Integer a);               // return a >> (ImmT*8) (uint)
    template <int ImmT>
    static Float   srlisi_ps(Float a);               // same as srli_si, but with Float cast to int
    static Integer srlv_epi32(Integer a, Integer b); // return a >> b (uint32)

    //-----------------------------------------------------------------------
    // Conversion operations
    //-----------------------------------------------------------------------
    static Float   castpd_ps(Double a);       // return *(Float*)(&a)
    static Integer castps_si(Float a);        // return *(Integer*)(&a)
    static Double  castsi_pd(Integer a);      // return *(Double*)(&a)
    static Double  castps_pd(Float a);        // return *(Double*)(&a)
    static Float   castsi_ps(Integer a);      // return *(Float*)(&a)
    static Float   cvtepi32_ps(Integer a);    // return (float)a (int32 --> float)
    static Integer cvtepu8_epi16(Integer a);  // return (int16)a (uint8 --> int16)
    static Integer cvtepu8_epi32(Integer a);  // return (int32)a (uint8 --> int32)
    static Integer cvtepu16_epi32(Integer a); // return (int32)a (uint16 --> int32)
    static Integer cvtepu16_epi64(Integer a); // return (int64)a (uint16 --> int64)
    static Integer cvtepu32_epi64(Integer a); // return (int64)a (uint32 --> int64)
    static Integer cvtps_epi32(Float a);      // return (int32)a (float --> int32)
    static Integer cvttps_epi32(Float a);     // return (int32)a (rnd_to_zero(float) --> int32)

    //-----------------------------------------------------------------------
    // Comparison operations
    //-----------------------------------------------------------------------

    // Comparison types used with cmp_ps:
    // - ordered comparisons are always false if either operand is NaN
    // - unordered comparisons are always true if either operand is NaN
    // - signaling comparisons raise an exception if either operand is NaN
    // - non-signaling comparisons will never raise an exception
    //
    // Ordered: return (a != NaN) && (b != NaN) && (a cmp b)
    // Unordered: return (a == NaN) || (b == NaN) || (a cmp b)
    enum class CompareType
    {
        EQ_OQ    = 0x00, // Equal (ordered, nonsignaling)
        LT_OS    = 0x01, // Less-than (ordered, signaling)
        LE_OS    = 0x02, // Less-than-or-equal (ordered, signaling)
        UNORD_Q  = 0x03, // Unordered (nonsignaling)
        NEQ_UQ   = 0x04, // Not-equal (unordered, nonsignaling)
        NLT_US   = 0x05, // Not-less-than (unordered, signaling)
        NLE_US   = 0x06, // Not-less-than-or-equal (unordered, signaling)
        ORD_Q    = 0x07, // Ordered (nonsignaling)
        EQ_UQ    = 0x08, // Equal (unordered, non-signaling)
        NGE_US   = 0x09, // Not-greater-than-or-equal (unordered, signaling)
        NGT_US   = 0x0A, // Not-greater-than (unordered, signaling)
        FALSE_OQ = 0x0B, // False (ordered, nonsignaling)
        NEQ_OQ   = 0x0C, // Not-equal (ordered, non-signaling)
        GE_OS    = 0x0D, // Greater-than-or-equal (ordered, signaling)
        GT_OS    = 0x0E, // Greater-than (ordered, signaling)
        TRUE_UQ  = 0x0F, // True (unordered, non-signaling)
        EQ_OS    = 0x10, // Equal (ordered, signaling)
        LT_OQ    = 0x11, // Less-than (ordered, nonsignaling)
        LE_OQ    = 0x12, // Less-than-or-equal (ordered, nonsignaling)
        UNORD_S  = 0x13, // Unordered (signaling)
        NEQ_US   = 0x14, // Not-equal (unordered, signaling)
        NLT_UQ   = 0x15, // Not-less-than (unordered, nonsignaling)
        NLE_UQ   = 0x16, // Not-less-than-or-equal (unordered, nonsignaling)
        ORD_S    = 0x17, // Ordered (signaling)
        EQ_US    = 0x18, // Equal (unordered, signaling)
        NGE_UQ   = 0x19, // Not-greater-than-or-equal (unordered, nonsignaling)
        NGT_UQ   = 0x1A, // Not-greater-than (unordered, nonsignaling)
        FALSE_OS = 0x1B, // False (ordered, signaling)
        NEQ_OS   = 0x1C, // Not-equal (ordered, signaling)
        GE_OQ    = 0x1D, // Greater-than-or-equal (ordered, nonsignaling)
        GT_OQ    = 0x1E, // Greater-than (ordered, nonsignaling)
        TRUE_US  = 0x1F, // True (unordered, signaling)
    };

    // return a (CmpTypeT) b (float)
    //
    // See documentation for CompareType above for valid values for CmpTypeT.
    template <CompareType CmpTypeT>
    static Float   cmp_ps(Float a, Float b);          // return a (CmpTypeT) b (see above)
    static Float   cmpgt_ps(Float a, Float b);        // return cmp_ps<CompareType::GT_OQ>(a, b)
    static Float   cmple_ps(Float a, Float b);        // return cmp_ps<CompareType::LE_OQ>(a, b)
    static Float   cmplt_ps(Float a, Float b);        // return cmp_ps<CompareType::LT_OQ>(a, b)
    static Float   cmpneq_ps(Float a, Float b);       // return cmp_ps<CompareType::NEQ_OQ>(a, b)
    static Float   cmpeq_ps(Float a, Float b);        // return cmp_ps<CompareType::EQ_OQ>(a, b)
    static Float   cmpge_ps(Float a, Float b);        // return cmp_ps<CompareType::GE_OQ>(a, b)
    static Integer cmpeq_epi8(Integer a, Integer b);  // return a == b (int8)
    static Integer cmpeq_epi16(Integer a, Integer b); // return a == b (int16)
    static Integer cmpeq_epi32(Integer a, Integer b); // return a == b (int32)
    static Integer cmpeq_epi64(Integer a, Integer b); // return a == b (int64)
    static Integer cmpgt_epi8(Integer a, Integer b);  // return a > b (int8)
    static Integer cmpgt_epi16(Integer a, Integer b); // return a > b (int16)
    static Integer cmpgt_epi32(Integer a, Integer b); // return a > b (int32)
    static Integer cmpgt_epi64(Integer a, Integer b); // return a > b (int64)
    static Integer cmplt_epi32(Integer a, Integer b); // return a < b (int32)
    static bool    testz_ps(Float a, Float b);        // return all_lanes_zero(a & b) ? 1 : 0 (float)
    static bool    testz_si(Integer a, Integer b);    // return all_lanes_zero(a & b) ? 1 : 0 (int)

    //-----------------------------------------------------------------------
    // Blend / shuffle / permute operations
    //-----------------------------------------------------------------------
    template <int ImmT>
    static Float   blend_ps(Float a, Float b);                     // return ImmT ? b : a (float)
    static Integer blendv_epi32(Integer a, Integer b, Float mask); // return mask ? b : a (int)
    static Float   blendv_ps(Float a, Float b, Float mask);        // return mask ? b : a (float)
    static Float   broadcast_ss(float const* p);       // return *p (all elements in vector get same value)
    static Integer packs_epi16(Integer a, Integer b);  // See documentation for _mm256_packs_epi16 and _mm512_packs_epi16
    static Integer packs_epi32(Integer a, Integer b);  // See documentation for _mm256_packs_epi32 and _mm512_packs_epi32
    static Integer packus_epi16(Integer a, Integer b); // See documentation for _mm256_packus_epi16 and _mm512_packus_epi16
    static Integer packus_epi32(Integer a, Integer b); // See documentation for _mm256_packus_epi32 and _mm512_packus_epi32
    static Integer permute_epi32(Integer a, Integer swiz); // return a[swiz[i]] for each 32-bit lane i (int32)
    static Float   permute_ps(Float a, Integer swiz);      // return a[swiz[i]] for each 32-bit lane i (float)
    template <int SwizT>
    static Integer shuffle_epi32(Integer a, Integer b);
    template <int SwizT>
    static Integer shuffle_epi64(Integer a, Integer b);
    static Integer shuffle_epi8(Integer a, Integer b);
    template <int SwizT>
    static Double  shuffle_pd(Double a, Double b);
    template <int SwizT>
    static Float   shuffle_ps(Float a, Float b);
    static Integer unpackhi_epi16(Integer a, Integer b);
    static Integer unpackhi_epi32(Integer a, Integer b);
    static Integer unpackhi_epi64(Integer a, Integer b);
    static Integer unpackhi_epi8(Integer a, Integer b);
    static Double  unpackhi_pd(Double a, Double b);
    static Float   unpackhi_ps(Float a, Float b);
    static Integer unpacklo_epi16(Integer a, Integer b);
    static Integer unpacklo_epi32(Integer a, Integer b);
    static Integer unpacklo_epi64(Integer a, Integer b);
    static Integer unpacklo_epi8(Integer a, Integer b);
    static Double  unpacklo_pd(Double a, Double b);
    static Float   unpacklo_ps(Float a, Float b);

    //-----------------------------------------------------------------------
    // Load / store operations
    //-----------------------------------------------------------------------
    enum class ScaleFactor
    {
        SF_1, // No scaling
        SF_2, // Scale offset by 2
        SF_4, // Scale offset by 4
        SF_8, // Scale offset by 8
    };

    template <ScaleFactor ScaleT = ScaleFactor::SF_1>
    static Float   i32gather_ps(float const* p, Integer idx); // return *(float*)(((int8*)p) + (idx * ScaleT))
    static Float   load1_ps(float const* p);   // return *p (broadcast 1 value to all elements)
    static Float   load_ps(float const* p);    // return *p (loads SIMD width elements from memory)
    static Integer load_si(Integer const* p);  // return *p
    static Float   loadu_ps(float const* p);   // return *p (same as load_ps but allows for unaligned mem)
    static Integer loadu_si(Integer const* p); // return *p (same as load_si but allows for unaligned mem)

    // for each element: (mask & (1 << 31)) ? (i32gather_ps<ScaleT>(p, idx), mask = 0) : old
    //
    // ScaleT is a ScaleFactor so that it can be forwarded to i32gather_ps as
    // written in the expression above.
    template <ScaleFactor ScaleT = ScaleFactor::SF_1>
    static Float mask_i32gather_ps(Float old, float const* p, Integer idx, Float mask);

    static void    maskstore_ps(float* p, Integer mask, Float src);
    static int     movemask_epi8(Integer a);
    static int     movemask_pd(Double a);
    static int     movemask_ps(Float a);
    static Integer set1_epi32(int i);            // return i (all elements are same value)
    static Integer set1_epi8(char i);            // return i (all elements are same value)
    static Float   set1_ps(float f);             // return f (all elements are same value)
    static Float   setzero_ps();                 // return 0 (float)
    static Integer setzero_si();                 // return 0 (integer)
    static void    store_ps(float* p, Float a);  // *p = a (stores all elements contiguously in memory)
    static void    store_si(Integer* p, Integer a); // *p = a
    static void    stream_ps(float* p, Float a); // *p = a (same as store_ps, but doesn't keep memory in cache)

    //=======================================================================
    // Legacy interface (available only in SIMD256 width)
    //=======================================================================

    static Float broadcast_ps(__m128 const* p);
    template <int ImmT>
    static __m128d extractf128_pd(Double a);
    template <int ImmT>
    static __m128  extractf128_ps(Float a);
    template <int ImmT>
    static __m128i extractf128_si(Integer a);
    template <int ImmT>
    static Double  insertf128_pd(Double a, __m128d b);
    template <int ImmT>
    static Float   insertf128_ps(Float a, __m128 b);
    template <int ImmT>
    static Integer insertf128_si(Integer a, __m128i b);
    // Integer halves are addressed through __m128i pointers, matching storeu2_si.
    static Integer loadu2_si(__m128i const* phi, __m128i const* plo);
    template <int ImmT>
    static Double  permute2f128_pd(Double a, Double b);
    template <int ImmT>
    static Float   permute2f128_ps(Float a, Float b);
    template <int ImmT>
    static Integer permute2f128_si(Integer a, Integer b);
    static Integer set_epi32(int i7, int i6, int i5, int i4, int i3, int i2, int i1, int i0);
    static void    storeu2_si(__m128i* phi, __m128i* plo, Integer src);

    //=======================================================================
    // Advanced masking interface (currently available only in SIMD16 width)
    //=======================================================================
};
332
#endif // #if 0
333
334