Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
4574 views
1
/****************************************************************************
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
****************************************************************************/
23
24
#ifndef __SWR_SIMDINTRIN_H__
25
#define __SWR_SIMDINTRIN_H__
26
27
#include "common/intrin.h"
28
#include "common/simdlib.hpp"
29
30
// Select the implementation that the generic SIMD:: aliases in this header
// resolve to. Any KNOB_SIMD_WIDTH other than 8 is rejected at compile time.
#if KNOB_SIMD_WIDTH == 8
using SIMD = SIMD256;
#else
#error Unsupported vector width
#endif // KNOB_SIMD_WIDTH == 8
35
36
// Aliases that always resolve to the SIMD128 implementation, independent of
// which implementation SIMD itself names.
#define _simd128_maskstore_ps   SIMD128::maskstore_ps
#define _simd128_fmadd_ps       SIMD128::fmadd_ps
38
39
// Single-precision float aliases. Each _simd_* name forwards to the matching
// SIMD:: member. Function-like macros exist where the SIMD:: member takes its
// immediate operand as a template argument instead of a call argument.

// Load / store / broadcast
#define _simd_load_ps           SIMD::load_ps
#define _simd_load1_ps          SIMD::broadcast_ss
#define _simd_loadu_ps          SIMD::loadu_ps
#define _simd_setzero_ps        SIMD::setzero_ps
#define _simd_set1_ps           SIMD::set1_ps
#define _simd_store_ps          SIMD::store_ps
#define _simd_broadcast_ps(a)   SIMD::broadcast_ps((SIMD128::Float const*)(a))
#define _simd_stream_ps         SIMD::stream_ps

// Blend
#define _simd_blend_ps(a, b, i)     SIMD::blend_ps<i>(a, b)
#define _simd_blend_epi32(a, b, i)  SIMD::blend_epi32<i>(a, b)
#define _simd_blendv_ps             SIMD::blendv_ps

// Arithmetic
#define _simd_mul_ps            SIMD::mul_ps
#define _simd_add_ps            SIMD::add_ps
#define _simd_sub_ps            SIMD::sub_ps
#define _simd_div_ps            SIMD::div_ps
#define _simd_rsqrt_ps          SIMD::rsqrt_ps
#define _simd_rcp_ps            SIMD::rcp_ps
#define _simd_min_ps            SIMD::min_ps
#define _simd_max_ps            SIMD::max_ps
#define _simd_round_ps(a, i)    SIMD::round_ps<SIMD::RoundMode(i)>(a)

// Conversion / reinterpretation
#define _simd_movemask_ps       SIMD::movemask_ps
#define _simd_cvtps_epi32       SIMD::cvtps_epi32
#define _simd_cvttps_epi32      SIMD::cvttps_epi32
#define _simd_cvtepi32_ps       SIMD::cvtepi32_ps
#define _simd_castsi_ps         SIMD::castsi_ps
#define _simd_castps_pd         SIMD::castps_pd
// _simd_castpd_ps used to be defined twice in this group; one definition suffices.
#define _simd_castpd_ps         SIMD::castpd_ps

// Comparison
#define _simd_cmplt_ps          SIMD::cmplt_ps
#define _simd_cmpgt_ps          SIMD::cmpgt_ps
#define _simd_cmpneq_ps         SIMD::cmpneq_ps
#define _simd_cmpeq_ps          SIMD::cmpeq_ps
#define _simd_cmpge_ps          SIMD::cmpge_ps
#define _simd_cmple_ps          SIMD::cmple_ps
#define _simd_cmp_ps(a, b, imm) SIMD::cmp_ps<SIMD::CompareType(imm)>(a, b)

// Bitwise logic
#define _simd_and_ps            SIMD::and_ps
#define _simd_or_ps             SIMD::or_ps
#define _simd_andnot_ps         SIMD::andnot_ps
77
78
// Double-precision helpers
#define _simd_movemask_pd       SIMD::movemask_pd
#define _simd_castsi_pd         SIMD::castsi_pd

// Integer arithmetic
#define _simd_mul_epi32         SIMD::mul_epi32
#define _simd_mullo_epi32       SIMD::mullo_epi32
#define _simd_sub_epi32         SIMD::sub_epi32
#define _simd_sub_epi64         SIMD::sub_epi64
#define _simd_min_epi32         SIMD::min_epi32
#define _simd_min_epu32         SIMD::min_epu32
#define _simd_max_epi32         SIMD::max_epi32
#define _simd_max_epu32         SIMD::max_epu32
#define _simd_add_epi32         SIMD::add_epi32

// Integer logic and 32-bit comparison
#define _simd_and_si            SIMD::and_si
#define _simd_andnot_si         SIMD::andnot_si
#define _simd_cmpeq_epi32       SIMD::cmpeq_epi32
#define _simd_cmplt_epi32       SIMD::cmplt_epi32
#define _simd_cmpgt_epi32       SIMD::cmpgt_epi32
#define _simd_or_si             SIMD::or_si
#define _simd_xor_si            SIMD::xor_si
#define _simd_castps_si         SIMD::castps_si

// 8-bit arithmetic, plus 8/16/64-bit comparison
#define _simd_adds_epu8         SIMD::adds_epu8
#define _simd_subs_epu8         SIMD::subs_epu8
#define _simd_add_epi8          SIMD::add_epi8
#define _simd_cmpeq_epi64       SIMD::cmpeq_epi64
#define _simd_cmpgt_epi64       SIMD::cmpgt_epi64
#define _simd_cmpgt_epi8        SIMD::cmpgt_epi8
#define _simd_cmpeq_epi8        SIMD::cmpeq_epi8
#define _simd_cmpgt_epi16       SIMD::cmpgt_epi16
#define _simd_cmpeq_epi16       SIMD::cmpeq_epi16
#define _simd_movemask_epi8     SIMD::movemask_epi8

// Permutes and per-lane variable shifts
#define _simd_permute_ps_i(a, i) SIMD::permute_ps<i>(a)
#define _simd_permute_ps        SIMD::permute_ps
#define _simd_permute_epi32     SIMD::permute_epi32
#define _simd_srlv_epi32        SIMD::srlv_epi32
#define _simd_sllv_epi32        SIMD::sllv_epi32
113
114
// Integer unpack (interleave) aliases
#define _simd_unpacklo_epi8     SIMD::unpacklo_epi8
#define _simd_unpackhi_epi8     SIMD::unpackhi_epi8
#define _simd_unpacklo_epi16    SIMD::unpacklo_epi16
#define _simd_unpackhi_epi16    SIMD::unpackhi_epi16
#define _simd_unpacklo_epi32    SIMD::unpacklo_epi32
#define _simd_unpackhi_epi32    SIMD::unpackhi_epi32
#define _simd_unpacklo_epi64    SIMD::unpacklo_epi64
#define _simd_unpackhi_epi64    SIMD::unpackhi_epi64

// Immediate shifts: the shift count is forwarded as a template argument
#define _simd_slli_epi32(a, i)  SIMD::slli_epi32<i>(a)
#define _simd_srai_epi32(a, i)  SIMD::srai_epi32<i>(a)
#define _simd_srli_epi32(a, i)  SIMD::srli_epi32<i>(a)
#define _simd_srlisi_ps(a, i)   SIMD::srlisi_ps<i>(a)

// Multiply-add / multiply-subtract and byte shuffle
#define _simd_fmadd_ps          SIMD::fmadd_ps
#define _simd_fmsub_ps          SIMD::fmsub_ps
#define _simd_shuffle_epi8      SIMD::shuffle_epi8
131
132
// Gathers: the scale argument is converted to SIMD::ScaleFactor and passed
// as a template argument.
#define _simd_i32gather_ps(p, o, s) SIMD::i32gather_ps<SIMD::ScaleFactor(s)>(p, o)
#define _simd_mask_i32gather_ps(r, p, o, m, s) \
    SIMD::mask_i32gather_ps<SIMD::ScaleFactor(s)>(r, p, o, m)
#define _simd_abs_epi32         SIMD::abs_epi32

// Unsigned widening conversions
#define _simd_cvtepu8_epi16     SIMD::cvtepu8_epi16
#define _simd_cvtepu8_epi32     SIMD::cvtepu8_epi32
#define _simd_cvtepu16_epi32    SIMD::cvtepu16_epi32
#define _simd_cvtepu16_epi64    SIMD::cvtepu16_epi64
#define _simd_cvtepu32_epi64    SIMD::cvtepu32_epi64

// Narrowing packs
#define _simd_packus_epi16      SIMD::packus_epi16
#define _simd_packs_epi16       SIMD::packs_epi16
#define _simd_packus_epi32      SIMD::packus_epi32
#define _simd_packs_epi32       SIMD::packs_epi32
147
148
// Float / double unpack (interleave) aliases
#define _simd_unpacklo_ps       SIMD::unpacklo_ps
#define _simd_unpackhi_ps       SIMD::unpackhi_ps
#define _simd_unpacklo_pd       SIMD::unpacklo_pd
#define _simd_unpackhi_pd       SIMD::unpackhi_pd

// 128-bit half insert / extract / permute. The function-like forms forward
// their immediate as a template argument.
#define _simd_insertf128_ps     SIMD::insertf128_ps
#define _simd_insertf128_pd     SIMD::insertf128_pd
#define _simd_insertf128_si(a, b, i)    SIMD::insertf128_si<i>(a, b)
#define _simd_extractf128_ps(a, i)      SIMD::extractf128_ps<i>(a)
#define _simd_extractf128_pd(a, i)      SIMD::extractf128_pd<i>(a)
#define _simd_extractf128_si(a, i)      SIMD::extractf128_si<i>(a)
#define _simd_permute2f128_ps(a, b, i)  SIMD::permute2f128_ps<i>(a, b)
#define _simd_permute2f128_pd(a, b, i)  SIMD::permute2f128_pd<i>(a, b)
#define _simd_permute2f128_si(a, b, i)  SIMD::permute2f128_si<i>(a, b)

// Immediate shuffles
#define _simd_shuffle_ps(a, b, i)       SIMD::shuffle_ps<i>(a, b)
#define _simd_shuffle_pd(a, b, i)       SIMD::shuffle_pd<i>(a, b)
#define _simd_shuffle_epi32(a, b, imm8) SIMD::shuffle_epi32<imm8>(a, b)
#define _simd_shuffle_epi64(a, b, imm8) SIMD::shuffle_epi64<imm8>(a, b)

// Set / load / store / test.
// _simd_cvttps_epi32 and _simd_sub_ps were repeated in this group; both are
// already defined earlier in this header with the same replacement text, so
// the repeats are omitted here.
#define _simd_set1_epi32        SIMD::set1_epi32
#define _simd_set_epi32         SIMD::set_epi32
#define _simd_set_ps            SIMD::set_ps
#define _simd_set1_epi8         SIMD::set1_epi8
#define _simd_setzero_si        SIMD::setzero_si
#define _simd_store_si          SIMD::store_si
#define _simd_broadcast_ss      SIMD::broadcast_ss
#define _simd_maskstore_ps      SIMD::maskstore_ps
#define _simd_load_si           SIMD::load_si
#define _simd_loadu_si          SIMD::loadu_si
#define _simd_testz_ps          SIMD::testz_ps
#define _simd_testz_si          SIMD::testz_si
#define _simd_xor_ps            SIMD::xor_ps
180
181
// Two-part unaligned integer load / store
#define _simd_loadu2_si         SIMD::loadu2_si
#define _simd_storeu2_si        SIMD::storeu2_si

// Variable integer blend and mask helper
#define _simd_blendv_epi32      SIMD::blendv_epi32
#define _simd_vmask_ps          SIMD::vmask_ps
186
187
template <int mask>
188
SIMDINLINE SIMD128::Integer _simd_blend4_epi32(SIMD128::Integer const& a, SIMD128::Integer const& b)
189
{
190
return SIMD128::castps_si(
191
SIMD128::blend_ps<mask>(SIMD128::castsi_ps(a), SIMD128::castsi_ps(b)));
192
}
193
194
//////////////////////////////////////////////////////////////////////////
195
/// @brief Compute plane equation vA * vX + vB * vY + vC
196
SIMDINLINE simdscalar vplaneps(simdscalar const& vA,
197
simdscalar const& vB,
198
simdscalar const& vC,
199
simdscalar const& vX,
200
simdscalar const& vY)
201
{
202
simdscalar vOut = _simd_fmadd_ps(vA, vX, vC);
203
vOut = _simd_fmadd_ps(vB, vY, vOut);
204
return vOut;
205
}
206
207
//////////////////////////////////////////////////////////////////////////
208
/// @brief Compute plane equation vA * vX + vB * vY + vC
209
SIMDINLINE simd4scalar vplaneps(simd4scalar const& vA,
210
simd4scalar const& vB,
211
simd4scalar const& vC,
212
simd4scalar const& vX,
213
simd4scalar const& vY)
214
{
215
simd4scalar vOut = _simd128_fmadd_ps(vA, vX, vC);
216
vOut = _simd128_fmadd_ps(vB, vY, vOut);
217
return vOut;
218
}
219
220
//////////////////////////////////////////////////////////////////////////
221
/// @brief Interpolates a single component.
222
/// @param vI - barycentric I
223
/// @param vJ - barycentric J
224
/// @param pInterpBuffer - pointer to attribute barycentric coeffs
225
template <UINT Attrib, UINT Comp, UINT numComponents = 4>
226
static SIMDINLINE simdscalar InterpolateComponent(simdscalar const& vI,
227
simdscalar const& vJ,
228
const float* pInterpBuffer)
229
{
230
const float* pInterpA = &pInterpBuffer[Attrib * 3 * numComponents + 0 + Comp];
231
const float* pInterpB = &pInterpBuffer[Attrib * 3 * numComponents + numComponents + Comp];
232
const float* pInterpC = &pInterpBuffer[Attrib * 3 * numComponents + numComponents * 2 + Comp];
233
234
if ((pInterpA[0] == pInterpB[0]) && (pInterpA[0] == pInterpC[0]))
235
{
236
// Ensure constant attribs are constant. Required for proper
237
// 3D resource copies.
238
return _simd_broadcast_ss(pInterpA);
239
}
240
241
simdscalar vA = _simd_broadcast_ss(pInterpA);
242
simdscalar vB = _simd_broadcast_ss(pInterpB);
243
simdscalar vC = _simd_broadcast_ss(pInterpC);
244
245
simdscalar vk = _simd_sub_ps(_simd_sub_ps(_simd_set1_ps(1.0f), vI), vJ);
246
vC = _simd_mul_ps(vk, vC);
247
248
return vplaneps(vA, vB, vC, vI, vJ);
249
}
250
251
//////////////////////////////////////////////////////////////////////////
252
/// @brief Interpolates a single component (flat shade).
253
/// @param pInterpBuffer - pointer to attribute barycentric coeffs
254
template <UINT Attrib, UINT Comp, UINT numComponents = 4>
255
static SIMDINLINE simdscalar InterpolateComponentFlat(const float* pInterpBuffer)
256
{
257
const float* pInterpA = &pInterpBuffer[Attrib * 3 * numComponents + 0 + Comp];
258
259
simdscalar vA = _simd_broadcast_ss(pInterpA);
260
261
return vA;
262
}
263
264
//////////////////////////////////////////////////////////////////////////
265
/// @brief Interpolates a single component (flat shade).
266
/// @param pInterpBuffer - pointer to attribute barycentric coeffs
267
template <UINT Attrib, UINT Comp, UINT numComponents = 4>
268
static SIMDINLINE simdscalari InterpolateComponentFlatInt(const uint32_t* pInterpBuffer)
269
{
270
const uint32_t interpA = pInterpBuffer[Attrib * 3 * numComponents + 0 + Comp];
271
272
simdscalari vA = _simd_set1_epi32(interpA);
273
274
return vA;
275
}
276
277
//////////////////////////////////////////////////////////////////////////
278
/// @brief Interpolates a single component.
279
/// @param vI - barycentric I
280
/// @param vJ - barycentric J
281
/// @param pInterpBuffer - pointer to attribute barycentric coeffs
282
template <UINT Attrib, UINT Comp, UINT numComponents = 4>
283
static SIMDINLINE simd4scalar InterpolateComponent(simd4scalar const& vI,
284
simd4scalar const& vJ,
285
const float* pInterpBuffer)
286
{
287
const float* pInterpA = &pInterpBuffer[Attrib * 3 * numComponents + 0 + Comp];
288
const float* pInterpB = &pInterpBuffer[Attrib * 3 * numComponents + numComponents + Comp];
289
const float* pInterpC = &pInterpBuffer[Attrib * 3 * numComponents + numComponents * 2 + Comp];
290
291
if ((pInterpA[0] == pInterpB[0]) && (pInterpA[0] == pInterpC[0]))
292
{
293
// Ensure constant attribs are constant. Required for proper
294
// 3D resource copies.
295
return SIMD128::broadcast_ss(pInterpA);
296
}
297
298
simd4scalar vA = SIMD128::broadcast_ss(pInterpA);
299
simd4scalar vB = SIMD128::broadcast_ss(pInterpB);
300
simd4scalar vC = SIMD128::broadcast_ss(pInterpC);
301
302
simd4scalar vk = SIMD128::sub_ps(SIMD128::sub_ps(SIMD128::set1_ps(1.0f), vI), vJ);
303
vC = SIMD128::mul_ps(vk, vC);
304
305
return vplaneps(vA, vB, vC, vI, vJ);
306
}
307
308
//////////////////////////////////////////////////////////////////////////
/// @brief Absolute value of each float lane of a simd4scalar, computed by
///        masking the integer bit pattern with 0x7fffffff.
/// @param a - input vector
static SIMDINLINE simd4scalar _simd128_abs_ps(simd4scalar const& a)
{
    // 0x7fffffff keeps every bit of a 32-bit lane except bit 31, the
    // IEEE-754 single-precision sign bit.
    const simd4scalari bits        = SIMD128::castps_si(a);
    const simd4scalari signCleared = SIMD128::and_si(bits, SIMD128::set1_epi32(0x7fffffff));

    return SIMD128::castsi_ps(signCleared);
}
313
314
//////////////////////////////////////////////////////////////////////////
/// @brief Absolute value of each float lane of a simdscalar, computed by
///        masking the integer bit pattern with 0x7fffffff.
/// @param a - input vector
static SIMDINLINE simdscalar _simd_abs_ps(simdscalar const& a)
{
    // 0x7fffffff keeps every bit of a 32-bit lane except bit 31, the
    // IEEE-754 single-precision sign bit.
    const simdscalari bits        = _simd_castps_si(a);
    const simdscalari signCleared = _simd_and_si(bits, _simd_set1_epi32(0x7fffffff));

    return _simd_castsi_ps(signCleared);
}
319
320
#include "simd16intrin.h"
321
322
#endif //__SWR_SIMDINTRIN_H__
323
324