GitHub Repository: screetsec/TheFatRat
Path: blob/master/tools/android-sdk/renderscript/clang-include/avx512erintrin.h
/*===---- avx512erintrin.h - AVX512ER intrinsics ---------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512ERINTRIN_H
#define __AVX512ERINTRIN_H

// exp2a23
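// Descriptive note (not in the original header): the exp2a23 family
// approximates 2^x element-wise; per Intel's AVX-512ER documentation the
// maximum relative error is 2^-23, hence the name. Each operation comes in
// three flavors: unmasked, merge-masked (_mask_, taking a source vector S
// whose lanes are kept where the corresponding mask bit is 0), and
// zero-masked (_maskz_, which zeroes lanes where the mask bit is 0). The
// _round_ variants additionally take an explicit rounding/exception-control
// argument R, while the plain forms use _MM_FROUND_CUR_DIRECTION.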
#define _mm512_exp2a23_round_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
                                      (__v8df)_mm512_setzero_pd(), \
                                      (__mmask8)-1, (int)(R)); })

#define _mm512_mask_exp2a23_round_pd(S, M, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
                                      (__v8df)(__m512d)(S), (__mmask8)(M), \
                                      (int)(R)); })

#define _mm512_maskz_exp2a23_round_pd(M, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
                                      (__v8df)_mm512_setzero_pd(), \
                                      (__mmask8)(M), (int)(R)); })

#define _mm512_exp2a23_pd(A) \
  _mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_pd(S, M, A) \
  _mm512_mask_exp2a23_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_pd(M, A) \
  _mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_exp2a23_round_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
                                     (__v16sf)_mm512_setzero_ps(), \
                                     (__mmask16)-1, (int)(R)); })

#define _mm512_mask_exp2a23_round_ps(S, M, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
                                     (__v16sf)(__m512)(S), (__mmask16)(M), \
                                     (int)(R)); })

#define _mm512_maskz_exp2a23_round_ps(M, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
                                     (__v16sf)_mm512_setzero_ps(), \
                                     (__mmask16)(M), (int)(R)); })

#define _mm512_exp2a23_ps(A) \
  _mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_ps(S, M, A) \
  _mm512_mask_exp2a23_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_ps(M, A) \
  _mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)

// rsqrt28
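// Descriptive note (not in the original header): the rsqrt28 family
// approximates the reciprocal square root 1/sqrt(x) with a maximum relative
// error of 2^-28 per Intel's documentation. Packed 512-bit pd/ps forms are
// followed by scalar _ss/_sd forms, which compute the approximation for the
// low element of the second operand B and copy the remaining elements from
// the first operand A.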
#define _mm512_rsqrt28_round_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
                                         (__v8df)_mm512_setzero_pd(), \
                                         (__mmask8)-1, (int)(R)); })

#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(S), (__mmask8)(M), \
                                         (int)(R)); })

#define _mm512_maskz_rsqrt28_round_pd(M, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
                                         (__v8df)_mm512_setzero_pd(), \
                                         (__mmask8)(M), (int)(R)); })

#define _mm512_rsqrt28_pd(A) \
  _mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_pd(S, M, A) \
  _mm512_mask_rsqrt28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_pd(M, A) \
  _mm512_maskz_rsqrt28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_round_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)_mm512_setzero_ps(), \
                                        (__mmask16)-1, (int)(R)); })

#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)(__m512)(S), (__mmask16)(M), \
                                        (int)(R)); })

#define _mm512_maskz_rsqrt28_round_ps(M, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)_mm512_setzero_ps(), \
                                        (__mmask16)(M), (int)(R)); })

#define _mm512_rsqrt28_ps(A) \
  _mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_ps(S, M, A) \
  _mm512_mask_rsqrt28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_ps(M, A) \
  _mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)-1, (int)(R)); })

#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)(__m128)(S), \
                                              (__mmask8)(M), (int)(R)); })

#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(M), (int)(R)); })

#define _mm_rsqrt28_ss(A, B) \
  _mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_ss(S, M, A, B) \
  _mm_mask_rsqrt28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_ss(M, A, B) \
  _mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1, (int)(R)); })

#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)(__m128d)(S), \
                                               (__mmask8)(M), (int)(R)); })

#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(M), (int)(R)); })

#define _mm_rsqrt28_sd(A, B) \
  _mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_sd(S, M, A, B) \
  _mm_mask_rsqrt28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_sd(M, A, B) \
  _mm_maskz_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)

// rcp28
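// Descriptive note (not in the original header): the rcp28 family
// approximates the reciprocal 1/x with a maximum relative error of 2^-28 per
// Intel's documentation, mirroring the rsqrt28 variants above: packed pd/ps
// forms plus scalar ss/sd forms, each with merge- and zero-masked versions.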
#define _mm512_rcp28_round_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
                                       (__v8df)_mm512_setzero_pd(), \
                                       (__mmask8)-1, (int)(R)); })

#define _mm512_mask_rcp28_round_pd(S, M, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
                                       (__v8df)(__m512d)(S), (__mmask8)(M), \
                                       (int)(R)); })

#define _mm512_maskz_rcp28_round_pd(M, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
                                       (__v8df)_mm512_setzero_pd(), \
                                       (__mmask8)(M), (int)(R)); })

#define _mm512_rcp28_pd(A) \
  _mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_pd(S, M, A) \
  _mm512_mask_rcp28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_pd(M, A) \
  _mm512_maskz_rcp28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_round_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
                                      (__v16sf)_mm512_setzero_ps(), \
                                      (__mmask16)-1, (int)(R)); })

#define _mm512_mask_rcp28_round_ps(S, M, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
                                      (__v16sf)(__m512)(S), (__mmask16)(M), \
                                      (int)(R)); })

#define _mm512_maskz_rcp28_round_ps(M, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
                                      (__v16sf)_mm512_setzero_ps(), \
                                      (__mmask16)(M), (int)(R)); })

#define _mm512_rcp28_ps(A) \
  _mm512_rcp28_round_ps((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_ps(S, M, A) \
  _mm512_mask_rcp28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_ps(M, A) \
  _mm512_maskz_rcp28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm_rcp28_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)-1, (int)(R)); })

#define _mm_mask_rcp28_round_ss(S, M, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)(__m128)(S), \
                                            (__mmask8)(M), (int)(R)); })

#define _mm_maskz_rcp28_round_ss(M, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(M), (int)(R)); })

#define _mm_rcp28_ss(A, B) \
  _mm_rcp28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rcp28_ss(S, M, A, B) \
  _mm_mask_rcp28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_ss(M, A, B) \
  _mm_maskz_rcp28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_rcp28_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)-1, (int)(R)); })

#define _mm_mask_rcp28_round_sd(S, M, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2df)(__m128d)(S), \
                                             (__mmask8)(M), (int)(R)); })

#define _mm_maskz_rcp28_round_sd(M, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)(M), (int)(R)); })

#define _mm_rcp28_sd(A, B) \
  _mm_rcp28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rcp28_sd(S, M, A, B) \
  _mm_mask_rcp28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_sd(M, A, B) \
  _mm_maskz_rcp28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#endif // __AVX512ERINTRIN_H
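// A minimal usage sketch (not part of the original header): it assumes a CPU
// and compiler with AVX-512F/AVX-512ER support (e.g. building with
// -mavx512er) and illustrates the masked forms defined above. The function
// and variable names are hypothetical.
//
//   #include <immintrin.h>
//
//   // Approximate 1.0f/x for 16 floats; only lanes selected by `mask` are
//   // written, the remaining lanes are zeroed (_maskz_ form).
//   __m512 approx_recip_masked(__m512 x, __mmask16 mask)
//   {
//     return _mm512_maskz_rcp28_ps(mask, x);
//   }
//
//   // Approximate 2^x with the exp2a23 form; lanes not selected by `mask`
//   // are taken unchanged from `fallback` (_mask_ merge form).
//   __m512 approx_exp2_merge(__m512 fallback, __mmask16 mask, __m512 x)
//   {
//     return _mm512_mask_exp2a23_ps(fallback, mask, x);
//   }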