/* Provenance: GitHub repository screetsec/TheFatRat,
 * path blob/master/tools/android-sdk/renderscript/clang-include/avx512dqintrin.h
 */
/*===---- avx512dqintrin.h - AVX512DQ intrinsics ---------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

/* Refuse compilation unless __IMMINTRIN_H is already defined, i.e. unless this
 * header is reached through <immintrin.h>. */
#ifndef __IMMINTRIN_H
#error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead."
#endif

/* Include guard for this header. */
#ifndef __AVX512DQINTRIN_H
#define __AVX512DQINTRIN_H

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq")))

static __inline__ __m512i __DEFAULT_FN_ATTRS
35
_mm512_mullo_epi64 (__m512i __A, __m512i __B) {
36
return (__m512i) ((__v8du) __A * (__v8du) __B);
37
}
38
39
static __inline__ __m512i __DEFAULT_FN_ATTRS
40
_mm512_mask_mullo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
41
return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
42
(__v8di) __B,
43
(__v8di) __W,
44
(__mmask8) __U);
45
}
46
47
static __inline__ __m512i __DEFAULT_FN_ATTRS
48
_mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) {
49
return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
50
(__v8di) __B,
51
(__v8di)
52
_mm512_setzero_si512 (),
53
(__mmask8) __U);
54
}
55
56
static __inline__ __m512d __DEFAULT_FN_ATTRS
57
_mm512_xor_pd (__m512d __A, __m512d __B) {
58
return (__m512d) ((__v8du) __A ^ (__v8du) __B);
59
}
60
61
static __inline__ __m512d __DEFAULT_FN_ATTRS
62
_mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
63
return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
64
(__v8df) __B,
65
(__v8df) __W,
66
(__mmask8) __U);
67
}
68
69
static __inline__ __m512d __DEFAULT_FN_ATTRS
70
_mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B) {
71
return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
72
(__v8df) __B,
73
(__v8df)
74
_mm512_setzero_pd (),
75
(__mmask8) __U);
76
}
77
78
static __inline__ __m512 __DEFAULT_FN_ATTRS
79
_mm512_xor_ps (__m512 __A, __m512 __B) {
80
return (__m512) ((__v16su) __A ^ (__v16su) __B);
81
}
82
83
static __inline__ __m512 __DEFAULT_FN_ATTRS
84
_mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
85
return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
86
(__v16sf) __B,
87
(__v16sf) __W,
88
(__mmask16) __U);
89
}
90
91
static __inline__ __m512 __DEFAULT_FN_ATTRS
92
_mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B) {
93
return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
94
(__v16sf) __B,
95
(__v16sf)
96
_mm512_setzero_ps (),
97
(__mmask16) __U);
98
}
99
100
static __inline__ __m512d __DEFAULT_FN_ATTRS
101
_mm512_or_pd (__m512d __A, __m512d __B) {
102
return (__m512d) ((__v8du) __A | (__v8du) __B);
103
}
104
105
static __inline__ __m512d __DEFAULT_FN_ATTRS
106
_mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
107
return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
108
(__v8df) __B,
109
(__v8df) __W,
110
(__mmask8) __U);
111
}
112
113
static __inline__ __m512d __DEFAULT_FN_ATTRS
114
_mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B) {
115
return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
116
(__v8df) __B,
117
(__v8df)
118
_mm512_setzero_pd (),
119
(__mmask8) __U);
120
}
121
122
static __inline__ __m512 __DEFAULT_FN_ATTRS
123
_mm512_or_ps (__m512 __A, __m512 __B) {
124
return (__m512) ((__v16su) __A | (__v16su) __B);
125
}
126
127
static __inline__ __m512 __DEFAULT_FN_ATTRS
128
_mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
129
return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
130
(__v16sf) __B,
131
(__v16sf) __W,
132
(__mmask16) __U);
133
}
134
135
static __inline__ __m512 __DEFAULT_FN_ATTRS
136
_mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B) {
137
return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
138
(__v16sf) __B,
139
(__v16sf)
140
_mm512_setzero_ps (),
141
(__mmask16) __U);
142
}
143
144
static __inline__ __m512d __DEFAULT_FN_ATTRS
145
_mm512_and_pd (__m512d __A, __m512d __B) {
146
return (__m512d) ((__v8du) __A & (__v8du) __B);
147
}
148
149
static __inline__ __m512d __DEFAULT_FN_ATTRS
150
_mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
151
return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
152
(__v8df) __B,
153
(__v8df) __W,
154
(__mmask8) __U);
155
}
156
157
static __inline__ __m512d __DEFAULT_FN_ATTRS
158
_mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B) {
159
return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
160
(__v8df) __B,
161
(__v8df)
162
_mm512_setzero_pd (),
163
(__mmask8) __U);
164
}
165
166
static __inline__ __m512 __DEFAULT_FN_ATTRS
167
_mm512_and_ps (__m512 __A, __m512 __B) {
168
return (__m512) ((__v16su) __A & (__v16su) __B);
169
}
170
171
static __inline__ __m512 __DEFAULT_FN_ATTRS
172
_mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
173
return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
174
(__v16sf) __B,
175
(__v16sf) __W,
176
(__mmask16) __U);
177
}
178
179
static __inline__ __m512 __DEFAULT_FN_ATTRS
180
_mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B) {
181
return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
182
(__v16sf) __B,
183
(__v16sf)
184
_mm512_setzero_ps (),
185
(__mmask16) __U);
186
}
187
188
static __inline__ __m512d __DEFAULT_FN_ATTRS
189
_mm512_andnot_pd (__m512d __A, __m512d __B) {
190
return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
191
(__v8df) __B,
192
(__v8df)
193
_mm512_setzero_pd (),
194
(__mmask8) -1);
195
}
196
197
static __inline__ __m512d __DEFAULT_FN_ATTRS
198
_mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
199
return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
200
(__v8df) __B,
201
(__v8df) __W,
202
(__mmask8) __U);
203
}
204
205
static __inline__ __m512d __DEFAULT_FN_ATTRS
206
_mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B) {
207
return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
208
(__v8df) __B,
209
(__v8df)
210
_mm512_setzero_pd (),
211
(__mmask8) __U);
212
}
213
214
static __inline__ __m512 __DEFAULT_FN_ATTRS
215
_mm512_andnot_ps (__m512 __A, __m512 __B) {
216
return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
217
(__v16sf) __B,
218
(__v16sf)
219
_mm512_setzero_ps (),
220
(__mmask16) -1);
221
}
222
223
static __inline__ __m512 __DEFAULT_FN_ATTRS
224
_mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
225
return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
226
(__v16sf) __B,
227
(__v16sf) __W,
228
(__mmask16) __U);
229
}
230
231
static __inline__ __m512 __DEFAULT_FN_ATTRS
232
_mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B) {
233
return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
234
(__v16sf) __B,
235
(__v16sf)
236
_mm512_setzero_ps (),
237
(__mmask16) __U);
238
}
239
240
static __inline__ __m512i __DEFAULT_FN_ATTRS
241
_mm512_cvtpd_epi64 (__m512d __A) {
242
return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
243
(__v8di) _mm512_setzero_si512(),
244
(__mmask8) -1,
245
_MM_FROUND_CUR_DIRECTION);
246
}
247
248
static __inline__ __m512i __DEFAULT_FN_ATTRS
249
_mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
250
return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
251
(__v8di) __W,
252
(__mmask8) __U,
253
_MM_FROUND_CUR_DIRECTION);
254
}
255
256
static __inline__ __m512i __DEFAULT_FN_ATTRS
257
_mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) {
258
return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
259
(__v8di) _mm512_setzero_si512(),
260
(__mmask8) __U,
261
_MM_FROUND_CUR_DIRECTION);
262
}
263
264
/* Calls the cvtpd2qq512 mask builtin on A with _mm512_setzero_si512() as the
 * second operand, mask (__mmask8)-1, and R cast to int as the last argument. */
#define _mm512_cvt_roundpd_epi64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                           (__v8di)_mm512_setzero_si512(), \
                                           (__mmask8)-1, (int)(R)); })

/* Calls the cvtpd2qq512 mask builtin on A with W as the second (source)
 * operand, U as the mask, and R cast to int as the last argument. */
#define _mm512_mask_cvt_roundpd_epi64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                           (__v8di)(__m512i)(W), \
                                           (__mmask8)(U), (int)(R)); })

/* Calls the cvtpd2qq512 mask builtin on A with _mm512_setzero_si512() as the
 * second operand, U as the mask, and R cast to int as the last argument. */
#define _mm512_maskz_cvt_roundpd_epi64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                           (__v8di)_mm512_setzero_si512(), \
                                           (__mmask8)(U), (int)(R)); })

static __inline__ __m512i __DEFAULT_FN_ATTRS
280
_mm512_cvtpd_epu64 (__m512d __A) {
281
return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
282
(__v8di) _mm512_setzero_si512(),
283
(__mmask8) -1,
284
_MM_FROUND_CUR_DIRECTION);
285
}
286
287
static __inline__ __m512i __DEFAULT_FN_ATTRS
288
_mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
289
return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
290
(__v8di) __W,
291
(__mmask8) __U,
292
_MM_FROUND_CUR_DIRECTION);
293
}
294
295
static __inline__ __m512i __DEFAULT_FN_ATTRS
296
_mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) {
297
return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
298
(__v8di) _mm512_setzero_si512(),
299
(__mmask8) __U,
300
_MM_FROUND_CUR_DIRECTION);
301
}
302
303
/* Calls the cvtpd2uqq512 mask builtin on A with _mm512_setzero_si512() as the
 * second operand, mask (__mmask8)-1, and R cast to int as the last argument. */
#define _mm512_cvt_roundpd_epu64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R)); })

/* Calls the cvtpd2uqq512 mask builtin on A with W as the second (source)
 * operand, U as the mask, and R cast to int as the last argument. */
#define _mm512_mask_cvt_roundpd_epu64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R)); })

/* Calls the cvtpd2uqq512 mask builtin on A with _mm512_setzero_si512() as the
 * second operand, U as the mask, and R cast to int as the last argument. */
#define _mm512_maskz_cvt_roundpd_epu64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R)); })

static __inline__ __m512i __DEFAULT_FN_ATTRS
319
_mm512_cvtps_epi64 (__m256 __A) {
320
return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
321
(__v8di) _mm512_setzero_si512(),
322
(__mmask8) -1,
323
_MM_FROUND_CUR_DIRECTION);
324
}
325
326
static __inline__ __m512i __DEFAULT_FN_ATTRS
327
_mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
328
return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
329
(__v8di) __W,
330
(__mmask8) __U,
331
_MM_FROUND_CUR_DIRECTION);
332
}
333
334
static __inline__ __m512i __DEFAULT_FN_ATTRS
335
_mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) {
336
return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
337
(__v8di) _mm512_setzero_si512(),
338
(__mmask8) __U,
339
_MM_FROUND_CUR_DIRECTION);
340
}
341
342
/* Calls the cvtps2qq512 mask builtin on A with _mm512_setzero_si512() as the
 * second operand, mask (__mmask8)-1, and R cast to int as the last argument. */
#define _mm512_cvt_roundps_epi64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                           (__v8di)_mm512_setzero_si512(), \
                                           (__mmask8)-1, (int)(R)); })

/* Calls the cvtps2qq512 mask builtin on A with W as the second (source)
 * operand, U as the mask, and R cast to int as the last argument. */
#define _mm512_mask_cvt_roundps_epi64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                           (__v8di)(__m512i)(W), \
                                           (__mmask8)(U), (int)(R)); })

/* Calls the cvtps2qq512 mask builtin on A with _mm512_setzero_si512() as the
 * second operand, U as the mask, and R cast to int as the last argument. */
#define _mm512_maskz_cvt_roundps_epi64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                           (__v8di)_mm512_setzero_si512(), \
                                           (__mmask8)(U), (int)(R)); })

static __inline__ __m512i __DEFAULT_FN_ATTRS
358
_mm512_cvtps_epu64 (__m256 __A) {
359
return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
360
(__v8di) _mm512_setzero_si512(),
361
(__mmask8) -1,
362
_MM_FROUND_CUR_DIRECTION);
363
}
364
365
static __inline__ __m512i __DEFAULT_FN_ATTRS
366
_mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
367
return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
368
(__v8di) __W,
369
(__mmask8) __U,
370
_MM_FROUND_CUR_DIRECTION);
371
}
372
373
static __inline__ __m512i __DEFAULT_FN_ATTRS
374
_mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) {
375
return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
376
(__v8di) _mm512_setzero_si512(),
377
(__mmask8) __U,
378
_MM_FROUND_CUR_DIRECTION);
379
}
380
381
/* Calls the cvtps2uqq512 mask builtin on A with _mm512_setzero_si512() as the
 * second operand, mask (__mmask8)-1, and R cast to int as the last argument. */
#define _mm512_cvt_roundps_epu64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R)); })

/* Calls the cvtps2uqq512 mask builtin on A with W as the second (source)
 * operand, U as the mask, and R cast to int as the last argument. */
#define _mm512_mask_cvt_roundps_epu64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R)); })

/* Calls the cvtps2uqq512 mask builtin on A with _mm512_setzero_si512() as the
 * second operand, U as the mask, and R cast to int as the last argument. */
#define _mm512_maskz_cvt_roundps_epu64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R)); })


static __inline__ __m512d __DEFAULT_FN_ATTRS
398
_mm512_cvtepi64_pd (__m512i __A) {
399
return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
400
(__v8df) _mm512_setzero_pd(),
401
(__mmask8) -1,
402
_MM_FROUND_CUR_DIRECTION);
403
}
404
405
static __inline__ __m512d __DEFAULT_FN_ATTRS
406
_mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
407
return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
408
(__v8df) __W,
409
(__mmask8) __U,
410
_MM_FROUND_CUR_DIRECTION);
411
}
412
413
static __inline__ __m512d __DEFAULT_FN_ATTRS
414
_mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) {
415
return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
416
(__v8df) _mm512_setzero_pd(),
417
(__mmask8) __U,
418
_MM_FROUND_CUR_DIRECTION);
419
}
420
421
/* Calls the cvtqq2pd512 mask builtin on A with _mm512_setzero_pd() as the
 * second operand, mask (__mmask8)-1, and R cast to int as the last argument. */
#define _mm512_cvt_roundepi64_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

/* Calls the cvtqq2pd512 mask builtin on A with W as the second (source)
 * operand, U as the mask, and R cast to int as the last argument. */
#define _mm512_mask_cvt_roundepi64_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R)); })

/* Calls the cvtqq2pd512 mask builtin on A with _mm512_setzero_pd() as the
 * second operand, U as the mask, and R cast to int as the last argument. */
#define _mm512_maskz_cvt_roundepi64_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })

static __inline__ __m256 __DEFAULT_FN_ATTRS
437
_mm512_cvtepi64_ps (__m512i __A) {
438
return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
439
(__v8sf) _mm256_setzero_ps(),
440
(__mmask8) -1,
441
_MM_FROUND_CUR_DIRECTION);
442
}
443
444
static __inline__ __m256 __DEFAULT_FN_ATTRS
445
_mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
446
return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
447
(__v8sf) __W,
448
(__mmask8) __U,
449
_MM_FROUND_CUR_DIRECTION);
450
}
451
452
static __inline__ __m256 __DEFAULT_FN_ATTRS
453
_mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A) {
454
return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
455
(__v8sf) _mm256_setzero_ps(),
456
(__mmask8) __U,
457
_MM_FROUND_CUR_DIRECTION);
458
}
459
460
/* Calls the cvtqq2ps512 mask builtin on A with _mm256_setzero_ps() as the
 * second operand, mask (__mmask8)-1, and R cast to int as the last argument. */
#define _mm512_cvt_roundepi64_ps(A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

/* Calls the cvtqq2ps512 mask builtin on A with W as the second (source)
 * operand, U as the mask, and R cast to int as the last argument. */
#define _mm512_mask_cvt_roundepi64_ps(W, U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                          (__v8sf)(__m256)(W), (__mmask8)(U), \
                                          (int)(R)); })

/* Calls the cvtqq2ps512 mask builtin on A with _mm256_setzero_ps() as the
 * second operand, U as the mask, and R cast to int as the last argument. */
#define _mm512_maskz_cvt_roundepi64_ps(U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })


static __inline__ __m512i __DEFAULT_FN_ATTRS
477
_mm512_cvttpd_epi64 (__m512d __A) {
478
return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
479
(__v8di) _mm512_setzero_si512(),
480
(__mmask8) -1,
481
_MM_FROUND_CUR_DIRECTION);
482
}
483
484
static __inline__ __m512i __DEFAULT_FN_ATTRS
485
_mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
486
return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
487
(__v8di) __W,
488
(__mmask8) __U,
489
_MM_FROUND_CUR_DIRECTION);
490
}
491
492
static __inline__ __m512i __DEFAULT_FN_ATTRS
493
_mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) {
494
return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
495
(__v8di) _mm512_setzero_si512(),
496
(__mmask8) __U,
497
_MM_FROUND_CUR_DIRECTION);
498
}
499
500
/* Calls the cvttpd2qq512 mask builtin on A with _mm512_setzero_si512() as the
 * second operand, mask (__mmask8)-1, and R cast to int as the last argument. */
#define _mm512_cvtt_roundpd_epi64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R)); })

/* Calls the cvttpd2qq512 mask builtin on A with W as the second (source)
 * operand, U as the mask, and R cast to int as the last argument. */
#define _mm512_mask_cvtt_roundpd_epi64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R)); })

/* Calls the cvttpd2qq512 mask builtin on A with _mm512_setzero_si512() as the
 * second operand, U as the mask, and R cast to int as the last argument. */
#define _mm512_maskz_cvtt_roundpd_epi64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R)); })

static __inline__ __m512i __DEFAULT_FN_ATTRS
516
_mm512_cvttpd_epu64 (__m512d __A) {
517
return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
518
(__v8di) _mm512_setzero_si512(),
519
(__mmask8) -1,
520
_MM_FROUND_CUR_DIRECTION);
521
}
522
523
static __inline__ __m512i __DEFAULT_FN_ATTRS
524
_mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
525
return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
526
(__v8di) __W,
527
(__mmask8) __U,
528
_MM_FROUND_CUR_DIRECTION);
529
}
530
531
static __inline__ __m512i __DEFAULT_FN_ATTRS
532
_mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) {
533
return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
534
(__v8di) _mm512_setzero_si512(),
535
(__mmask8) __U,
536
_MM_FROUND_CUR_DIRECTION);
537
}
538
539
/* Calls the cvttpd2uqq512 mask builtin on A with _mm512_setzero_si512() as the
 * second operand, mask (__mmask8)-1, and R cast to int as the last argument. */
#define _mm512_cvtt_roundpd_epu64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R)); })

/* Calls the cvttpd2uqq512 mask builtin on A with W as the second (source)
 * operand, U as the mask, and R cast to int as the last argument. */
#define _mm512_mask_cvtt_roundpd_epu64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R)); })

/* Calls the cvttpd2uqq512 mask builtin on A with _mm512_setzero_si512() as the
 * second operand, U as the mask, and R cast to int as the last argument. */
#define _mm512_maskz_cvtt_roundpd_epu64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R)); })

static __inline__ __m512i __DEFAULT_FN_ATTRS
555
_mm512_cvttps_epi64 (__m256 __A) {
556
return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
557
(__v8di) _mm512_setzero_si512(),
558
(__mmask8) -1,
559
_MM_FROUND_CUR_DIRECTION);
560
}
561
562
static __inline__ __m512i __DEFAULT_FN_ATTRS
563
_mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
564
return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
565
(__v8di) __W,
566
(__mmask8) __U,
567
_MM_FROUND_CUR_DIRECTION);
568
}
569
570
static __inline__ __m512i __DEFAULT_FN_ATTRS
571
_mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) {
572
return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
573
(__v8di) _mm512_setzero_si512(),
574
(__mmask8) __U,
575
_MM_FROUND_CUR_DIRECTION);
576
}
577
578
/* Calls the cvttps2qq512 mask builtin on A with _mm512_setzero_si512() as the
 * second operand, mask (__mmask8)-1, and R cast to int as the last argument. */
#define _mm512_cvtt_roundps_epi64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R)); })

/* Calls the cvttps2qq512 mask builtin on A with W as the second (source)
 * operand, U as the mask, and R cast to int as the last argument. */
#define _mm512_mask_cvtt_roundps_epi64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R)); })

/* Calls the cvttps2qq512 mask builtin on A with _mm512_setzero_si512() as the
 * second operand, U as the mask, and R cast to int as the last argument. */
#define _mm512_maskz_cvtt_roundps_epi64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R)); })

static __inline__ __m512i __DEFAULT_FN_ATTRS
594
_mm512_cvttps_epu64 (__m256 __A) {
595
return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
596
(__v8di) _mm512_setzero_si512(),
597
(__mmask8) -1,
598
_MM_FROUND_CUR_DIRECTION);
599
}
600
601
static __inline__ __m512i __DEFAULT_FN_ATTRS
602
_mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
603
return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
604
(__v8di) __W,
605
(__mmask8) __U,
606
_MM_FROUND_CUR_DIRECTION);
607
}
608
609
static __inline__ __m512i __DEFAULT_FN_ATTRS
610
_mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) {
611
return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
612
(__v8di) _mm512_setzero_si512(),
613
(__mmask8) __U,
614
_MM_FROUND_CUR_DIRECTION);
615
}
616
617
/* Calls the cvttps2uqq512 mask builtin on A with _mm512_setzero_si512() as the
 * second operand, mask (__mmask8)-1, and R cast to int as the last argument. */
#define _mm512_cvtt_roundps_epu64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R)); })

/* Calls the cvttps2uqq512 mask builtin on A with W as the second (source)
 * operand, U as the mask, and R cast to int as the last argument. */
#define _mm512_mask_cvtt_roundps_epu64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R)); })

/* Calls the cvttps2uqq512 mask builtin on A with _mm512_setzero_si512() as the
 * second operand, U as the mask, and R cast to int as the last argument. */
#define _mm512_maskz_cvtt_roundps_epu64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R)); })

static __inline__ __m512d __DEFAULT_FN_ATTRS
633
_mm512_cvtepu64_pd (__m512i __A) {
634
return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
635
(__v8df) _mm512_setzero_pd(),
636
(__mmask8) -1,
637
_MM_FROUND_CUR_DIRECTION);
638
}
639
640
static __inline__ __m512d __DEFAULT_FN_ATTRS
641
_mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
642
return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
643
(__v8df) __W,
644
(__mmask8) __U,
645
_MM_FROUND_CUR_DIRECTION);
646
}
647
648
static __inline__ __m512d __DEFAULT_FN_ATTRS
649
_mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) {
650
return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
651
(__v8df) _mm512_setzero_pd(),
652
(__mmask8) __U,
653
_MM_FROUND_CUR_DIRECTION);
654
}
655
656
/* Calls the cvtuqq2pd512 mask builtin on A with _mm512_setzero_pd() as the
 * second operand, mask (__mmask8)-1, and R cast to int as the last argument. */
#define _mm512_cvt_roundepu64_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)); })

/* Calls the cvtuqq2pd512 mask builtin on A with W as the second (source)
 * operand, U as the mask, and R cast to int as the last argument. */
#define _mm512_mask_cvt_roundepu64_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)); })


/* Calls the cvtuqq2pd512 mask builtin on A with _mm512_setzero_pd() as the
 * second operand, U as the mask, and R cast to int as the last argument. */
#define _mm512_maskz_cvt_roundepu64_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)); })


static __inline__ __m256 __DEFAULT_FN_ATTRS
674
_mm512_cvtepu64_ps (__m512i __A) {
675
return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
676
(__v8sf) _mm256_setzero_ps(),
677
(__mmask8) -1,
678
_MM_FROUND_CUR_DIRECTION);
679
}
680
681
static __inline__ __m256 __DEFAULT_FN_ATTRS
682
_mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
683
return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
684
(__v8sf) __W,
685
(__mmask8) __U,
686
_MM_FROUND_CUR_DIRECTION);
687
}
688
689
static __inline__ __m256 __DEFAULT_FN_ATTRS
690
_mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) {
691
return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
692
(__v8sf) _mm256_setzero_ps(),
693
(__mmask8) __U,
694
_MM_FROUND_CUR_DIRECTION);
695
}
696
697
/* Calls the cvtuqq2ps512 mask builtin on A with _mm256_setzero_ps() as the
 * second operand, mask (__mmask8)-1, and R cast to int as the last argument. */
#define _mm512_cvt_roundepu64_ps(A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)); })

/* Calls the cvtuqq2ps512 mask builtin on A with W as the second (source)
 * operand, U as the mask, and R cast to int as the last argument. */
#define _mm512_mask_cvt_roundepu64_ps(W, U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)(__m256)(W), (__mmask8)(U), \
                                           (int)(R)); })

/* Calls the cvtuqq2ps512 mask builtin on A with _mm256_setzero_ps() as the
 * second operand, U as the mask, and R cast to int as the last argument. */
#define _mm512_maskz_cvt_roundepu64_ps(U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)); })

/* Calls the rangepd512 mask builtin on A and B with C cast to int as the third
 * argument, _mm512_setzero_pd() as the source operand, mask (__mmask8)-1, and
 * _MM_FROUND_CUR_DIRECTION as the last argument. */
#define _mm512_range_pd(A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION); })

/* Calls the rangepd512 mask builtin on A and B with C cast to int as the third
 * argument, W as the source operand, U as the mask, and
 * _MM_FROUND_CUR_DIRECTION as the last argument. */
#define _mm512_mask_range_pd(W, U, A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)(__m512d)(W), (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION); })

/* Calls the rangepd512 mask builtin on A and B with C cast to int as the third
 * argument, _mm512_setzero_pd() as the source operand, U as the mask, and
 * _MM_FROUND_CUR_DIRECTION as the last argument. */
#define _mm512_maskz_range_pd(U, A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION); })

/* Calls the rangepd512 mask builtin on A and B with C cast to int as the third
 * argument, _mm512_setzero_pd() as the source operand, mask (__mmask8)-1, and
 * R cast to int as the last argument. */
#define _mm512_range_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)-1, (int)(R)); })

/* Calls the rangepd512 mask builtin on A and B with C cast to int as the third
 * argument, W as the source operand, U as the mask, and R cast to int as the
 * last argument. */
#define _mm512_mask_range_round_pd(W, U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)(__m512d)(W), (__mmask8)(U), \
                                          (int)(R)); })

/* Calls the rangepd512 mask builtin on A and B with C cast to int as the third
 * argument, _mm512_setzero_pd() as the source operand, U as the mask, and R
 * cast to int as the last argument. */
#define _mm512_maskz_range_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(U), (int)(R)); })

/* Calls the rangeps512 mask builtin on A and B with C cast to int as the third
 * argument, _mm512_setzero_ps() as the source operand, mask (__mmask16)-1, and
 * _MM_FROUND_CUR_DIRECTION as the last argument. */
#define _mm512_range_ps(A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)-1, \
                                         _MM_FROUND_CUR_DIRECTION); })

/* Calls the rangeps512 mask builtin on A and B with C cast to int as the third
 * argument, W as the source operand, U as the mask, and
 * _MM_FROUND_CUR_DIRECTION as the last argument. */
#define _mm512_mask_range_ps(W, U, A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)(__m512)(W), (__mmask16)(U), \
                                         _MM_FROUND_CUR_DIRECTION); })

/* Calls the rangeps512 mask builtin on A and B with C cast to int as the third
 * argument, _mm512_setzero_ps() as the source operand, U as the mask, and
 * _MM_FROUND_CUR_DIRECTION as the last argument. */
#define _mm512_maskz_range_ps(U, A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(U), \
                                         _MM_FROUND_CUR_DIRECTION); })

/* Calls the rangeps512 mask builtin on A and B with C cast to int as the third
 * argument, _mm512_setzero_ps() as the source operand, mask (__mmask16)-1, and
 * R cast to int as the last argument. */
#define _mm512_range_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)-1, (int)(R)); })

/* Calls the rangeps512 mask builtin on A and B with C cast to int as the third
 * argument, W as the source operand, U as the mask, and R cast to int as the
 * last argument. */
#define _mm512_mask_range_round_ps(W, U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)(__m512)(W), (__mmask16)(U), \
                                         (int)(R)); })

/* Calls the rangeps512 mask builtin on A and B with C cast to int as the third
 * argument, _mm512_setzero_ps() as the source operand, U as the mask, and R
 * cast to int as the last argument. */
#define _mm512_maskz_range_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(U), (int)(R)); })

/* Calls the rangess128 round-mask builtin on the __m128 operands A and B with
 * _mm_setzero_ps() as the source operand, mask (__mmask8) -1, then C and R,
 * each cast to int. */
#define _mm_range_round_ss(A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8) -1, (int)(C), \
                                               (int)(R)); })

/* Shorthand for _mm_range_round_ss with _MM_FROUND_CUR_DIRECTION supplied as
 * the final (R) argument. */
#define _mm_range_ss(A, B, C) \
  _mm_range_round_ss(A, B, C, _MM_FROUND_CUR_DIRECTION)

/* Calls the rangess128 round-mask builtin on the __m128 operands A and B with
 * W as the source operand, U as the mask, then C and R, each cast to int. */
#define _mm_mask_range_round_ss(W, U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), (int)(C), \
                                               (int)(R)); })

/* Shorthand for _mm_mask_range_round_ss with _MM_FROUND_CUR_DIRECTION supplied
 * as the final (R) argument. */
#define _mm_mask_range_ss(W, U, A, B, C) \
  _mm_mask_range_round_ss(W, U, A, B, C, _MM_FROUND_CUR_DIRECTION)

/* Calls the rangess128 round-mask builtin on the __m128 operands A and B with
 * _mm_setzero_ps() as the source operand, U as the mask, then C and R, each
 * cast to int. */
#define _mm_maskz_range_round_ss(U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(C), \
                                               (int)(R)); })

/* Shorthand for _mm_maskz_range_round_ss with _MM_FROUND_CUR_DIRECTION
 * supplied as the final (R) argument. */
#define _mm_maskz_range_ss(U, A, B, C) \
  _mm_maskz_range_round_ss(U, A, B, C, _MM_FROUND_CUR_DIRECTION)

/* Calls the rangesd128 round-mask builtin on the __m128d operands A and B with
 * _mm_setzero_pd() as the source operand, mask (__mmask8) -1, then C and R,
 * each cast to int. */
#define _mm_range_round_sd(A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8) -1, (int)(C), \
                                                (int)(R)); })

/* Shorthand for _mm_range_round_sd with _MM_FROUND_CUR_DIRECTION supplied as
 * the final (R) argument. */
#define _mm_range_sd(A, B, C) \
  _mm_range_round_sd(A, B, C, _MM_FROUND_CUR_DIRECTION)

/* Calls the rangesd128 round-mask builtin on the __m128d operands A and B with
 * W as the source operand, U as the mask, then C and R, each cast to int. */
#define _mm_mask_range_round_sd(W, U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), (int)(C), \
                                                (int)(R)); })

/* Shorthand for _mm_mask_range_round_sd with _MM_FROUND_CUR_DIRECTION supplied
 * as the final (R) argument. */
#define _mm_mask_range_sd(W, U, A, B, C) \
  _mm_mask_range_round_sd(W, U, A, B, C, _MM_FROUND_CUR_DIRECTION)

/* Calls the rangesd128 round-mask builtin on the __m128d operands A and B with
 * _mm_setzero_pd() as the source operand, U as the mask, then C and R, each
 * cast to int. */
#define _mm_maskz_range_round_sd(U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(C), \
                                                (int)(R)); })

/* Shorthand for _mm_maskz_range_round_sd with _MM_FROUND_CUR_DIRECTION
 * supplied as the final (R) argument. */
#define _mm_maskz_range_sd(U, A, B, C) \
  _mm_maskz_range_round_sd(U, A, B, C, _MM_FROUND_CUR_DIRECTION)

/* Calls the reducepd512 mask builtin on A with B cast to int as the second
 * argument, _mm512_setzero_pd() as the source operand, mask (__mmask8)-1, and
 * _MM_FROUND_CUR_DIRECTION as the last argument. */
#define _mm512_reduce_pd(A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION); })

/* Calls the reducepd512 mask builtin on A with B cast to int as the second
 * argument, W as the source operand, U as the mask, and
 * _MM_FROUND_CUR_DIRECTION as the last argument. */
#define _mm512_mask_reduce_pd(W, U, A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION); })

/* Calls the reducepd512 mask builtin on A with B cast to int as the second
 * argument, _mm512_setzero_pd() as the source operand, U as the mask, and
 * _MM_FROUND_CUR_DIRECTION as the last argument. */
#define _mm512_maskz_reduce_pd(U, A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION); })

/* Calls the reduceps512 mask builtin on A with B cast to int as the second
 * argument, _mm512_setzero_ps() as the source operand, mask (__mmask16)-1, and
 * _MM_FROUND_CUR_DIRECTION as the last argument. */
#define _mm512_reduce_ps(A, B) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, \
                                          _MM_FROUND_CUR_DIRECTION); })

/* Calls the reduceps512 mask builtin on A with B cast to int as the second
 * argument, W as the source operand, U as the mask, and
 * _MM_FROUND_CUR_DIRECTION as the last argument. */
#define _mm512_mask_reduce_ps(W, U, A, B) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), \
                                          _MM_FROUND_CUR_DIRECTION); })

/* Calls the reduceps512 mask builtin on A with B cast to int as the second
 * argument, _mm512_setzero_ps() as the source operand, U as the mask, and
 * _MM_FROUND_CUR_DIRECTION as the last argument. */
#define _mm512_maskz_reduce_ps(U, A, B) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), \
                                          _MM_FROUND_CUR_DIRECTION); })

/* Calls the reducepd512 mask builtin on A with B cast to int as the second
 * argument, _mm512_setzero_pd() as the source operand, mask (__mmask8)-1, and
 * R cast to int as the last argument. */
#define _mm512_reduce_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

/* Expands to __builtin_ia32_reducepd512_mask on A (cast to __m512d) with
   B cast to int, W (cast to __m512d) as the third operand, U cast to
   __mmask8 and R cast to int as the final argument. */
#define _mm512_mask_reduce_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R)); })
887
888
/* Expands to __builtin_ia32_reducepd512_mask on A (cast to __m512d) with
   B cast to int, a zero vector as the third operand, U cast to
   __mmask8 and R cast to int as the final argument. */
#define _mm512_maskz_reduce_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
892
893
/* Expands to __builtin_ia32_reduceps512_mask on A (cast to __m512) with
   B cast to int, a zero vector as the third operand, an all-ones
   __mmask16 and R cast to int as the final argument. */
#define _mm512_reduce_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, (int)(R)); })
897
898
/* Expands to __builtin_ia32_reduceps512_mask on A (cast to __m512) with
   B cast to int, W (cast to __m512) as the third operand, U cast to
   __mmask16 and R cast to int as the final argument. */
#define _mm512_mask_reduce_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), (int)(R)); })
902
903
/* Expands to __builtin_ia32_reduceps512_mask on A (cast to __m512) with
   B cast to int, a zero vector as the third operand, U cast to
   __mmask16 and R cast to int as the final argument. */
#define _mm512_maskz_reduce_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R)); })
907
908
/* Expands to __builtin_ia32_reducess_mask on A and B (cast to __m128)
   with a zero vector as the third operand, an all-ones __mmask8, C cast
   to int and _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm_reduce_ss(A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
                                       (int)(C), _MM_FROUND_CUR_DIRECTION); })
913
914
/* Expands to __builtin_ia32_reducess_mask on A and B (cast to __m128)
   with W (cast to __m128) as the third operand, U cast to __mmask8, C
   cast to int and _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm_mask_reduce_ss(W, U, A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)(__m128)(W), (__mmask8)(U), \
                                       (int)(C), _MM_FROUND_CUR_DIRECTION); })
919
920
/* Expands to __builtin_ia32_reducess_mask on A and B (cast to __m128)
   with a zero vector as the third operand, U cast to __mmask8, C cast
   to int and _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm_maskz_reduce_ss(U, A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), \
                                       (__mmask8)(U), (int)(C), \
                                       _MM_FROUND_CUR_DIRECTION); })
926
927
/* Expands to __builtin_ia32_reducess_mask on A and B (cast to __m128)
   with a zero vector as the third operand, an all-ones __mmask8, and C
   and R cast to int as the trailing arguments. */
#define _mm_reduce_round_ss(A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
                                       (int)(C), (int)(R)); })
932
933
/* Expands to __builtin_ia32_reducess_mask on A and B (cast to __m128)
   with W (cast to __m128) as the third operand, U cast to __mmask8, and
   C and R cast to int as the trailing arguments. */
#define _mm_mask_reduce_round_ss(W, U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)(__m128)(W), (__mmask8)(U), \
                                       (int)(C), (int)(R)); })
938
939
/* Expands to __builtin_ia32_reducess_mask on A and B (cast to __m128)
   with a zero vector as the third operand, U cast to __mmask8, and C
   and R cast to int as the trailing arguments. */
#define _mm_maskz_reduce_round_ss(U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), \
                                       (__mmask8)(U), (int)(C), (int)(R)); })
944
945
/* Expands to __builtin_ia32_reducesd_mask on A and B (cast to __m128d)
   with a zero vector as the third operand, an all-ones __mmask8, C cast
   to int and _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm_reduce_sd(A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)-1, (int)(C), \
                                        _MM_FROUND_CUR_DIRECTION); })
951
952
/* Expands to __builtin_ia32_reducesd_mask on A and B (cast to __m128d)
   with W (cast to __m128d) as the third operand, U cast to __mmask8, C
   cast to int and _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm_mask_reduce_sd(W, U, A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)(__m128d)(W), (__mmask8)(U), \
                                        (int)(C), _MM_FROUND_CUR_DIRECTION); })
957
958
/* Expands to __builtin_ia32_reducesd_mask on A and B (cast to __m128d)
   with a zero vector as the third operand, U cast to __mmask8, C cast
   to int and _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm_maskz_reduce_sd(U, A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)(U), (int)(C), \
                                        _MM_FROUND_CUR_DIRECTION); })
964
965
/* Expands to __builtin_ia32_reducesd_mask on A and B (cast to __m128d)
   with a zero vector as the third operand, an all-ones __mmask8, and C
   and R cast to int as the trailing arguments. */
#define _mm_reduce_round_sd(A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)-1, (int)(C), (int)(R)); })
970
971
/* Expands to __builtin_ia32_reducesd_mask on A and B (cast to __m128d)
   with W (cast to __m128d) as the third operand, U cast to __mmask8,
   and C and R cast to int as the trailing arguments. */
#define _mm_mask_reduce_round_sd(W, U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)(__m128d)(W), (__mmask8)(U), \
                                        (int)(C), (int)(R)); })
976
977
/* Expands to __builtin_ia32_reducesd_mask on A and B (cast to __m128d)
   with a zero vector as the third operand, U cast to __mmask8, and C
   and R cast to int as the trailing arguments. */
#define _mm_maskz_reduce_round_sd(U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)(U), (int)(C), (int)(R)); })
982
983
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
984
_mm512_movepi32_mask (__m512i __A)
985
{
986
return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A);
987
}
988
989
static __inline__ __m512i __DEFAULT_FN_ATTRS
990
_mm512_movm_epi32 (__mmask16 __A)
991
{
992
return (__m512i) __builtin_ia32_cvtmask2d512 (__A);
993
}
994
995
static __inline__ __m512i __DEFAULT_FN_ATTRS
996
_mm512_movm_epi64 (__mmask8 __A)
997
{
998
return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
999
}
1000
1001
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
1002
_mm512_movepi64_mask (__m512i __A)
1003
{
1004
return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A);
1005
}
1006
1007
1008
static __inline__ __m512 __DEFAULT_FN_ATTRS
1009
_mm512_broadcast_f32x2 (__m128 __A)
1010
{
1011
return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
1012
(__v16sf)_mm512_undefined_ps(),
1013
(__mmask16) -1);
1014
}
1015
1016
static __inline__ __m512 __DEFAULT_FN_ATTRS
1017
_mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
1018
{
1019
return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
1020
(__v16sf)
1021
__O, __M);
1022
}
1023
1024
static __inline__ __m512 __DEFAULT_FN_ATTRS
1025
_mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
1026
{
1027
return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
1028
(__v16sf)_mm512_setzero_ps (),
1029
__M);
1030
}
1031
1032
static __inline__ __m512 __DEFAULT_FN_ATTRS
1033
_mm512_broadcast_f32x8 (__m256 __A)
1034
{
1035
return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
1036
_mm512_undefined_ps(),
1037
(__mmask16) -1);
1038
}
1039
1040
static __inline__ __m512 __DEFAULT_FN_ATTRS
1041
_mm512_mask_broadcast_f32x8 (__m512 __O, __mmask16 __M, __m256 __A)
1042
{
1043
return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
1044
(__v16sf)__O,
1045
__M);
1046
}
1047
1048
static __inline__ __m512 __DEFAULT_FN_ATTRS
1049
_mm512_maskz_broadcast_f32x8 (__mmask16 __M, __m256 __A)
1050
{
1051
return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
1052
(__v16sf)_mm512_setzero_ps (),
1053
__M);
1054
}
1055
1056
static __inline__ __m512d __DEFAULT_FN_ATTRS
1057
_mm512_broadcast_f64x2 (__m128d __A)
1058
{
1059
return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A,
1060
(__v8df)_mm512_undefined_pd(),
1061
(__mmask8) -1);
1062
}
1063
1064
static __inline__ __m512d __DEFAULT_FN_ATTRS
1065
_mm512_mask_broadcast_f64x2 (__m512d __O, __mmask8 __M, __m128d __A)
1066
{
1067
return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A,
1068
(__v8df)
1069
__O, __M);
1070
}
1071
1072
static __inline__ __m512d __DEFAULT_FN_ATTRS
1073
_mm512_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
1074
{
1075
return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A,
1076
(__v8df)_mm512_setzero_ps (),
1077
__M);
1078
}
1079
1080
static __inline__ __m512i __DEFAULT_FN_ATTRS
1081
_mm512_broadcast_i32x2 (__m128i __A)
1082
{
1083
return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
1084
(__v16si)_mm512_setzero_si512(),
1085
(__mmask16) -1);
1086
}
1087
1088
static __inline__ __m512i __DEFAULT_FN_ATTRS
1089
_mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
1090
{
1091
return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
1092
(__v16si)
1093
__O, __M);
1094
}
1095
1096
static __inline__ __m512i __DEFAULT_FN_ATTRS
1097
_mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
1098
{
1099
return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
1100
(__v16si)_mm512_setzero_si512 (),
1101
__M);
1102
}
1103
1104
static __inline__ __m512i __DEFAULT_FN_ATTRS
1105
_mm512_broadcast_i32x8 (__m256i __A)
1106
{
1107
return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A,
1108
(__v16si)_mm512_setzero_si512(),
1109
(__mmask16) -1);
1110
}
1111
1112
static __inline__ __m512i __DEFAULT_FN_ATTRS
1113
_mm512_mask_broadcast_i32x8 (__m512i __O, __mmask16 __M, __m256i __A)
1114
{
1115
return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A,
1116
(__v16si)__O,
1117
__M);
1118
}
1119
1120
static __inline__ __m512i __DEFAULT_FN_ATTRS
1121
_mm512_maskz_broadcast_i32x8 (__mmask16 __M, __m256i __A)
1122
{
1123
return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A,
1124
(__v16si)
1125
_mm512_setzero_si512 (),
1126
__M);
1127
}
1128
1129
static __inline__ __m512i __DEFAULT_FN_ATTRS
1130
_mm512_broadcast_i64x2 (__m128i __A)
1131
{
1132
return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A,
1133
(__v8di)_mm512_setzero_si512(),
1134
(__mmask8) -1);
1135
}
1136
1137
static __inline__ __m512i __DEFAULT_FN_ATTRS
1138
_mm512_mask_broadcast_i64x2 (__m512i __O, __mmask8 __M, __m128i __A)
1139
{
1140
return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A,
1141
(__v8di)
1142
__O, __M);
1143
}
1144
1145
static __inline__ __m512i __DEFAULT_FN_ATTRS
1146
_mm512_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
1147
{
1148
return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A,
1149
(__v8di)_mm512_setzero_si512 (),
1150
__M);
1151
}
1152
1153
/* Expands to __builtin_ia32_extractf32x8_mask on A (cast to __m512) with
   imm cast to int, a zero __m256 as the third operand and an all-ones
   __mmask8; the result is cast to __m256. */
#define _mm512_extractf32x8_ps(A, imm) __extension__ ({ \
  (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1); })
1157
1158
/* Expands to __builtin_ia32_extractf32x8_mask on A (cast to __m512) with
   imm cast to int, W (cast to __m256) as the third operand and U cast
   to __mmask8; the result is cast to __m256. */
#define _mm512_mask_extractf32x8_ps(W, U, A, imm) __extension__ ({ \
  (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
                                           (__v8sf)(__m256)(W), \
                                           (__mmask8)(U)); })
1162
1163
/* Expands to __builtin_ia32_extractf32x8_mask on A (cast to __m512) with
   imm cast to int, a zero __m256 as the third operand and U cast to
   __mmask8; the result is cast to __m256. */
#define _mm512_maskz_extractf32x8_ps(U, A, imm) __extension__ ({ \
  (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U)); })
1167
1168
/* Expands to __builtin_ia32_extractf64x2_512_mask on A (cast to __m512d)
   with imm cast to int, a zero __m128d as the third operand and an
   all-ones __mmask8; the result is cast to __m128d. */
#define _mm512_extractf64x2_pd(A, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
                                                (int)(imm), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1); })
1173
1174
/* Expands to __builtin_ia32_extractf64x2_512_mask on A (cast to __m512d)
   with imm cast to int, W (cast to __m128d) as the third operand and U
   cast to __mmask8; the result is cast to __m128d. */
#define _mm512_mask_extractf64x2_pd(W, U, A, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
                                                (int)(imm), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U)); })
1179
1180
/* Expands to __builtin_ia32_extractf64x2_512_mask on A (cast to __m512d)
   with imm cast to int, a zero __m128d as the third operand and U cast
   to __mmask8; the result is cast to __m128d. */
#define _mm512_maskz_extractf64x2_pd(U, A, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
                                                (int)(imm), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U)); })
1185
1186
/* Expands to __builtin_ia32_extracti32x8_mask on A (cast to __m512i)
   with imm cast to int, a zero __m256i as the third operand and an
   all-ones __mmask8; the result is cast to __m256i. */
#define _mm512_extracti32x8_epi32(A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1); })
1190
1191
/* Expands to __builtin_ia32_extracti32x8_mask on A (cast to __m512i)
   with imm cast to int, W (cast to __m256i) as the third operand and U
   cast to __mmask8; the result is cast to __m256i. */
#define _mm512_mask_extracti32x8_epi32(W, U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U)); })
1195
1196
/* Expands to __builtin_ia32_extracti32x8_mask on A (cast to __m512i)
   with imm cast to int, a zero __m256i as the third operand and U cast
   to __mmask8; the result is cast to __m256i. */
#define _mm512_maskz_extracti32x8_epi32(U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U)); })
1200
1201
/* Expands to __builtin_ia32_extracti64x2_512_mask on A (cast to __m512i)
   with imm cast to int, the value of _mm_setzero_di() as the third
   operand and an all-ones __mmask8; the result is cast to __m128i. */
#define _mm512_extracti64x2_epi64(A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
                                                (int)(imm), \
                                                (__v2di)_mm_setzero_di(), \
                                                (__mmask8)-1); })
1206
1207
/* Expands to __builtin_ia32_extracti64x2_512_mask on A (cast to __m512i)
   with imm cast to int, W (cast to __m128i) as the third operand and U
   cast to __mmask8; the result is cast to __m128i. */
#define _mm512_mask_extracti64x2_epi64(W, U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
                                                (int)(imm), \
                                                (__v2di)(__m128i)(W), \
                                                (__mmask8)(U)); })
1212
1213
/* Expands to __builtin_ia32_extracti64x2_512_mask on A (cast to __m512i)
   with imm cast to int, the value of _mm_setzero_di() as the third
   operand and U cast to __mmask8; the result is cast to __m128i. */
#define _mm512_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
                                                (int)(imm), \
                                                (__v2di)_mm_setzero_di(), \
                                                (__mmask8)(U)); })
1218
1219
/* Expands to __builtin_ia32_insertf32x8_mask on A (cast to __m512) and B
   (cast to __m256) with imm cast to int, a zero vector as the fourth
   operand and an all-ones __mmask16; the result is cast to __m512. */
#define _mm512_insertf32x8(A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_insertf32x8_mask((__v16sf)(__m512)(A), \
                                          (__v8sf)(__m256)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1); })
1224
1225
/* Expands to __builtin_ia32_insertf32x8_mask on A (cast to __m512) and B
   (cast to __m256) with imm cast to int, W (cast to __m512) as the
   fourth operand and U cast to __mmask16; the result is cast to __m512. */
#define _mm512_mask_insertf32x8(W, U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_insertf32x8_mask((__v16sf)(__m512)(A), \
                                          (__v8sf)(__m256)(B), (int)(imm), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U)); })
1230
1231
/* Expands to __builtin_ia32_insertf32x8_mask on A (cast to __m512) and B
   (cast to __m256) with imm cast to int, a zero vector as the fourth
   operand and U cast to __mmask16; the result is cast to __m512. */
#define _mm512_maskz_insertf32x8(U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_insertf32x8_mask((__v16sf)(__m512)(A), \
                                          (__v8sf)(__m256)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U)); })
1236
1237
/* Expands to __builtin_ia32_insertf64x2_512_mask on A (cast to __m512d)
   and B (cast to __m128d) with imm cast to int, a zero vector as the
   fourth operand and an all-ones __mmask8; the result is cast to
   __m512d. */
#define _mm512_insertf64x2(A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_insertf64x2_512_mask((__v8df)(__m512d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(imm), \
                                               (__v8df)_mm512_setzero_pd(), \
                                               (__mmask8)-1); })
1243
1244
/* Expands to __builtin_ia32_insertf64x2_512_mask on A (cast to __m512d)
   and B (cast to __m128d) with imm cast to int, W (cast to __m512d) as
   the fourth operand and U cast to __mmask8; the result is cast to
   __m512d. */
#define _mm512_mask_insertf64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_insertf64x2_512_mask((__v8df)(__m512d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(imm), \
                                               (__v8df)(__m512d)(W), \
                                               (__mmask8)(U)); })
1250
1251
/* Expands to __builtin_ia32_insertf64x2_512_mask on A (cast to __m512d)
   and B (cast to __m128d) with imm cast to int, a zero vector as the
   fourth operand and U cast to __mmask8; the result is cast to
   __m512d. */
#define _mm512_maskz_insertf64x2(U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_insertf64x2_512_mask((__v8df)(__m512d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(imm), \
                                               (__v8df)_mm512_setzero_pd(), \
                                               (__mmask8)(U)); })
1257
1258
/* Expands to __builtin_ia32_inserti32x8_mask on A (cast to __m512i) and
   B (cast to __m256i) with imm cast to int, a zero vector as the fourth
   operand and an all-ones __mmask16; the result is cast to __m512i. */
#define _mm512_inserti32x8(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_inserti32x8_mask((__v16si)(__m512i)(A), \
                                           (__v8si)(__m256i)(B), (int)(imm), \
                                           (__v16si)_mm512_setzero_si512(), \
                                           (__mmask16)-1); })
1263
1264
/* Expands to __builtin_ia32_inserti32x8_mask on A (cast to __m512i) and
   B (cast to __m256i) with imm cast to int, W (cast to __m512i) as the
   fourth operand and U cast to __mmask16; the result is cast to
   __m512i. */
#define _mm512_mask_inserti32x8(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_inserti32x8_mask((__v16si)(__m512i)(A), \
                                           (__v8si)(__m256i)(B), (int)(imm), \
                                           (__v16si)(__m512i)(W), \
                                           (__mmask16)(U)); })
1269
1270
/* Expands to __builtin_ia32_inserti32x8_mask on A (cast to __m512i) and
   B (cast to __m256i) with imm cast to int, a zero vector as the fourth
   operand and U cast to __mmask16; the result is cast to __m512i. */
#define _mm512_maskz_inserti32x8(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_inserti32x8_mask((__v16si)(__m512i)(A), \
                                           (__v8si)(__m256i)(B), (int)(imm), \
                                           (__v16si)_mm512_setzero_si512(), \
                                           (__mmask16)(U)); })
1275
1276
/* Expands to __builtin_ia32_inserti64x2_512_mask on A (cast to __m512i)
   and B (cast to __m128i) with imm cast to int, a zero vector as the
   fourth operand and an all-ones __mmask8; the result is cast to
   __m512i. */
#define _mm512_inserti64x2(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_inserti64x2_512_mask((__v8di)(__m512i)(A), \
                                               (__v2di)(__m128i)(B), \
                                               (int)(imm), \
                                               (__v8di)_mm512_setzero_si512(), \
                                               (__mmask8)-1); })
1282
1283
/* Expands to __builtin_ia32_inserti64x2_512_mask on A (cast to __m512i)
   and B (cast to __m128i) with imm cast to int, W (cast to __m512i) as
   the fourth operand and U cast to __mmask8; the result is cast to
   __m512i. */
#define _mm512_mask_inserti64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_inserti64x2_512_mask((__v8di)(__m512i)(A), \
                                               (__v2di)(__m128i)(B), \
                                               (int)(imm), \
                                               (__v8di)(__m512i)(W), \
                                               (__mmask8)(U)); })
1289
1290
/* Expands to __builtin_ia32_inserti64x2_512_mask on A (cast to __m512i)
   and B (cast to __m128i) with imm cast to int, a zero vector as the
   fourth operand and U cast to __mmask8; the result is cast to
   __m512i. */
#define _mm512_maskz_inserti64x2(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_inserti64x2_512_mask((__v8di)(__m512i)(A), \
                                               (__v2di)(__m128i)(B), \
                                               (int)(imm), \
                                               (__v8di)_mm512_setzero_si512(), \
                                               (__mmask8)(U)); })
1296
1297
/* Expands to __builtin_ia32_fpclassps512_mask on A (cast to __m512) with
   imm cast to int and U cast to __mmask16; yields a __mmask16. */
#define _mm512_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \
  (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
                                              (int)(imm), (__mmask16)(U)); })
1300
1301
/* Expands to __builtin_ia32_fpclassps512_mask on A (cast to __m512) with
   imm cast to int and an all-ones __mmask16; yields a __mmask16. */
#define _mm512_fpclass_ps_mask(A, imm) __extension__ ({ \
  (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
                                              (int)(imm), (__mmask16)-1); })
1304
1305
/* Expands to __builtin_ia32_fpclasspd512_mask on A (cast to __m512d)
   with imm cast to int and U cast to __mmask8; yields a __mmask8. */
#define _mm512_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__mmask8)(U)); })
1308
1309
/* Expands to __builtin_ia32_fpclasspd512_mask on A (cast to __m512d)
   with imm cast to int and an all-ones __mmask8; yields a __mmask8. */
#define _mm512_fpclass_pd_mask(A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__mmask8)-1); })
1312
1313
/* Expands to __builtin_ia32_fpclasssd_mask on A (cast to __m128d) with
   imm cast to int and an all-ones __mmask8; yields a __mmask8. */
#define _mm_fpclass_sd_mask(A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \
                                          (__mmask8)-1); })
1316
1317
/* Expands to __builtin_ia32_fpclasssd_mask on A (cast to __m128d) with
   imm cast to int and U cast to __mmask8; yields a __mmask8. */
#define _mm_mask_fpclass_sd_mask(U, A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \
                                          (__mmask8)(U)); })
1320
1321
/* Expands to __builtin_ia32_fpclassss_mask on A (cast to __m128) with
   imm cast to int and an all-ones __mmask8; yields a __mmask8. */
#define _mm_fpclass_ss_mask(A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
                                          (__mmask8)-1); })
1324
1325
/* Expands to __builtin_ia32_fpclassss_mask on A (cast to __m128) with
   imm cast to int and U cast to __mmask8; yields a __mmask8. */
#define _mm_mask_fpclass_ss_mask(U, A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
                                          (__mmask8)(U)); })
1328
1329
/* Remove the attribute shorthand defined near the top of this header so
   the name is no longer defined after this file. */
#undef __DEFAULT_FN_ATTRS
1330
1331
#endif /* __AVX512DQINTRIN_H */
1332
1333