Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
screetsec
GitHub Repository: screetsec/TheFatRat
Path: blob/master/tools/android-sdk/renderscript/clang-include/avx512vldqintrin.h
496 views
1
/*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ------------===
2
*
3
* Permission is hereby granted, free of charge, to any person obtaining a copy
4
* of this software and associated documentation files (the "Software"), to deal
5
* in the Software without restriction, including without limitation the rights
6
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
* copies of the Software, and to permit persons to whom the Software is
8
* furnished to do so, subject to the following conditions:
9
*
10
* The above copyright notice and this permission notice shall be included in
11
* all copies or substantial portions of the Software.
12
*
13
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
* THE SOFTWARE.
20
*
21
*===-----------------------------------------------------------------------===
22
*/
23
24
/* Refuse direct inclusion: this header expects __IMMINTRIN_H to be defined already. */
#if !defined(__IMMINTRIN_H)
#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
#endif
27
28
#ifndef __AVX512VLDQINTRIN_H
29
#define __AVX512VLDQINTRIN_H
30
31
/* Attribute list shared by the inline functions in this file: always_inline,
 * nodebug, and a target string of "avx512vl,avx512dq". */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512vl,avx512dq")))
33
34
static __inline__ __m256i __DEFAULT_FN_ATTRS
35
_mm256_mullo_epi64 (__m256i __A, __m256i __B) {
36
return (__m256i) ((__v4du) __A * (__v4du) __B);
37
}
38
39
static __inline__ __m256i __DEFAULT_FN_ATTRS
40
_mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
41
return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
42
(__v4di) __B,
43
(__v4di) __W,
44
(__mmask8) __U);
45
}
46
47
static __inline__ __m256i __DEFAULT_FN_ATTRS
48
_mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) {
49
return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
50
(__v4di) __B,
51
(__v4di)
52
_mm256_setzero_si256 (),
53
(__mmask8) __U);
54
}
55
56
static __inline__ __m128i __DEFAULT_FN_ATTRS
57
_mm_mullo_epi64 (__m128i __A, __m128i __B) {
58
return (__m128i) ((__v2du) __A * (__v2du) __B);
59
}
60
61
static __inline__ __m128i __DEFAULT_FN_ATTRS
62
_mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
63
return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
64
(__v2di) __B,
65
(__v2di) __W,
66
(__mmask8) __U);
67
}
68
69
static __inline__ __m128i __DEFAULT_FN_ATTRS
70
_mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) {
71
return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
72
(__v2di) __B,
73
(__v2di)
74
_mm_setzero_si128 (),
75
(__mmask8) __U);
76
}
77
78
static __inline__ __m256d __DEFAULT_FN_ATTRS
79
_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
80
return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
81
(__v4df) __B,
82
(__v4df) __W,
83
(__mmask8) __U);
84
}
85
86
static __inline__ __m256d __DEFAULT_FN_ATTRS
87
_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) {
88
return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
89
(__v4df) __B,
90
(__v4df)
91
_mm256_setzero_pd (),
92
(__mmask8) __U);
93
}
94
95
static __inline__ __m128d __DEFAULT_FN_ATTRS
96
_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
97
return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
98
(__v2df) __B,
99
(__v2df) __W,
100
(__mmask8) __U);
101
}
102
103
static __inline__ __m128d __DEFAULT_FN_ATTRS
104
_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) {
105
return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
106
(__v2df) __B,
107
(__v2df)
108
_mm_setzero_pd (),
109
(__mmask8) __U);
110
}
111
112
static __inline__ __m256 __DEFAULT_FN_ATTRS
113
_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
114
return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
115
(__v8sf) __B,
116
(__v8sf) __W,
117
(__mmask8) __U);
118
}
119
120
static __inline__ __m256 __DEFAULT_FN_ATTRS
121
_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) {
122
return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
123
(__v8sf) __B,
124
(__v8sf)
125
_mm256_setzero_ps (),
126
(__mmask8) __U);
127
}
128
129
static __inline__ __m128 __DEFAULT_FN_ATTRS
130
_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
131
return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
132
(__v4sf) __B,
133
(__v4sf) __W,
134
(__mmask8) __U);
135
}
136
137
static __inline__ __m128 __DEFAULT_FN_ATTRS
138
_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) {
139
return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
140
(__v4sf) __B,
141
(__v4sf)
142
_mm_setzero_ps (),
143
(__mmask8) __U);
144
}
145
146
static __inline__ __m256d __DEFAULT_FN_ATTRS
147
_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
148
return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
149
(__v4df) __B,
150
(__v4df) __W,
151
(__mmask8) __U);
152
}
153
154
static __inline__ __m256d __DEFAULT_FN_ATTRS
155
_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B) {
156
return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
157
(__v4df) __B,
158
(__v4df)
159
_mm256_setzero_pd (),
160
(__mmask8) __U);
161
}
162
163
static __inline__ __m128d __DEFAULT_FN_ATTRS
164
_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
165
return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
166
(__v2df) __B,
167
(__v2df) __W,
168
(__mmask8) __U);
169
}
170
171
static __inline__ __m128d __DEFAULT_FN_ATTRS
172
_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B) {
173
return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
174
(__v2df) __B,
175
(__v2df)
176
_mm_setzero_pd (),
177
(__mmask8) __U);
178
}
179
180
static __inline__ __m256 __DEFAULT_FN_ATTRS
181
_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
182
return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
183
(__v8sf) __B,
184
(__v8sf) __W,
185
(__mmask8) __U);
186
}
187
188
static __inline__ __m256 __DEFAULT_FN_ATTRS
189
_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B) {
190
return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
191
(__v8sf) __B,
192
(__v8sf)
193
_mm256_setzero_ps (),
194
(__mmask8) __U);
195
}
196
197
static __inline__ __m128 __DEFAULT_FN_ATTRS
198
_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
199
return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
200
(__v4sf) __B,
201
(__v4sf) __W,
202
(__mmask8) __U);
203
}
204
205
static __inline__ __m128 __DEFAULT_FN_ATTRS
206
_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B) {
207
return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
208
(__v4sf) __B,
209
(__v4sf)
210
_mm_setzero_ps (),
211
(__mmask8) __U);
212
}
213
214
static __inline__ __m256d __DEFAULT_FN_ATTRS
215
_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A,
216
__m256d __B) {
217
return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
218
(__v4df) __B,
219
(__v4df) __W,
220
(__mmask8) __U);
221
}
222
223
static __inline__ __m256d __DEFAULT_FN_ATTRS
224
_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B) {
225
return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
226
(__v4df) __B,
227
(__v4df)
228
_mm256_setzero_pd (),
229
(__mmask8) __U);
230
}
231
232
static __inline__ __m128d __DEFAULT_FN_ATTRS
233
_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
234
return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
235
(__v2df) __B,
236
(__v2df) __W,
237
(__mmask8) __U);
238
}
239
240
static __inline__ __m128d __DEFAULT_FN_ATTRS
241
_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) {
242
return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
243
(__v2df) __B,
244
(__v2df)
245
_mm_setzero_pd (),
246
(__mmask8) __U);
247
}
248
249
static __inline__ __m256 __DEFAULT_FN_ATTRS
250
_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
251
return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
252
(__v8sf) __B,
253
(__v8sf) __W,
254
(__mmask8) __U);
255
}
256
257
static __inline__ __m256 __DEFAULT_FN_ATTRS
258
_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B) {
259
return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
260
(__v8sf) __B,
261
(__v8sf)
262
_mm256_setzero_ps (),
263
(__mmask8) __U);
264
}
265
266
static __inline__ __m128 __DEFAULT_FN_ATTRS
267
_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
268
return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
269
(__v4sf) __B,
270
(__v4sf) __W,
271
(__mmask8) __U);
272
}
273
274
static __inline__ __m128 __DEFAULT_FN_ATTRS
275
_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B) {
276
return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
277
(__v4sf) __B,
278
(__v4sf)
279
_mm_setzero_ps (),
280
(__mmask8) __U);
281
}
282
283
static __inline__ __m256d __DEFAULT_FN_ATTRS
284
_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
285
return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
286
(__v4df) __B,
287
(__v4df) __W,
288
(__mmask8) __U);
289
}
290
291
static __inline__ __m256d __DEFAULT_FN_ATTRS
292
_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B) {
293
return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
294
(__v4df) __B,
295
(__v4df)
296
_mm256_setzero_pd (),
297
(__mmask8) __U);
298
}
299
300
static __inline__ __m128d __DEFAULT_FN_ATTRS
301
_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
302
return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
303
(__v2df) __B,
304
(__v2df) __W,
305
(__mmask8) __U);
306
}
307
308
static __inline__ __m128d __DEFAULT_FN_ATTRS
309
_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B) {
310
return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
311
(__v2df) __B,
312
(__v2df)
313
_mm_setzero_pd (),
314
(__mmask8) __U);
315
}
316
317
static __inline__ __m256 __DEFAULT_FN_ATTRS
318
_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
319
return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
320
(__v8sf) __B,
321
(__v8sf) __W,
322
(__mmask8) __U);
323
}
324
325
static __inline__ __m256 __DEFAULT_FN_ATTRS
326
_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B) {
327
return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
328
(__v8sf) __B,
329
(__v8sf)
330
_mm256_setzero_ps (),
331
(__mmask8) __U);
332
}
333
334
static __inline__ __m128 __DEFAULT_FN_ATTRS
335
_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
336
return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
337
(__v4sf) __B,
338
(__v4sf) __W,
339
(__mmask8) __U);
340
}
341
342
static __inline__ __m128 __DEFAULT_FN_ATTRS
343
_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) {
344
return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
345
(__v4sf) __B,
346
(__v4sf)
347
_mm_setzero_ps (),
348
(__mmask8) __U);
349
}
350
351
static __inline__ __m128i __DEFAULT_FN_ATTRS
352
_mm_cvtpd_epi64 (__m128d __A) {
353
return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
354
(__v2di) _mm_setzero_si128(),
355
(__mmask8) -1);
356
}
357
358
static __inline__ __m128i __DEFAULT_FN_ATTRS
359
_mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) {
360
return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
361
(__v2di) __W,
362
(__mmask8) __U);
363
}
364
365
static __inline__ __m128i __DEFAULT_FN_ATTRS
366
_mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A) {
367
return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
368
(__v2di) _mm_setzero_si128(),
369
(__mmask8) __U);
370
}
371
372
static __inline__ __m256i __DEFAULT_FN_ATTRS
373
_mm256_cvtpd_epi64 (__m256d __A) {
374
return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
375
(__v4di) _mm256_setzero_si256(),
376
(__mmask8) -1);
377
}
378
379
static __inline__ __m256i __DEFAULT_FN_ATTRS
380
_mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) {
381
return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
382
(__v4di) __W,
383
(__mmask8) __U);
384
}
385
386
static __inline__ __m256i __DEFAULT_FN_ATTRS
387
_mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A) {
388
return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
389
(__v4di) _mm256_setzero_si256(),
390
(__mmask8) __U);
391
}
392
393
static __inline__ __m128i __DEFAULT_FN_ATTRS
394
_mm_cvtpd_epu64 (__m128d __A) {
395
return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
396
(__v2di) _mm_setzero_si128(),
397
(__mmask8) -1);
398
}
399
400
static __inline__ __m128i __DEFAULT_FN_ATTRS
401
_mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) {
402
return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
403
(__v2di) __W,
404
(__mmask8) __U);
405
}
406
407
static __inline__ __m128i __DEFAULT_FN_ATTRS
408
_mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A) {
409
return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
410
(__v2di) _mm_setzero_si128(),
411
(__mmask8) __U);
412
}
413
414
static __inline__ __m256i __DEFAULT_FN_ATTRS
415
_mm256_cvtpd_epu64 (__m256d __A) {
416
return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
417
(__v4di) _mm256_setzero_si256(),
418
(__mmask8) -1);
419
}
420
421
static __inline__ __m256i __DEFAULT_FN_ATTRS
422
_mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) {
423
return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
424
(__v4di) __W,
425
(__mmask8) __U);
426
}
427
428
static __inline__ __m256i __DEFAULT_FN_ATTRS
429
_mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A) {
430
return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
431
(__v4di) _mm256_setzero_si256(),
432
(__mmask8) __U);
433
}
434
435
static __inline__ __m128i __DEFAULT_FN_ATTRS
436
_mm_cvtps_epi64 (__m128 __A) {
437
return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
438
(__v2di) _mm_setzero_si128(),
439
(__mmask8) -1);
440
}
441
442
static __inline__ __m128i __DEFAULT_FN_ATTRS
443
_mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) {
444
return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
445
(__v2di) __W,
446
(__mmask8) __U);
447
}
448
449
static __inline__ __m128i __DEFAULT_FN_ATTRS
450
_mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) {
451
return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
452
(__v2di) _mm_setzero_si128(),
453
(__mmask8) __U);
454
}
455
456
static __inline__ __m256i __DEFAULT_FN_ATTRS
457
_mm256_cvtps_epi64 (__m128 __A) {
458
return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
459
(__v4di) _mm256_setzero_si256(),
460
(__mmask8) -1);
461
}
462
463
static __inline__ __m256i __DEFAULT_FN_ATTRS
464
_mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) {
465
return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
466
(__v4di) __W,
467
(__mmask8) __U);
468
}
469
470
static __inline__ __m256i __DEFAULT_FN_ATTRS
471
_mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) {
472
return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
473
(__v4di) _mm256_setzero_si256(),
474
(__mmask8) __U);
475
}
476
477
static __inline__ __m128i __DEFAULT_FN_ATTRS
478
_mm_cvtps_epu64 (__m128 __A) {
479
return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
480
(__v2di) _mm_setzero_si128(),
481
(__mmask8) -1);
482
}
483
484
static __inline__ __m128i __DEFAULT_FN_ATTRS
485
_mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) {
486
return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
487
(__v2di) __W,
488
(__mmask8) __U);
489
}
490
491
static __inline__ __m128i __DEFAULT_FN_ATTRS
492
_mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) {
493
return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
494
(__v2di) _mm_setzero_si128(),
495
(__mmask8) __U);
496
}
497
498
static __inline__ __m256i __DEFAULT_FN_ATTRS
499
_mm256_cvtps_epu64 (__m128 __A) {
500
return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
501
(__v4di) _mm256_setzero_si256(),
502
(__mmask8) -1);
503
}
504
505
static __inline__ __m256i __DEFAULT_FN_ATTRS
506
_mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) {
507
return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
508
(__v4di) __W,
509
(__mmask8) __U);
510
}
511
512
static __inline__ __m256i __DEFAULT_FN_ATTRS
513
_mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) {
514
return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
515
(__v4di) _mm256_setzero_si256(),
516
(__mmask8) __U);
517
}
518
519
static __inline__ __m128d __DEFAULT_FN_ATTRS
520
_mm_cvtepi64_pd (__m128i __A) {
521
return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
522
(__v2df) _mm_setzero_pd(),
523
(__mmask8) -1);
524
}
525
526
static __inline__ __m128d __DEFAULT_FN_ATTRS
527
_mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
528
return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
529
(__v2df) __W,
530
(__mmask8) __U);
531
}
532
533
static __inline__ __m128d __DEFAULT_FN_ATTRS
534
_mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A) {
535
return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
536
(__v2df) _mm_setzero_pd(),
537
(__mmask8) __U);
538
}
539
540
static __inline__ __m256d __DEFAULT_FN_ATTRS
541
_mm256_cvtepi64_pd (__m256i __A) {
542
return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
543
(__v4df) _mm256_setzero_pd(),
544
(__mmask8) -1);
545
}
546
547
static __inline__ __m256d __DEFAULT_FN_ATTRS
548
_mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
549
return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
550
(__v4df) __W,
551
(__mmask8) __U);
552
}
553
554
static __inline__ __m256d __DEFAULT_FN_ATTRS
555
_mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A) {
556
return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
557
(__v4df) _mm256_setzero_pd(),
558
(__mmask8) __U);
559
}
560
561
static __inline__ __m128 __DEFAULT_FN_ATTRS
562
_mm_cvtepi64_ps (__m128i __A) {
563
return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
564
(__v4sf) _mm_setzero_ps(),
565
(__mmask8) -1);
566
}
567
568
static __inline__ __m128 __DEFAULT_FN_ATTRS
569
_mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A) {
570
return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
571
(__v4sf) __W,
572
(__mmask8) __U);
573
}
574
575
static __inline__ __m128 __DEFAULT_FN_ATTRS
576
_mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A) {
577
return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
578
(__v4sf) _mm_setzero_ps(),
579
(__mmask8) __U);
580
}
581
582
static __inline__ __m128 __DEFAULT_FN_ATTRS
583
_mm256_cvtepi64_ps (__m256i __A) {
584
return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
585
(__v4sf) _mm_setzero_ps(),
586
(__mmask8) -1);
587
}
588
589
static __inline__ __m128 __DEFAULT_FN_ATTRS
590
_mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
591
return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
592
(__v4sf) __W,
593
(__mmask8) __U);
594
}
595
596
static __inline__ __m128 __DEFAULT_FN_ATTRS
597
_mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A) {
598
return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
599
(__v4sf) _mm_setzero_ps(),
600
(__mmask8) __U);
601
}
602
603
static __inline__ __m128i __DEFAULT_FN_ATTRS
604
_mm_cvttpd_epi64 (__m128d __A) {
605
return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
606
(__v2di) _mm_setzero_si128(),
607
(__mmask8) -1);
608
}
609
610
static __inline__ __m128i __DEFAULT_FN_ATTRS
611
_mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) {
612
return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
613
(__v2di) __W,
614
(__mmask8) __U);
615
}
616
617
static __inline__ __m128i __DEFAULT_FN_ATTRS
618
_mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A) {
619
return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
620
(__v2di) _mm_setzero_si128(),
621
(__mmask8) __U);
622
}
623
624
static __inline__ __m256i __DEFAULT_FN_ATTRS
625
_mm256_cvttpd_epi64 (__m256d __A) {
626
return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
627
(__v4di) _mm256_setzero_si256(),
628
(__mmask8) -1);
629
}
630
631
static __inline__ __m256i __DEFAULT_FN_ATTRS
632
_mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) {
633
return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
634
(__v4di) __W,
635
(__mmask8) __U);
636
}
637
638
static __inline__ __m256i __DEFAULT_FN_ATTRS
639
_mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A) {
640
return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
641
(__v4di) _mm256_setzero_si256(),
642
(__mmask8) __U);
643
}
644
645
static __inline__ __m128i __DEFAULT_FN_ATTRS
646
_mm_cvttpd_epu64 (__m128d __A) {
647
return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
648
(__v2di) _mm_setzero_si128(),
649
(__mmask8) -1);
650
}
651
652
static __inline__ __m128i __DEFAULT_FN_ATTRS
653
_mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) {
654
return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
655
(__v2di) __W,
656
(__mmask8) __U);
657
}
658
659
static __inline__ __m128i __DEFAULT_FN_ATTRS
660
_mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A) {
661
return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
662
(__v2di) _mm_setzero_si128(),
663
(__mmask8) __U);
664
}
665
666
static __inline__ __m256i __DEFAULT_FN_ATTRS
667
_mm256_cvttpd_epu64 (__m256d __A) {
668
return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
669
(__v4di) _mm256_setzero_si256(),
670
(__mmask8) -1);
671
}
672
673
static __inline__ __m256i __DEFAULT_FN_ATTRS
674
_mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) {
675
return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
676
(__v4di) __W,
677
(__mmask8) __U);
678
}
679
680
static __inline__ __m256i __DEFAULT_FN_ATTRS
681
_mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A) {
682
return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
683
(__v4di) _mm256_setzero_si256(),
684
(__mmask8) __U);
685
}
686
687
static __inline__ __m128i __DEFAULT_FN_ATTRS
688
_mm_cvttps_epi64 (__m128 __A) {
689
return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
690
(__v2di) _mm_setzero_si128(),
691
(__mmask8) -1);
692
}
693
694
static __inline__ __m128i __DEFAULT_FN_ATTRS
695
_mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) {
696
return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
697
(__v2di) __W,
698
(__mmask8) __U);
699
}
700
701
static __inline__ __m128i __DEFAULT_FN_ATTRS
702
_mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) {
703
return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
704
(__v2di) _mm_setzero_si128(),
705
(__mmask8) __U);
706
}
707
708
static __inline__ __m256i __DEFAULT_FN_ATTRS
709
_mm256_cvttps_epi64 (__m128 __A) {
710
return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
711
(__v4di) _mm256_setzero_si256(),
712
(__mmask8) -1);
713
}
714
715
static __inline__ __m256i __DEFAULT_FN_ATTRS
716
_mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) {
717
return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
718
(__v4di) __W,
719
(__mmask8) __U);
720
}
721
722
static __inline__ __m256i __DEFAULT_FN_ATTRS
723
_mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) {
724
return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
725
(__v4di) _mm256_setzero_si256(),
726
(__mmask8) __U);
727
}
728
729
static __inline__ __m128i __DEFAULT_FN_ATTRS
730
_mm_cvttps_epu64 (__m128 __A) {
731
return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
732
(__v2di) _mm_setzero_si128(),
733
(__mmask8) -1);
734
}
735
736
static __inline__ __m128i __DEFAULT_FN_ATTRS
737
_mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) {
738
return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
739
(__v2di) __W,
740
(__mmask8) __U);
741
}
742
743
static __inline__ __m128i __DEFAULT_FN_ATTRS
744
_mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) {
745
return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
746
(__v2di) _mm_setzero_si128(),
747
(__mmask8) __U);
748
}
749
750
static __inline__ __m256i __DEFAULT_FN_ATTRS
751
_mm256_cvttps_epu64 (__m128 __A) {
752
return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
753
(__v4di) _mm256_setzero_si256(),
754
(__mmask8) -1);
755
}
756
757
static __inline__ __m256i __DEFAULT_FN_ATTRS
758
_mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) {
759
return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
760
(__v4di) __W,
761
(__mmask8) __U);
762
}
763
764
static __inline__ __m256i __DEFAULT_FN_ATTRS
765
_mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) {
766
return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
767
(__v4di) _mm256_setzero_si256(),
768
(__mmask8) __U);
769
}
770
771
static __inline__ __m128d __DEFAULT_FN_ATTRS
772
_mm_cvtepu64_pd (__m128i __A) {
773
return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
774
(__v2df) _mm_setzero_pd(),
775
(__mmask8) -1);
776
}
777
778
static __inline__ __m128d __DEFAULT_FN_ATTRS
779
_mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
780
return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
781
(__v2df) __W,
782
(__mmask8) __U);
783
}
784
785
static __inline__ __m128d __DEFAULT_FN_ATTRS
786
_mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A) {
787
return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
788
(__v2df) _mm_setzero_pd(),
789
(__mmask8) __U);
790
}
791
792
static __inline__ __m256d __DEFAULT_FN_ATTRS
793
_mm256_cvtepu64_pd (__m256i __A) {
794
return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
795
(__v4df) _mm256_setzero_pd(),
796
(__mmask8) -1);
797
}
798
799
static __inline__ __m256d __DEFAULT_FN_ATTRS
800
_mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
801
return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
802
(__v4df) __W,
803
(__mmask8) __U);
804
}
805
806
static __inline__ __m256d __DEFAULT_FN_ATTRS
807
_mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A) {
808
return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
809
(__v4df) _mm256_setzero_pd(),
810
(__mmask8) __U);
811
}
812
813
static __inline__ __m128 __DEFAULT_FN_ATTRS
814
_mm_cvtepu64_ps (__m128i __A) {
815
return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
816
(__v4sf) _mm_setzero_ps(),
817
(__mmask8) -1);
818
}
819
820
static __inline__ __m128 __DEFAULT_FN_ATTRS
821
_mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A) {
822
return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
823
(__v4sf) __W,
824
(__mmask8) __U);
825
}
826
827
static __inline__ __m128 __DEFAULT_FN_ATTRS
828
_mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A) {
829
return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
830
(__v4sf) _mm_setzero_ps(),
831
(__mmask8) __U);
832
}
833
834
static __inline__ __m128 __DEFAULT_FN_ATTRS
835
_mm256_cvtepu64_ps (__m256i __A) {
836
return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
837
(__v4sf) _mm_setzero_ps(),
838
(__mmask8) -1);
839
}
840
841
static __inline__ __m128 __DEFAULT_FN_ATTRS
842
_mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
843
return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
844
(__v4sf) __W,
845
(__mmask8) __U);
846
}
847
848
static __inline__ __m128 __DEFAULT_FN_ATTRS
849
_mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) {
850
return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
851
(__v4sf) _mm_setzero_ps(),
852
(__mmask8) __U);
853
}
854
855
/* Unmasked form: expands to the rangepd128 builtin on A and B with the int C, an all-zero vector and mask (__mmask8)-1. */
#define _mm_range_pd(A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_rangepd128_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1); })
860
861
/* _mask form: expands to the rangepd128 builtin on A and B with the int C, then W and the mask U. */
#define _mm_mask_range_pd(W, U, A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_rangepd128_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), \
      (__v2df)(__m128d)(W), (__mmask8)(U)); })
866
867
/* _maskz form: expands to the rangepd128 builtin on A and B with the int C, an all-zero vector and the mask U. */
#define _mm_maskz_range_pd(U, A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_rangepd128_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U)); })
872
873
/* Unmasked form: expands to the rangepd256 builtin on A and B with the int C, an all-zero vector and mask (__mmask8)-1. */
#define _mm256_range_pd(A, B, C) __extension__ ({ \
  (__m256d)__builtin_ia32_rangepd256_mask( \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \
      (__v4df)_mm256_setzero_pd(), (__mmask8)-1); })
878
879
/* _mask form: expands to the rangepd256 builtin on A and B with the int C, then W and the mask U. */
#define _mm256_mask_range_pd(W, U, A, B, C) __extension__ ({ \
  (__m256d)__builtin_ia32_rangepd256_mask( \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \
      (__v4df)(__m256d)(W), (__mmask8)(U)); })
884
885
/* _maskz form: expands to the rangepd256 builtin on A and B with the int C, an all-zero vector and the mask U. */
#define _mm256_maskz_range_pd(U, A, B, C) __extension__ ({ \
  (__m256d)__builtin_ia32_rangepd256_mask( \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \
      (__v4df)_mm256_setzero_pd(), (__mmask8)(U)); })
890
891
/* Unmasked form: expands to the rangeps128 builtin on A and B with the int C, an all-zero vector and mask (__mmask8)-1. */
#define _mm_range_ps(A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_rangeps128_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1); })
896
897
/* _mask form: expands to the rangeps128 builtin on A and B with the int C, then W and the mask U. */
#define _mm_mask_range_ps(W, U, A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_rangeps128_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), \
      (__v4sf)(__m128)(W), (__mmask8)(U)); })
901
902
/* _maskz form: expands to the rangeps128 builtin on A and B with the int C, an all-zero vector and the mask U. */
#define _mm_maskz_range_ps(U, A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_rangeps128_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U)); })
907
908
/* Unmasked form: expands to the rangeps256 builtin on A and B with the int C, an all-zero vector and mask (__mmask8)-1. */
#define _mm256_range_ps(A, B, C) __extension__ ({ \
  (__m256)__builtin_ia32_rangeps256_mask( \
      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \
      (__v8sf)_mm256_setzero_ps(), (__mmask8)-1); })
913
914
/* _mask form: expands to the rangeps256 builtin on A and B with the int C, then W and the mask U. */
#define _mm256_mask_range_ps(W, U, A, B, C) __extension__ ({ \
  (__m256)__builtin_ia32_rangeps256_mask( \
      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \
      (__v8sf)(__m256)(W), (__mmask8)(U)); })
918
919
/* _maskz form: expands to the rangeps256 builtin on A and B with the int C, an all-zero vector and the mask U. */
#define _mm256_maskz_range_ps(U, A, B, C) __extension__ ({ \
  (__m256)__builtin_ia32_rangeps256_mask( \
      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \
      (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)); })
924
925
/* Unmasked form: expands to the reducepd128 builtin on A with the int B, an all-zero vector and mask (__mmask8)-1. */
#define _mm_reduce_pd(A, B) __extension__ ({ \
  (__m128d)__builtin_ia32_reducepd128_mask( \
      (__v2df)(__m128d)(A), (int)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1); })
929
930
/* _mask form: expands to the reducepd128 builtin on A with the int B, then W and the mask U. */
#define _mm_mask_reduce_pd(W, U, A, B) __extension__ ({ \
  (__m128d)__builtin_ia32_reducepd128_mask( \
      (__v2df)(__m128d)(A), (int)(B), \
      (__v2df)(__m128d)(W), (__mmask8)(U)); })
934
935
/* _maskz form: expands to the reducepd128 builtin on A with the int B, an all-zero vector and the mask U. */
#define _mm_maskz_reduce_pd(U, A, B) __extension__ ({ \
  (__m128d)__builtin_ia32_reducepd128_mask( \
      (__v2df)(__m128d)(A), (int)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U)); })
939
940
/* Unmasked form: expands to the reducepd256 builtin on A with the int B, an all-zero vector and mask (__mmask8)-1. */
#define _mm256_reduce_pd(A, B) __extension__ ({ \
  (__m256d)__builtin_ia32_reducepd256_mask( \
      (__v4df)(__m256d)(A), (int)(B), \
      (__v4df)_mm256_setzero_pd(), (__mmask8)-1); })
944
945
/* _mask form: expands to the reducepd256 builtin on A with the int B, then W and the mask U. */
#define _mm256_mask_reduce_pd(W, U, A, B) __extension__ ({ \
  (__m256d)__builtin_ia32_reducepd256_mask( \
      (__v4df)(__m256d)(A), (int)(B), \
      (__v4df)(__m256d)(W), (__mmask8)(U)); })
949
950
/* Wraps __builtin_ia32_reducepd256_mask: A is passed as __v4df, B as int,
 * a zero vector from _mm256_setzero_pd() as the third operand, and U as
 * the __mmask8 mask.  The result is cast to __m256d. */
#define _mm256_maskz_reduce_pd(U, A, B) \
  __extension__ ({ \
    (__m256d)__builtin_ia32_reducepd256_mask( \
        (__v4df)(__m256d)(A), \
        (int)(B), \
        (__v4df)_mm256_setzero_pd(), \
        (__mmask8)(U)); \
  })
954
955
/* Wraps __builtin_ia32_reduceps128_mask: A is passed as __v4sf, B as int,
 * a zero vector from _mm_setzero_ps() as the third operand, and
 * (__mmask8)-1 as the mask.  The result is cast to __m128. */
#define _mm_reduce_ps(A, B) \
  __extension__ ({ \
    (__m128)__builtin_ia32_reduceps128_mask( \
        (__v4sf)(__m128)(A), \
        (int)(B), \
        (__v4sf)_mm_setzero_ps(), \
        (__mmask8)-1); \
  })
959
960
/* Wraps __builtin_ia32_reduceps128_mask: A is passed as __v4sf, B as int,
 * W (as __v4sf) as the third operand, and U as the __mmask8 mask.  The
 * result is cast to __m128. */
#define _mm_mask_reduce_ps(W, U, A, B) \
  __extension__ ({ \
    (__m128)__builtin_ia32_reduceps128_mask( \
        (__v4sf)(__m128)(A), \
        (int)(B), \
        (__v4sf)(__m128)(W), \
        (__mmask8)(U)); \
  })
964
965
/* Wraps __builtin_ia32_reduceps128_mask: A is passed as __v4sf, B as int,
 * a zero vector from _mm_setzero_ps() as the third operand, and U as the
 * __mmask8 mask.  The result is cast to __m128. */
#define _mm_maskz_reduce_ps(U, A, B) \
  __extension__ ({ \
    (__m128)__builtin_ia32_reduceps128_mask( \
        (__v4sf)(__m128)(A), \
        (int)(B), \
        (__v4sf)_mm_setzero_ps(), \
        (__mmask8)(U)); \
  })
969
970
/* Wraps __builtin_ia32_reduceps256_mask: A is passed as __v8sf, B as int,
 * a zero vector from _mm256_setzero_ps() as the third operand, and
 * (__mmask8)-1 as the mask.  The result is cast to __m256. */
#define _mm256_reduce_ps(A, B) \
  __extension__ ({ \
    (__m256)__builtin_ia32_reduceps256_mask( \
        (__v8sf)(__m256)(A), \
        (int)(B), \
        (__v8sf)_mm256_setzero_ps(), \
        (__mmask8)-1); \
  })
974
975
/* Wraps __builtin_ia32_reduceps256_mask: A is passed as __v8sf, B as int,
 * W (as __v8sf) as the third operand, and U as the __mmask8 mask.  The
 * result is cast to __m256. */
#define _mm256_mask_reduce_ps(W, U, A, B) \
  __extension__ ({ \
    (__m256)__builtin_ia32_reduceps256_mask( \
        (__v8sf)(__m256)(A), \
        (int)(B), \
        (__v8sf)(__m256)(W), \
        (__mmask8)(U)); \
  })
979
980
/* Wraps __builtin_ia32_reduceps256_mask: A is passed as __v8sf, B as int,
 * a zero vector from _mm256_setzero_ps() as the third operand, and U as
 * the __mmask8 mask.  The result is cast to __m256. */
#define _mm256_maskz_reduce_ps(U, A, B) \
  __extension__ ({ \
    (__m256)__builtin_ia32_reduceps256_mask( \
        (__v8sf)(__m256)(A), \
        (int)(B), \
        (__v8sf)_mm256_setzero_ps(), \
        (__mmask8)(U)); \
  })
984
985
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
986
_mm_movepi32_mask (__m128i __A)
987
{
988
return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A);
989
}
990
991
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
992
_mm256_movepi32_mask (__m256i __A)
993
{
994
return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A);
995
}
996
997
static __inline__ __m128i __DEFAULT_FN_ATTRS
998
_mm_movm_epi32 (__mmask8 __A)
999
{
1000
return (__m128i) __builtin_ia32_cvtmask2d128 (__A);
1001
}
1002
1003
static __inline__ __m256i __DEFAULT_FN_ATTRS
1004
_mm256_movm_epi32 (__mmask8 __A)
1005
{
1006
return (__m256i) __builtin_ia32_cvtmask2d256 (__A);
1007
}
1008
1009
static __inline__ __m128i __DEFAULT_FN_ATTRS
1010
_mm_movm_epi64 (__mmask8 __A)
1011
{
1012
return (__m128i) __builtin_ia32_cvtmask2q128 (__A);
1013
}
1014
1015
static __inline__ __m256i __DEFAULT_FN_ATTRS
1016
_mm256_movm_epi64 (__mmask8 __A)
1017
{
1018
return (__m256i) __builtin_ia32_cvtmask2q256 (__A);
1019
}
1020
1021
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
1022
_mm_movepi64_mask (__m128i __A)
1023
{
1024
return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A);
1025
}
1026
1027
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
1028
_mm256_movepi64_mask (__m256i __A)
1029
{
1030
return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
1031
}
1032
1033
static __inline__ __m256 __DEFAULT_FN_ATTRS
1034
_mm256_broadcast_f32x2 (__m128 __A)
1035
{
1036
return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
1037
(__v8sf)_mm256_undefined_ps(),
1038
(__mmask8) -1);
1039
}
1040
1041
static __inline__ __m256 __DEFAULT_FN_ATTRS
1042
_mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
1043
{
1044
return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
1045
(__v8sf) __O,
1046
__M);
1047
}
1048
1049
static __inline__ __m256 __DEFAULT_FN_ATTRS
1050
_mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
1051
{
1052
return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
1053
(__v8sf) _mm256_setzero_ps (),
1054
__M);
1055
}
1056
1057
static __inline__ __m256d __DEFAULT_FN_ATTRS
1058
_mm256_broadcast_f64x2 (__m128d __A)
1059
{
1060
return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A,
1061
(__v4df)_mm256_undefined_pd(),
1062
(__mmask8) -1);
1063
}
1064
1065
static __inline__ __m256d __DEFAULT_FN_ATTRS
1066
_mm256_mask_broadcast_f64x2 (__m256d __O, __mmask8 __M, __m128d __A)
1067
{
1068
return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A,
1069
(__v4df) __O,
1070
__M);
1071
}
1072
1073
static __inline__ __m256d __DEFAULT_FN_ATTRS
1074
_mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
1075
{
1076
return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A,
1077
(__v4df) _mm256_setzero_ps (),
1078
__M);
1079
}
1080
1081
static __inline__ __m128i __DEFAULT_FN_ATTRS
1082
_mm_broadcast_i32x2 (__m128i __A)
1083
{
1084
return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
1085
(__v4si)_mm_undefined_si128(),
1086
(__mmask8) -1);
1087
}
1088
1089
static __inline__ __m128i __DEFAULT_FN_ATTRS
1090
_mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
1091
{
1092
return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
1093
(__v4si) __O,
1094
__M);
1095
}
1096
1097
static __inline__ __m128i __DEFAULT_FN_ATTRS
1098
_mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
1099
{
1100
return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
1101
(__v4si) _mm_setzero_si128 (),
1102
__M);
1103
}
1104
1105
static __inline__ __m256i __DEFAULT_FN_ATTRS
1106
_mm256_broadcast_i32x2 (__m128i __A)
1107
{
1108
return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
1109
(__v8si)_mm256_undefined_si256(),
1110
(__mmask8) -1);
1111
}
1112
1113
static __inline__ __m256i __DEFAULT_FN_ATTRS
1114
_mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
1115
{
1116
return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
1117
(__v8si) __O,
1118
__M);
1119
}
1120
1121
static __inline__ __m256i __DEFAULT_FN_ATTRS
1122
_mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
1123
{
1124
return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
1125
(__v8si) _mm256_setzero_si256 (),
1126
__M);
1127
}
1128
1129
static __inline__ __m256i __DEFAULT_FN_ATTRS
1130
_mm256_broadcast_i64x2 (__m128i __A)
1131
{
1132
return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) __A,
1133
(__v4di)_mm256_undefined_si256(),
1134
(__mmask8) -1);
1135
}
1136
1137
static __inline__ __m256i __DEFAULT_FN_ATTRS
1138
_mm256_mask_broadcast_i64x2 (__m256i __O, __mmask8 __M, __m128i __A)
1139
{
1140
return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) __A,
1141
(__v4di) __O,
1142
__M);
1143
}
1144
1145
static __inline__ __m256i __DEFAULT_FN_ATTRS
1146
_mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
1147
{
1148
return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) __A,
1149
(__v4di) _mm256_setzero_si256 (),
1150
__M);
1151
}
1152
1153
/* Wraps __builtin_ia32_extractf64x2_256_mask: A is passed as __v4df, imm as
 * int, a zero vector from _mm_setzero_pd() as the third operand, and
 * (__mmask8)-1 as the mask.  The result is cast to __m128d. */
#define _mm256_extractf64x2_pd(A, imm) \
  __extension__ ({ \
    (__m128d)__builtin_ia32_extractf64x2_256_mask( \
        (__v4df)(__m256d)(A), \
        (int)(imm), \
        (__v2df)_mm_setzero_pd(), \
        (__mmask8)-1); \
  })
1158
1159
/* Wraps __builtin_ia32_extractf64x2_256_mask: A is passed as __v4df, imm as
 * int, W (as __v2df) as the third operand, and U as the __mmask8 mask.  The
 * result is cast to __m128d. */
#define _mm256_mask_extractf64x2_pd(W, U, A, imm) \
  __extension__ ({ \
    (__m128d)__builtin_ia32_extractf64x2_256_mask( \
        (__v4df)(__m256d)(A), \
        (int)(imm), \
        (__v2df)(__m128d)(W), \
        (__mmask8)(U)); \
  })
1164
1165
/* Wraps __builtin_ia32_extractf64x2_256_mask: A is passed as __v4df, imm as
 * int, a zero vector from _mm_setzero_pd() as the third operand, and U as
 * the __mmask8 mask.  The result is cast to __m128d. */
#define _mm256_maskz_extractf64x2_pd(U, A, imm) \
  __extension__ ({ \
    (__m128d)__builtin_ia32_extractf64x2_256_mask( \
        (__v4df)(__m256d)(A), \
        (int)(imm), \
        (__v2df)_mm_setzero_pd(), \
        (__mmask8)(U)); \
  })
1170
1171
/* Wraps __builtin_ia32_extracti64x2_256_mask: A is passed as __v4di, imm as
 * int, a zero vector as the third operand, and (__mmask8)-1 as the mask.
 * The result is cast to __m128i.
 *
 * The zero operand comes from _mm_setzero_si128(), the same helper the
 * other 128-bit integer wrappers in this header use, rather than the
 * non-standard _mm_setzero_di(). */
#define _mm256_extracti64x2_epi64(A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
                                                (int)(imm), \
                                                (__v2di)_mm_setzero_si128(), \
                                                (__mmask8)-1); })
1176
1177
/* Wraps __builtin_ia32_extracti64x2_256_mask: A is passed as __v4di, imm as
 * int, W (as __v2di) as the third operand, and U as the __mmask8 mask.  The
 * result is cast to __m128i. */
#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \
  __extension__ ({ \
    (__m128i)__builtin_ia32_extracti64x2_256_mask( \
        (__v4di)(__m256i)(A), \
        (int)(imm), \
        (__v2di)(__m128i)(W), \
        (__mmask8)(U)); \
  })
1182
1183
/* Wraps __builtin_ia32_extracti64x2_256_mask: A is passed as __v4di, imm as
 * int, a zero vector as the third operand, and U as the __mmask8 mask.  The
 * result is cast to __m128i.
 *
 * The zero operand comes from _mm_setzero_si128(), the same helper the
 * other 128-bit integer wrappers in this header use, rather than the
 * non-standard _mm_setzero_di(). */
#define _mm256_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
                                                (int)(imm), \
                                                (__v2di)_mm_setzero_si128(), \
                                                (__mmask8)(U)); })
1188
1189
/* Wraps __builtin_ia32_insertf64x2_256_mask: A is passed as __v4df, B as
 * __v2df, imm as int, a zero vector from _mm256_setzero_pd() as the fourth
 * operand, and (__mmask8)-1 as the mask.  The result is cast to __m256d. */
#define _mm256_insertf64x2(A, B, imm) \
  __extension__ ({ \
    (__m256d)__builtin_ia32_insertf64x2_256_mask( \
        (__v4df)(__m256d)(A), \
        (__v2df)(__m128d)(B), \
        (int)(imm), \
        (__v4df)_mm256_setzero_pd(), \
        (__mmask8)-1); \
  })
1195
1196
/* Wraps __builtin_ia32_insertf64x2_256_mask: A is passed as __v4df, B as
 * __v2df, imm as int, W (as __v4df) as the fourth operand, and U as the
 * __mmask8 mask.  The result is cast to __m256d. */
#define _mm256_mask_insertf64x2(W, U, A, B, imm) \
  __extension__ ({ \
    (__m256d)__builtin_ia32_insertf64x2_256_mask( \
        (__v4df)(__m256d)(A), \
        (__v2df)(__m128d)(B), \
        (int)(imm), \
        (__v4df)(__m256d)(W), \
        (__mmask8)(U)); \
  })
1202
1203
/* Wraps __builtin_ia32_insertf64x2_256_mask: A is passed as __v4df, B as
 * __v2df, imm as int, a zero vector from _mm256_setzero_pd() as the fourth
 * operand, and U as the __mmask8 mask.  The result is cast to __m256d. */
#define _mm256_maskz_insertf64x2(U, A, B, imm) \
  __extension__ ({ \
    (__m256d)__builtin_ia32_insertf64x2_256_mask( \
        (__v4df)(__m256d)(A), \
        (__v2df)(__m128d)(B), \
        (int)(imm), \
        (__v4df)_mm256_setzero_pd(), \
        (__mmask8)(U)); \
  })
1209
1210
/* Wraps __builtin_ia32_inserti64x2_256_mask: A is passed as __v4di, B as
 * __v2di, imm as int, a zero vector from _mm256_setzero_si256() as the
 * fourth operand, and (__mmask8)-1 as the mask.  The result is cast to
 * __m256i. */
#define _mm256_inserti64x2(A, B, imm) \
  __extension__ ({ \
    (__m256i)__builtin_ia32_inserti64x2_256_mask( \
        (__v4di)(__m256i)(A), \
        (__v2di)(__m128i)(B), \
        (int)(imm), \
        (__v4di)_mm256_setzero_si256(), \
        (__mmask8)-1); \
  })
1216
1217
/* Wraps __builtin_ia32_inserti64x2_256_mask: A is passed as __v4di, B as
 * __v2di, imm as int, W (as __v4di) as the fourth operand, and U as the
 * __mmask8 mask.  The result is cast to __m256i. */
#define _mm256_mask_inserti64x2(W, U, A, B, imm) \
  __extension__ ({ \
    (__m256i)__builtin_ia32_inserti64x2_256_mask( \
        (__v4di)(__m256i)(A), \
        (__v2di)(__m128i)(B), \
        (int)(imm), \
        (__v4di)(__m256i)(W), \
        (__mmask8)(U)); \
  })
1223
1224
/* Wraps __builtin_ia32_inserti64x2_256_mask: A is passed as __v4di, B as
 * __v2di, imm as int, a zero vector from _mm256_setzero_si256() as the
 * fourth operand, and U as the __mmask8 mask.  The result is cast to
 * __m256i. */
#define _mm256_maskz_inserti64x2(U, A, B, imm) \
  __extension__ ({ \
    (__m256i)__builtin_ia32_inserti64x2_256_mask( \
        (__v4di)(__m256i)(A), \
        (__v2di)(__m128i)(B), \
        (int)(imm), \
        (__v4di)_mm256_setzero_si256(), \
        (__mmask8)(U)); \
  })
1230
1231
/* Wraps __builtin_ia32_fpclasspd128_mask: A is passed as __v2df, imm as
 * int, and U as the __mmask8 mask.  The result is cast to __mmask8. */
#define _mm_mask_fpclass_pd_mask(U, A, imm) \
  __extension__ ({ \
    (__mmask8)__builtin_ia32_fpclasspd128_mask( \
        (__v2df)(__m128d)(A), \
        (int)(imm), \
        (__mmask8)(U)); \
  })
1234
1235
/* Wraps __builtin_ia32_fpclasspd128_mask: A is passed as __v2df, imm as
 * int, and (__mmask8)-1 as the mask.  The result is cast to __mmask8. */
#define _mm_fpclass_pd_mask(A, imm) \
  __extension__ ({ \
    (__mmask8)__builtin_ia32_fpclasspd128_mask( \
        (__v2df)(__m128d)(A), \
        (int)(imm), \
        (__mmask8)-1); \
  })
1238
1239
/* Wraps __builtin_ia32_fpclasspd256_mask: A is passed as __v4df, imm as
 * int, and U as the __mmask8 mask.  The result is cast to __mmask8. */
#define _mm256_mask_fpclass_pd_mask(U, A, imm) \
  __extension__ ({ \
    (__mmask8)__builtin_ia32_fpclasspd256_mask( \
        (__v4df)(__m256d)(A), \
        (int)(imm), \
        (__mmask8)(U)); \
  })
1242
1243
/* Wraps __builtin_ia32_fpclasspd256_mask: A is passed as __v4df, imm as
 * int, and (__mmask8)-1 as the mask.  The result is cast to __mmask8. */
#define _mm256_fpclass_pd_mask(A, imm) \
  __extension__ ({ \
    (__mmask8)__builtin_ia32_fpclasspd256_mask( \
        (__v4df)(__m256d)(A), \
        (int)(imm), \
        (__mmask8)-1); \
  })
1246
1247
/* Wraps __builtin_ia32_fpclassps128_mask: A is passed as __v4sf, imm as
 * int, and U as the __mmask8 mask.  The result is cast to __mmask8. */
#define _mm_mask_fpclass_ps_mask(U, A, imm) \
  __extension__ ({ \
    (__mmask8)__builtin_ia32_fpclassps128_mask( \
        (__v4sf)(__m128)(A), \
        (int)(imm), \
        (__mmask8)(U)); \
  })
1250
1251
/* Wraps __builtin_ia32_fpclassps128_mask: A is passed as __v4sf, imm as
 * int, and (__mmask8)-1 as the mask.  The result is cast to __mmask8. */
#define _mm_fpclass_ps_mask(A, imm) \
  __extension__ ({ \
    (__mmask8)__builtin_ia32_fpclassps128_mask( \
        (__v4sf)(__m128)(A), \
        (int)(imm), \
        (__mmask8)-1); \
  })
1254
1255
/* Wraps __builtin_ia32_fpclassps256_mask: A is passed as __v8sf, imm as
 * int, and U as the __mmask8 mask.  The result is cast to __mmask8. */
#define _mm256_mask_fpclass_ps_mask(U, A, imm) \
  __extension__ ({ \
    (__mmask8)__builtin_ia32_fpclassps256_mask( \
        (__v8sf)(__m256)(A), \
        (int)(imm), \
        (__mmask8)(U)); \
  })
1258
1259
/* Wraps __builtin_ia32_fpclassps256_mask: A is passed as __v8sf, imm as
 * int, and (__mmask8)-1 as the mask.  The result is cast to __mmask8. */
#define _mm256_fpclass_ps_mask(A, imm) \
  __extension__ ({ \
    (__mmask8)__builtin_ia32_fpclassps256_mask( \
        (__v8sf)(__m256)(A), \
        (int)(imm), \
        (__mmask8)-1); \
  })
1262
1263
#undef __DEFAULT_FN_ATTRS
1264
1265
#endif
1266
1267