/* Source: GitHub repository freebsd/freebsd-src,
 * path: blob/main/contrib/llvm-project/clang/lib/Headers/avx512bwintrin.h */
/*===------------- avx512bwintrin.h - AVX512BW intrinsics ------------------===
 *
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
/* This header is an implementation detail of <immintrin.h>; refuse direct
 * inclusion. */
#ifndef __IMMINTRIN_H
#error "Never use <avx512bwintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512BWINTRIN_H
#define __AVX512BWINTRIN_H

/* 32-bit and 64-bit mask value types used by the intrinsics in this file. */
typedef unsigned int __mmask32;
typedef unsigned long long __mmask64;

/* Define the default attributes for the functions in this file. */

/* Attributes for the 512-bit vector intrinsics: always inlined, no debug
 * info, target features "avx512bw,evex512", minimum vector width 512. */
#define __DEFAULT_FN_ATTRS512                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512bw,evex512"), __min_vector_width__(512)))

/* Attributes for the remaining (mask-only) helpers: always inlined, no debug
 * info, target features "avx512bw,no-evex512". */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512bw,no-evex512")))

static __inline __mmask32 __DEFAULT_FN_ATTRS
29
_knot_mask32(__mmask32 __M)
30
{
31
return __builtin_ia32_knotsi(__M);
32
}
33
34
static __inline __mmask64 __DEFAULT_FN_ATTRS _knot_mask64(__mmask64 __M) {
35
return __builtin_ia32_knotdi(__M);
36
}
37
38
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
39
_kand_mask32(__mmask32 __A, __mmask32 __B)
40
{
41
return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B);
42
}
43
44
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kand_mask64(__mmask64 __A,
45
__mmask64 __B) {
46
return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B);
47
}
48
49
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
50
_kandn_mask32(__mmask32 __A, __mmask32 __B)
51
{
52
return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B);
53
}
54
55
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kandn_mask64(__mmask64 __A,
56
__mmask64 __B) {
57
return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B);
58
}
59
60
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
61
_kor_mask32(__mmask32 __A, __mmask32 __B)
62
{
63
return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B);
64
}
65
66
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kor_mask64(__mmask64 __A,
67
__mmask64 __B) {
68
return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B);
69
}
70
71
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
72
_kxnor_mask32(__mmask32 __A, __mmask32 __B)
73
{
74
return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B);
75
}
76
77
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kxnor_mask64(__mmask64 __A,
78
__mmask64 __B) {
79
return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B);
80
}
81
82
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
83
_kxor_mask32(__mmask32 __A, __mmask32 __B)
84
{
85
return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B);
86
}
87
88
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kxor_mask64(__mmask64 __A,
89
__mmask64 __B) {
90
return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B);
91
}
92
93
static __inline__ unsigned char __DEFAULT_FN_ATTRS
94
_kortestc_mask32_u8(__mmask32 __A, __mmask32 __B)
95
{
96
return (unsigned char)__builtin_ia32_kortestcsi(__A, __B);
97
}
98
99
static __inline__ unsigned char __DEFAULT_FN_ATTRS
100
_kortestz_mask32_u8(__mmask32 __A, __mmask32 __B)
101
{
102
return (unsigned char)__builtin_ia32_kortestzsi(__A, __B);
103
}
104
105
static __inline__ unsigned char __DEFAULT_FN_ATTRS
106
_kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
107
*__C = (unsigned char)__builtin_ia32_kortestcsi(__A, __B);
108
return (unsigned char)__builtin_ia32_kortestzsi(__A, __B);
109
}
110
111
static __inline__ unsigned char __DEFAULT_FN_ATTRS
112
_kortestc_mask64_u8(__mmask64 __A, __mmask64 __B) {
113
return (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
114
}
115
116
static __inline__ unsigned char __DEFAULT_FN_ATTRS
117
_kortestz_mask64_u8(__mmask64 __A, __mmask64 __B) {
118
return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
119
}
120
121
static __inline__ unsigned char __DEFAULT_FN_ATTRS
122
_kortest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
123
*__C = (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
124
return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
125
}
126
127
static __inline__ unsigned char __DEFAULT_FN_ATTRS
128
_ktestc_mask32_u8(__mmask32 __A, __mmask32 __B)
129
{
130
return (unsigned char)__builtin_ia32_ktestcsi(__A, __B);
131
}
132
133
static __inline__ unsigned char __DEFAULT_FN_ATTRS
134
_ktestz_mask32_u8(__mmask32 __A, __mmask32 __B)
135
{
136
return (unsigned char)__builtin_ia32_ktestzsi(__A, __B);
137
}
138
139
static __inline__ unsigned char __DEFAULT_FN_ATTRS
140
_ktest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
141
*__C = (unsigned char)__builtin_ia32_ktestcsi(__A, __B);
142
return (unsigned char)__builtin_ia32_ktestzsi(__A, __B);
143
}
144
145
static __inline__ unsigned char __DEFAULT_FN_ATTRS
146
_ktestc_mask64_u8(__mmask64 __A, __mmask64 __B) {
147
return (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
148
}
149
150
static __inline__ unsigned char __DEFAULT_FN_ATTRS
151
_ktestz_mask64_u8(__mmask64 __A, __mmask64 __B) {
152
return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
153
}
154
155
static __inline__ unsigned char __DEFAULT_FN_ATTRS
156
_ktest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
157
*__C = (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
158
return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
159
}
160
161
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
162
_kadd_mask32(__mmask32 __A, __mmask32 __B)
163
{
164
return (__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B);
165
}
166
167
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kadd_mask64(__mmask64 __A,
168
__mmask64 __B) {
169
return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B);
170
}
171
172
/* Mask shifts by a count I. These are macros rather than functions,
 * presumably so that I reaches the builtin as a constant expression.
 * A is converted to the mask type and I to unsigned int before the call. */

/* Shift the 32-bit mask A left by I bits. */
#define _kshiftli_mask32(A, I) \
  ((__mmask32)__builtin_ia32_kshiftlisi((__mmask32)(A), (unsigned int)(I)))

/* Shift the 32-bit mask A right by I bits. */
#define _kshiftri_mask32(A, I) \
  ((__mmask32)__builtin_ia32_kshiftrisi((__mmask32)(A), (unsigned int)(I)))

/* Shift the 64-bit mask A left by I bits. */
#define _kshiftli_mask64(A, I) \
  ((__mmask64)__builtin_ia32_kshiftlidi((__mmask64)(A), (unsigned int)(I)))

/* Shift the 64-bit mask A right by I bits. */
#define _kshiftri_mask64(A, I) \
  ((__mmask64)__builtin_ia32_kshiftridi((__mmask64)(A), (unsigned int)(I)))

static __inline__ unsigned int __DEFAULT_FN_ATTRS
185
_cvtmask32_u32(__mmask32 __A) {
186
return (unsigned int)__builtin_ia32_kmovd((__mmask32)__A);
187
}
188
189
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
190
_cvtmask64_u64(__mmask64 __A) {
191
return (unsigned long long)__builtin_ia32_kmovq((__mmask64)__A);
192
}
193
194
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
195
_cvtu32_mask32(unsigned int __A) {
196
return (__mmask32)__builtin_ia32_kmovd((__mmask32)__A);
197
}
198
199
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
200
_cvtu64_mask64(unsigned long long __A) {
201
return (__mmask64)__builtin_ia32_kmovq((__mmask64)__A);
202
}
203
204
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
205
_load_mask32(__mmask32 *__A) {
206
return (__mmask32)__builtin_ia32_kmovd(*(__mmask32 *)__A);
207
}
208
209
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _load_mask64(__mmask64 *__A) {
210
return (__mmask64)__builtin_ia32_kmovq(*(__mmask64 *)__A);
211
}
212
213
static __inline__ void __DEFAULT_FN_ATTRS
214
_store_mask32(__mmask32 *__A, __mmask32 __B) {
215
*(__mmask32 *)__A = __builtin_ia32_kmovd((__mmask32)__B);
216
}
217
218
static __inline__ void __DEFAULT_FN_ATTRS _store_mask64(__mmask64 *__A,
219
__mmask64 __B) {
220
*(__mmask64 *)__A = __builtin_ia32_kmovq((__mmask64)__B);
221
}
222
223
/* Integer compare */

/* Generic lane-wise comparisons of two 512-bit integer vectors.
 *   a, b : operands, converted to __m512i and then to the lane vector type.
 *   p    : comparison predicate, converted to int (the shorthands in this
 *          file pass _MM_CMPINT_* constants).
 *   m    : (mask_ forms only) mask forwarded as the builtin's last operand;
 *          the unmasked forms pass an all-ones mask instead.
 * The result is a __mmask64 for 8-bit lanes and a __mmask32 for 16-bit lanes.
 * The epi forms use the cmp* builtins, the epu forms the ucmp* builtins. */

#define _mm512_cmp_epi8_mask(a, b, p) \
  ((__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
                                          (__v64qi)(__m512i)(b), (int)(p), \
                                          (__mmask64)-1))

#define _mm512_mask_cmp_epi8_mask(m, a, b, p) \
  ((__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
                                          (__v64qi)(__m512i)(b), (int)(p), \
                                          (__mmask64)(m)))

#define _mm512_cmp_epu8_mask(a, b, p) \
  ((__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
                                           (__v64qi)(__m512i)(b), (int)(p), \
                                           (__mmask64)-1))

#define _mm512_mask_cmp_epu8_mask(m, a, b, p) \
  ((__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
                                           (__v64qi)(__m512i)(b), (int)(p), \
                                           (__mmask64)(m)))

#define _mm512_cmp_epi16_mask(a, b, p) \
  ((__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
                                          (__v32hi)(__m512i)(b), (int)(p), \
                                          (__mmask32)-1))

#define _mm512_mask_cmp_epi16_mask(m, a, b, p) \
  ((__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
                                          (__v32hi)(__m512i)(b), (int)(p), \
                                          (__mmask32)(m)))

#define _mm512_cmp_epu16_mask(a, b, p) \
  ((__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
                                           (__v32hi)(__m512i)(b), (int)(p), \
                                           (__mmask32)-1))

#define _mm512_mask_cmp_epu16_mask(m, a, b, p) \
  ((__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
                                           (__v32hi)(__m512i)(b), (int)(p), \
                                           (__mmask32)(m)))

/* Fixed-predicate shorthands for epi8 comparisons. Each expands to
 * _mm512_cmp_epi8_mask (or, for the mask_ forms taking k, to
 * _mm512_mask_cmp_epi8_mask) with the matching _MM_CMPINT_* predicate:
 * eq, ge, gt, le, lt, neq. */
#define _mm512_cmpeq_epi8_mask(A, B) \
  _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi8_mask(k, A, B) \
  _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi8_mask(A, B) \
  _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi8_mask(k, A, B) \
  _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi8_mask(A, B) \
  _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi8_mask(k, A, B) \
  _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi8_mask(A, B) \
  _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi8_mask(k, A, B) \
  _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi8_mask(A, B) \
  _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi8_mask(k, A, B) \
  _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi8_mask(A, B) \
  _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi8_mask(k, A, B) \
  _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE)

/* Fixed-predicate shorthands for epu8 comparisons. Each expands to
 * _mm512_cmp_epu8_mask (or, for the mask_ forms taking k, to
 * _mm512_mask_cmp_epu8_mask) with the matching _MM_CMPINT_* predicate:
 * eq, ge, gt, le, lt, neq. */
#define _mm512_cmpeq_epu8_mask(A, B) \
  _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu8_mask(k, A, B) \
  _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu8_mask(A, B) \
  _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu8_mask(k, A, B) \
  _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu8_mask(A, B) \
  _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu8_mask(k, A, B) \
  _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu8_mask(A, B) \
  _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu8_mask(k, A, B) \
  _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu8_mask(A, B) \
  _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu8_mask(k, A, B) \
  _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu8_mask(A, B) \
  _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu8_mask(k, A, B) \
  _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE)

/* Fixed-predicate shorthands for epi16 comparisons. Each expands to
 * _mm512_cmp_epi16_mask (or, for the mask_ forms taking k, to
 * _mm512_mask_cmp_epi16_mask) with the matching _MM_CMPINT_* predicate:
 * eq, ge, gt, le, lt, neq. */
#define _mm512_cmpeq_epi16_mask(A, B) \
  _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi16_mask(k, A, B) \
  _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi16_mask(A, B) \
  _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi16_mask(k, A, B) \
  _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi16_mask(A, B) \
  _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi16_mask(k, A, B) \
  _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi16_mask(A, B) \
  _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi16_mask(k, A, B) \
  _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi16_mask(A, B) \
  _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi16_mask(k, A, B) \
  _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi16_mask(A, B) \
  _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi16_mask(k, A, B) \
  _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE)

/* Fixed-predicate shorthands for epu16 comparisons. Each expands to
 * _mm512_cmp_epu16_mask (or, for the mask_ forms taking k, to
 * _mm512_mask_cmp_epu16_mask) with the matching _MM_CMPINT_* predicate:
 * eq, ge, gt, le, lt, neq. */
#define _mm512_cmpeq_epu16_mask(A, B) \
  _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu16_mask(k, A, B) \
  _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu16_mask(A, B) \
  _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu16_mask(k, A, B) \
  _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu16_mask(A, B) \
  _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu16_mask(k, A, B) \
  _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu16_mask(A, B) \
  _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu16_mask(k, A, B) \
  _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu16_mask(A, B) \
  _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu16_mask(k, A, B) \
  _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu16_mask(A, B) \
  _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu16_mask(k, A, B) \
  _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE)

static __inline__ __m512i __DEFAULT_FN_ATTRS512
366
_mm512_add_epi8 (__m512i __A, __m512i __B) {
367
return (__m512i) ((__v64qu) __A + (__v64qu) __B);
368
}
369
370
static __inline__ __m512i __DEFAULT_FN_ATTRS512
371
_mm512_mask_add_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
372
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
373
(__v64qi)_mm512_add_epi8(__A, __B),
374
(__v64qi)__W);
375
}
376
377
static __inline__ __m512i __DEFAULT_FN_ATTRS512
378
_mm512_maskz_add_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
379
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
380
(__v64qi)_mm512_add_epi8(__A, __B),
381
(__v64qi)_mm512_setzero_si512());
382
}
383
384
static __inline__ __m512i __DEFAULT_FN_ATTRS512
385
_mm512_sub_epi8 (__m512i __A, __m512i __B) {
386
return (__m512i) ((__v64qu) __A - (__v64qu) __B);
387
}
388
389
static __inline__ __m512i __DEFAULT_FN_ATTRS512
390
_mm512_mask_sub_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
391
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
392
(__v64qi)_mm512_sub_epi8(__A, __B),
393
(__v64qi)__W);
394
}
395
396
static __inline__ __m512i __DEFAULT_FN_ATTRS512
397
_mm512_maskz_sub_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
398
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
399
(__v64qi)_mm512_sub_epi8(__A, __B),
400
(__v64qi)_mm512_setzero_si512());
401
}
402
403
static __inline__ __m512i __DEFAULT_FN_ATTRS512
404
_mm512_add_epi16 (__m512i __A, __m512i __B) {
405
return (__m512i) ((__v32hu) __A + (__v32hu) __B);
406
}
407
408
static __inline__ __m512i __DEFAULT_FN_ATTRS512
409
_mm512_mask_add_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
410
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
411
(__v32hi)_mm512_add_epi16(__A, __B),
412
(__v32hi)__W);
413
}
414
415
static __inline__ __m512i __DEFAULT_FN_ATTRS512
416
_mm512_maskz_add_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
417
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
418
(__v32hi)_mm512_add_epi16(__A, __B),
419
(__v32hi)_mm512_setzero_si512());
420
}
421
422
static __inline__ __m512i __DEFAULT_FN_ATTRS512
423
_mm512_sub_epi16 (__m512i __A, __m512i __B) {
424
return (__m512i) ((__v32hu) __A - (__v32hu) __B);
425
}
426
427
static __inline__ __m512i __DEFAULT_FN_ATTRS512
428
_mm512_mask_sub_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
429
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
430
(__v32hi)_mm512_sub_epi16(__A, __B),
431
(__v32hi)__W);
432
}
433
434
static __inline__ __m512i __DEFAULT_FN_ATTRS512
435
_mm512_maskz_sub_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
436
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
437
(__v32hi)_mm512_sub_epi16(__A, __B),
438
(__v32hi)_mm512_setzero_si512());
439
}
440
441
static __inline__ __m512i __DEFAULT_FN_ATTRS512
442
_mm512_mullo_epi16 (__m512i __A, __m512i __B) {
443
return (__m512i) ((__v32hu) __A * (__v32hu) __B);
444
}
445
446
static __inline__ __m512i __DEFAULT_FN_ATTRS512
447
_mm512_mask_mullo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
448
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
449
(__v32hi)_mm512_mullo_epi16(__A, __B),
450
(__v32hi)__W);
451
}
452
453
static __inline__ __m512i __DEFAULT_FN_ATTRS512
454
_mm512_maskz_mullo_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
455
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
456
(__v32hi)_mm512_mullo_epi16(__A, __B),
457
(__v32hi)_mm512_setzero_si512());
458
}
459
460
static __inline__ __m512i __DEFAULT_FN_ATTRS512
461
_mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W)
462
{
463
return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
464
(__v64qi) __W,
465
(__v64qi) __A);
466
}
467
468
static __inline__ __m512i __DEFAULT_FN_ATTRS512
469
_mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W)
470
{
471
return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
472
(__v32hi) __W,
473
(__v32hi) __A);
474
}
475
476
static __inline__ __m512i __DEFAULT_FN_ATTRS512
477
_mm512_abs_epi8 (__m512i __A)
478
{
479
return (__m512i)__builtin_elementwise_abs((__v64qs)__A);
480
}
481
482
static __inline__ __m512i __DEFAULT_FN_ATTRS512
483
_mm512_mask_abs_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
484
{
485
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
486
(__v64qi)_mm512_abs_epi8(__A),
487
(__v64qi)__W);
488
}
489
490
static __inline__ __m512i __DEFAULT_FN_ATTRS512
491
_mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A)
492
{
493
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
494
(__v64qi)_mm512_abs_epi8(__A),
495
(__v64qi)_mm512_setzero_si512());
496
}
497
498
static __inline__ __m512i __DEFAULT_FN_ATTRS512
499
_mm512_abs_epi16 (__m512i __A)
500
{
501
return (__m512i)__builtin_elementwise_abs((__v32hi)__A);
502
}
503
504
static __inline__ __m512i __DEFAULT_FN_ATTRS512
505
_mm512_mask_abs_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
506
{
507
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
508
(__v32hi)_mm512_abs_epi16(__A),
509
(__v32hi)__W);
510
}
511
512
static __inline__ __m512i __DEFAULT_FN_ATTRS512
513
_mm512_maskz_abs_epi16 (__mmask32 __U, __m512i __A)
514
{
515
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
516
(__v32hi)_mm512_abs_epi16(__A),
517
(__v32hi)_mm512_setzero_si512());
518
}
519
520
static __inline__ __m512i __DEFAULT_FN_ATTRS512
521
_mm512_packs_epi32(__m512i __A, __m512i __B)
522
{
523
return (__m512i)__builtin_ia32_packssdw512((__v16si)__A, (__v16si)__B);
524
}
525
526
static __inline__ __m512i __DEFAULT_FN_ATTRS512
527
_mm512_maskz_packs_epi32(__mmask32 __M, __m512i __A, __m512i __B)
528
{
529
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
530
(__v32hi)_mm512_packs_epi32(__A, __B),
531
(__v32hi)_mm512_setzero_si512());
532
}
533
534
static __inline__ __m512i __DEFAULT_FN_ATTRS512
535
_mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
536
{
537
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
538
(__v32hi)_mm512_packs_epi32(__A, __B),
539
(__v32hi)__W);
540
}
541
542
static __inline__ __m512i __DEFAULT_FN_ATTRS512
543
_mm512_packs_epi16(__m512i __A, __m512i __B)
544
{
545
return (__m512i)__builtin_ia32_packsswb512((__v32hi)__A, (__v32hi) __B);
546
}
547
548
static __inline__ __m512i __DEFAULT_FN_ATTRS512
549
_mm512_mask_packs_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
550
{
551
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
552
(__v64qi)_mm512_packs_epi16(__A, __B),
553
(__v64qi)__W);
554
}
555
556
static __inline__ __m512i __DEFAULT_FN_ATTRS512
557
_mm512_maskz_packs_epi16(__mmask64 __M, __m512i __A, __m512i __B)
558
{
559
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
560
(__v64qi)_mm512_packs_epi16(__A, __B),
561
(__v64qi)_mm512_setzero_si512());
562
}
563
564
static __inline__ __m512i __DEFAULT_FN_ATTRS512
565
_mm512_packus_epi32(__m512i __A, __m512i __B)
566
{
567
return (__m512i)__builtin_ia32_packusdw512((__v16si) __A, (__v16si) __B);
568
}
569
570
static __inline__ __m512i __DEFAULT_FN_ATTRS512
571
_mm512_maskz_packus_epi32(__mmask32 __M, __m512i __A, __m512i __B)
572
{
573
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
574
(__v32hi)_mm512_packus_epi32(__A, __B),
575
(__v32hi)_mm512_setzero_si512());
576
}
577
578
static __inline__ __m512i __DEFAULT_FN_ATTRS512
579
_mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
580
{
581
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
582
(__v32hi)_mm512_packus_epi32(__A, __B),
583
(__v32hi)__W);
584
}
585
586
static __inline__ __m512i __DEFAULT_FN_ATTRS512
587
_mm512_packus_epi16(__m512i __A, __m512i __B)
588
{
589
return (__m512i)__builtin_ia32_packuswb512((__v32hi) __A, (__v32hi) __B);
590
}
591
592
static __inline__ __m512i __DEFAULT_FN_ATTRS512
593
_mm512_mask_packus_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
594
{
595
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
596
(__v64qi)_mm512_packus_epi16(__A, __B),
597
(__v64qi)__W);
598
}
599
600
static __inline__ __m512i __DEFAULT_FN_ATTRS512
601
_mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B)
602
{
603
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
604
(__v64qi)_mm512_packus_epi16(__A, __B),
605
(__v64qi)_mm512_setzero_si512());
606
}
607
608
static __inline__ __m512i __DEFAULT_FN_ATTRS512
609
_mm512_adds_epi8 (__m512i __A, __m512i __B)
610
{
611
return (__m512i)__builtin_elementwise_add_sat((__v64qs)__A, (__v64qs)__B);
612
}
613
614
static __inline__ __m512i __DEFAULT_FN_ATTRS512
615
_mm512_mask_adds_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
616
{
617
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
618
(__v64qi)_mm512_adds_epi8(__A, __B),
619
(__v64qi)__W);
620
}
621
622
static __inline__ __m512i __DEFAULT_FN_ATTRS512
623
_mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
624
{
625
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
626
(__v64qi)_mm512_adds_epi8(__A, __B),
627
(__v64qi)_mm512_setzero_si512());
628
}
629
630
static __inline__ __m512i __DEFAULT_FN_ATTRS512
631
_mm512_adds_epi16 (__m512i __A, __m512i __B)
632
{
633
return (__m512i)__builtin_elementwise_add_sat((__v32hi)__A, (__v32hi)__B);
634
}
635
636
static __inline__ __m512i __DEFAULT_FN_ATTRS512
637
_mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
638
{
639
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
640
(__v32hi)_mm512_adds_epi16(__A, __B),
641
(__v32hi)__W);
642
}
643
644
static __inline__ __m512i __DEFAULT_FN_ATTRS512
645
_mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
646
{
647
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
648
(__v32hi)_mm512_adds_epi16(__A, __B),
649
(__v32hi)_mm512_setzero_si512());
650
}
651
652
static __inline__ __m512i __DEFAULT_FN_ATTRS512
653
_mm512_adds_epu8 (__m512i __A, __m512i __B)
654
{
655
return (__m512i)__builtin_elementwise_add_sat((__v64qu) __A, (__v64qu) __B);
656
}
657
658
static __inline__ __m512i __DEFAULT_FN_ATTRS512
659
_mm512_mask_adds_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
660
{
661
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
662
(__v64qi)_mm512_adds_epu8(__A, __B),
663
(__v64qi)__W);
664
}
665
666
static __inline__ __m512i __DEFAULT_FN_ATTRS512
667
_mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
668
{
669
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
670
(__v64qi)_mm512_adds_epu8(__A, __B),
671
(__v64qi)_mm512_setzero_si512());
672
}
673
674
static __inline__ __m512i __DEFAULT_FN_ATTRS512
675
_mm512_adds_epu16 (__m512i __A, __m512i __B)
676
{
677
return (__m512i)__builtin_elementwise_add_sat((__v32hu) __A, (__v32hu) __B);
678
}
679
680
static __inline__ __m512i __DEFAULT_FN_ATTRS512
681
_mm512_mask_adds_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
682
{
683
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
684
(__v32hi)_mm512_adds_epu16(__A, __B),
685
(__v32hi)__W);
686
}
687
688
static __inline__ __m512i __DEFAULT_FN_ATTRS512
689
_mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
690
{
691
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
692
(__v32hi)_mm512_adds_epu16(__A, __B),
693
(__v32hi)_mm512_setzero_si512());
694
}
695
696
static __inline__ __m512i __DEFAULT_FN_ATTRS512
697
_mm512_avg_epu8 (__m512i __A, __m512i __B)
698
{
699
return (__m512i)__builtin_ia32_pavgb512((__v64qi)__A, (__v64qi)__B);
700
}
701
702
static __inline__ __m512i __DEFAULT_FN_ATTRS512
703
_mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A,
704
__m512i __B)
705
{
706
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
707
(__v64qi)_mm512_avg_epu8(__A, __B),
708
(__v64qi)__W);
709
}
710
711
static __inline__ __m512i __DEFAULT_FN_ATTRS512
712
_mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
713
{
714
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
715
(__v64qi)_mm512_avg_epu8(__A, __B),
716
(__v64qi)_mm512_setzero_si512());
717
}
718
719
static __inline__ __m512i __DEFAULT_FN_ATTRS512
720
_mm512_avg_epu16 (__m512i __A, __m512i __B)
721
{
722
return (__m512i)__builtin_ia32_pavgw512((__v32hi)__A, (__v32hi)__B);
723
}
724
725
static __inline__ __m512i __DEFAULT_FN_ATTRS512
726
_mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
727
__m512i __B)
728
{
729
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
730
(__v32hi)_mm512_avg_epu16(__A, __B),
731
(__v32hi)__W);
732
}
733
734
static __inline__ __m512i __DEFAULT_FN_ATTRS512
735
_mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
736
{
737
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
738
(__v32hi)_mm512_avg_epu16(__A, __B),
739
(__v32hi) _mm512_setzero_si512());
740
}
741
742
static __inline__ __m512i __DEFAULT_FN_ATTRS512
743
_mm512_max_epi8 (__m512i __A, __m512i __B)
744
{
745
return (__m512i)__builtin_elementwise_max((__v64qs) __A, (__v64qs) __B);
746
}
747
748
static __inline__ __m512i __DEFAULT_FN_ATTRS512
749
_mm512_maskz_max_epi8 (__mmask64 __M, __m512i __A, __m512i __B)
750
{
751
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
752
(__v64qi)_mm512_max_epi8(__A, __B),
753
(__v64qi)_mm512_setzero_si512());
754
}
755
756
static __inline__ __m512i __DEFAULT_FN_ATTRS512
757
_mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
758
{
759
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
760
(__v64qi)_mm512_max_epi8(__A, __B),
761
(__v64qi)__W);
762
}
763
764
static __inline__ __m512i __DEFAULT_FN_ATTRS512
765
_mm512_max_epi16 (__m512i __A, __m512i __B)
766
{
767
return (__m512i)__builtin_elementwise_max((__v32hi) __A, (__v32hi) __B);
768
}
769
770
static __inline__ __m512i __DEFAULT_FN_ATTRS512
771
_mm512_maskz_max_epi16 (__mmask32 __M, __m512i __A, __m512i __B)
772
{
773
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
774
(__v32hi)_mm512_max_epi16(__A, __B),
775
(__v32hi)_mm512_setzero_si512());
776
}
777
778
static __inline__ __m512i __DEFAULT_FN_ATTRS512
779
_mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
780
__m512i __B)
781
{
782
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
783
(__v32hi)_mm512_max_epi16(__A, __B),
784
(__v32hi)__W);
785
}
786
787
static __inline__ __m512i __DEFAULT_FN_ATTRS512
788
_mm512_max_epu8 (__m512i __A, __m512i __B)
789
{
790
return (__m512i)__builtin_elementwise_max((__v64qu)__A, (__v64qu)__B);
791
}
792
793
static __inline__ __m512i __DEFAULT_FN_ATTRS512
794
_mm512_maskz_max_epu8 (__mmask64 __M, __m512i __A, __m512i __B)
795
{
796
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
797
(__v64qi)_mm512_max_epu8(__A, __B),
798
(__v64qi)_mm512_setzero_si512());
799
}
800
801
static __inline__ __m512i __DEFAULT_FN_ATTRS512
802
_mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
803
{
804
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
805
(__v64qi)_mm512_max_epu8(__A, __B),
806
(__v64qi)__W);
807
}
808
809
static __inline__ __m512i __DEFAULT_FN_ATTRS512
810
_mm512_max_epu16 (__m512i __A, __m512i __B)
811
{
812
return (__m512i)__builtin_elementwise_max((__v32hu)__A, (__v32hu)__B);
813
}
814
815
static __inline__ __m512i __DEFAULT_FN_ATTRS512
816
_mm512_maskz_max_epu16 (__mmask32 __M, __m512i __A, __m512i __B)
817
{
818
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
819
(__v32hi)_mm512_max_epu16(__A, __B),
820
(__v32hi)_mm512_setzero_si512());
821
}
822
823
static __inline__ __m512i __DEFAULT_FN_ATTRS512
824
_mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
825
{
826
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
827
(__v32hi)_mm512_max_epu16(__A, __B),
828
(__v32hi)__W);
829
}
830
831
static __inline__ __m512i __DEFAULT_FN_ATTRS512
832
_mm512_min_epi8 (__m512i __A, __m512i __B)
833
{
834
return (__m512i)__builtin_elementwise_min((__v64qs) __A, (__v64qs) __B);
835
}
836
837
static __inline__ __m512i __DEFAULT_FN_ATTRS512
838
_mm512_maskz_min_epi8 (__mmask64 __M, __m512i __A, __m512i __B)
839
{
840
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
841
(__v64qi)_mm512_min_epi8(__A, __B),
842
(__v64qi)_mm512_setzero_si512());
843
}
844
845
static __inline__ __m512i __DEFAULT_FN_ATTRS512
846
_mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
847
{
848
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
849
(__v64qi)_mm512_min_epi8(__A, __B),
850
(__v64qi)__W);
851
}
852
853
static __inline__ __m512i __DEFAULT_FN_ATTRS512
854
_mm512_min_epi16 (__m512i __A, __m512i __B)
855
{
856
return (__m512i)__builtin_elementwise_min((__v32hi) __A, (__v32hi) __B);
857
}
858
859
static __inline__ __m512i __DEFAULT_FN_ATTRS512
860
_mm512_maskz_min_epi16 (__mmask32 __M, __m512i __A, __m512i __B)
861
{
862
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
863
(__v32hi)_mm512_min_epi16(__A, __B),
864
(__v32hi)_mm512_setzero_si512());
865
}
866
867
static __inline__ __m512i __DEFAULT_FN_ATTRS512
868
_mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
869
{
870
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
871
(__v32hi)_mm512_min_epi16(__A, __B),
872
(__v32hi)__W);
873
}
874
875
static __inline__ __m512i __DEFAULT_FN_ATTRS512
876
_mm512_min_epu8 (__m512i __A, __m512i __B)
877
{
878
return (__m512i)__builtin_elementwise_min((__v64qu)__A, (__v64qu)__B);
879
}
880
881
static __inline__ __m512i __DEFAULT_FN_ATTRS512
882
_mm512_maskz_min_epu8 (__mmask64 __M, __m512i __A, __m512i __B)
883
{
884
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
885
(__v64qi)_mm512_min_epu8(__A, __B),
886
(__v64qi)_mm512_setzero_si512());
887
}
888
889
static __inline__ __m512i __DEFAULT_FN_ATTRS512
890
_mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
891
{
892
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
893
(__v64qi)_mm512_min_epu8(__A, __B),
894
(__v64qi)__W);
895
}
896
897
static __inline__ __m512i __DEFAULT_FN_ATTRS512
898
_mm512_min_epu16 (__m512i __A, __m512i __B)
899
{
900
return (__m512i)__builtin_elementwise_min((__v32hu)__A, (__v32hu)__B);
901
}
902
903
static __inline__ __m512i __DEFAULT_FN_ATTRS512
904
_mm512_maskz_min_epu16 (__mmask32 __M, __m512i __A, __m512i __B)
905
{
906
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
907
(__v32hi)_mm512_min_epu16(__A, __B),
908
(__v32hi)_mm512_setzero_si512());
909
}
910
911
static __inline__ __m512i __DEFAULT_FN_ATTRS512
912
_mm512_mask_min_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
913
{
914
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
915
(__v32hi)_mm512_min_epu16(__A, __B),
916
(__v32hi)__W);
917
}
918
919
static __inline__ __m512i __DEFAULT_FN_ATTRS512
920
_mm512_shuffle_epi8(__m512i __A, __m512i __B)
921
{
922
return (__m512i)__builtin_ia32_pshufb512((__v64qi)__A,(__v64qi)__B);
923
}
924
925
static __inline__ __m512i __DEFAULT_FN_ATTRS512
926
_mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
927
{
928
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
929
(__v64qi)_mm512_shuffle_epi8(__A, __B),
930
(__v64qi)__W);
931
}
932
933
static __inline__ __m512i __DEFAULT_FN_ATTRS512
934
_mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B)
935
{
936
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
937
(__v64qi)_mm512_shuffle_epi8(__A, __B),
938
(__v64qi)_mm512_setzero_si512());
939
}
940
941
static __inline__ __m512i __DEFAULT_FN_ATTRS512
942
_mm512_subs_epi8 (__m512i __A, __m512i __B)
943
{
944
return (__m512i)__builtin_elementwise_sub_sat((__v64qs)__A, (__v64qs)__B);
945
}
946
947
static __inline__ __m512i __DEFAULT_FN_ATTRS512
948
_mm512_mask_subs_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
949
{
950
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
951
(__v64qi)_mm512_subs_epi8(__A, __B),
952
(__v64qi)__W);
953
}
954
955
static __inline__ __m512i __DEFAULT_FN_ATTRS512
956
_mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
957
{
958
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
959
(__v64qi)_mm512_subs_epi8(__A, __B),
960
(__v64qi)_mm512_setzero_si512());
961
}
962
963
static __inline__ __m512i __DEFAULT_FN_ATTRS512
964
_mm512_subs_epi16 (__m512i __A, __m512i __B)
965
{
966
return (__m512i)__builtin_elementwise_sub_sat((__v32hi)__A, (__v32hi)__B);
967
}
968
969
static __inline__ __m512i __DEFAULT_FN_ATTRS512
970
_mm512_mask_subs_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
971
{
972
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
973
(__v32hi)_mm512_subs_epi16(__A, __B),
974
(__v32hi)__W);
975
}
976
977
static __inline__ __m512i __DEFAULT_FN_ATTRS512
978
_mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
979
{
980
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
981
(__v32hi)_mm512_subs_epi16(__A, __B),
982
(__v32hi)_mm512_setzero_si512());
983
}
984
985
static __inline__ __m512i __DEFAULT_FN_ATTRS512
986
_mm512_subs_epu8 (__m512i __A, __m512i __B)
987
{
988
return (__m512i)__builtin_elementwise_sub_sat((__v64qu) __A, (__v64qu) __B);
989
}
990
991
static __inline__ __m512i __DEFAULT_FN_ATTRS512
992
_mm512_mask_subs_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
993
{
994
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
995
(__v64qi)_mm512_subs_epu8(__A, __B),
996
(__v64qi)__W);
997
}
998
999
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1000
_mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
1001
{
1002
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1003
(__v64qi)_mm512_subs_epu8(__A, __B),
1004
(__v64qi)_mm512_setzero_si512());
1005
}
1006
1007
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1008
_mm512_subs_epu16 (__m512i __A, __m512i __B)
1009
{
1010
return (__m512i)__builtin_elementwise_sub_sat((__v32hu) __A, (__v32hu) __B);
1011
}
1012
1013
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1014
_mm512_mask_subs_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1015
{
1016
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1017
(__v32hi)_mm512_subs_epu16(__A, __B),
1018
(__v32hi)__W);
1019
}
1020
1021
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1022
_mm512_maskz_subs_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
1023
{
1024
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1025
(__v32hi)_mm512_subs_epu16(__A, __B),
1026
(__v32hi)_mm512_setzero_si512());
1027
}
1028
1029
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1030
_mm512_permutex2var_epi16(__m512i __A, __m512i __I, __m512i __B)
1031
{
1032
return (__m512i)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I,
1033
(__v32hi)__B);
1034
}
1035
1036
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1037
_mm512_mask_permutex2var_epi16(__m512i __A, __mmask32 __U, __m512i __I,
1038
__m512i __B)
1039
{
1040
return (__m512i)__builtin_ia32_selectw_512(__U,
1041
(__v32hi)_mm512_permutex2var_epi16(__A, __I, __B),
1042
(__v32hi)__A);
1043
}
1044
1045
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1046
_mm512_mask2_permutex2var_epi16(__m512i __A, __m512i __I, __mmask32 __U,
1047
__m512i __B)
1048
{
1049
return (__m512i)__builtin_ia32_selectw_512(__U,
1050
(__v32hi)_mm512_permutex2var_epi16(__A, __I, __B),
1051
(__v32hi)__I);
1052
}
1053
1054
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1055
_mm512_maskz_permutex2var_epi16(__mmask32 __U, __m512i __A, __m512i __I,
1056
__m512i __B)
1057
{
1058
return (__m512i)__builtin_ia32_selectw_512(__U,
1059
(__v32hi)_mm512_permutex2var_epi16(__A, __I, __B),
1060
(__v32hi)_mm512_setzero_si512());
1061
}
1062
1063
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1064
_mm512_mulhrs_epi16(__m512i __A, __m512i __B)
1065
{
1066
return (__m512i)__builtin_ia32_pmulhrsw512((__v32hi)__A, (__v32hi)__B);
1067
}
1068
1069
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1070
_mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1071
{
1072
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1073
(__v32hi)_mm512_mulhrs_epi16(__A, __B),
1074
(__v32hi)__W);
1075
}
1076
1077
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1078
_mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B)
1079
{
1080
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1081
(__v32hi)_mm512_mulhrs_epi16(__A, __B),
1082
(__v32hi)_mm512_setzero_si512());
1083
}
1084
1085
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1086
_mm512_mulhi_epi16(__m512i __A, __m512i __B)
1087
{
1088
return (__m512i)__builtin_ia32_pmulhw512((__v32hi) __A, (__v32hi) __B);
1089
}
1090
1091
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1092
_mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A,
1093
__m512i __B)
1094
{
1095
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1096
(__v32hi)_mm512_mulhi_epi16(__A, __B),
1097
(__v32hi)__W);
1098
}
1099
1100
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1101
_mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B)
1102
{
1103
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1104
(__v32hi)_mm512_mulhi_epi16(__A, __B),
1105
(__v32hi)_mm512_setzero_si512());
1106
}
1107
1108
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1109
_mm512_mulhi_epu16(__m512i __A, __m512i __B)
1110
{
1111
return (__m512i)__builtin_ia32_pmulhuw512((__v32hi) __A, (__v32hi) __B);
1112
}
1113
1114
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1115
_mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1116
{
1117
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1118
(__v32hi)_mm512_mulhi_epu16(__A, __B),
1119
(__v32hi)__W);
1120
}
1121
1122
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1123
_mm512_maskz_mulhi_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
1124
{
1125
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1126
(__v32hi)_mm512_mulhi_epu16(__A, __B),
1127
(__v32hi)_mm512_setzero_si512());
1128
}
1129
1130
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1131
_mm512_maddubs_epi16(__m512i __X, __m512i __Y) {
1132
return (__m512i)__builtin_ia32_pmaddubsw512((__v64qi)__X, (__v64qi)__Y);
1133
}
1134
1135
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1136
_mm512_mask_maddubs_epi16(__m512i __W, __mmask32 __U, __m512i __X,
1137
__m512i __Y) {
1138
return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U,
1139
(__v32hi)_mm512_maddubs_epi16(__X, __Y),
1140
(__v32hi)__W);
1141
}
1142
1143
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1144
_mm512_maskz_maddubs_epi16(__mmask32 __U, __m512i __X, __m512i __Y) {
1145
return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U,
1146
(__v32hi)_mm512_maddubs_epi16(__X, __Y),
1147
(__v32hi)_mm512_setzero_si512());
1148
}
1149
1150
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1151
_mm512_madd_epi16(__m512i __A, __m512i __B) {
1152
return (__m512i)__builtin_ia32_pmaddwd512((__v32hi)__A, (__v32hi)__B);
1153
}
1154
1155
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1156
_mm512_mask_madd_epi16(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
1157
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
1158
(__v16si)_mm512_madd_epi16(__A, __B),
1159
(__v16si)__W);
1160
}
1161
1162
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1163
_mm512_maskz_madd_epi16(__mmask16 __U, __m512i __A, __m512i __B) {
1164
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
1165
(__v16si)_mm512_madd_epi16(__A, __B),
1166
(__v16si)_mm512_setzero_si512());
1167
}
1168
1169
static __inline__ __m256i __DEFAULT_FN_ATTRS512
1170
_mm512_cvtsepi16_epi8 (__m512i __A) {
1171
return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
1172
(__v32qi)_mm256_setzero_si256(),
1173
(__mmask32) -1);
1174
}
1175
1176
static __inline__ __m256i __DEFAULT_FN_ATTRS512
1177
_mm512_mask_cvtsepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
1178
return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
1179
(__v32qi)__O,
1180
__M);
1181
}
1182
1183
static __inline__ __m256i __DEFAULT_FN_ATTRS512
1184
_mm512_maskz_cvtsepi16_epi8 (__mmask32 __M, __m512i __A) {
1185
return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
1186
(__v32qi) _mm256_setzero_si256(),
1187
__M);
1188
}
1189
1190
static __inline__ __m256i __DEFAULT_FN_ATTRS512
1191
_mm512_cvtusepi16_epi8 (__m512i __A) {
1192
return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
1193
(__v32qi) _mm256_setzero_si256(),
1194
(__mmask32) -1);
1195
}
1196
1197
static __inline__ __m256i __DEFAULT_FN_ATTRS512
1198
_mm512_mask_cvtusepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
1199
return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
1200
(__v32qi) __O,
1201
__M);
1202
}
1203
1204
static __inline__ __m256i __DEFAULT_FN_ATTRS512
1205
_mm512_maskz_cvtusepi16_epi8 (__mmask32 __M, __m512i __A) {
1206
return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
1207
(__v32qi) _mm256_setzero_si256(),
1208
__M);
1209
}
1210
1211
static __inline__ __m256i __DEFAULT_FN_ATTRS512
1212
_mm512_cvtepi16_epi8 (__m512i __A) {
1213
return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
1214
(__v32qi) _mm256_undefined_si256(),
1215
(__mmask32) -1);
1216
}
1217
1218
static __inline__ __m256i __DEFAULT_FN_ATTRS512
1219
_mm512_mask_cvtepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
1220
return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
1221
(__v32qi) __O,
1222
__M);
1223
}
1224
1225
static __inline__ __m256i __DEFAULT_FN_ATTRS512
1226
_mm512_maskz_cvtepi16_epi8 (__mmask32 __M, __m512i __A) {
1227
return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
1228
(__v32qi) _mm256_setzero_si256(),
1229
__M);
1230
}
1231
1232
static __inline__ void __DEFAULT_FN_ATTRS512
1233
_mm512_mask_cvtepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
1234
{
1235
__builtin_ia32_pmovwb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
1236
}
1237
1238
static __inline__ void __DEFAULT_FN_ATTRS512
1239
_mm512_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
1240
{
1241
__builtin_ia32_pmovswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
1242
}
1243
1244
static __inline__ void __DEFAULT_FN_ATTRS512
1245
_mm512_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
1246
{
1247
__builtin_ia32_pmovuswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
1248
}
1249
1250
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1251
_mm512_unpackhi_epi8(__m512i __A, __m512i __B) {
1252
return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B,
1253
8, 64+8, 9, 64+9,
1254
10, 64+10, 11, 64+11,
1255
12, 64+12, 13, 64+13,
1256
14, 64+14, 15, 64+15,
1257
24, 64+24, 25, 64+25,
1258
26, 64+26, 27, 64+27,
1259
28, 64+28, 29, 64+29,
1260
30, 64+30, 31, 64+31,
1261
40, 64+40, 41, 64+41,
1262
42, 64+42, 43, 64+43,
1263
44, 64+44, 45, 64+45,
1264
46, 64+46, 47, 64+47,
1265
56, 64+56, 57, 64+57,
1266
58, 64+58, 59, 64+59,
1267
60, 64+60, 61, 64+61,
1268
62, 64+62, 63, 64+63);
1269
}
1270
1271
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1272
_mm512_mask_unpackhi_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
1273
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1274
(__v64qi)_mm512_unpackhi_epi8(__A, __B),
1275
(__v64qi)__W);
1276
}
1277
1278
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1279
_mm512_maskz_unpackhi_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
1280
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1281
(__v64qi)_mm512_unpackhi_epi8(__A, __B),
1282
(__v64qi)_mm512_setzero_si512());
1283
}
1284
1285
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1286
_mm512_unpackhi_epi16(__m512i __A, __m512i __B) {
1287
return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B,
1288
4, 32+4, 5, 32+5,
1289
6, 32+6, 7, 32+7,
1290
12, 32+12, 13, 32+13,
1291
14, 32+14, 15, 32+15,
1292
20, 32+20, 21, 32+21,
1293
22, 32+22, 23, 32+23,
1294
28, 32+28, 29, 32+29,
1295
30, 32+30, 31, 32+31);
1296
}
1297
1298
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1299
_mm512_mask_unpackhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
1300
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1301
(__v32hi)_mm512_unpackhi_epi16(__A, __B),
1302
(__v32hi)__W);
1303
}
1304
1305
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1306
_mm512_maskz_unpackhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
1307
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1308
(__v32hi)_mm512_unpackhi_epi16(__A, __B),
1309
(__v32hi)_mm512_setzero_si512());
1310
}
1311
1312
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1313
_mm512_unpacklo_epi8(__m512i __A, __m512i __B) {
1314
return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B,
1315
0, 64+0, 1, 64+1,
1316
2, 64+2, 3, 64+3,
1317
4, 64+4, 5, 64+5,
1318
6, 64+6, 7, 64+7,
1319
16, 64+16, 17, 64+17,
1320
18, 64+18, 19, 64+19,
1321
20, 64+20, 21, 64+21,
1322
22, 64+22, 23, 64+23,
1323
32, 64+32, 33, 64+33,
1324
34, 64+34, 35, 64+35,
1325
36, 64+36, 37, 64+37,
1326
38, 64+38, 39, 64+39,
1327
48, 64+48, 49, 64+49,
1328
50, 64+50, 51, 64+51,
1329
52, 64+52, 53, 64+53,
1330
54, 64+54, 55, 64+55);
1331
}
1332
1333
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1334
_mm512_mask_unpacklo_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
1335
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1336
(__v64qi)_mm512_unpacklo_epi8(__A, __B),
1337
(__v64qi)__W);
1338
}
1339
1340
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1341
_mm512_maskz_unpacklo_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
1342
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1343
(__v64qi)_mm512_unpacklo_epi8(__A, __B),
1344
(__v64qi)_mm512_setzero_si512());
1345
}
1346
1347
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1348
_mm512_unpacklo_epi16(__m512i __A, __m512i __B) {
1349
return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B,
1350
0, 32+0, 1, 32+1,
1351
2, 32+2, 3, 32+3,
1352
8, 32+8, 9, 32+9,
1353
10, 32+10, 11, 32+11,
1354
16, 32+16, 17, 32+17,
1355
18, 32+18, 19, 32+19,
1356
24, 32+24, 25, 32+25,
1357
26, 32+26, 27, 32+27);
1358
}
1359
1360
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1361
_mm512_mask_unpacklo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
1362
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1363
(__v32hi)_mm512_unpacklo_epi16(__A, __B),
1364
(__v32hi)__W);
1365
}
1366
1367
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1368
_mm512_maskz_unpacklo_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
1369
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1370
(__v32hi)_mm512_unpacklo_epi16(__A, __B),
1371
(__v32hi)_mm512_setzero_si512());
1372
}
1373
1374
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1375
_mm512_cvtepi8_epi16(__m256i __A)
1376
{
1377
/* This function always performs a signed extension, but __v32qi is a char
1378
which may be signed or unsigned, so use __v32qs. */
1379
return (__m512i)__builtin_convertvector((__v32qs)__A, __v32hi);
1380
}
1381
1382
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1383
_mm512_mask_cvtepi8_epi16(__m512i __W, __mmask32 __U, __m256i __A)
1384
{
1385
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1386
(__v32hi)_mm512_cvtepi8_epi16(__A),
1387
(__v32hi)__W);
1388
}
1389
1390
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1391
_mm512_maskz_cvtepi8_epi16(__mmask32 __U, __m256i __A)
1392
{
1393
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1394
(__v32hi)_mm512_cvtepi8_epi16(__A),
1395
(__v32hi)_mm512_setzero_si512());
1396
}
1397
1398
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1399
_mm512_cvtepu8_epi16(__m256i __A)
1400
{
1401
return (__m512i)__builtin_convertvector((__v32qu)__A, __v32hi);
1402
}
1403
1404
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1405
_mm512_mask_cvtepu8_epi16(__m512i __W, __mmask32 __U, __m256i __A)
1406
{
1407
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1408
(__v32hi)_mm512_cvtepu8_epi16(__A),
1409
(__v32hi)__W);
1410
}
1411
1412
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1413
_mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A)
1414
{
1415
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1416
(__v32hi)_mm512_cvtepu8_epi16(__A),
1417
(__v32hi)_mm512_setzero_si512());
1418
}
1419
1420
1421
/* Passes A (as __v32hi) and imm (as int) to the pshufhw512 builtin.
   Presumably a macro because the builtin needs imm as a compile-time
   constant -- TODO confirm. */
#define _mm512_shufflehi_epi16(A, imm) \
  ((__m512i)__builtin_ia32_pshufhw512( \
      (__v32hi)(__m512i)(A), \
      (int)(imm)))
1423
1424
/* Merge-masked form: blends _mm512_shufflehi_epi16(A, imm) with W under U. */
#define _mm512_mask_shufflehi_epi16(W, U, A, imm) \
  ((__m512i)__builtin_ia32_selectw_512( \
      (__mmask32)(U), \
      (__v32hi)_mm512_shufflehi_epi16((A), (imm)), \
      (__v32hi)(__m512i)(W)))
1429
1430
/* Zero-masked form: blends _mm512_shufflehi_epi16(A, imm) with zeros under U. */
#define _mm512_maskz_shufflehi_epi16(U, A, imm) \
  ((__m512i)__builtin_ia32_selectw_512( \
      (__mmask32)(U), \
      (__v32hi)_mm512_shufflehi_epi16((A), (imm)), \
      (__v32hi)_mm512_setzero_si512()))
1435
1436
/* Passes A (as __v32hi) and imm (as int) to the pshuflw512 builtin.
   Presumably a macro because the builtin needs imm as a compile-time
   constant -- TODO confirm. */
#define _mm512_shufflelo_epi16(A, imm) \
  ((__m512i)__builtin_ia32_pshuflw512( \
      (__v32hi)(__m512i)(A), \
      (int)(imm)))
1438
1439
1440
/* Merge-masked form: blends _mm512_shufflelo_epi16(A, imm) with W under U. */
#define _mm512_mask_shufflelo_epi16(W, U, A, imm) \
  ((__m512i)__builtin_ia32_selectw_512( \
      (__mmask32)(U), \
      (__v32hi)_mm512_shufflelo_epi16((A), (imm)), \
      (__v32hi)(__m512i)(W)))
1445
1446
1447
/* Zero-masked form: blends _mm512_shufflelo_epi16(A, imm) with zeros under U. */
#define _mm512_maskz_shufflelo_epi16(U, A, imm) \
  ((__m512i)__builtin_ia32_selectw_512( \
      (__mmask32)(U), \
      (__v32hi)_mm512_shufflelo_epi16((A), (imm)), \
      (__v32hi)_mm512_setzero_si512()))
1452
1453
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1454
_mm512_sllv_epi16(__m512i __A, __m512i __B)
1455
{
1456
return (__m512i)__builtin_ia32_psllv32hi((__v32hi) __A, (__v32hi) __B);
1457
}
1458
1459
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1460
_mm512_mask_sllv_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1461
{
1462
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1463
(__v32hi)_mm512_sllv_epi16(__A, __B),
1464
(__v32hi)__W);
1465
}
1466
1467
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1468
_mm512_maskz_sllv_epi16(__mmask32 __U, __m512i __A, __m512i __B)
1469
{
1470
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1471
(__v32hi)_mm512_sllv_epi16(__A, __B),
1472
(__v32hi)_mm512_setzero_si512());
1473
}
1474
1475
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1476
_mm512_sll_epi16(__m512i __A, __m128i __B)
1477
{
1478
return (__m512i)__builtin_ia32_psllw512((__v32hi) __A, (__v8hi) __B);
1479
}
1480
1481
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1482
_mm512_mask_sll_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
1483
{
1484
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1485
(__v32hi)_mm512_sll_epi16(__A, __B),
1486
(__v32hi)__W);
1487
}
1488
1489
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1490
_mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, __m128i __B)
1491
{
1492
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1493
(__v32hi)_mm512_sll_epi16(__A, __B),
1494
(__v32hi)_mm512_setzero_si512());
1495
}
1496
1497
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1498
_mm512_slli_epi16(__m512i __A, unsigned int __B)
1499
{
1500
return (__m512i)__builtin_ia32_psllwi512((__v32hi)__A, (int)__B);
1501
}
1502
1503
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1504
_mm512_mask_slli_epi16(__m512i __W, __mmask32 __U, __m512i __A,
1505
unsigned int __B)
1506
{
1507
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1508
(__v32hi)_mm512_slli_epi16(__A, __B),
1509
(__v32hi)__W);
1510
}
1511
1512
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1513
_mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, unsigned int __B)
1514
{
1515
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1516
(__v32hi)_mm512_slli_epi16(__A, __B),
1517
(__v32hi)_mm512_setzero_si512());
1518
}
1519
1520
/* Passes a (as __v8di) and imm (as int) to the pslldqi512_byteshift builtin. */
#define _mm512_bslli_epi128(a, imm) \
  ((__m512i)__builtin_ia32_pslldqi512_byteshift( \
      (__v8di)(__m512i)(a), \
      (int)(imm)))
1522
1523
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1524
_mm512_srlv_epi16(__m512i __A, __m512i __B)
1525
{
1526
return (__m512i)__builtin_ia32_psrlv32hi((__v32hi)__A, (__v32hi)__B);
1527
}
1528
1529
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1530
_mm512_mask_srlv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1531
{
1532
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1533
(__v32hi)_mm512_srlv_epi16(__A, __B),
1534
(__v32hi)__W);
1535
}
1536
1537
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1538
_mm512_maskz_srlv_epi16(__mmask32 __U, __m512i __A, __m512i __B)
1539
{
1540
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1541
(__v32hi)_mm512_srlv_epi16(__A, __B),
1542
(__v32hi)_mm512_setzero_si512());
1543
}
1544
1545
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1546
_mm512_srav_epi16(__m512i __A, __m512i __B)
1547
{
1548
return (__m512i)__builtin_ia32_psrav32hi((__v32hi)__A, (__v32hi)__B);
1549
}
1550
1551
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1552
_mm512_mask_srav_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1553
{
1554
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1555
(__v32hi)_mm512_srav_epi16(__A, __B),
1556
(__v32hi)__W);
1557
}
1558
1559
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1560
_mm512_maskz_srav_epi16(__mmask32 __U, __m512i __A, __m512i __B)
1561
{
1562
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1563
(__v32hi)_mm512_srav_epi16(__A, __B),
1564
(__v32hi)_mm512_setzero_si512());
1565
}
1566
1567
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1568
_mm512_sra_epi16(__m512i __A, __m128i __B)
1569
{
1570
return (__m512i)__builtin_ia32_psraw512((__v32hi) __A, (__v8hi) __B);
1571
}
1572
1573
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1574
_mm512_mask_sra_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
1575
{
1576
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1577
(__v32hi)_mm512_sra_epi16(__A, __B),
1578
(__v32hi)__W);
1579
}
1580
1581
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1582
_mm512_maskz_sra_epi16(__mmask32 __U, __m512i __A, __m128i __B)
1583
{
1584
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1585
(__v32hi)_mm512_sra_epi16(__A, __B),
1586
(__v32hi)_mm512_setzero_si512());
1587
}
1588
1589
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1590
_mm512_srai_epi16(__m512i __A, unsigned int __B)
1591
{
1592
return (__m512i)__builtin_ia32_psrawi512((__v32hi)__A, (int)__B);
1593
}
1594
1595
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1596
_mm512_mask_srai_epi16(__m512i __W, __mmask32 __U, __m512i __A,
1597
unsigned int __B)
1598
{
1599
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1600
(__v32hi)_mm512_srai_epi16(__A, __B),
1601
(__v32hi)__W);
1602
}
1603
1604
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1605
_mm512_maskz_srai_epi16(__mmask32 __U, __m512i __A, unsigned int __B)
1606
{
1607
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1608
(__v32hi)_mm512_srai_epi16(__A, __B),
1609
(__v32hi)_mm512_setzero_si512());
1610
}
1611
1612
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1613
_mm512_srl_epi16(__m512i __A, __m128i __B)
1614
{
1615
return (__m512i)__builtin_ia32_psrlw512((__v32hi) __A, (__v8hi) __B);
1616
}
1617
1618
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1619
_mm512_mask_srl_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
1620
{
1621
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1622
(__v32hi)_mm512_srl_epi16(__A, __B),
1623
(__v32hi)__W);
1624
}
1625
1626
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1627
_mm512_maskz_srl_epi16(__mmask32 __U, __m512i __A, __m128i __B)
1628
{
1629
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1630
(__v32hi)_mm512_srl_epi16(__A, __B),
1631
(__v32hi)_mm512_setzero_si512());
1632
}
1633
1634
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1635
_mm512_srli_epi16(__m512i __A, unsigned int __B)
1636
{
1637
return (__m512i)__builtin_ia32_psrlwi512((__v32hi)__A, (int)__B);
1638
}
1639
1640
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1641
_mm512_mask_srli_epi16(__m512i __W, __mmask32 __U, __m512i __A,
1642
unsigned int __B)
1643
{
1644
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1645
(__v32hi)_mm512_srli_epi16(__A, __B),
1646
(__v32hi)__W);
1647
}
1648
1649
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1650
_mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B)
1651
{
1652
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1653
(__v32hi)_mm512_srli_epi16(__A, (unsigned int)__B),
1654
(__v32hi)_mm512_setzero_si512());
1655
}
1656
1657
/* Passes a (as __v8di) and imm (as int) to the psrldqi512_byteshift builtin. */
#define _mm512_bsrli_epi128(a, imm) \
  ((__m512i)__builtin_ia32_psrldqi512_byteshift( \
      (__v8di)(__m512i)(a), \
      (int)(imm)))
1659
1660
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1661
_mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
1662
{
1663
return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
1664
(__v32hi) __A,
1665
(__v32hi) __W);
1666
}
1667
1668
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1669
_mm512_maskz_mov_epi16 (__mmask32 __U, __m512i __A)
1670
{
1671
return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
1672
(__v32hi) __A,
1673
(__v32hi) _mm512_setzero_si512 ());
1674
}
1675
1676
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1677
_mm512_mask_mov_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
1678
{
1679
return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
1680
(__v64qi) __A,
1681
(__v64qi) __W);
1682
}
1683
1684
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1685
_mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A)
1686
{
1687
return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
1688
(__v64qi) __A,
1689
(__v64qi) _mm512_setzero_si512 ());
1690
}
1691
1692
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1693
_mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A)
1694
{
1695
return (__m512i) __builtin_ia32_selectb_512(__M,
1696
(__v64qi)_mm512_set1_epi8(__A),
1697
(__v64qi) __O);
1698
}
1699
1700
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1701
_mm512_maskz_set1_epi8 (__mmask64 __M, char __A)
1702
{
1703
return (__m512i) __builtin_ia32_selectb_512(__M,
1704
(__v64qi) _mm512_set1_epi8(__A),
1705
(__v64qi) _mm512_setzero_si512());
1706
}
1707
1708
/* Combines two 64-bit mask values by forwarding them to the kunpckdi
   builtin. Uses the non-512 attribute set, like the other mask helpers. */
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_kunpackd(__mmask64 __A, __mmask64 __B)
{
  __mmask64 __first = (__mmask64)__A;
  __mmask64 __second = (__mmask64)__B;
  return (__mmask64)__builtin_ia32_kunpckdi(__first, __second);
}
1713
1714
static __inline__ __mmask32 __DEFAULT_FN_ATTRS
1715
_mm512_kunpackw (__mmask32 __A, __mmask32 __B)
1716
{
1717
return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A,
1718
(__mmask32) __B);
1719
}
1720
1721
static __inline __m512i __DEFAULT_FN_ATTRS512
1722
_mm512_loadu_epi16 (void const *__P)
1723
{
1724
struct __loadu_epi16 {
1725
__m512i_u __v;
1726
} __attribute__((__packed__, __may_alias__));
1727
return ((const struct __loadu_epi16*)__P)->__v;
1728
}
1729
1730
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1731
_mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P)
1732
{
1733
return (__m512i) __builtin_ia32_loaddquhi512_mask ((const __v32hi *) __P,
1734
(__v32hi) __W,
1735
(__mmask32) __U);
1736
}
1737
1738
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1739
_mm512_maskz_loadu_epi16 (__mmask32 __U, void const *__P)
1740
{
1741
return (__m512i) __builtin_ia32_loaddquhi512_mask ((const __v32hi *) __P,
1742
(__v32hi)
1743
_mm512_setzero_si512 (),
1744
(__mmask32) __U);
1745
}
1746
1747
static __inline __m512i __DEFAULT_FN_ATTRS512
1748
_mm512_loadu_epi8 (void const *__P)
1749
{
1750
struct __loadu_epi8 {
1751
__m512i_u __v;
1752
} __attribute__((__packed__, __may_alias__));
1753
return ((const struct __loadu_epi8*)__P)->__v;
1754
}
1755
1756
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1757
_mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P)
1758
{
1759
return (__m512i) __builtin_ia32_loaddquqi512_mask ((const __v64qi *) __P,
1760
(__v64qi) __W,
1761
(__mmask64) __U);
1762
}
1763
1764
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1765
_mm512_maskz_loadu_epi8 (__mmask64 __U, void const *__P)
1766
{
1767
return (__m512i) __builtin_ia32_loaddquqi512_mask ((const __v64qi *) __P,
1768
(__v64qi)
1769
_mm512_setzero_si512 (),
1770
(__mmask64) __U);
1771
}
1772
1773
static __inline void __DEFAULT_FN_ATTRS512
1774
_mm512_storeu_epi16 (void *__P, __m512i __A)
1775
{
1776
struct __storeu_epi16 {
1777
__m512i_u __v;
1778
} __attribute__((__packed__, __may_alias__));
1779
((struct __storeu_epi16*)__P)->__v = __A;
1780
}
1781
1782
static __inline__ void __DEFAULT_FN_ATTRS512
1783
_mm512_mask_storeu_epi16 (void *__P, __mmask32 __U, __m512i __A)
1784
{
1785
__builtin_ia32_storedquhi512_mask ((__v32hi *) __P,
1786
(__v32hi) __A,
1787
(__mmask32) __U);
1788
}
1789
1790
static __inline void __DEFAULT_FN_ATTRS512
1791
_mm512_storeu_epi8 (void *__P, __m512i __A)
1792
{
1793
struct __storeu_epi8 {
1794
__m512i_u __v;
1795
} __attribute__((__packed__, __may_alias__));
1796
((struct __storeu_epi8*)__P)->__v = __A;
1797
}
1798
1799
static __inline__ void __DEFAULT_FN_ATTRS512
1800
_mm512_mask_storeu_epi8 (void *__P, __mmask64 __U, __m512i __A)
1801
{
1802
__builtin_ia32_storedquqi512_mask ((__v64qi *) __P,
1803
(__v64qi) __A,
1804
(__mmask64) __U);
1805
}
1806
1807
static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
1808
_mm512_test_epi8_mask (__m512i __A, __m512i __B)
1809
{
1810
return _mm512_cmpneq_epi8_mask (_mm512_and_epi32 (__A, __B),
1811
_mm512_setzero_si512());
1812
}
1813
1814
static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
1815
_mm512_mask_test_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
1816
{
1817
return _mm512_mask_cmpneq_epi8_mask (__U, _mm512_and_epi32 (__A, __B),
1818
_mm512_setzero_si512());
1819
}
1820
1821
static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
1822
_mm512_test_epi16_mask (__m512i __A, __m512i __B)
1823
{
1824
return _mm512_cmpneq_epi16_mask (_mm512_and_epi32 (__A, __B),
1825
_mm512_setzero_si512());
1826
}
1827
1828
static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
1829
_mm512_mask_test_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
1830
{
1831
return _mm512_mask_cmpneq_epi16_mask (__U, _mm512_and_epi32 (__A, __B),
1832
_mm512_setzero_si512());
1833
}
1834
1835
static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
1836
_mm512_testn_epi8_mask (__m512i __A, __m512i __B)
1837
{
1838
return _mm512_cmpeq_epi8_mask (_mm512_and_epi32 (__A, __B), _mm512_setzero_si512());
1839
}
1840
1841
static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
1842
_mm512_mask_testn_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
1843
{
1844
return _mm512_mask_cmpeq_epi8_mask (__U, _mm512_and_epi32 (__A, __B),
1845
_mm512_setzero_si512());
1846
}
1847
1848
static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
1849
_mm512_testn_epi16_mask (__m512i __A, __m512i __B)
1850
{
1851
return _mm512_cmpeq_epi16_mask (_mm512_and_epi32 (__A, __B),
1852
_mm512_setzero_si512());
1853
}
1854
1855
static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
1856
_mm512_mask_testn_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
1857
{
1858
return _mm512_mask_cmpeq_epi16_mask (__U, _mm512_and_epi32 (__A, __B),
1859
_mm512_setzero_si512());
1860
}
1861
1862
static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
1863
_mm512_movepi8_mask (__m512i __A)
1864
{
1865
return (__mmask64) __builtin_ia32_cvtb2mask512 ((__v64qi) __A);
1866
}
1867
1868
static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
1869
_mm512_movepi16_mask (__m512i __A)
1870
{
1871
return (__mmask32) __builtin_ia32_cvtw2mask512 ((__v32hi) __A);
1872
}
1873
1874
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1875
_mm512_movm_epi8 (__mmask64 __A)
1876
{
1877
return (__m512i) __builtin_ia32_cvtmask2b512 (__A);
1878
}
1879
1880
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1881
_mm512_movm_epi16 (__mmask32 __A)
1882
{
1883
return (__m512i) __builtin_ia32_cvtmask2w512 (__A);
1884
}
1885
1886
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1887
_mm512_broadcastb_epi8 (__m128i __A)
1888
{
1889
return (__m512i)__builtin_shufflevector((__v16qi) __A, (__v16qi) __A,
1890
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1891
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1892
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1893
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1894
}
1895
1896
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1897
_mm512_mask_broadcastb_epi8 (__m512i __O, __mmask64 __M, __m128i __A)
1898
{
1899
return (__m512i)__builtin_ia32_selectb_512(__M,
1900
(__v64qi) _mm512_broadcastb_epi8(__A),
1901
(__v64qi) __O);
1902
}
1903
1904
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1905
_mm512_maskz_broadcastb_epi8 (__mmask64 __M, __m128i __A)
1906
{
1907
return (__m512i)__builtin_ia32_selectb_512(__M,
1908
(__v64qi) _mm512_broadcastb_epi8(__A),
1909
(__v64qi) _mm512_setzero_si512());
1910
}
1911
1912
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1913
_mm512_mask_set1_epi16 (__m512i __O, __mmask32 __M, short __A)
1914
{
1915
return (__m512i) __builtin_ia32_selectw_512(__M,
1916
(__v32hi) _mm512_set1_epi16(__A),
1917
(__v32hi) __O);
1918
}
1919
1920
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1921
_mm512_maskz_set1_epi16 (__mmask32 __M, short __A)
1922
{
1923
return (__m512i) __builtin_ia32_selectw_512(__M,
1924
(__v32hi) _mm512_set1_epi16(__A),
1925
(__v32hi) _mm512_setzero_si512());
1926
}
1927
1928
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1929
_mm512_broadcastw_epi16 (__m128i __A)
1930
{
1931
return (__m512i)__builtin_shufflevector((__v8hi) __A, (__v8hi) __A,
1932
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1933
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1934
}
1935
1936
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1937
_mm512_mask_broadcastw_epi16 (__m512i __O, __mmask32 __M, __m128i __A)
1938
{
1939
return (__m512i)__builtin_ia32_selectw_512(__M,
1940
(__v32hi) _mm512_broadcastw_epi16(__A),
1941
(__v32hi) __O);
1942
}
1943
1944
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1945
_mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A)
1946
{
1947
return (__m512i)__builtin_ia32_selectw_512(__M,
1948
(__v32hi) _mm512_broadcastw_epi16(__A),
1949
(__v32hi) _mm512_setzero_si512());
1950
}
1951
1952
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1953
_mm512_permutexvar_epi16 (__m512i __A, __m512i __B)
1954
{
1955
return (__m512i)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A);
1956
}
1957
1958
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1959
_mm512_maskz_permutexvar_epi16 (__mmask32 __M, __m512i __A,
1960
__m512i __B)
1961
{
1962
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
1963
(__v32hi)_mm512_permutexvar_epi16(__A, __B),
1964
(__v32hi)_mm512_setzero_si512());
1965
}
1966
1967
static __inline__ __m512i __DEFAULT_FN_ATTRS512
1968
_mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
1969
__m512i __B)
1970
{
1971
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
1972
(__v32hi)_mm512_permutexvar_epi16(__A, __B),
1973
(__v32hi)__W);
1974
}
1975
1976
/* Wraps __builtin_ia32_palignr512: A and B are converted to __v64qi and N is
 * passed through as an int. NOTE(review): kept as a macro, presumably
 * because N has to reach the builtin as a constant expression.
 */
#define _mm512_alignr_epi8(A, B, N) \
  ((__m512i)__builtin_ia32_palignr512( \
      (__v64qi)(__m512i)(A), (__v64qi)(__m512i)(B), (int)(N)))
1979
1980
/* Merge-masked form of _mm512_alignr_epi8: the alignr result is blended with
 * W under the 64-bit mask U using __builtin_ia32_selectb_512.
 */
#define _mm512_mask_alignr_epi8(W, U, A, B, N) \
  ((__m512i)__builtin_ia32_selectb_512( \
      (__mmask64)(U), \
      (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \
      (__v64qi)(__m512i)(W)))
1984
1985
/* Zero-masked form of _mm512_alignr_epi8: the alignr result is blended with
 * the all-zero vector under the 64-bit mask U using
 * __builtin_ia32_selectb_512.
 */
#define _mm512_maskz_alignr_epi8(U, A, B, N) \
  ((__m512i)__builtin_ia32_selectb_512( \
      (__mmask64)(U), \
      (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \
      (__v64qi)(__m512i)_mm512_setzero_si512()))
1989
1990
/* Wraps __builtin_ia32_dbpsadbw512: A and B are converted to __v64qi and imm
 * is passed through as an int. NOTE(review): kept as a macro, presumably
 * because imm has to reach the builtin as a constant expression.
 */
#define _mm512_dbsad_epu8(A, B, imm) \
  ((__m512i)__builtin_ia32_dbpsadbw512( \
      (__v64qi)(__m512i)(A), (__v64qi)(__m512i)(B), (int)(imm)))
1993
1994
/* Merge-masked form of _mm512_dbsad_epu8: the result, viewed as 16-bit
 * elements, is blended with W under the 32-bit mask U using
 * __builtin_ia32_selectw_512.
 */
#define _mm512_mask_dbsad_epu8(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectw_512( \
      (__mmask32)(U), \
      (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \
      (__v32hi)(__m512i)(W)))
1998
1999
/* Zero-masked form of _mm512_dbsad_epu8: the result, viewed as 16-bit
 * elements, is blended with the all-zero vector under the 32-bit mask U
 * using __builtin_ia32_selectw_512.
 */
#define _mm512_maskz_dbsad_epu8(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectw_512( \
      (__mmask32)(U), \
      (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \
      (__v32hi)_mm512_setzero_si512()))
2003
2004
static __inline__ __m512i __DEFAULT_FN_ATTRS512
2005
_mm512_sad_epu8 (__m512i __A, __m512i __B)
2006
{
2007
return (__m512i) __builtin_ia32_psadbw512 ((__v64qi) __A,
2008
(__v64qi) __B);
2009
}
2010
2011
/* The attribute shorthands were defined for the functions in this file only;
 * remove them before the header ends.
 */
#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS512

#endif /* __AVX512BWINTRIN_H */
2015
2016