Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
emscripten-core
GitHub Repository: emscripten-core/emscripten
Path: blob/main/system/include/compat/tmmintrin.h
6171 views
1
/*
2
* Copyright 2020 The Emscripten Authors. All rights reserved.
3
* Emscripten is available under two separate licenses, the MIT license and the
4
* University of Illinois/NCSA Open Source License. Both these licenses can be
5
* found in the LICENSE file.
6
*/
7
#ifndef __emscripten_tmmintrin_h__
8
#define __emscripten_tmmintrin_h__
9
10
#ifndef __SSSE3__
11
#error "SSSE3 instruction set not enabled"
12
#endif
13
14
#include <pmmintrin.h>
15
16
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
17
_mm_abs_epi8(__m128i __a)
18
{
19
return (__m128i)wasm_i8x16_abs((v128_t)__a);
20
}
21
22
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
23
_mm_abs_epi16(__m128i __a)
24
{
25
return (__m128i)wasm_i16x8_abs((v128_t)__a);
26
}
27
28
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
29
_mm_abs_epi32(__m128i __a)
30
{
31
return (__m128i)wasm_i32x4_abs((v128_t)__a);
32
}
33
34
// x86 PALIGNR: treat __a:__b as one 32-byte value (__a in the upper half)
// and extract 16 bytes starting at byte offset __count.
//   __count in [0, 16]  -> blend of low bytes of __a and high bytes of __b;
//   __count in (16, 31] -> remaining high bytes of __a only.
// The `& 0xFF` mirrors the 8-bit immediate truncation of the hardware
// instruction. NOTE(review): __count is now parenthesized in the comparison
// below — the unparenthesized form mis-parsed arguments such as
// `x ? 4 : 8` because `<=` binds tighter than `?:`.
#define _mm_alignr_epi8(__a, __b, __count) \
  (((__count) <= 16) \
     ? (_mm_or_si128( \
          _mm_bslli_si128((__a), 16 - (((unsigned int)(__count)) & 0xFF)), \
          _mm_bsrli_si128((__b), (((unsigned int)(__count)) & 0xFF)))) \
     : (_mm_bsrli_si128((__a), (((unsigned int)(__count)) & 0xFF) - 16)))
38
39
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
40
_mm_hadd_epi16(__m128i __a, __m128i __b)
41
{
42
return _mm_add_epi16((__m128i)wasm_i16x8_shuffle(__a, __b, 0, 2, 4, 6, 8, 10, 12, 14),
43
(__m128i)wasm_i16x8_shuffle(__a, __b, 1, 3, 5, 7, 9, 11, 13, 15));
44
}
45
46
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
47
_mm_hadd_epi32(__m128i __a, __m128i __b)
48
{
49
return _mm_add_epi32((__m128i)_mm_shuffle_ps((__m128)__a, (__m128)__b, _MM_SHUFFLE(2, 0, 2, 0)),
50
(__m128i)_mm_shuffle_ps((__m128)__a, (__m128)__b, _MM_SHUFFLE(3, 1, 3, 1)));
51
}
52
53
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
54
_mm_hadds_epi16(__m128i __a, __m128i __b)
55
{
56
return _mm_adds_epi16((__m128i)wasm_i16x8_shuffle(__a, __b, 0, 2, 4, 6, 8, 10, 12, 14),
57
(__m128i)wasm_i16x8_shuffle(__a, __b, 1, 3, 5, 7, 9, 11, 13, 15));
58
}
59
60
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
61
_mm_hsub_epi16(__m128i __a, __m128i __b)
62
{
63
return _mm_sub_epi16((__m128i)wasm_i16x8_shuffle(__a, __b, 0, 2, 4, 6, 8, 10, 12, 14),
64
(__m128i)wasm_i16x8_shuffle(__a, __b, 1, 3, 5, 7, 9, 11, 13, 15));
65
}
66
67
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
68
_mm_hsub_epi32(__m128i __a, __m128i __b)
69
{
70
return _mm_sub_epi32((__m128i)_mm_shuffle_ps((__m128)__a, (__m128)__b, _MM_SHUFFLE(2, 0, 2, 0)),
71
(__m128i)_mm_shuffle_ps((__m128)__a, (__m128)__b, _MM_SHUFFLE(3, 1, 3, 1)));
72
}
73
74
75
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
76
_mm_hsubs_epi16(__m128i __a, __m128i __b)
77
{
78
return _mm_subs_epi16((__m128i)wasm_i16x8_shuffle(__a, __b, 0, 2, 4, 6, 8, 10, 12, 14),
79
(__m128i)wasm_i16x8_shuffle(__a, __b, 1, 3, 5, 7, 9, 11, 13, 15));
80
}
81
82
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
83
_mm_maddubs_epi16(__m128i __a, __m128i __b)
84
{
85
return _mm_adds_epi16(
86
_mm_mullo_epi16(
87
_mm_and_si128(__a, _mm_set1_epi16(0x00FF)),
88
_mm_srai_epi16(_mm_slli_epi16(__b, 8), 8)),
89
_mm_mullo_epi16(_mm_srli_epi16(__a, 8), _mm_srai_epi16(__b, 8)));
90
}
91
92
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
93
_mm_mulhrs_epi16(__m128i __a, __m128i __b)
94
{
95
v128_t __lo = wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8((v128_t)__a), wasm_i32x4_widen_low_i16x8((v128_t)__b));
96
v128_t __hi = wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8((v128_t)__a), wasm_i32x4_widen_high_i16x8((v128_t)__b));
97
const v128_t __inc = wasm_i32x4_splat(0x4000);
98
__lo = wasm_i32x4_add(__lo, __inc);
99
__hi = wasm_i32x4_add(__hi, __inc);
100
__lo = wasm_i32x4_add(__lo, __lo);
101
__hi = wasm_i32x4_add(__hi, __hi);
102
return (__m128i)wasm_i16x8_shuffle(__lo, __hi, 1, 3, 5, 7, 9, 11, 13, 15);
103
}
104
105
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
106
_mm_shuffle_epi8(__m128i __a, __m128i __b)
107
{
108
return (__m128i)wasm_v8x16_swizzle((v128_t)__a, (v128_t)_mm_and_si128(__b, _mm_set1_epi8(0x8F)));
109
}
110
111
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
112
_mm_sign_epi8(__m128i __a, __m128i __b)
113
{
114
const __m128i __zero = _mm_setzero_si128();
115
__a = _mm_andnot_si128(_mm_cmpeq_epi8(__b, __zero), __a);
116
const __m128i __mask = _mm_cmpgt_epi8(__zero, __b);
117
return _mm_xor_si128(_mm_add_epi8(__a, __mask), __mask);
118
}
119
120
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
121
_mm_sign_epi16(__m128i __a, __m128i __b)
122
{
123
const __m128i __zero = _mm_setzero_si128();
124
__a = _mm_andnot_si128(_mm_cmpeq_epi16(__b, __zero), __a);
125
const __m128i __mask = _mm_cmpgt_epi16(__zero, __b);
126
return _mm_xor_si128(_mm_add_epi16(__a, __mask), __mask);
127
}
128
129
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
130
_mm_sign_epi32(__m128i __a, __m128i __b)
131
{
132
const __m128i __zero = _mm_setzero_si128();
133
__a = _mm_andnot_si128(_mm_cmpeq_epi32(__b, __zero), __a);
134
const __m128i __mask = _mm_cmpgt_epi32(__zero, __b);
135
return _mm_xor_si128(_mm_add_epi32(__a, __mask), __mask);
136
}
137
138
// Unavailable functions:
139
// _mm_abs_pi8
140
// _mm_abs_pi16
141
// _mm_abs_pi32
142
// _mm_alignr_pi8
143
// _mm_hadd_pi16
144
// _mm_hadd_pi32
145
// _mm_hadds_pi16
146
// _mm_hsub_pi16
147
// _mm_hsub_pi32
148
// _mm_hsubs_pi16
149
// _mm_maddubs_pi16
150
// _mm_mulhrs_pi16
151
// _mm_shuffle_pi8
152
// _mm_sign_pi8
153
// _mm_sign_pi16
154
// _mm_sign_pi32
155
156
#endif /* __emscripten_tmmintrin_h__ */
157
158