Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
folium-app
GitHub Repository: folium-app/Folium
Path: blob/a-new-beginning/SharedDependencies/Sources/cryptopp/cham_simd.cpp
2 views
1
// cham_simd.cpp - written and placed in the public domain by Jeffrey Walton
2
//
3
// This source file uses intrinsics and built-ins to gain access to
4
// SSSE3, ARM NEON and ARMv8a, and Power7 Altivec instructions. A separate
5
// source file is needed because additional CXXFLAGS are required to enable
6
// the appropriate instruction sets in some build configurations.
7
8
#include "pch.h"
9
#include "config.h"
10
11
#include "cham.h"
12
#include "misc.h"
13
14
// Uncomment for benchmarking C++ against SSE or NEON.
15
// Do so in both cham.cpp and cham_simd.cpp.
16
// #undef CRYPTOPP_SSSE3_AVAILABLE
17
// #undef CRYPTOPP_ARM_NEON_AVAILABLE
18
19
#if (CRYPTOPP_SSSE3_AVAILABLE)
20
#include "adv_simd.h"
21
# include <pmmintrin.h>
22
# include <tmmintrin.h>
23
#endif
24
25
#if defined(__XOP__)
26
# if defined(CRYPTOPP_GCC_COMPATIBLE)
27
# include <x86intrin.h>
28
# endif
29
# include <ammintrin.h>
30
#endif // XOP
31
32
// Clang intrinsic casts, http://bugs.llvm.org/show_bug.cgi?id=20670
33
#define DOUBLE_CAST(x) ((double*)(void*)(x))
34
#define CONST_DOUBLE_CAST(x) ((const double*)(const void*)(x))
35
36
// Squash MS LNK4221 and libtool warnings
37
extern const char CHAM_SIMD_FNAME[] = __FILE__;
38
39
ANONYMOUS_NAMESPACE_BEGIN
40
41
using CryptoPP::word16;
42
using CryptoPP::word32;
43
44
#if (CRYPTOPP_SSSE3_AVAILABLE)
45
46
//////////////////////////////////////////////////////////////////////////
47
48
NAMESPACE_BEGIN(W32) // CHAM128, 32-bit word size
49
50
// Rotates every 32-bit lane of val left by R bits, where R is a
// compile-time constant. When __XOP__ is defined the XOP rotate
// intrinsic is used; otherwise the rotate is assembled from a left
// shift by R and a right shift by 32-R that are OR'ed together.
template <unsigned int R>
inline __m128i RotateLeft32(const __m128i& val)
{
#if defined(__XOP__)
    return _mm_roti_epi32(val, R);
#else
    // Bits that stay in the lane after moving up by R
    const __m128i upper = _mm_slli_epi32(val, R);
    // Bits that fall off the top and re-enter at the bottom
    const __m128i wrapped = _mm_srli_epi32(val, 32-R);
    return _mm_or_si128(upper, wrapped);
#endif
}
60
61
// Rotates every 32-bit lane of val right by R bits, where R is a
// compile-time constant. With __XOP__ the rotate is expressed as an
// XOP left rotate by 32-R; otherwise it is assembled from a left shift
// by 32-R and a right shift by R that are OR'ed together.
template <unsigned int R>
inline __m128i RotateRight32(const __m128i& val)
{
#if defined(__XOP__)
    return _mm_roti_epi32(val, 32-R);
#else
    // Bits that fall off the bottom and re-enter at the top
    const __m128i wrapped = _mm_slli_epi32(val, 32-R);
    // Bits that stay in the lane after moving down by R
    const __m128i lower = _mm_srli_epi32(val, R);
    return _mm_or_si128(wrapped, lower);
#endif
}
71
72
// Faster than two Shifts and an Or. Thanks to Louis Wingers and Bryan Weeks.
73
template <>
74
inline __m128i RotateLeft32<8>(const __m128i& val)
75
{
76
#if defined(__XOP__)
77
return _mm_roti_epi32(val, 8);
78
#else
79
const __m128i mask = _mm_set_epi8(14,13,12,15, 10,9,8,11, 6,5,4,7, 2,1,0,3);
80
return _mm_shuffle_epi8(val, mask);
81
#endif
82
}
83
84
// Faster than two Shifts and an Or. Thanks to Louis Wingers and Bryan Weeks.
85
template <>
86
inline __m128i RotateRight32<8>(const __m128i& val)
87
{
88
#if defined(__XOP__)
89
return _mm_roti_epi32(val, 32-8);
90
#else
91
const __m128i mask = _mm_set_epi8(12,15,14,13, 8,11,10,9, 4,7,6,5, 0,3,2,1);
92
return _mm_shuffle_epi8(val, mask);
93
#endif
94
}
95
96
template <unsigned int IDX>
97
inline __m128i UnpackXMM(const __m128i& a, const __m128i& b, const __m128i& c, const __m128i& d)
98
{
99
// Should not be instantiated
100
CRYPTOPP_UNUSED(a); CRYPTOPP_UNUSED(b);
101
CRYPTOPP_UNUSED(c); CRYPTOPP_UNUSED(d);
102
CRYPTOPP_ASSERT(0);
103
return _mm_setzero_si128();
104
}
105
106
template <>
107
inline __m128i UnpackXMM<0>(const __m128i& a, const __m128i& b, const __m128i& c, const __m128i& d)
108
{
109
// The shuffle converts to and from little-endian for SSE. A specialized
110
// CHAM implementation can avoid the shuffle by framing the data for
111
// encryption, decryption and benchmarks. The library cannot take the
112
// speed-up because of the byte oriented API.
113
const __m128i r1 = _mm_unpacklo_epi32(a, b);
114
const __m128i r2 = _mm_unpacklo_epi32(c, d);
115
return _mm_shuffle_epi8(_mm_unpacklo_epi64(r1, r2),
116
_mm_set_epi8(12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3));
117
}
118
119
template <>
120
inline __m128i UnpackXMM<1>(const __m128i& a, const __m128i& b, const __m128i& c, const __m128i& d)
121
{
122
// The shuffle converts to and from little-endian for SSE. A specialized
123
// CHAM implementation can avoid the shuffle by framing the data for
124
// encryption, decryption and benchmarks. The library cannot take the
125
// speed-up because of the byte oriented API.
126
const __m128i r1 = _mm_unpacklo_epi32(a, b);
127
const __m128i r2 = _mm_unpacklo_epi32(c, d);
128
return _mm_shuffle_epi8(_mm_unpackhi_epi64(r1, r2),
129
_mm_set_epi8(12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3));
130
}
131
132
template <>
133
inline __m128i UnpackXMM<2>(const __m128i& a, const __m128i& b, const __m128i& c, const __m128i& d)
134
{
135
// The shuffle converts to and from little-endian for SSE. A specialized
136
// CHAM implementation can avoid the shuffle by framing the data for
137
// encryption, decryption and benchmarks. The library cannot take the
138
// speed-up because of the byte oriented API.
139
const __m128i r1 = _mm_unpackhi_epi32(a, b);
140
const __m128i r2 = _mm_unpackhi_epi32(c, d);
141
return _mm_shuffle_epi8(_mm_unpacklo_epi64(r1, r2),
142
_mm_set_epi8(12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3));
143
}
144
145
template <>
146
inline __m128i UnpackXMM<3>(const __m128i& a, const __m128i& b, const __m128i& c, const __m128i& d)
147
{
148
// The shuffle converts to and from little-endian for SSE. A specialized
149
// CHAM implementation can avoid the shuffle by framing the data for
150
// encryption, decryption and benchmarks. The library cannot take the
151
// speed-up because of the byte oriented API.
152
const __m128i r1 = _mm_unpackhi_epi32(a, b);
153
const __m128i r2 = _mm_unpackhi_epi32(c, d);
154
return _mm_shuffle_epi8(_mm_unpackhi_epi64(r1, r2),
155
_mm_set_epi8(12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3));
156
}
157
158
template <unsigned int IDX>
159
inline __m128i UnpackXMM(const __m128i& v)
160
{
161
// Should not be instantiated
162
CRYPTOPP_UNUSED(v); CRYPTOPP_ASSERT(0);
163
return _mm_setzero_si128();
164
}
165
166
template <>
167
inline __m128i UnpackXMM<0>(const __m128i& v)
168
{
169
return _mm_shuffle_epi8(v, _mm_set_epi8(0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3));
170
}
171
172
template <>
173
inline __m128i UnpackXMM<1>(const __m128i& v)
174
{
175
return _mm_shuffle_epi8(v, _mm_set_epi8(4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7));
176
}
177
178
template <>
179
inline __m128i UnpackXMM<2>(const __m128i& v)
180
{
181
return _mm_shuffle_epi8(v, _mm_set_epi8(8,9,10,11, 8,9,10,11, 8,9,10,11, 8,9,10,11));
182
}
183
184
template <>
185
inline __m128i UnpackXMM<3>(const __m128i& v)
186
{
187
return _mm_shuffle_epi8(v, _mm_set_epi8(12,13,14,15, 12,13,14,15, 12,13,14,15, 12,13,14,15));
188
}
189
190
template <unsigned int IDX>
191
inline __m128i RepackXMM(const __m128i& a, const __m128i& b, const __m128i& c, const __m128i& d)
192
{
193
return UnpackXMM<IDX>(a, b, c, d);
194
}
195
196
template <unsigned int IDX>
197
inline __m128i RepackXMM(const __m128i& v)
198
{
199
return UnpackXMM<IDX>(v);
200
}
201
202
inline void CHAM128_Enc_Block(__m128i &block0,
203
const word32 *subkeys, unsigned int rounds)
204
{
205
// Rearrange the data for vectorization. UnpackXMM includes a
206
// little-endian swap for SSE. Thanks to Peter Cordes for help
207
// with packing and unpacking.
208
// [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 B1 C1 D1][A2 B2 C2 D2] ...
209
__m128i a = UnpackXMM<0>(block0);
210
__m128i b = UnpackXMM<1>(block0);
211
__m128i c = UnpackXMM<2>(block0);
212
__m128i d = UnpackXMM<3>(block0);
213
214
__m128i counter = _mm_set_epi32(0,0,0,0);
215
__m128i increment = _mm_set_epi32(1,1,1,1);
216
217
const unsigned int MASK = (rounds == 80 ? 7 : 15);
218
for (int i=0; i<static_cast<int>(rounds); i+=4)
219
{
220
__m128i k, k1, k2, t1, t2;
221
k = _mm_castpd_si128(_mm_load_sd(CONST_DOUBLE_CAST(&subkeys[(i+0) & MASK])));
222
223
// Shuffle out two subkeys
224
k1 = _mm_shuffle_epi8(k, _mm_set_epi8(3,2,1,0, 3,2,1,0, 3,2,1,0, 3,2,1,0));
225
k2 = _mm_shuffle_epi8(k, _mm_set_epi8(7,6,5,4, 7,6,5,4, 7,6,5,4, 7,6,5,4));
226
227
t1 = _mm_xor_si128(a, counter);
228
t2 = _mm_xor_si128(RotateLeft32<1>(b), k1);
229
a = RotateLeft32<8>(_mm_add_epi32(t1, t2));
230
231
counter = _mm_add_epi32(counter, increment);
232
233
t1 = _mm_xor_si128(b, counter);
234
t2 = _mm_xor_si128(RotateLeft32<8>(c), k2);
235
b = RotateLeft32<1>(_mm_add_epi32(t1, t2));
236
237
counter = _mm_add_epi32(counter, increment);
238
k = _mm_castpd_si128(_mm_load_sd(CONST_DOUBLE_CAST(&subkeys[(i+2) & MASK])));
239
240
// Shuffle out two subkeys
241
k1 = _mm_shuffle_epi8(k, _mm_set_epi8(3,2,1,0, 3,2,1,0, 3,2,1,0, 3,2,1,0));
242
k2 = _mm_shuffle_epi8(k, _mm_set_epi8(7,6,5,4, 7,6,5,4, 7,6,5,4, 7,6,5,4));
243
244
t1 = _mm_xor_si128(c, counter);
245
t2 = _mm_xor_si128(RotateLeft32<1>(d), k1);
246
c = RotateLeft32<8>(_mm_add_epi32(t1, t2));
247
248
counter = _mm_add_epi32(counter, increment);
249
250
t1 = _mm_xor_si128(d, counter);
251
t2 = _mm_xor_si128(RotateLeft32<8>(a), k2);
252
d = RotateLeft32<1>(_mm_add_epi32(t1, t2));
253
254
counter = _mm_add_epi32(counter, increment);
255
}
256
257
// [A1 B1 C1 D1][A2 B2 C2 D2] ... => [A1 A2 A3 A4][B1 B2 B3 B4] ...
258
block0 = RepackXMM<0>(a,b,c,d);
259
}
260
261
inline void CHAM128_Dec_Block(__m128i &block0,
262
const word32 *subkeys, unsigned int rounds)
263
{
264
// Rearrange the data for vectorization. UnpackXMM includes a
265
// little-endian swap for SSE. Thanks to Peter Cordes for help
266
// with packing and unpacking.
267
// [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 B1 C1 D1][A2 B2 C2 D2] ...
268
__m128i a = UnpackXMM<0>(block0);
269
__m128i b = UnpackXMM<1>(block0);
270
__m128i c = UnpackXMM<2>(block0);
271
__m128i d = UnpackXMM<3>(block0);
272
273
__m128i counter = _mm_set_epi32(rounds-1,rounds-1,rounds-1,rounds-1);
274
__m128i decrement = _mm_set_epi32(1,1,1,1);
275
276
const unsigned int MASK = (rounds == 80 ? 7 : 15);
277
for (int i = static_cast<int>(rounds)-1; i >= 0; i-=4)
278
{
279
__m128i k, k1, k2, t1, t2;
280
k = _mm_castpd_si128(_mm_load_sd(CONST_DOUBLE_CAST(&subkeys[(i-1) & MASK])));
281
282
// Shuffle out two subkeys
283
k1 = _mm_shuffle_epi8(k, _mm_set_epi8(7,6,5,4, 7,6,5,4, 7,6,5,4, 7,6,5,4));
284
k2 = _mm_shuffle_epi8(k, _mm_set_epi8(3,2,1,0, 3,2,1,0, 3,2,1,0, 3,2,1,0));
285
286
// Odd round
287
t1 = RotateRight32<1>(d);
288
t2 = _mm_xor_si128(RotateLeft32<8>(a), k1);
289
d = _mm_xor_si128(_mm_sub_epi32(t1, t2), counter);
290
291
counter = _mm_sub_epi32(counter, decrement);
292
293
// Even round
294
t1 = RotateRight32<8>(c);
295
t2 = _mm_xor_si128(RotateLeft32<1>(d), k2);
296
c = _mm_xor_si128(_mm_sub_epi32(t1, t2), counter);
297
298
counter = _mm_sub_epi32(counter, decrement);
299
k = _mm_castpd_si128(_mm_load_sd(CONST_DOUBLE_CAST(&subkeys[(i-3) & MASK])));
300
301
// Shuffle out two subkeys
302
k1 = _mm_shuffle_epi8(k, _mm_set_epi8(7,6,5,4, 7,6,5,4, 7,6,5,4, 7,6,5,4));
303
k2 = _mm_shuffle_epi8(k, _mm_set_epi8(3,2,1,0, 3,2,1,0, 3,2,1,0, 3,2,1,0));
304
305
// Odd round
306
t1 = RotateRight32<1>(b);
307
t2 = _mm_xor_si128(RotateLeft32<8>(c), k1);
308
b = _mm_xor_si128(_mm_sub_epi32(t1, t2), counter);
309
310
counter = _mm_sub_epi32(counter, decrement);
311
312
// Even round
313
t1 = RotateRight32<8>(a);
314
t2 = _mm_xor_si128(RotateLeft32<1>(b), k2);
315
a = _mm_xor_si128(_mm_sub_epi32(t1, t2), counter);
316
317
counter = _mm_sub_epi32(counter, decrement);
318
}
319
320
// [A1 B1 C1 D1][A2 B2 C2 D2] ... => [A1 A2 A3 A4][B1 B2 B3 B4] ...
321
block0 = RepackXMM<0>(a,b,c,d);
322
}
323
324
inline void CHAM128_Enc_4_Blocks(__m128i &block0, __m128i &block1,
325
__m128i &block2, __m128i &block3, const word32 *subkeys, unsigned int rounds)
326
{
327
// Rearrange the data for vectorization. UnpackXMM includes a
328
// little-endian swap for SSE. Thanks to Peter Cordes for help
329
// with packing and unpacking.
330
// [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 B1 C1 D1][A2 B2 C2 D2] ...
331
__m128i a = UnpackXMM<0>(block0, block1, block2, block3);
332
__m128i b = UnpackXMM<1>(block0, block1, block2, block3);
333
__m128i c = UnpackXMM<2>(block0, block1, block2, block3);
334
__m128i d = UnpackXMM<3>(block0, block1, block2, block3);
335
336
__m128i counter = _mm_set_epi32(0,0,0,0);
337
__m128i increment = _mm_set_epi32(1,1,1,1);
338
339
const unsigned int MASK = (rounds == 80 ? 7 : 15);
340
for (int i=0; i<static_cast<int>(rounds); i+=4)
341
{
342
__m128i k, k1, k2, t1, t2;
343
k = _mm_castpd_si128(_mm_load_sd(CONST_DOUBLE_CAST(&subkeys[(i+0) & MASK])));
344
345
// Shuffle out two subkeys
346
k1 = _mm_shuffle_epi8(k, _mm_set_epi8(3,2,1,0, 3,2,1,0, 3,2,1,0, 3,2,1,0));
347
k2 = _mm_shuffle_epi8(k, _mm_set_epi8(7,6,5,4, 7,6,5,4, 7,6,5,4, 7,6,5,4));
348
349
t1 = _mm_xor_si128(a, counter);
350
t2 = _mm_xor_si128(RotateLeft32<1>(b), k1);
351
a = RotateLeft32<8>(_mm_add_epi32(t1, t2));
352
353
counter = _mm_add_epi32(counter, increment);
354
355
t1 = _mm_xor_si128(b, counter);
356
t2 = _mm_xor_si128(RotateLeft32<8>(c), k2);
357
b = RotateLeft32<1>(_mm_add_epi32(t1, t2));
358
359
counter = _mm_add_epi32(counter, increment);
360
k = _mm_castpd_si128(_mm_load_sd(CONST_DOUBLE_CAST(&subkeys[(i+2) & MASK])));
361
362
// Shuffle out two subkeys
363
k1 = _mm_shuffle_epi8(k, _mm_set_epi8(3,2,1,0, 3,2,1,0, 3,2,1,0, 3,2,1,0));
364
k2 = _mm_shuffle_epi8(k, _mm_set_epi8(7,6,5,4, 7,6,5,4, 7,6,5,4, 7,6,5,4));
365
366
t1 = _mm_xor_si128(c, counter);
367
t2 = _mm_xor_si128(RotateLeft32<1>(d), k1);
368
c = RotateLeft32<8>(_mm_add_epi32(t1, t2));
369
370
counter = _mm_add_epi32(counter, increment);
371
372
t1 = _mm_xor_si128(d, counter);
373
t2 = _mm_xor_si128(RotateLeft32<8>(a), k2);
374
d = RotateLeft32<1>(_mm_add_epi32(t1, t2));
375
376
counter = _mm_add_epi32(counter, increment);
377
}
378
379
// [A1 B1 C1 D1][A2 B2 C2 D2] ... => [A1 A2 A3 A4][B1 B2 B3 B4] ...
380
block0 = RepackXMM<0>(a,b,c,d);
381
block1 = RepackXMM<1>(a,b,c,d);
382
block2 = RepackXMM<2>(a,b,c,d);
383
block3 = RepackXMM<3>(a,b,c,d);
384
}
385
386
inline void CHAM128_Dec_4_Blocks(__m128i &block0, __m128i &block1,
387
__m128i &block2, __m128i &block3, const word32 *subkeys, unsigned int rounds)
388
{
389
// Rearrange the data for vectorization. UnpackXMM includes a
390
// little-endian swap for SSE. Thanks to Peter Cordes for help
391
// with packing and unpacking.
392
// [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 B1 C1 D1][A2 B2 C2 D2] ...
393
__m128i a = UnpackXMM<0>(block0, block1, block2, block3);
394
__m128i b = UnpackXMM<1>(block0, block1, block2, block3);
395
__m128i c = UnpackXMM<2>(block0, block1, block2, block3);
396
__m128i d = UnpackXMM<3>(block0, block1, block2, block3);
397
398
__m128i counter = _mm_set_epi32(rounds-1,rounds-1,rounds-1,rounds-1);
399
__m128i decrement = _mm_set_epi32(1,1,1,1);
400
401
const unsigned int MASK = (rounds == 80 ? 7 : 15);
402
for (int i = static_cast<int>(rounds)-1; i >= 0; i-=4)
403
{
404
__m128i k, k1, k2, t1, t2;
405
k = _mm_castpd_si128(_mm_load_sd(CONST_DOUBLE_CAST(&subkeys[(i-1) & MASK])));
406
407
// Shuffle out two subkeys
408
k1 = _mm_shuffle_epi8(k, _mm_set_epi8(7,6,5,4, 7,6,5,4, 7,6,5,4, 7,6,5,4));
409
k2 = _mm_shuffle_epi8(k, _mm_set_epi8(3,2,1,0, 3,2,1,0, 3,2,1,0, 3,2,1,0));
410
411
// Odd round
412
t1 = RotateRight32<1>(d);
413
t2 = _mm_xor_si128(RotateLeft32<8>(a), k1);
414
d = _mm_xor_si128(_mm_sub_epi32(t1, t2), counter);
415
416
counter = _mm_sub_epi32(counter, decrement);
417
418
// Even round
419
t1 = RotateRight32<8>(c);
420
t2 = _mm_xor_si128(RotateLeft32<1>(d), k2);
421
c = _mm_xor_si128(_mm_sub_epi32(t1, t2), counter);
422
423
counter = _mm_sub_epi32(counter, decrement);
424
k = _mm_castpd_si128(_mm_load_sd(CONST_DOUBLE_CAST(&subkeys[(i-3) & MASK])));
425
426
// Shuffle out two subkeys
427
k1 = _mm_shuffle_epi8(k, _mm_set_epi8(7,6,5,4, 7,6,5,4, 7,6,5,4, 7,6,5,4));
428
k2 = _mm_shuffle_epi8(k, _mm_set_epi8(3,2,1,0, 3,2,1,0, 3,2,1,0, 3,2,1,0));
429
430
// Odd round
431
t1 = RotateRight32<1>(b);
432
t2 = _mm_xor_si128(RotateLeft32<8>(c), k1);
433
b = _mm_xor_si128(_mm_sub_epi32(t1, t2), counter);
434
435
counter = _mm_sub_epi32(counter, decrement);
436
437
// Even round
438
t1 = RotateRight32<8>(a);
439
t2 = _mm_xor_si128(RotateLeft32<1>(b), k2);
440
a = _mm_xor_si128(_mm_sub_epi32(t1, t2), counter);
441
442
counter = _mm_sub_epi32(counter, decrement);
443
}
444
445
// [A1 B1 C1 D1][A2 B2 C2 D2] ... => [A1 A2 A3 A4][B1 B2 B3 B4] ...
446
block0 = RepackXMM<0>(a,b,c,d);
447
block1 = RepackXMM<1>(a,b,c,d);
448
block2 = RepackXMM<2>(a,b,c,d);
449
block3 = RepackXMM<3>(a,b,c,d);
450
}
451
452
//////////////////////////////////////////////////////////////////////////
453
454
NAMESPACE_END // W32
455
456
#endif // CRYPTOPP_SSSE3_AVAILABLE
457
458
ANONYMOUS_NAMESPACE_END
459
460
NAMESPACE_BEGIN(CryptoPP)
461
462
// The guard tests the VALUE of CRYPTOPP_SSSE3_AVAILABLE, matching the
// guards around the intrinsic headers and the W32 helpers earlier in this
// file. Testing only defined() would compile these wrappers when the
// macro is defined as 0, while W32:: and adv_simd.h are not available.
#if (CRYPTOPP_SSSE3_AVAILABLE)

// SSSE3 entry point for bulk CHAM-128 encryption. Hands the single-block
// and four-block encryption kernels from the W32 namespace, together with
// all arguments unchanged, to AdvancedProcessBlocks128_4x1_SSE and returns
// that function's result.
// NOTE(review): the meaning of the return value and of flags is defined by
// AdvancedProcessBlocks128_4x1_SSE (adv_simd.h) - confirm there.
size_t CHAM128_Enc_AdvancedProcessBlocks_SSSE3(const word32* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks128_4x1_SSE(W32::CHAM128_Enc_Block, W32::CHAM128_Enc_4_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}

// SSSE3 entry point for bulk CHAM-128 decryption. Hands the single-block
// and four-block decryption kernels from the W32 namespace, together with
// all arguments unchanged, to AdvancedProcessBlocks128_4x1_SSE and returns
// that function's result.
// NOTE(review): the meaning of the return value and of flags is defined by
// AdvancedProcessBlocks128_4x1_SSE (adv_simd.h) - confirm there.
size_t CHAM128_Dec_AdvancedProcessBlocks_SSSE3(const word32* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks128_4x1_SSE(W32::CHAM128_Dec_Block, W32::CHAM128_Dec_4_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}

#endif  // CRYPTOPP_SSSE3_AVAILABLE
477
478
NAMESPACE_END
479
480