Path: blob/main/system/include/compat/tmmintrin.h
6171 views
/*1* Copyright 2020 The Emscripten Authors. All rights reserved.2* Emscripten is available under two separate licenses, the MIT license and the3* University of Illinois/NCSA Open Source License. Both these licenses can be4* found in the LICENSE file.5*/6#ifndef __emscripten_tmmintrin_h__7#define __emscripten_tmmintrin_h__89#ifndef __SSSE3__10#error "SSSE3 instruction set not enabled"11#endif1213#include <pmmintrin.h>1415static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))16_mm_abs_epi8(__m128i __a)17{18return (__m128i)wasm_i8x16_abs((v128_t)__a);19}2021static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))22_mm_abs_epi16(__m128i __a)23{24return (__m128i)wasm_i16x8_abs((v128_t)__a);25}2627static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))28_mm_abs_epi32(__m128i __a)29{30return (__m128i)wasm_i32x4_abs((v128_t)__a);31}3233#define _mm_alignr_epi8(__a, __b, __count) \34((__count <= 16) \35? (_mm_or_si128(_mm_bslli_si128((__a), 16 - (((unsigned int)(__count)) & 0xFF)), _mm_bsrli_si128((__b), (((unsigned int)(__count)) & 0xFF)))) \36: (_mm_bsrli_si128((__a), (((unsigned int)(__count)) & 0xFF) - 16)))3738static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))39_mm_hadd_epi16(__m128i __a, __m128i __b)40{41return _mm_add_epi16((__m128i)wasm_i16x8_shuffle(__a, __b, 0, 2, 4, 6, 8, 10, 12, 14),42(__m128i)wasm_i16x8_shuffle(__a, __b, 1, 3, 5, 7, 9, 11, 13, 15));43}4445static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))46_mm_hadd_epi32(__m128i __a, __m128i __b)47{48return _mm_add_epi32((__m128i)_mm_shuffle_ps((__m128)__a, (__m128)__b, _MM_SHUFFLE(2, 0, 2, 0)),49(__m128i)_mm_shuffle_ps((__m128)__a, (__m128)__b, _MM_SHUFFLE(3, 1, 3, 1)));50}5152static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))53_mm_hadds_epi16(__m128i __a, __m128i __b)54{55return _mm_adds_epi16((__m128i)wasm_i16x8_shuffle(__a, __b, 0, 2, 4, 6, 8, 10, 12, 14),56(__m128i)wasm_i16x8_shuffle(__a, __b, 1, 3, 5, 7, 9, 11, 13, 15));57}5859static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))60_mm_hsub_epi16(__m128i __a, __m128i __b)61{62return _mm_sub_epi16((__m128i)wasm_i16x8_shuffle(__a, __b, 0, 2, 4, 6, 8, 10, 12, 14),63(__m128i)wasm_i16x8_shuffle(__a, __b, 1, 3, 5, 7, 9, 11, 13, 15));64}6566static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))67_mm_hsub_epi32(__m128i __a, __m128i __b)68{69return _mm_sub_epi32((__m128i)_mm_shuffle_ps((__m128)__a, (__m128)__b, _MM_SHUFFLE(2, 0, 2, 0)),70(__m128i)_mm_shuffle_ps((__m128)__a, (__m128)__b, _MM_SHUFFLE(3, 1, 3, 1)));71}727374static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))75_mm_hsubs_epi16(__m128i __a, __m128i __b)76{77return _mm_subs_epi16((__m128i)wasm_i16x8_shuffle(__a, __b, 0, 2, 4, 6, 8, 10, 12, 14),78(__m128i)wasm_i16x8_shuffle(__a, __b, 1, 3, 5, 7, 9, 11, 13, 15));79}8081static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))82_mm_maddubs_epi16(__m128i __a, __m128i __b)83{84return _mm_adds_epi16(85_mm_mullo_epi16(86_mm_and_si128(__a, _mm_set1_epi16(0x00FF)),87_mm_srai_epi16(_mm_slli_epi16(__b, 8), 8)),88_mm_mullo_epi16(_mm_srli_epi16(__a, 8), _mm_srai_epi16(__b, 8)));89}9091static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))92_mm_mulhrs_epi16(__m128i __a, __m128i __b)93{94v128_t __lo = wasm_i32x4_mul(wasm_i32x4_widen_low_i16x8((v128_t)__a), wasm_i32x4_widen_low_i16x8((v128_t)__b));95v128_t __hi = wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8((v128_t)__a), wasm_i32x4_widen_high_i16x8((v128_t)__b));96const v128_t __inc = wasm_i32x4_splat(0x4000);97__lo = wasm_i32x4_add(__lo, __inc);98__hi = wasm_i32x4_add(__hi, __inc);99__lo = wasm_i32x4_add(__lo, __lo);100__hi = wasm_i32x4_add(__hi, __hi);101return (__m128i)wasm_i16x8_shuffle(__lo, __hi, 1, 3, 5, 7, 9, 11, 13, 15);102}103104static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))105_mm_shuffle_epi8(__m128i __a, __m128i __b)106{107return (__m128i)wasm_v8x16_swizzle((v128_t)__a, (v128_t)_mm_and_si128(__b, _mm_set1_epi8(0x8F)));108}109110static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))111_mm_sign_epi8(__m128i __a, __m128i __b)112{113const __m128i __zero = _mm_setzero_si128();114__a = _mm_andnot_si128(_mm_cmpeq_epi8(__b, __zero), __a);115const __m128i __mask = _mm_cmpgt_epi8(__zero, __b);116return _mm_xor_si128(_mm_add_epi8(__a, __mask), __mask);117}118119static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))120_mm_sign_epi16(__m128i __a, __m128i __b)121{122const __m128i __zero = _mm_setzero_si128();123__a = _mm_andnot_si128(_mm_cmpeq_epi16(__b, __zero), __a);124const __m128i __mask = _mm_cmpgt_epi16(__zero, __b);125return _mm_xor_si128(_mm_add_epi16(__a, __mask), __mask);126}127128static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))129_mm_sign_epi32(__m128i __a, __m128i __b)130{131const __m128i __zero = _mm_setzero_si128();132__a = _mm_andnot_si128(_mm_cmpeq_epi32(__b, __zero), __a);133const __m128i __mask = _mm_cmpgt_epi32(__zero, __b);134return _mm_xor_si128(_mm_add_epi32(__a, __mask), __mask);135}136137// Unavailable functions:138// _mm_abs_pi8139// _mm_abs_pi16140// _mm_abs_pi32141// _mm_alignr_pi8142// _mm_hadd_pi16143// _mm_hadd_pi32144// _mm_hadds_pi16145// _mm_hsub_pi16146// _mm_hsub_pi32147// _mm_hsubs_pi16148// _mm_maddubs_pi16149// _mm_mulhrs_pi16150// _mm_shuffle_pi8151// _mm_sign_pi8152// _mm_sign_pi16153// _mm_sign_pi32154155#endif /* __emscripten_tmmintrin_h__ */156157158