Path: blob/master/tools/android-sdk/renderscript/clang-include/avx512erintrin.h
496 views
/*===---- avx512erintrin.h - AVX512ER intrinsics ---------------------------===1*2* Permission is hereby granted, free of charge, to any person obtaining a copy3* of this software and associated documentation files (the "Software"), to deal4* in the Software without restriction, including without limitation the rights5* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell6* copies of the Software, and to permit persons to whom the Software is7* furnished to do so, subject to the following conditions:8*9* The above copyright notice and this permission notice shall be included in10* all copies or substantial portions of the Software.11*12* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR13* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,14* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE15* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER16* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,17* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN18* THE SOFTWARE.19*20*===-----------------------------------------------------------------------===21*/22#ifndef __IMMINTRIN_H23#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."24#endif2526#ifndef __AVX512ERINTRIN_H27#define __AVX512ERINTRIN_H2829// exp2a2330#define _mm512_exp2a23_round_pd(A, R) __extension__ ({ \31(__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \32(__v8df)_mm512_setzero_pd(), \33(__mmask8)-1, (int)(R)); })3435#define _mm512_mask_exp2a23_round_pd(S, M, A, R) __extension__ ({ \36(__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \37(__v8df)(__m512d)(S), (__mmask8)(M), \38(int)(R)); })3940#define _mm512_maskz_exp2a23_round_pd(M, A, R) __extension__ ({ \41(__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \42(__v8df)_mm512_setzero_pd(), \43(__mmask8)(M), (int)(R)); })4445#define _mm512_exp2a23_pd(A) \46_mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION)4748#define _mm512_mask_exp2a23_pd(S, M, A) \49_mm512_mask_exp2a23_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)5051#define _mm512_maskz_exp2a23_pd(M, A) \52_mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)5354#define _mm512_exp2a23_round_ps(A, R) __extension__ ({ \55(__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \56(__v16sf)_mm512_setzero_ps(), \57(__mmask16)-1, (int)(R)); })5859#define _mm512_mask_exp2a23_round_ps(S, M, A, R) __extension__ ({ \60(__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \61(__v16sf)(__m512)(S), (__mmask16)(M), \62(int)(R)); })6364#define _mm512_maskz_exp2a23_round_ps(M, A, R) __extension__ ({ \65(__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \66(__v16sf)_mm512_setzero_ps(), \67(__mmask16)(M), (int)(R)); })6869#define _mm512_exp2a23_ps(A) \70_mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION)7172#define _mm512_mask_exp2a23_ps(S, M, A) \73_mm512_mask_exp2a23_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)7475#define _mm512_maskz_exp2a23_ps(M, A) \76_mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)7778// rsqrt2879#define _mm512_rsqrt28_round_pd(A, R) __extension__ ({ \80(__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \81(__v8df)_mm512_setzero_pd(), \82(__mmask8)-1, (int)(R)); })8384#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) __extension__ ({ \85(__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \86(__v8df)(__m512d)(S), (__mmask8)(M), \87(int)(R)); })8889#define _mm512_maskz_rsqrt28_round_pd(M, A, R) __extension__ ({ \90(__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \91(__v8df)_mm512_setzero_pd(), \92(__mmask8)(M), (int)(R)); })9394#define _mm512_rsqrt28_pd(A) \95_mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION)9697#define _mm512_mask_rsqrt28_pd(S, M, A) \98_mm512_mask_rsqrt28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)99100#define _mm512_maskz_rsqrt28_pd(M, A) \101_mm512_maskz_rsqrt28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)102103#define _mm512_rsqrt28_round_ps(A, R) __extension__ ({ \104(__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \105(__v16sf)_mm512_setzero_ps(), \106(__mmask16)-1, (int)(R)); })107108#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) __extension__ ({ \109(__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \110(__v16sf)(__m512)(S), (__mmask16)(M), \111(int)(R)); })112113#define _mm512_maskz_rsqrt28_round_ps(M, A, R) __extension__ ({ \114(__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \115(__v16sf)_mm512_setzero_ps(), \116(__mmask16)(M), (int)(R)); })117118#define _mm512_rsqrt28_ps(A) \119_mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION)120121#define _mm512_mask_rsqrt28_ps(S, M, A) \122_mm512_mask_rsqrt28_round_ps((S), (M), A, _MM_FROUND_CUR_DIRECTION)123124#define _mm512_maskz_rsqrt28_ps(M, A) \125_mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)126127#define _mm_rsqrt28_round_ss(A, B, R) __extension__ ({ \128(__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \129(__v4sf)(__m128)(B), \130(__v4sf)_mm_setzero_ps(), \131(__mmask8)-1, (int)(R)); })132133#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) __extension__ ({ \134(__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \135(__v4sf)(__m128)(B), \136(__v4sf)(__m128)(S), \137(__mmask8)(M), (int)(R)); })138139#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) __extension__ ({ \140(__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \141(__v4sf)(__m128)(B), \142(__v4sf)_mm_setzero_ps(), \143(__mmask8)(M), (int)(R)); })144145#define _mm_rsqrt28_ss(A, B) \146_mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)147148#define _mm_mask_rsqrt28_ss(S, M, A, B) \149_mm_mask_rsqrt28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)150151#define _mm_maskz_rsqrt28_ss(M, A, B) \152_mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)153154#define _mm_rsqrt28_round_sd(A, B, R) __extension__ ({ \155(__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \156(__v2df)(__m128d)(B), \157(__v2df)_mm_setzero_pd(), \158(__mmask8)-1, (int)(R)); })159160#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) __extension__ ({ \161(__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \162(__v2df)(__m128d)(B), \163(__v2df)(__m128d)(S), \164(__mmask8)(M), (int)(R)); })165166#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) __extension__ ({ \167(__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \168(__v2df)(__m128d)(B), \169(__v2df)_mm_setzero_pd(), \170(__mmask8)(M), (int)(R)); })171172#define _mm_rsqrt28_sd(A, B) \173_mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)174175#define _mm_mask_rsqrt28_sd(S, M, A, B) \176_mm_mask_rsqrt28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)177178#define _mm_maskz_rsqrt28_sd(M, A, B) \179_mm_maskz_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)180181// rcp28182#define _mm512_rcp28_round_pd(A, R) __extension__ ({ \183(__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \184(__v8df)_mm512_setzero_pd(), \185(__mmask8)-1, (int)(R)); })186187#define _mm512_mask_rcp28_round_pd(S, M, A, R) __extension__ ({ \188(__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \189(__v8df)(__m512d)(S), (__mmask8)(M), \190(int)(R)); })191192#define _mm512_maskz_rcp28_round_pd(M, A, R) __extension__ ({ \193(__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \194(__v8df)_mm512_setzero_pd(), \195(__mmask8)(M), (int)(R)); })196197#define _mm512_rcp28_pd(A) \198_mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION)199200#define _mm512_mask_rcp28_pd(S, M, A) \201_mm512_mask_rcp28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)202203#define _mm512_maskz_rcp28_pd(M, A) \204_mm512_maskz_rcp28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)205206#define _mm512_rcp28_round_ps(A, R) __extension__ ({ \207(__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \208(__v16sf)_mm512_setzero_ps(), \209(__mmask16)-1, (int)(R)); })210211#define _mm512_mask_rcp28_round_ps(S, M, A, R) __extension__ ({ \212(__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \213(__v16sf)(__m512)(S), (__mmask16)(M), \214(int)(R)); })215216#define _mm512_maskz_rcp28_round_ps(M, A, R) __extension__ ({ \217(__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \218(__v16sf)_mm512_setzero_ps(), \219(__mmask16)(M), (int)(R)); })220221#define _mm512_rcp28_ps(A) \222_mm512_rcp28_round_ps((A), _MM_FROUND_CUR_DIRECTION)223224#define _mm512_mask_rcp28_ps(S, M, A) \225_mm512_mask_rcp28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)226227#define _mm512_maskz_rcp28_ps(M, A) \228_mm512_maskz_rcp28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)229230#define _mm_rcp28_round_ss(A, B, R) __extension__ ({ \231(__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \232(__v4sf)(__m128)(B), \233(__v4sf)_mm_setzero_ps(), \234(__mmask8)-1, (int)(R)); })235236#define _mm_mask_rcp28_round_ss(S, M, A, B, R) __extension__ ({ \237(__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \238(__v4sf)(__m128)(B), \239(__v4sf)(__m128)(S), \240(__mmask8)(M), (int)(R)); })241242#define _mm_maskz_rcp28_round_ss(M, A, B, R) __extension__ ({ \243(__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \244(__v4sf)(__m128)(B), \245(__v4sf)_mm_setzero_ps(), \246(__mmask8)(M), (int)(R)); })247248#define _mm_rcp28_ss(A, B) \249_mm_rcp28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)250251#define _mm_mask_rcp28_ss(S, M, A, B) \252_mm_mask_rcp28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)253254#define _mm_maskz_rcp28_ss(M, A, B) \255_mm_maskz_rcp28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)256257#define _mm_rcp28_round_sd(A, B, R) __extension__ ({ \258(__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \259(__v2df)(__m128d)(B), \260(__v2df)_mm_setzero_pd(), \261(__mmask8)-1, (int)(R)); })262263#define _mm_mask_rcp28_round_sd(S, M, A, B, R) __extension__ ({ \264(__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \265(__v2df)(__m128d)(B), \266(__v2df)(__m128d)(S), \267(__mmask8)(M), (int)(R)); })268269#define _mm_maskz_rcp28_round_sd(M, A, B, R) __extension__ ({ \270(__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \271(__v2df)(__m128d)(B), \272(__v2df)_mm_setzero_pd(), \273(__mmask8)(M), (int)(R)); })274275#define _mm_rcp28_sd(A, B) \276_mm_rcp28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)277278#define _mm_mask_rcp28_sd(S, M, A, B) \279_mm_mask_rcp28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)280281#define _mm_maskz_rcp28_sd(M, A, B) \282_mm_maskz_rcp28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)283284#endif // __AVX512ERINTRIN_H285286287