/* Path: blob/master/tools/android-sdk/renderscript/clang-include/avx2intrin.h
 * 496 views
 */
/*===---- avx2intrin.h - AVX2 intrinsics -----------------------------------===1*2* Permission is hereby granted, free of charge, to any person obtaining a copy3* of this software and associated documentation files (the "Software"), to deal4* in the Software without restriction, including without limitation the rights5* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell6* copies of the Software, and to permit persons to whom the Software is7* furnished to do so, subject to the following conditions:8*9* The above copyright notice and this permission notice shall be included in10* all copies or substantial portions of the Software.11*12* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR13* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,14* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE15* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER16* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,17* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN18* THE SOFTWARE.19*20*===-----------------------------------------------------------------------===21*/2223#ifndef __IMMINTRIN_H24#error "Never use <avx2intrin.h> directly; include <immintrin.h> instead."25#endif2627#ifndef __AVX2INTRIN_H28#define __AVX2INTRIN_H2930/* Define the default attributes for the functions in this file. */31#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx2")))3233/* SSE4 Multiple Packed Sums of Absolute Difference. 
*/34#define _mm256_mpsadbw_epu8(X, Y, M) \35(__m256i)__builtin_ia32_mpsadbw256((__v32qi)(__m256i)(X), \36(__v32qi)(__m256i)(Y), (int)(M))3738static __inline__ __m256i __DEFAULT_FN_ATTRS39_mm256_abs_epi8(__m256i __a)40{41return (__m256i)__builtin_ia32_pabsb256((__v32qi)__a);42}4344static __inline__ __m256i __DEFAULT_FN_ATTRS45_mm256_abs_epi16(__m256i __a)46{47return (__m256i)__builtin_ia32_pabsw256((__v16hi)__a);48}4950static __inline__ __m256i __DEFAULT_FN_ATTRS51_mm256_abs_epi32(__m256i __a)52{53return (__m256i)__builtin_ia32_pabsd256((__v8si)__a);54}5556static __inline__ __m256i __DEFAULT_FN_ATTRS57_mm256_packs_epi16(__m256i __a, __m256i __b)58{59return (__m256i)__builtin_ia32_packsswb256((__v16hi)__a, (__v16hi)__b);60}6162static __inline__ __m256i __DEFAULT_FN_ATTRS63_mm256_packs_epi32(__m256i __a, __m256i __b)64{65return (__m256i)__builtin_ia32_packssdw256((__v8si)__a, (__v8si)__b);66}6768static __inline__ __m256i __DEFAULT_FN_ATTRS69_mm256_packus_epi16(__m256i __a, __m256i __b)70{71return (__m256i)__builtin_ia32_packuswb256((__v16hi)__a, (__v16hi)__b);72}7374static __inline__ __m256i __DEFAULT_FN_ATTRS75_mm256_packus_epi32(__m256i __V1, __m256i __V2)76{77return (__m256i) __builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2);78}7980static __inline__ __m256i __DEFAULT_FN_ATTRS81_mm256_add_epi8(__m256i __a, __m256i __b)82{83return (__m256i)((__v32qu)__a + (__v32qu)__b);84}8586static __inline__ __m256i __DEFAULT_FN_ATTRS87_mm256_add_epi16(__m256i __a, __m256i __b)88{89return (__m256i)((__v16hu)__a + (__v16hu)__b);90}9192static __inline__ __m256i __DEFAULT_FN_ATTRS93_mm256_add_epi32(__m256i __a, __m256i __b)94{95return (__m256i)((__v8su)__a + (__v8su)__b);96}9798static __inline__ __m256i __DEFAULT_FN_ATTRS99_mm256_add_epi64(__m256i __a, __m256i __b)100{101return (__m256i)((__v4du)__a + (__v4du)__b);102}103104static __inline__ __m256i __DEFAULT_FN_ATTRS105_mm256_adds_epi8(__m256i __a, __m256i __b)106{107return (__m256i)__builtin_ia32_paddsb256((__v32qi)__a, 
(__v32qi)__b);108}109110static __inline__ __m256i __DEFAULT_FN_ATTRS111_mm256_adds_epi16(__m256i __a, __m256i __b)112{113return (__m256i)__builtin_ia32_paddsw256((__v16hi)__a, (__v16hi)__b);114}115116static __inline__ __m256i __DEFAULT_FN_ATTRS117_mm256_adds_epu8(__m256i __a, __m256i __b)118{119return (__m256i)__builtin_ia32_paddusb256((__v32qi)__a, (__v32qi)__b);120}121122static __inline__ __m256i __DEFAULT_FN_ATTRS123_mm256_adds_epu16(__m256i __a, __m256i __b)124{125return (__m256i)__builtin_ia32_paddusw256((__v16hi)__a, (__v16hi)__b);126}127128#define _mm256_alignr_epi8(a, b, n) __extension__ ({ \129(__m256i)__builtin_ia32_palignr256((__v32qi)(__m256i)(a), \130(__v32qi)(__m256i)(b), (n)); })131132static __inline__ __m256i __DEFAULT_FN_ATTRS133_mm256_and_si256(__m256i __a, __m256i __b)134{135return (__m256i)((__v4du)__a & (__v4du)__b);136}137138static __inline__ __m256i __DEFAULT_FN_ATTRS139_mm256_andnot_si256(__m256i __a, __m256i __b)140{141return (__m256i)(~(__v4du)__a & (__v4du)__b);142}143144static __inline__ __m256i __DEFAULT_FN_ATTRS145_mm256_avg_epu8(__m256i __a, __m256i __b)146{147return (__m256i)__builtin_ia32_pavgb256((__v32qi)__a, (__v32qi)__b);148}149150static __inline__ __m256i __DEFAULT_FN_ATTRS151_mm256_avg_epu16(__m256i __a, __m256i __b)152{153return (__m256i)__builtin_ia32_pavgw256((__v16hi)__a, (__v16hi)__b);154}155156static __inline__ __m256i __DEFAULT_FN_ATTRS157_mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M)158{159return (__m256i)__builtin_ia32_pblendvb256((__v32qi)__V1, (__v32qi)__V2,160(__v32qi)__M);161}162163#define _mm256_blend_epi16(V1, V2, M) __extension__ ({ \164(__m256i)__builtin_shufflevector((__v16hi)(__m256i)(V1), \165(__v16hi)(__m256i)(V2), \166(((M) & 0x01) ? 16 : 0), \167(((M) & 0x02) ? 17 : 1), \168(((M) & 0x04) ? 18 : 2), \169(((M) & 0x08) ? 19 : 3), \170(((M) & 0x10) ? 20 : 4), \171(((M) & 0x20) ? 21 : 5), \172(((M) & 0x40) ? 22 : 6), \173(((M) & 0x80) ? 23 : 7), \174(((M) & 0x01) ? 24 : 8), \175(((M) & 0x02) ? 
25 : 9), \176(((M) & 0x04) ? 26 : 10), \177(((M) & 0x08) ? 27 : 11), \178(((M) & 0x10) ? 28 : 12), \179(((M) & 0x20) ? 29 : 13), \180(((M) & 0x40) ? 30 : 14), \181(((M) & 0x80) ? 31 : 15)); })182183static __inline__ __m256i __DEFAULT_FN_ATTRS184_mm256_cmpeq_epi8(__m256i __a, __m256i __b)185{186return (__m256i)((__v32qi)__a == (__v32qi)__b);187}188189static __inline__ __m256i __DEFAULT_FN_ATTRS190_mm256_cmpeq_epi16(__m256i __a, __m256i __b)191{192return (__m256i)((__v16hi)__a == (__v16hi)__b);193}194195static __inline__ __m256i __DEFAULT_FN_ATTRS196_mm256_cmpeq_epi32(__m256i __a, __m256i __b)197{198return (__m256i)((__v8si)__a == (__v8si)__b);199}200201static __inline__ __m256i __DEFAULT_FN_ATTRS202_mm256_cmpeq_epi64(__m256i __a, __m256i __b)203{204return (__m256i)((__v4di)__a == (__v4di)__b);205}206207static __inline__ __m256i __DEFAULT_FN_ATTRS208_mm256_cmpgt_epi8(__m256i __a, __m256i __b)209{210/* This function always performs a signed comparison, but __v32qi is a char211which may be signed or unsigned, so use __v32qs. 
*/212return (__m256i)((__v32qs)__a > (__v32qs)__b);213}214215static __inline__ __m256i __DEFAULT_FN_ATTRS216_mm256_cmpgt_epi16(__m256i __a, __m256i __b)217{218return (__m256i)((__v16hi)__a > (__v16hi)__b);219}220221static __inline__ __m256i __DEFAULT_FN_ATTRS222_mm256_cmpgt_epi32(__m256i __a, __m256i __b)223{224return (__m256i)((__v8si)__a > (__v8si)__b);225}226227static __inline__ __m256i __DEFAULT_FN_ATTRS228_mm256_cmpgt_epi64(__m256i __a, __m256i __b)229{230return (__m256i)((__v4di)__a > (__v4di)__b);231}232233static __inline__ __m256i __DEFAULT_FN_ATTRS234_mm256_hadd_epi16(__m256i __a, __m256i __b)235{236return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b);237}238239static __inline__ __m256i __DEFAULT_FN_ATTRS240_mm256_hadd_epi32(__m256i __a, __m256i __b)241{242return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b);243}244245static __inline__ __m256i __DEFAULT_FN_ATTRS246_mm256_hadds_epi16(__m256i __a, __m256i __b)247{248return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b);249}250251static __inline__ __m256i __DEFAULT_FN_ATTRS252_mm256_hsub_epi16(__m256i __a, __m256i __b)253{254return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b);255}256257static __inline__ __m256i __DEFAULT_FN_ATTRS258_mm256_hsub_epi32(__m256i __a, __m256i __b)259{260return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b);261}262263static __inline__ __m256i __DEFAULT_FN_ATTRS264_mm256_hsubs_epi16(__m256i __a, __m256i __b)265{266return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b);267}268269static __inline__ __m256i __DEFAULT_FN_ATTRS270_mm256_maddubs_epi16(__m256i __a, __m256i __b)271{272return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)__a, (__v32qi)__b);273}274275static __inline__ __m256i __DEFAULT_FN_ATTRS276_mm256_madd_epi16(__m256i __a, __m256i __b)277{278return (__m256i)__builtin_ia32_pmaddwd256((__v16hi)__a, (__v16hi)__b);279}280281static __inline__ __m256i 
__DEFAULT_FN_ATTRS282_mm256_max_epi8(__m256i __a, __m256i __b)283{284return (__m256i)__builtin_ia32_pmaxsb256((__v32qi)__a, (__v32qi)__b);285}286287static __inline__ __m256i __DEFAULT_FN_ATTRS288_mm256_max_epi16(__m256i __a, __m256i __b)289{290return (__m256i)__builtin_ia32_pmaxsw256((__v16hi)__a, (__v16hi)__b);291}292293static __inline__ __m256i __DEFAULT_FN_ATTRS294_mm256_max_epi32(__m256i __a, __m256i __b)295{296return (__m256i)__builtin_ia32_pmaxsd256((__v8si)__a, (__v8si)__b);297}298299static __inline__ __m256i __DEFAULT_FN_ATTRS300_mm256_max_epu8(__m256i __a, __m256i __b)301{302return (__m256i)__builtin_ia32_pmaxub256((__v32qi)__a, (__v32qi)__b);303}304305static __inline__ __m256i __DEFAULT_FN_ATTRS306_mm256_max_epu16(__m256i __a, __m256i __b)307{308return (__m256i)__builtin_ia32_pmaxuw256((__v16hi)__a, (__v16hi)__b);309}310311static __inline__ __m256i __DEFAULT_FN_ATTRS312_mm256_max_epu32(__m256i __a, __m256i __b)313{314return (__m256i)__builtin_ia32_pmaxud256((__v8si)__a, (__v8si)__b);315}316317static __inline__ __m256i __DEFAULT_FN_ATTRS318_mm256_min_epi8(__m256i __a, __m256i __b)319{320return (__m256i)__builtin_ia32_pminsb256((__v32qi)__a, (__v32qi)__b);321}322323static __inline__ __m256i __DEFAULT_FN_ATTRS324_mm256_min_epi16(__m256i __a, __m256i __b)325{326return (__m256i)__builtin_ia32_pminsw256((__v16hi)__a, (__v16hi)__b);327}328329static __inline__ __m256i __DEFAULT_FN_ATTRS330_mm256_min_epi32(__m256i __a, __m256i __b)331{332return (__m256i)__builtin_ia32_pminsd256((__v8si)__a, (__v8si)__b);333}334335static __inline__ __m256i __DEFAULT_FN_ATTRS336_mm256_min_epu8(__m256i __a, __m256i __b)337{338return (__m256i)__builtin_ia32_pminub256((__v32qi)__a, (__v32qi)__b);339}340341static __inline__ __m256i __DEFAULT_FN_ATTRS342_mm256_min_epu16(__m256i __a, __m256i __b)343{344return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__a, (__v16hi)__b);345}346347static __inline__ __m256i __DEFAULT_FN_ATTRS348_mm256_min_epu32(__m256i __a, __m256i __b)349{350return 
(__m256i)__builtin_ia32_pminud256((__v8si)__a, (__v8si)__b);351}352353static __inline__ int __DEFAULT_FN_ATTRS354_mm256_movemask_epi8(__m256i __a)355{356return __builtin_ia32_pmovmskb256((__v32qi)__a);357}358359static __inline__ __m256i __DEFAULT_FN_ATTRS360_mm256_cvtepi8_epi16(__m128i __V)361{362/* This function always performs a signed extension, but __v16qi is a char363which may be signed or unsigned, so use __v16qs. */364return (__m256i)__builtin_convertvector((__v16qs)__V, __v16hi);365}366367static __inline__ __m256i __DEFAULT_FN_ATTRS368_mm256_cvtepi8_epi32(__m128i __V)369{370/* This function always performs a signed extension, but __v16qi is a char371which may be signed or unsigned, so use __v16qs. */372return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si);373}374375static __inline__ __m256i __DEFAULT_FN_ATTRS376_mm256_cvtepi8_epi64(__m128i __V)377{378/* This function always performs a signed extension, but __v16qi is a char379which may be signed or unsigned, so use __v16qs. 
*/380return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4di);381}382383static __inline__ __m256i __DEFAULT_FN_ATTRS384_mm256_cvtepi16_epi32(__m128i __V)385{386return (__m256i)__builtin_convertvector((__v8hi)__V, __v8si);387}388389static __inline__ __m256i __DEFAULT_FN_ATTRS390_mm256_cvtepi16_epi64(__m128i __V)391{392return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4di);393}394395static __inline__ __m256i __DEFAULT_FN_ATTRS396_mm256_cvtepi32_epi64(__m128i __V)397{398return (__m256i)__builtin_convertvector((__v4si)__V, __v4di);399}400401static __inline__ __m256i __DEFAULT_FN_ATTRS402_mm256_cvtepu8_epi16(__m128i __V)403{404return (__m256i)__builtin_convertvector((__v16qu)__V, __v16hi);405}406407static __inline__ __m256i __DEFAULT_FN_ATTRS408_mm256_cvtepu8_epi32(__m128i __V)409{410return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si);411}412413static __inline__ __m256i __DEFAULT_FN_ATTRS414_mm256_cvtepu8_epi64(__m128i __V)415{416return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4di);417}418419static __inline__ __m256i __DEFAULT_FN_ATTRS420_mm256_cvtepu16_epi32(__m128i __V)421{422return (__m256i)__builtin_convertvector((__v8hu)__V, __v8si);423}424425static __inline__ __m256i __DEFAULT_FN_ATTRS426_mm256_cvtepu16_epi64(__m128i __V)427{428return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4di);429}430431static __inline__ __m256i __DEFAULT_FN_ATTRS432_mm256_cvtepu32_epi64(__m128i __V)433{434return (__m256i)__builtin_convertvector((__v4su)__V, __v4di);435}436437static __inline__ __m256i __DEFAULT_FN_ATTRS438_mm256_mul_epi32(__m256i __a, __m256i __b)439{440return (__m256i)__builtin_ia32_pmuldq256((__v8si)__a, (__v8si)__b);441}442443static __inline__ __m256i 
__DEFAULT_FN_ATTRS444_mm256_mulhrs_epi16(__m256i __a, __m256i __b)445{446return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)__a, (__v16hi)__b);447}448449static __inline__ __m256i __DEFAULT_FN_ATTRS450_mm256_mulhi_epu16(__m256i __a, __m256i __b)451{452return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)__a, (__v16hi)__b);453}454455static __inline__ __m256i __DEFAULT_FN_ATTRS456_mm256_mulhi_epi16(__m256i __a, __m256i __b)457{458return (__m256i)__builtin_ia32_pmulhw256((__v16hi)__a, (__v16hi)__b);459}460461static __inline__ __m256i __DEFAULT_FN_ATTRS462_mm256_mullo_epi16(__m256i __a, __m256i __b)463{464return (__m256i)((__v16hu)__a * (__v16hu)__b);465}466467static __inline__ __m256i __DEFAULT_FN_ATTRS468_mm256_mullo_epi32 (__m256i __a, __m256i __b)469{470return (__m256i)((__v8su)__a * (__v8su)__b);471}472473static __inline__ __m256i __DEFAULT_FN_ATTRS474_mm256_mul_epu32(__m256i __a, __m256i __b)475{476return __builtin_ia32_pmuludq256((__v8si)__a, (__v8si)__b);477}478479static __inline__ __m256i __DEFAULT_FN_ATTRS480_mm256_or_si256(__m256i __a, __m256i __b)481{482return (__m256i)((__v4du)__a | (__v4du)__b);483}484485static __inline__ __m256i __DEFAULT_FN_ATTRS486_mm256_sad_epu8(__m256i __a, __m256i __b)487{488return __builtin_ia32_psadbw256((__v32qi)__a, (__v32qi)__b);489}490491static __inline__ __m256i __DEFAULT_FN_ATTRS492_mm256_shuffle_epi8(__m256i __a, __m256i __b)493{494return (__m256i)__builtin_ia32_pshufb256((__v32qi)__a, (__v32qi)__b);495}496497#define _mm256_shuffle_epi32(a, imm) __extension__ ({ \498(__m256i)__builtin_shufflevector((__v8si)(__m256i)(a), \499(__v8si)_mm256_undefined_si256(), \5000 + (((imm) >> 0) & 0x3), \5010 + (((imm) >> 2) & 0x3), \5020 + (((imm) >> 4) & 0x3), \5030 + (((imm) >> 6) & 0x3), \5044 + (((imm) >> 0) & 0x3), \5054 + (((imm) >> 2) & 0x3), \5064 + (((imm) >> 4) & 0x3), \5074 + (((imm) >> 6) & 0x3)); })508509#define _mm256_shufflehi_epi16(a, imm) __extension__ ({ \510(__m256i)__builtin_shufflevector((__v16hi)(__m256i)(a), 
\511(__v16hi)_mm256_undefined_si256(), \5120, 1, 2, 3, \5134 + (((imm) >> 0) & 0x3), \5144 + (((imm) >> 2) & 0x3), \5154 + (((imm) >> 4) & 0x3), \5164 + (((imm) >> 6) & 0x3), \5178, 9, 10, 11, \51812 + (((imm) >> 0) & 0x3), \51912 + (((imm) >> 2) & 0x3), \52012 + (((imm) >> 4) & 0x3), \52112 + (((imm) >> 6) & 0x3)); })522523#define _mm256_shufflelo_epi16(a, imm) __extension__ ({ \524(__m256i)__builtin_shufflevector((__v16hi)(__m256i)(a), \525(__v16hi)_mm256_undefined_si256(), \5260 + (((imm) >> 0) & 0x3), \5270 + (((imm) >> 2) & 0x3), \5280 + (((imm) >> 4) & 0x3), \5290 + (((imm) >> 6) & 0x3), \5304, 5, 6, 7, \5318 + (((imm) >> 0) & 0x3), \5328 + (((imm) >> 2) & 0x3), \5338 + (((imm) >> 4) & 0x3), \5348 + (((imm) >> 6) & 0x3), \53512, 13, 14, 15); })536537static __inline__ __m256i __DEFAULT_FN_ATTRS538_mm256_sign_epi8(__m256i __a, __m256i __b)539{540return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b);541}542543static __inline__ __m256i __DEFAULT_FN_ATTRS544_mm256_sign_epi16(__m256i __a, __m256i __b)545{546return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b);547}548549static __inline__ __m256i __DEFAULT_FN_ATTRS550_mm256_sign_epi32(__m256i __a, __m256i __b)551{552return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b);553}554555#define _mm256_slli_si256(a, imm) __extension__ ({ \556(__m256i)__builtin_shufflevector( \557(__v32qi)_mm256_setzero_si256(), \558(__v32qi)(__m256i)(a), \559((char)(imm)&0xF0) ? 0 : ((char)(imm)>0x0 ? 16 : 32) - (char)(imm), \560((char)(imm)&0xF0) ? 1 : ((char)(imm)>0x1 ? 17 : 33) - (char)(imm), \561((char)(imm)&0xF0) ? 2 : ((char)(imm)>0x2 ? 18 : 34) - (char)(imm), \562((char)(imm)&0xF0) ? 3 : ((char)(imm)>0x3 ? 19 : 35) - (char)(imm), \563((char)(imm)&0xF0) ? 4 : ((char)(imm)>0x4 ? 20 : 36) - (char)(imm), \564((char)(imm)&0xF0) ? 5 : ((char)(imm)>0x5 ? 21 : 37) - (char)(imm), \565((char)(imm)&0xF0) ? 6 : ((char)(imm)>0x6 ? 22 : 38) - (char)(imm), \566((char)(imm)&0xF0) ? 7 : ((char)(imm)>0x7 ? 
23 : 39) - (char)(imm), \567((char)(imm)&0xF0) ? 8 : ((char)(imm)>0x8 ? 24 : 40) - (char)(imm), \568((char)(imm)&0xF0) ? 9 : ((char)(imm)>0x9 ? 25 : 41) - (char)(imm), \569((char)(imm)&0xF0) ? 10 : ((char)(imm)>0xA ? 26 : 42) - (char)(imm), \570((char)(imm)&0xF0) ? 11 : ((char)(imm)>0xB ? 27 : 43) - (char)(imm), \571((char)(imm)&0xF0) ? 12 : ((char)(imm)>0xC ? 28 : 44) - (char)(imm), \572((char)(imm)&0xF0) ? 13 : ((char)(imm)>0xD ? 29 : 45) - (char)(imm), \573((char)(imm)&0xF0) ? 14 : ((char)(imm)>0xE ? 30 : 46) - (char)(imm), \574((char)(imm)&0xF0) ? 15 : ((char)(imm)>0xF ? 31 : 47) - (char)(imm), \575((char)(imm)&0xF0) ? 16 : ((char)(imm)>0x0 ? 32 : 48) - (char)(imm), \576((char)(imm)&0xF0) ? 17 : ((char)(imm)>0x1 ? 33 : 49) - (char)(imm), \577((char)(imm)&0xF0) ? 18 : ((char)(imm)>0x2 ? 34 : 50) - (char)(imm), \578((char)(imm)&0xF0) ? 19 : ((char)(imm)>0x3 ? 35 : 51) - (char)(imm), \579((char)(imm)&0xF0) ? 20 : ((char)(imm)>0x4 ? 36 : 52) - (char)(imm), \580((char)(imm)&0xF0) ? 21 : ((char)(imm)>0x5 ? 37 : 53) - (char)(imm), \581((char)(imm)&0xF0) ? 22 : ((char)(imm)>0x6 ? 38 : 54) - (char)(imm), \582((char)(imm)&0xF0) ? 23 : ((char)(imm)>0x7 ? 39 : 55) - (char)(imm), \583((char)(imm)&0xF0) ? 24 : ((char)(imm)>0x8 ? 40 : 56) - (char)(imm), \584((char)(imm)&0xF0) ? 25 : ((char)(imm)>0x9 ? 41 : 57) - (char)(imm), \585((char)(imm)&0xF0) ? 26 : ((char)(imm)>0xA ? 42 : 58) - (char)(imm), \586((char)(imm)&0xF0) ? 27 : ((char)(imm)>0xB ? 43 : 59) - (char)(imm), \587((char)(imm)&0xF0) ? 28 : ((char)(imm)>0xC ? 44 : 60) - (char)(imm), \588((char)(imm)&0xF0) ? 29 : ((char)(imm)>0xD ? 45 : 61) - (char)(imm), \589((char)(imm)&0xF0) ? 30 : ((char)(imm)>0xE ? 46 : 62) - (char)(imm), \590((char)(imm)&0xF0) ? 31 : ((char)(imm)>0xF ? 
47 : 63) - (char)(imm)); })591592#define _mm256_bslli_epi128(a, count) _mm256_slli_si256((a), (count))593594static __inline__ __m256i __DEFAULT_FN_ATTRS595_mm256_slli_epi16(__m256i __a, int __count)596{597return (__m256i)__builtin_ia32_psllwi256((__v16hi)__a, __count);598}599600static __inline__ __m256i __DEFAULT_FN_ATTRS601_mm256_sll_epi16(__m256i __a, __m128i __count)602{603return (__m256i)__builtin_ia32_psllw256((__v16hi)__a, (__v8hi)__count);604}605606static __inline__ __m256i __DEFAULT_FN_ATTRS607_mm256_slli_epi32(__m256i __a, int __count)608{609return (__m256i)__builtin_ia32_pslldi256((__v8si)__a, __count);610}611612static __inline__ __m256i __DEFAULT_FN_ATTRS613_mm256_sll_epi32(__m256i __a, __m128i __count)614{615return (__m256i)__builtin_ia32_pslld256((__v8si)__a, (__v4si)__count);616}617618static __inline__ __m256i __DEFAULT_FN_ATTRS619_mm256_slli_epi64(__m256i __a, int __count)620{621return __builtin_ia32_psllqi256((__v4di)__a, __count);622}623624static __inline__ __m256i __DEFAULT_FN_ATTRS625_mm256_sll_epi64(__m256i __a, __m128i __count)626{627return __builtin_ia32_psllq256((__v4di)__a, __count);628}629630static __inline__ __m256i __DEFAULT_FN_ATTRS631_mm256_srai_epi16(__m256i __a, int __count)632{633return (__m256i)__builtin_ia32_psrawi256((__v16hi)__a, __count);634}635636static __inline__ __m256i __DEFAULT_FN_ATTRS637_mm256_sra_epi16(__m256i __a, __m128i __count)638{639return (__m256i)__builtin_ia32_psraw256((__v16hi)__a, (__v8hi)__count);640}641642static __inline__ __m256i __DEFAULT_FN_ATTRS643_mm256_srai_epi32(__m256i __a, int __count)644{645return (__m256i)__builtin_ia32_psradi256((__v8si)__a, __count);646}647648static __inline__ __m256i __DEFAULT_FN_ATTRS649_mm256_sra_epi32(__m256i __a, __m128i __count)650{651return (__m256i)__builtin_ia32_psrad256((__v8si)__a, (__v4si)__count);652}653654#define _mm256_srli_si256(a, imm) __extension__ ({ \655(__m256i)__builtin_shufflevector( \656(__v32qi)(__m256i)(a), \657(__v32qi)_mm256_setzero_si256(), 
\658((char)(imm)&0xF0) ? 32 : (char)(imm) + ((char)(imm)>0xF ? 16 : 0), \659((char)(imm)&0xF0) ? 33 : (char)(imm) + ((char)(imm)>0xE ? 17 : 1), \660((char)(imm)&0xF0) ? 34 : (char)(imm) + ((char)(imm)>0xD ? 18 : 2), \661((char)(imm)&0xF0) ? 35 : (char)(imm) + ((char)(imm)>0xC ? 19 : 3), \662((char)(imm)&0xF0) ? 36 : (char)(imm) + ((char)(imm)>0xB ? 20 : 4), \663((char)(imm)&0xF0) ? 37 : (char)(imm) + ((char)(imm)>0xA ? 21 : 5), \664((char)(imm)&0xF0) ? 38 : (char)(imm) + ((char)(imm)>0x9 ? 22 : 6), \665((char)(imm)&0xF0) ? 39 : (char)(imm) + ((char)(imm)>0x8 ? 23 : 7), \666((char)(imm)&0xF0) ? 40 : (char)(imm) + ((char)(imm)>0x7 ? 24 : 8), \667((char)(imm)&0xF0) ? 41 : (char)(imm) + ((char)(imm)>0x6 ? 25 : 9), \668((char)(imm)&0xF0) ? 42 : (char)(imm) + ((char)(imm)>0x5 ? 26 : 10), \669((char)(imm)&0xF0) ? 43 : (char)(imm) + ((char)(imm)>0x4 ? 27 : 11), \670((char)(imm)&0xF0) ? 44 : (char)(imm) + ((char)(imm)>0x3 ? 28 : 12), \671((char)(imm)&0xF0) ? 45 : (char)(imm) + ((char)(imm)>0x2 ? 29 : 13), \672((char)(imm)&0xF0) ? 46 : (char)(imm) + ((char)(imm)>0x1 ? 30 : 14), \673((char)(imm)&0xF0) ? 47 : (char)(imm) + ((char)(imm)>0x0 ? 31 : 15), \674((char)(imm)&0xF0) ? 48 : (char)(imm) + ((char)(imm)>0xF ? 32 : 16), \675((char)(imm)&0xF0) ? 49 : (char)(imm) + ((char)(imm)>0xE ? 33 : 17), \676((char)(imm)&0xF0) ? 50 : (char)(imm) + ((char)(imm)>0xD ? 34 : 18), \677((char)(imm)&0xF0) ? 51 : (char)(imm) + ((char)(imm)>0xC ? 35 : 19), \678((char)(imm)&0xF0) ? 52 : (char)(imm) + ((char)(imm)>0xB ? 36 : 20), \679((char)(imm)&0xF0) ? 53 : (char)(imm) + ((char)(imm)>0xA ? 37 : 21), \680((char)(imm)&0xF0) ? 54 : (char)(imm) + ((char)(imm)>0x9 ? 38 : 22), \681((char)(imm)&0xF0) ? 55 : (char)(imm) + ((char)(imm)>0x8 ? 39 : 23), \682((char)(imm)&0xF0) ? 56 : (char)(imm) + ((char)(imm)>0x7 ? 40 : 24), \683((char)(imm)&0xF0) ? 57 : (char)(imm) + ((char)(imm)>0x6 ? 41 : 25), \684((char)(imm)&0xF0) ? 58 : (char)(imm) + ((char)(imm)>0x5 ? 42 : 26), \685((char)(imm)&0xF0) ? 
59 : (char)(imm) + ((char)(imm)>0x4 ? 43 : 27), \686((char)(imm)&0xF0) ? 60 : (char)(imm) + ((char)(imm)>0x3 ? 44 : 28), \687((char)(imm)&0xF0) ? 61 : (char)(imm) + ((char)(imm)>0x2 ? 45 : 29), \688((char)(imm)&0xF0) ? 62 : (char)(imm) + ((char)(imm)>0x1 ? 46 : 30), \689((char)(imm)&0xF0) ? 63 : (char)(imm) + ((char)(imm)>0x0 ? 47 : 31)); })690691#define _mm256_bsrli_epi128(a, count) _mm256_srli_si256((a), (count))692693static __inline__ __m256i __DEFAULT_FN_ATTRS694_mm256_srli_epi16(__m256i __a, int __count)695{696return (__m256i)__builtin_ia32_psrlwi256((__v16hi)__a, __count);697}698699static __inline__ __m256i __DEFAULT_FN_ATTRS700_mm256_srl_epi16(__m256i __a, __m128i __count)701{702return (__m256i)__builtin_ia32_psrlw256((__v16hi)__a, (__v8hi)__count);703}704705static __inline__ __m256i __DEFAULT_FN_ATTRS706_mm256_srli_epi32(__m256i __a, int __count)707{708return (__m256i)__builtin_ia32_psrldi256((__v8si)__a, __count);709}710711static __inline__ __m256i __DEFAULT_FN_ATTRS712_mm256_srl_epi32(__m256i __a, __m128i __count)713{714return (__m256i)__builtin_ia32_psrld256((__v8si)__a, (__v4si)__count);715}716717static __inline__ __m256i __DEFAULT_FN_ATTRS718_mm256_srli_epi64(__m256i __a, int __count)719{720return __builtin_ia32_psrlqi256((__v4di)__a, __count);721}722723static __inline__ __m256i __DEFAULT_FN_ATTRS724_mm256_srl_epi64(__m256i __a, __m128i __count)725{726return __builtin_ia32_psrlq256((__v4di)__a, __count);727}728729static __inline__ __m256i __DEFAULT_FN_ATTRS730_mm256_sub_epi8(__m256i __a, __m256i __b)731{732return (__m256i)((__v32qu)__a - (__v32qu)__b);733}734735static __inline__ __m256i __DEFAULT_FN_ATTRS736_mm256_sub_epi16(__m256i __a, __m256i __b)737{738return (__m256i)((__v16hu)__a - (__v16hu)__b);739}740741static __inline__ __m256i __DEFAULT_FN_ATTRS742_mm256_sub_epi32(__m256i __a, __m256i __b)743{744return (__m256i)((__v8su)__a - (__v8su)__b);745}746747static __inline__ __m256i __DEFAULT_FN_ATTRS748_mm256_sub_epi64(__m256i __a, __m256i 
__b)749{750return (__m256i)((__v4du)__a - (__v4du)__b);751}752753static __inline__ __m256i __DEFAULT_FN_ATTRS754_mm256_subs_epi8(__m256i __a, __m256i __b)755{756return (__m256i)__builtin_ia32_psubsb256((__v32qi)__a, (__v32qi)__b);757}758759static __inline__ __m256i __DEFAULT_FN_ATTRS760_mm256_subs_epi16(__m256i __a, __m256i __b)761{762return (__m256i)__builtin_ia32_psubsw256((__v16hi)__a, (__v16hi)__b);763}764765static __inline__ __m256i __DEFAULT_FN_ATTRS766_mm256_subs_epu8(__m256i __a, __m256i __b)767{768return (__m256i)__builtin_ia32_psubusb256((__v32qi)__a, (__v32qi)__b);769}770771static __inline__ __m256i __DEFAULT_FN_ATTRS772_mm256_subs_epu16(__m256i __a, __m256i __b)773{774return (__m256i)__builtin_ia32_psubusw256((__v16hi)__a, (__v16hi)__b);775}776777static __inline__ __m256i __DEFAULT_FN_ATTRS778_mm256_unpackhi_epi8(__m256i __a, __m256i __b)779{780return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 8, 32+8, 9, 32+9, 10, 32+10, 11, 32+11, 12, 32+12, 13, 32+13, 14, 32+14, 15, 32+15, 24, 32+24, 25, 32+25, 26, 32+26, 27, 32+27, 28, 32+28, 29, 32+29, 30, 32+30, 31, 32+31);781}782783static __inline__ __m256i __DEFAULT_FN_ATTRS784_mm256_unpackhi_epi16(__m256i __a, __m256i __b)785{786return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);787}788789static __inline__ __m256i __DEFAULT_FN_ATTRS790_mm256_unpackhi_epi32(__m256i __a, __m256i __b)791{792return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 2, 8+2, 3, 8+3, 6, 8+6, 7, 8+7);793}794795static __inline__ __m256i __DEFAULT_FN_ATTRS796_mm256_unpackhi_epi64(__m256i __a, __m256i __b)797{798return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 1, 4+1, 3, 4+3);799}800801static __inline__ __m256i __DEFAULT_FN_ATTRS802_mm256_unpacklo_epi8(__m256i __a, __m256i __b)803{804return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3, 4, 32+4, 5, 
32+5, 6, 32+6, 7, 32+7, 16, 32+16, 17, 32+17, 18, 32+18, 19, 32+19, 20, 32+20, 21, 32+21, 22, 32+22, 23, 32+23);805}806807static __inline__ __m256i __DEFAULT_FN_ATTRS808_mm256_unpacklo_epi16(__m256i __a, __m256i __b)809{810return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11);811}812813static __inline__ __m256i __DEFAULT_FN_ATTRS814_mm256_unpacklo_epi32(__m256i __a, __m256i __b)815{816return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 0, 8+0, 1, 8+1, 4, 8+4, 5, 8+5);817}818819static __inline__ __m256i __DEFAULT_FN_ATTRS820_mm256_unpacklo_epi64(__m256i __a, __m256i __b)821{822return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 0, 4+0, 2, 4+2);823}824825static __inline__ __m256i __DEFAULT_FN_ATTRS826_mm256_xor_si256(__m256i __a, __m256i __b)827{828return (__m256i)((__v4du)__a ^ (__v4du)__b);829}830831static __inline__ __m256i __DEFAULT_FN_ATTRS832_mm256_stream_load_si256(__m256i const *__V)833{834return (__m256i)__builtin_ia32_movntdqa256((const __v4di *)__V);835}836837static __inline__ __m128 __DEFAULT_FN_ATTRS838_mm_broadcastss_ps(__m128 __X)839{840return (__m128)__builtin_shufflevector((__v4sf)__X, (__v4sf)__X, 0, 0, 0, 0);841}842843static __inline__ __m128d __DEFAULT_FN_ATTRS844_mm_broadcastsd_pd(__m128d __a)845{846return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);847}848849static __inline__ __m256 __DEFAULT_FN_ATTRS850_mm256_broadcastss_ps(__m128 __X)851{852return (__m256)__builtin_shufflevector((__v4sf)__X, (__v4sf)__X, 0, 0, 0, 0, 0, 0, 0, 0);853}854855static __inline__ __m256d __DEFAULT_FN_ATTRS856_mm256_broadcastsd_pd(__m128d __X)857{858return (__m256d)__builtin_shufflevector((__v2df)__X, (__v2df)__X, 0, 0, 0, 0);859}860861static __inline__ __m256i __DEFAULT_FN_ATTRS862_mm256_broadcastsi128_si256(__m128i __X)863{864return (__m256i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 1, 0, 1);865}866867#define _mm_blend_epi32(V1, 
V2, M) __extension__ ({ \868(__m128i)__builtin_shufflevector((__v4si)(__m128i)(V1), \869(__v4si)(__m128i)(V2), \870(((M) & 0x01) ? 4 : 0), \871(((M) & 0x02) ? 5 : 1), \872(((M) & 0x04) ? 6 : 2), \873(((M) & 0x08) ? 7 : 3)); })874875#define _mm256_blend_epi32(V1, V2, M) __extension__ ({ \876(__m256i)__builtin_shufflevector((__v8si)(__m256i)(V1), \877(__v8si)(__m256i)(V2), \878(((M) & 0x01) ? 8 : 0), \879(((M) & 0x02) ? 9 : 1), \880(((M) & 0x04) ? 10 : 2), \881(((M) & 0x08) ? 11 : 3), \882(((M) & 0x10) ? 12 : 4), \883(((M) & 0x20) ? 13 : 5), \884(((M) & 0x40) ? 14 : 6), \885(((M) & 0x80) ? 15 : 7)); })886887static __inline__ __m256i __DEFAULT_FN_ATTRS888_mm256_broadcastb_epi8(__m128i __X)889{890return (__m256i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);891}892893static __inline__ __m256i __DEFAULT_FN_ATTRS894_mm256_broadcastw_epi16(__m128i __X)895{896return (__m256i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);897}898899static __inline__ __m256i __DEFAULT_FN_ATTRS900_mm256_broadcastd_epi32(__m128i __X)901{902return (__m256i)__builtin_shufflevector((__v4si)__X, (__v4si)__X, 0, 0, 0, 0, 0, 0, 0, 0);903}904905static __inline__ __m256i __DEFAULT_FN_ATTRS906_mm256_broadcastq_epi64(__m128i __X)907{908return (__m256i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 0, 0, 0);909}910911static __inline__ __m128i __DEFAULT_FN_ATTRS912_mm_broadcastb_epi8(__m128i __X)913{914return (__m128i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);915}916917static __inline__ __m128i __DEFAULT_FN_ATTRS918_mm_broadcastw_epi16(__m128i __X)919{920return (__m128i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0, 0, 0, 0, 0);921}922923924static __inline__ __m128i __DEFAULT_FN_ATTRS925_mm_broadcastd_epi32(__m128i __X)926{927return (__m128i)__builtin_shufflevector((__v4si)__X, 
(__v4si)__X, 0, 0, 0, 0);928}929930static __inline__ __m128i __DEFAULT_FN_ATTRS931_mm_broadcastq_epi64(__m128i __X)932{933return (__m128i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 0);934}935936static __inline__ __m256i __DEFAULT_FN_ATTRS937_mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)938{939return (__m256i)__builtin_ia32_permvarsi256((__v8si)__a, (__v8si)__b);940}941942#define _mm256_permute4x64_pd(V, M) __extension__ ({ \943(__m256d)__builtin_shufflevector((__v4df)(__m256d)(V), \944(__v4df)_mm256_undefined_pd(), \945((M) >> 0) & 0x3, \946((M) >> 2) & 0x3, \947((M) >> 4) & 0x3, \948((M) >> 6) & 0x3); })949950static __inline__ __m256 __DEFAULT_FN_ATTRS951_mm256_permutevar8x32_ps(__m256 __a, __m256i __b)952{953return (__m256)__builtin_ia32_permvarsf256((__v8sf)__a, (__v8si)__b);954}955956#define _mm256_permute4x64_epi64(V, M) __extension__ ({ \957(__m256i)__builtin_shufflevector((__v4di)(__m256i)(V), \958(__v4di)_mm256_undefined_si256(), \959((M) >> 0) & 0x3, \960((M) >> 2) & 0x3, \961((M) >> 4) & 0x3, \962((M) >> 6) & 0x3); })963964#define _mm256_permute2x128_si256(V1, V2, M) __extension__ ({ \965(__m256i)__builtin_ia32_permti256((__m256i)(V1), (__m256i)(V2), (M)); })966967#define _mm256_extracti128_si256(V, M) __extension__ ({ \968(__m128i)__builtin_shufflevector((__v4di)(__m256i)(V), \969(__v4di)_mm256_undefined_si256(), \970(((M) & 1) ? 2 : 0), \971(((M) & 1) ? 3 : 1) ); })972973#define _mm256_inserti128_si256(V1, V2, M) __extension__ ({ \974(__m256i)__builtin_shufflevector((__v4di)(__m256i)(V1), \975(__v4di)_mm256_castsi128_si256((__m128i)(V2)), \976(((M) & 1) ? 0 : 4), \977(((M) & 1) ? 1 : 5), \978(((M) & 1) ? 4 : 2), \979(((M) & 1) ? 
5 : 3) ); })980981static __inline__ __m256i __DEFAULT_FN_ATTRS982_mm256_maskload_epi32(int const *__X, __m256i __M)983{984return (__m256i)__builtin_ia32_maskloadd256((const __v8si *)__X, (__v8si)__M);985}986987static __inline__ __m256i __DEFAULT_FN_ATTRS988_mm256_maskload_epi64(long long const *__X, __m256i __M)989{990return (__m256i)__builtin_ia32_maskloadq256((const __v4di *)__X, (__v4di)__M);991}992993static __inline__ __m128i __DEFAULT_FN_ATTRS994_mm_maskload_epi32(int const *__X, __m128i __M)995{996return (__m128i)__builtin_ia32_maskloadd((const __v4si *)__X, (__v4si)__M);997}998999static __inline__ __m128i __DEFAULT_FN_ATTRS1000_mm_maskload_epi64(long long const *__X, __m128i __M)1001{1002return (__m128i)__builtin_ia32_maskloadq((const __v2di *)__X, (__v2di)__M);1003}10041005static __inline__ void __DEFAULT_FN_ATTRS1006_mm256_maskstore_epi32(int *__X, __m256i __M, __m256i __Y)1007{1008__builtin_ia32_maskstored256((__v8si *)__X, (__v8si)__M, (__v8si)__Y);1009}10101011static __inline__ void __DEFAULT_FN_ATTRS1012_mm256_maskstore_epi64(long long *__X, __m256i __M, __m256i __Y)1013{1014__builtin_ia32_maskstoreq256((__v4di *)__X, (__v4di)__M, (__v4di)__Y);1015}10161017static __inline__ void __DEFAULT_FN_ATTRS1018_mm_maskstore_epi32(int *__X, __m128i __M, __m128i __Y)1019{1020__builtin_ia32_maskstored((__v4si *)__X, (__v4si)__M, (__v4si)__Y);1021}10221023static __inline__ void __DEFAULT_FN_ATTRS1024_mm_maskstore_epi64(long long *__X, __m128i __M, __m128i __Y)1025{1026__builtin_ia32_maskstoreq(( __v2di *)__X, (__v2di)__M, (__v2di)__Y);1027}10281029static __inline__ __m256i __DEFAULT_FN_ATTRS1030_mm256_sllv_epi32(__m256i __X, __m256i __Y)1031{1032return (__m256i)__builtin_ia32_psllv8si((__v8si)__X, (__v8si)__Y);1033}10341035static __inline__ __m128i __DEFAULT_FN_ATTRS1036_mm_sllv_epi32(__m128i __X, __m128i __Y)1037{1038return (__m128i)__builtin_ia32_psllv4si((__v4si)__X, (__v4si)__Y);1039}10401041static __inline__ __m256i 
__DEFAULT_FN_ATTRS1042_mm256_sllv_epi64(__m256i __X, __m256i __Y)1043{1044return (__m256i)__builtin_ia32_psllv4di((__v4di)__X, (__v4di)__Y);1045}10461047static __inline__ __m128i __DEFAULT_FN_ATTRS1048_mm_sllv_epi64(__m128i __X, __m128i __Y)1049{1050return (__m128i)__builtin_ia32_psllv2di((__v2di)__X, (__v2di)__Y);1051}10521053static __inline__ __m256i __DEFAULT_FN_ATTRS1054_mm256_srav_epi32(__m256i __X, __m256i __Y)1055{1056return (__m256i)__builtin_ia32_psrav8si((__v8si)__X, (__v8si)__Y);1057}10581059static __inline__ __m128i __DEFAULT_FN_ATTRS1060_mm_srav_epi32(__m128i __X, __m128i __Y)1061{1062return (__m128i)__builtin_ia32_psrav4si((__v4si)__X, (__v4si)__Y);1063}10641065static __inline__ __m256i __DEFAULT_FN_ATTRS1066_mm256_srlv_epi32(__m256i __X, __m256i __Y)1067{1068return (__m256i)__builtin_ia32_psrlv8si((__v8si)__X, (__v8si)__Y);1069}10701071static __inline__ __m128i __DEFAULT_FN_ATTRS1072_mm_srlv_epi32(__m128i __X, __m128i __Y)1073{1074return (__m128i)__builtin_ia32_psrlv4si((__v4si)__X, (__v4si)__Y);1075}10761077static __inline__ __m256i __DEFAULT_FN_ATTRS1078_mm256_srlv_epi64(__m256i __X, __m256i __Y)1079{1080return (__m256i)__builtin_ia32_psrlv4di((__v4di)__X, (__v4di)__Y);1081}10821083static __inline__ __m128i __DEFAULT_FN_ATTRS1084_mm_srlv_epi64(__m128i __X, __m128i __Y)1085{1086return (__m128i)__builtin_ia32_psrlv2di((__v2di)__X, (__v2di)__Y);1087}10881089#define _mm_mask_i32gather_pd(a, m, i, mask, s) __extension__ ({ \1090(__m128d)__builtin_ia32_gatherd_pd((__v2df)(__m128i)(a), \1091(double const *)(m), \1092(__v4si)(__m128i)(i), \1093(__v2df)(__m128d)(mask), (s)); })10941095#define _mm256_mask_i32gather_pd(a, m, i, mask, s) __extension__ ({ \1096(__m256d)__builtin_ia32_gatherd_pd256((__v4df)(__m256d)(a), \1097(double const *)(m), \1098(__v4si)(__m128i)(i), \1099(__v4df)(__m256d)(mask), (s)); })11001101#define _mm_mask_i64gather_pd(a, m, i, mask, s) __extension__ ({ \1102(__m128d)__builtin_ia32_gatherq_pd((__v2df)(__m128d)(a), \1103(double const 
*)(m), \1104(__v2di)(__m128i)(i), \1105(__v2df)(__m128d)(mask), (s)); })11061107#define _mm256_mask_i64gather_pd(a, m, i, mask, s) __extension__ ({ \1108(__m256d)__builtin_ia32_gatherq_pd256((__v4df)(__m256d)(a), \1109(double const *)(m), \1110(__v4di)(__m256i)(i), \1111(__v4df)(__m256d)(mask), (s)); })11121113#define _mm_mask_i32gather_ps(a, m, i, mask, s) __extension__ ({ \1114(__m128)__builtin_ia32_gatherd_ps((__v4sf)(__m128)(a), \1115(float const *)(m), \1116(__v4si)(__m128i)(i), \1117(__v4sf)(__m128)(mask), (s)); })11181119#define _mm256_mask_i32gather_ps(a, m, i, mask, s) __extension__ ({ \1120(__m256)__builtin_ia32_gatherd_ps256((__v8sf)(__m256)(a), \1121(float const *)(m), \1122(__v8si)(__m256i)(i), \1123(__v8sf)(__m256)(mask), (s)); })11241125#define _mm_mask_i64gather_ps(a, m, i, mask, s) __extension__ ({ \1126(__m128)__builtin_ia32_gatherq_ps((__v4sf)(__m128)(a), \1127(float const *)(m), \1128(__v2di)(__m128i)(i), \1129(__v4sf)(__m128)(mask), (s)); })11301131#define _mm256_mask_i64gather_ps(a, m, i, mask, s) __extension__ ({ \1132(__m128)__builtin_ia32_gatherq_ps256((__v4sf)(__m128)(a), \1133(float const *)(m), \1134(__v4di)(__m256i)(i), \1135(__v4sf)(__m128)(mask), (s)); })11361137#define _mm_mask_i32gather_epi32(a, m, i, mask, s) __extension__ ({ \1138(__m128i)__builtin_ia32_gatherd_d((__v4si)(__m128i)(a), \1139(int const *)(m), \1140(__v4si)(__m128i)(i), \1141(__v4si)(__m128i)(mask), (s)); })11421143#define _mm256_mask_i32gather_epi32(a, m, i, mask, s) __extension__ ({ \1144(__m256i)__builtin_ia32_gatherd_d256((__v8si)(__m256i)(a), \1145(int const *)(m), \1146(__v8si)(__m256i)(i), \1147(__v8si)(__m256i)(mask), (s)); })11481149#define _mm_mask_i64gather_epi32(a, m, i, mask, s) __extension__ ({ \1150(__m128i)__builtin_ia32_gatherq_d((__v4si)(__m128i)(a), \1151(int const *)(m), \1152(__v2di)(__m128i)(i), \1153(__v4si)(__m128i)(mask), (s)); })11541155#define _mm256_mask_i64gather_epi32(a, m, i, mask, s) __extension__ ({ 
\1156(__m128i)__builtin_ia32_gatherq_d256((__v4si)(__m128i)(a), \1157(int const *)(m), \1158(__v4di)(__m256i)(i), \1159(__v4si)(__m128i)(mask), (s)); })11601161#define _mm_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \1162(__m128i)__builtin_ia32_gatherd_q((__v2di)(__m128i)(a), \1163(long long const *)(m), \1164(__v4si)(__m128i)(i), \1165(__v2di)(__m128i)(mask), (s)); })11661167#define _mm256_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \1168(__m256i)__builtin_ia32_gatherd_q256((__v4di)(__m256i)(a), \1169(long long const *)(m), \1170(__v4si)(__m128i)(i), \1171(__v4di)(__m256i)(mask), (s)); })11721173#define _mm_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \1174(__m128i)__builtin_ia32_gatherq_q((__v2di)(__m128i)(a), \1175(long long const *)(m), \1176(__v2di)(__m128i)(i), \1177(__v2di)(__m128i)(mask), (s)); })11781179#define _mm256_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \1180(__m256i)__builtin_ia32_gatherq_q256((__v4di)(__m256i)(a), \1181(long long const *)(m), \1182(__v4di)(__m256i)(i), \1183(__v4di)(__m256i)(mask), (s)); })11841185#define _mm_i32gather_pd(m, i, s) __extension__ ({ \1186(__m128d)__builtin_ia32_gatherd_pd((__v2df)_mm_undefined_pd(), \1187(double const *)(m), \1188(__v4si)(__m128i)(i), \1189(__v2df)_mm_cmpeq_pd(_mm_setzero_pd(), \1190_mm_setzero_pd()), \1191(s)); })11921193#define _mm256_i32gather_pd(m, i, s) __extension__ ({ \1194(__m256d)__builtin_ia32_gatherd_pd256((__v4df)_mm256_undefined_pd(), \1195(double const *)(m), \1196(__v4si)(__m128i)(i), \1197(__v4df)_mm256_cmp_pd(_mm256_setzero_pd(), \1198_mm256_setzero_pd(), \1199_CMP_EQ_OQ), \1200(s)); })12011202#define _mm_i64gather_pd(m, i, s) __extension__ ({ \1203(__m128d)__builtin_ia32_gatherq_pd((__v2df)_mm_undefined_pd(), \1204(double const *)(m), \1205(__v2di)(__m128i)(i), \1206(__v2df)_mm_cmpeq_pd(_mm_setzero_pd(), \1207_mm_setzero_pd()), \1208(s)); })12091210#define _mm256_i64gather_pd(m, i, s) __extension__ ({ 
\1211(__m256d)__builtin_ia32_gatherq_pd256((__v4df)_mm256_undefined_pd(), \1212(double const *)(m), \1213(__v4di)(__m256i)(i), \1214(__v4df)_mm256_cmp_pd(_mm256_setzero_pd(), \1215_mm256_setzero_pd(), \1216_CMP_EQ_OQ), \1217(s)); })12181219#define _mm_i32gather_ps(m, i, s) __extension__ ({ \1220(__m128)__builtin_ia32_gatherd_ps((__v4sf)_mm_undefined_ps(), \1221(float const *)(m), \1222(__v4si)(__m128i)(i), \1223(__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \1224_mm_setzero_ps()), \1225(s)); })12261227#define _mm256_i32gather_ps(m, i, s) __extension__ ({ \1228(__m256)__builtin_ia32_gatherd_ps256((__v8sf)_mm256_undefined_ps(), \1229(float const *)(m), \1230(__v8si)(__m256i)(i), \1231(__v8sf)_mm256_cmp_ps(_mm256_setzero_ps(), \1232_mm256_setzero_ps(), \1233_CMP_EQ_OQ), \1234(s)); })12351236#define _mm_i64gather_ps(m, i, s) __extension__ ({ \1237(__m128)__builtin_ia32_gatherq_ps((__v4sf)_mm_undefined_ps(), \1238(float const *)(m), \1239(__v2di)(__m128i)(i), \1240(__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \1241_mm_setzero_ps()), \1242(s)); })12431244#define _mm256_i64gather_ps(m, i, s) __extension__ ({ \1245(__m128)__builtin_ia32_gatherq_ps256((__v4sf)_mm_undefined_ps(), \1246(float const *)(m), \1247(__v4di)(__m256i)(i), \1248(__v4sf)_mm_cmpeq_ps(_mm_setzero_ps(), \1249_mm_setzero_ps()), \1250(s)); })12511252#define _mm_i32gather_epi32(m, i, s) __extension__ ({ \1253(__m128i)__builtin_ia32_gatherd_d((__v4si)_mm_undefined_si128(), \1254(int const *)(m), (__v4si)(__m128i)(i), \1255(__v4si)_mm_set1_epi32(-1), (s)); })12561257#define _mm256_i32gather_epi32(m, i, s) __extension__ ({ \1258(__m256i)__builtin_ia32_gatherd_d256((__v8si)_mm256_undefined_si256(), \1259(int const *)(m), (__v8si)(__m256i)(i), \1260(__v8si)_mm256_set1_epi32(-1), (s)); })12611262#define _mm_i64gather_epi32(m, i, s) __extension__ ({ \1263(__m128i)__builtin_ia32_gatherq_d((__v4si)_mm_undefined_si128(), \1264(int const *)(m), (__v2di)(__m128i)(i), \1265(__v4si)_mm_set1_epi32(-1), (s)); })12661267#define 
_mm256_i64gather_epi32(m, i, s) __extension__ ({ \1268(__m128i)__builtin_ia32_gatherq_d256((__v4si)_mm_undefined_si128(), \1269(int const *)(m), (__v4di)(__m256i)(i), \1270(__v4si)_mm_set1_epi32(-1), (s)); })12711272#define _mm_i32gather_epi64(m, i, s) __extension__ ({ \1273(__m128i)__builtin_ia32_gatherd_q((__v2di)_mm_undefined_si128(), \1274(long long const *)(m), \1275(__v4si)(__m128i)(i), \1276(__v2di)_mm_set1_epi64x(-1), (s)); })12771278#define _mm256_i32gather_epi64(m, i, s) __extension__ ({ \1279(__m256i)__builtin_ia32_gatherd_q256((__v4di)_mm256_undefined_si256(), \1280(long long const *)(m), \1281(__v4si)(__m128i)(i), \1282(__v4di)_mm256_set1_epi64x(-1), (s)); })12831284#define _mm_i64gather_epi64(m, i, s) __extension__ ({ \1285(__m128i)__builtin_ia32_gatherq_q((__v2di)_mm_undefined_si128(), \1286(long long const *)(m), \1287(__v2di)(__m128i)(i), \1288(__v2di)_mm_set1_epi64x(-1), (s)); })12891290#define _mm256_i64gather_epi64(m, i, s) __extension__ ({ \1291(__m256i)__builtin_ia32_gatherq_q256((__v4di)_mm256_undefined_si256(), \1292(long long const *)(m), \1293(__v4di)(__m256i)(i), \1294(__v4di)_mm256_set1_epi64x(-1), (s)); })12951296#undef __DEFAULT_FN_ATTRS12971298#endif /* __AVX2INTRIN_H */129913001301