Path: blob/21.2-virgl/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
4574 views
/****************************************************************************1* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21****************************************************************************/2223#ifndef __SWR_SIMDINTRIN_H__24#define __SWR_SIMDINTRIN_H__2526#include "common/intrin.h"27#include "common/simdlib.hpp"2829#if KNOB_SIMD_WIDTH == 830typedef SIMD256 SIMD;31#else32#error Unsupported vector width33#endif // KNOB_SIMD16_WIDTH == 163435#define _simd128_maskstore_ps SIMD128::maskstore_ps36#define _simd128_fmadd_ps SIMD128::fmadd_ps3738#define _simd_load_ps SIMD::load_ps39#define _simd_load1_ps SIMD::broadcast_ss40#define _simd_loadu_ps SIMD::loadu_ps41#define _simd_setzero_ps SIMD::setzero_ps42#define _simd_set1_ps SIMD::set1_ps43#define _simd_blend_ps(a, b, i) SIMD::blend_ps<i>(a, b)44#define _simd_blend_epi32(a, b, i) SIMD::blend_epi32<i>(a, b)45#define _simd_blendv_ps SIMD::blendv_ps46#define _simd_store_ps SIMD::store_ps47#define _simd_mul_ps SIMD::mul_ps48#define _simd_add_ps SIMD::add_ps49#define _simd_sub_ps SIMD::sub_ps50#define _simd_rsqrt_ps SIMD::rsqrt_ps51#define _simd_min_ps SIMD::min_ps52#define _simd_max_ps SIMD::max_ps53#define _simd_movemask_ps SIMD::movemask_ps54#define _simd_cvtps_epi32 SIMD::cvtps_epi3255#define _simd_cvttps_epi32 SIMD::cvttps_epi3256#define _simd_cvtepi32_ps SIMD::cvtepi32_ps57#define _simd_cmplt_ps SIMD::cmplt_ps58#define _simd_cmpgt_ps SIMD::cmpgt_ps59#define _simd_cmpneq_ps SIMD::cmpneq_ps60#define _simd_cmpeq_ps SIMD::cmpeq_ps61#define _simd_cmpge_ps SIMD::cmpge_ps62#define _simd_cmple_ps SIMD::cmple_ps63#define _simd_cmp_ps(a, b, imm) SIMD::cmp_ps<SIMD::CompareType(imm)>(a, b)64#define _simd_and_ps SIMD::and_ps65#define _simd_or_ps SIMD::or_ps66#define _simd_rcp_ps SIMD::rcp_ps67#define _simd_div_ps SIMD::div_ps68#define _simd_castsi_ps SIMD::castsi_ps69#define _simd_castps_pd SIMD::castps_pd70#define _simd_castpd_ps SIMD::castpd_ps71#define _simd_andnot_ps SIMD::andnot_ps72#define _simd_round_ps(a, i) SIMD::round_ps<SIMD::RoundMode(i)>(a)73#define _simd_castpd_ps SIMD::castpd_ps74#define _simd_broadcast_ps(a) SIMD::broadcast_ps((SIMD128::Float const*)(a))75#define _simd_stream_ps SIMD::stream_ps7677#define _simd_movemask_pd SIMD::movemask_pd78#define _simd_castsi_pd SIMD::castsi_pd7980#define _simd_mul_epi32 SIMD::mul_epi3281#define _simd_mullo_epi32 SIMD::mullo_epi3282#define _simd_sub_epi32 SIMD::sub_epi3283#define _simd_sub_epi64 SIMD::sub_epi6484#define _simd_min_epi32 SIMD::min_epi3285#define _simd_min_epu32 SIMD::min_epu3286#define _simd_max_epi32 SIMD::max_epi3287#define _simd_max_epu32 SIMD::max_epu3288#define _simd_add_epi32 SIMD::add_epi3289#define _simd_and_si SIMD::and_si90#define _simd_andnot_si SIMD::andnot_si91#define _simd_cmpeq_epi32 SIMD::cmpeq_epi3292#define _simd_cmplt_epi32 SIMD::cmplt_epi3293#define _simd_cmpgt_epi32 SIMD::cmpgt_epi3294#define _simd_or_si SIMD::or_si95#define _simd_xor_si SIMD::xor_si96#define _simd_castps_si SIMD::castps_si97#define _simd_adds_epu8 SIMD::adds_epu898#define _simd_subs_epu8 SIMD::subs_epu899#define _simd_add_epi8 SIMD::add_epi8100#define _simd_cmpeq_epi64 SIMD::cmpeq_epi64101#define _simd_cmpgt_epi64 SIMD::cmpgt_epi64102#define _simd_cmpgt_epi8 SIMD::cmpgt_epi8103#define _simd_cmpeq_epi8 SIMD::cmpeq_epi8104#define _simd_cmpgt_epi16 SIMD::cmpgt_epi16105#define _simd_cmpeq_epi16 SIMD::cmpeq_epi16106#define _simd_movemask_epi8 SIMD::movemask_epi8107#define _simd_permute_ps_i(a, i) SIMD::permute_ps<i>(a)108#define _simd_permute_ps SIMD::permute_ps109#define _simd_permute_epi32 SIMD::permute_epi32110#define _simd_srlv_epi32 SIMD::srlv_epi32111#define _simd_sllv_epi32 SIMD::sllv_epi32112113#define _simd_unpacklo_epi8 SIMD::unpacklo_epi8114#define _simd_unpackhi_epi8 SIMD::unpackhi_epi8115#define _simd_unpacklo_epi16 SIMD::unpacklo_epi16116#define _simd_unpackhi_epi16 SIMD::unpackhi_epi16117#define _simd_unpacklo_epi32 SIMD::unpacklo_epi32118#define _simd_unpackhi_epi32 SIMD::unpackhi_epi32119#define _simd_unpacklo_epi64 SIMD::unpacklo_epi64120#define _simd_unpackhi_epi64 SIMD::unpackhi_epi64121122#define _simd_slli_epi32(a, i) SIMD::slli_epi32<i>(a)123#define _simd_srai_epi32(a, i) SIMD::srai_epi32<i>(a)124#define _simd_srli_epi32(a, i) SIMD::srli_epi32<i>(a)125#define _simd_srlisi_ps(a, i) SIMD::srlisi_ps<i>(a)126127#define _simd_fmadd_ps SIMD::fmadd_ps128#define _simd_fmsub_ps SIMD::fmsub_ps129#define _simd_shuffle_epi8 SIMD::shuffle_epi8130131#define _simd_i32gather_ps(p, o, s) SIMD::i32gather_ps<SIMD::ScaleFactor(s)>(p, o)132#define _simd_mask_i32gather_ps(r, p, o, m, s) \133SIMD::mask_i32gather_ps<SIMD::ScaleFactor(s)>(r, p, o, m)134#define _simd_abs_epi32 SIMD::abs_epi32135136#define _simd_cvtepu8_epi16 SIMD::cvtepu8_epi16137#define _simd_cvtepu8_epi32 SIMD::cvtepu8_epi32138#define _simd_cvtepu16_epi32 SIMD::cvtepu16_epi32139#define _simd_cvtepu16_epi64 SIMD::cvtepu16_epi64140#define _simd_cvtepu32_epi64 SIMD::cvtepu32_epi64141142#define _simd_packus_epi16 SIMD::packus_epi16143#define _simd_packs_epi16 SIMD::packs_epi16144#define _simd_packus_epi32 SIMD::packus_epi32145#define _simd_packs_epi32 SIMD::packs_epi32146147#define _simd_unpacklo_ps SIMD::unpacklo_ps148#define _simd_unpackhi_ps SIMD::unpackhi_ps149#define _simd_unpacklo_pd SIMD::unpacklo_pd150#define _simd_unpackhi_pd SIMD::unpackhi_pd151#define _simd_insertf128_ps SIMD::insertf128_ps152#define _simd_insertf128_pd SIMD::insertf128_pd153#define _simd_insertf128_si(a, b, i) SIMD::insertf128_si<i>(a, b)154#define _simd_extractf128_ps(a, i) SIMD::extractf128_ps<i>(a)155#define _simd_extractf128_pd(a, i) SIMD::extractf128_pd<i>(a)156#define _simd_extractf128_si(a, i) SIMD::extractf128_si<i>(a)157#define _simd_permute2f128_ps(a, b, i) SIMD::permute2f128_ps<i>(a, b)158#define _simd_permute2f128_pd(a, b, i) SIMD::permute2f128_pd<i>(a, b)159#define _simd_permute2f128_si(a, b, i) SIMD::permute2f128_si<i>(a, b)160#define _simd_shuffle_ps(a, b, i) SIMD::shuffle_ps<i>(a, b)161#define _simd_shuffle_pd(a, b, i) SIMD::shuffle_pd<i>(a, b)162#define _simd_shuffle_epi32(a, b, imm8) SIMD::shuffle_epi32<imm8>(a, b)163#define _simd_shuffle_epi64(a, b, imm8) SIMD::shuffle_epi64<imm8>(a, b)164#define _simd_set1_epi32 SIMD::set1_epi32165#define _simd_set_epi32 SIMD::set_epi32166#define _simd_set_ps SIMD::set_ps167#define _simd_set1_epi8 SIMD::set1_epi8168#define _simd_setzero_si SIMD::setzero_si169#define _simd_cvttps_epi32 SIMD::cvttps_epi32170#define _simd_store_si SIMD::store_si171#define _simd_broadcast_ss SIMD::broadcast_ss172#define _simd_maskstore_ps SIMD::maskstore_ps173#define _simd_load_si SIMD::load_si174#define _simd_loadu_si SIMD::loadu_si175#define _simd_sub_ps SIMD::sub_ps176#define _simd_testz_ps SIMD::testz_ps177#define _simd_testz_si SIMD::testz_si178#define _simd_xor_ps SIMD::xor_ps179180#define _simd_loadu2_si SIMD::loadu2_si181#define _simd_storeu2_si SIMD::storeu2_si182183#define _simd_blendv_epi32 SIMD::blendv_epi32184#define _simd_vmask_ps SIMD::vmask_ps185186template <int mask>187SIMDINLINE SIMD128::Integer _simd_blend4_epi32(SIMD128::Integer const& a, SIMD128::Integer const& b)188{189return SIMD128::castps_si(190SIMD128::blend_ps<mask>(SIMD128::castsi_ps(a), SIMD128::castsi_ps(b)));191}192193//////////////////////////////////////////////////////////////////////////194/// @brief Compute plane equation vA * vX + vB * vY + vC195SIMDINLINE simdscalar vplaneps(simdscalar const& vA,196simdscalar const& vB,197simdscalar const& vC,198simdscalar const& vX,199simdscalar const& vY)200{201simdscalar vOut = _simd_fmadd_ps(vA, vX, vC);202vOut = _simd_fmadd_ps(vB, vY, vOut);203return vOut;204}205206//////////////////////////////////////////////////////////////////////////207/// @brief Compute plane equation vA * vX + vB * vY + vC208SIMDINLINE simd4scalar vplaneps(simd4scalar const& vA,209simd4scalar const& vB,210simd4scalar const& vC,211simd4scalar const& vX,212simd4scalar const& vY)213{214simd4scalar vOut = _simd128_fmadd_ps(vA, vX, vC);215vOut = _simd128_fmadd_ps(vB, vY, vOut);216return vOut;217}218219//////////////////////////////////////////////////////////////////////////220/// @brief Interpolates a single component.221/// @param vI - barycentric I222/// @param vJ - barycentric J223/// @param pInterpBuffer - pointer to attribute barycentric coeffs224template <UINT Attrib, UINT Comp, UINT numComponents = 4>225static SIMDINLINE simdscalar InterpolateComponent(simdscalar const& vI,226simdscalar const& vJ,227const float* pInterpBuffer)228{229const float* pInterpA = &pInterpBuffer[Attrib * 3 * numComponents + 0 + Comp];230const float* pInterpB = &pInterpBuffer[Attrib * 3 * numComponents + numComponents + Comp];231const float* pInterpC = &pInterpBuffer[Attrib * 3 * numComponents + numComponents * 2 + Comp];232233if ((pInterpA[0] == pInterpB[0]) && (pInterpA[0] == pInterpC[0]))234{235// Ensure constant attribs are constant. Required for proper236// 3D resource copies.237return _simd_broadcast_ss(pInterpA);238}239240simdscalar vA = _simd_broadcast_ss(pInterpA);241simdscalar vB = _simd_broadcast_ss(pInterpB);242simdscalar vC = _simd_broadcast_ss(pInterpC);243244simdscalar vk = _simd_sub_ps(_simd_sub_ps(_simd_set1_ps(1.0f), vI), vJ);245vC = _simd_mul_ps(vk, vC);246247return vplaneps(vA, vB, vC, vI, vJ);248}249250//////////////////////////////////////////////////////////////////////////251/// @brief Interpolates a single component (flat shade).252/// @param pInterpBuffer - pointer to attribute barycentric coeffs253template <UINT Attrib, UINT Comp, UINT numComponents = 4>254static SIMDINLINE simdscalar InterpolateComponentFlat(const float* pInterpBuffer)255{256const float* pInterpA = &pInterpBuffer[Attrib * 3 * numComponents + 0 + Comp];257258simdscalar vA = _simd_broadcast_ss(pInterpA);259260return vA;261}262263//////////////////////////////////////////////////////////////////////////264/// @brief Interpolates a single component (flat shade).265/// @param pInterpBuffer - pointer to attribute barycentric coeffs266template <UINT Attrib, UINT Comp, UINT numComponents = 4>267static SIMDINLINE simdscalari InterpolateComponentFlatInt(const uint32_t* pInterpBuffer)268{269const uint32_t interpA = pInterpBuffer[Attrib * 3 * numComponents + 0 + Comp];270271simdscalari vA = _simd_set1_epi32(interpA);272273return vA;274}275276//////////////////////////////////////////////////////////////////////////277/// @brief Interpolates a single component.278/// @param vI - barycentric I279/// @param vJ - barycentric J280/// @param pInterpBuffer - pointer to attribute barycentric coeffs281template <UINT Attrib, UINT Comp, UINT numComponents = 4>282static SIMDINLINE simd4scalar InterpolateComponent(simd4scalar const& vI,283simd4scalar const& vJ,284const float* pInterpBuffer)285{286const float* pInterpA = &pInterpBuffer[Attrib * 3 * numComponents + 0 + Comp];287const float* pInterpB = &pInterpBuffer[Attrib * 3 * numComponents + numComponents + Comp];288const float* pInterpC = &pInterpBuffer[Attrib * 3 * numComponents + numComponents * 2 + Comp];289290if ((pInterpA[0] == pInterpB[0]) && (pInterpA[0] == pInterpC[0]))291{292// Ensure constant attribs are constant. Required for proper293// 3D resource copies.294return SIMD128::broadcast_ss(pInterpA);295}296297simd4scalar vA = SIMD128::broadcast_ss(pInterpA);298simd4scalar vB = SIMD128::broadcast_ss(pInterpB);299simd4scalar vC = SIMD128::broadcast_ss(pInterpC);300301simd4scalar vk = SIMD128::sub_ps(SIMD128::sub_ps(SIMD128::set1_ps(1.0f), vI), vJ);302vC = SIMD128::mul_ps(vk, vC);303304return vplaneps(vA, vB, vC, vI, vJ);305}306307static SIMDINLINE simd4scalar _simd128_abs_ps(simd4scalar const& a)308{309simd4scalari ai = SIMD128::castps_si(a);310return SIMD128::castsi_ps(SIMD128::and_si(ai, SIMD128::set1_epi32(0x7fffffff)));311}312313static SIMDINLINE simdscalar _simd_abs_ps(simdscalar const& a)314{315simdscalari ai = _simd_castps_si(a);316return _simd_castsi_ps(_simd_and_si(ai, _simd_set1_epi32(0x7fffffff)));317}318319#include "simd16intrin.h"320321#endif //__SWR_SIMDINTRIN_H__322323324