Path: blob/21.2-virgl/src/gallium/drivers/swr/rasterizer/common/intrin.h
4574 views
/****************************************************************************1* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21****************************************************************************/2223#ifndef __SWR_INTRIN_H__24#define __SWR_INTRIN_H__2526#include "os.h"2728#if !defined(SIMD_ARCH)29#define SIMD_ARCH KNOB_ARCH30#endif3132#include "simdlib_types.hpp"3334typedef SIMDImpl::SIMD128Impl::Float simd4scalar;35typedef SIMDImpl::SIMD128Impl::Double simd4scalard;36typedef SIMDImpl::SIMD128Impl::Integer simd4scalari;37typedef SIMDImpl::SIMD128Impl::Vec4 simd4vector;38typedef SIMDImpl::SIMD128Impl::Mask simd4mask;3940typedef SIMDImpl::SIMD256Impl::Float simd8scalar;41typedef SIMDImpl::SIMD256Impl::Double simd8scalard;42typedef SIMDImpl::SIMD256Impl::Integer simd8scalari;43typedef SIMDImpl::SIMD256Impl::Vec4 simd8vector;44typedef SIMDImpl::SIMD256Impl::Mask simd8mask;4546typedef SIMDImpl::SIMD512Impl::Float simd16scalar;47typedef SIMDImpl::SIMD512Impl::Double simd16scalard;48typedef SIMDImpl::SIMD512Impl::Integer simd16scalari;49typedef SIMDImpl::SIMD512Impl::Vec4 simd16vector;50typedef SIMDImpl::SIMD512Impl::Mask simd16mask;5152#if KNOB_SIMD_WIDTH == 853typedef simd8scalar simdscalar;54typedef simd8scalard simdscalard;55typedef simd8scalari simdscalari;56typedef simd8vector simdvector;57typedef simd8mask simdmask;58#else59#error Unsupported vector width60#endif6162INLINE63UINT pdep_u32(UINT a, UINT mask)64{65#if KNOB_ARCH >= KNOB_ARCH_AVX266return _pdep_u32(a, mask);67#else68UINT result = 0;6970// copied from http://wm.ite.pl/articles/pdep-soft-emu.html71// using bsf instead of funky loop72unsigned long maskIndex = 0;73while (_BitScanForward(&maskIndex, mask))74{75// 1. isolate lowest set bit of mask76const UINT lowest = 1 << maskIndex;7778// 2. populate LSB from src79const UINT LSB = (UINT)((int)(a << 31) >> 31);8081// 3. copy bit from mask82result |= LSB & lowest;8384// 4. clear lowest bit85mask &= ~lowest;8687// 5. prepare for next iteration88a >>= 1;89}9091return result;92#endif93}9495INLINE96UINT pext_u32(UINT a, UINT mask)97{98#if KNOB_ARCH >= KNOB_ARCH_AVX299return _pext_u32(a, mask);100#else101UINT result = 0;102unsigned long maskIndex;103uint32_t currentBit = 0;104while (_BitScanForward(&maskIndex, mask))105{106// 1. isolate lowest set bit of mask107const UINT lowest = 1 << maskIndex;108109// 2. copy bit from mask110result |= ((a & lowest) > 0) << currentBit++;111112// 3. clear lowest bit113mask &= ~lowest;114}115return result;116#endif117}118119#endif //__SWR_INTRIN_H__120121122