Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/swr/rasterizer/common/intrin.h
4574 views
1
/****************************************************************************
2
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
13
* Software.
14
*
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
* IN THE SOFTWARE.
22
****************************************************************************/
23
24
#ifndef __SWR_INTRIN_H__
25
#define __SWR_INTRIN_H__
26
27
#include "os.h"
28
29
// Default the SIMD architecture selector to the build-wide KNOB_ARCH unless
// the including translation unit has already chosen a value for it.
#ifndef SIMD_ARCH
#define SIMD_ARCH KNOB_ARCH
#endif
32
33
#include "simdlib_types.hpp"
34
35
// Short aliases for the 128-bit SIMD implementation types.
using simd4scalar  = SIMDImpl::SIMD128Impl::Float;
using simd4scalard = SIMDImpl::SIMD128Impl::Double;
using simd4scalari = SIMDImpl::SIMD128Impl::Integer;
using simd4vector  = SIMDImpl::SIMD128Impl::Vec4;
using simd4mask    = SIMDImpl::SIMD128Impl::Mask;
40
41
// Short aliases for the 256-bit SIMD implementation types.
using simd8scalar  = SIMDImpl::SIMD256Impl::Float;
using simd8scalard = SIMDImpl::SIMD256Impl::Double;
using simd8scalari = SIMDImpl::SIMD256Impl::Integer;
using simd8vector  = SIMDImpl::SIMD256Impl::Vec4;
using simd8mask    = SIMDImpl::SIMD256Impl::Mask;
46
47
// Short aliases for the 512-bit SIMD implementation types.
using simd16scalar  = SIMDImpl::SIMD512Impl::Float;
using simd16scalard = SIMDImpl::SIMD512Impl::Double;
using simd16scalari = SIMDImpl::SIMD512Impl::Integer;
using simd16vector  = SIMDImpl::SIMD512Impl::Vec4;
using simd16mask    = SIMDImpl::SIMD512Impl::Mask;
52
53
// Width-agnostic aliases: map the generic simd* names onto the types that
// match KNOB_SIMD_WIDTH. Only a width of 8 is accepted here; any other value
// stops the build with an error.
#if KNOB_SIMD_WIDTH == 8
using simdscalar  = simd8scalar;
using simdscalard = simd8scalard;
using simdscalari = simd8scalari;
using simdvector  = simd8vector;
using simdmask    = simd8mask;
#else
#error Unsupported vector width
#endif
62
63
/// @brief Parallel bit deposit: scatters the low-order bits of `a` into the
///        bit positions that are set in `mask`.
///
/// The set bits of `mask` are visited from lowest to highest; the i-th set bit
/// of `mask` receives bit i of `a`. Bits of the result at positions where
/// `mask` is clear are zero.
///
/// When KNOB_ARCH >= KNOB_ARCH_AVX2 this forwards to the _pdep_u32 intrinsic;
/// otherwise a software loop is used.
///
/// @param a    Source bits, consumed starting at bit 0.
/// @param mask Selects the destination bit positions.
/// @return The deposited bit pattern.
INLINE
UINT pdep_u32(UINT a, UINT mask)
{
#if KNOB_ARCH >= KNOB_ARCH_AVX2
    return _pdep_u32(a, mask);
#else
    UINT result = 0;

    // copied from http://wm.ite.pl/articles/pdep-soft-emu.html
    // using bsf instead of funky loop
    unsigned long maskIndex = 0;
    while (_BitScanForward(&maskIndex, mask))
    {
        // 1. isolate lowest set bit of mask. The shift is done on an unsigned
        //    operand so that index 31 does not shift into the sign bit of int.
        const UINT lowest = static_cast<UINT>(1) << maskIndex;

        // 2./3. if the current LSB of the source is set, copy it to the mask
        //       position. Testing the bit directly avoids relying on an
        //       arithmetic right shift of a negative signed value.
        if ((a & static_cast<UINT>(1)) != 0)
        {
            result |= lowest;
        }

        // 4. clear lowest bit so the next scan finds the following set bit
        mask &= ~lowest;

        // 5. prepare for next iteration: advance to the next source bit
        a >>= 1;
    }

    return result;
#endif
}
95
96
/// @brief Parallel bit extract: gathers the bits of `a` found at the positions
///        set in `mask` and packs them contiguously into the low bits of the
///        result.
///
/// The set bits of `mask` are visited from lowest to highest; the bit of `a`
/// at the i-th set position of `mask` becomes bit i of the result. Remaining
/// high bits of the result are zero.
///
/// When KNOB_ARCH >= KNOB_ARCH_AVX2 this forwards to the _pext_u32 intrinsic;
/// otherwise a software loop is used.
///
/// @param a    Source value to extract bits from.
/// @param mask Selects which bit positions of `a` are extracted.
/// @return The extracted bits, packed starting at bit 0.
INLINE
UINT pext_u32(UINT a, UINT mask)
{
#if KNOB_ARCH >= KNOB_ARCH_AVX2
    return _pext_u32(a, mask);
#else
    UINT          result     = 0;
    unsigned long maskIndex  = 0; // initialized for consistency with pdep_u32
    uint32_t      currentBit = 0; // next destination bit in result
    while (_BitScanForward(&maskIndex, mask))
    {
        // 1. isolate lowest set bit of mask. The shift is done on an unsigned
        //    operand so that index 31 does not shift into the sign bit of int.
        const UINT lowest = static_cast<UINT>(1) << maskIndex;

        // 2. copy the selected source bit into the next packed position
        //    (unsigned shift, since currentBit can reach 31)
        if ((a & lowest) != 0)
        {
            result |= static_cast<UINT>(1) << currentBit;
        }
        ++currentBit;

        // 3. clear lowest bit so the next scan finds the following set bit
        mask &= ~lowest;
    }
    return result;
#endif
}
119
120
#endif //__SWR_INTRIN_H__
121
122