CoCalc -- intrin.h

GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/swr/rasterizer/common/intrin.h
⁷⁷⁸² views
/****************************************************************************
 * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 ****************************************************************************/

24
#ifndef __SWR_INTRIN_H__
25
#define __SWR_INTRIN_H__
26

27
#include "os.h"
28

29
// Default SIMD_ARCH to KNOB_ARCH unless the includer has already chosen a
// value. This must be settled before simdlib_types.hpp is included below.
#if !defined(SIMD_ARCH)
#define SIMD_ARCH KNOB_ARCH
#endif
32

33
#include "simdlib_types.hpp"
34

35
// Short aliases for the SIMDImpl implementation types, grouped by the
// implementation they come from (SIMD128Impl, SIMD256Impl, SIMD512Impl).
// NOTE(review): the 4/8/16 in the alias names presumably is the number of
// 32-bit lanes per register -- confirm against simdlib_types.hpp.

// SIMD128Impl aliases
typedef SIMDImpl::SIMD128Impl::Float   simd4scalar;
typedef SIMDImpl::SIMD128Impl::Double  simd4scalard;
typedef SIMDImpl::SIMD128Impl::Integer simd4scalari;
typedef SIMDImpl::SIMD128Impl::Vec4    simd4vector;
typedef SIMDImpl::SIMD128Impl::Mask    simd4mask;

// SIMD256Impl aliases
typedef SIMDImpl::SIMD256Impl::Float   simd8scalar;
typedef SIMDImpl::SIMD256Impl::Double  simd8scalard;
typedef SIMDImpl::SIMD256Impl::Integer simd8scalari;
typedef SIMDImpl::SIMD256Impl::Vec4    simd8vector;
typedef SIMDImpl::SIMD256Impl::Mask    simd8mask;

// SIMD512Impl aliases
typedef SIMDImpl::SIMD512Impl::Float   simd16scalar;
typedef SIMDImpl::SIMD512Impl::Double  simd16scalard;
typedef SIMDImpl::SIMD512Impl::Integer simd16scalari;
typedef SIMDImpl::SIMD512Impl::Vec4    simd16vector;
typedef SIMDImpl::SIMD512Impl::Mask    simd16mask;
52

53
// Width-agnostic aliases (simdscalar, simdvector, ...) bound to the types
// matching KNOB_SIMD_WIDTH. Only a width of 8 is handled here; any other
// value stops the build with an explicit error.
#if KNOB_SIMD_WIDTH == 8
typedef simd8scalar  simdscalar;
typedef simd8scalard simdscalard;
typedef simd8scalari simdscalari;
typedef simd8vector  simdvector;
typedef simd8mask    simdmask;
#else
#error Unsupported vector width
#endif
62

63
// Parallel bit deposit.
//
// Scatters the low-order bits of 'a' into the bit positions selected by the
// set bits of 'mask': bit 0 of 'a' goes to the lowest set bit of 'mask',
// bit 1 of 'a' to the next set bit, and so on. Bits of the result that are
// not selected by 'mask' are zero.
//
// When KNOB_ARCH >= KNOB_ARCH_AVX2 this forwards to the _pdep_u32 intrinsic;
// otherwise it runs a software loop over the set bits of 'mask'.
INLINE
UINT pdep_u32(UINT a, UINT mask)
{
#if KNOB_ARCH >= KNOB_ARCH_AVX2
    return _pdep_u32(a, mask);
#else
    UINT result = 0;

    // copied from http://wm.ite.pl/articles/pdep-soft-emu.html
    // using bsf instead of funky loop
    unsigned long maskIndex = 0;
    while (_BitScanForward(&maskIndex, mask))
    {
        // 1. isolate lowest set bit of mask. The shift is done on an unsigned
        //    value because maskIndex can be 31, which would overflow a signed
        //    '1 << maskIndex'.
        const UINT lowest = (UINT)1 << maskIndex;

        // 2. + 3. deposit the current low bit of src at that mask position.
        //    (Tested directly instead of sign-extending via a signed right
        //    shift, whose result for negative values is implementation-defined.)
        if (a & (UINT)1)
        {
            result |= lowest;
        }

        // 4. clear lowest bit so the next scan finds the following mask bit
        mask &= ~lowest;

        // 5. prepare for next iteration: advance to the next src bit
        a >>= 1;
    }

    return result;
#endif
}
95

96
// Parallel bit extract.
//
// Gathers the bits of 'a' located at the set-bit positions of 'mask' and
// packs them contiguously into the low-order bits of the result: the bit of
// 'a' under the lowest set bit of 'mask' becomes bit 0, the bit under the
// next set bit becomes bit 1, and so on. Remaining high bits are zero.
//
// When KNOB_ARCH >= KNOB_ARCH_AVX2 this forwards to the _pext_u32 intrinsic;
// otherwise it runs a software loop over the set bits of 'mask'.
INLINE
UINT pext_u32(UINT a, UINT mask)
{
#if KNOB_ARCH >= KNOB_ARCH_AVX2
    return _pext_u32(a, mask);
#else
    UINT          result     = 0;
    unsigned long maskIndex  = 0;
    uint32_t      currentBit = 0; // next output bit position to fill
    while (_BitScanForward(&maskIndex, mask))
    {
        // 1. isolate lowest set bit of mask. The shift is done on an unsigned
        //    value because maskIndex can be 31, which would overflow a signed
        //    '1 << maskIndex'.
        const UINT lowest = (UINT)1 << maskIndex;

        // 2. copy the selected bit of src into the next output position.
        //    currentBit can reach 31 (full mask), so shift an unsigned 1.
        if (a & lowest)
        {
            result |= (UINT)1 << currentBit;
        }
        currentBit++;

        // 3. clear lowest bit so the next scan finds the following mask bit
        mask &= ~lowest;
    }
    return result;
#endif
}
119

120
#endif //__SWR_INTRIN_H__
121

122
Product

Resources

Company