Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/embree/kernels/common/stack_item.h
9905 views
1
// Copyright 2009-2021 Intel Corporation
2
// SPDX-License-Identifier: Apache-2.0
3
4
#pragma once
5
6
#include "default.h"
7
8
namespace embree
9
{
10
/*! An item on the stack holds the node ID and distance of that node. */
11
template<typename T>
12
struct __aligned(16) StackItemT
13
{
14
/*! assert that the xchg function works */
15
static_assert(sizeof(T) <= 12, "sizeof(T) <= 12 failed");
16
17
__forceinline StackItemT() {}
18
19
__forceinline StackItemT(T &ptr, unsigned &dist) : ptr(ptr), dist(dist) {}
20
21
/*! use SSE instructions to swap stack items */
22
__forceinline static void xchg(StackItemT& a, StackItemT& b)
23
{
24
const vfloat4 sse_a = vfloat4::load((float*)&a);
25
const vfloat4 sse_b = vfloat4::load((float*)&b);
26
vfloat4::store(&a,sse_b);
27
vfloat4::store(&b,sse_a);
28
}
29
30
/*! Sort 2 stack items. */
31
__forceinline friend void sort(StackItemT& s1, StackItemT& s2) {
32
if (s2.dist < s1.dist) xchg(s2,s1);
33
}
34
35
/*! Sort 3 stack items. */
36
__forceinline friend void sort(StackItemT& s1, StackItemT& s2, StackItemT& s3)
37
{
38
if (s2.dist < s1.dist) xchg(s2,s1);
39
if (s3.dist < s2.dist) xchg(s3,s2);
40
if (s2.dist < s1.dist) xchg(s2,s1);
41
}
42
43
/*! Sort 4 stack items. */
44
__forceinline friend void sort(StackItemT& s1, StackItemT& s2, StackItemT& s3, StackItemT& s4)
45
{
46
if (s2.dist < s1.dist) xchg(s2,s1);
47
if (s4.dist < s3.dist) xchg(s4,s3);
48
if (s3.dist < s1.dist) xchg(s3,s1);
49
if (s4.dist < s2.dist) xchg(s4,s2);
50
if (s3.dist < s2.dist) xchg(s3,s2);
51
}
52
53
/*! use SSE instructions to swap stack items */
54
__forceinline static void cmp_xchg(vint4& a, vint4& b)
55
{
56
#if defined(__AVX512VL__)
57
const vboolf4 mask(shuffle<2,2,2,2>(b) < shuffle<2,2,2,2>(a));
58
#else
59
const vboolf4 mask0(b < a);
60
const vboolf4 mask(shuffle<2,2,2,2>(mask0));
61
#endif
62
const vint4 c = select(mask,b,a);
63
const vint4 d = select(mask,a,b);
64
a = c;
65
b = d;
66
}
67
68
/*! Sort 3 stack items. */
69
__forceinline static void sort3(vint4& s1, vint4& s2, vint4& s3)
70
{
71
cmp_xchg(s2,s1);
72
cmp_xchg(s3,s2);
73
cmp_xchg(s2,s1);
74
}
75
76
/*! Sort 4 stack items. */
77
__forceinline static void sort4(vint4& s1, vint4& s2, vint4& s3, vint4& s4)
78
{
79
cmp_xchg(s2,s1);
80
cmp_xchg(s4,s3);
81
cmp_xchg(s3,s1);
82
cmp_xchg(s4,s2);
83
cmp_xchg(s3,s2);
84
}
85
86
87
/*! Sort N stack items. */
88
__forceinline friend void sort(StackItemT* begin, StackItemT* end)
89
{
90
for (StackItemT* i = begin+1; i != end; ++i)
91
{
92
const vfloat4 item = vfloat4::load((float*)i);
93
const unsigned dist = i->dist;
94
StackItemT* j = i;
95
96
while ((j != begin) && ((j-1)->dist < dist))
97
{
98
vfloat4::store(j, vfloat4::load((float*)(j-1)));
99
--j;
100
}
101
102
vfloat4::store(j, item);
103
}
104
}
105
106
public:
107
T ptr;
108
unsigned dist;
109
};
110
111
/*! An item on the stack holds the node ID and active ray mask. */
112
template<typename T>
113
struct __aligned(8) StackItemMaskT
114
{
115
T ptr;
116
size_t mask;
117
};
118
119
/*! Stack entry made of three size_t values, 8-byte aligned.
 *  NOTE(review): presumably used by the coherent traversal code; the
 *  exact meaning of the fields is not visible in this header. */
struct __aligned(8) StackItemMaskCoherent
{
  size_t mask;    //!< presumably the active ray mask - confirm against users
  size_t parent;  //!< presumably a reference to the parent node - confirm
  size_t child;   //!< presumably a reference to the child node - confirm
};
125
}
126
127