CoCalc -- node_intersector

GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/embree/kernels/bvh/node_intersector_frustum.h
²¹⁷⁹⁸ views
1
// Copyright 2009-2021 Intel Corporation
2
// SPDX-License-Identifier: Apache-2.0
3

4
#pragma once
5

6
#include "node_intersector.h"
7

8
namespace embree
9
{
10
  namespace isa
11
  {
12
    //////////////////////////////////////////////////////////////////////////////////////
    // Frustum structure used in hybrid and stream traversal
    //////////////////////////////////////////////////////////////////////////////////////
15

16
    /*
       Optimized frustum test. We calculate t=(p-org)/dir in ray/box
       intersection. We assume the rays are split by octant, thus
       dir intervals are either positive or negative in each
       dimension.

       Here rdir = 1/dir. Because the sign of dir is fixed within an
       octant, rdir_min = 1/dir_max and rdir_max = 1/dir_min.

       Case 1: dir.min >= 0 && dir.max >= 0:
         t_min = (p_min - org_max) / dir_max = (p_min - org_max)*rdir_min = p_min*rdir_min - org_max*rdir_min
         t_max = (p_max - org_min) / dir_min = (p_max - org_min)*rdir_max = p_max*rdir_max - org_min*rdir_max

       Case 2: dir.min < 0 && dir.max < 0:
         t_min = (p_max - org_min) / dir_min = (p_max - org_min)*rdir_max = p_max*rdir_max - org_min*rdir_max
         t_max = (p_min - org_max) / dir_max = (p_min - org_max)*rdir_min = p_min*rdir_min - org_max*rdir_min
    */
30

31
    /*! Frustum enclosing a group of rays. Only the declaration of the
     *  primary template is given here; the definitions are supplied by
     *  the explicit specializations for robust == false and
     *  robust == true that follow in this file. */
    template<bool robust>
    struct Frustum;
33
    
34
    /* Fast variant */
35
    template<>
36
    struct Frustum<false>
37
    {
38
      __forceinline Frustum() {}
39

40
      template<int K>
41
      __forceinline void init(const vbool<K>& valid, const Vec3vf<K>& org, const Vec3vf<K>& rdir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
42
      {
43
        const Vec3fa reduced_min_org(reduce_min(select(valid, org.x, pos_inf)),
44
                                     reduce_min(select(valid, org.y, pos_inf)),
45
                                     reduce_min(select(valid, org.z, pos_inf)));
46

47
        const Vec3fa reduced_max_org(reduce_max(select(valid, org.x, neg_inf)),
48
                                     reduce_max(select(valid, org.y, neg_inf)),
49
                                     reduce_max(select(valid, org.z, neg_inf)));
50

51
        const Vec3fa reduced_min_rdir(reduce_min(select(valid, rdir.x, pos_inf)),
52
                                      reduce_min(select(valid, rdir.y, pos_inf)),
53
                                      reduce_min(select(valid, rdir.z, pos_inf)));
54

55
        const Vec3fa reduced_max_rdir(reduce_max(select(valid, rdir.x, neg_inf)),
56
                                      reduce_max(select(valid, rdir.y, neg_inf)),
57
                                      reduce_max(select(valid, rdir.z, neg_inf)));
58

59
        const float reduced_min_dist = reduce_min(select(valid, ray_tnear, vfloat<K>(pos_inf)));
60
        const float reduced_max_dist = reduce_max(select(valid, ray_tfar , vfloat<K>(neg_inf)));
61

62
        init(reduced_min_org, reduced_max_org, reduced_min_rdir, reduced_max_rdir, reduced_min_dist, reduced_max_dist, N);
63
      }
64

65
      __forceinline void init(const Vec3fa& reduced_min_org,
66
                              const Vec3fa& reduced_max_org,
67
                              const Vec3fa& reduced_min_rdir,
68
                              const Vec3fa& reduced_max_rdir,
69
                              float reduced_min_dist,
70
                              float reduced_max_dist,
71
                              int N)
72
      {
73
        const Vec3ba pos_rdir = ge_mask(reduced_min_rdir, Vec3fa(zero));
74

75
        min_rdir = select(pos_rdir, reduced_min_rdir, reduced_max_rdir);
76
        max_rdir = select(pos_rdir, reduced_max_rdir, reduced_min_rdir);
77

78
#if defined (__aarch64__)
79
        neg_min_org_rdir = -(min_rdir * select(pos_rdir, reduced_max_org, reduced_min_org));
80
        neg_max_org_rdir = -(max_rdir * select(pos_rdir, reduced_min_org, reduced_max_org));
81
#else
82
        min_org_rdir = min_rdir * select(pos_rdir, reduced_max_org, reduced_min_org);
83
        max_org_rdir = max_rdir * select(pos_rdir, reduced_min_org, reduced_max_org);
84
#endif
85
        min_dist = reduced_min_dist;
86
        max_dist = reduced_max_dist;
87

88
        nf = NearFarPrecalculations(min_rdir, N);
89
      }
90

91
      template<int K>
92
      __forceinline void updateMaxDist(const vfloat<K>& ray_tfar)
93
      {
94
        max_dist = reduce_max(ray_tfar);
95
      }
96

97
      NearFarPrecalculations nf;
98

99
      Vec3fa min_rdir;
100
      Vec3fa max_rdir;
101

102
#if defined (__aarch64__)
103
      Vec3fa neg_min_org_rdir;
104
      Vec3fa neg_max_org_rdir;
105
#else
106
      Vec3fa min_org_rdir;
107
      Vec3fa max_org_rdir;
108
#endif
109
      float min_dist;
110
      float max_dist;
111
    };
112

113
    typedef Frustum<false> FrustumFast;
114

115
    /* Robust variant */
116
    template<>
117
    struct Frustum<true>
118
    {
119
      __forceinline Frustum() {}
120

121
      template<int K>
122
      __forceinline void init(const vbool<K>& valid, const Vec3vf<K>& org, const Vec3vf<K>& rdir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
123
      {
124
        const Vec3fa reduced_min_org(reduce_min(select(valid, org.x, pos_inf)),
125
                                     reduce_min(select(valid, org.y, pos_inf)),
126
                                     reduce_min(select(valid, org.z, pos_inf)));
127

128
        const Vec3fa reduced_max_org(reduce_max(select(valid, org.x, neg_inf)),
129
                                     reduce_max(select(valid, org.y, neg_inf)),
130
                                     reduce_max(select(valid, org.z, neg_inf)));
131

132
        const Vec3fa reduced_min_rdir(reduce_min(select(valid, rdir.x, pos_inf)),
133
                                      reduce_min(select(valid, rdir.y, pos_inf)),
134
                                      reduce_min(select(valid, rdir.z, pos_inf)));
135

136
        const Vec3fa reduced_max_rdir(reduce_max(select(valid, rdir.x, neg_inf)),
137
                                      reduce_max(select(valid, rdir.y, neg_inf)),
138
                                      reduce_max(select(valid, rdir.z, neg_inf)));
139

140
        const float reduced_min_dist = reduce_min(select(valid, ray_tnear, vfloat<K>(pos_inf)));
141
        const float reduced_max_dist = reduce_max(select(valid, ray_tfar , vfloat<K>(neg_inf)));
142

143
        init(reduced_min_org, reduced_max_org, reduced_min_rdir, reduced_max_rdir, reduced_min_dist, reduced_max_dist, N);
144
      }
145

146
      __forceinline void init(const Vec3fa& reduced_min_org,
147
                              const Vec3fa& reduced_max_org,
148
                              const Vec3fa& reduced_min_rdir,
149
                              const Vec3fa& reduced_max_rdir,
150
                              float reduced_min_dist,
151
                              float reduced_max_dist,
152
                              int N)
153
      {
154
        const Vec3ba pos_rdir = ge_mask(reduced_min_rdir, Vec3fa(zero));
155
        min_rdir = select(pos_rdir, reduced_min_rdir, reduced_max_rdir);
156
        max_rdir = select(pos_rdir, reduced_max_rdir, reduced_min_rdir);
157

158
        min_org = select(pos_rdir, reduced_max_org, reduced_min_org);
159
        max_org = select(pos_rdir, reduced_min_org, reduced_max_org);
160

161
        min_dist = reduced_min_dist;
162
        max_dist = reduced_max_dist;
163

164
        nf = NearFarPrecalculations(min_rdir, N);
165
      }
166

167
      template<int K>
168
      __forceinline void updateMaxDist(const vfloat<K>& ray_tfar)
169
      {
170
        max_dist = reduce_max(ray_tfar);
171
      }
172

173
      NearFarPrecalculations nf;
174

175
      Vec3fa min_rdir;
176
      Vec3fa max_rdir;
177

178
      Vec3fa min_org;
179
      Vec3fa max_org;
180

181
      float min_dist;
182
      float max_dist;
183
    };
184

185
    typedef Frustum<true> FrustumRobust;
186

187
    //////////////////////////////////////////////////////////////////////////////////////
    // Fast AABBNode intersection
    //////////////////////////////////////////////////////////////////////////////////////
190

191
    template<int N>
192
    __forceinline size_t intersectNodeFrustum(const typename BVHN<N>::AABBNode* __restrict__ node,
193
                                       const FrustumFast& frustum, vfloat<N>& dist)
194
    {
195
      const vfloat<N> bminX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearX);
196
      const vfloat<N> bminY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearY);
197
      const vfloat<N> bminZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearZ);
198
      const vfloat<N> bmaxX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farX);
199
      const vfloat<N> bmaxY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farY);
200
      const vfloat<N> bmaxZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farZ);
201

202
#if defined (__aarch64__)
203
      const vfloat<N> fminX = madd(bminX, vfloat<N>(frustum.min_rdir.x), vfloat<N>(frustum.neg_min_org_rdir.x));
204
      const vfloat<N> fminY = madd(bminY, vfloat<N>(frustum.min_rdir.y), vfloat<N>(frustum.neg_min_org_rdir.y));
205
      const vfloat<N> fminZ = madd(bminZ, vfloat<N>(frustum.min_rdir.z), vfloat<N>(frustum.neg_min_org_rdir.z));
206
      const vfloat<N> fmaxX = madd(bmaxX, vfloat<N>(frustum.max_rdir.x), vfloat<N>(frustum.neg_max_org_rdir.x));
207
      const vfloat<N> fmaxY = madd(bmaxY, vfloat<N>(frustum.max_rdir.y), vfloat<N>(frustum.neg_max_org_rdir.y));
208
      const vfloat<N> fmaxZ = madd(bmaxZ, vfloat<N>(frustum.max_rdir.z), vfloat<N>(frustum.neg_max_org_rdir.z));
209
#else
210
      const vfloat<N> fminX = msub(bminX, vfloat<N>(frustum.min_rdir.x), vfloat<N>(frustum.min_org_rdir.x));
211
      const vfloat<N> fminY = msub(bminY, vfloat<N>(frustum.min_rdir.y), vfloat<N>(frustum.min_org_rdir.y));
212
      const vfloat<N> fminZ = msub(bminZ, vfloat<N>(frustum.min_rdir.z), vfloat<N>(frustum.min_org_rdir.z));
213
      const vfloat<N> fmaxX = msub(bmaxX, vfloat<N>(frustum.max_rdir.x), vfloat<N>(frustum.max_org_rdir.x));
214
      const vfloat<N> fmaxY = msub(bmaxY, vfloat<N>(frustum.max_rdir.y), vfloat<N>(frustum.max_org_rdir.y));
215
      const vfloat<N> fmaxZ = msub(bmaxZ, vfloat<N>(frustum.max_rdir.z), vfloat<N>(frustum.max_org_rdir.z));
216
#endif
217
      const vfloat<N> fmin  = maxi(fminX, fminY, fminZ, vfloat<N>(frustum.min_dist));
218
      dist = fmin;
219
      const vfloat<N> fmax  = mini(fmaxX, fmaxY, fmaxZ, vfloat<N>(frustum.max_dist));
220
      const vbool<N> vmask_node_hit = fmin <= fmax;
221
      size_t m_node = movemask(vmask_node_hit) & (((size_t)1 << N)-1);
222
      return m_node;
223
    }
224

225
    //////////////////////////////////////////////////////////////////////////////////////
    // Robust AABBNode intersection
    //////////////////////////////////////////////////////////////////////////////////////
228

229
    template<int N>
230
    __forceinline size_t intersectNodeFrustum(const typename BVHN<N>::AABBNode* __restrict__ node,
231
                                       const FrustumRobust& frustum, vfloat<N>& dist)
232
    {
233
      const vfloat<N> bminX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearX);
234
      const vfloat<N> bminY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearY);
235
      const vfloat<N> bminZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearZ);
236
      const vfloat<N> bmaxX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farX);
237
      const vfloat<N> bmaxY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farY);
238
      const vfloat<N> bmaxZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farZ);
239

240
      const vfloat<N> fminX = (bminX - vfloat<N>(frustum.min_org.x)) * vfloat<N>(frustum.min_rdir.x);
241
      const vfloat<N> fminY = (bminY - vfloat<N>(frustum.min_org.y)) * vfloat<N>(frustum.min_rdir.y);
242
      const vfloat<N> fminZ = (bminZ - vfloat<N>(frustum.min_org.z)) * vfloat<N>(frustum.min_rdir.z);
243
      const vfloat<N> fmaxX = (bmaxX - vfloat<N>(frustum.max_org.x)) * vfloat<N>(frustum.max_rdir.x);
244
      const vfloat<N> fmaxY = (bmaxY - vfloat<N>(frustum.max_org.y)) * vfloat<N>(frustum.max_rdir.y);
245
      const vfloat<N> fmaxZ = (bmaxZ - vfloat<N>(frustum.max_org.z)) * vfloat<N>(frustum.max_rdir.z);
246

247
      const float round_down = 1.0f-2.0f*float(ulp); // FIXME: use per instruction rounding for AVX512
248
      const float round_up   = 1.0f+2.0f*float(ulp);
249
      const vfloat<N> fmin  = max(fminX, fminY, fminZ, vfloat<N>(frustum.min_dist));
250
      dist = fmin;
251
      const vfloat<N> fmax  = min(fmaxX, fmaxY, fmaxZ, vfloat<N>(frustum.max_dist));
252
      const vbool<N> vmask_node_hit = (round_down*fmin <= round_up*fmax);
253
      size_t m_node = movemask(vmask_node_hit) & (((size_t)1 << N)-1);
254
      return m_node;
255
    }
256
  }
257
}
258

259
Product

Resources

Company