Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/embree/kernels/bvh/node_intersector_packet.h
9912 views
1
// Copyright 2009-2021 Intel Corporation
2
// SPDX-License-Identifier: Apache-2.0
3
4
#pragma once
5
6
#include "node_intersector.h"
7
8
namespace embree
9
{
10
namespace isa
11
{
12
//////////////////////////////////////////////////////////////////////////////////////
13
// Ray packet structure used in hybrid traversal
14
//////////////////////////////////////////////////////////////////////////////////////
15
16
template<int K, bool robust>
17
struct TravRayK;
18
19
/* Fast variant */
20
template<int K>
21
struct TravRayK<K, false>
22
{
23
__forceinline TravRayK() {}
24
25
__forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
26
{
27
init(ray_org, ray_dir, N);
28
}
29
30
__forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
31
{
32
init(ray_org, ray_dir, N);
33
tnear = ray_tnear;
34
tfar = ray_tfar;
35
}
36
37
__forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
38
{
39
org = ray_org;
40
dir = ray_dir;
41
rdir = rcp_safe(ray_dir);
42
#if defined(__aarch64__)
43
neg_org_rdir = -(org * rdir);
44
#elif defined(__AVX2__)
45
org_rdir = org * rdir;
46
#endif
47
48
if (N)
49
{
50
const int size = sizeof(float)*N;
51
nearXYZ.x = select(rdir.x >= 0.0f, vint<K>(0*size), vint<K>(1*size));
52
nearXYZ.y = select(rdir.y >= 0.0f, vint<K>(2*size), vint<K>(3*size));
53
nearXYZ.z = select(rdir.z >= 0.0f, vint<K>(4*size), vint<K>(5*size));
54
}
55
}
56
57
Vec3vf<K> org;
58
Vec3vf<K> dir;
59
Vec3vf<K> rdir;
60
#if defined(__aarch64__)
61
Vec3vf<K> neg_org_rdir;
62
#elif defined(__AVX2__)
63
Vec3vf<K> org_rdir;
64
#endif
65
Vec3vi<K> nearXYZ;
66
vfloat<K> tnear;
67
vfloat<K> tfar;
68
};
69
70
template<int K>
71
using TravRayKFast = TravRayK<K, false>;
72
73
/* Robust variant */
74
template<int K>
75
struct TravRayK<K, true>
76
{
77
__forceinline TravRayK() {}
78
79
__forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
80
{
81
init(ray_org, ray_dir, N);
82
}
83
84
__forceinline TravRayK(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)
85
{
86
init(ray_org, ray_dir, N);
87
tnear = ray_tnear;
88
tfar = ray_tfar;
89
}
90
91
__forceinline void init(const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir, int N)
92
{
93
org = ray_org;
94
dir = ray_dir;
95
rdir = vfloat<K>(1.0f)/(zero_fix(ray_dir));
96
97
if (N)
98
{
99
const int size = sizeof(float)*N;
100
nearXYZ.x = select(rdir.x >= 0.0f, vint<K>(0*size), vint<K>(1*size));
101
nearXYZ.y = select(rdir.y >= 0.0f, vint<K>(2*size), vint<K>(3*size));
102
nearXYZ.z = select(rdir.z >= 0.0f, vint<K>(4*size), vint<K>(5*size));
103
}
104
}
105
106
Vec3vf<K> org;
107
Vec3vf<K> dir;
108
Vec3vf<K> rdir;
109
Vec3vi<K> nearXYZ;
110
vfloat<K> tnear;
111
vfloat<K> tfar;
112
};
113
114
template<int K>
115
using TravRayKRobust = TravRayK<K, true>;
116
117
//////////////////////////////////////////////////////////////////////////////////////
118
// Fast AABBNode intersection
119
//////////////////////////////////////////////////////////////////////////////////////
120
121
template<int N, int K>
122
__forceinline vbool<K> intersectNodeK(const typename BVHN<N>::AABBNode* node, size_t i,
123
const TravRayKFast<K>& ray, vfloat<K>& dist)
124
125
{
126
#if defined(__aarch64__)
127
const vfloat<K> lclipMinX = madd(node->lower_x[i], ray.rdir.x, ray.neg_org_rdir.x);
128
const vfloat<K> lclipMinY = madd(node->lower_y[i], ray.rdir.y, ray.neg_org_rdir.y);
129
const vfloat<K> lclipMinZ = madd(node->lower_z[i], ray.rdir.z, ray.neg_org_rdir.z);
130
const vfloat<K> lclipMaxX = madd(node->upper_x[i], ray.rdir.x, ray.neg_org_rdir.x);
131
const vfloat<K> lclipMaxY = madd(node->upper_y[i], ray.rdir.y, ray.neg_org_rdir.y);
132
const vfloat<K> lclipMaxZ = madd(node->upper_z[i], ray.rdir.z, ray.neg_org_rdir.z);
133
#elif defined(__AVX2__)
134
const vfloat<K> lclipMinX = msub(node->lower_x[i], ray.rdir.x, ray.org_rdir.x);
135
const vfloat<K> lclipMinY = msub(node->lower_y[i], ray.rdir.y, ray.org_rdir.y);
136
const vfloat<K> lclipMinZ = msub(node->lower_z[i], ray.rdir.z, ray.org_rdir.z);
137
const vfloat<K> lclipMaxX = msub(node->upper_x[i], ray.rdir.x, ray.org_rdir.x);
138
const vfloat<K> lclipMaxY = msub(node->upper_y[i], ray.rdir.y, ray.org_rdir.y);
139
const vfloat<K> lclipMaxZ = msub(node->upper_z[i], ray.rdir.z, ray.org_rdir.z);
140
#else
141
const vfloat<K> lclipMinX = (node->lower_x[i] - ray.org.x) * ray.rdir.x;
142
const vfloat<K> lclipMinY = (node->lower_y[i] - ray.org.y) * ray.rdir.y;
143
const vfloat<K> lclipMinZ = (node->lower_z[i] - ray.org.z) * ray.rdir.z;
144
const vfloat<K> lclipMaxX = (node->upper_x[i] - ray.org.x) * ray.rdir.x;
145
const vfloat<K> lclipMaxY = (node->upper_y[i] - ray.org.y) * ray.rdir.y;
146
const vfloat<K> lclipMaxZ = (node->upper_z[i] - ray.org.z) * ray.rdir.z;
147
#endif
148
149
#if defined(__AVX512F__) // SKX
150
if (K == 16)
151
{
152
/* use mixed float/int min/max */
153
const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
154
const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
155
const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
156
dist = lnearP;
157
return lhit;
158
}
159
else
160
#endif
161
{
162
const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
163
const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
164
#if defined(__AVX512F__) // SKX
165
const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
166
#else
167
const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
168
#endif
169
dist = lnearP;
170
return lhit;
171
}
172
}
173
174
//////////////////////////////////////////////////////////////////////////////////////
175
// Robust AABBNode intersection
176
//////////////////////////////////////////////////////////////////////////////////////
177
178
template<int N, int K>
179
__forceinline vbool<K> intersectNodeKRobust(const typename BVHN<N>::AABBNode* node, size_t i,
180
const TravRayKRobust<K>& ray, vfloat<K>& dist)
181
{
182
// FIXME: use per instruction rounding for AVX512
183
const vfloat<K> lclipMinX = (node->lower_x[i] - ray.org.x) * ray.rdir.x;
184
const vfloat<K> lclipMinY = (node->lower_y[i] - ray.org.y) * ray.rdir.y;
185
const vfloat<K> lclipMinZ = (node->lower_z[i] - ray.org.z) * ray.rdir.z;
186
const vfloat<K> lclipMaxX = (node->upper_x[i] - ray.org.x) * ray.rdir.x;
187
const vfloat<K> lclipMaxY = (node->upper_y[i] - ray.org.y) * ray.rdir.y;
188
const vfloat<K> lclipMaxZ = (node->upper_z[i] - ray.org.z) * ray.rdir.z;
189
const float round_up = 1.0f+3.0f*float(ulp);
190
const float round_down = 1.0f-3.0f*float(ulp);
191
const vfloat<K> lnearP = round_down*max(max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY)), min(lclipMinZ, lclipMaxZ));
192
const vfloat<K> lfarP = round_up *min(min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY)), max(lclipMinZ, lclipMaxZ));
193
const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
194
dist = lnearP;
195
return lhit;
196
}
197
198
//////////////////////////////////////////////////////////////////////////////////////
199
// Fast AABBNodeMB intersection
200
//////////////////////////////////////////////////////////////////////////////////////
201
202
template<int N, int K>
203
__forceinline vbool<K> intersectNodeK(const typename BVHN<N>::AABBNodeMB* node, const size_t i,
204
const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
205
{
206
const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
207
const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
208
const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
209
const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
210
const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
211
const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
212
213
#if defined(__aarch64__)
214
const vfloat<K> lclipMinX = madd(vlower_x, ray.rdir.x, ray.neg_org_rdir.x);
215
const vfloat<K> lclipMinY = madd(vlower_y, ray.rdir.y, ray.neg_org_rdir.y);
216
const vfloat<K> lclipMinZ = madd(vlower_z, ray.rdir.z, ray.neg_org_rdir.z);
217
const vfloat<K> lclipMaxX = madd(vupper_x, ray.rdir.x, ray.neg_org_rdir.x);
218
const vfloat<K> lclipMaxY = madd(vupper_y, ray.rdir.y, ray.neg_org_rdir.y);
219
const vfloat<K> lclipMaxZ = madd(vupper_z, ray.rdir.z, ray.neg_org_rdir.z);
220
#elif defined(__AVX2__)
221
const vfloat<K> lclipMinX = msub(vlower_x, ray.rdir.x, ray.org_rdir.x);
222
const vfloat<K> lclipMinY = msub(vlower_y, ray.rdir.y, ray.org_rdir.y);
223
const vfloat<K> lclipMinZ = msub(vlower_z, ray.rdir.z, ray.org_rdir.z);
224
const vfloat<K> lclipMaxX = msub(vupper_x, ray.rdir.x, ray.org_rdir.x);
225
const vfloat<K> lclipMaxY = msub(vupper_y, ray.rdir.y, ray.org_rdir.y);
226
const vfloat<K> lclipMaxZ = msub(vupper_z, ray.rdir.z, ray.org_rdir.z);
227
#else
228
const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
229
const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
230
const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
231
const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
232
const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
233
const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
234
#endif
235
236
#if defined(__AVX512F__) // SKX
237
if (K == 16)
238
{
239
/* use mixed float/int min/max */
240
const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
241
const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
242
const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
243
dist = lnearP;
244
return lhit;
245
}
246
else
247
#endif
248
{
249
const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
250
const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
251
#if defined(__AVX512F__) // SKX
252
const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
253
#else
254
const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
255
#endif
256
dist = lnearP;
257
return lhit;
258
}
259
}
260
261
//////////////////////////////////////////////////////////////////////////////////////
262
// Robust AABBNodeMB intersection
263
//////////////////////////////////////////////////////////////////////////////////////
264
265
template<int N, int K>
266
__forceinline vbool<K> intersectNodeKRobust(const typename BVHN<N>::AABBNodeMB* node, const size_t i,
267
const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
268
{
269
const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
270
const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
271
const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
272
const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
273
const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
274
const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
275
276
const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
277
const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
278
const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
279
const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
280
const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
281
const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
282
283
const float round_up = 1.0f+3.0f*float(ulp);
284
const float round_down = 1.0f-3.0f*float(ulp);
285
286
#if defined(__AVX512F__) // SKX
287
if (K == 16)
288
{
289
const vfloat<K> lnearP = round_down*maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
290
const vfloat<K> lfarP = round_up *mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
291
const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
292
dist = lnearP;
293
return lhit;
294
}
295
else
296
#endif
297
{
298
const vfloat<K> lnearP = round_down*maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
299
const vfloat<K> lfarP = round_up *mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
300
const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
301
dist = lnearP;
302
return lhit;
303
}
304
}
305
306
//////////////////////////////////////////////////////////////////////////////////////
307
// Fast AABBNodeMB4D intersection
308
//////////////////////////////////////////////////////////////////////////////////////
309
310
template<int N, int K>
311
__forceinline vbool<K> intersectNodeKMB4D(const typename BVHN<N>::NodeRef ref, const size_t i,
312
const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
313
{
314
const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
315
316
const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
317
const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
318
const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
319
const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
320
const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
321
const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
322
323
#if defined(__aarch64__)
324
const vfloat<K> lclipMinX = madd(vlower_x, ray.rdir.x, ray.neg_org_rdir.x);
325
const vfloat<K> lclipMinY = madd(vlower_y, ray.rdir.y, ray.neg_org_rdir.y);
326
const vfloat<K> lclipMinZ = madd(vlower_z, ray.rdir.z, ray.neg_org_rdir.z);
327
const vfloat<K> lclipMaxX = madd(vupper_x, ray.rdir.x, ray.neg_org_rdir.x);
328
const vfloat<K> lclipMaxY = madd(vupper_y, ray.rdir.y, ray.neg_org_rdir.y);
329
const vfloat<K> lclipMaxZ = madd(vupper_z, ray.rdir.z, ray.neg_org_rdir.z);
330
#elif defined(__AVX2__)
331
const vfloat<K> lclipMinX = msub(vlower_x, ray.rdir.x, ray.org_rdir.x);
332
const vfloat<K> lclipMinY = msub(vlower_y, ray.rdir.y, ray.org_rdir.y);
333
const vfloat<K> lclipMinZ = msub(vlower_z, ray.rdir.z, ray.org_rdir.z);
334
const vfloat<K> lclipMaxX = msub(vupper_x, ray.rdir.x, ray.org_rdir.x);
335
const vfloat<K> lclipMaxY = msub(vupper_y, ray.rdir.y, ray.org_rdir.y);
336
const vfloat<K> lclipMaxZ = msub(vupper_z, ray.rdir.z, ray.org_rdir.z);
337
#else
338
const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
339
const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
340
const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
341
const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
342
const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
343
const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
344
#endif
345
346
const vfloat<K> lnearP = maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ));
347
const vfloat<K> lfarP = mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ));
348
vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
349
if (unlikely(ref.isAABBNodeMB4D())) {
350
const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
351
lhit = lhit & (vfloat<K>(node1->lower_t[i]) <= time) & (time < vfloat<K>(node1->upper_t[i]));
352
}
353
dist = lnearP;
354
return lhit;
355
}
356
357
//////////////////////////////////////////////////////////////////////////////////////
358
// Robust AABBNodeMB4D intersection
359
//////////////////////////////////////////////////////////////////////////////////////
360
361
template<int N, int K>
362
__forceinline vbool<K> intersectNodeKMB4DRobust(const typename BVHN<N>::NodeRef ref, const size_t i,
363
const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist)
364
{
365
const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
366
367
const vfloat<K> vlower_x = madd(time, vfloat<K>(node->lower_dx[i]), vfloat<K>(node->lower_x[i]));
368
const vfloat<K> vlower_y = madd(time, vfloat<K>(node->lower_dy[i]), vfloat<K>(node->lower_y[i]));
369
const vfloat<K> vlower_z = madd(time, vfloat<K>(node->lower_dz[i]), vfloat<K>(node->lower_z[i]));
370
const vfloat<K> vupper_x = madd(time, vfloat<K>(node->upper_dx[i]), vfloat<K>(node->upper_x[i]));
371
const vfloat<K> vupper_y = madd(time, vfloat<K>(node->upper_dy[i]), vfloat<K>(node->upper_y[i]));
372
const vfloat<K> vupper_z = madd(time, vfloat<K>(node->upper_dz[i]), vfloat<K>(node->upper_z[i]));
373
374
const vfloat<K> lclipMinX = (vlower_x - ray.org.x) * ray.rdir.x;
375
const vfloat<K> lclipMinY = (vlower_y - ray.org.y) * ray.rdir.y;
376
const vfloat<K> lclipMinZ = (vlower_z - ray.org.z) * ray.rdir.z;
377
const vfloat<K> lclipMaxX = (vupper_x - ray.org.x) * ray.rdir.x;
378
const vfloat<K> lclipMaxY = (vupper_y - ray.org.y) * ray.rdir.y;
379
const vfloat<K> lclipMaxZ = (vupper_z - ray.org.z) * ray.rdir.z;
380
381
const float round_up = 1.0f+3.0f*float(ulp);
382
const float round_down = 1.0f-3.0f*float(ulp);
383
const vfloat<K> lnearP = round_down*maxi(maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY)), mini(lclipMinZ, lclipMaxZ));
384
const vfloat<K> lfarP = round_up *mini(mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY)), maxi(lclipMinZ, lclipMaxZ));
385
vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
386
387
if (unlikely(ref.isAABBNodeMB4D())) {
388
const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
389
lhit = lhit & (vfloat<K>(node1->lower_t[i]) <= time) & (time < vfloat<K>(node1->upper_t[i]));
390
}
391
dist = lnearP;
392
return lhit;
393
}
394
395
//////////////////////////////////////////////////////////////////////////////////////
396
// Fast OBBNode intersection
397
//////////////////////////////////////////////////////////////////////////////////////
398
399
template<int N, int K, bool robust>
400
__forceinline vbool<K> intersectNodeK(const typename BVHN<N>::OBBNode* node, const size_t i,
401
const TravRayK<K,robust>& ray, vfloat<K>& dist)
402
{
403
const AffineSpace3vf<K> naabb(Vec3f(node->naabb.l.vx.x[i], node->naabb.l.vx.y[i], node->naabb.l.vx.z[i]),
404
Vec3f(node->naabb.l.vy.x[i], node->naabb.l.vy.y[i], node->naabb.l.vy.z[i]),
405
Vec3f(node->naabb.l.vz.x[i], node->naabb.l.vz.y[i], node->naabb.l.vz.z[i]),
406
Vec3f(node->naabb.p .x[i], node->naabb.p .y[i], node->naabb.p .z[i]));
407
408
const Vec3vf<K> dir = xfmVector(naabb, ray.dir);
409
const Vec3vf<K> nrdir = Vec3vf<K>(vfloat<K>(-1.0f)) * rcp_safe(dir); // FIXME: negate instead of mul with -1?
410
const Vec3vf<K> org = xfmPoint(naabb, ray.org);
411
412
const vfloat<K> lclipMinX = org.x * nrdir.x; // (Vec3fa(zero) - org) * rdir;
413
const vfloat<K> lclipMinY = org.y * nrdir.y;
414
const vfloat<K> lclipMinZ = org.z * nrdir.z;
415
const vfloat<K> lclipMaxX = lclipMinX - nrdir.x; // (Vec3fa(one) - org) * rdir;
416
const vfloat<K> lclipMaxY = lclipMinY - nrdir.y;
417
const vfloat<K> lclipMaxZ = lclipMinZ - nrdir.z;
418
419
vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
420
vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
421
if (robust) {
422
lnearP = lnearP*vfloat<K>(1.0f-3.0f*float(ulp));
423
lfarP = lfarP *vfloat<K>(1.0f+3.0f*float(ulp));
424
}
425
const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
426
dist = lnearP;
427
return lhit;
428
}
429
430
//////////////////////////////////////////////////////////////////////////////////////
431
// Fast OBBNodeMB intersection
432
//////////////////////////////////////////////////////////////////////////////////////
433
434
template<int N, int K, bool robust>
435
__forceinline vbool<K> intersectNodeK(const typename BVHN<N>::OBBNodeMB* node, const size_t i,
436
const TravRayK<K,robust>& ray, const vfloat<K>& time, vfloat<K>& dist)
437
{
438
const AffineSpace3vf<K> xfm(Vec3f(node->space0.l.vx.x[i], node->space0.l.vx.y[i], node->space0.l.vx.z[i]),
439
Vec3f(node->space0.l.vy.x[i], node->space0.l.vy.y[i], node->space0.l.vy.z[i]),
440
Vec3f(node->space0.l.vz.x[i], node->space0.l.vz.y[i], node->space0.l.vz.z[i]),
441
Vec3f(node->space0.p .x[i], node->space0.p .y[i], node->space0.p .z[i]));
442
443
const Vec3vf<K> b0_lower = zero;
444
const Vec3vf<K> b0_upper = one;
445
const Vec3vf<K> b1_lower(node->b1.lower.x[i], node->b1.lower.y[i], node->b1.lower.z[i]);
446
const Vec3vf<K> b1_upper(node->b1.upper.x[i], node->b1.upper.y[i], node->b1.upper.z[i]);
447
const Vec3vf<K> lower = lerp(b0_lower, b1_lower, time);
448
const Vec3vf<K> upper = lerp(b0_upper, b1_upper, time);
449
450
const Vec3vf<K> dir = xfmVector(xfm, ray.dir);
451
const Vec3vf<K> rdir = rcp_safe(dir);
452
const Vec3vf<K> org = xfmPoint(xfm, ray.org);
453
454
const vfloat<K> lclipMinX = (lower.x - org.x) * rdir.x;
455
const vfloat<K> lclipMinY = (lower.y - org.y) * rdir.y;
456
const vfloat<K> lclipMinZ = (lower.z - org.z) * rdir.z;
457
const vfloat<K> lclipMaxX = (upper.x - org.x) * rdir.x;
458
const vfloat<K> lclipMaxY = (upper.y - org.y) * rdir.y;
459
const vfloat<K> lclipMaxZ = (upper.z - org.z) * rdir.z;
460
461
vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
462
vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
463
if (robust) {
464
lnearP = lnearP*vfloat<K>(1.0f-3.0f*float(ulp));
465
lfarP = lfarP *vfloat<K>(1.0f+3.0f*float(ulp));
466
}
467
468
const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
469
dist = lnearP;
470
return lhit;
471
}
472
473
474
475
//////////////////////////////////////////////////////////////////////////////////////
476
// QuantizedBaseNode intersection
477
//////////////////////////////////////////////////////////////////////////////////////
478
479
template<int N, int K>
480
__forceinline vbool<K> intersectQuantizedNodeK(const typename BVHN<N>::QuantizedBaseNode* node, size_t i,
481
const TravRayK<K,false>& ray, vfloat<K>& dist)
482
483
{
484
assert(movemask(node->validMask()) & ((size_t)1 << i));
485
const vfloat<N> lower_x = node->dequantizeLowerX();
486
const vfloat<N> upper_x = node->dequantizeUpperX();
487
const vfloat<N> lower_y = node->dequantizeLowerY();
488
const vfloat<N> upper_y = node->dequantizeUpperY();
489
const vfloat<N> lower_z = node->dequantizeLowerZ();
490
const vfloat<N> upper_z = node->dequantizeUpperZ();
491
492
#if defined(__aarch64__)
493
const vfloat<K> lclipMinX = madd(lower_x[i], ray.rdir.x, ray.neg_org_rdir.x);
494
const vfloat<K> lclipMinY = madd(lower_y[i], ray.rdir.y, ray.neg_org_rdir.y);
495
const vfloat<K> lclipMinZ = madd(lower_z[i], ray.rdir.z, ray.neg_org_rdir.z);
496
const vfloat<K> lclipMaxX = madd(upper_x[i], ray.rdir.x, ray.neg_org_rdir.x);
497
const vfloat<K> lclipMaxY = madd(upper_y[i], ray.rdir.y, ray.neg_org_rdir.y);
498
const vfloat<K> lclipMaxZ = madd(upper_z[i], ray.rdir.z, ray.neg_org_rdir.z);
499
#elif defined(__AVX2__)
500
const vfloat<K> lclipMinX = msub(lower_x[i], ray.rdir.x, ray.org_rdir.x);
501
const vfloat<K> lclipMinY = msub(lower_y[i], ray.rdir.y, ray.org_rdir.y);
502
const vfloat<K> lclipMinZ = msub(lower_z[i], ray.rdir.z, ray.org_rdir.z);
503
const vfloat<K> lclipMaxX = msub(upper_x[i], ray.rdir.x, ray.org_rdir.x);
504
const vfloat<K> lclipMaxY = msub(upper_y[i], ray.rdir.y, ray.org_rdir.y);
505
const vfloat<K> lclipMaxZ = msub(upper_z[i], ray.rdir.z, ray.org_rdir.z);
506
#else
507
const vfloat<K> lclipMinX = (lower_x[i] - ray.org.x) * ray.rdir.x;
508
const vfloat<K> lclipMinY = (lower_y[i] - ray.org.y) * ray.rdir.y;
509
const vfloat<K> lclipMinZ = (lower_z[i] - ray.org.z) * ray.rdir.z;
510
const vfloat<K> lclipMaxX = (upper_x[i] - ray.org.x) * ray.rdir.x;
511
const vfloat<K> lclipMaxY = (upper_y[i] - ray.org.y) * ray.rdir.y;
512
const vfloat<K> lclipMaxZ = (upper_z[i] - ray.org.z) * ray.rdir.z;
513
#endif
514
515
#if defined(__AVX512F__) // SKX
516
if (K == 16)
517
{
518
/* use mixed float/int min/max */
519
const vfloat<K> lnearP = maxi(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
520
const vfloat<K> lfarP = mini(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
521
const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
522
dist = lnearP;
523
return lhit;
524
}
525
else
526
#endif
527
{
528
const vfloat<K> lnearP = maxi(mini(lclipMinX, lclipMaxX), mini(lclipMinY, lclipMaxY), mini(lclipMinZ, lclipMaxZ));
529
const vfloat<K> lfarP = mini(maxi(lclipMinX, lclipMaxX), maxi(lclipMinY, lclipMaxY), maxi(lclipMinZ, lclipMaxZ));
530
#if defined(__AVX512F__) // SKX
531
const vbool<K> lhit = asInt(maxi(lnearP, ray.tnear)) <= asInt(mini(lfarP, ray.tfar));
532
#else
533
const vbool<K> lhit = maxi(lnearP, ray.tnear) <= mini(lfarP, ray.tfar);
534
#endif
535
dist = lnearP;
536
return lhit;
537
}
538
}
539
540
template<int N, int K>
541
__forceinline vbool<K> intersectQuantizedNodeK(const typename BVHN<N>::QuantizedBaseNode* node, size_t i,
542
const TravRayK<K,true>& ray, vfloat<K>& dist)
543
544
{
545
assert(movemask(node->validMask()) & ((size_t)1 << i));
546
const vfloat<N> lower_x = node->dequantizeLowerX();
547
const vfloat<N> upper_x = node->dequantizeUpperX();
548
const vfloat<N> lower_y = node->dequantizeLowerY();
549
const vfloat<N> upper_y = node->dequantizeUpperY();
550
const vfloat<N> lower_z = node->dequantizeLowerZ();
551
const vfloat<N> upper_z = node->dequantizeUpperZ();
552
553
const vfloat<K> lclipMinX = (lower_x[i] - ray.org.x) * ray.rdir.x;
554
const vfloat<K> lclipMinY = (lower_y[i] - ray.org.y) * ray.rdir.y;
555
const vfloat<K> lclipMinZ = (lower_z[i] - ray.org.z) * ray.rdir.z;
556
const vfloat<K> lclipMaxX = (upper_x[i] - ray.org.x) * ray.rdir.x;
557
const vfloat<K> lclipMaxY = (upper_y[i] - ray.org.y) * ray.rdir.y;
558
const vfloat<K> lclipMaxZ = (upper_z[i] - ray.org.z) * ray.rdir.z;
559
560
const float round_up = 1.0f+3.0f*float(ulp);
561
const float round_down = 1.0f-3.0f*float(ulp);
562
563
const vfloat<K> lnearP = round_down*max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
564
const vfloat<K> lfarP = round_up *min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
565
const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
566
dist = lnearP;
567
return lhit;
568
}
569
570
template<int N, int K>
571
__forceinline vbool<K> intersectQuantizedNodeMBK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
572
const TravRayK<K,false>& ray, const vfloat<K>& time, vfloat<K>& dist)
573
574
{
575
assert(movemask(node->validMask()) & ((size_t)1 << i));
576
577
const vfloat<K> lower_x = node->template dequantizeLowerX<K>(i,time);
578
const vfloat<K> upper_x = node->template dequantizeUpperX<K>(i,time);
579
const vfloat<K> lower_y = node->template dequantizeLowerY<K>(i,time);
580
const vfloat<K> upper_y = node->template dequantizeUpperY<K>(i,time);
581
const vfloat<K> lower_z = node->template dequantizeLowerZ<K>(i,time);
582
const vfloat<K> upper_z = node->template dequantizeUpperZ<K>(i,time);
583
584
#if defined(__aarch64__)
585
const vfloat<K> lclipMinX = madd(lower_x, ray.rdir.x, ray.neg_org_rdir.x);
586
const vfloat<K> lclipMinY = madd(lower_y, ray.rdir.y, ray.neg_org_rdir.y);
587
const vfloat<K> lclipMinZ = madd(lower_z, ray.rdir.z, ray.neg_org_rdir.z);
588
const vfloat<K> lclipMaxX = madd(upper_x, ray.rdir.x, ray.neg_org_rdir.x);
589
const vfloat<K> lclipMaxY = madd(upper_y, ray.rdir.y, ray.neg_org_rdir.y);
590
const vfloat<K> lclipMaxZ = madd(upper_z, ray.rdir.z, ray.neg_org_rdir.z);
591
#elif defined(__AVX2__)
592
const vfloat<K> lclipMinX = msub(lower_x, ray.rdir.x, ray.org_rdir.x);
593
const vfloat<K> lclipMinY = msub(lower_y, ray.rdir.y, ray.org_rdir.y);
594
const vfloat<K> lclipMinZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z);
595
const vfloat<K> lclipMaxX = msub(upper_x, ray.rdir.x, ray.org_rdir.x);
596
const vfloat<K> lclipMaxY = msub(upper_y, ray.rdir.y, ray.org_rdir.y);
597
const vfloat<K> lclipMaxZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z);
598
#else
599
const vfloat<K> lclipMinX = (lower_x - ray.org.x) * ray.rdir.x;
600
const vfloat<K> lclipMinY = (lower_y - ray.org.y) * ray.rdir.y;
601
const vfloat<K> lclipMinZ = (lower_z - ray.org.z) * ray.rdir.z;
602
const vfloat<K> lclipMaxX = (upper_x - ray.org.x) * ray.rdir.x;
603
const vfloat<K> lclipMaxY = (upper_y - ray.org.y) * ray.rdir.y;
604
const vfloat<K> lclipMaxZ = (upper_z - ray.org.z) * ray.rdir.z;
605
#endif
606
const vfloat<K> lnearP = max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
607
const vfloat<K> lfarP = min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
608
const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
609
dist = lnearP;
610
return lhit;
611
}
612
613
614
template<int N, int K>
615
__forceinline vbool<K> intersectQuantizedNodeMBK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
616
const TravRayK<K,true>& ray, const vfloat<K>& time, vfloat<K>& dist)
617
618
{
619
assert(movemask(node->validMask()) & ((size_t)1 << i));
620
621
const vfloat<K> lower_x = node->template dequantizeLowerX<K>(i,time);
622
const vfloat<K> upper_x = node->template dequantizeUpperX<K>(i,time);
623
const vfloat<K> lower_y = node->template dequantizeLowerY<K>(i,time);
624
const vfloat<K> upper_y = node->template dequantizeUpperY<K>(i,time);
625
const vfloat<K> lower_z = node->template dequantizeLowerZ<K>(i,time);
626
const vfloat<K> upper_z = node->template dequantizeUpperZ<K>(i,time);
627
628
const vfloat<K> lclipMinX = (lower_x - ray.org.x) * ray.rdir.x;
629
const vfloat<K> lclipMinY = (lower_y - ray.org.y) * ray.rdir.y;
630
const vfloat<K> lclipMinZ = (lower_z - ray.org.z) * ray.rdir.z;
631
const vfloat<K> lclipMaxX = (upper_x - ray.org.x) * ray.rdir.x;
632
const vfloat<K> lclipMaxY = (upper_y - ray.org.y) * ray.rdir.y;
633
const vfloat<K> lclipMaxZ = (upper_z - ray.org.z) * ray.rdir.z;
634
635
const float round_up = 1.0f+3.0f*float(ulp);
636
const float round_down = 1.0f-3.0f*float(ulp);
637
638
const vfloat<K> lnearP = round_down*max(min(lclipMinX, lclipMaxX), min(lclipMinY, lclipMaxY), min(lclipMinZ, lclipMaxZ));
639
const vfloat<K> lfarP = round_up *min(max(lclipMinX, lclipMaxX), max(lclipMinY, lclipMaxY), max(lclipMinZ, lclipMaxZ));
640
const vbool<K> lhit = max(lnearP, ray.tnear) <= min(lfarP, ray.tfar);
641
dist = lnearP;
642
return lhit;
643
}
644
645
646
//////////////////////////////////////////////////////////////////////////////////////
647
// Node intersectors used in hybrid traversal
648
//////////////////////////////////////////////////////////////////////////////////////
649
650
/*! Intersects N nodes with K rays */
651
template<int N, int K, int types, bool robust>
652
struct BVHNNodeIntersectorK;
653
654
template<int N, int K>
655
struct BVHNNodeIntersectorK<N, K, BVH_AN1, false>
656
{
657
/* vmask is both an input and an output parameter! Its initial value should be the parent node
658
hit mask, which is used for correctly computing the current hit mask. The parent hit mask
659
is actually required only for motion blur node intersections (because different rays may
660
have different times), so for regular nodes vmask is simply overwritten. */
661
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
662
const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
663
{
664
vmask = intersectNodeK<N,K>(node.getAABBNode(), i, ray, dist);
665
return true;
666
}
667
};
668
669
template<int N, int K>
670
struct BVHNNodeIntersectorK<N, K, BVH_AN1, true>
671
{
672
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
673
const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
674
{
675
vmask = intersectNodeKRobust<N,K>(node.getAABBNode(), i, ray, dist);
676
return true;
677
}
678
};
679
680
template<int N, int K>
681
struct BVHNNodeIntersectorK<N, K, BVH_AN2, false>
682
{
683
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
684
const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
685
{
686
vmask = intersectNodeK<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
687
return true;
688
}
689
};
690
691
template<int N, int K>
692
struct BVHNNodeIntersectorK<N, K, BVH_AN2, true>
693
{
694
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
695
const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
696
{
697
vmask = intersectNodeKRobust<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
698
return true;
699
}
700
};
701
702
template<int N, int K>
703
struct BVHNNodeIntersectorK<N, K, BVH_AN1_UN1, false>
704
{
705
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
706
const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
707
{
708
if (likely(node.isAABBNode())) vmask = intersectNodeK<N,K>(node.getAABBNode(), i, ray, dist);
709
else /*if (unlikely(node.isOBBNode()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNode(), i, ray, dist);
710
return true;
711
}
712
};
713
714
template<int N, int K>
715
struct BVHNNodeIntersectorK<N, K, BVH_AN1_UN1, true>
716
{
717
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
718
const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
719
{
720
if (likely(node.isAABBNode())) vmask = intersectNodeKRobust<N,K>(node.getAABBNode(), i, ray, dist);
721
else /*if (unlikely(node.isOBBNode()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNode(), i, ray, dist);
722
return true;
723
}
724
};
725
726
template<int N, int K>
727
struct BVHNNodeIntersectorK<N, K, BVH_AN2_UN2, false>
728
{
729
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
730
const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
731
{
732
if (likely(node.isAABBNodeMB())) vmask = intersectNodeK<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
733
else /*if (unlikely(node.isOBBNodeMB()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
734
return true;
735
}
736
};
737
738
template<int N, int K>
739
struct BVHNNodeIntersectorK<N, K, BVH_AN2_UN2, true>
740
{
741
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
742
const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
743
{
744
if (likely(node.isAABBNodeMB())) vmask = intersectNodeKRobust<N,K>(node.getAABBNodeMB(), i, ray, time, dist);
745
else /*if (unlikely(node.isOBBNodeMB()))*/ vmask = intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
746
return true;
747
}
748
};
749
750
template<int N, int K>
751
struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D, false>
752
{
753
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
754
const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
755
{
756
vmask &= intersectNodeKMB4D<N,K>(node, i, ray, time, dist);
757
return true;
758
}
759
};
760
761
template<int N, int K>
762
struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D, true>
763
{
764
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
765
const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
766
{
767
vmask &= intersectNodeKMB4DRobust<N,K>(node, i, ray, time, dist);
768
return true;
769
}
770
};
771
772
template<int N, int K>
773
struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D_UN2, false>
774
{
775
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
776
const TravRayKFast<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
777
{
778
if (likely(node.isAABBNodeMB() || node.isAABBNodeMB4D())) {
779
vmask &= intersectNodeKMB4D<N,K>(node, i, ray, time, dist);
780
} else /*if (unlikely(node.isOBBNodeMB()))*/ {
781
assert(node.isOBBNodeMB());
782
vmask &= intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
783
}
784
return true;
785
}
786
};
787
788
template<int N, int K>
789
struct BVHNNodeIntersectorK<N, K, BVH_AN2_AN4D_UN2, true>
790
{
791
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, size_t i,
792
const TravRayKRobust<K>& ray, const vfloat<K>& time, vfloat<K>& dist, vbool<K>& vmask)
793
{
794
if (likely(node.isAABBNodeMB() || node.isAABBNodeMB4D())) {
795
vmask &= intersectNodeKMB4DRobust<N,K>(node, i, ray, time, dist);
796
} else /*if (unlikely(node.isOBBNodeMB()))*/ {
797
assert(node.isOBBNodeMB());
798
vmask &= intersectNodeK<N,K>(node.ungetAABBNodeMB(), i, ray, time, dist);
799
}
800
return true;
801
}
802
};
803
804
805
/*! Intersects N nodes with K rays */
806
template<int N, int K, bool robust>
807
struct BVHNQuantizedBaseNodeIntersectorK;
808
809
template<int N, int K>
810
struct BVHNQuantizedBaseNodeIntersectorK<N, K, false>
811
{
812
static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNode* node, const size_t i,
813
const TravRayK<K,false>& ray, vfloat<K>& dist)
814
{
815
return intersectQuantizedNodeK<N,K>(node,i,ray,dist);
816
}
817
818
static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
819
const TravRayK<K,false>& ray, const vfloat<K>& time, vfloat<K>& dist)
820
{
821
return intersectQuantizedNodeMBK<N,K>(node,i,ray,time,dist);
822
}
823
824
};
825
826
template<int N, int K>
827
struct BVHNQuantizedBaseNodeIntersectorK<N, K, true>
828
{
829
static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNode* node, const size_t i,
830
const TravRayK<K,true>& ray, vfloat<K>& dist)
831
{
832
return intersectQuantizedNodeK<N,K>(node,i,ray,dist);
833
}
834
835
static __forceinline vbool<K> intersectK(const typename BVHN<N>::QuantizedBaseNodeMB* node, const size_t i,
836
const TravRayK<K,true>& ray, const vfloat<K>& time, vfloat<K>& dist)
837
{
838
return intersectQuantizedNodeMBK<N,K>(node,i,ray,time,dist);
839
}
840
};
841
842
843
}
844
}
845
846