Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/embree/kernels/bvh/node_intersector1.h
9906 views
1
// Copyright 2009-2021 Intel Corporation
2
// SPDX-License-Identifier: Apache-2.0
3
4
#pragma once
5
6
#include "node_intersector.h"
7
8
#if defined(__AVX2__)
9
#define __FMA_X4__
10
#endif
11
12
#if defined(__aarch64__)
13
#define __FMA_X4__
14
#endif
15
16
17
namespace embree
18
{
19
namespace isa
20
{
21
//////////////////////////////////////////////////////////////////////////////////////
22
// Ray structure used in single-ray traversal
23
//////////////////////////////////////////////////////////////////////////////////////
24
25
template<int N, bool robust>
26
struct TravRayBase;
27
28
/* Base (without tnear and tfar) */
29
template<int N>
30
struct TravRayBase<N,false>
31
{
32
__forceinline TravRayBase() {}
33
34
__forceinline TravRayBase(const Vec3fa& ray_org, const Vec3fa& ray_dir)
35
: org_xyz(ray_org), dir_xyz(ray_dir)
36
{
37
const Vec3fa ray_rdir = rcp_safe(ray_dir);
38
org = Vec3vf<N>(ray_org.x,ray_org.y,ray_org.z);
39
dir = Vec3vf<N>(ray_dir.x,ray_dir.y,ray_dir.z);
40
rdir = Vec3vf<N>(ray_rdir.x,ray_rdir.y,ray_rdir.z);
41
#if defined(__FMA_X4__)
42
const Vec3fa ray_org_rdir = ray_org*ray_rdir;
43
#if !defined(__aarch64__)
44
org_rdir = Vec3vf<N>(ray_org_rdir.x,ray_org_rdir.y,ray_org_rdir.z);
45
#else
46
//for aarch64, we do not have msub equal instruction, so we negeate orig and use madd
47
//x86 will use msub
48
neg_org_rdir = Vec3vf<N>(-ray_org_rdir.x,-ray_org_rdir.y,-ray_org_rdir.z);
49
#endif
50
#endif
51
nearX = ray_rdir.x >= 0.0f ? 0*sizeof(vfloat<N>) : 1*sizeof(vfloat<N>);
52
nearY = ray_rdir.y >= 0.0f ? 2*sizeof(vfloat<N>) : 3*sizeof(vfloat<N>);
53
nearZ = ray_rdir.z >= 0.0f ? 4*sizeof(vfloat<N>) : 5*sizeof(vfloat<N>);
54
farX = nearX ^ sizeof(vfloat<N>);
55
farY = nearY ^ sizeof(vfloat<N>);
56
farZ = nearZ ^ sizeof(vfloat<N>);
57
}
58
59
template<int K>
60
__forceinline void init(size_t k, const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir,
61
const Vec3vf<K>& ray_rdir, const Vec3vi<K>& nearXYZ,
62
size_t flip = sizeof(vfloat<N>))
63
{
64
org = Vec3vf<N>(ray_org.x[k], ray_org.y[k], ray_org.z[k]);
65
dir = Vec3vf<N>(ray_dir.x[k], ray_dir.y[k], ray_dir.z[k]);
66
rdir = Vec3vf<N>(ray_rdir.x[k], ray_rdir.y[k], ray_rdir.z[k]);
67
#if defined(__FMA_X4__)
68
#if !defined(__aarch64__)
69
org_rdir = org*rdir;
70
#else
71
neg_org_rdir = -(org*rdir);
72
#endif
73
#endif
74
nearX = nearXYZ.x[k];
75
nearY = nearXYZ.y[k];
76
nearZ = nearXYZ.z[k];
77
farX = nearX ^ flip;
78
farY = nearY ^ flip;
79
farZ = nearZ ^ flip;
80
}
81
82
Vec3fa org_xyz, dir_xyz;
83
Vec3vf<N> org, dir, rdir;
84
#if defined(__FMA_X4__)
85
#if !defined(__aarch64__)
86
Vec3vf<N> org_rdir;
87
#else
88
//aarch64 version are keeping negation of the org_rdir and use madd
89
//x86 uses msub
90
Vec3vf<N> neg_org_rdir;
91
#endif
92
#endif
93
size_t nearX, nearY, nearZ;
94
size_t farX, farY, farZ;
95
};
96
97
/* Base (without tnear and tfar) */
98
template<int N>
99
struct TravRayBase<N,true>
100
{
101
__forceinline TravRayBase() {}
102
103
__forceinline TravRayBase(const Vec3fa& ray_org, const Vec3fa& ray_dir)
104
: org_xyz(ray_org), dir_xyz(ray_dir)
105
{
106
const float round_down = 1.0f-3.0f*float(ulp);
107
const float round_up = 1.0f+3.0f*float(ulp);
108
const Vec3fa ray_rdir = 1.0f/zero_fix(ray_dir);
109
const Vec3fa ray_rdir_near = round_down*ray_rdir;
110
const Vec3fa ray_rdir_far = round_up *ray_rdir;
111
org = Vec3vf<N>(ray_org.x,ray_org.y,ray_org.z);
112
dir = Vec3vf<N>(ray_dir.x,ray_dir.y,ray_dir.z);
113
rdir_near = Vec3vf<N>(ray_rdir_near.x,ray_rdir_near.y,ray_rdir_near.z);
114
rdir_far = Vec3vf<N>(ray_rdir_far .x,ray_rdir_far .y,ray_rdir_far .z);
115
116
nearX = ray_rdir_near.x >= 0.0f ? 0*sizeof(vfloat<N>) : 1*sizeof(vfloat<N>);
117
nearY = ray_rdir_near.y >= 0.0f ? 2*sizeof(vfloat<N>) : 3*sizeof(vfloat<N>);
118
nearZ = ray_rdir_near.z >= 0.0f ? 4*sizeof(vfloat<N>) : 5*sizeof(vfloat<N>);
119
farX = nearX ^ sizeof(vfloat<N>);
120
farY = nearY ^ sizeof(vfloat<N>);
121
farZ = nearZ ^ sizeof(vfloat<N>);
122
}
123
124
template<int K>
125
__forceinline void init(size_t k, const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir,
126
const Vec3vf<K>& ray_rdir, const Vec3vi<K>& nearXYZ,
127
size_t flip = sizeof(vfloat<N>))
128
{
129
const vfloat<N> round_down = 1.0f-3.0f*float(ulp);
130
const vfloat<N> round_up = 1.0f+3.0f*float(ulp);
131
org = Vec3vf<N>(ray_org.x[k], ray_org.y[k], ray_org.z[k]);
132
dir = Vec3vf<N>(ray_dir.x[k], ray_dir.y[k], ray_dir.z[k]);
133
rdir_near = round_down*Vec3vf<N>(ray_rdir.x[k], ray_rdir.y[k], ray_rdir.z[k]);
134
rdir_far = round_up *Vec3vf<N>(ray_rdir.x[k], ray_rdir.y[k], ray_rdir.z[k]);
135
136
nearX = nearXYZ.x[k];
137
nearY = nearXYZ.y[k];
138
nearZ = nearXYZ.z[k];
139
farX = nearX ^ flip;
140
farY = nearY ^ flip;
141
farZ = nearZ ^ flip;
142
}
143
144
Vec3fa org_xyz, dir_xyz;
145
Vec3vf<N> org, dir, rdir_near, rdir_far;
146
size_t nearX, nearY, nearZ;
147
size_t farX, farY, farZ;
148
};
149
150
/* Full (with tnear and tfar) */
151
template<int N, bool robust>
152
struct TravRay : TravRayBase<N,robust>
153
{
154
__forceinline TravRay() {}
155
156
__forceinline TravRay(const Vec3fa& ray_org, const Vec3fa& ray_dir, float ray_tnear, float ray_tfar)
157
: TravRayBase<N,robust>(ray_org, ray_dir),
158
tnear(ray_tnear), tfar(ray_tfar) {}
159
160
template<int K>
161
__forceinline void init(size_t k, const Vec3vf<K>& ray_org, const Vec3vf<K>& ray_dir,
162
const Vec3vf<K>& ray_rdir, const Vec3vi<K>& nearXYZ,
163
float ray_tnear, float ray_tfar,
164
size_t flip = sizeof(vfloat<N>))
165
{
166
TravRayBase<N,robust>::template init<K>(k, ray_org, ray_dir, ray_rdir, nearXYZ, flip);
167
tnear = ray_tnear; tfar = ray_tfar;
168
}
169
170
vfloat<N> tnear;
171
vfloat<N> tfar;
172
};
173
174
//////////////////////////////////////////////////////////////////////////////////////
175
// Point Query structure used in single-ray traversal
176
//////////////////////////////////////////////////////////////////////////////////////
177
178
template<int N>
179
struct TravPointQuery
180
{
181
__forceinline TravPointQuery() {}
182
183
__forceinline TravPointQuery(const Vec3fa& query_org, const Vec3fa& query_rad)
184
{
185
org = Vec3vf<N>(query_org.x, query_org.y, query_org.z);
186
rad = Vec3vf<N>(query_rad.x, query_rad.y, query_rad.z);
187
}
188
189
__forceinline vfloat<N> const& tfar() const {
190
return rad.x;
191
}
192
193
Vec3vf<N> org, rad;
194
};
195
196
//////////////////////////////////////////////////////////////////////////////////////
197
// point query
198
//////////////////////////////////////////////////////////////////////////////////////
199
200
template<int N>
201
__forceinline size_t pointQuerySphereDistAndMask(
202
const TravPointQuery<N>& query, vfloat<N>& dist, vfloat<N> const& minX, vfloat<N> const& maxX,
203
vfloat<N> const& minY, vfloat<N> const& maxY, vfloat<N> const& minZ, vfloat<N> const& maxZ)
204
{
205
const vfloat<N> vX = min(max(query.org.x, minX), maxX) - query.org.x;
206
const vfloat<N> vY = min(max(query.org.y, minY), maxY) - query.org.y;
207
const vfloat<N> vZ = min(max(query.org.z, minZ), maxZ) - query.org.z;
208
dist = vX * vX + vY * vY + vZ * vZ;
209
const vbool<N> vmask = dist <= query.tfar()*query.tfar();
210
const vbool<N> valid = minX <= maxX;
211
return movemask(vmask) & movemask(valid);
212
}
213
214
template<int N>
215
__forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::AABBNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
216
{
217
const vfloat<N> minX = vfloat<N>::load((float*)((const char*)&node->lower_x));
218
const vfloat<N> minY = vfloat<N>::load((float*)((const char*)&node->lower_y));
219
const vfloat<N> minZ = vfloat<N>::load((float*)((const char*)&node->lower_z));
220
const vfloat<N> maxX = vfloat<N>::load((float*)((const char*)&node->upper_x));
221
const vfloat<N> maxY = vfloat<N>::load((float*)((const char*)&node->upper_y));
222
const vfloat<N> maxZ = vfloat<N>::load((float*)((const char*)&node->upper_z));
223
return pointQuerySphereDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ);
224
}
225
226
template<int N>
227
__forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::AABBNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
228
{
229
const vfloat<N>* pMinX = (const vfloat<N>*)((const char*)&node->lower_x);
230
const vfloat<N>* pMinY = (const vfloat<N>*)((const char*)&node->lower_y);
231
const vfloat<N>* pMinZ = (const vfloat<N>*)((const char*)&node->lower_z);
232
const vfloat<N>* pMaxX = (const vfloat<N>*)((const char*)&node->upper_x);
233
const vfloat<N>* pMaxY = (const vfloat<N>*)((const char*)&node->upper_y);
234
const vfloat<N>* pMaxZ = (const vfloat<N>*)((const char*)&node->upper_z);
235
const vfloat<N> minX = madd(time,pMinX[6],vfloat<N>(pMinX[0]));
236
const vfloat<N> minY = madd(time,pMinY[6],vfloat<N>(pMinY[0]));
237
const vfloat<N> minZ = madd(time,pMinZ[6],vfloat<N>(pMinZ[0]));
238
const vfloat<N> maxX = madd(time,pMaxX[6],vfloat<N>(pMaxX[0]));
239
const vfloat<N> maxY = madd(time,pMaxY[6],vfloat<N>(pMaxY[0]));
240
const vfloat<N> maxZ = madd(time,pMaxZ[6],vfloat<N>(pMaxZ[0]));
241
return pointQuerySphereDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ);
242
}
243
244
template<int N>
245
__forceinline size_t pointQueryNodeSphereMB4D(const typename BVHN<N>::NodeRef ref, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
246
{
247
const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
248
size_t mask = pointQueryNodeSphere(node, query, time, dist);
249
250
if (unlikely(ref.isAABBNodeMB4D())) {
251
const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
252
const vbool<N> vmask = (node1->lower_t <= time) & (time < node1->upper_t);
253
mask &= movemask(vmask);
254
}
255
256
return mask;
257
}
258
259
template<int N>
260
__forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::QuantizedBaseNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
261
{
262
const vfloat<N> start_x(node->start.x);
263
const vfloat<N> scale_x(node->scale.x);
264
const vfloat<N> minX = madd(node->template dequantize<N>((0*sizeof(vfloat<N>)) >> 2),scale_x,start_x);
265
const vfloat<N> maxX = madd(node->template dequantize<N>((1*sizeof(vfloat<N>)) >> 2),scale_x,start_x);
266
const vfloat<N> start_y(node->start.y);
267
const vfloat<N> scale_y(node->scale.y);
268
const vfloat<N> minY = madd(node->template dequantize<N>((2*sizeof(vfloat<N>)) >> 2),scale_y,start_y);
269
const vfloat<N> maxY = madd(node->template dequantize<N>((3*sizeof(vfloat<N>)) >> 2),scale_y,start_y);
270
const vfloat<N> start_z(node->start.z);
271
const vfloat<N> scale_z(node->scale.z);
272
const vfloat<N> minZ = madd(node->template dequantize<N>((4*sizeof(vfloat<N>)) >> 2),scale_z,start_z);
273
const vfloat<N> maxZ = madd(node->template dequantize<N>((5*sizeof(vfloat<N>)) >> 2),scale_z,start_z);
274
return pointQuerySphereDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ) & movemask(node->validMask());
275
}
276
277
template<int N>
278
__forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
279
{
280
const vfloat<N> minX = node->dequantizeLowerX(time);
281
const vfloat<N> maxX = node->dequantizeUpperX(time);
282
const vfloat<N> minY = node->dequantizeLowerY(time);
283
const vfloat<N> maxY = node->dequantizeUpperY(time);
284
const vfloat<N> minZ = node->dequantizeLowerZ(time);
285
const vfloat<N> maxZ = node->dequantizeUpperZ(time);
286
return pointQuerySphereDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ) & movemask(node->validMask());
287
}
288
289
template<int N>
290
__forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::OBBNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
291
{
292
// TODO: point query - implement
293
const vbool<N> vmask = vbool<N>(true);
294
const size_t mask = movemask(vmask) & ((1<<N)-1);
295
dist = vfloat<N>(0.0f);
296
return mask;
297
}
298
299
template<int N>
300
__forceinline size_t pointQueryNodeSphere(const typename BVHN<N>::OBBNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
301
{
302
// TODO: point query - implement
303
const vbool<N> vmask = vbool<N>(true);
304
const size_t mask = movemask(vmask) & ((1<<N)-1);
305
dist = vfloat<N>(0.0f);
306
return mask;
307
}
308
309
template<int N>
310
__forceinline size_t pointQueryAABBDistAndMask(
311
const TravPointQuery<N>& query, vfloat<N>& dist, vfloat<N> const& minX, vfloat<N> const& maxX,
312
vfloat<N> const& minY, vfloat<N> const& maxY, vfloat<N> const& minZ, vfloat<N> const& maxZ)
313
{
314
const vfloat<N> vX = min(max(query.org.x, minX), maxX) - query.org.x;
315
const vfloat<N> vY = min(max(query.org.y, minY), maxY) - query.org.y;
316
const vfloat<N> vZ = min(max(query.org.z, minZ), maxZ) - query.org.z;
317
dist = vX * vX + vY * vY + vZ * vZ;
318
const vbool<N> valid = minX <= maxX;
319
const vbool<N> vmask = !((maxX < query.org.x - query.rad.x) | (minX > query.org.x + query.rad.x) |
320
(maxY < query.org.y - query.rad.y) | (minY > query.org.y + query.rad.y) |
321
(maxZ < query.org.z - query.rad.z) | (minZ > query.org.z + query.rad.z));
322
return movemask(vmask) & movemask(valid);
323
}
324
325
template<int N>
326
__forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::AABBNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
327
{
328
const vfloat<N> minX = vfloat<N>::load((float*)((const char*)&node->lower_x));
329
const vfloat<N> minY = vfloat<N>::load((float*)((const char*)&node->lower_y));
330
const vfloat<N> minZ = vfloat<N>::load((float*)((const char*)&node->lower_z));
331
const vfloat<N> maxX = vfloat<N>::load((float*)((const char*)&node->upper_x));
332
const vfloat<N> maxY = vfloat<N>::load((float*)((const char*)&node->upper_y));
333
const vfloat<N> maxZ = vfloat<N>::load((float*)((const char*)&node->upper_z));
334
return pointQueryAABBDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ);
335
}
336
337
template<int N>
338
__forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::AABBNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
339
{
340
const vfloat<N>* pMinX = (const vfloat<N>*)((const char*)&node->lower_x);
341
const vfloat<N>* pMinY = (const vfloat<N>*)((const char*)&node->lower_y);
342
const vfloat<N>* pMinZ = (const vfloat<N>*)((const char*)&node->lower_z);
343
const vfloat<N>* pMaxX = (const vfloat<N>*)((const char*)&node->upper_x);
344
const vfloat<N>* pMaxY = (const vfloat<N>*)((const char*)&node->upper_y);
345
const vfloat<N>* pMaxZ = (const vfloat<N>*)((const char*)&node->upper_z);
346
const vfloat<N> minX = madd(time,pMinX[6],vfloat<N>(pMinX[0]));
347
const vfloat<N> minY = madd(time,pMinY[6],vfloat<N>(pMinY[0]));
348
const vfloat<N> minZ = madd(time,pMinZ[6],vfloat<N>(pMinZ[0]));
349
const vfloat<N> maxX = madd(time,pMaxX[6],vfloat<N>(pMaxX[0]));
350
const vfloat<N> maxY = madd(time,pMaxY[6],vfloat<N>(pMaxY[0]));
351
const vfloat<N> maxZ = madd(time,pMaxZ[6],vfloat<N>(pMaxZ[0]));
352
return pointQueryAABBDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ);
353
}
354
355
template<int N>
356
__forceinline size_t pointQueryNodeAABBMB4D(const typename BVHN<N>::NodeRef ref, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
357
{
358
const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
359
size_t mask = pointQueryNodeAABB(node, query, time, dist);
360
361
if (unlikely(ref.isAABBNodeMB4D())) {
362
const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
363
const vbool<N> vmask = (node1->lower_t <= time) & (time < node1->upper_t);
364
mask &= movemask(vmask);
365
}
366
367
return mask;
368
}
369
370
template<int N>
371
__forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::QuantizedBaseNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
372
{
373
const size_t mvalid = movemask(node->validMask());
374
const vfloat<N> start_x(node->start.x);
375
const vfloat<N> scale_x(node->scale.x);
376
const vfloat<N> minX = madd(node->template dequantize<N>((0*sizeof(vfloat<N>)) >> 2),scale_x,start_x);
377
const vfloat<N> maxX = madd(node->template dequantize<N>((1*sizeof(vfloat<N>)) >> 2),scale_x,start_x);
378
const vfloat<N> start_y(node->start.y);
379
const vfloat<N> scale_y(node->scale.y);
380
const vfloat<N> minY = madd(node->template dequantize<N>((2*sizeof(vfloat<N>)) >> 2),scale_y,start_y);
381
const vfloat<N> maxY = madd(node->template dequantize<N>((3*sizeof(vfloat<N>)) >> 2),scale_y,start_y);
382
const vfloat<N> start_z(node->start.z);
383
const vfloat<N> scale_z(node->scale.z);
384
const vfloat<N> minZ = madd(node->template dequantize<N>((4*sizeof(vfloat<N>)) >> 2),scale_z,start_z);
385
const vfloat<N> maxZ = madd(node->template dequantize<N>((5*sizeof(vfloat<N>)) >> 2),scale_z,start_z);
386
return pointQueryAABBDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ) & mvalid;
387
}
388
389
template<int N>
390
__forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
391
{
392
const size_t mvalid = movemask(node->validMask());
393
const vfloat<N> minX = node->dequantizeLowerX(time);
394
const vfloat<N> maxX = node->dequantizeUpperX(time);
395
const vfloat<N> minY = node->dequantizeLowerY(time);
396
const vfloat<N> maxY = node->dequantizeUpperY(time);
397
const vfloat<N> minZ = node->dequantizeLowerZ(time);
398
const vfloat<N> maxZ = node->dequantizeUpperZ(time);
399
return pointQueryAABBDistAndMask(query, dist, minX, maxX, minY, maxY, minZ, maxZ) & mvalid;
400
}
401
402
template<int N>
403
__forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::OBBNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
404
{
405
// TODO: point query - implement
406
const vbool<N> vmask = vbool<N>(true);
407
const size_t mask = movemask(vmask) & ((1<<N)-1);
408
dist = vfloat<N>(0.0f);
409
return mask;
410
}
411
412
template<int N>
413
__forceinline size_t pointQueryNodeAABB(const typename BVHN<N>::OBBNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
414
{
415
// TODO: point query - implement
416
const vbool<N> vmask = vbool<N>(true);
417
const size_t mask = movemask(vmask) & ((1<<N)-1);
418
dist = vfloat<N>(0.0f);
419
return mask;
420
}
421
422
//////////////////////////////////////////////////////////////////////////////////////
423
// Fast AABBNode intersection
424
//////////////////////////////////////////////////////////////////////////////////////
425
426
template<int N, bool robust>
427
__forceinline size_t intersectNode(const typename BVHN<N>::AABBNode* node, const TravRay<N,robust>& ray, vfloat<N>& dist);
428
429
template<>
430
__forceinline size_t intersectNode<4>(const typename BVH4::AABBNode* node, const TravRay<4,false>& ray, vfloat4& dist)
431
{
432
#if defined(__FMA_X4__)
433
#if defined(__aarch64__)
434
const vfloat4 tNearX = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearX)), ray.rdir.x, ray.neg_org_rdir.x);
435
const vfloat4 tNearY = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearY)), ray.rdir.y, ray.neg_org_rdir.y);
436
const vfloat4 tNearZ = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearZ)), ray.rdir.z, ray.neg_org_rdir.z);
437
const vfloat4 tFarX = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.farX )), ray.rdir.x, ray.neg_org_rdir.x);
438
const vfloat4 tFarY = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.farY )), ray.rdir.y, ray.neg_org_rdir.y);
439
const vfloat4 tFarZ = madd(vfloat4::load((float*)((const char*)&node->lower_x+ray.farZ )), ray.rdir.z, ray.neg_org_rdir.z);
440
#else
441
const vfloat4 tNearX = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearX)), ray.rdir.x, ray.org_rdir.x);
442
const vfloat4 tNearY = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearY)), ray.rdir.y, ray.org_rdir.y);
443
const vfloat4 tNearZ = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.nearZ)), ray.rdir.z, ray.org_rdir.z);
444
const vfloat4 tFarX = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.farX )), ray.rdir.x, ray.org_rdir.x);
445
const vfloat4 tFarY = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.farY )), ray.rdir.y, ray.org_rdir.y);
446
const vfloat4 tFarZ = msub(vfloat4::load((float*)((const char*)&node->lower_x+ray.farZ )), ray.rdir.z, ray.org_rdir.z);
447
#endif
448
#else
449
const vfloat4 tNearX = (vfloat4::load((float*)((const char*)&node->lower_x+ray.nearX)) - ray.org.x) * ray.rdir.x;
450
const vfloat4 tNearY = (vfloat4::load((float*)((const char*)&node->lower_x+ray.nearY)) - ray.org.y) * ray.rdir.y;
451
const vfloat4 tNearZ = (vfloat4::load((float*)((const char*)&node->lower_x+ray.nearZ)) - ray.org.z) * ray.rdir.z;
452
const vfloat4 tFarX = (vfloat4::load((float*)((const char*)&node->lower_x+ray.farX )) - ray.org.x) * ray.rdir.x;
453
const vfloat4 tFarY = (vfloat4::load((float*)((const char*)&node->lower_x+ray.farY )) - ray.org.y) * ray.rdir.y;
454
const vfloat4 tFarZ = (vfloat4::load((float*)((const char*)&node->lower_x+ray.farZ )) - ray.org.z) * ray.rdir.z;
455
#endif
456
457
#if defined(__aarch64__)
458
const vfloat4 tNear = maxi(tNearX, tNearY, tNearZ, ray.tnear);
459
const vfloat4 tFar = mini(tFarX, tFarY, tFarZ, ray.tfar);
460
const vbool4 vmask = asInt(tNear) <= asInt(tFar);
461
const size_t mask = movemask(vmask);
462
#elif defined(__SSE4_1__) && !defined(__AVX512F__) // up to HSW
463
const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
464
const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
465
const vbool4 vmask = asInt(tNear) > asInt(tFar);
466
const size_t mask = movemask(vmask) ^ ((1<<4)-1);
467
#elif defined(__AVX512F__) // SKX
468
const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
469
const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
470
const vbool4 vmask = asInt(tNear) <= asInt(tFar);
471
const size_t mask = movemask(vmask);
472
#else
473
const vfloat4 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
474
const vfloat4 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
475
const vbool4 vmask = tNear <= tFar;
476
const size_t mask = movemask(vmask);
477
#endif
478
dist = tNear;
479
return mask;
480
}
481
482
#if defined(__AVX__)
483
484
template<>
485
__forceinline size_t intersectNode<8>(const typename BVH8::AABBNode* node, const TravRay<8,false>& ray, vfloat8& dist)
486
{
487
#if defined(__AVX2__)
488
#if defined(__aarch64__)
489
const vfloat8 tNearX = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearX)), ray.rdir.x, ray.neg_org_rdir.x);
490
const vfloat8 tNearY = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearY)), ray.rdir.y, ray.neg_org_rdir.y);
491
const vfloat8 tNearZ = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearZ)), ray.rdir.z, ray.neg_org_rdir.z);
492
const vfloat8 tFarX = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.farX )), ray.rdir.x, ray.neg_org_rdir.x);
493
const vfloat8 tFarY = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.farY )), ray.rdir.y, ray.neg_org_rdir.y);
494
const vfloat8 tFarZ = madd(vfloat8::load((float*)((const char*)&node->lower_x+ray.farZ )), ray.rdir.z, ray.neg_org_rdir.z);
495
#else
496
const vfloat8 tNearX = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearX)), ray.rdir.x, ray.org_rdir.x);
497
const vfloat8 tNearY = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearY)), ray.rdir.y, ray.org_rdir.y);
498
const vfloat8 tNearZ = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.nearZ)), ray.rdir.z, ray.org_rdir.z);
499
const vfloat8 tFarX = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.farX )), ray.rdir.x, ray.org_rdir.x);
500
const vfloat8 tFarY = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.farY )), ray.rdir.y, ray.org_rdir.y);
501
const vfloat8 tFarZ = msub(vfloat8::load((float*)((const char*)&node->lower_x+ray.farZ )), ray.rdir.z, ray.org_rdir.z);
502
#endif
503
504
#else
505
const vfloat8 tNearX = (vfloat8::load((float*)((const char*)&node->lower_x+ray.nearX)) - ray.org.x) * ray.rdir.x;
506
const vfloat8 tNearY = (vfloat8::load((float*)((const char*)&node->lower_x+ray.nearY)) - ray.org.y) * ray.rdir.y;
507
const vfloat8 tNearZ = (vfloat8::load((float*)((const char*)&node->lower_x+ray.nearZ)) - ray.org.z) * ray.rdir.z;
508
const vfloat8 tFarX = (vfloat8::load((float*)((const char*)&node->lower_x+ray.farX )) - ray.org.x) * ray.rdir.x;
509
const vfloat8 tFarY = (vfloat8::load((float*)((const char*)&node->lower_x+ray.farY )) - ray.org.y) * ray.rdir.y;
510
const vfloat8 tFarZ = (vfloat8::load((float*)((const char*)&node->lower_x+ray.farZ )) - ray.org.z) * ray.rdir.z;
511
#endif
512
513
#if defined(__AVX2__) && !defined(__AVX512F__) // HSW
514
const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
515
const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
516
const vbool8 vmask = asInt(tNear) > asInt(tFar);
517
const size_t mask = movemask(vmask) ^ ((1<<8)-1);
518
#elif defined(__AVX512F__) // SKX
519
const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
520
const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
521
const vbool8 vmask = asInt(tNear) <= asInt(tFar);
522
const size_t mask = movemask(vmask);
523
#else
524
const vfloat8 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
525
const vfloat8 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
526
const vbool8 vmask = tNear <= tFar;
527
const size_t mask = movemask(vmask);
528
#endif
529
dist = tNear;
530
return mask;
531
}
532
533
#endif
534
535
//////////////////////////////////////////////////////////////////////////////////////
536
// Robust AABBNode intersection
537
//////////////////////////////////////////////////////////////////////////////////////
538
539
template<int N>
540
__forceinline size_t intersectNodeRobust(const typename BVHN<N>::AABBNode* node, const TravRay<N,true>& ray, vfloat<N>& dist)
541
{
542
const vfloat<N> tNearX = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearX)) - ray.org.x) * ray.rdir_near.x;
543
const vfloat<N> tNearY = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearY)) - ray.org.y) * ray.rdir_near.y;
544
const vfloat<N> tNearZ = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearZ)) - ray.org.z) * ray.rdir_near.z;
545
const vfloat<N> tFarX = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.farX )) - ray.org.x) * ray.rdir_far.x;
546
const vfloat<N> tFarY = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.farY )) - ray.org.y) * ray.rdir_far.y;
547
const vfloat<N> tFarZ = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.farZ )) - ray.org.z) * ray.rdir_far.z;
548
const vfloat<N> tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
549
const vfloat<N> tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
550
const vbool<N> vmask = tNear <= tFar;
551
const size_t mask = movemask(vmask);
552
dist = tNear;
553
return mask;
554
}
555
556
//////////////////////////////////////////////////////////////////////////////////////
557
// Fast AABBNodeMB intersection
558
//////////////////////////////////////////////////////////////////////////////////////
559
560
template<int N>
561
__forceinline size_t intersectNode(const typename BVHN<N>::AABBNodeMB* node, const TravRay<N,false>& ray, const float time, vfloat<N>& dist)
562
{
563
const vfloat<N>* pNearX = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearX);
564
const vfloat<N>* pNearY = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearY);
565
const vfloat<N>* pNearZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearZ);
566
const vfloat<N>* pFarX = (const vfloat<N>*)((const char*)&node->lower_x+ray.farX);
567
const vfloat<N>* pFarY = (const vfloat<N>*)((const char*)&node->lower_x+ray.farY);
568
const vfloat<N>* pFarZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.farZ);
569
#if defined(__FMA_X4__)
570
#if defined(__aarch64__)
571
const vfloat<N> tNearX = madd(madd(time,pNearX[6],vfloat<N>(pNearX[0])), ray.rdir.x, ray.neg_org_rdir.x);
572
const vfloat<N> tNearY = madd(madd(time,pNearY[6],vfloat<N>(pNearY[0])), ray.rdir.y, ray.neg_org_rdir.y);
573
const vfloat<N> tNearZ = madd(madd(time,pNearZ[6],vfloat<N>(pNearZ[0])), ray.rdir.z, ray.neg_org_rdir.z);
574
const vfloat<N> tFarX = madd(madd(time,pFarX [6],vfloat<N>(pFarX [0])), ray.rdir.x, ray.neg_org_rdir.x);
575
const vfloat<N> tFarY = madd(madd(time,pFarY [6],vfloat<N>(pFarY [0])), ray.rdir.y, ray.neg_org_rdir.y);
576
const vfloat<N> tFarZ = madd(madd(time,pFarZ [6],vfloat<N>(pFarZ [0])), ray.rdir.z, ray.neg_org_rdir.z);
577
#else
578
const vfloat<N> tNearX = msub(madd(time,pNearX[6],vfloat<N>(pNearX[0])), ray.rdir.x, ray.org_rdir.x);
579
const vfloat<N> tNearY = msub(madd(time,pNearY[6],vfloat<N>(pNearY[0])), ray.rdir.y, ray.org_rdir.y);
580
const vfloat<N> tNearZ = msub(madd(time,pNearZ[6],vfloat<N>(pNearZ[0])), ray.rdir.z, ray.org_rdir.z);
581
const vfloat<N> tFarX = msub(madd(time,pFarX [6],vfloat<N>(pFarX [0])), ray.rdir.x, ray.org_rdir.x);
582
const vfloat<N> tFarY = msub(madd(time,pFarY [6],vfloat<N>(pFarY [0])), ray.rdir.y, ray.org_rdir.y);
583
const vfloat<N> tFarZ = msub(madd(time,pFarZ [6],vfloat<N>(pFarZ [0])), ray.rdir.z, ray.org_rdir.z);
584
#endif
585
#else
586
const vfloat<N> tNearX = (madd(time,pNearX[6],vfloat<N>(pNearX[0])) - ray.org.x) * ray.rdir.x;
587
const vfloat<N> tNearY = (madd(time,pNearY[6],vfloat<N>(pNearY[0])) - ray.org.y) * ray.rdir.y;
588
const vfloat<N> tNearZ = (madd(time,pNearZ[6],vfloat<N>(pNearZ[0])) - ray.org.z) * ray.rdir.z;
589
const vfloat<N> tFarX = (madd(time,pFarX [6],vfloat<N>(pFarX [0])) - ray.org.x) * ray.rdir.x;
590
const vfloat<N> tFarY = (madd(time,pFarY [6],vfloat<N>(pFarY [0])) - ray.org.y) * ray.rdir.y;
591
const vfloat<N> tFarZ = (madd(time,pFarZ [6],vfloat<N>(pFarZ [0])) - ray.org.z) * ray.rdir.z;
592
#endif
593
#if defined(__FMA_X4__) && !defined(__AVX512F__) // HSW
594
const vfloat<N> tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
595
const vfloat<N> tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
596
const vbool<N> vmask = asInt(tNear) > asInt(tFar);
597
const size_t mask = movemask(vmask) ^ ((1<<N)-1);
598
#elif defined(__AVX512F__) // SKX
599
const vfloat<N> tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
600
const vfloat<N> tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
601
const vbool<N> vmask = asInt(tNear) <= asInt(tFar);
602
const size_t mask = movemask(vmask);
603
#else
604
const vfloat<N> tNear = max(ray.tnear,tNearX,tNearY,tNearZ);
605
const vfloat<N> tFar = min(ray.tfar, tFarX ,tFarY ,tFarZ );
606
const vbool<N> vmask = tNear <= tFar;
607
const size_t mask = movemask(vmask);
608
#endif
609
dist = tNear;
610
return mask;
611
}
612
613
//////////////////////////////////////////////////////////////////////////////////////
614
// Robust AABBNodeMB intersection
615
//////////////////////////////////////////////////////////////////////////////////////
616
617
template<int N>
618
__forceinline size_t intersectNodeRobust(const typename BVHN<N>::AABBNodeMB* node, const TravRay<N,true>& ray, const float time, vfloat<N>& dist)
619
{
620
const vfloat<N>* pNearX = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearX);
621
const vfloat<N>* pNearY = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearY);
622
const vfloat<N>* pNearZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearZ);
623
const vfloat<N> tNearX = (madd(time,pNearX[6],vfloat<N>(pNearX[0])) - ray.org.x) * ray.rdir_near.x;
624
const vfloat<N> tNearY = (madd(time,pNearY[6],vfloat<N>(pNearY[0])) - ray.org.y) * ray.rdir_near.y;
625
const vfloat<N> tNearZ = (madd(time,pNearZ[6],vfloat<N>(pNearZ[0])) - ray.org.z) * ray.rdir_near.z;
626
const vfloat<N> tNear = max(ray.tnear,tNearX,tNearY,tNearZ);
627
const vfloat<N>* pFarX = (const vfloat<N>*)((const char*)&node->lower_x+ray.farX);
628
const vfloat<N>* pFarY = (const vfloat<N>*)((const char*)&node->lower_x+ray.farY);
629
const vfloat<N>* pFarZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.farZ);
630
const vfloat<N> tFarX = (madd(time,pFarX[6],vfloat<N>(pFarX[0])) - ray.org.x) * ray.rdir_far.x;
631
const vfloat<N> tFarY = (madd(time,pFarY[6],vfloat<N>(pFarY[0])) - ray.org.y) * ray.rdir_far.y;
632
const vfloat<N> tFarZ = (madd(time,pFarZ[6],vfloat<N>(pFarZ[0])) - ray.org.z) * ray.rdir_far.z;
633
const vfloat<N> tFar = min(ray.tfar,tFarX,tFarY,tFarZ);
634
const size_t mask = movemask(tNear <= tFar);
635
dist = tNear;
636
return mask;
637
}
638
639
//////////////////////////////////////////////////////////////////////////////////////
640
// Fast AABBNodeMB4D intersection
641
//////////////////////////////////////////////////////////////////////////////////////
642
643
template<int N>
644
__forceinline size_t intersectNodeMB4D(const typename BVHN<N>::NodeRef ref, const TravRay<N,false>& ray, const float time, vfloat<N>& dist)
645
{
646
const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
647
648
const vfloat<N>* pNearX = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearX);
649
const vfloat<N>* pNearY = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearY);
650
const vfloat<N>* pNearZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearZ);
651
const vfloat<N>* pFarX = (const vfloat<N>*)((const char*)&node->lower_x+ray.farX);
652
const vfloat<N>* pFarY = (const vfloat<N>*)((const char*)&node->lower_x+ray.farY);
653
const vfloat<N>* pFarZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.farZ);
654
#if defined (__FMA_X4__)
655
#if defined(__aarch64__)
656
const vfloat<N> tNearX = madd(madd(time,pNearX[6],vfloat<N>(pNearX[0])), ray.rdir.x, ray.neg_org_rdir.x);
657
const vfloat<N> tNearY = madd(madd(time,pNearY[6],vfloat<N>(pNearY[0])), ray.rdir.y, ray.neg_org_rdir.y);
658
const vfloat<N> tNearZ = madd(madd(time,pNearZ[6],vfloat<N>(pNearZ[0])), ray.rdir.z, ray.neg_org_rdir.z);
659
const vfloat<N> tFarX = madd(madd(time,pFarX [6],vfloat<N>(pFarX [0])), ray.rdir.x, ray.neg_org_rdir.x);
660
const vfloat<N> tFarY = madd(madd(time,pFarY [6],vfloat<N>(pFarY [0])), ray.rdir.y, ray.neg_org_rdir.y);
661
const vfloat<N> tFarZ = madd(madd(time,pFarZ [6],vfloat<N>(pFarZ [0])), ray.rdir.z, ray.neg_org_rdir.z);
662
#else
663
const vfloat<N> tNearX = msub(madd(time,pNearX[6],vfloat<N>(pNearX[0])), ray.rdir.x, ray.org_rdir.x);
664
const vfloat<N> tNearY = msub(madd(time,pNearY[6],vfloat<N>(pNearY[0])), ray.rdir.y, ray.org_rdir.y);
665
const vfloat<N> tNearZ = msub(madd(time,pNearZ[6],vfloat<N>(pNearZ[0])), ray.rdir.z, ray.org_rdir.z);
666
const vfloat<N> tFarX = msub(madd(time,pFarX [6],vfloat<N>(pFarX [0])), ray.rdir.x, ray.org_rdir.x);
667
const vfloat<N> tFarY = msub(madd(time,pFarY [6],vfloat<N>(pFarY [0])), ray.rdir.y, ray.org_rdir.y);
668
const vfloat<N> tFarZ = msub(madd(time,pFarZ [6],vfloat<N>(pFarZ [0])), ray.rdir.z, ray.org_rdir.z);
669
#endif
670
#else
671
const vfloat<N> tNearX = (madd(time,pNearX[6],vfloat<N>(pNearX[0])) - ray.org.x) * ray.rdir.x;
672
const vfloat<N> tNearY = (madd(time,pNearY[6],vfloat<N>(pNearY[0])) - ray.org.y) * ray.rdir.y;
673
const vfloat<N> tNearZ = (madd(time,pNearZ[6],vfloat<N>(pNearZ[0])) - ray.org.z) * ray.rdir.z;
674
const vfloat<N> tFarX = (madd(time,pFarX [6],vfloat<N>(pFarX [0])) - ray.org.x) * ray.rdir.x;
675
const vfloat<N> tFarY = (madd(time,pFarY [6],vfloat<N>(pFarY [0])) - ray.org.y) * ray.rdir.y;
676
const vfloat<N> tFarZ = (madd(time,pFarZ [6],vfloat<N>(pFarZ [0])) - ray.org.z) * ray.rdir.z;
677
#endif
678
#if defined(__FMA_X4__) && !defined(__AVX512F__)
679
const vfloat<N> tNear = maxi(maxi(tNearX,tNearY),maxi(tNearZ,ray.tnear));
680
const vfloat<N> tFar = mini(mini(tFarX ,tFarY ),mini(tFarZ ,ray.tfar ));
681
#else
682
const vfloat<N> tNear = max(ray.tnear,tNearX,tNearY,tNearZ);
683
const vfloat<N> tFar = min(ray.tfar, tFarX ,tFarY ,tFarZ );
684
#endif
685
vbool<N> vmask = tNear <= tFar;
686
if (unlikely(ref.isAABBNodeMB4D())) {
687
const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
688
vmask &= (node1->lower_t <= time) & (time < node1->upper_t);
689
}
690
const size_t mask = movemask(vmask);
691
dist = tNear;
692
return mask;
693
}
694
695
//////////////////////////////////////////////////////////////////////////////////////
696
// Robust AABBNodeMB4D intersection
697
//////////////////////////////////////////////////////////////////////////////////////
698
699
template<int N>
700
__forceinline size_t intersectNodeMB4DRobust(const typename BVHN<N>::NodeRef ref, const TravRay<N,true>& ray, const float time, vfloat<N>& dist)
701
{
702
const typename BVHN<N>::AABBNodeMB* node = ref.getAABBNodeMB();
703
704
const vfloat<N>* pNearX = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearX);
705
const vfloat<N>* pNearY = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearY);
706
const vfloat<N>* pNearZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.nearZ);
707
const vfloat<N> tNearX = (madd(time,pNearX[6],vfloat<N>(pNearX[0])) - ray.org.x) * ray.rdir_near.x;
708
const vfloat<N> tNearY = (madd(time,pNearY[6],vfloat<N>(pNearY[0])) - ray.org.y) * ray.rdir_near.y;
709
const vfloat<N> tNearZ = (madd(time,pNearZ[6],vfloat<N>(pNearZ[0])) - ray.org.z) * ray.rdir_near.z;
710
const vfloat<N> tNear = max(ray.tnear,tNearX,tNearY,tNearZ);
711
const vfloat<N>* pFarX = (const vfloat<N>*)((const char*)&node->lower_x+ray.farX);
712
const vfloat<N>* pFarY = (const vfloat<N>*)((const char*)&node->lower_x+ray.farY);
713
const vfloat<N>* pFarZ = (const vfloat<N>*)((const char*)&node->lower_x+ray.farZ);
714
const vfloat<N> tFarX = (madd(time,pFarX[6],vfloat<N>(pFarX[0])) - ray.org.x) * ray.rdir_far.x;
715
const vfloat<N> tFarY = (madd(time,pFarY[6],vfloat<N>(pFarY[0])) - ray.org.y) * ray.rdir_far.y;
716
const vfloat<N> tFarZ = (madd(time,pFarZ[6],vfloat<N>(pFarZ[0])) - ray.org.z) * ray.rdir_far.z;
717
const vfloat<N> tFar = min(ray.tfar,tFarX,tFarY,tFarZ);
718
vbool<N> vmask = tNear <= tFar;
719
if (unlikely(ref.isAABBNodeMB4D())) {
720
const typename BVHN<N>::AABBNodeMB4D* node1 = (const typename BVHN<N>::AABBNodeMB4D*) node;
721
vmask &= (node1->lower_t <= time) & (time < node1->upper_t);
722
}
723
const size_t mask = movemask(vmask);
724
dist = tNear;
725
return mask;
726
}
727
728
//////////////////////////////////////////////////////////////////////////////////////
729
// Fast QuantizedBaseNode intersection
730
//////////////////////////////////////////////////////////////////////////////////////
731
732
template<int N, bool robust>
733
__forceinline size_t intersectNode(const typename BVHN<N>::QuantizedBaseNode* node, const TravRay<N,robust>& ray, vfloat<N>& dist);
734
735
template<>
736
__forceinline size_t intersectNode<4>(const typename BVH4::QuantizedBaseNode* node, const TravRay<4,false>& ray, vfloat4& dist)
737
{
738
const size_t mvalid = movemask(node->validMask());
739
const vfloat4 start_x(node->start.x);
740
const vfloat4 scale_x(node->scale.x);
741
const vfloat4 lower_x = madd(node->dequantize<4>(ray.nearX >> 2),scale_x,start_x);
742
const vfloat4 upper_x = madd(node->dequantize<4>(ray.farX >> 2),scale_x,start_x);
743
const vfloat4 start_y(node->start.y);
744
const vfloat4 scale_y(node->scale.y);
745
const vfloat4 lower_y = madd(node->dequantize<4>(ray.nearY >> 2),scale_y,start_y);
746
const vfloat4 upper_y = madd(node->dequantize<4>(ray.farY >> 2),scale_y,start_y);
747
const vfloat4 start_z(node->start.z);
748
const vfloat4 scale_z(node->scale.z);
749
const vfloat4 lower_z = madd(node->dequantize<4>(ray.nearZ >> 2),scale_z,start_z);
750
const vfloat4 upper_z = madd(node->dequantize<4>(ray.farZ >> 2),scale_z,start_z);
751
752
#if defined(__FMA_X4__)
753
#if defined(__aarch64__)
754
const vfloat4 tNearX = madd(lower_x, ray.rdir.x, ray.neg_org_rdir.x);
755
const vfloat4 tNearY = madd(lower_y, ray.rdir.y, ray.neg_org_rdir.y);
756
const vfloat4 tNearZ = madd(lower_z, ray.rdir.z, ray.neg_org_rdir.z);
757
const vfloat4 tFarX = madd(upper_x, ray.rdir.x, ray.neg_org_rdir.x);
758
const vfloat4 tFarY = madd(upper_y, ray.rdir.y, ray.neg_org_rdir.y);
759
const vfloat4 tFarZ = madd(upper_z, ray.rdir.z, ray.neg_org_rdir.z);
760
#else
761
const vfloat4 tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x);
762
const vfloat4 tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y);
763
const vfloat4 tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z);
764
const vfloat4 tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x);
765
const vfloat4 tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y);
766
const vfloat4 tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z);
767
#endif
768
#else
769
const vfloat4 tNearX = (lower_x - ray.org.x) * ray.rdir.x;
770
const vfloat4 tNearY = (lower_y - ray.org.y) * ray.rdir.y;
771
const vfloat4 tNearZ = (lower_z - ray.org.z) * ray.rdir.z;
772
const vfloat4 tFarX = (upper_x - ray.org.x) * ray.rdir.x;
773
const vfloat4 tFarY = (upper_y - ray.org.y) * ray.rdir.y;
774
const vfloat4 tFarZ = (upper_z - ray.org.z) * ray.rdir.z;
775
#endif
776
777
#if defined(__aarch64__) || defined(__SSE4_1__) && !defined(__AVX512F__) // up to HSW
778
const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
779
const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
780
const vbool4 vmask = asInt(tNear) > asInt(tFar);
781
const size_t mask = movemask(vmask) ^ ((1<<4)-1);
782
#elif defined(__AVX512F__) // SKX
783
const vfloat4 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
784
const vfloat4 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
785
const vbool4 vmask = asInt(tNear) <= asInt(tFar);
786
const size_t mask = movemask(vmask);
787
#else
788
const vfloat4 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
789
const vfloat4 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
790
const vbool4 vmask = tNear <= tFar;
791
const size_t mask = movemask(vmask);
792
#endif
793
dist = tNear;
794
return mask & mvalid;
795
}
796
797
template<>
798
__forceinline size_t intersectNode<4>(const typename BVH4::QuantizedBaseNode* node, const TravRay<4,true>& ray, vfloat4& dist)
799
{
800
const size_t mvalid = movemask(node->validMask());
801
const vfloat4 start_x(node->start.x);
802
const vfloat4 scale_x(node->scale.x);
803
const vfloat4 lower_x = madd(node->dequantize<4>(ray.nearX >> 2),scale_x,start_x);
804
const vfloat4 upper_x = madd(node->dequantize<4>(ray.farX >> 2),scale_x,start_x);
805
const vfloat4 start_y(node->start.y);
806
const vfloat4 scale_y(node->scale.y);
807
const vfloat4 lower_y = madd(node->dequantize<4>(ray.nearY >> 2),scale_y,start_y);
808
const vfloat4 upper_y = madd(node->dequantize<4>(ray.farY >> 2),scale_y,start_y);
809
const vfloat4 start_z(node->start.z);
810
const vfloat4 scale_z(node->scale.z);
811
const vfloat4 lower_z = madd(node->dequantize<4>(ray.nearZ >> 2),scale_z,start_z);
812
const vfloat4 upper_z = madd(node->dequantize<4>(ray.farZ >> 2),scale_z,start_z);
813
814
const vfloat4 tNearX = (lower_x - ray.org.x) * ray.rdir_near.x;
815
const vfloat4 tNearY = (lower_y - ray.org.y) * ray.rdir_near.y;
816
const vfloat4 tNearZ = (lower_z - ray.org.z) * ray.rdir_near.z;
817
const vfloat4 tFarX = (upper_x - ray.org.x) * ray.rdir_far.x;
818
const vfloat4 tFarY = (upper_y - ray.org.y) * ray.rdir_far.y;
819
const vfloat4 tFarZ = (upper_z - ray.org.z) * ray.rdir_far.z;
820
821
const vfloat4 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
822
const vfloat4 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
823
const vbool4 vmask = tNear <= tFar;
824
const size_t mask = movemask(vmask);
825
dist = tNear;
826
return mask & mvalid;
827
}
828
829
830
#if defined(__AVX__)
831
832
template<>
833
__forceinline size_t intersectNode<8>(const typename BVH8::QuantizedBaseNode* node, const TravRay<8,false>& ray, vfloat8& dist)
834
{
835
const size_t mvalid = movemask(node->validMask());
836
const vfloat8 start_x(node->start.x);
837
const vfloat8 scale_x(node->scale.x);
838
const vfloat8 lower_x = madd(node->dequantize<8>(ray.nearX >> 2),scale_x,start_x);
839
const vfloat8 upper_x = madd(node->dequantize<8>(ray.farX >> 2),scale_x,start_x);
840
const vfloat8 start_y(node->start.y);
841
const vfloat8 scale_y(node->scale.y);
842
const vfloat8 lower_y = madd(node->dequantize<8>(ray.nearY >> 2),scale_y,start_y);
843
const vfloat8 upper_y = madd(node->dequantize<8>(ray.farY >> 2),scale_y,start_y);
844
const vfloat8 start_z(node->start.z);
845
const vfloat8 scale_z(node->scale.z);
846
const vfloat8 lower_z = madd(node->dequantize<8>(ray.nearZ >> 2),scale_z,start_z);
847
const vfloat8 upper_z = madd(node->dequantize<8>(ray.farZ >> 2),scale_z,start_z);
848
849
#if defined(__AVX2__)
850
#if defined(__aarch64__)
851
const vfloat8 tNearX = madd(lower_x, ray.rdir.x, ray.neg_org_rdir.x);
852
const vfloat8 tNearY = madd(lower_y, ray.rdir.y, ray.neg_org_rdir.y);
853
const vfloat8 tNearZ = madd(lower_z, ray.rdir.z, ray.neg_org_rdir.z);
854
const vfloat8 tFarX = madd(upper_x, ray.rdir.x, ray.neg_org_rdir.x);
855
const vfloat8 tFarY = madd(upper_y, ray.rdir.y, ray.neg_org_rdir.y);
856
const vfloat8 tFarZ = madd(upper_z, ray.rdir.z, ray.neg_org_rdir.z);
857
#else
858
const vfloat8 tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x);
859
const vfloat8 tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y);
860
const vfloat8 tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z);
861
const vfloat8 tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x);
862
const vfloat8 tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y);
863
const vfloat8 tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z);
864
#endif
865
#else
866
const vfloat8 tNearX = (lower_x - ray.org.x) * ray.rdir.x;
867
const vfloat8 tNearY = (lower_y - ray.org.y) * ray.rdir.y;
868
const vfloat8 tNearZ = (lower_z - ray.org.z) * ray.rdir.z;
869
const vfloat8 tFarX = (upper_x - ray.org.x) * ray.rdir.x;
870
const vfloat8 tFarY = (upper_y - ray.org.y) * ray.rdir.y;
871
const vfloat8 tFarZ = (upper_z - ray.org.z) * ray.rdir.z;
872
#endif
873
874
#if defined(__AVX2__) && !defined(__AVX512F__) // HSW
875
const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
876
const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
877
const vbool8 vmask = asInt(tNear) > asInt(tFar);
878
const size_t mask = movemask(vmask) ^ ((1<<8)-1);
879
#elif defined(__AVX512F__) // SKX
880
const vfloat8 tNear = maxi(tNearX,tNearY,tNearZ,ray.tnear);
881
const vfloat8 tFar = mini(tFarX ,tFarY ,tFarZ ,ray.tfar);
882
const vbool8 vmask = asInt(tNear) <= asInt(tFar);
883
const size_t mask = movemask(vmask);
884
#else
885
const vfloat8 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
886
const vfloat8 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
887
const vbool8 vmask = tNear <= tFar;
888
const size_t mask = movemask(vmask);
889
#endif
890
dist = tNear;
891
return mask & mvalid;
892
}
893
894
template<>
895
__forceinline size_t intersectNode<8>(const typename BVH8::QuantizedBaseNode* node, const TravRay<8,true>& ray, vfloat8& dist)
896
{
897
const size_t mvalid = movemask(node->validMask());
898
const vfloat8 start_x(node->start.x);
899
const vfloat8 scale_x(node->scale.x);
900
const vfloat8 lower_x = madd(node->dequantize<8>(ray.nearX >> 2),scale_x,start_x);
901
const vfloat8 upper_x = madd(node->dequantize<8>(ray.farX >> 2),scale_x,start_x);
902
const vfloat8 start_y(node->start.y);
903
const vfloat8 scale_y(node->scale.y);
904
const vfloat8 lower_y = madd(node->dequantize<8>(ray.nearY >> 2),scale_y,start_y);
905
const vfloat8 upper_y = madd(node->dequantize<8>(ray.farY >> 2),scale_y,start_y);
906
const vfloat8 start_z(node->start.z);
907
const vfloat8 scale_z(node->scale.z);
908
const vfloat8 lower_z = madd(node->dequantize<8>(ray.nearZ >> 2),scale_z,start_z);
909
const vfloat8 upper_z = madd(node->dequantize<8>(ray.farZ >> 2),scale_z,start_z);
910
911
const vfloat8 tNearX = (lower_x - ray.org.x) * ray.rdir_near.x;
912
const vfloat8 tNearY = (lower_y - ray.org.y) * ray.rdir_near.y;
913
const vfloat8 tNearZ = (lower_z - ray.org.z) * ray.rdir_near.z;
914
const vfloat8 tFarX = (upper_x - ray.org.x) * ray.rdir_far.x;
915
const vfloat8 tFarY = (upper_y - ray.org.y) * ray.rdir_far.y;
916
const vfloat8 tFarZ = (upper_z - ray.org.z) * ray.rdir_far.z;
917
918
const vfloat8 tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
919
const vfloat8 tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
920
const vbool8 vmask = tNear <= tFar;
921
const size_t mask = movemask(vmask);
922
923
dist = tNear;
924
return mask & mvalid;
925
}
926
927
928
#endif
929
930
template<int N>
931
__forceinline size_t intersectNode(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,false>& ray, const float time, vfloat<N>& dist)
932
{
933
const vboolf<N> mvalid = node->validMask();
934
const vfloat<N> lower_x = node->dequantizeLowerX(time);
935
const vfloat<N> upper_x = node->dequantizeUpperX(time);
936
const vfloat<N> lower_y = node->dequantizeLowerY(time);
937
const vfloat<N> upper_y = node->dequantizeUpperY(time);
938
const vfloat<N> lower_z = node->dequantizeLowerZ(time);
939
const vfloat<N> upper_z = node->dequantizeUpperZ(time);
940
#if defined(__FMA_X4__)
941
#if defined(__aarch64__)
942
const vfloat<N> tNearX = madd(lower_x, ray.rdir.x, ray.neg_org_rdir.x);
943
const vfloat<N> tNearY = madd(lower_y, ray.rdir.y, ray.neg_org_rdir.y);
944
const vfloat<N> tNearZ = madd(lower_z, ray.rdir.z, ray.neg_org_rdir.z);
945
const vfloat<N> tFarX = madd(upper_x, ray.rdir.x, ray.neg_org_rdir.x);
946
const vfloat<N> tFarY = madd(upper_y, ray.rdir.y, ray.neg_org_rdir.y);
947
const vfloat<N> tFarZ = madd(upper_z, ray.rdir.z, ray.neg_org_rdir.z);
948
#else
949
const vfloat<N> tNearX = msub(lower_x, ray.rdir.x, ray.org_rdir.x);
950
const vfloat<N> tNearY = msub(lower_y, ray.rdir.y, ray.org_rdir.y);
951
const vfloat<N> tNearZ = msub(lower_z, ray.rdir.z, ray.org_rdir.z);
952
const vfloat<N> tFarX = msub(upper_x, ray.rdir.x, ray.org_rdir.x);
953
const vfloat<N> tFarY = msub(upper_y, ray.rdir.y, ray.org_rdir.y);
954
const vfloat<N> tFarZ = msub(upper_z, ray.rdir.z, ray.org_rdir.z);
955
#endif
956
#else
957
const vfloat<N> tNearX = (lower_x - ray.org.x) * ray.rdir.x;
958
const vfloat<N> tNearY = (lower_y - ray.org.y) * ray.rdir.y;
959
const vfloat<N> tNearZ = (lower_z - ray.org.z) * ray.rdir.z;
960
const vfloat<N> tFarX = (upper_x - ray.org.x) * ray.rdir.x;
961
const vfloat<N> tFarY = (upper_y - ray.org.y) * ray.rdir.y;
962
const vfloat<N> tFarZ = (upper_z - ray.org.z) * ray.rdir.z;
963
#endif
964
965
const vfloat<N> tminX = mini(tNearX,tFarX);
966
const vfloat<N> tmaxX = maxi(tNearX,tFarX);
967
const vfloat<N> tminY = mini(tNearY,tFarY);
968
const vfloat<N> tmaxY = maxi(tNearY,tFarY);
969
const vfloat<N> tminZ = mini(tNearZ,tFarZ);
970
const vfloat<N> tmaxZ = maxi(tNearZ,tFarZ);
971
const vfloat<N> tNear = maxi(tminX,tminY,tminZ,ray.tnear);
972
const vfloat<N> tFar = mini(tmaxX,tmaxY,tmaxZ,ray.tfar);
973
#if defined(__AVX512F__) // SKX
974
const vbool<N> vmask = le(mvalid,asInt(tNear),asInt(tFar));
975
#else
976
const vbool<N> vmask = (asInt(tNear) <= asInt(tFar)) & mvalid;
977
#endif
978
const size_t mask = movemask(vmask);
979
dist = tNear;
980
return mask;
981
}
982
983
template<int N>
984
__forceinline size_t intersectNode(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,true>& ray, const float time, vfloat<N>& dist)
985
{
986
const vboolf<N> mvalid = node->validMask();
987
const vfloat<N> lower_x = node->dequantizeLowerX(time);
988
const vfloat<N> upper_x = node->dequantizeUpperX(time);
989
const vfloat<N> lower_y = node->dequantizeLowerY(time);
990
const vfloat<N> upper_y = node->dequantizeUpperY(time);
991
const vfloat<N> lower_z = node->dequantizeLowerZ(time);
992
const vfloat<N> upper_z = node->dequantizeUpperZ(time);
993
const vfloat<N> tNearX = (lower_x - ray.org.x) * ray.rdir_near.x;
994
const vfloat<N> tNearY = (lower_y - ray.org.y) * ray.rdir_near.y;
995
const vfloat<N> tNearZ = (lower_z - ray.org.z) * ray.rdir_near.z;
996
const vfloat<N> tFarX = (upper_x - ray.org.x) * ray.rdir_far.x;
997
const vfloat<N> tFarY = (upper_y - ray.org.y) * ray.rdir_far.y;
998
const vfloat<N> tFarZ = (upper_z - ray.org.z) * ray.rdir_far.z;
999
1000
const vfloat<N> tminX = mini(tNearX,tFarX);
1001
const vfloat<N> tmaxX = maxi(tNearX,tFarX);
1002
const vfloat<N> tminY = mini(tNearY,tFarY);
1003
const vfloat<N> tmaxY = maxi(tNearY,tFarY);
1004
const vfloat<N> tminZ = mini(tNearZ,tFarZ);
1005
const vfloat<N> tmaxZ = maxi(tNearZ,tFarZ);
1006
const vfloat<N> tNear = maxi(tminX,tminY,tminZ,ray.tnear);
1007
const vfloat<N> tFar = mini(tmaxX,tmaxY,tmaxZ,ray.tfar);
1008
#if defined(__AVX512F__) // SKX
1009
const vbool<N> vmask = le(mvalid,asInt(tNear),asInt(tFar));
1010
#else
1011
const vbool<N> vmask = (asInt(tNear) <= asInt(tFar)) & mvalid;
1012
#endif
1013
const size_t mask = movemask(vmask);
1014
dist = tNear;
1015
return mask;
1016
}
1017
1018
//////////////////////////////////////////////////////////////////////////////////////
1019
// Fast OBBNode intersection
1020
//////////////////////////////////////////////////////////////////////////////////////
1021
1022
template<int N, bool robust>
1023
__forceinline size_t intersectNode(const typename BVHN<N>::OBBNode* node, const TravRay<N,robust>& ray, vfloat<N>& dist)
1024
{
1025
const Vec3vf<N> dir = xfmVector(node->naabb,ray.dir);
1026
//const Vec3vf<N> nrdir = Vec3vf<N>(vfloat<N>(-1.0f))/dir;
1027
const Vec3vf<N> nrdir = Vec3vf<N>(vfloat<N>(-1.0f))*rcp_safe(dir);
1028
const Vec3vf<N> org = xfmPoint(node->naabb,ray.org);
1029
const Vec3vf<N> tLowerXYZ = org * nrdir; // (Vec3fa(zero) - org) * rdir;
1030
const Vec3vf<N> tUpperXYZ = tLowerXYZ - nrdir; // (Vec3fa(one ) - org) * rdir;
1031
1032
const vfloat<N> tNearX = mini(tLowerXYZ.x,tUpperXYZ.x);
1033
const vfloat<N> tNearY = mini(tLowerXYZ.y,tUpperXYZ.y);
1034
const vfloat<N> tNearZ = mini(tLowerXYZ.z,tUpperXYZ.z);
1035
const vfloat<N> tFarX = maxi(tLowerXYZ.x,tUpperXYZ.x);
1036
const vfloat<N> tFarY = maxi(tLowerXYZ.y,tUpperXYZ.y);
1037
const vfloat<N> tFarZ = maxi(tLowerXYZ.z,tUpperXYZ.z);
1038
vfloat<N> tNear = max(ray.tnear, tNearX,tNearY,tNearZ);
1039
vfloat<N> tFar = min(ray.tfar, tFarX ,tFarY ,tFarZ );
1040
if (robust) {
1041
tNear = tNear*vfloat<N>(1.0f-3.0f*float(ulp));
1042
tFar = tFar *vfloat<N>(1.0f+3.0f*float(ulp));
1043
}
1044
const vbool<N> vmask = tNear <= tFar;
1045
dist = tNear;
1046
return movemask(vmask);
1047
}
1048
1049
//////////////////////////////////////////////////////////////////////////////////////
1050
// Fast OBBNodeMB intersection
1051
//////////////////////////////////////////////////////////////////////////////////////
1052
1053
template<int N, bool robust>
1054
__forceinline size_t intersectNode(const typename BVHN<N>::OBBNodeMB* node, const TravRay<N,robust>& ray, const float time, vfloat<N>& dist)
1055
{
1056
const AffineSpace3vf<N> xfm = node->space0;
1057
const Vec3vf<N> b0_lower = zero;
1058
const Vec3vf<N> b0_upper = one;
1059
const Vec3vf<N> lower = lerp(b0_lower,node->b1.lower,vfloat<N>(time));
1060
const Vec3vf<N> upper = lerp(b0_upper,node->b1.upper,vfloat<N>(time));
1061
1062
const BBox3vf<N> bounds(lower,upper);
1063
const Vec3vf<N> dir = xfmVector(xfm,ray.dir);
1064
const Vec3vf<N> rdir = rcp_safe(dir);
1065
const Vec3vf<N> org = xfmPoint(xfm,ray.org);
1066
1067
const Vec3vf<N> tLowerXYZ = (bounds.lower - org) * rdir;
1068
const Vec3vf<N> tUpperXYZ = (bounds.upper - org) * rdir;
1069
1070
const vfloat<N> tNearX = mini(tLowerXYZ.x,tUpperXYZ.x);
1071
const vfloat<N> tNearY = mini(tLowerXYZ.y,tUpperXYZ.y);
1072
const vfloat<N> tNearZ = mini(tLowerXYZ.z,tUpperXYZ.z);
1073
const vfloat<N> tFarX = maxi(tLowerXYZ.x,tUpperXYZ.x);
1074
const vfloat<N> tFarY = maxi(tLowerXYZ.y,tUpperXYZ.y);
1075
const vfloat<N> tFarZ = maxi(tLowerXYZ.z,tUpperXYZ.z);
1076
vfloat<N> tNear = max(ray.tnear, tNearX,tNearY,tNearZ);
1077
vfloat<N> tFar = min(ray.tfar, tFarX ,tFarY ,tFarZ );
1078
if (robust) {
1079
tNear = tNear*vfloat<N>(1.0f-3.0f*float(ulp));
1080
tFar = tFar *vfloat<N>(1.0f+3.0f*float(ulp));
1081
}
1082
const vbool<N> vmask = tNear <= tFar;
1083
dist = tNear;
1084
return movemask(vmask);
1085
}
1086
1087
//////////////////////////////////////////////////////////////////////////////////////
1088
// Node intersectors used in point query raversal
1089
//////////////////////////////////////////////////////////////////////////////////////
1090
1091
/*! Computes traversal information for N nodes with 1 point query */
1092
template<int N, int types>
1093
struct BVHNNodePointQuerySphere1;
1094
1095
template<int N>
1096
struct BVHNNodePointQuerySphere1<N, BVH_AN1>
1097
{
1098
static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1099
{
1100
if (unlikely(node.isLeaf())) return false;
1101
mask = pointQueryNodeSphere(node.getAABBNode(), query, dist);
1102
return true;
1103
}
1104
};
1105
1106
template<int N>
1107
struct BVHNNodePointQuerySphere1<N, BVH_AN2>
1108
{
1109
static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1110
{
1111
if (unlikely(node.isLeaf())) return false;
1112
mask = pointQueryNodeSphere(node.getAABBNodeMB(), query, time, dist);
1113
return true;
1114
}
1115
};
1116
1117
template<int N>
1118
struct BVHNNodePointQuerySphere1<N, BVH_AN2_AN4D>
1119
{
1120
static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1121
{
1122
if (unlikely(node.isLeaf())) return false;
1123
mask = pointQueryNodeSphereMB4D<N>(node, query, time, dist);
1124
return true;
1125
}
1126
};
1127
1128
template<int N>
1129
struct BVHNNodePointQuerySphere1<N, BVH_AN1_UN1>
1130
{
1131
static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1132
{
1133
if (likely(node.isAABBNode())) mask = pointQueryNodeSphere(node.getAABBNode(), query, dist);
1134
else if (unlikely(node.isOBBNode())) mask = pointQueryNodeSphere(node.ungetAABBNode(), query, dist);
1135
else return false;
1136
return true;
1137
}
1138
};
1139
1140
template<int N>
1141
struct BVHNNodePointQuerySphere1<N, BVH_AN2_UN2>
1142
{
1143
static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1144
{
1145
if (likely(node.isAABBNodeMB())) mask = pointQueryNodeSphere(node.getAABBNodeMB(), query, time, dist);
1146
else if (unlikely(node.isOBBNodeMB())) mask = pointQueryNodeSphere(node.ungetAABBNodeMB(), query, time, dist);
1147
else return false;
1148
return true;
1149
}
1150
};
1151
1152
template<int N>
1153
struct BVHNNodePointQuerySphere1<N, BVH_AN2_AN4D_UN2>
1154
{
1155
static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1156
{
1157
if (unlikely(node.isLeaf())) return false;
1158
if (unlikely(node.isOBBNodeMB())) mask = pointQueryNodeSphere(node.ungetAABBNodeMB(), query, time, dist);
1159
else mask = pointQueryNodeSphereMB4D(node, query, time, dist);
1160
return true;
1161
}
1162
};
1163
1164
template<int N>
1165
struct BVHNNodePointQuerySphere1<N, BVH_QN1>
1166
{
1167
static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1168
{
1169
if (unlikely(node.isLeaf())) return false;
1170
mask = pointQueryNodeSphere((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), query, dist);
1171
return true;
1172
}
1173
};
1174
1175
template<int N>
1176
struct BVHNQuantizedBaseNodePointQuerySphere1
1177
{
1178
static __forceinline size_t pointQuery(const typename BVHN<N>::QuantizedBaseNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
1179
{
1180
return pointQueryNodeSphere(node,query,dist);
1181
}
1182
1183
static __forceinline size_t pointQuery(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
1184
{
1185
return pointQueryNodeSphere(node,query,time,dist);
1186
}
1187
};
1188
1189
/*! Computes traversal information for N nodes with 1 point query */
1190
template<int N, int types>
1191
struct BVHNNodePointQueryAABB1;
1192
1193
template<int N>
1194
struct BVHNNodePointQueryAABB1<N, BVH_AN1>
1195
{
1196
static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1197
{
1198
if (unlikely(node.isLeaf())) return false;
1199
mask = pointQueryNodeAABB(node.getAABBNode(), query, dist);
1200
return true;
1201
}
1202
};
1203
1204
template<int N>
1205
struct BVHNNodePointQueryAABB1<N, BVH_AN2>
1206
{
1207
static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1208
{
1209
if (unlikely(node.isLeaf())) return false;
1210
mask = pointQueryNodeAABB(node.getAABBNodeMB(), query, time, dist);
1211
return true;
1212
}
1213
};
1214
1215
template<int N>
1216
struct BVHNNodePointQueryAABB1<N, BVH_AN2_AN4D>
1217
{
1218
static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1219
{
1220
if (unlikely(node.isLeaf())) return false;
1221
mask = pointQueryNodeAABBMB4D<N>(node, query, time, dist);
1222
return true;
1223
}
1224
};
1225
1226
template<int N>
1227
struct BVHNNodePointQueryAABB1<N, BVH_AN1_UN1>
1228
{
1229
static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1230
{
1231
if (likely(node.isAABBNode())) mask = pointQueryNodeAABB(node.getAABBNode(), query, dist);
1232
else if (unlikely(node.isOBBNode())) mask = pointQueryNodeAABB(node.ungetAABBNode(), query, dist);
1233
else return false;
1234
return true;
1235
}
1236
};
1237
1238
template<int N>
1239
struct BVHNNodePointQueryAABB1<N, BVH_AN2_UN2>
1240
{
1241
static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1242
{
1243
if (likely(node.isAABBNodeMB())) mask = pointQueryNodeAABB(node.getAABBNodeMB(), query, time, dist);
1244
else if (unlikely(node.isOBBNodeMB())) mask = pointQueryNodeAABB(node.ungetAABBNodeMB(), query, time, dist);
1245
else return false;
1246
return true;
1247
}
1248
};
1249
1250
template<int N>
1251
struct BVHNNodePointQueryAABB1<N, BVH_AN2_AN4D_UN2>
1252
{
1253
static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1254
{
1255
if (unlikely(node.isLeaf())) return false;
1256
if (unlikely(node.isOBBNodeMB())) mask = pointQueryNodeAABB(node.ungetAABBNodeMB(), query, time, dist);
1257
else mask = pointQueryNodeAABBMB4D(node, query, time, dist);
1258
return true;
1259
}
1260
};
1261
1262
template<int N>
1263
struct BVHNNodePointQueryAABB1<N, BVH_QN1>
1264
{
1265
static __forceinline bool pointQuery(const typename BVHN<N>::NodeRef& node, const TravPointQuery<N>& query, float time, vfloat<N>& dist, size_t& mask)
1266
{
1267
if (unlikely(node.isLeaf())) return false;
1268
mask = pointQueryNodeAABB((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), query, dist);
1269
return true;
1270
}
1271
};
1272
1273
template<int N>
1274
struct BVHNQuantizedBaseNodePointQueryAABB1
1275
{
1276
static __forceinline size_t pointQuery(const typename BVHN<N>::QuantizedBaseNode* node, const TravPointQuery<N>& query, vfloat<N>& dist)
1277
{
1278
return pointQueryNodeAABB(node,query,dist);
1279
}
1280
1281
static __forceinline size_t pointQuery(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravPointQuery<N>& query, const float time, vfloat<N>& dist)
1282
{
1283
return pointQueryNodeAABB(node,query,time,dist);
1284
}
1285
};
1286
1287
1288
//////////////////////////////////////////////////////////////////////////////////////
1289
// Node intersectors used in ray traversal
1290
//////////////////////////////////////////////////////////////////////////////////////
1291
1292
/*! Intersects N nodes with 1 ray */
1293
template<int N, int types, bool robust>
1294
struct BVHNNodeIntersector1;
1295
1296
template<int N>
1297
struct BVHNNodeIntersector1<N, BVH_AN1, false>
1298
{
1299
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask)
1300
{
1301
if (unlikely(node.isLeaf())) return false;
1302
mask = intersectNode(node.getAABBNode(), ray, dist);
1303
return true;
1304
}
1305
};
1306
1307
template<int N>
1308
struct BVHNNodeIntersector1<N, BVH_AN1, true>
1309
{
1310
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask)
1311
{
1312
if (unlikely(node.isLeaf())) return false;
1313
mask = intersectNodeRobust(node.getAABBNode(), ray, dist);
1314
return true;
1315
}
1316
};
1317
1318
template<int N>
1319
struct BVHNNodeIntersector1<N, BVH_AN2, false>
1320
{
1321
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask)
1322
{
1323
if (unlikely(node.isLeaf())) return false;
1324
mask = intersectNode(node.getAABBNodeMB(), ray, time, dist);
1325
return true;
1326
}
1327
};
1328
1329
template<int N>
1330
struct BVHNNodeIntersector1<N, BVH_AN2, true>
1331
{
1332
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask)
1333
{
1334
if (unlikely(node.isLeaf())) return false;
1335
mask = intersectNodeRobust(node.getAABBNodeMB(), ray, time, dist);
1336
return true;
1337
}
1338
};
1339
1340
template<int N>
1341
struct BVHNNodeIntersector1<N, BVH_AN2_AN4D, false>
1342
{
1343
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask)
1344
{
1345
if (unlikely(node.isLeaf())) return false;
1346
mask = intersectNodeMB4D<N>(node, ray, time, dist);
1347
return true;
1348
}
1349
};
1350
1351
template<int N>
1352
struct BVHNNodeIntersector1<N, BVH_AN2_AN4D, true>
1353
{
1354
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask)
1355
{
1356
if (unlikely(node.isLeaf())) return false;
1357
mask = intersectNodeMB4DRobust<N>(node, ray, time, dist);
1358
return true;
1359
}
1360
};
1361
1362
template<int N>
1363
struct BVHNNodeIntersector1<N, BVH_AN1_UN1, false>
1364
{
1365
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask)
1366
{
1367
if (likely(node.isAABBNode())) mask = intersectNode(node.getAABBNode(), ray, dist);
1368
else if (unlikely(node.isOBBNode())) mask = intersectNode(node.ungetAABBNode(), ray, dist);
1369
else return false;
1370
return true;
1371
}
1372
};
1373
1374
template<int N>
1375
struct BVHNNodeIntersector1<N, BVH_AN1_UN1, true>
1376
{
1377
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask)
1378
{
1379
if (likely(node.isAABBNode())) mask = intersectNodeRobust(node.getAABBNode(), ray, dist);
1380
else if (unlikely(node.isOBBNode())) mask = intersectNode(node.ungetAABBNode(), ray, dist);
1381
else return false;
1382
return true;
1383
}
1384
};
1385
1386
template<int N>
1387
struct BVHNNodeIntersector1<N, BVH_AN2_UN2, false>
1388
{
1389
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask)
1390
{
1391
if (likely(node.isAABBNodeMB())) mask = intersectNode(node.getAABBNodeMB(), ray, time, dist);
1392
else if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist);
1393
else return false;
1394
return true;
1395
}
1396
};
1397
1398
template<int N>
1399
struct BVHNNodeIntersector1<N, BVH_AN2_UN2, true>
1400
{
1401
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask)
1402
{
1403
if (likely(node.isAABBNodeMB())) mask = intersectNodeRobust(node.getAABBNodeMB(), ray, time, dist);
1404
else if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist);
1405
else return false;
1406
return true;
1407
}
1408
};
1409
1410
template<int N>
1411
struct BVHNNodeIntersector1<N, BVH_AN2_AN4D_UN2, false>
1412
{
1413
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask)
1414
{
1415
if (unlikely(node.isLeaf())) return false;
1416
if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist);
1417
else mask = intersectNodeMB4D(node, ray, time, dist);
1418
return true;
1419
}
1420
};
1421
1422
template<int N>
1423
struct BVHNNodeIntersector1<N, BVH_AN2_AN4D_UN2, true>
1424
{
1425
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask)
1426
{
1427
if (unlikely(node.isLeaf())) return false;
1428
if (unlikely(node.isOBBNodeMB())) mask = intersectNode(node.ungetAABBNodeMB(), ray, time, dist);
1429
else mask = intersectNodeMB4DRobust(node, ray, time, dist);
1430
return true;
1431
}
1432
};
1433
1434
template<int N>
1435
struct BVHNNodeIntersector1<N, BVH_QN1, false>
1436
{
1437
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,false>& ray, float time, vfloat<N>& dist, size_t& mask)
1438
{
1439
if (unlikely(node.isLeaf())) return false;
1440
mask = intersectNode((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), ray, dist);
1441
return true;
1442
}
1443
};
1444
1445
template<int N>
1446
struct BVHNNodeIntersector1<N, BVH_QN1, true>
1447
{
1448
static __forceinline bool intersect(const typename BVHN<N>::NodeRef& node, const TravRay<N,true>& ray, float time, vfloat<N>& dist, size_t& mask)
1449
{
1450
if (unlikely(node.isLeaf())) return false;
1451
mask = intersectNodeRobust((const typename BVHN<N>::QuantizedNode*)node.quantizedNode(), ray, dist);
1452
return true;
1453
}
1454
};
1455
1456
/*! Intersects N nodes with K rays */
1457
template<int N, bool robust>
1458
struct BVHNQuantizedBaseNodeIntersector1;
1459
1460
template<int N>
1461
struct BVHNQuantizedBaseNodeIntersector1<N, false>
1462
{
1463
static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNode* node, const TravRay<N,false>& ray, vfloat<N>& dist)
1464
{
1465
return intersectNode(node,ray,dist);
1466
}
1467
1468
static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,false>& ray, const float time, vfloat<N>& dist)
1469
{
1470
return intersectNode(node,ray,time,dist);
1471
}
1472
1473
};
1474
1475
template<int N>
1476
struct BVHNQuantizedBaseNodeIntersector1<N, true>
1477
{
1478
static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNode* node, const TravRay<N,true>& ray, vfloat<N>& dist)
1479
{
1480
return intersectNode(node,ray,dist);
1481
}
1482
1483
static __forceinline size_t intersect(const typename BVHN<N>::QuantizedBaseNodeMB* node, const TravRay<N,true>& ray, const float time, vfloat<N>& dist)
1484
{
1485
return intersectNode(node,ray,time,dist);
1486
}
1487
1488
};
1489
1490
1491
}
1492
}
1493
1494