Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/embree/kernels/geometry/grid_soa.h
9905 views
1
// Copyright 2009-2021 Intel Corporation
2
// SPDX-License-Identifier: Apache-2.0
3
4
#pragma once
5
6
#include "../common/ray.h"
7
#include "../common/scene_subdiv_mesh.h"
8
#include "../bvh/bvh.h"
9
#include "../subdiv/tessellation.h"
10
#include "../subdiv/tessellation_cache.h"
11
#include "subdivpatch1.h"
12
13
namespace embree
14
{
15
namespace isa
16
{
17
class GridSOA
18
{
19
public:
20
21
/*! GridSOA constructor */
22
GridSOA(const SubdivPatch1Base* patches, const unsigned time_steps,
23
const unsigned x0, const unsigned x1, const unsigned y0, const unsigned y1, const unsigned swidth, const unsigned sheight,
24
const SubdivMesh* const geom, const size_t totalBvhBytes, const size_t gridBytes, BBox3fa* bounds_o = nullptr);
25
26
/*! Subgrid creation */
27
template<typename Allocator>
28
static GridSOA* create(const SubdivPatch1Base* patches, const unsigned time_steps,
29
unsigned x0, unsigned x1, unsigned y0, unsigned y1,
30
const Scene* scene, Allocator& alloc, BBox3fa* bounds_o = nullptr)
31
{
32
const unsigned width = x1-x0+1;
33
const unsigned height = y1-y0+1;
34
const GridRange range(0,width-1,0,height-1);
35
size_t bvhBytes = 0;
36
if (time_steps == 1)
37
bvhBytes = getBVHBytes(range,sizeof(BVH4::AABBNode),0);
38
else {
39
bvhBytes = (time_steps-1)*getBVHBytes(range,sizeof(BVH4::AABBNodeMB),0);
40
bvhBytes += getTemporalBVHBytes(make_range(0,int(time_steps-1)),sizeof(BVH4::AABBNodeMB4D));
41
}
42
const size_t gridBytes = 4*size_t(width)*size_t(height)*sizeof(float);
43
size_t rootBytes = time_steps*sizeof(BVH4::NodeRef);
44
#if !defined(__64BIT__)
45
rootBytes += 4; // We read 2 elements behind the grid. As we store at least 8 root bytes after the grid we are fine in 64 bit mode. But in 32 bit mode we have to do additional padding.
46
#endif
47
void* data = alloc(offsetof(GridSOA,data)+bvhBytes+time_steps*gridBytes+rootBytes);
48
assert(data);
49
return new (data) GridSOA(patches,time_steps,x0,x1,y0,y1,patches->grid_u_res,patches->grid_v_res,scene->get<SubdivMesh>(patches->geomID()),bvhBytes,gridBytes,bounds_o);
50
}
51
52
/*! Grid creation */
53
template<typename Allocator>
54
static GridSOA* create(const SubdivPatch1Base* const patches, const unsigned time_steps,
55
const Scene* scene, const Allocator& alloc, BBox3fa* bounds_o = nullptr)
56
{
57
return create(patches,time_steps,0,patches->grid_u_res-1,0,patches->grid_v_res-1,scene,alloc,bounds_o);
58
}
59
60
/*! returns reference to root */
61
__forceinline BVH4::NodeRef& root(size_t t = 0) { return (BVH4::NodeRef&)data[rootOffset + t*sizeof(BVH4::NodeRef)]; }
62
__forceinline const BVH4::NodeRef& root(size_t t = 0) const { return (BVH4::NodeRef&)data[rootOffset + t*sizeof(BVH4::NodeRef)]; }
63
64
/*! returns pointer to BVH array */
65
__forceinline char* bvhData() { return &data[0]; }
66
__forceinline const char* bvhData() const { return &data[0]; }
67
68
/*! returns pointer to Grid array */
69
__forceinline float* gridData(size_t t = 0) { return (float*) &data[gridOffset + t*gridBytes]; }
70
__forceinline const float* gridData(size_t t = 0) const { return (float*) &data[gridOffset + t*gridBytes]; }
71
72
/*! Packs the grid position (u,v) into a pointer-sized leaf value: the
 *  row-major cell index, incremented by one and multiplied by 16.
 *  decodeLeaf() performs the inverse mapping. */
__forceinline void* encodeLeaf(size_t u, size_t v)
{
  const size_t cellIndex = v * width + u;
  const size_t encoded   = 16*(cellIndex + 1); // +1 to not create empty leaf
  return reinterpret_cast<void*>(encoded);
}
75
/*! Inverse of encodeLeaf(): drops the low 4 bits of the leaf value, undoes
 *  the +1 bias and returns the address of that cell inside the grid of
 *  time step t. */
__forceinline float* decodeLeaf(size_t t, const void* ptr)
{
  const size_t encoded   = (size_t) (ptr);
  const size_t cellIndex = (encoded >> 4) - 1;
  return gridData(t) + cellIndex;
}
78
79
/*! returns the size of the BVH over the grid in bytes */
80
static size_t getBVHBytes(const GridRange& range, const size_t nodeBytes, const size_t leafBytes);
81
82
/*! returns the size of the temporal BVH over the time range BVHs */
83
static size_t getTemporalBVHBytes(const range<int> time_range, const size_t nodeBytes);
84
85
/*! calculates bounding box of grid range */
86
__forceinline BBox3fa calculateBounds(size_t time, const GridRange& range) const
87
{
88
const float* const grid_array = gridData(time);
89
const float* const grid_x_array = grid_array + 0 * dim_offset;
90
const float* const grid_y_array = grid_array + 1 * dim_offset;
91
const float* const grid_z_array = grid_array + 2 * dim_offset;
92
93
/* compute the bounds just for the range! */
94
BBox3fa bounds( empty );
95
for (unsigned v = range.v_start; v<=range.v_end; v++)
96
{
97
for (unsigned u = range.u_start; u<=range.u_end; u++)
98
{
99
const float x = grid_x_array[ v * width + u];
100
const float y = grid_y_array[ v * width + u];
101
const float z = grid_z_array[ v * width + u];
102
bounds.extend( Vec3fa(x,y,z) );
103
}
104
}
105
assert(is_finite(bounds));
106
return bounds;
107
}
108
109
/*! Evaluates grid over patch and builds BVH4 tree over the grid. */
110
std::pair<BVH4::NodeRef,BBox3fa> buildBVH(BBox3fa* bounds_o);
111
112
/*! Create BVH4 tree over grid. */
113
std::pair<BVH4::NodeRef,BBox3fa> buildBVH(const GridRange& range, size_t& allocator);
114
115
/*! Evaluates grid over patch and builds MSMBlur BVH4 tree over the grid. */
116
std::pair<BVH4::NodeRef,LBBox3fa> buildMSMBlurBVH(const range<int> time_range, BBox3fa* bounds_o);
117
118
/*! Create MBlur BVH4 tree over grid. */
119
std::pair<BVH4::NodeRef,LBBox3fa> buildMBlurBVH(size_t time, const GridRange& range, size_t& allocator);
120
121
/*! Create MSMBlur BVH4 tree over grid. */
122
std::pair<BVH4::NodeRef,LBBox3fa> buildMSMBlurBVH(const range<int> time_range, size_t& allocator, BBox3fa* bounds_o);
123
124
template<typename Loader>
125
struct MapUV
126
{
127
typedef typename Loader::vfloat vfloat;
128
const float* const grid_uv;
129
size_t line_offset;
130
size_t lines;
131
132
__forceinline MapUV(const float* const grid_uv, size_t line_offset, const size_t lines)
133
: grid_uv(grid_uv), line_offset(line_offset), lines(lines) {}
134
135
__forceinline void operator() (vfloat& u, vfloat& v, Vec3<vfloat>& Ng) const {
136
const Vec3<vfloat> tri_v012_uv = Loader::gather(grid_uv,line_offset,lines);
137
const Vec2<vfloat> uv0 = GridSOA::decodeUV(tri_v012_uv[0]);
138
const Vec2<vfloat> uv1 = GridSOA::decodeUV(tri_v012_uv[1]);
139
const Vec2<vfloat> uv2 = GridSOA::decodeUV(tri_v012_uv[2]);
140
const Vec2<vfloat> uv = u * uv1 + v * uv2 + (1.0f-u-v) * uv0;
141
u = uv[0];v = uv[1];
142
}
143
};
144
145
struct Gather2x3
146
{
147
enum { M = 4 };
148
typedef vbool4 vbool;
149
typedef vint4 vint;
150
typedef vfloat4 vfloat;
151
152
static __forceinline const Vec3vf4 gather(const float* const grid, const size_t line_offset, const size_t lines)
153
{
154
vfloat4 r0 = vfloat4::loadu(grid + 0*line_offset);
155
vfloat4 r1 = vfloat4::loadu(grid + 1*line_offset); // this accesses 2 elements too much in case of 2x2 grid, but this is ok as we ensure enough padding after the grid
156
if (unlikely(line_offset == 2))
157
{
158
r0 = shuffle<0,1,1,1>(r0);
159
r1 = shuffle<0,1,1,1>(r1);
160
}
161
return Vec3vf4(unpacklo(r0,r1), // r00, r10, r01, r11
162
shuffle<1,1,2,2>(r0), // r01, r01, r02, r02
163
shuffle<0,1,1,2>(r1)); // r10, r11, r11, r12
164
}
165
166
static __forceinline void gather(const float* const grid_x,
167
const float* const grid_y,
168
const float* const grid_z,
169
const size_t line_offset,
170
const size_t lines,
171
Vec3vf4& v0_o,
172
Vec3vf4& v1_o,
173
Vec3vf4& v2_o)
174
{
175
const Vec3vf4 tri_v012_x = gather(grid_x,line_offset,lines);
176
const Vec3vf4 tri_v012_y = gather(grid_y,line_offset,lines);
177
const Vec3vf4 tri_v012_z = gather(grid_z,line_offset,lines);
178
v0_o = Vec3vf4(tri_v012_x[0],tri_v012_y[0],tri_v012_z[0]);
179
v1_o = Vec3vf4(tri_v012_x[1],tri_v012_y[1],tri_v012_z[1]);
180
v2_o = Vec3vf4(tri_v012_x[2],tri_v012_y[2],tri_v012_z[2]);
181
}
182
};
183
184
#if defined (__AVX__)
185
struct Gather3x3
186
{
187
enum { M = 8 };
188
typedef vbool8 vbool;
189
typedef vint8 vint;
190
typedef vfloat8 vfloat;
191
192
static __forceinline const Vec3vf8 gather(const float* const grid, const size_t line_offset, const size_t lines)
193
{
194
vfloat4 ra = vfloat4::loadu(grid + 0*line_offset);
195
vfloat4 rb = vfloat4::loadu(grid + 1*line_offset); // this accesses 2 elements too much in case of 2x2 grid, but this is ok as we ensure enough padding after the grid
196
vfloat4 rc;
197
if (likely(lines > 2))
198
rc = vfloat4::loadu(grid + 2*line_offset);
199
else
200
rc = rb;
201
202
if (unlikely(line_offset == 2))
203
{
204
ra = shuffle<0,1,1,1>(ra);
205
rb = shuffle<0,1,1,1>(rb);
206
rc = shuffle<0,1,1,1>(rc);
207
}
208
209
const vfloat8 r0 = vfloat8(ra,rb);
210
const vfloat8 r1 = vfloat8(rb,rc);
211
return Vec3vf8(unpacklo(r0,r1), // r00, r10, r01, r11, r10, r20, r11, r21
212
shuffle<1,1,2,2>(r0), // r01, r01, r02, r02, r11, r11, r12, r12
213
shuffle<0,1,1,2>(r1)); // r10, r11, r11, r12, r20, r21, r21, r22
214
}
215
216
static __forceinline void gather(const float* const grid_x,
217
const float* const grid_y,
218
const float* const grid_z,
219
const size_t line_offset,
220
const size_t lines,
221
Vec3vf8& v0_o,
222
Vec3vf8& v1_o,
223
Vec3vf8& v2_o)
224
{
225
const Vec3vf8 tri_v012_x = gather(grid_x,line_offset,lines);
226
const Vec3vf8 tri_v012_y = gather(grid_y,line_offset,lines);
227
const Vec3vf8 tri_v012_z = gather(grid_z,line_offset,lines);
228
v0_o = Vec3vf8(tri_v012_x[0],tri_v012_y[0],tri_v012_z[0]);
229
v1_o = Vec3vf8(tri_v012_x[1],tri_v012_y[1],tri_v012_z[1]);
230
v2_o = Vec3vf8(tri_v012_x[2],tri_v012_y[2],tri_v012_z[2]);
231
}
232
};
233
#endif
234
235
template<typename vfloat>
236
static __forceinline Vec2<vfloat> decodeUV(const vfloat& uv)
237
{
238
typedef typename vfloat::Int vint;
239
const vint iu = asInt(uv) & 0xffff;
240
const vint iv = srl(asInt(uv),16);
241
const vfloat u = (vfloat)iu * vfloat(8.0f/0x10000);
242
const vfloat v = (vfloat)iv * vfloat(8.0f/0x10000);
243
return Vec2<vfloat>(u,v);
244
}
245
246
/*! returns the geometry ID stored in this grid */
__forceinline unsigned int geomID() const { return _geomID; }
249
250
/*! returns the primitive ID stored in this grid */
__forceinline unsigned int primID() const { return _primID; }
253
254
public:
255
BVH4::NodeRef troot;
256
#if !defined(__64BIT__)
257
unsigned align1;
258
#endif
259
unsigned time_steps;
260
unsigned width;
261
262
unsigned height;
263
unsigned dim_offset;
264
unsigned _geomID;
265
unsigned _primID;
266
267
unsigned align2;
268
unsigned gridOffset;
269
unsigned gridBytes;
270
unsigned rootOffset;
271
272
char data[1]; //!< after the struct we first store the BVH, then the grid, and finally the roots
273
};
274
}
275
}
276
277