// Path: blob/master/thirdparty/embree/kernels/geometry/grid_soa.h
// 9905 views
// Copyright 2009-2021 Intel Corporation1// SPDX-License-Identifier: Apache-2.023#pragma once45#include "../common/ray.h"6#include "../common/scene_subdiv_mesh.h"7#include "../bvh/bvh.h"8#include "../subdiv/tessellation.h"9#include "../subdiv/tessellation_cache.h"10#include "subdivpatch1.h"1112namespace embree13{14namespace isa15{16class GridSOA17{18public:1920/*! GridSOA constructor */21GridSOA(const SubdivPatch1Base* patches, const unsigned time_steps,22const unsigned x0, const unsigned x1, const unsigned y0, const unsigned y1, const unsigned swidth, const unsigned sheight,23const SubdivMesh* const geom, const size_t totalBvhBytes, const size_t gridBytes, BBox3fa* bounds_o = nullptr);2425/*! Subgrid creation */26template<typename Allocator>27static GridSOA* create(const SubdivPatch1Base* patches, const unsigned time_steps,28unsigned x0, unsigned x1, unsigned y0, unsigned y1,29const Scene* scene, Allocator& alloc, BBox3fa* bounds_o = nullptr)30{31const unsigned width = x1-x0+1;32const unsigned height = y1-y0+1;33const GridRange range(0,width-1,0,height-1);34size_t bvhBytes = 0;35if (time_steps == 1)36bvhBytes = getBVHBytes(range,sizeof(BVH4::AABBNode),0);37else {38bvhBytes = (time_steps-1)*getBVHBytes(range,sizeof(BVH4::AABBNodeMB),0);39bvhBytes += getTemporalBVHBytes(make_range(0,int(time_steps-1)),sizeof(BVH4::AABBNodeMB4D));40}41const size_t gridBytes = 4*size_t(width)*size_t(height)*sizeof(float);42size_t rootBytes = time_steps*sizeof(BVH4::NodeRef);43#if !defined(__64BIT__)44rootBytes += 4; // We read 2 elements behind the grid. As we store at least 8 root bytes after the grid we are fine in 64 bit mode. But in 32 bit mode we have to do additional padding.45#endif46void* data = alloc(offsetof(GridSOA,data)+bvhBytes+time_steps*gridBytes+rootBytes);47assert(data);48return new (data) GridSOA(patches,time_steps,x0,x1,y0,y1,patches->grid_u_res,patches->grid_v_res,scene->get<SubdivMesh>(patches->geomID()),bvhBytes,gridBytes,bounds_o);49}5051/*! 
Grid creation */52template<typename Allocator>53static GridSOA* create(const SubdivPatch1Base* const patches, const unsigned time_steps,54const Scene* scene, const Allocator& alloc, BBox3fa* bounds_o = nullptr)55{56return create(patches,time_steps,0,patches->grid_u_res-1,0,patches->grid_v_res-1,scene,alloc,bounds_o);57}5859/*! returns reference to root */60__forceinline BVH4::NodeRef& root(size_t t = 0) { return (BVH4::NodeRef&)data[rootOffset + t*sizeof(BVH4::NodeRef)]; }61__forceinline const BVH4::NodeRef& root(size_t t = 0) const { return (BVH4::NodeRef&)data[rootOffset + t*sizeof(BVH4::NodeRef)]; }6263/*! returns pointer to BVH array */64__forceinline char* bvhData() { return &data[0]; }65__forceinline const char* bvhData() const { return &data[0]; }6667/*! returns pointer to Grid array */68__forceinline float* gridData(size_t t = 0) { return (float*) &data[gridOffset + t*gridBytes]; }69__forceinline const float* gridData(size_t t = 0) const { return (float*) &data[gridOffset + t*gridBytes]; }7071__forceinline void* encodeLeaf(size_t u, size_t v) {72return (void*) (16*(v * width + u + 1)); // +1 to not create empty leaf73}74__forceinline float* decodeLeaf(size_t t, const void* ptr) {75return gridData(t) + (((size_t) (ptr) >> 4) - 1);76}7778/*! returns the size of the BVH over the grid in bytes */79static size_t getBVHBytes(const GridRange& range, const size_t nodeBytes, const size_t leafBytes);8081/*! returns the size of the temporal BVH over the time range BVHs */82static size_t getTemporalBVHBytes(const range<int> time_range, const size_t nodeBytes);8384/*! 
calculates bounding box of grid range */85__forceinline BBox3fa calculateBounds(size_t time, const GridRange& range) const86{87const float* const grid_array = gridData(time);88const float* const grid_x_array = grid_array + 0 * dim_offset;89const float* const grid_y_array = grid_array + 1 * dim_offset;90const float* const grid_z_array = grid_array + 2 * dim_offset;9192/* compute the bounds just for the range! */93BBox3fa bounds( empty );94for (unsigned v = range.v_start; v<=range.v_end; v++)95{96for (unsigned u = range.u_start; u<=range.u_end; u++)97{98const float x = grid_x_array[ v * width + u];99const float y = grid_y_array[ v * width + u];100const float z = grid_z_array[ v * width + u];101bounds.extend( Vec3fa(x,y,z) );102}103}104assert(is_finite(bounds));105return bounds;106}107108/*! Evaluates grid over patch and builds BVH4 tree over the grid. */109std::pair<BVH4::NodeRef,BBox3fa> buildBVH(BBox3fa* bounds_o);110111/*! Create BVH4 tree over grid. */112std::pair<BVH4::NodeRef,BBox3fa> buildBVH(const GridRange& range, size_t& allocator);113114/*! Evaluates grid over patch and builds MSMBlur BVH4 tree over the grid. */115std::pair<BVH4::NodeRef,LBBox3fa> buildMSMBlurBVH(const range<int> time_range, BBox3fa* bounds_o);116117/*! Create MBlur BVH4 tree over grid. */118std::pair<BVH4::NodeRef,LBBox3fa> buildMBlurBVH(size_t time, const GridRange& range, size_t& allocator);119120/*! Create MSMBlur BVH4 tree over grid. 
*/121std::pair<BVH4::NodeRef,LBBox3fa> buildMSMBlurBVH(const range<int> time_range, size_t& allocator, BBox3fa* bounds_o);122123template<typename Loader>124struct MapUV125{126typedef typename Loader::vfloat vfloat;127const float* const grid_uv;128size_t line_offset;129size_t lines;130131__forceinline MapUV(const float* const grid_uv, size_t line_offset, const size_t lines)132: grid_uv(grid_uv), line_offset(line_offset), lines(lines) {}133134__forceinline void operator() (vfloat& u, vfloat& v, Vec3<vfloat>& Ng) const {135const Vec3<vfloat> tri_v012_uv = Loader::gather(grid_uv,line_offset,lines);136const Vec2<vfloat> uv0 = GridSOA::decodeUV(tri_v012_uv[0]);137const Vec2<vfloat> uv1 = GridSOA::decodeUV(tri_v012_uv[1]);138const Vec2<vfloat> uv2 = GridSOA::decodeUV(tri_v012_uv[2]);139const Vec2<vfloat> uv = u * uv1 + v * uv2 + (1.0f-u-v) * uv0;140u = uv[0];v = uv[1];141}142};143144struct Gather2x3145{146enum { M = 4 };147typedef vbool4 vbool;148typedef vint4 vint;149typedef vfloat4 vfloat;150151static __forceinline const Vec3vf4 gather(const float* const grid, const size_t line_offset, const size_t lines)152{153vfloat4 r0 = vfloat4::loadu(grid + 0*line_offset);154vfloat4 r1 = vfloat4::loadu(grid + 1*line_offset); // this accesses 2 elements too much in case of 2x2 grid, but this is ok as we ensure enough padding after the grid155if (unlikely(line_offset == 2))156{157r0 = shuffle<0,1,1,1>(r0);158r1 = shuffle<0,1,1,1>(r1);159}160return Vec3vf4(unpacklo(r0,r1), // r00, r10, r01, r11161shuffle<1,1,2,2>(r0), // r01, r01, r02, r02162shuffle<0,1,1,2>(r1)); // r10, r11, r11, r12163}164165static __forceinline void gather(const float* const grid_x,166const float* const grid_y,167const float* const grid_z,168const size_t line_offset,169const size_t lines,170Vec3vf4& v0_o,171Vec3vf4& v1_o,172Vec3vf4& v2_o)173{174const Vec3vf4 tri_v012_x = gather(grid_x,line_offset,lines);175const Vec3vf4 tri_v012_y = gather(grid_y,line_offset,lines);176const Vec3vf4 tri_v012_z = 
gather(grid_z,line_offset,lines);177v0_o = Vec3vf4(tri_v012_x[0],tri_v012_y[0],tri_v012_z[0]);178v1_o = Vec3vf4(tri_v012_x[1],tri_v012_y[1],tri_v012_z[1]);179v2_o = Vec3vf4(tri_v012_x[2],tri_v012_y[2],tri_v012_z[2]);180}181};182183#if defined (__AVX__)184struct Gather3x3185{186enum { M = 8 };187typedef vbool8 vbool;188typedef vint8 vint;189typedef vfloat8 vfloat;190191static __forceinline const Vec3vf8 gather(const float* const grid, const size_t line_offset, const size_t lines)192{193vfloat4 ra = vfloat4::loadu(grid + 0*line_offset);194vfloat4 rb = vfloat4::loadu(grid + 1*line_offset); // this accesses 2 elements too much in case of 2x2 grid, but this is ok as we ensure enough padding after the grid195vfloat4 rc;196if (likely(lines > 2))197rc = vfloat4::loadu(grid + 2*line_offset);198else199rc = rb;200201if (unlikely(line_offset == 2))202{203ra = shuffle<0,1,1,1>(ra);204rb = shuffle<0,1,1,1>(rb);205rc = shuffle<0,1,1,1>(rc);206}207208const vfloat8 r0 = vfloat8(ra,rb);209const vfloat8 r1 = vfloat8(rb,rc);210return Vec3vf8(unpacklo(r0,r1), // r00, r10, r01, r11, r10, r20, r11, r21211shuffle<1,1,2,2>(r0), // r01, r01, r02, r02, r11, r11, r12, r12212shuffle<0,1,1,2>(r1)); // r10, r11, r11, r12, r20, r21, r21, r22213}214215static __forceinline void gather(const float* const grid_x,216const float* const grid_y,217const float* const grid_z,218const size_t line_offset,219const size_t lines,220Vec3vf8& v0_o,221Vec3vf8& v1_o,222Vec3vf8& v2_o)223{224const Vec3vf8 tri_v012_x = gather(grid_x,line_offset,lines);225const Vec3vf8 tri_v012_y = gather(grid_y,line_offset,lines);226const Vec3vf8 tri_v012_z = gather(grid_z,line_offset,lines);227v0_o = Vec3vf8(tri_v012_x[0],tri_v012_y[0],tri_v012_z[0]);228v1_o = Vec3vf8(tri_v012_x[1],tri_v012_y[1],tri_v012_z[1]);229v2_o = Vec3vf8(tri_v012_x[2],tri_v012_y[2],tri_v012_z[2]);230}231};232#endif233234template<typename vfloat>235static __forceinline Vec2<vfloat> decodeUV(const vfloat& uv)236{237typedef typename vfloat::Int vint;238const vint 
iu = asInt(uv) & 0xffff;239const vint iv = srl(asInt(uv),16);240const vfloat u = (vfloat)iu * vfloat(8.0f/0x10000);241const vfloat v = (vfloat)iv * vfloat(8.0f/0x10000);242return Vec2<vfloat>(u,v);243}244245__forceinline unsigned int geomID() const {246return _geomID;247}248249__forceinline unsigned int primID() const {250return _primID;251}252253public:254BVH4::NodeRef troot;255#if !defined(__64BIT__)256unsigned align1;257#endif258unsigned time_steps;259unsigned width;260261unsigned height;262unsigned dim_offset;263unsigned _geomID;264unsigned _primID;265266unsigned align2;267unsigned gridOffset;268unsigned gridBytes;269unsigned rootOffset;270271char data[1]; //!< after the struct we first store the BVH, then the grid, and finally the roots272};273}274}275276277