Path: blob/master/thirdparty/embree/kernels/geometry/grid_soa_intersector_packet.h
9905 views
// Copyright 2009-2021 Intel Corporation1// SPDX-License-Identifier: Apache-2.023#pragma once45#include "grid_soa.h"6#include "../common/ray.h"7#include "triangle_intersector_pluecker.h"89namespace embree10{11namespace isa12{13template<int K>14struct MapUV015{16const float* const grid_uv;17size_t ofs00, ofs01, ofs10, ofs11;1819__forceinline MapUV0(const float* const grid_uv, size_t ofs00, size_t ofs01, size_t ofs10, size_t ofs11)20: grid_uv(grid_uv), ofs00(ofs00), ofs01(ofs01), ofs10(ofs10), ofs11(ofs11) {}2122__forceinline void operator() (vfloat<K>& u, vfloat<K>& v, Vec3vf<K>& Ng) const {23const vfloat<K> uv00(grid_uv[ofs00]);24const vfloat<K> uv01(grid_uv[ofs01]);25const vfloat<K> uv10(grid_uv[ofs10]);26const vfloat<K> uv11(grid_uv[ofs11]);27const Vec2vf<K> uv0 = GridSOA::decodeUV(uv00);28const Vec2vf<K> uv1 = GridSOA::decodeUV(uv01);29const Vec2vf<K> uv2 = GridSOA::decodeUV(uv10);30const Vec2vf<K> uv = madd(u,uv1,madd(v,uv2,(1.0f-u-v)*uv0));31u = uv[0]; v = uv[1];32}33};3435template<int K>36struct MapUV137{38const float* const grid_uv;39size_t ofs00, ofs01, ofs10, ofs11;4041__forceinline MapUV1(const float* const grid_uv, size_t ofs00, size_t ofs01, size_t ofs10, size_t ofs11)42: grid_uv(grid_uv), ofs00(ofs00), ofs01(ofs01), ofs10(ofs10), ofs11(ofs11) {}4344__forceinline void operator() (vfloat<K>& u, vfloat<K>& v, Vec3vf<K>& Ng) const {45const vfloat<K> uv00(grid_uv[ofs00]);46const vfloat<K> uv01(grid_uv[ofs01]);47const vfloat<K> uv10(grid_uv[ofs10]);48const vfloat<K> uv11(grid_uv[ofs11]);49const Vec2vf<K> uv0 = GridSOA::decodeUV(uv10);50const Vec2vf<K> uv1 = GridSOA::decodeUV(uv01);51const Vec2vf<K> uv2 = GridSOA::decodeUV(uv11);52const Vec2vf<K> uv = madd(u,uv1,madd(v,uv2,(1.0f-u-v)*uv0));53u = uv[0]; v = uv[1];54}55};5657template<int K>58class GridSOAIntersectorK59{60public:61typedef void Primitive;6263class Precalculations64{65#if defined(__AVX__)66static const int M = 8;67#else68static const int M = 4;69#endif7071public:72__forceinline Precalculations (const vbool<K>& valid, const RayK<K>& ray)73: grid(nullptr), intersector(valid,ray) {}7475public:76GridSOA* grid;77PlueckerIntersectorK<M,K> intersector; // FIXME: use quad intersector78};7980/*! Intersect a ray with the primitive. */81static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const Primitive* prim, size_t& lazy_node)82{83const size_t dim_offset = pre.grid->dim_offset;84const size_t line_offset = pre.grid->width;85const float* const grid_x = pre.grid->decodeLeaf(0,prim);86const float* const grid_y = grid_x + 1 * dim_offset;87const float* const grid_z = grid_x + 2 * dim_offset;88const float* const grid_uv = grid_x + 3 * dim_offset;8990const size_t max_x = pre.grid->width == 2 ? 1 : 2;91const size_t max_y = pre.grid->height == 2 ? 1 : 2;92for (size_t y=0; y<max_y; y++)93{94for (size_t x=0; x<max_x; x++)95{96const size_t ofs00 = (y+0)*line_offset+(x+0);97const size_t ofs01 = (y+0)*line_offset+(x+1);98const size_t ofs10 = (y+1)*line_offset+(x+0);99const size_t ofs11 = (y+1)*line_offset+(x+1);100const Vec3vf<K> p00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);101const Vec3vf<K> p01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);102const Vec3vf<K> p10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);103const Vec3vf<K> p11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);104105pre.intersector.intersectK(valid_i,ray,p00,p01,p10,MapUV0<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID()));106pre.intersector.intersectK(valid_i,ray,p10,p01,p11,MapUV1<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID()));107}108}109}110111/*! Test if the ray is occluded by the primitive */112static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const Primitive* prim, size_t& lazy_node)113{114const size_t dim_offset = pre.grid->dim_offset;115const size_t line_offset = pre.grid->width;116const float* const grid_x = pre.grid->decodeLeaf(0,prim);117const float* const grid_y = grid_x + 1 * dim_offset;118const float* const grid_z = grid_x + 2 * dim_offset;119const float* const grid_uv = grid_x + 3 * dim_offset;120121vbool<K> valid = valid_i;122const size_t max_x = pre.grid->width == 2 ? 1 : 2;123const size_t max_y = pre.grid->height == 2 ? 1 : 2;124for (size_t y=0; y<max_y; y++)125{126for (size_t x=0; x<max_x; x++)127{128const size_t ofs00 = (y+0)*line_offset+(x+0);129const size_t ofs01 = (y+0)*line_offset+(x+1);130const size_t ofs10 = (y+1)*line_offset+(x+0);131const size_t ofs11 = (y+1)*line_offset+(x+1);132const Vec3vf<K> p00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);133const Vec3vf<K> p01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);134const Vec3vf<K> p10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);135const Vec3vf<K> p11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);136137pre.intersector.intersectK(valid,ray,p00,p01,p10,MapUV0<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID()));138if (none(valid)) break;139pre.intersector.intersectK(valid,ray,p10,p01,p11,MapUV1<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID()));140if (none(valid)) break;141}142}143return !valid;144}145146template<typename Loader>147static __forceinline void intersect(RayHitK<K>& ray, size_t k,148RayQueryContext* context,149const float* const grid_x,150const size_t line_offset,151const size_t lines,152Precalculations& pre)153{154typedef typename Loader::vfloat vfloat;155const size_t dim_offset = pre.grid->dim_offset;156const float* const grid_y = grid_x + 1 * dim_offset;157const float* const grid_z = grid_x + 2 * dim_offset;158const float* const grid_uv = grid_x + 3 * dim_offset;159Vec3<vfloat> v0, v1, v2; Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,v0,v1,v2);160pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Intersect1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID(),pre.grid->primID()));161};162163template<typename Loader>164static __forceinline bool occluded(RayK<K>& ray, size_t k,165RayQueryContext* context,166const float* const grid_x,167const size_t line_offset,168const size_t lines,169Precalculations& pre)170{171typedef typename Loader::vfloat vfloat;172const size_t dim_offset = pre.grid->dim_offset;173const float* const grid_y = grid_x + 1 * dim_offset;174const float* const grid_z = grid_x + 2 * dim_offset;175const float* const grid_uv = grid_x + 3 * dim_offset;176Vec3<vfloat> v0, v1, v2; Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,v0,v1,v2);177return pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Occluded1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID(),pre.grid->primID()));178}179180/*! Intersect a ray with the primitive. */181static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const Primitive* prim, size_t& lazy_node)182{183const size_t line_offset = pre.grid->width;184const size_t lines = pre.grid->height;185const float* const grid_x = pre.grid->decodeLeaf(0,prim);186#if defined(__AVX__)187intersect<GridSOA::Gather3x3>( ray, k, context, grid_x, line_offset, lines, pre);188#else189intersect<GridSOA::Gather2x3>(ray, k, context, grid_x , line_offset, lines, pre);190if (likely(lines > 2))191intersect<GridSOA::Gather2x3>(ray, k, context, grid_x+line_offset, line_offset, lines, pre);192#endif193}194195/*! Test if the ray is occluded by the primitive */196static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const Primitive* prim, size_t& lazy_node)197{198const size_t line_offset = pre.grid->width;199const size_t lines = pre.grid->height;200const float* const grid_x = pre.grid->decodeLeaf(0,prim);201202#if defined(__AVX__)203return occluded<GridSOA::Gather3x3>( ray, k, context, grid_x, line_offset, lines, pre);204#else205if (occluded<GridSOA::Gather2x3>(ray, k, context, grid_x , line_offset, lines, pre)) return true;206if (likely(lines > 2))207if (occluded<GridSOA::Gather2x3>(ray, k, context, grid_x+line_offset, line_offset, lines, pre)) return true;208#endif209return false;210}211};212213template<int K>214class GridSOAMBIntersectorK215{216public:217typedef void Primitive;218typedef typename GridSOAIntersectorK<K>::Precalculations Precalculations;219220/*! Intersect a ray with the primitive. */221static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, RayQueryContext* context, const Primitive* prim, size_t& lazy_node)222{223vfloat<K> vftime;224vint<K> vitime = getTimeSegment<K>(ray.time(), vfloat<K>((float)(pre.grid->time_steps-1)), vftime);225226vbool<K> valid1 = valid_i;227while (any(valid1)) {228const size_t j = bsf(movemask(valid1));229const int itime = vitime[j];230const vbool<K> valid2 = valid1 & (itime == vitime);231valid1 = valid1 & !valid2;232intersect(valid2,pre,ray,vftime,itime,context,prim,lazy_node);233}234}235236/*! Intersect a ray with the primitive. */237static __forceinline void intersect(const vbool<K>& valid_i, Precalculations& pre, RayHitK<K>& ray, const vfloat<K>& ftime, int itime, RayQueryContext* context, const Primitive* prim, size_t& lazy_node)238{239const size_t grid_offset = pre.grid->gridBytes >> 2;240const size_t dim_offset = pre.grid->dim_offset;241const size_t line_offset = pre.grid->width;242const float* const grid_x = pre.grid->decodeLeaf(itime,prim);243const float* const grid_y = grid_x + 1 * dim_offset;244const float* const grid_z = grid_x + 2 * dim_offset;245const float* const grid_uv = grid_x + 3 * dim_offset;246247const size_t max_x = pre.grid->width == 2 ? 1 : 2;248const size_t max_y = pre.grid->height == 2 ? 1 : 2;249for (size_t y=0; y<max_y; y++)250{251for (size_t x=0; x<max_x; x++)252{253size_t ofs00 = (y+0)*line_offset+(x+0);254size_t ofs01 = (y+0)*line_offset+(x+1);255size_t ofs10 = (y+1)*line_offset+(x+0);256size_t ofs11 = (y+1)*line_offset+(x+1);257const Vec3vf<K> a00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);258const Vec3vf<K> a01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);259const Vec3vf<K> a10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);260const Vec3vf<K> a11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);261ofs00 += grid_offset;262ofs01 += grid_offset;263ofs10 += grid_offset;264ofs11 += grid_offset;265const Vec3vf<K> b00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);266const Vec3vf<K> b01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);267const Vec3vf<K> b10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);268const Vec3vf<K> b11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);269const Vec3vf<K> p00 = lerp(a00,b00,ftime);270const Vec3vf<K> p01 = lerp(a01,b01,ftime);271const Vec3vf<K> p10 = lerp(a10,b10,ftime);272const Vec3vf<K> p11 = lerp(a11,b11,ftime);273274pre.intersector.intersectK(valid_i,ray,p00,p01,p10,MapUV0<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID()));275pre.intersector.intersectK(valid_i,ray,p10,p01,p11,MapUV1<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),IntersectKEpilogMU<1,K,true>(ray,context,pre.grid->geomID(),pre.grid->primID()));276}277}278}279280/*! Test if the ray is occluded by the primitive */281static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, RayQueryContext* context, const Primitive* prim, size_t& lazy_node)282{283vfloat<K> vftime;284vint<K> vitime = getTimeSegment<K>(ray.time(), vfloat<K>((float)(pre.grid->time_steps-1)), vftime);285286vbool<K> valid_o = valid_i;287vbool<K> valid1 = valid_i;288while (any(valid1)) {289const int j = int(bsf(movemask(valid1)));290const int itime = vitime[j];291const vbool<K> valid2 = valid1 & (itime == vitime);292valid1 = valid1 & !valid2;293valid_o &= !valid2 | occluded(valid2,pre,ray,vftime,itime,context,prim,lazy_node);294}295return !valid_o;296}297298/*! Test if the ray is occluded by the primitive */299static __forceinline vbool<K> occluded(const vbool<K>& valid_i, Precalculations& pre, RayK<K>& ray, const vfloat<K>& ftime, int itime, RayQueryContext* context, const Primitive* prim, size_t& lazy_node)300{301const size_t grid_offset = pre.grid->gridBytes >> 2;302const size_t dim_offset = pre.grid->dim_offset;303const size_t line_offset = pre.grid->width;304const float* const grid_x = pre.grid->decodeLeaf(itime,prim);305const float* const grid_y = grid_x + 1 * dim_offset;306const float* const grid_z = grid_x + 2 * dim_offset;307const float* const grid_uv = grid_x + 3 * dim_offset;308309vbool<K> valid = valid_i;310const size_t max_x = pre.grid->width == 2 ? 1 : 2;311const size_t max_y = pre.grid->height == 2 ? 1 : 2;312for (size_t y=0; y<max_y; y++)313{314for (size_t x=0; x<max_x; x++)315{316size_t ofs00 = (y+0)*line_offset+(x+0);317size_t ofs01 = (y+0)*line_offset+(x+1);318size_t ofs10 = (y+1)*line_offset+(x+0);319size_t ofs11 = (y+1)*line_offset+(x+1);320const Vec3vf<K> a00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);321const Vec3vf<K> a01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);322const Vec3vf<K> a10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);323const Vec3vf<K> a11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);324ofs00 += grid_offset;325ofs01 += grid_offset;326ofs10 += grid_offset;327ofs11 += grid_offset;328const Vec3vf<K> b00(grid_x[ofs00],grid_y[ofs00],grid_z[ofs00]);329const Vec3vf<K> b01(grid_x[ofs01],grid_y[ofs01],grid_z[ofs01]);330const Vec3vf<K> b10(grid_x[ofs10],grid_y[ofs10],grid_z[ofs10]);331const Vec3vf<K> b11(grid_x[ofs11],grid_y[ofs11],grid_z[ofs11]);332const Vec3vf<K> p00 = lerp(a00,b00,ftime);333const Vec3vf<K> p01 = lerp(a01,b01,ftime);334const Vec3vf<K> p10 = lerp(a10,b10,ftime);335const Vec3vf<K> p11 = lerp(a11,b11,ftime);336337pre.intersector.intersectK(valid,ray,p00,p01,p10,MapUV0<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID()));338if (none(valid)) break;339pre.intersector.intersectK(valid,ray,p10,p01,p11,MapUV1<K>(grid_uv,ofs00,ofs01,ofs10,ofs11),OccludedKEpilogMU<1,K,true>(valid,ray,context,pre.grid->geomID(),pre.grid->primID()));340if (none(valid)) break;341}342}343return valid;344}345346template<typename Loader>347static __forceinline void intersect(RayHitK<K>& ray, size_t k,348const float ftime,349RayQueryContext* context,350const float* const grid_x,351const size_t line_offset,352const size_t lines,353Precalculations& pre)354{355typedef typename Loader::vfloat vfloat;356const size_t grid_offset = pre.grid->gridBytes >> 2;357const size_t dim_offset = pre.grid->dim_offset;358const float* const grid_y = grid_x + 1 * dim_offset;359const float* const grid_z = grid_x + 2 * dim_offset;360const float* const grid_uv = grid_x + 3 * dim_offset;361362Vec3<vfloat> a0, a1, a2;363Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,a0,a1,a2);364365Vec3<vfloat> b0, b1, b2;366Loader::gather(grid_x+grid_offset,grid_y+grid_offset,grid_z+grid_offset,line_offset,lines,b0,b1,b2);367368Vec3<vfloat> v0 = lerp(a0,b0,vfloat(ftime));369Vec3<vfloat> v1 = lerp(a1,b1,vfloat(ftime));370Vec3<vfloat> v2 = lerp(a2,b2,vfloat(ftime));371372pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Intersect1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID(),pre.grid->primID()));373};374375template<typename Loader>376static __forceinline bool occluded(RayK<K>& ray, size_t k,377const float ftime,378RayQueryContext* context,379const float* const grid_x,380const size_t line_offset,381const size_t lines,382Precalculations& pre)383{384typedef typename Loader::vfloat vfloat;385const size_t grid_offset = pre.grid->gridBytes >> 2;386const size_t dim_offset = pre.grid->dim_offset;387const float* const grid_y = grid_x + 1 * dim_offset;388const float* const grid_z = grid_x + 2 * dim_offset;389const float* const grid_uv = grid_x + 3 * dim_offset;390391Vec3<vfloat> a0, a1, a2;392Loader::gather(grid_x,grid_y,grid_z,line_offset,lines,a0,a1,a2);393394Vec3<vfloat> b0, b1, b2;395Loader::gather(grid_x+grid_offset,grid_y+grid_offset,grid_z+grid_offset,line_offset,lines,b0,b1,b2);396397Vec3<vfloat> v0 = lerp(a0,b0,vfloat(ftime));398Vec3<vfloat> v1 = lerp(a1,b1,vfloat(ftime));399Vec3<vfloat> v2 = lerp(a2,b2,vfloat(ftime));400401return pre.intersector.intersect(ray,k,v0,v1,v2,GridSOA::MapUV<Loader>(grid_uv,line_offset,lines),Occluded1KEpilogMU<Loader::M,K,true>(ray,k,context,pre.grid->geomID(),pre.grid->primID()));402}403404/*! Intersect a ray with the primitive. */405static __forceinline void intersect(Precalculations& pre, RayHitK<K>& ray, size_t k, RayQueryContext* context, const Primitive* prim, size_t& lazy_node)406{407float ftime;408int itime = getTimeSegment(ray.time()[k], float(pre.grid->time_steps-1), ftime);409410const size_t line_offset = pre.grid->width;411const size_t lines = pre.grid->height;412const float* const grid_x = pre.grid->decodeLeaf(itime,prim);413414#if defined(__AVX__)415intersect<GridSOA::Gather3x3>( ray, k, ftime, context, grid_x, line_offset, lines, pre);416#else417intersect<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x, line_offset, lines, pre);418if (likely(lines > 2))419intersect<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x+line_offset, line_offset, lines, pre);420#endif421}422423/*! Test if the ray is occluded by the primitive */424static __forceinline bool occluded(Precalculations& pre, RayK<K>& ray, size_t k, RayQueryContext* context, const Primitive* prim, size_t& lazy_node)425{426float ftime;427int itime = getTimeSegment(ray.time()[k], float(pre.grid->time_steps-1), ftime);428429const size_t line_offset = pre.grid->width;430const size_t lines = pre.grid->height;431const float* const grid_x = pre.grid->decodeLeaf(itime,prim);432433#if defined(__AVX__)434return occluded<GridSOA::Gather3x3>( ray, k, ftime, context, grid_x, line_offset, lines, pre);435#else436if (occluded<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x, line_offset, lines, pre)) return true;437if (likely(lines > 2))438if (occluded<GridSOA::Gather2x3>(ray, k, ftime, context, grid_x+line_offset, line_offset, lines, pre)) return true;439#endif440return false;441}442};443}444}445446447