Path: blob/master/thirdparty/embree/kernels/bvh/node_intersector_frustum.h
9906 views
// Copyright 2009-2021 Intel Corporation1// SPDX-License-Identifier: Apache-2.023#pragma once45#include "node_intersector.h"67namespace embree8{9namespace isa10{11//////////////////////////////////////////////////////////////////////////////////////12// Frustum structure used in hybrid and stream traversal13//////////////////////////////////////////////////////////////////////////////////////1415/*16Optimized frustum test. We calculate t=(p-org)/dir in ray/box17intersection. We assume the rays are split by octant, thus18dir intervals are either positive or negative in each19dimension.2021Case 1: dir.min >= 0 && dir.max >= 0:22t_min = (p_min - org_max) / dir_max = (p_min - org_max)*rdir_min = p_min*rdir_min - org_max*rdir_min23t_max = (p_max - org_min) / dir_min = (p_max - org_min)*rdir_max = p_max*rdir_max - org_min*rdir_max2425Case 2: dir.min < 0 && dir.max < 0:26t_min = (p_max - org_min) / dir_min = (p_max - org_min)*rdir_max = p_max*rdir_max - org_min*rdir_max27t_max = (p_min - org_max) / dir_max = (p_min - org_max)*rdir_min = p_min*rdir_min - org_max*rdir_min28*/2930template<bool robust>31struct Frustum;3233/* Fast variant */34template<>35struct Frustum<false>36{37__forceinline Frustum() {}3839template<int K>40__forceinline void init(const vbool<K>& valid, const Vec3vf<K>& org, const Vec3vf<K>& rdir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)41{42const Vec3fa reduced_min_org(reduce_min(select(valid, org.x, pos_inf)),43reduce_min(select(valid, org.y, pos_inf)),44reduce_min(select(valid, org.z, pos_inf)));4546const Vec3fa reduced_max_org(reduce_max(select(valid, org.x, neg_inf)),47reduce_max(select(valid, org.y, neg_inf)),48reduce_max(select(valid, org.z, neg_inf)));4950const Vec3fa reduced_min_rdir(reduce_min(select(valid, rdir.x, pos_inf)),51reduce_min(select(valid, rdir.y, pos_inf)),52reduce_min(select(valid, rdir.z, pos_inf)));5354const Vec3fa reduced_max_rdir(reduce_max(select(valid, rdir.x, neg_inf)),55reduce_max(select(valid, rdir.y, neg_inf)),56reduce_max(select(valid, rdir.z, neg_inf)));5758const float reduced_min_dist = reduce_min(select(valid, ray_tnear, vfloat<K>(pos_inf)));59const float reduced_max_dist = reduce_max(select(valid, ray_tfar , vfloat<K>(neg_inf)));6061init(reduced_min_org, reduced_max_org, reduced_min_rdir, reduced_max_rdir, reduced_min_dist, reduced_max_dist, N);62}6364__forceinline void init(const Vec3fa& reduced_min_org,65const Vec3fa& reduced_max_org,66const Vec3fa& reduced_min_rdir,67const Vec3fa& reduced_max_rdir,68float reduced_min_dist,69float reduced_max_dist,70int N)71{72const Vec3ba pos_rdir = ge_mask(reduced_min_rdir, Vec3fa(zero));7374min_rdir = select(pos_rdir, reduced_min_rdir, reduced_max_rdir);75max_rdir = select(pos_rdir, reduced_max_rdir, reduced_min_rdir);7677#if defined (__aarch64__)78neg_min_org_rdir = -(min_rdir * select(pos_rdir, reduced_max_org, reduced_min_org));79neg_max_org_rdir = -(max_rdir * select(pos_rdir, reduced_min_org, reduced_max_org));80#else81min_org_rdir = min_rdir * select(pos_rdir, reduced_max_org, reduced_min_org);82max_org_rdir = max_rdir * select(pos_rdir, reduced_min_org, reduced_max_org);83#endif84min_dist = reduced_min_dist;85max_dist = reduced_max_dist;8687nf = NearFarPrecalculations(min_rdir, N);88}8990template<int K>91__forceinline void updateMaxDist(const vfloat<K>& ray_tfar)92{93max_dist = reduce_max(ray_tfar);94}9596NearFarPrecalculations nf;9798Vec3fa min_rdir;99Vec3fa max_rdir;100101#if defined (__aarch64__)102Vec3fa neg_min_org_rdir;103Vec3fa neg_max_org_rdir;104#else105Vec3fa min_org_rdir;106Vec3fa max_org_rdir;107#endif108float min_dist;109float max_dist;110};111112typedef Frustum<false> FrustumFast;113114/* Robust variant */115template<>116struct Frustum<true>117{118__forceinline Frustum() {}119120template<int K>121__forceinline void init(const vbool<K>& valid, const Vec3vf<K>& org, const Vec3vf<K>& rdir, const vfloat<K>& ray_tnear, const vfloat<K>& ray_tfar, int N)122{123const Vec3fa reduced_min_org(reduce_min(select(valid, org.x, pos_inf)),124reduce_min(select(valid, org.y, pos_inf)),125reduce_min(select(valid, org.z, pos_inf)));126127const Vec3fa reduced_max_org(reduce_max(select(valid, org.x, neg_inf)),128reduce_max(select(valid, org.y, neg_inf)),129reduce_max(select(valid, org.z, neg_inf)));130131const Vec3fa reduced_min_rdir(reduce_min(select(valid, rdir.x, pos_inf)),132reduce_min(select(valid, rdir.y, pos_inf)),133reduce_min(select(valid, rdir.z, pos_inf)));134135const Vec3fa reduced_max_rdir(reduce_max(select(valid, rdir.x, neg_inf)),136reduce_max(select(valid, rdir.y, neg_inf)),137reduce_max(select(valid, rdir.z, neg_inf)));138139const float reduced_min_dist = reduce_min(select(valid, ray_tnear, vfloat<K>(pos_inf)));140const float reduced_max_dist = reduce_max(select(valid, ray_tfar , vfloat<K>(neg_inf)));141142init(reduced_min_org, reduced_max_org, reduced_min_rdir, reduced_max_rdir, reduced_min_dist, reduced_max_dist, N);143}144145__forceinline void init(const Vec3fa& reduced_min_org,146const Vec3fa& reduced_max_org,147const Vec3fa& reduced_min_rdir,148const Vec3fa& reduced_max_rdir,149float reduced_min_dist,150float reduced_max_dist,151int N)152{153const Vec3ba pos_rdir = ge_mask(reduced_min_rdir, Vec3fa(zero));154min_rdir = select(pos_rdir, reduced_min_rdir, reduced_max_rdir);155max_rdir = select(pos_rdir, reduced_max_rdir, reduced_min_rdir);156157min_org = select(pos_rdir, reduced_max_org, reduced_min_org);158max_org = select(pos_rdir, reduced_min_org, reduced_max_org);159160min_dist = reduced_min_dist;161max_dist = reduced_max_dist;162163nf = NearFarPrecalculations(min_rdir, N);164}165166template<int K>167__forceinline void updateMaxDist(const vfloat<K>& ray_tfar)168{169max_dist = reduce_max(ray_tfar);170}171172NearFarPrecalculations nf;173174Vec3fa min_rdir;175Vec3fa max_rdir;176177Vec3fa min_org;178Vec3fa max_org;179180float min_dist;181float max_dist;182};183184typedef Frustum<true> FrustumRobust;185186//////////////////////////////////////////////////////////////////////////////////////187// Fast AABBNode intersection188//////////////////////////////////////////////////////////////////////////////////////189190template<int N>191__forceinline size_t intersectNodeFrustum(const typename BVHN<N>::AABBNode* __restrict__ node,192const FrustumFast& frustum, vfloat<N>& dist)193{194const vfloat<N> bminX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearX);195const vfloat<N> bminY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearY);196const vfloat<N> bminZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearZ);197const vfloat<N> bmaxX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farX);198const vfloat<N> bmaxY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farY);199const vfloat<N> bmaxZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farZ);200201#if defined (__aarch64__)202const vfloat<N> fminX = madd(bminX, vfloat<N>(frustum.min_rdir.x), vfloat<N>(frustum.neg_min_org_rdir.x));203const vfloat<N> fminY = madd(bminY, vfloat<N>(frustum.min_rdir.y), vfloat<N>(frustum.neg_min_org_rdir.y));204const vfloat<N> fminZ = madd(bminZ, vfloat<N>(frustum.min_rdir.z), vfloat<N>(frustum.neg_min_org_rdir.z));205const vfloat<N> fmaxX = madd(bmaxX, vfloat<N>(frustum.max_rdir.x), vfloat<N>(frustum.neg_max_org_rdir.x));206const vfloat<N> fmaxY = madd(bmaxY, vfloat<N>(frustum.max_rdir.y), vfloat<N>(frustum.neg_max_org_rdir.y));207const vfloat<N> fmaxZ = madd(bmaxZ, vfloat<N>(frustum.max_rdir.z), vfloat<N>(frustum.neg_max_org_rdir.z));208#else209const vfloat<N> fminX = msub(bminX, vfloat<N>(frustum.min_rdir.x), vfloat<N>(frustum.min_org_rdir.x));210const vfloat<N> fminY = msub(bminY, vfloat<N>(frustum.min_rdir.y), vfloat<N>(frustum.min_org_rdir.y));211const vfloat<N> fminZ = msub(bminZ, vfloat<N>(frustum.min_rdir.z), vfloat<N>(frustum.min_org_rdir.z));212const vfloat<N> fmaxX = msub(bmaxX, vfloat<N>(frustum.max_rdir.x), vfloat<N>(frustum.max_org_rdir.x));213const vfloat<N> fmaxY = msub(bmaxY, vfloat<N>(frustum.max_rdir.y), vfloat<N>(frustum.max_org_rdir.y));214const vfloat<N> fmaxZ = msub(bmaxZ, vfloat<N>(frustum.max_rdir.z), vfloat<N>(frustum.max_org_rdir.z));215#endif216const vfloat<N> fmin = maxi(fminX, fminY, fminZ, vfloat<N>(frustum.min_dist));217dist = fmin;218const vfloat<N> fmax = mini(fmaxX, fmaxY, fmaxZ, vfloat<N>(frustum.max_dist));219const vbool<N> vmask_node_hit = fmin <= fmax;220size_t m_node = movemask(vmask_node_hit) & (((size_t)1 << N)-1);221return m_node;222}223224//////////////////////////////////////////////////////////////////////////////////////225// Robust AABBNode intersection226//////////////////////////////////////////////////////////////////////////////////////227228template<int N>229__forceinline size_t intersectNodeFrustum(const typename BVHN<N>::AABBNode* __restrict__ node,230const FrustumRobust& frustum, vfloat<N>& dist)231{232const vfloat<N> bminX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearX);233const vfloat<N> bminY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearY);234const vfloat<N> bminZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.nearZ);235const vfloat<N> bmaxX = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farX);236const vfloat<N> bmaxY = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farY);237const vfloat<N> bmaxZ = *(const vfloat<N>*)((const char*)&node->lower_x + frustum.nf.farZ);238239const vfloat<N> fminX = (bminX - vfloat<N>(frustum.min_org.x)) * vfloat<N>(frustum.min_rdir.x);240const vfloat<N> fminY = (bminY - vfloat<N>(frustum.min_org.y)) * vfloat<N>(frustum.min_rdir.y);241const vfloat<N> fminZ = (bminZ - vfloat<N>(frustum.min_org.z)) * vfloat<N>(frustum.min_rdir.z);242const vfloat<N> fmaxX = (bmaxX - vfloat<N>(frustum.max_org.x)) * vfloat<N>(frustum.max_rdir.x);243const vfloat<N> fmaxY = (bmaxY - vfloat<N>(frustum.max_org.y)) * vfloat<N>(frustum.max_rdir.y);244const vfloat<N> fmaxZ = (bmaxZ - vfloat<N>(frustum.max_org.z)) * vfloat<N>(frustum.max_rdir.z);245246const float round_down = 1.0f-2.0f*float(ulp); // FIXME: use per instruction rounding for AVX512247const float round_up = 1.0f+2.0f*float(ulp);248const vfloat<N> fmin = max(fminX, fminY, fminZ, vfloat<N>(frustum.min_dist));249dist = fmin;250const vfloat<N> fmax = min(fmaxX, fmaxY, fmaxZ, vfloat<N>(frustum.max_dist));251const vbool<N> vmask_node_hit = (round_down*fmin <= round_up*fmax);252size_t m_node = movemask(vmask_node_hit) & (((size_t)1 << N)-1);253return m_node;254}255}256}257258259