Path: blob/master/thirdparty/embree/kernels/bvh/bvh_node_qaabb.h
9906 views
// Copyright 2009-2021 Intel Corporation1// SPDX-License-Identifier: Apache-2.023#pragma once45#include "bvh_node_base.h"67namespace embree8{9/*! BVHN Quantized Node */10template<int N>11struct __aligned(8) QuantizedBaseNode_t12{13typedef unsigned char T;14static const T MIN_QUAN = 0;15static const T MAX_QUAN = 255;1617/*! Clears the node. */18__forceinline void clear() {19for (size_t i=0; i<N; i++) lower_x[i] = lower_y[i] = lower_z[i] = MAX_QUAN;20for (size_t i=0; i<N; i++) upper_x[i] = upper_y[i] = upper_z[i] = MIN_QUAN;21}2223/*! Returns bounds of specified child. */24__forceinline BBox3fa bounds(size_t i) const25{26assert(i < N);27const Vec3fa lower(madd(scale.x,(float)lower_x[i],start.x),28madd(scale.y,(float)lower_y[i],start.y),29madd(scale.z,(float)lower_z[i],start.z));30const Vec3fa upper(madd(scale.x,(float)upper_x[i],start.x),31madd(scale.y,(float)upper_y[i],start.y),32madd(scale.z,(float)upper_z[i],start.z));33return BBox3fa(lower,upper);34}3536/*! Returns extent of bounds of specified child. */37__forceinline Vec3fa extent(size_t i) const {38return bounds(i).size();39}4041static __forceinline void init_dim(const vfloat<N> &lower,42const vfloat<N> &upper,43T lower_quant[N],44T upper_quant[N],45float &start,46float &scale)47{48/* quantize bounds */49const vbool<N> m_valid = lower != vfloat<N>(pos_inf);50const float minF = reduce_min(lower);51const float maxF = reduce_max(upper);52float diff = (1.0f+2.0f*float(ulp))*(maxF - minF);53float decode_scale = diff / float(MAX_QUAN);54if (decode_scale == 0.0f) decode_scale = 2.0f*FLT_MIN; // result may have been flushed to zero55assert(madd(decode_scale,float(MAX_QUAN),minF) >= maxF);56const float encode_scale = diff > 0 ? (float(MAX_QUAN) / diff) : 0.0f;57vint<N> ilower = max(vint<N>(floor((lower - vfloat<N>(minF))*vfloat<N>(encode_scale))),MIN_QUAN);58vint<N> iupper = min(vint<N>(ceil ((upper - vfloat<N>(minF))*vfloat<N>(encode_scale))),MAX_QUAN);5960/* lower/upper correction */61vbool<N> m_lower_correction = (madd(vfloat<N>(ilower),decode_scale,minF)) > lower;62vbool<N> m_upper_correction = (madd(vfloat<N>(iupper),decode_scale,minF)) < upper;63ilower = max(select(m_lower_correction,ilower-1,ilower),MIN_QUAN);64iupper = min(select(m_upper_correction,iupper+1,iupper),MAX_QUAN);6566/* disable invalid lanes */67ilower = select(m_valid,ilower,MAX_QUAN);68iupper = select(m_valid,iupper,MIN_QUAN);6970/* store as uchar to memory */71vint<N>::store(lower_quant,ilower);72vint<N>::store(upper_quant,iupper);73start = minF;74scale = decode_scale;7576#if defined(DEBUG)77vfloat<N> extract_lower( vint<N>::loadu(lower_quant) );78vfloat<N> extract_upper( vint<N>::loadu(upper_quant) );79vfloat<N> final_extract_lower = madd(extract_lower,decode_scale,minF);80vfloat<N> final_extract_upper = madd(extract_upper,decode_scale,minF);81assert( (movemask(final_extract_lower <= lower ) & movemask(m_valid)) == movemask(m_valid));82assert( (movemask(final_extract_upper >= upper ) & movemask(m_valid)) == movemask(m_valid));83#endif84}8586__forceinline void init_dim(AABBNode_t<NodeRefPtr<N>,N>& node)87{88init_dim(node.lower_x,node.upper_x,lower_x,upper_x,start.x,scale.x);89init_dim(node.lower_y,node.upper_y,lower_y,upper_y,start.y,scale.y);90init_dim(node.lower_z,node.upper_z,lower_z,upper_z,start.z,scale.z);91}9293__forceinline vbool<N> validMask() const { return vint<N>::loadu(lower_x) <= vint<N>::loadu(upper_x); }9495#if defined(__AVX512F__) // KNL96__forceinline vbool16 validMask16() const { return le(0xff,vint<16>::loadu(lower_x),vint<16>::loadu(upper_x)); }97#endif98__forceinline vfloat<N> dequantizeLowerX() const { return madd(vfloat<N>(vint<N>::loadu(lower_x)),scale.x,vfloat<N>(start.x)); }99100__forceinline vfloat<N> dequantizeUpperX() const { return madd(vfloat<N>(vint<N>::loadu(upper_x)),scale.x,vfloat<N>(start.x)); }101102__forceinline vfloat<N> dequantizeLowerY() const { return madd(vfloat<N>(vint<N>::loadu(lower_y)),scale.y,vfloat<N>(start.y)); }103104__forceinline vfloat<N> dequantizeUpperY() const { return madd(vfloat<N>(vint<N>::loadu(upper_y)),scale.y,vfloat<N>(start.y)); }105106__forceinline vfloat<N> dequantizeLowerZ() const { return madd(vfloat<N>(vint<N>::loadu(lower_z)),scale.z,vfloat<N>(start.z)); }107108__forceinline vfloat<N> dequantizeUpperZ() const { return madd(vfloat<N>(vint<N>::loadu(upper_z)),scale.z,vfloat<N>(start.z)); }109110template <int M>111__forceinline vfloat<M> dequantize(const size_t offset) const { return vfloat<M>(vint<M>::loadu(all_planes+offset)); }112113#if defined(__AVX512F__)114__forceinline vfloat16 dequantizeLowerUpperX(const vint16 &p) const { return madd(vfloat16(permute(vint<16>::loadu(lower_x),p)),scale.x,vfloat16(start.x)); }115__forceinline vfloat16 dequantizeLowerUpperY(const vint16 &p) const { return madd(vfloat16(permute(vint<16>::loadu(lower_y),p)),scale.y,vfloat16(start.y)); }116__forceinline vfloat16 dequantizeLowerUpperZ(const vint16 &p) const { return madd(vfloat16(permute(vint<16>::loadu(lower_z),p)),scale.z,vfloat16(start.z)); }117#endif118119union {120struct {121T lower_x[N]; //!< 8bit discretized X dimension of lower bounds of all N children122T upper_x[N]; //!< 8bit discretized X dimension of upper bounds of all N children123T lower_y[N]; //!< 8bit discretized Y dimension of lower bounds of all N children124T upper_y[N]; //!< 8bit discretized Y dimension of upper bounds of all N children125T lower_z[N]; //!< 8bit discretized Z dimension of lower bounds of all N children126T upper_z[N]; //!< 8bit discretized Z dimension of upper bounds of all N children127};128T all_planes[6*N];129};130131Vec3f start;132Vec3f scale;133134friend embree_ostream operator<<(embree_ostream o, const QuantizedBaseNode_t& n)135{136o << "QuantizedBaseNode { " << embree_endl;137o << " start " << n.start << embree_endl;138o << " scale " << n.scale << embree_endl;139o << " lower_x " << vuint<N>::loadu(n.lower_x) << embree_endl;140o << " upper_x " << vuint<N>::loadu(n.upper_x) << embree_endl;141o << " lower_y " << vuint<N>::loadu(n.lower_y) << embree_endl;142o << " upper_y " << vuint<N>::loadu(n.upper_y) << embree_endl;143o << " lower_z " << vuint<N>::loadu(n.lower_z) << embree_endl;144o << " upper_z " << vuint<N>::loadu(n.upper_z) << embree_endl;145o << "}" << embree_endl;146return o;147}148149};150151template<typename NodeRef, int N>152struct __aligned(8) QuantizedNode_t : public BaseNode_t<NodeRef, N>, QuantizedBaseNode_t<N>153{154using BaseNode_t<NodeRef,N>::children;155using QuantizedBaseNode_t<N>::lower_x;156using QuantizedBaseNode_t<N>::upper_x;157using QuantizedBaseNode_t<N>::lower_y;158using QuantizedBaseNode_t<N>::upper_y;159using QuantizedBaseNode_t<N>::lower_z;160using QuantizedBaseNode_t<N>::upper_z;161using QuantizedBaseNode_t<N>::start;162using QuantizedBaseNode_t<N>::scale;163using QuantizedBaseNode_t<N>::init_dim;164165__forceinline void setRef(size_t i, const NodeRef& ref) {166assert(i < N);167children[i] = ref;168}169170struct Create2171{172template<typename BuildRecord>173__forceinline NodeRef operator() (BuildRecord* children, const size_t n, const FastAllocator::CachedAllocator& alloc) const174{175__aligned(64) AABBNode_t<NodeRef,N> node;176node.clear();177for (size_t i=0; i<n; i++) {178node.setBounds(i,children[i].bounds());179}180QuantizedNode_t *qnode = (QuantizedNode_t*) alloc.malloc0(sizeof(QuantizedNode_t), NodeRef::byteAlignment);181qnode->init(node);182183return (size_t)qnode | NodeRef::tyQuantizedNode;184}185};186187struct Set2188{189template<typename BuildRecord>190__forceinline NodeRef operator() (const BuildRecord& precord, const BuildRecord* crecords, NodeRef ref, NodeRef* children, const size_t num) const191{192#if defined(DEBUG)193// check that empty children are only at the end of the child list194bool emptyChild = false;195for (size_t i=0; i<num; i++) {196emptyChild |= (children[i] == NodeRef::emptyNode);197assert(emptyChild == (children[i] == NodeRef::emptyNode));198}199#endif200QuantizedNode_t* node = ref.quantizedNode();201for (size_t i=0; i<num; i++) node->setRef(i,children[i]);202return ref;203}204};205206__forceinline void init(AABBNode_t<NodeRef,N>& node)207{208for (size_t i=0;i<N;i++) children[i] = NodeRef::emptyNode;209init_dim(node);210}211212};213214/*! BVHN Quantized Node */215template<int N>216struct __aligned(8) QuantizedBaseNodeMB_t217{218QuantizedBaseNode_t<N> node0;219QuantizedBaseNode_t<N> node1;220221/*! Clears the node. */222__forceinline void clear() {223node0.clear();224node1.clear();225}226227/*! Returns bounds of specified child. */228__forceinline BBox3fa bounds(size_t i) const229{230assert(i < N);231BBox3fa bounds0 = node0.bounds(i);232BBox3fa bounds1 = node1.bounds(i);233bounds0.extend(bounds1);234return bounds0;235}236237/*! Returns extent of bounds of specified child. */238__forceinline Vec3fa extent(size_t i) const {239return bounds(i).size();240}241242__forceinline vbool<N> validMask() const { return node0.validMask(); }243244template<typename T>245__forceinline vfloat<N> dequantizeLowerX(const T t) const { return lerp(node0.dequantizeLowerX(),node1.dequantizeLowerX(),t); }246template<typename T>247__forceinline vfloat<N> dequantizeUpperX(const T t) const { return lerp(node0.dequantizeUpperX(),node1.dequantizeUpperX(),t); }248template<typename T>249__forceinline vfloat<N> dequantizeLowerY(const T t) const { return lerp(node0.dequantizeLowerY(),node1.dequantizeLowerY(),t); }250template<typename T>251__forceinline vfloat<N> dequantizeUpperY(const T t) const { return lerp(node0.dequantizeUpperY(),node1.dequantizeUpperY(),t); }252template<typename T>253__forceinline vfloat<N> dequantizeLowerZ(const T t) const { return lerp(node0.dequantizeLowerZ(),node1.dequantizeLowerZ(),t); }254template<typename T>255__forceinline vfloat<N> dequantizeUpperZ(const T t) const { return lerp(node0.dequantizeUpperZ(),node1.dequantizeUpperZ(),t); }256257258template<int M>259__forceinline vfloat<M> dequantizeLowerX(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeLowerX()[i]),vfloat<M>(node1.dequantizeLowerX()[i]),t); }260template<int M>261__forceinline vfloat<M> dequantizeUpperX(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeUpperX()[i]),vfloat<M>(node1.dequantizeUpperX()[i]),t); }262template<int M>263__forceinline vfloat<M> dequantizeLowerY(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeLowerY()[i]),vfloat<M>(node1.dequantizeLowerY()[i]),t); }264template<int M>265__forceinline vfloat<M> dequantizeUpperY(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeUpperY()[i]),vfloat<M>(node1.dequantizeUpperY()[i]),t); }266template<int M>267__forceinline vfloat<M> dequantizeLowerZ(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeLowerZ()[i]),vfloat<M>(node1.dequantizeLowerZ()[i]),t); }268template<int M>269__forceinline vfloat<M> dequantizeUpperZ(const size_t i, const vfloat<M> &t) const { return lerp(vfloat<M>(node0.dequantizeUpperZ()[i]),vfloat<M>(node1.dequantizeUpperZ()[i]),t); }270271};272}273274275