Path: blob/master/thirdparty/embree/kernels/geometry/curve_intersector_sweep.h
9905 views
// Copyright 2009-2021 Intel Corporation1// SPDX-License-Identifier: Apache-2.023#pragma once45#include "../common/ray.h"6#include "cylinder.h"7#include "plane.h"8#include "line_intersector.h"9#include "curve_intersector_precalculations.h"1011namespace embree12{13namespace isa14{15static const size_t numJacobianIterations = 5;16#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)17static const size_t numBezierSubdivisions = 2;18#elif defined(__AVX__)19static const size_t numBezierSubdivisions = 2;20#else21static const size_t numBezierSubdivisions = 3;22#endif2324struct BezierCurveHit25{26__forceinline BezierCurveHit() {}2728__forceinline BezierCurveHit(const float t, const float u, const Vec3fa& Ng)29: t(t), u(u), v(0.0f), Ng(Ng) {}3031__forceinline BezierCurveHit(const float t, const float u, const float v, const Vec3fa& Ng)32: t(t), u(u), v(v), Ng(Ng) {}3334__forceinline void finalize() {}3536public:37float t;38float u;39float v;40Vec3fa Ng;41};4243template<typename NativeCurve3ff, typename Ray, typename Epilog>44__forceinline bool intersect_bezier_iterative_debug(const Ray& ray, const float dt, const NativeCurve3ff& curve, size_t i,45const vfloatx& u, const BBox<vfloatx>& tp, const BBox<vfloatx>& h0, const BBox<vfloatx>& h1,46const Vec3vfx& Ng, const Vec4vfx& dP0du, const Vec4vfx& dP3du,47const Epilog& epilog)48{49if (tp.lower[i]+dt > ray.tfar) return false;50Vec3fa Ng_o = Vec3fa(Ng.x[i],Ng.y[i],Ng.z[i]);51if (h0.lower[i] == tp.lower[i]) Ng_o = -Vec3fa(dP0du.x[i],dP0du.y[i],dP0du.z[i]);52if (h1.lower[i] == tp.lower[i]) Ng_o = +Vec3fa(dP3du.x[i],dP3du.y[i],dP3du.z[i]);53BezierCurveHit hit(tp.lower[i]+dt,u[i],Ng_o);54return epilog(hit);55}5657template<typename NativeCurve3ff, typename Ray, typename Epilog>58__forceinline bool intersect_bezier_iterative_jacobian(const Ray& ray, const float dt, const NativeCurve3ff& curve, float u, float t, const Epilog& epilog)59{60const Vec3fa org = zero;61const Vec3fa dir = ray.dir;62const float length_ray_dir = length(dir);6364/* error of curve evaluations is proportional to largest coordinate */65const BBox3ff box = curve.bounds();66const float P_err = 16.0f*float(ulp)*reduce_max(max(abs(box.lower),abs(box.upper)));6768for (size_t i=0; i<numJacobianIterations; i++)69{70const Vec3fa Q = madd(Vec3fa(t),dir,org);71//const Vec3fa dQdu = zero;72const Vec3fa dQdt = dir;73const float Q_err = 16.0f*float(ulp)*length_ray_dir*t; // works as org=zero here7475Vec3ff P,dPdu,ddPdu; curve.eval(u,P,dPdu,ddPdu);76//const Vec3fa dPdt = zero;7778const Vec3fa R = Q-P;79const float len_R = length(R); //reduce_max(abs(R));80const float R_err = max(Q_err,P_err);81const Vec3fa dRdu = /*dQdu*/-dPdu;82const Vec3fa dRdt = dQdt;//-dPdt;8384const Vec3fa T = normalize(dPdu);85const Vec3fa dTdu = dnormalize(dPdu,ddPdu);86//const Vec3fa dTdt = zero;87const float cos_err = P_err/length(dPdu);8889/* Error estimate for dot(R,T):9091dot(R,T) = cos(R,T) |R| |T|92= (cos(R,T) +- cos_error) * (|R| +- |R|_err) * (|T| +- |T|_err)93= cos(R,T)*|R|*|T|94+- cos(R,T)*(|R|*|T|_err + |T|*|R|_err)95+- cos_error*(|R| + |T|)96+- lower order terms97with cos(R,T) being in [0,1] and |T| = 1 we get:98dot(R,T)_err = |R|*|T|_err + |R|_err = cos_error*(|R|+1)99*/100101const float f = dot(R,T);102const float f_err = len_R*P_err + R_err + cos_err*(1.0f+len_R);103const float dfdu = dot(dRdu,T) + dot(R,dTdu);104const float dfdt = dot(dRdt,T);// + dot(R,dTdt);105106const float K = dot(R,R)-sqr(f);107const float dKdu = /*2.0f*/(dot(R,dRdu)-f*dfdu);108const float dKdt = /*2.0f*/(dot(R,dRdt)-f*dfdt);109const float rsqrt_K = rsqrt(K);110111const float g = sqrt(K)-P.w;112const float g_err = R_err + f_err + 16.0f*float(ulp)*box.upper.w;113const float dgdu = /*0.5f*/dKdu*rsqrt_K-dPdu.w;114const float dgdt = /*0.5f*/dKdt*rsqrt_K;//-dPdt.w;115116const LinearSpace2f J = LinearSpace2f(dfdu,dfdt,dgdu,dgdt);117const Vec2f dut = rcp(J)*Vec2f(f,g);118const Vec2f ut = Vec2f(u,t) - dut;119u = ut.x; t = ut.y;120121if (abs(f) < f_err && abs(g) < g_err)122{123t+=dt;124if (!(ray.tnear() <= t && t <= ray.tfar)) return false; // rejects NaNs125if (!(u >= 0.0f && u <= 1.0f)) return false; // rejects NaNs126const Vec3fa R = normalize(Q-P);127const Vec3fa U = madd(Vec3fa(dPdu.w),R,dPdu);128const Vec3fa V = cross(dPdu,R);129BezierCurveHit hit(t,u,cross(V,U));130return epilog(hit);131}132}133return false;134}135136#if !defined(__SYCL_DEVICE_ONLY__)137138template<typename NativeCurve3ff, typename Ray, typename Epilog>139__forceinline bool intersect_bezier_recursive_jacobian(const Ray& ray, const float dt, const NativeCurve3ff& curve, const Epilog& epilog)140{141float u0 = 0.0f;142float u1 = 1.0f;143unsigned int depth = 1;144145#if defined(__AVX__)146enum { VSIZEX_ = 8 };147typedef vbool8 vboolx; // maximally 8-wide to work around KNL issues148typedef vint8 vintx;149typedef vfloat8 vfloatx;150#else151enum { VSIZEX_ = 4 };152typedef vbool4 vboolx;153typedef vint4 vintx;154typedef vfloat4 vfloatx;155#endif156157unsigned int maxDepth = numBezierSubdivisions;158bool found = false;159const Vec3fa org = zero;160const Vec3fa dir = ray.dir;161162unsigned int sptr = 0;163const unsigned int stack_size = numBezierSubdivisions+1; // +1 because of unstable workaround below164struct StackEntry {165vboolx valid;166vfloatx tlower;167float u0;168float u1;169unsigned int depth;170};171StackEntry stack[stack_size];172goto entry;173174/* terminate if stack is empty */175while (sptr)176{177/* pop from stack */178{179sptr--;180vboolx valid = stack[sptr].valid;181const vfloatx tlower = stack[sptr].tlower;182valid &= tlower+dt <= ray.tfar;183if (none(valid)) continue;184u0 = stack[sptr].u0;185u1 = stack[sptr].u1;186depth = stack[sptr].depth;187const size_t i = select_min(valid,tlower); clear(valid,i);188stack[sptr].valid = valid;189if (any(valid)) sptr++; // there are still items on the stack190191/* process next segment */192const vfloatx vu0 = lerp(u0,u1,vfloatx(step)*(1.0f/(vfloatx::size-1)));193u0 = vu0[i+0];194u1 = vu0[i+1];195}196entry:197198/* subdivide curve */199const float dscale = (u1-u0)*(1.0f/(3.0f*(vfloatx::size-1)));200const vfloatx vu0 = lerp(u0,u1,vfloatx(step)*(1.0f/(vfloatx::size-1)));201Vec4vfx P0, dP0du; curve.template veval<VSIZEX_>(vu0,P0,dP0du); dP0du = dP0du * Vec4vfx(dscale);202const Vec4vfx P3 = shift_right_1(P0);203const Vec4vfx dP3du = shift_right_1(dP0du);204const Vec4vfx P1 = P0 + dP0du;205const Vec4vfx P2 = P3 - dP3du;206207/* calculate bounding cylinders */208const vfloatx rr1 = sqr_point_to_line_distance(Vec3vfx(dP0du),Vec3vfx(P3-P0));209const vfloatx rr2 = sqr_point_to_line_distance(Vec3vfx(dP3du),Vec3vfx(P3-P0));210const vfloatx maxr12 = sqrt(max(rr1,rr2));211const vfloatx one_plus_ulp = 1.0f+2.0f*float(ulp);212const vfloatx one_minus_ulp = 1.0f-2.0f*float(ulp);213vfloatx r_outer = max(P0.w,P1.w,P2.w,P3.w)+maxr12;214vfloatx r_inner = min(P0.w,P1.w,P2.w,P3.w)-maxr12;215r_outer = one_plus_ulp*r_outer;216r_inner = max(0.0f,one_minus_ulp*r_inner);217const CylinderN<vfloatx::size> cylinder_outer(Vec3vfx(P0),Vec3vfx(P3),r_outer);218const CylinderN<vfloatx::size> cylinder_inner(Vec3vfx(P0),Vec3vfx(P3),r_inner);219vboolx valid = true; clear(valid,vfloatx::size-1);220221/* intersect with outer cylinder */222BBox<vfloatx> tc_outer; vfloatx u_outer0; Vec3vfx Ng_outer0; vfloatx u_outer1; Vec3vfx Ng_outer1;223valid &= cylinder_outer.intersect(org,dir,tc_outer,u_outer0,Ng_outer0,u_outer1,Ng_outer1);224if (none(valid)) continue;225226/* intersect with cap-planes */227BBox<vfloatx> tp(ray.tnear()-dt,ray.tfar-dt);228tp = embree::intersect(tp,tc_outer);229BBox<vfloatx> h0 = HalfPlaneN<vfloatx::size>(Vec3vfx(P0),+Vec3vfx(dP0du)).intersect(org,dir);230tp = embree::intersect(tp,h0);231BBox<vfloatx> h1 = HalfPlaneN<vfloatx::size>(Vec3vfx(P3),-Vec3vfx(dP3du)).intersect(org,dir);232tp = embree::intersect(tp,h1);233valid &= tp.lower <= tp.upper;234if (none(valid)) continue;235236/* clamp and correct u parameter */237u_outer0 = clamp(u_outer0,vfloatx(0.0f),vfloatx(1.0f));238u_outer1 = clamp(u_outer1,vfloatx(0.0f),vfloatx(1.0f));239u_outer0 = lerp(u0,u1,(vfloatx(step)+u_outer0)*(1.0f/float(vfloatx::size)));240u_outer1 = lerp(u0,u1,(vfloatx(step)+u_outer1)*(1.0f/float(vfloatx::size)));241242/* intersect with inner cylinder */243BBox<vfloatx> tc_inner;244vfloatx u_inner0 = zero; Vec3vfx Ng_inner0 = zero; vfloatx u_inner1 = zero; Vec3vfx Ng_inner1 = zero;245const vboolx valid_inner = cylinder_inner.intersect(org,dir,tc_inner,u_inner0,Ng_inner0,u_inner1,Ng_inner1);246247/* at the unstable area we subdivide deeper */248const vboolx unstable0 = (!valid_inner) | (abs(dot(Vec3vfx(Vec3fa(ray.dir)),Ng_inner0)) < 0.3f);249const vboolx unstable1 = (!valid_inner) | (abs(dot(Vec3vfx(Vec3fa(ray.dir)),Ng_inner1)) < 0.3f);250251/* subtract the inner interval from the current hit interval */252BBox<vfloatx> tp0, tp1;253subtract(tp,tc_inner,tp0,tp1);254vboolx valid0 = valid & (tp0.lower <= tp0.upper);255vboolx valid1 = valid & (tp1.lower <= tp1.upper);256if (none(valid0 | valid1)) continue;257258/* iterate over all first hits front to back */259const vintx termDepth0 = select(unstable0,vintx(maxDepth+1),vintx(maxDepth));260vboolx recursion_valid0 = valid0 & (depth < termDepth0);261valid0 &= depth >= termDepth0;262263while (any(valid0))264{265const size_t i = select_min(valid0,tp0.lower); clear(valid0,i);266found = found | intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer0[i],tp0.lower[i],epilog);267//found = found | intersect_bezier_iterative_debug (ray,dt,curve,i,u_outer0,tp0,h0,h1,Ng_outer0,dP0du,dP3du,epilog);268valid0 &= tp0.lower+dt <= ray.tfar;269}270valid1 &= tp1.lower+dt <= ray.tfar;271272/* iterate over all second hits front to back */273const vintx termDepth1 = select(unstable1,vintx(maxDepth+1),vintx(maxDepth));274vboolx recursion_valid1 = valid1 & (depth < termDepth1);275valid1 &= depth >= termDepth1;276while (any(valid1))277{278const size_t i = select_min(valid1,tp1.lower); clear(valid1,i);279found = found | intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer1[i],tp1.upper[i],epilog);280//found = found | intersect_bezier_iterative_debug (ray,dt,curve,i,u_outer1,tp1,h0,h1,Ng_outer1,dP0du,dP3du,epilog);281valid1 &= tp1.lower+dt <= ray.tfar;282}283284/* push valid segments to stack */285recursion_valid0 &= tp0.lower+dt <= ray.tfar;286recursion_valid1 &= tp1.lower+dt <= ray.tfar;287const vboolx recursion_valid = recursion_valid0 | recursion_valid1;288if (any(recursion_valid))289{290assert(sptr < stack_size);291stack[sptr].valid = recursion_valid;292stack[sptr].tlower = select(recursion_valid0,tp0.lower,tp1.lower);293stack[sptr].u0 = u0;294stack[sptr].u1 = u1;295stack[sptr].depth = depth+1;296sptr++;297}298}299return found;300}301302#else303304template<typename NativeCurve3ff, typename Ray, typename Epilog>305__forceinline bool intersect_bezier_recursive_jacobian(const Ray& ray, const float dt, const NativeCurve3ff& curve, const Epilog& epilog)306{307const Vec3fa org = zero;308const Vec3fa dir = ray.dir;309const unsigned int max_depth = 7;310311bool found = false;312313struct ShortStack314{315/* pushes both children */316__forceinline void push() {317depth++;318}319320/* pops next node */321__forceinline void pop() {322short_stack += (1<<(31-depth));323depth = 31-bsf(short_stack);324}325326unsigned int depth = 0;327unsigned int short_stack = 0;328};329330ShortStack stack;331332do333{334const float u0 = (stack.short_stack+0*(1<<(31-stack.depth)))/float(0x80000000);335const float u1 = (stack.short_stack+1*(1<<(31-stack.depth)))/float(0x80000000);336337/* subdivide bezier curve */338Vec3ff P0, dP0du; curve.eval(u0,P0,dP0du); dP0du = dP0du * (u1-u0);339Vec3ff P3, dP3du; curve.eval(u1,P3,dP3du); dP3du = dP3du * (u1-u0);340const Vec3ff P1 = P0 + dP0du*(1.0f/3.0f);341const Vec3ff P2 = P3 - dP3du*(1.0f/3.0f);342343/* check if curve is well behaved, by checking deviation of tangents from straight line */344const Vec3ff W = Vec3ff(P3-P0,0.0f);345const Vec3ff dQ0 = abs(3.0f*(P1-P0) - W);346const Vec3ff dQ1 = abs(3.0f*(P2-P1) - W);347const Vec3ff dQ2 = abs(3.0f*(P3-P2) - W);348const Vec3ff max_dQ = max(dQ0,dQ1,dQ2);349const float m = max(max_dQ.x,max_dQ.y,max_dQ.z); //,max_dQ.w);350const float l = length(Vec3f(W));351const bool well_behaved = m < 0.2f*l;352353if (!well_behaved && stack.depth < max_depth) {354stack.push();355continue;356}357358/* calculate bounding cylinders */359const float rr1 = sqr_point_to_line_distance(Vec3f(dP0du),Vec3f(P3-P0));360const float rr2 = sqr_point_to_line_distance(Vec3f(dP3du),Vec3f(P3-P0));361const float maxr12 = sqrt(max(rr1,rr2));362const float one_plus_ulp = 1.0f+2.0f*float(ulp);363const float one_minus_ulp = 1.0f-2.0f*float(ulp);364float r_outer = max(P0.w,P1.w,P2.w,P3.w)+maxr12;365float r_inner = min(P0.w,P1.w,P2.w,P3.w)-maxr12;366r_outer = one_plus_ulp*r_outer;367r_inner = max(0.0f,one_minus_ulp*r_inner);368const Cylinder cylinder_outer(Vec3f(P0),Vec3f(P3),r_outer);369const Cylinder cylinder_inner(Vec3f(P0),Vec3f(P3),r_inner);370371/* intersect with outer cylinder */372BBox<float> tc_outer; float u_outer0; Vec3fa Ng_outer0; float u_outer1; Vec3fa Ng_outer1;373if (!cylinder_outer.intersect(org,dir,tc_outer,u_outer0,Ng_outer0,u_outer1,Ng_outer1))374{375stack.pop();376continue;377}378379/* intersect with cap-planes */380BBox<float> tp(ray.tnear()-dt,ray.tfar-dt);381tp = embree::intersect(tp,tc_outer);382BBox<float> h0 = HalfPlane(Vec3f(P0),+Vec3f(dP0du)).intersect(org,dir);383tp = embree::intersect(tp,h0);384BBox<float> h1 = HalfPlane(Vec3f(P3),-Vec3f(dP3du)).intersect(org,dir);385tp = embree::intersect(tp,h1);386if (tp.lower > tp.upper)387{388stack.pop();389continue;390}391392bool valid = true;393394/* clamp and correct u parameter */395u_outer0 = clamp(u_outer0,float(0.0f),float(1.0f));396u_outer1 = clamp(u_outer1,float(0.0f),float(1.0f));397u_outer0 = lerp(u0,u1,u_outer0);398u_outer1 = lerp(u0,u1,u_outer1);399400/* intersect with inner cylinder */401BBox<float> tc_inner;402float u_inner0 = zero; Vec3fa Ng_inner0 = zero; float u_inner1 = zero; Vec3fa Ng_inner1 = zero;403const bool valid_inner = cylinder_inner.intersect(org,dir,tc_inner,u_inner0,Ng_inner0,u_inner1,Ng_inner1);404405/* subtract the inner interval from the current hit interval */406BBox<float> tp0, tp1;407subtract(tp,tc_inner,tp0,tp1);408bool valid0 = valid & (tp0.lower <= tp0.upper);409bool valid1 = valid & (tp1.lower <= tp1.upper);410if (!(valid0 | valid1))411{412stack.pop();413continue;414}415416/* at the unstable area we subdivide deeper */417const bool unstable0 = valid0 && ((!valid_inner) | (abs(dot(Vec3fa(ray.dir),Ng_inner0)) < 0.3f));418const bool unstable1 = valid1 && ((!valid_inner) | (abs(dot(Vec3fa(ray.dir),Ng_inner1)) < 0.3f));419420if ((unstable0 | unstable1) && (stack.depth < max_depth)) {421stack.push();422continue;423}424425if (valid0)426found |= intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer0,tp0.lower,epilog);427428/* the far hit cannot be closer, thus skip if we hit entry already */429valid1 &= tp1.lower+dt <= ray.tfar;430431/* iterate over second hit */432if (valid1)433found |= intersect_bezier_iterative_jacobian(ray,dt,curve,u_outer1,tp1.upper,epilog);434435stack.pop();436437} while (stack.short_stack != 0x80000000);438439return found;440}441442#endif443444template<template<typename Ty> class NativeCurve>445struct SweepCurve1Intersector1446{447typedef NativeCurve<Vec3ff> NativeCurve3ff;448449template<typename Ray, typename Epilog>450__forceinline bool intersect(const CurvePrecalculations1& pre, Ray& ray,451RayQueryContext* context,452const CurveGeometry* geom, const unsigned int primID,453const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3,454const Epilog& epilog)455{456STAT3(normal.trav_prims,1,1,1);457458/* move ray closer to make intersection stable */459NativeCurve3ff curve0(v0,v1,v2,v3);460curve0 = enlargeRadiusToMinWidth(context,geom,ray.org,curve0);461const float dt = dot(curve0.center()-ray.org,ray.dir)*rcp(dot(ray.dir,ray.dir));462const Vec3ff ref(madd(Vec3fa(dt),ray.dir,ray.org),0.0f);463const NativeCurve3ff curve1 = curve0-ref;464return intersect_bezier_recursive_jacobian(ray,dt,curve1,epilog);465}466};467468template<template<typename Ty> class NativeCurve, int K>469struct SweepCurve1IntersectorK470{471typedef NativeCurve<Vec3ff> NativeCurve3ff;472473struct Ray1474{475__forceinline Ray1(RayK<K>& ray, size_t k)476: org(ray.org.x[k],ray.org.y[k],ray.org.z[k]), dir(ray.dir.x[k],ray.dir.y[k],ray.dir.z[k]), _tnear(ray.tnear()[k]), tfar(ray.tfar[k]) {}477478Vec3fa org;479Vec3fa dir;480float _tnear;481float& tfar;482483__forceinline float& tnear() { return _tnear; }484//__forceinline float& tfar() { return _tfar; }485__forceinline const float& tnear() const { return _tnear; }486//__forceinline const float& tfar() const { return _tfar; }487488};489490template<typename Epilog>491__forceinline bool intersect(const CurvePrecalculationsK<K>& pre, RayK<K>& vray, size_t k,492RayQueryContext* context,493const CurveGeometry* geom, const unsigned int primID,494const Vec3ff& v0, const Vec3ff& v1, const Vec3ff& v2, const Vec3ff& v3,495const Epilog& epilog)496{497STAT3(normal.trav_prims,1,1,1);498Ray1 ray(vray,k);499500/* move ray closer to make intersection stable */501NativeCurve3ff curve0(v0,v1,v2,v3);502curve0 = enlargeRadiusToMinWidth(context,geom,ray.org,curve0);503const float dt = dot(curve0.center()-ray.org,ray.dir)*rcp(dot(ray.dir,ray.dir));504const Vec3ff ref(madd(Vec3fa(dt),ray.dir,ray.org),0.0f);505const NativeCurve3ff curve1 = curve0-ref;506return intersect_bezier_recursive_jacobian(ray,dt,curve1,epilog);507}508};509}510}511512513