Path: blob/master/thirdparty/embree/kernels/common/ray.h
9905 views
// Copyright 2009-2021 Intel Corporation1// SPDX-License-Identifier: Apache-2.023#pragma once45#include "default.h"6#include "instance_stack.h"78// FIXME: if ray gets separated into ray* and hit, uload4 needs to be adjusted910namespace embree11{12/* Ray structure for K rays */13template<int K>14struct RayK15{16/* Default construction does nothing */17__forceinline RayK() {}1819/* Constructs a ray from origin, direction, and ray segment. Near20* has to be smaller than far */21__forceinline RayK(const Vec3vf<K>& org, const Vec3vf<K>& dir,22const vfloat<K>& tnear = zero, const vfloat<K>& tfar = inf,23const vfloat<K>& time = zero, const vint<K>& mask = -1, const vint<K>& id = 0, const vint<K>& flags = 0)24: org(org), dir(dir), _tnear(tnear), tfar(tfar), _time(time), mask(mask), id(id), flags(flags) {}2526/* Returns the size of the ray */27static __forceinline size_t size() { return K; }2829/* Calculates if this is a valid ray that does not cause issues during traversal */30__forceinline vbool<K> valid() const31{32const vbool<K> vx = (abs(org.x) <= vfloat<K>(FLT_LARGE)) & (abs(dir.x) <= vfloat<K>(FLT_LARGE));33const vbool<K> vy = (abs(org.y) <= vfloat<K>(FLT_LARGE)) & (abs(dir.y) <= vfloat<K>(FLT_LARGE));34const vbool<K> vz = (abs(org.z) <= vfloat<K>(FLT_LARGE)) & (abs(dir.z) <= vfloat<K>(FLT_LARGE));35const vbool<K> vn = abs(tnear()) <= vfloat<K>(inf);36const vbool<K> vf = abs(tfar) <= vfloat<K>(inf);37return vx & vy & vz & vn & vf;38}3940__forceinline void get(RayK<1>* ray) const;41__forceinline void get(size_t i, RayK<1>& ray) const;42__forceinline void set(const RayK<1>* ray);43__forceinline void set(size_t i, const RayK<1>& ray);4445__forceinline void copy(size_t dest, size_t source);4647__forceinline vint<K> octant() const48{49return select(dir.x < 0.0f, vint<K>(1), vint<K>(zero)) |50select(dir.y < 0.0f, vint<K>(2), vint<K>(zero)) |51select(dir.z < 0.0f, vint<K>(4), vint<K>(zero));52}5354/* Ray data */55Vec3vf<K> org; // ray origin56vfloat<K> _tnear; // start of 
ray segment57Vec3vf<K> dir; // ray direction58vfloat<K> _time; // time of this ray for motion blur59vfloat<K> tfar; // end of ray segment60vint<K> mask; // used to mask out objects during traversal61vint<K> id;62vint<K> flags;6364__forceinline vfloat<K>& tnear() { return _tnear; }65__forceinline vfloat<K>& time() { return _time; }66__forceinline const vfloat<K>& tnear() const { return _tnear; }67__forceinline const vfloat<K>& time() const { return _time; }68};6970/* Ray+hit structure for K rays */71template<int K>72struct RayHitK : RayK<K>73{74using RayK<K>::org;75using RayK<K>::_tnear;76using RayK<K>::dir;77using RayK<K>::_time;78using RayK<K>::tfar;79using RayK<K>::mask;80using RayK<K>::id;81using RayK<K>::flags;8283using RayK<K>::tnear;84using RayK<K>::time;8586/* Default construction does nothing */87__forceinline RayHitK() {}8889/* Constructs a ray from origin, direction, and ray segment. Near90* has to be smaller than far */91__forceinline RayHitK(const Vec3vf<K>& org, const Vec3vf<K>& dir,92const vfloat<K>& tnear = zero, const vfloat<K>& tfar = inf,93const vfloat<K>& time = zero, const vint<K>& mask = -1, const vint<K>& id = 0, const vint<K>& flags = 0)94: RayK<K>(org, dir, tnear, tfar, time, mask, id, flags),95geomID(RTC_INVALID_GEOMETRY_ID)96{97for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) {98instID[l] = RTC_INVALID_GEOMETRY_ID;99#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)100instPrimID[l] = RTC_INVALID_GEOMETRY_ID;101#endif102}103}104105__forceinline RayHitK(const RayK<K>& ray)106: RayK<K>(ray),107geomID(RTC_INVALID_GEOMETRY_ID)108{109for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) {110instID[l] = RTC_INVALID_GEOMETRY_ID;111#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)112instPrimID[l] = RTC_INVALID_GEOMETRY_ID;113#endif114}115}116117__forceinline RayHitK<K>& operator =(const RayK<K>& ray)118{119org = ray.org;120_tnear = ray._tnear;121dir = ray.dir;122_time = ray._time;123tfar = ray.tfar;124mask = ray.mask;125id = ray.id;126flags = 
ray.flags;127128geomID = RTC_INVALID_GEOMETRY_ID;129for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) {130instID[l] = RTC_INVALID_GEOMETRY_ID;131#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)132instPrimID[l] = RTC_INVALID_GEOMETRY_ID;133#endif134}135136return *this;137}138139/* Calculates if the hit is valid */140__forceinline void verifyHit(const vbool<K>& valid0) const141{142vbool<K> valid = valid0 & geomID != vuint<K>(RTC_INVALID_GEOMETRY_ID);143const vbool<K> vt = (abs(tfar) <= vfloat<K>(FLT_LARGE)) | (tfar == vfloat<K>(neg_inf));144const vbool<K> vu = (abs(u) <= vfloat<K>(FLT_LARGE));145const vbool<K> vv = (abs(v) <= vfloat<K>(FLT_LARGE));146const vbool<K> vnx = abs(Ng.x) <= vfloat<K>(FLT_LARGE);147const vbool<K> vny = abs(Ng.y) <= vfloat<K>(FLT_LARGE);148const vbool<K> vnz = abs(Ng.z) <= vfloat<K>(FLT_LARGE);149if (any(valid & !vt)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid t");150if (any(valid & !vu)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid u");151if (any(valid & !vv)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid v");152if (any(valid & !vnx)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid Ng.x");153if (any(valid & !vny)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid Ng.y");154if (any(valid & !vnz)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid Ng.z");155}156157__forceinline void get(RayHitK<1>* ray) const;158__forceinline void get(size_t i, RayHitK<1>& ray) const;159__forceinline void set(const RayHitK<1>* ray);160__forceinline void set(size_t i, const RayHitK<1>& ray);161162__forceinline void copy(size_t dest, size_t source);163164/* Hit data */165Vec3vf<K> Ng; // geometry normal166vfloat<K> u; // barycentric u coordinate of hit167vfloat<K> v; // barycentric v coordinate of hit168vuint<K> primID; // primitive ID169vuint<K> geomID; // geometry ID170vuint<K> instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID171#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)172vuint<K> instPrimID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance prim ID173#endif174};175176/* Specialization for a 
single ray */177template<>178struct RayK<1>179{180/* Default construction does nothing */181__forceinline RayK() {}182183/* Constructs a ray from origin, direction, and ray segment. Near184* has to be smaller than far */185__forceinline RayK(const Vec3fa& org, const Vec3fa& dir, float tnear = zero, float tfar = inf, float time = zero, int mask = -1, int id = 0, int flags = 0)186: org(org,tnear), dir(dir,time), tfar(tfar), mask(mask), id(id), flags(flags) {}187188/* Calculates if this is a valid ray that does not cause issues during traversal */189__forceinline bool valid() const {190return all(le_mask(abs(Vec3fa(org)), Vec3fa(FLT_LARGE)) & le_mask(abs(Vec3fa(dir)), Vec3fa(FLT_LARGE))) && abs(tnear()) <= float(inf) && abs(tfar) <= float(inf);191}192193/* checks if occlusion ray is done */194__forceinline bool occluded() const {195return tfar < 0.0f;196}197198/* Ray data */199Vec3ff org; // 3 floats for ray origin, 1 float for tnear200//float tnear; // start of ray segment201Vec3ff dir; // 3 floats for ray direction, 1 float for time202// float time;203float tfar; // end of ray segment204int mask; // used to mask out objects during traversal205int id; // ray ID206int flags; // ray flags207208__forceinline float& tnear() { return org.w; };209__forceinline const float& tnear() const { return org.w; };210211__forceinline float& time() { return dir.w; };212__forceinline const float& time() const { return dir.w; };213214};215216template<>217struct RayHitK<1> : RayK<1>218{219/* Default construction does nothing */220__forceinline RayHitK() {}221222/* Constructs a ray from origin, direction, and ray segment. 
Near223* has to be smaller than far */224__forceinline RayHitK(const Vec3fa& org, const Vec3fa& dir, float tnear = zero, float tfar = inf, float time = zero, int mask = -1, int id = 0, int flags = 0)225: RayK<1>(org, dir, tnear, tfar, time, mask, id, flags),226geomID(RTC_INVALID_GEOMETRY_ID) {}227228__forceinline RayHitK(const RayK<1>& ray)229: RayK<1>(ray),230geomID(RTC_INVALID_GEOMETRY_ID) {}231232__forceinline RayHitK<1>& operator =(const RayK<1>& ray)233{234org = ray.org;235dir = ray.dir;236tfar = ray.tfar;237mask = ray.mask;238id = ray.id;239flags = ray.flags;240241geomID = RTC_INVALID_GEOMETRY_ID;242243return *this;244}245246/* Calculates if the hit is valid */247__forceinline void verifyHit() const248{249if (geomID == RTC_INVALID_GEOMETRY_ID) return;250const bool vt = (abs(tfar) <= FLT_LARGE) || (tfar == float(neg_inf));251const bool vu = (abs(u) <= FLT_LARGE);252const bool vv = (abs(u) <= FLT_LARGE);253const bool vnx = abs(Ng.x) <= FLT_LARGE;254const bool vny = abs(Ng.y) <= FLT_LARGE;255const bool vnz = abs(Ng.z) <= FLT_LARGE;256if (!vt) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid t");257if (!vu) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid u");258if (!vv) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid v");259if (!vnx) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid Ng.x");260if (!vny) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid Ng.y");261if (!vnz) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid Ng.z");262}263264/* Hit data */265Vec3f Ng; // not normalized geometry normal266float u; // barycentric u coordinate of hit267float v; // barycentric v coordinate of hit268unsigned int primID; // primitive ID269unsigned int geomID; // geometry ID270unsigned int instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID271#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)272unsigned int instPrimID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance primitive ID273#endif274};275276/* Converts ray packet to single rays */277template<int K>278__forceinline void RayK<K>::get(RayK<1>* ray) const279{280for (size_t 
i = 0; i < K; i++) // FIXME: use SIMD transpose281{282ray[i].org.x = org.x[i]; ray[i].org.y = org.y[i]; ray[i].org.z = org.z[i]; ray[i].tnear() = tnear()[i];283ray[i].dir.x = dir.x[i]; ray[i].dir.y = dir.y[i]; ray[i].dir.z = dir.z[i]; ray[i].time() = time()[i];284ray[i].tfar = tfar[i]; ray[i].mask = mask[i]; ray[i].id = id[i]; ray[i].flags = flags[i];285}286}287288template<int K>289__forceinline void RayHitK<K>::get(RayHitK<1>* ray) const290{291// FIXME: use SIMD transpose292for (size_t i = 0; i < K; i++)293get(i, ray[i]);294}295296/* Extracts a single ray out of a ray packet*/297template<int K>298__forceinline void RayK<K>::get(size_t i, RayK<1>& ray) const299{300ray.org.x = org.x[i]; ray.org.y = org.y[i]; ray.org.z = org.z[i]; ray.tnear() = tnear()[i];301ray.dir.x = dir.x[i]; ray.dir.y = dir.y[i]; ray.dir.z = dir.z[i]; ray.time() = time()[i];302ray.tfar = tfar[i]; ray.mask = mask[i]; ray.id = id[i]; ray.flags = flags[i];303}304305template<int K>306__forceinline void RayHitK<K>::get(size_t i, RayHitK<1>& ray) const307{308ray.org.x = org.x[i]; ray.org.y = org.y[i]; ray.org.z = org.z[i]; ray.tnear() = tnear()[i];309ray.dir.x = dir.x[i]; ray.dir.y = dir.y[i]; ray.dir.z = dir.z[i]; ray.tfar = tfar[i]; ray.time() = time()[i];310ray.mask = mask[i]; ray.id = id[i]; ray.flags = flags[i];311ray.Ng.x = Ng.x[i]; ray.Ng.y = Ng.y[i]; ray.Ng.z = Ng.z[i];312ray.u = u[i]; ray.v = v[i];313ray.primID = primID[i]; ray.geomID = geomID[i];314315instance_id_stack::copy_VU<K>(instID, ray.instID, i);316#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)317instance_id_stack::copy_VU<K>(instPrimID, ray.instPrimID, i);318#endif319}320321/* Converts single rays to ray packet */322template<int K>323__forceinline void RayK<K>::set(const RayK<1>* ray)324{325// FIXME: use SIMD transpose326for (size_t i = 0; i < K; i++)327set(i, ray[i]);328}329330template<int K>331__forceinline void RayHitK<K>::set(const RayHitK<1>* ray)332{333// FIXME: use SIMD transpose334for (size_t i = 0; i < K; i++)335set(i, 
ray[i]);336}337338/* inserts a single ray into a ray packet element */339template<int K>340__forceinline void RayK<K>::set(size_t i, const RayK<1>& ray)341{342org.x[i] = ray.org.x; org.y[i] = ray.org.y; org.z[i] = ray.org.z; tnear()[i] = ray.tnear();343dir.x[i] = ray.dir.x; dir.y[i] = ray.dir.y; dir.z[i] = ray.dir.z; time()[i] = ray.time();344tfar[i] = ray.tfar; mask[i] = ray.mask; id[i] = ray.id; flags[i] = ray.flags;345}346347template<int K>348__forceinline void RayHitK<K>::set(size_t i, const RayHitK<1>& ray)349{350org.x[i] = ray.org.x; org.y[i] = ray.org.y; org.z[i] = ray.org.z; tnear()[i] = ray.tnear();351dir.x[i] = ray.dir.x; dir.y[i] = ray.dir.y; dir.z[i] = ray.dir.z; time()[i] = ray.time();352tfar[i] = ray.tfar; mask[i] = ray.mask; id[i] = ray.id; flags[i] = ray.flags;353Ng.x[i] = ray.Ng.x; Ng.y[i] = ray.Ng.y; Ng.z[i] = ray.Ng.z;354u[i] = ray.u; v[i] = ray.v;355primID[i] = ray.primID; geomID[i] = ray.geomID;356357instance_id_stack::copy_UV<K>(ray.instID, instID, i);358#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)359instance_id_stack::copy_UV<K>(ray.instPrimID, instPrimID, i);360#endif361}362363/* copies a ray packet element into another element*/364template<int K>365__forceinline void RayK<K>::copy(size_t dest, size_t source)366{367org.x[dest] = org.x[source]; org.y[dest] = org.y[source]; org.z[dest] = org.z[source]; tnear()[dest] = tnear()[source];368dir.x[dest] = dir.x[source]; dir.y[dest] = dir.y[source]; dir.z[dest] = dir.z[source]; time()[dest] = time()[source];369tfar [dest] = tfar[source]; mask[dest] = mask[source]; id[dest] = id[source]; flags[dest] = flags[source];370}371372template<int K>373__forceinline void RayHitK<K>::copy(size_t dest, size_t source)374{375org.x[dest] = org.x[source]; org.y[dest] = org.y[source]; org.z[dest] = org.z[source]; tnear()[dest] = tnear()[source];376dir.x[dest] = dir.x[source]; dir.y[dest] = dir.y[source]; dir.z[dest] = dir.z[source]; time()[dest] = time()[source];377tfar [dest] = tfar[source]; mask[dest] = mask[source]; 
id[dest] = id[source]; flags[dest] = flags[source];378Ng.x[dest] = Ng.x[source]; Ng.y[dest] = Ng.y[source]; Ng.z[dest] = Ng.z[source];379u[dest] = u[source]; v[dest] = v[source];380primID[dest] = primID[source]; geomID[dest] = geomID[source];381382instance_id_stack::copy_VV<K>(instID, instID, source, dest);383#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)384instance_id_stack::copy_VV<K>(instPrimID, instPrimID, source, dest);385#endif386}387388/* Shortcuts */389typedef RayK<1> Ray;390typedef RayK<4> Ray4;391typedef RayK<8> Ray8;392typedef RayK<16> Ray16;393typedef RayK<VSIZEX> Rayx;394struct RayN;395396typedef RayHitK<1> RayHit;397typedef RayHitK<4> RayHit4;398typedef RayHitK<8> RayHit8;399typedef RayHitK<16> RayHit16;400typedef RayHitK<VSIZEX> RayHitx;401struct RayHitN;402403template<int K, bool intersect>404struct RayTypeHelper;405406template<int K>407struct RayTypeHelper<K, true>408{409typedef RayHitK<K> Ty;410};411412template<int K>413struct RayTypeHelper<K, false>414{415typedef RayK<K> Ty;416};417418template<bool intersect>419using RayType = typename RayTypeHelper<1, intersect>::Ty;420421template<int K, bool intersect>422using RayTypeK = typename RayTypeHelper<K, intersect>::Ty;423424/* Outputs ray to stream */425template<int K>426__forceinline embree_ostream operator <<(embree_ostream cout, const RayK<K>& ray)427{428return cout << "{ " << embree_endl429<< " org = " << ray.org << embree_endl430<< " dir = " << ray.dir << embree_endl431<< " near = " << ray.tnear() << embree_endl432<< " far = " << ray.tfar << embree_endl433<< " time = " << ray.time() << embree_endl434<< " mask = " << ray.mask << embree_endl435<< " id = " << ray.id << embree_endl436<< " flags = " << ray.flags << embree_endl437<< "}";438}439440template<int K>441__forceinline embree_ostream operator <<(embree_ostream cout, const RayHitK<K>& ray)442{443cout << "{ " << embree_endl444<< " org = " << ray.org << embree_endl445<< " dir = " << ray.dir << embree_endl446<< " near = " << ray.tnear() << 
embree_endl447<< " far = " << ray.tfar << embree_endl448<< " time = " << ray.time() << embree_endl449<< " mask = " << ray.mask << embree_endl450<< " id = " << ray.id << embree_endl451<< " flags = " << ray.flags << embree_endl452<< " Ng = " << ray.Ng453<< " u = " << ray.u << embree_endl454<< " v = " << ray.v << embree_endl455<< " primID = " << ray.primID << embree_endl456<< " geomID = " << ray.geomID << embree_endl457<< " instID =";458for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)459{460cout << " " << ray.instID[l];461}462#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)463cout << " instPrimID =";464for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)465{466cout << " " << ray.instPrimID[l];467}468#endif469cout << embree_endl;470return cout << "}";471}472473struct RayStreamSOA474{475__forceinline RayStreamSOA(void* rays, size_t N)476: ptr((char*)rays), N(N) {}477478/* ray data access functions */479__forceinline float* org_x(size_t offset = 0) { return (float*)&ptr[0*4*N+offset]; } // x coordinate of ray origin480__forceinline float* org_y(size_t offset = 0) { return (float*)&ptr[1*4*N+offset]; } // y coordinate of ray origin481__forceinline float* org_z(size_t offset = 0) { return (float*)&ptr[2*4*N+offset]; }; // z coordinate of ray origin482__forceinline float* tnear(size_t offset = 0) { return (float*)&ptr[3*4*N+offset]; }; // start of ray segment483484__forceinline float* dir_x(size_t offset = 0) { return (float*)&ptr[4*4*N+offset]; }; // x coordinate of ray direction485__forceinline float* dir_y(size_t offset = 0) { return (float*)&ptr[5*4*N+offset]; }; // y coordinate of ray direction486__forceinline float* dir_z(size_t offset = 0) { return (float*)&ptr[6*4*N+offset]; }; // z coordinate of ray direction487__forceinline float* time (size_t offset = 0) { return (float*)&ptr[7*4*N+offset]; }; // time of this ray for motion blur488489__forceinline float* tfar (size_t offset = 0) { return (float*)&ptr[8*4*N+offset]; }; // end of ray segment (set to hit 
distance)490__forceinline int* mask (size_t offset = 0) { return (int*)&ptr[9*4*N+offset]; }; // used to mask out objects during traversal (optional)491__forceinline int* id (size_t offset = 0) { return (int*)&ptr[10*4*N+offset]; }; // id492__forceinline int* flags(size_t offset = 0) { return (int*)&ptr[11*4*N+offset]; }; // flags493494/* hit data access functions */495__forceinline float* Ng_x(size_t offset = 0) { return (float*)&ptr[12*4*N+offset]; }; // x coordinate of geometry normal496__forceinline float* Ng_y(size_t offset = 0) { return (float*)&ptr[13*4*N+offset]; }; // y coordinate of geometry normal497__forceinline float* Ng_z(size_t offset = 0) { return (float*)&ptr[14*4*N+offset]; }; // z coordinate of geometry normal498499__forceinline float* u(size_t offset = 0) { return (float*)&ptr[15*4*N+offset]; }; // barycentric u coordinate of hit500__forceinline float* v(size_t offset = 0) { return (float*)&ptr[16*4*N+offset]; }; // barycentric v coordinate of hit501502__forceinline unsigned int* primID(size_t offset = 0) { return (unsigned int*)&ptr[17*4*N+offset]; }; // primitive ID503__forceinline unsigned int* geomID(size_t offset = 0) { return (unsigned int*)&ptr[18*4*N+offset]; }; // geometry ID504__forceinline unsigned int* instID(size_t level, size_t offset = 0) { return (unsigned int*)&ptr[19*4*N+level*4*N+offset]; }; // instance ID505#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)506__forceinline unsigned int* instPrimID(size_t level, size_t offset = 0) { return (unsigned int*)&ptr[19*4*N+RTC_MAX_INSTANCE_LEVEL_COUNT*4*N+level*4*N+offset]; }; // instance primitive ID507#endif508509__forceinline Ray getRayByOffset(size_t offset)510{511Ray ray;512ray.org.x = org_x(offset)[0];513ray.org.y = org_y(offset)[0];514ray.org.z = org_z(offset)[0];515ray.tnear() = tnear(offset)[0];516ray.dir.x = dir_x(offset)[0];517ray.dir.y = dir_y(offset)[0];518ray.dir.z = dir_z(offset)[0];519ray.time() = time(offset)[0];520ray.tfar = tfar(offset)[0];521ray.mask = 
mask(offset)[0];522ray.id = id(offset)[0];523ray.flags = flags(offset)[0];524return ray;525}526527template<int K>528__forceinline RayK<K> getRayByOffset(size_t offset)529{530RayK<K> ray;531ray.org.x = vfloat<K>::loadu(org_x(offset));532ray.org.y = vfloat<K>::loadu(org_y(offset));533ray.org.z = vfloat<K>::loadu(org_z(offset));534ray.tnear = vfloat<K>::loadu(tnear(offset));535ray.dir.x = vfloat<K>::loadu(dir_x(offset));536ray.dir.y = vfloat<K>::loadu(dir_y(offset));537ray.dir.z = vfloat<K>::loadu(dir_z(offset));538ray.time = vfloat<K>::loadu(time(offset));539ray.tfar = vfloat<K>::loadu(tfar(offset));540ray.mask = vint<K>::loadu(mask(offset));541ray.id = vint<K>::loadu(id(offset));542ray.flags = vint<K>::loadu(flags(offset));543return ray;544}545546template<int K>547__forceinline RayK<K> getRayByOffset(const vbool<K>& valid, size_t offset)548{549RayK<K> ray;550ray.org.x = vfloat<K>::loadu(valid, org_x(offset));551ray.org.y = vfloat<K>::loadu(valid, org_y(offset));552ray.org.z = vfloat<K>::loadu(valid, org_z(offset));553ray.tnear() = vfloat<K>::loadu(valid, tnear(offset));554ray.dir.x = vfloat<K>::loadu(valid, dir_x(offset));555ray.dir.y = vfloat<K>::loadu(valid, dir_y(offset));556ray.dir.z = vfloat<K>::loadu(valid, dir_z(offset));557ray.time() = vfloat<K>::loadu(valid, time(offset));558ray.tfar = vfloat<K>::loadu(valid, tfar(offset));559560#if !defined(__AVX__)561/* SSE: some ray members must be loaded with scalar instructions to ensure that we don't cause memory faults,562because the SSE masked loads always access the entire vector */563if (unlikely(!all(valid)))564{565ray.mask = zero;566ray.id = zero;567ray.flags = zero;568569for (size_t k = 0; k < K; k++)570{571if (likely(valid[k]))572{573ray.mask[k] = mask(offset)[k];574ray.id[k] = id(offset)[k];575ray.flags[k] = flags(offset)[k];576}577}578}579else580#endif581{582ray.mask = vint<K>::loadu(valid, mask(offset));583ray.id = vint<K>::loadu(valid, id(offset));584ray.flags = vint<K>::loadu(valid, 
flags(offset));585}586587return ray;588}589590template<int K>591__forceinline void setHitByOffset(const vbool<K>& valid_i, size_t offset, const RayHitK<K>& ray)592{593/*594* valid_i: stores which of the input rays exist (do not access nonexistent rays!)595* valid: stores which of the rays actually hit something.596*/597vbool<K> valid = valid_i;598valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);599600if (likely(any(valid)))601{602vfloat<K>::storeu(valid, tfar(offset), ray.tfar);603vfloat<K>::storeu(valid, Ng_x(offset), ray.Ng.x);604vfloat<K>::storeu(valid, Ng_y(offset), ray.Ng.y);605vfloat<K>::storeu(valid, Ng_z(offset), ray.Ng.z);606vfloat<K>::storeu(valid, u(offset), ray.u);607vfloat<K>::storeu(valid, v(offset), ray.v);608609#if !defined(__AVX__)610/* SSE: some ray members must be stored with scalar instructions to ensure that we don't cause memory faults,611because the SSE masked stores always access the entire vector */612if (unlikely(!all(valid_i)))613{614for (size_t k = 0; k < K; k++)615{616if (likely(valid[k]))617{618primID(offset)[k] = ray.primID[k];619geomID(offset)[k] = ray.geomID[k];620621instID(0, offset)[k] = ray.instID[0][k];622#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)623instPrimID(0, offset)[k] = ray.instPrimID[0][k];624#endif625#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)626for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && ray.instID[l-1][k] != RTC_INVALID_GEOMETRY_ID; ++l) {627instID(l, offset)[k] = ray.instID[l][k];628#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)629instPrimID(l, offset)[k] = ray.instPrimID[l][k];630#endif631}632#endif633}634}635}636else637#endif638{639vuint<K>::storeu(valid, primID(offset), ray.primID);640vuint<K>::storeu(valid, geomID(offset), ray.geomID);641642vuint<K>::storeu(valid, instID(0, offset), ray.instID[0]);643#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)644vuint<K>::storeu(valid, instPrimID(0, offset), ray.instPrimID[0]);645#endif646#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)647for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT 
&& any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l) {648vuint<K>::storeu(valid, instID(l, offset), ray.instID[l]);649#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)650vuint<K>::storeu(valid, instPrimID(l, offset), ray.instPrimID[l]);651#endif652}653#endif654}655}656}657658template<int K>659__forceinline void setHitByOffset(const vbool<K>& valid_i, size_t offset, const RayK<K>& ray)660{661vbool<K> valid = valid_i;662valid &= (ray.tfar < 0.0f);663664if (likely(any(valid)))665vfloat<K>::storeu(valid, tfar(offset), ray.tfar);666}667668__forceinline size_t getOctantByOffset(size_t offset)669{670const float dx = dir_x(offset)[0];671const float dy = dir_y(offset)[0];672const float dz = dir_z(offset)[0];673const size_t octantID = (dx < 0.0f ? 1 : 0) + (dy < 0.0f ? 2 : 0) + (dz < 0.0f ? 4 : 0);674return octantID;675}676677__forceinline bool isValidByOffset(size_t offset)678{679const float nnear = tnear(offset)[0];680const float ffar = tfar(offset)[0];681return nnear <= ffar;682}683684template<int K>685__forceinline RayK<K> getRayByOffset(const vbool<K>& valid, const vint<K>& offset)686{687RayK<K> ray;688689#if defined(__AVX2__)690ray.org.x = vfloat<K>::template gather<1>(valid, org_x(), offset);691ray.org.y = vfloat<K>::template gather<1>(valid, org_y(), offset);692ray.org.z = vfloat<K>::template gather<1>(valid, org_z(), offset);693ray.tnear() = vfloat<K>::template gather<1>(valid, tnear(), offset);694ray.dir.x = vfloat<K>::template gather<1>(valid, dir_x(), offset);695ray.dir.y = vfloat<K>::template gather<1>(valid, dir_y(), offset);696ray.dir.z = vfloat<K>::template gather<1>(valid, dir_z(), offset);697ray.time() = vfloat<K>::template gather<1>(valid, time(), offset);698ray.tfar = vfloat<K>::template gather<1>(valid, tfar(), offset);699ray.mask = vint<K>::template gather<1>(valid, mask(), offset);700ray.id = vint<K>::template gather<1>(valid, id(), offset);701ray.flags = vint<K>::template gather<1>(valid, flags(), offset);702#else703ray.org = zero;704ray.tnear() 
= zero;705ray.dir = zero;706ray.time() = zero;707ray.tfar = zero;708ray.mask = zero;709ray.id = zero;710ray.flags = zero;711712for (size_t k = 0; k < K; k++)713{714if (likely(valid[k]))715{716const size_t ofs = offset[k];717718ray.org.x[k] = *org_x(ofs);719ray.org.y[k] = *org_y(ofs);720ray.org.z[k] = *org_z(ofs);721ray.tnear()[k] = *tnear(ofs);722ray.dir.x[k] = *dir_x(ofs);723ray.dir.y[k] = *dir_y(ofs);724ray.dir.z[k] = *dir_z(ofs);725ray.time()[k] = *time(ofs);726ray.tfar[k] = *tfar(ofs);727ray.mask[k] = *mask(ofs);728ray.id[k] = *id(ofs);729ray.flags[k] = *flags(ofs);730}731}732#endif733734return ray;735}736737template<int K>738__forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayHitK<K>& ray)739{740vbool<K> valid = valid_i;741valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);742743if (likely(any(valid)))744{745#if defined(__AVX512F__)746vfloat<K>::template scatter<1>(valid, tfar(), offset, ray.tfar);747vfloat<K>::template scatter<1>(valid, Ng_x(), offset, ray.Ng.x);748vfloat<K>::template scatter<1>(valid, Ng_y(), offset, ray.Ng.y);749vfloat<K>::template scatter<1>(valid, Ng_z(), offset, ray.Ng.z);750vfloat<K>::template scatter<1>(valid, u(), offset, ray.u);751vfloat<K>::template scatter<1>(valid, v(), offset, ray.v);752vuint<K>::template scatter<1>(valid, primID(), offset, ray.primID);753vuint<K>::template scatter<1>(valid, geomID(), offset, ray.geomID);754755vuint<K>::template scatter<1>(valid, instID(0), offset, ray.instID[0]);756#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)757vuint<K>::template scatter<1>(valid, instPrimID(0), offset, ray.instPrimID[0]);758#endif759#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)760for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l) {761vuint<K>::template scatter<1>(valid, instID(l), offset, ray.instID[l]);762#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)763vuint<K>::template scatter<1>(valid, instPrimID(l), offset, 
ray.instPrimID[l]);764#endif765}766#endif767#else768size_t valid_bits = movemask(valid);769while (valid_bits != 0)770{771const size_t k = bscf(valid_bits);772const size_t ofs = offset[k];773774*tfar(ofs) = ray.tfar[k];775776*Ng_x(ofs) = ray.Ng.x[k];777*Ng_y(ofs) = ray.Ng.y[k];778*Ng_z(ofs) = ray.Ng.z[k];779*u(ofs) = ray.u[k];780*v(ofs) = ray.v[k];781*primID(ofs) = ray.primID[k];782*geomID(ofs) = ray.geomID[k];783784*instID(0, ofs) = ray.instID[0][k];785#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)786*instPrimID(0, ofs) = ray.instPrimID[0][k];787#endif788#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)789for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && ray.instID[l-1][k] != RTC_INVALID_GEOMETRY_ID; ++l) {790*instID(l, ofs) = ray.instID[l][k];791#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)792*instPrimID(l, ofs) = ray.instPrimID[l][k];793#endif794}795#endif796}797#endif798}799}800801template<int K>802__forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayK<K>& ray)803{804vbool<K> valid = valid_i;805valid &= (ray.tfar < 0.0f);806807if (likely(any(valid)))808{809#if defined(__AVX512F__)810vfloat<K>::template scatter<1>(valid, tfar(), offset, ray.tfar);811#else812size_t valid_bits = movemask(valid);813while (valid_bits != 0)814{815const size_t k = bscf(valid_bits);816const size_t ofs = offset[k];817818*tfar(ofs) = ray.tfar[k];819}820#endif821}822}823824char* __restrict__ ptr;825size_t N;826};827828template<size_t MAX_K>829struct StackRayStreamSOA : public RayStreamSOA830{831__forceinline StackRayStreamSOA(size_t K)832: RayStreamSOA(data, K) { assert(K <= MAX_K); }833834char data[MAX_K / 4 * sizeof(RayHit4)];835};836837838struct RayStreamSOP839{840template<class T>841__forceinline void init(T& t)842{843org_x = (float*)&t.org.x;844org_y = (float*)&t.org.y;845org_z = (float*)&t.org.z;846tnear = (float*)&t.tnear;847dir_x = (float*)&t.dir.x;848dir_y = (float*)&t.dir.y;849dir_z = (float*)&t.dir.z;850time = (float*)&t.time;851tfar = 
(float*)&t.tfar;852mask = (unsigned int*)&t.mask;853id = (unsigned int*)&t.id;854flags = (unsigned int*)&t.flags;855856Ng_x = (float*)&t.Ng.x;857Ng_y = (float*)&t.Ng.y;858Ng_z = (float*)&t.Ng.z;859u = (float*)&t.u;860v = (float*)&t.v;861primID = (unsigned int*)&t.primID;862geomID = (unsigned int*)&t.geomID;863864for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) {865instID[l] = (unsigned int*)&t.instID[l];866#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)867instPrimID[l] = (unsigned int*)&t.instPrimID[l];868#endif869}870}871872__forceinline Ray getRayByOffset(size_t offset)873{874Ray ray;875ray.org.x = *(float* __restrict__)((char*)org_x + offset);876ray.org.y = *(float* __restrict__)((char*)org_y + offset);877ray.org.z = *(float* __restrict__)((char*)org_z + offset);878ray.dir.x = *(float* __restrict__)((char*)dir_x + offset);879ray.dir.y = *(float* __restrict__)((char*)dir_y + offset);880ray.dir.z = *(float* __restrict__)((char*)dir_z + offset);881ray.tfar = *(float* __restrict__)((char*)tfar + offset);882ray.tnear() = tnear ? *(float* __restrict__)((char*)tnear + offset) : 0.0f;883ray.time() = time ? *(float* __restrict__)((char*)time + offset) : 0.0f;884ray.mask = mask ? *(unsigned int* __restrict__)((char*)mask + offset) : -1;885ray.id = id ? *(unsigned int* __restrict__)((char*)id + offset) : -1;886ray.flags = flags ? 
*(unsigned int* __restrict__)((char*)flags + offset) : -1;887return ray;888}889890template<int K>891__forceinline RayK<K> getRayByOffset(const vbool<K>& valid, size_t offset)892{893RayK<K> ray;894ray.org.x = vfloat<K>::loadu(valid, (float* __restrict__)((char*)org_x + offset));895ray.org.y = vfloat<K>::loadu(valid, (float* __restrict__)((char*)org_y + offset));896ray.org.z = vfloat<K>::loadu(valid, (float* __restrict__)((char*)org_z + offset));897ray.dir.x = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_x + offset));898ray.dir.y = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_y + offset));899ray.dir.z = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_z + offset));900ray.tfar = vfloat<K>::loadu(valid, (float* __restrict__)((char*)tfar + offset));901ray.tnear() = tnear ? vfloat<K>::loadu(valid, (float* __restrict__)((char*)tnear + offset)) : 0.0f;902ray.time() = time ? vfloat<K>::loadu(valid, (float* __restrict__)((char*)time + offset)) : 0.0f;903ray.mask = mask ? vint<K>::loadu(valid, (const void* __restrict__)((char*)mask + offset)) : -1;904ray.id = id ? vint<K>::loadu(valid, (const void* __restrict__)((char*)id + offset)) : -1;905ray.flags = flags ? 
vint<K>::loadu(valid, (const void* __restrict__)((char*)flags + offset)) : -1;906return ray;907}908909template<int K>910__forceinline Vec3vf<K> getDirByOffset(const vbool<K>& valid, size_t offset)911{912Vec3vf<K> dir;913dir.x = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_x + offset));914dir.y = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_y + offset));915dir.z = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_z + offset));916return dir;917}918919__forceinline void setHitByOffset(size_t offset, const RayHit& ray)920{921if (ray.geomID != RTC_INVALID_GEOMETRY_ID)922{923*(float* __restrict__)((char*)tfar + offset) = ray.tfar;924925if (likely(Ng_x)) *(float* __restrict__)((char*)Ng_x + offset) = ray.Ng.x;926if (likely(Ng_y)) *(float* __restrict__)((char*)Ng_y + offset) = ray.Ng.y;927if (likely(Ng_z)) *(float* __restrict__)((char*)Ng_z + offset) = ray.Ng.z;928*(float* __restrict__)((char*)u + offset) = ray.u;929*(float* __restrict__)((char*)v + offset) = ray.v;930*(unsigned int* __restrict__)((char*)geomID + offset) = ray.geomID;931*(unsigned int* __restrict__)((char*)primID + offset) = ray.primID;932933if (likely(instID[0])) {934*(unsigned int* __restrict__)((char*)instID[0] + offset) = ray.instID[0];935#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)936*(unsigned int* __restrict__)((char*)instPrimID[0] + offset) = ray.instPrimID[0];937#endif938#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)939for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID; ++l) {940*(unsigned int* __restrict__)((char*)instID[l] + offset) = ray.instID[l];941#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)942*(unsigned int* __restrict__)((char*)instPrimID[l] + offset) = ray.instPrimID[l];943#endif944}945#endif946}947}948}949950__forceinline void setHitByOffset(size_t offset, const Ray& ray)951{952*(float* __restrict__)((char*)tfar + offset) = ray.tfar;953}954955template<int K>956__forceinline void setHitByOffset(const vbool<K>& valid_i, size_t 
offset, const RayHitK<K>& ray)957{958vbool<K> valid = valid_i;959valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);960961if (likely(any(valid)))962{963vfloat<K>::storeu(valid, (float* __restrict__)((char*)tfar + offset), ray.tfar);964965if (likely(Ng_x)) vfloat<K>::storeu(valid, (float* __restrict__)((char*)Ng_x + offset), ray.Ng.x);966if (likely(Ng_y)) vfloat<K>::storeu(valid, (float* __restrict__)((char*)Ng_y + offset), ray.Ng.y);967if (likely(Ng_z)) vfloat<K>::storeu(valid, (float* __restrict__)((char*)Ng_z + offset), ray.Ng.z);968vfloat<K>::storeu(valid, (float* __restrict__)((char*)u + offset), ray.u);969vfloat<K>::storeu(valid, (float* __restrict__)((char*)v + offset), ray.v);970vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)primID + offset), ray.primID);971vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)geomID + offset), ray.geomID);972973if (likely(instID[0])) {974vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)instID[0] + offset), ray.instID[0]);975#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)976vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)instPrimID[0] + offset), ray.instPrimID[0]);977#endif978#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)979for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l) {980vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)instID[l] + offset), ray.instID[l]);981#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)982vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)instPrimID[l] + offset), ray.instPrimID[l]);983#endif984}985#endif986}987}988}989990template<int K>991__forceinline void setHitByOffset(const vbool<K>& valid_i, size_t offset, const RayK<K>& ray)992{993vbool<K> valid = valid_i;994valid &= (ray.tfar < 0.0f);995996if (likely(any(valid)))997vfloat<K>::storeu(valid, (float* __restrict__)((char*)tfar + offset), ray.tfar);998}9991000__forceinline size_t getOctantByOffset(size_t offset)1001{1002const float dx = 
*(float* __restrict__)((char*)dir_x + offset);1003const float dy = *(float* __restrict__)((char*)dir_y + offset);1004const float dz = *(float* __restrict__)((char*)dir_z + offset);1005const size_t octantID = (dx < 0.0f ? 1 : 0) + (dy < 0.0f ? 2 : 0) + (dz < 0.0f ? 4 : 0);1006return octantID;1007}10081009__forceinline bool isValidByOffset(size_t offset)1010{1011const float nnear = tnear ? *(float* __restrict__)((char*)tnear + offset) : 0.0f;1012const float ffar = *(float* __restrict__)((char*)tfar + offset);1013return nnear <= ffar;1014}10151016template<int K>1017__forceinline vbool<K> isValidByOffset(const vbool<K>& valid, size_t offset)1018{1019const vfloat<K> nnear = tnear ? vfloat<K>::loadu(valid, (float* __restrict__)((char*)tnear + offset)) : 0.0f;1020const vfloat<K> ffar = vfloat<K>::loadu(valid, (float* __restrict__)((char*)tfar + offset));1021return nnear <= ffar;1022}10231024template<int K>1025__forceinline RayK<K> getRayByOffset(const vbool<K>& valid, const vint<K>& offset)1026{1027RayK<K> ray;10281029#if defined(__AVX2__)1030ray.org.x = vfloat<K>::template gather<1>(valid, org_x, offset);1031ray.org.y = vfloat<K>::template gather<1>(valid, org_y, offset);1032ray.org.z = vfloat<K>::template gather<1>(valid, org_z, offset);1033ray.dir.x = vfloat<K>::template gather<1>(valid, dir_x, offset);1034ray.dir.y = vfloat<K>::template gather<1>(valid, dir_y, offset);1035ray.dir.z = vfloat<K>::template gather<1>(valid, dir_z, offset);1036ray.tfar = vfloat<K>::template gather<1>(valid, tfar, offset);1037ray.tnear() = tnear ? vfloat<K>::template gather<1>(valid, tnear, offset) : vfloat<K>(zero);1038ray.time() = time ? vfloat<K>::template gather<1>(valid, time, offset) : vfloat<K>(zero);1039ray.mask = mask ? vint<K>::template gather<1>(valid, (int*)mask, offset) : vint<K>(-1);1040ray.id = id ? vint<K>::template gather<1>(valid, (int*)id, offset) : vint<K>(-1);1041ray.flags = flags ? 
vint<K>::template gather<1>(valid, (int*)flags, offset) : vint<K>(-1);1042#else1043ray.org = zero;1044ray.tnear() = zero;1045ray.dir = zero;1046ray.tfar = zero;1047ray.time() = zero;1048ray.mask = zero;1049ray.id = zero;1050ray.flags = zero;10511052for (size_t k = 0; k < K; k++)1053{1054if (likely(valid[k]))1055{1056const size_t ofs = offset[k];10571058ray.org.x[k] = *(float* __restrict__)((char*)org_x + ofs);1059ray.org.y[k] = *(float* __restrict__)((char*)org_y + ofs);1060ray.org.z[k] = *(float* __restrict__)((char*)org_z + ofs);1061ray.dir.x[k] = *(float* __restrict__)((char*)dir_x + ofs);1062ray.dir.y[k] = *(float* __restrict__)((char*)dir_y + ofs);1063ray.dir.z[k] = *(float* __restrict__)((char*)dir_z + ofs);1064ray.tfar[k] = *(float* __restrict__)((char*)tfar + ofs);1065ray.tnear()[k] = tnear ? *(float* __restrict__)((char*)tnear + ofs) : 0.0f;1066ray.time()[k] = time ? *(float* __restrict__)((char*)time + ofs) : 0.0f;1067ray.mask[k] = mask ? *(int* __restrict__)((char*)mask + ofs) : -1;1068ray.id[k] = id ? *(int* __restrict__)((char*)id + ofs) : -1;1069ray.flags[k] = flags ? 
*(int* __restrict__)((char*)flags + ofs) : -1;1070}1071}1072#endif10731074return ray;1075}10761077template<int K>1078__forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayHitK<K>& ray)1079{1080vbool<K> valid = valid_i;1081valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);10821083if (likely(any(valid)))1084{1085#if defined(__AVX512F__)1086vfloat<K>::template scatter<1>(valid, tfar, offset, ray.tfar);10871088if (likely(Ng_x)) vfloat<K>::template scatter<1>(valid, Ng_x, offset, ray.Ng.x);1089if (likely(Ng_y)) vfloat<K>::template scatter<1>(valid, Ng_y, offset, ray.Ng.y);1090if (likely(Ng_z)) vfloat<K>::template scatter<1>(valid, Ng_z, offset, ray.Ng.z);1091vfloat<K>::template scatter<1>(valid, u, offset, ray.u);1092vfloat<K>::template scatter<1>(valid, v, offset, ray.v);1093vuint<K>::template scatter<1>(valid, (unsigned int*)geomID, offset, ray.geomID);1094vuint<K>::template scatter<1>(valid, (unsigned int*)primID, offset, ray.primID);10951096if (likely(instID[0])) {1097vuint<K>::template scatter<1>(valid, (unsigned int*)instID[0], offset, ray.instID[0]);1098#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)1099vuint<K>::template scatter<1>(valid, (unsigned int*)instPrimID[0], offset, ray.instPrimID[0]);1100#endif1101#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)1102for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l) {1103vuint<K>::template scatter<1>(valid, (unsigned int*)instID[l], offset, ray.instID[l]);1104#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)1105vuint<K>::template scatter<1>(valid, (unsigned int*)instPrimID[l], offset, ray.instPrimID[l]);1106#endif1107}1108#endif1109}1110#else1111size_t valid_bits = movemask(valid);1112while (valid_bits != 0)1113{1114const size_t k = bscf(valid_bits);1115const size_t ofs = offset[k];11161117*(float* __restrict__)((char*)tfar + ofs) = ray.tfar[k];11181119if (likely(Ng_x)) *(float* __restrict__)((char*)Ng_x + ofs) = ray.Ng.x[k];1120if 
(likely(Ng_y)) *(float* __restrict__)((char*)Ng_y + ofs) = ray.Ng.y[k];1121if (likely(Ng_z)) *(float* __restrict__)((char*)Ng_z + ofs) = ray.Ng.z[k];1122*(float* __restrict__)((char*)u + ofs) = ray.u[k];1123*(float* __restrict__)((char*)v + ofs) = ray.v[k];1124*(unsigned int* __restrict__)((char*)primID + ofs) = ray.primID[k];1125*(unsigned int* __restrict__)((char*)geomID + ofs) = ray.geomID[k];11261127if (likely(instID[0])) {1128*(unsigned int* __restrict__)((char*)instID[0] + ofs) = ray.instID[0][k];1129#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)1130*(unsigned int* __restrict__)((char*)instPrimID[0] + ofs) = ray.instPrimID[0][k];1131#endif1132#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)1133for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && ray.instID[l-1][k] != RTC_INVALID_GEOMETRY_ID; ++l) {1134*(unsigned int* __restrict__)((char*)instID[l] + ofs) = ray.instID[l][k];1135#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)1136*(unsigned int* __restrict__)((char*)instPrimID[l] + ofs) = ray.instPrimID[l][k];1137#endif1138}1139#endif1140}1141}1142#endif1143}1144}11451146template<int K>1147__forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayK<K>& ray)1148{1149vbool<K> valid = valid_i;1150valid &= (ray.tfar < 0.0f);11511152if (likely(any(valid)))1153{1154#if defined(__AVX512F__)1155vfloat<K>::template scatter<1>(valid, tfar, offset, ray.tfar);1156#else1157size_t valid_bits = movemask(valid);1158while (valid_bits != 0)1159{1160const size_t k = bscf(valid_bits);1161const size_t ofs = offset[k];11621163*(float* __restrict__)((char*)tfar + ofs) = ray.tfar[k];1164}1165#endif1166}1167}11681169/* ray data */1170float* __restrict__ org_x; // x coordinate of ray origin1171float* __restrict__ org_y; // y coordinate of ray origin1172float* __restrict__ org_z; // z coordinate of ray origin1173float* __restrict__ tnear; // start of ray segment (optional)11741175float* __restrict__ dir_x; // x coordinate of ray direction1176float* __restrict__ dir_y; // y 
coordinate of ray direction1177float* __restrict__ dir_z; // z coordinate of ray direction1178float* __restrict__ time; // time of this ray for motion blur (optional)11791180float* __restrict__ tfar; // end of ray segment (set to hit distance)1181unsigned int* __restrict__ mask; // used to mask out objects during traversal (optional)1182unsigned int* __restrict__ id; // ray ID1183unsigned int* __restrict__ flags; // ray flags11841185/* hit data */1186float* __restrict__ Ng_x; // x coordinate of geometry normal (optional)1187float* __restrict__ Ng_y; // y coordinate of geometry normal (optional)1188float* __restrict__ Ng_z; // z coordinate of geometry normal (optional)11891190float* __restrict__ u; // barycentric u coordinate of hit1191float* __restrict__ v; // barycentric v coordinate of hit11921193unsigned int* __restrict__ primID; // primitive ID1194unsigned int* __restrict__ geomID; // geometry ID1195unsigned int* __restrict__ instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID1196#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)1197unsigned int* __restrict__ instPrimID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance primitive ID (optional)1198#endif1199};120012011202struct RayStreamAOS1203{1204__forceinline RayStreamAOS(void* rays)1205: ptr((Ray*)rays) {}12061207__forceinline Ray& getRayByOffset(size_t offset)1208{1209return *(Ray*)((char*)ptr + offset);1210}12111212template<int K>1213__forceinline RayK<K> getRayByOffset(const vint<K>& offset);12141215template<int K>1216__forceinline RayK<K> getRayByOffset(const vbool<K>& valid, const vint<K>& offset)1217{1218const vint<K> valid_offset = select(valid, offset, vintx(zero));1219return getRayByOffset<K>(valid_offset);1220}12211222template<int K>1223__forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayHitK<K>& ray)1224{1225vbool<K> valid = valid_i;1226valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);12271228if (likely(any(valid)))1229{1230#if defined(__AVX512F__)1231vfloat<K>::template 
scatter<1>(valid, &ptr->tfar, offset, ray.tfar);1232vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->Ng.x, offset, ray.Ng.x);1233vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->Ng.y, offset, ray.Ng.y);1234vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->Ng.z, offset, ray.Ng.z);1235vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->u, offset, ray.u);1236vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->v, offset, ray.v);1237vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->primID, offset, ray.primID);1238vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->geomID, offset, ray.geomID);12391240vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->instID[0], offset, ray.instID[0]);1241#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)1242vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->instPrimID[0], offset, ray.instPrimID[0]);1243#endif1244#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)1245for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l) {1246vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->instID[l], offset, ray.instID[l]);1247#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)1248vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->instPrimID[l], offset, ray.instPrimID[l]);1249#endif1250}1251#endif1252#else1253size_t valid_bits = movemask(valid);1254while (valid_bits != 0)1255{1256const size_t k = bscf(valid_bits);1257RayHit* __restrict__ ray_k = (RayHit*)((char*)ptr + offset[k]);1258ray_k->tfar = ray.tfar[k];1259ray_k->Ng.x = ray.Ng.x[k];1260ray_k->Ng.y = ray.Ng.y[k];1261ray_k->Ng.z = ray.Ng.z[k];1262ray_k->u = ray.u[k];1263ray_k->v = ray.v[k];1264ray_k->primID = ray.primID[k];1265ray_k->geomID = ray.geomID[k];12661267instance_id_stack::copy_VU<K>(ray.instID, ray_k->instID, k);1268#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)1269instance_id_stack::copy_VU<K>(ray.instPrimID, ray_k->instPrimID, 
k);1270#endif1271}1272#endif1273}1274}12751276template<int K>1277__forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayK<K>& ray)1278{1279vbool<K> valid = valid_i;1280valid &= (ray.tfar < 0.0f);12811282if (likely(any(valid)))1283{1284#if defined(__AVX512F__)1285vfloat<K>::template scatter<1>(valid, &ptr->tfar, offset, ray.tfar);1286#else1287size_t valid_bits = movemask(valid);1288while (valid_bits != 0)1289{1290const size_t k = bscf(valid_bits);1291Ray* __restrict__ ray_k = (Ray*)((char*)ptr + offset[k]);1292ray_k->tfar = ray.tfar[k];1293}1294#endif1295}1296}12971298Ray* __restrict__ ptr;1299};13001301template<>1302__forceinline Ray4 RayStreamAOS::getRayByOffset<4>(const vint4& offset)1303{1304Ray4 ray;13051306/* load and transpose: org.x, org.y, org.z, tnear */1307const vfloat4 a0 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[0]))->org);1308const vfloat4 a1 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[1]))->org);1309const vfloat4 a2 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[2]))->org);1310const vfloat4 a3 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[3]))->org);13111312transpose(a0,a1,a2,a3, ray.org.x, ray.org.y, ray.org.z, ray.tnear());13131314/* load and transpose: dir.x, dir.y, dir.z, time */1315const vfloat4 b0 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[0]))->dir);1316const vfloat4 b1 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[1]))->dir);1317const vfloat4 b2 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[2]))->dir);1318const vfloat4 b3 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[3]))->dir);13191320transpose(b0,b1,b2,b3, ray.dir.x, ray.dir.y, ray.dir.z, ray.time());13211322/* load and transpose: tfar, mask, id, flags */1323const vfloat4 c0 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[0]))->tfar);1324const vfloat4 c1 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[1]))->tfar);1325const vfloat4 c2 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[2]))->tfar);1326const vfloat4 c3 = vfloat4::loadu(&((Ray*)((char*)ptr + 
#if defined(__AVX__)
/* 8-wide specialization: each ray is read as one 8-float row starting at org
 * (org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time) and one 4-float row
 * starting at tfar (tfar, mask, id, flags); both sets are transposed into
 * packet form. */
template<>
__forceinline Ray8 RayStreamAOS::getRayByOffset<8>(const vint8& offset)
{
  /* resolve the per-lane byte offsets to ray pointers once */
  Ray* src[8];
  for (size_t i = 0; i < 8; i++)
    src[i] = (Ray*)((char*)ptr + offset[i]);

  Ray8 result;

  /* rows: org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time */
  const vfloat8 od0 = vfloat8::loadu(&src[0]->org);
  const vfloat8 od1 = vfloat8::loadu(&src[1]->org);
  const vfloat8 od2 = vfloat8::loadu(&src[2]->org);
  const vfloat8 od3 = vfloat8::loadu(&src[3]->org);
  const vfloat8 od4 = vfloat8::loadu(&src[4]->org);
  const vfloat8 od5 = vfloat8::loadu(&src[5]->org);
  const vfloat8 od6 = vfloat8::loadu(&src[6]->org);
  const vfloat8 od7 = vfloat8::loadu(&src[7]->org);

  transpose(od0,od1,od2,od3,od4,od5,od6,od7,
            result.org.x, result.org.y, result.org.z, result.tnear(),
            result.dir.x, result.dir.y, result.dir.z, result.time());

  /* rows: tfar, mask, id, flags */
  const vfloat4 far0 = vfloat4::loadu(&src[0]->tfar);
  const vfloat4 far1 = vfloat4::loadu(&src[1]->tfar);
  const vfloat4 far2 = vfloat4::loadu(&src[2]->tfar);
  const vfloat4 far3 = vfloat4::loadu(&src[3]->tfar);
  const vfloat4 far4 = vfloat4::loadu(&src[4]->tfar);
  const vfloat4 far5 = vfloat4::loadu(&src[5]->tfar);
  const vfloat4 far6 = vfloat4::loadu(&src[6]->tfar);
  const vfloat4 far7 = vfloat4::loadu(&src[7]->tfar);

  /* the integer fields travel through the float transpose and are
   * reinterpreted afterwards */
  vfloat8 maskBits, idBits, flagBits;
  transpose(far0,far1,far2,far3,far4,far5,far6,far7, result.tfar, maskBits, idBits, flagBits);
  result.mask  = asInt(maskBits);
  result.id    = asInt(idBits);
  result.flags = asInt(flagBits);

  return result;
}
#endif
#if defined(__AVX512F__)
/* 16-wide specialization: each ray is read as one 8-float row starting at org
 * (org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time) and one 4-float row
 * starting at tfar (tfar, mask, id, flags); both sets are transposed into
 * packet form. */
template<>
__forceinline Ray16 RayStreamAOS::getRayByOffset<16>(const vint16& offset)
{
  /* resolve the per-lane byte offsets to ray pointers once */
  Ray* src[16];
  for (size_t i = 0; i < 16; i++)
    src[i] = (Ray*)((char*)ptr + offset[i]);

  Ray16 result;

  /* rows: org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time */
  const vfloat8 od0  = vfloat8::loadu(&src[ 0]->org);
  const vfloat8 od1  = vfloat8::loadu(&src[ 1]->org);
  const vfloat8 od2  = vfloat8::loadu(&src[ 2]->org);
  const vfloat8 od3  = vfloat8::loadu(&src[ 3]->org);
  const vfloat8 od4  = vfloat8::loadu(&src[ 4]->org);
  const vfloat8 od5  = vfloat8::loadu(&src[ 5]->org);
  const vfloat8 od6  = vfloat8::loadu(&src[ 6]->org);
  const vfloat8 od7  = vfloat8::loadu(&src[ 7]->org);
  const vfloat8 od8  = vfloat8::loadu(&src[ 8]->org);
  const vfloat8 od9  = vfloat8::loadu(&src[ 9]->org);
  const vfloat8 od10 = vfloat8::loadu(&src[10]->org);
  const vfloat8 od11 = vfloat8::loadu(&src[11]->org);
  const vfloat8 od12 = vfloat8::loadu(&src[12]->org);
  const vfloat8 od13 = vfloat8::loadu(&src[13]->org);
  const vfloat8 od14 = vfloat8::loadu(&src[14]->org);
  const vfloat8 od15 = vfloat8::loadu(&src[15]->org);

  transpose(od0,od1,od2,od3,od4,od5,od6,od7,od8,od9,od10,od11,od12,od13,od14,od15,
            result.org.x, result.org.y, result.org.z, result.tnear(),
            result.dir.x, result.dir.y, result.dir.z, result.time());

  /* rows: tfar, mask, id, flags */
  const vfloat4 far0  = vfloat4::loadu(&src[ 0]->tfar);
  const vfloat4 far1  = vfloat4::loadu(&src[ 1]->tfar);
  const vfloat4 far2  = vfloat4::loadu(&src[ 2]->tfar);
  const vfloat4 far3  = vfloat4::loadu(&src[ 3]->tfar);
  const vfloat4 far4  = vfloat4::loadu(&src[ 4]->tfar);
  const vfloat4 far5  = vfloat4::loadu(&src[ 5]->tfar);
  const vfloat4 far6  = vfloat4::loadu(&src[ 6]->tfar);
  const vfloat4 far7  = vfloat4::loadu(&src[ 7]->tfar);
  const vfloat4 far8  = vfloat4::loadu(&src[ 8]->tfar);
  const vfloat4 far9  = vfloat4::loadu(&src[ 9]->tfar);
  const vfloat4 far10 = vfloat4::loadu(&src[10]->tfar);
  const vfloat4 far11 = vfloat4::loadu(&src[11]->tfar);
  const vfloat4 far12 = vfloat4::loadu(&src[12]->tfar);
  const vfloat4 far13 = vfloat4::loadu(&src[13]->tfar);
  const vfloat4 far14 = vfloat4::loadu(&src[14]->tfar);
  const vfloat4 far15 = vfloat4::loadu(&src[15]->tfar);

  /* the integer fields travel through the float transpose and are
   * reinterpreted afterwards */
  vfloat16 maskBits, idBits, flagBits;
  transpose(far0,far1,far2,far3,far4,far5,far6,far7,far8,far9,far10,far11,far12,far13,far14,far15,
            result.tfar, maskBits, idBits, flagBits);
  result.mask  = asInt(maskBits);
  result.id    = asInt(idBits);
  result.flags = asInt(flagBits);

  return result;
}
#endif
vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 1]))->tfar);1405const vfloat4 c2 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 2]))->tfar);1406const vfloat4 c3 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 3]))->tfar);1407const vfloat4 c4 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 4]))->tfar);1408const vfloat4 c5 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 5]))->tfar);1409const vfloat4 c6 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 6]))->tfar);1410const vfloat4 c7 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 7]))->tfar);1411const vfloat4 c8 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 8]))->tfar);1412const vfloat4 c9 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 9]))->tfar);1413const vfloat4 c10 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[10]))->tfar);1414const vfloat4 c11 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[11]))->tfar);1415const vfloat4 c12 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[12]))->tfar);1416const vfloat4 c13 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[13]))->tfar);1417const vfloat4 c14 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[14]))->tfar);1418const vfloat4 c15 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[15]))->tfar);14191420vfloat16 maskf, idf, flagsf;1421transpose(c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,1422ray.tfar, maskf, idf, flagsf);1423ray.mask = asInt(maskf);1424ray.id = asInt(idf);1425ray.flags = asInt(flagsf);14261427return ray;1428}1429#endif143014311432struct RayStreamAOP1433{1434__forceinline RayStreamAOP(void* rays)1435: ptr((Ray**)rays) {}14361437__forceinline Ray& getRayByIndex(size_t index)1438{1439return *ptr[index];1440}14411442template<int K>1443__forceinline RayK<K> getRayByIndex(const vint<K>& index);14441445template<int K>1446__forceinline RayK<K> getRayByIndex(const vbool<K>& valid, const vint<K>& index)1447{1448const vint<K> valid_index = select(valid, index, vintx(zero));1449return getRayByIndex<K>(valid_index);1450}14511452template<int K>1453__forceinline void 
setHitByIndex(const vbool<K>& valid_i, const vint<K>& index, const RayHitK<K>& ray)1454{1455vbool<K> valid = valid_i;1456valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);14571458if (likely(any(valid)))1459{1460size_t valid_bits = movemask(valid);1461while (valid_bits != 0)1462{1463const size_t k = bscf(valid_bits);1464RayHit* __restrict__ ray_k = (RayHit*)ptr[index[k]];14651466ray_k->tfar = ray.tfar[k];1467ray_k->Ng.x = ray.Ng.x[k];1468ray_k->Ng.y = ray.Ng.y[k];1469ray_k->Ng.z = ray.Ng.z[k];1470ray_k->u = ray.u[k];1471ray_k->v = ray.v[k];1472ray_k->primID = ray.primID[k];1473ray_k->geomID = ray.geomID[k];1474instance_id_stack::copy_VU<K>(ray.instID, ray_k->instID, k);1475#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)1476instance_id_stack::copy_VU<K>(ray.instPrimID, ray_k->instPrimID, k);1477#endif1478}1479}1480}14811482template<int K>1483__forceinline void setHitByIndex(const vbool<K>& valid_i, const vint<K>& index, const RayK<K>& ray)1484{1485vbool<K> valid = valid_i;1486valid &= (ray.tfar < 0.0f);14871488if (likely(any(valid)))1489{1490size_t valid_bits = movemask(valid);1491while (valid_bits != 0)1492{1493const size_t k = bscf(valid_bits);1494Ray* __restrict__ ray_k = ptr[index[k]];14951496ray_k->tfar = ray.tfar[k];1497}1498}1499}15001501Ray** __restrict__ ptr;1502};15031504template<>1505__forceinline Ray4 RayStreamAOP::getRayByIndex<4>(const vint4& index)1506{1507Ray4 ray;15081509/* load and transpose: org.x, org.y, org.z, tnear */1510const vfloat4 a0 = vfloat4::loadu(&ptr[index[0]]->org);1511const vfloat4 a1 = vfloat4::loadu(&ptr[index[1]]->org);1512const vfloat4 a2 = vfloat4::loadu(&ptr[index[2]]->org);1513const vfloat4 a3 = vfloat4::loadu(&ptr[index[3]]->org);15141515transpose(a0,a1,a2,a3, ray.org.x, ray.org.y, ray.org.z, ray.tnear());15161517/* load and transpose: dir.x, dir.y, dir.z, time */1518const vfloat4 b0 = vfloat4::loadu(&ptr[index[0]]->dir);1519const vfloat4 b1 = vfloat4::loadu(&ptr[index[1]]->dir);1520const vfloat4 b2 = 
#if defined(__AVX__)
/* 8-wide specialization: each selected ray is read as one 8-float row
 * starting at org (org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time) and
 * one 4-float row starting at tfar (tfar, mask, id, flags); both sets are
 * transposed into packet form. */
template<>
__forceinline Ray8 RayStreamAOP::getRayByIndex<8>(const vint8& index)
{
  /* look up the eight ray pointers once */
  Ray* src[8];
  for (size_t i = 0; i < 8; i++)
    src[i] = ptr[index[i]];

  Ray8 result;

  /* rows: org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time */
  const vfloat8 od0 = vfloat8::loadu(&src[0]->org);
  const vfloat8 od1 = vfloat8::loadu(&src[1]->org);
  const vfloat8 od2 = vfloat8::loadu(&src[2]->org);
  const vfloat8 od3 = vfloat8::loadu(&src[3]->org);
  const vfloat8 od4 = vfloat8::loadu(&src[4]->org);
  const vfloat8 od5 = vfloat8::loadu(&src[5]->org);
  const vfloat8 od6 = vfloat8::loadu(&src[6]->org);
  const vfloat8 od7 = vfloat8::loadu(&src[7]->org);

  transpose(od0,od1,od2,od3,od4,od5,od6,od7,
            result.org.x, result.org.y, result.org.z, result.tnear(),
            result.dir.x, result.dir.y, result.dir.z, result.time());

  /* rows: tfar, mask, id, flags */
  const vfloat4 far0 = vfloat4::loadu(&src[0]->tfar);
  const vfloat4 far1 = vfloat4::loadu(&src[1]->tfar);
  const vfloat4 far2 = vfloat4::loadu(&src[2]->tfar);
  const vfloat4 far3 = vfloat4::loadu(&src[3]->tfar);
  const vfloat4 far4 = vfloat4::loadu(&src[4]->tfar);
  const vfloat4 far5 = vfloat4::loadu(&src[5]->tfar);
  const vfloat4 far6 = vfloat4::loadu(&src[6]->tfar);
  const vfloat4 far7 = vfloat4::loadu(&src[7]->tfar);

  /* the integer fields travel through the float transpose and are
   * reinterpreted afterwards */
  vfloat8 maskBits, idBits, flagBits;
  transpose(far0,far1,far2,far3,far4,far5,far6,far7, result.tfar, maskBits, idBits, flagBits);
  result.mask  = asInt(maskBits);
  result.id    = asInt(idBits);
  result.flags = asInt(flagBits);

  return result;
}
#endif
c7 = vfloat4::loadu(&ptr[index[7]]->tfar);15671568vfloat8 maskf, idf, flagsf;1569transpose(c0,c1,c2,c3,c4,c5,c6,c7, ray.tfar, maskf, idf, flagsf);1570ray.mask = asInt(maskf);1571ray.id = asInt(idf);1572ray.flags = asInt(flagsf);15731574return ray;1575}1576#endif15771578#if defined(__AVX512F__)1579template<>1580__forceinline Ray16 RayStreamAOP::getRayByIndex<16>(const vint16& index)1581{1582Ray16 ray;15831584/* load and transpose: org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time */1585const vfloat8 ab0 = vfloat8::loadu(&ptr[index[0]]->org);1586const vfloat8 ab1 = vfloat8::loadu(&ptr[index[1]]->org);1587const vfloat8 ab2 = vfloat8::loadu(&ptr[index[2]]->org);1588const vfloat8 ab3 = vfloat8::loadu(&ptr[index[3]]->org);1589const vfloat8 ab4 = vfloat8::loadu(&ptr[index[4]]->org);1590const vfloat8 ab5 = vfloat8::loadu(&ptr[index[5]]->org);1591const vfloat8 ab6 = vfloat8::loadu(&ptr[index[6]]->org);1592const vfloat8 ab7 = vfloat8::loadu(&ptr[index[7]]->org);1593const vfloat8 ab8 = vfloat8::loadu(&ptr[index[8]]->org);1594const vfloat8 ab9 = vfloat8::loadu(&ptr[index[9]]->org);1595const vfloat8 ab10 = vfloat8::loadu(&ptr[index[10]]->org);1596const vfloat8 ab11 = vfloat8::loadu(&ptr[index[11]]->org);1597const vfloat8 ab12 = vfloat8::loadu(&ptr[index[12]]->org);1598const vfloat8 ab13 = vfloat8::loadu(&ptr[index[13]]->org);1599const vfloat8 ab14 = vfloat8::loadu(&ptr[index[14]]->org);1600const vfloat8 ab15 = vfloat8::loadu(&ptr[index[15]]->org);16011602transpose(ab0,ab1,ab2,ab3,ab4,ab5,ab6,ab7,ab8,ab9,ab10,ab11,ab12,ab13,ab14,ab15,1603ray.org.x, ray.org.y, ray.org.z, ray.tnear(), ray.dir.x, ray.dir.y, ray.dir.z, ray.time());16041605/* load and transpose: tfar, mask, id, flags */1606const vfloat4 c0 = vfloat4::loadu(&ptr[index[0]]->tfar);1607const vfloat4 c1 = vfloat4::loadu(&ptr[index[1]]->tfar);1608const vfloat4 c2 = vfloat4::loadu(&ptr[index[2]]->tfar);1609const vfloat4 c3 = vfloat4::loadu(&ptr[index[3]]->tfar);1610const vfloat4 c4 = 
vfloat4::loadu(&ptr[index[4]]->tfar);1611const vfloat4 c5 = vfloat4::loadu(&ptr[index[5]]->tfar);1612const vfloat4 c6 = vfloat4::loadu(&ptr[index[6]]->tfar);1613const vfloat4 c7 = vfloat4::loadu(&ptr[index[7]]->tfar);1614const vfloat4 c8 = vfloat4::loadu(&ptr[index[8]]->tfar);1615const vfloat4 c9 = vfloat4::loadu(&ptr[index[9]]->tfar);1616const vfloat4 c10 = vfloat4::loadu(&ptr[index[10]]->tfar);1617const vfloat4 c11 = vfloat4::loadu(&ptr[index[11]]->tfar);1618const vfloat4 c12 = vfloat4::loadu(&ptr[index[12]]->tfar);1619const vfloat4 c13 = vfloat4::loadu(&ptr[index[13]]->tfar);1620const vfloat4 c14 = vfloat4::loadu(&ptr[index[14]]->tfar);1621const vfloat4 c15 = vfloat4::loadu(&ptr[index[15]]->tfar);16221623vfloat16 maskf, idf, flagsf;1624transpose(c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,1625ray.tfar, maskf, idf, flagsf);16261627ray.mask = asInt(maskf);1628ray.id = asInt(idf);1629ray.flags = asInt(flagsf);16301631return ray;1632}1633#endif1634}163516361637