Path: blob/master/thirdparty/embree/kernels/common/stack_item.h
9905 views
// Copyright 2009-2021 Intel Corporation1// SPDX-License-Identifier: Apache-2.023#pragma once45#include "default.h"67namespace embree8{9/*! An item on the stack holds the node ID and distance of that node. */10template<typename T>11struct __aligned(16) StackItemT12{13/*! assert that the xchg function works */14static_assert(sizeof(T) <= 12, "sizeof(T) <= 12 failed");1516__forceinline StackItemT() {}1718__forceinline StackItemT(T &ptr, unsigned &dist) : ptr(ptr), dist(dist) {}1920/*! use SSE instructions to swap stack items */21__forceinline static void xchg(StackItemT& a, StackItemT& b)22{23const vfloat4 sse_a = vfloat4::load((float*)&a);24const vfloat4 sse_b = vfloat4::load((float*)&b);25vfloat4::store(&a,sse_b);26vfloat4::store(&b,sse_a);27}2829/*! Sort 2 stack items. */30__forceinline friend void sort(StackItemT& s1, StackItemT& s2) {31if (s2.dist < s1.dist) xchg(s2,s1);32}3334/*! Sort 3 stack items. */35__forceinline friend void sort(StackItemT& s1, StackItemT& s2, StackItemT& s3)36{37if (s2.dist < s1.dist) xchg(s2,s1);38if (s3.dist < s2.dist) xchg(s3,s2);39if (s2.dist < s1.dist) xchg(s2,s1);40}4142/*! Sort 4 stack items. */43__forceinline friend void sort(StackItemT& s1, StackItemT& s2, StackItemT& s3, StackItemT& s4)44{45if (s2.dist < s1.dist) xchg(s2,s1);46if (s4.dist < s3.dist) xchg(s4,s3);47if (s3.dist < s1.dist) xchg(s3,s1);48if (s4.dist < s2.dist) xchg(s4,s2);49if (s3.dist < s2.dist) xchg(s3,s2);50}5152/*! use SSE instructions to swap stack items */53__forceinline static void cmp_xchg(vint4& a, vint4& b)54{55#if defined(__AVX512VL__)56const vboolf4 mask(shuffle<2,2,2,2>(b) < shuffle<2,2,2,2>(a));57#else58const vboolf4 mask0(b < a);59const vboolf4 mask(shuffle<2,2,2,2>(mask0));60#endif61const vint4 c = select(mask,b,a);62const vint4 d = select(mask,a,b);63a = c;64b = d;65}6667/*! Sort 3 stack items. */68__forceinline static void sort3(vint4& s1, vint4& s2, vint4& s3)69{70cmp_xchg(s2,s1);71cmp_xchg(s3,s2);72cmp_xchg(s2,s1);73}7475/*! Sort 4 stack items. */76__forceinline static void sort4(vint4& s1, vint4& s2, vint4& s3, vint4& s4)77{78cmp_xchg(s2,s1);79cmp_xchg(s4,s3);80cmp_xchg(s3,s1);81cmp_xchg(s4,s2);82cmp_xchg(s3,s2);83}848586/*! Sort N stack items. */87__forceinline friend void sort(StackItemT* begin, StackItemT* end)88{89for (StackItemT* i = begin+1; i != end; ++i)90{91const vfloat4 item = vfloat4::load((float*)i);92const unsigned dist = i->dist;93StackItemT* j = i;9495while ((j != begin) && ((j-1)->dist < dist))96{97vfloat4::store(j, vfloat4::load((float*)(j-1)));98--j;99}100101vfloat4::store(j, item);102}103}104105public:106T ptr;107unsigned dist;108};109110/*! An item on the stack holds the node ID and active ray mask. */111template<typename T>112struct __aligned(8) StackItemMaskT113{114T ptr;115size_t mask;116};117118struct __aligned(8) StackItemMaskCoherent119{120size_t mask;121size_t parent;122size_t child;123};124}125126127