Path: blob/master/thirdparty/embree/kernels/common/buffer.h
9905 views
// Copyright 2009-2021 Intel Corporation1// SPDX-License-Identifier: Apache-2.023#pragma once45#include "default.h"6#include "device.h"78namespace embree9{10enum class BufferDataPointerType {11HOST = 0,12DEVICE = 1,13UNKNOWN = 214};1516/*! Implements an API data buffer object. This class may or may not own the data. */17class Buffer : public RefCount18{19private:20char* alloc(void* ptr_in, bool &shared, EmbreeMemoryType memoryType)21{22if (ptr_in)23{24shared = true;25return (char*)ptr_in;26}27else28{29shared = false;30device->memoryMonitor(this->bytes(), false);31size_t b = (this->bytes()+15) & ssize_t(-16);32return (char*)device->malloc(b,16,memoryType);33}34}3536public:37Buffer(Device* device, size_t numBytes_in, void* ptr_in)38: device(device), numBytes(numBytes_in)39{40device->refInc();4142ptr = alloc(ptr_in, shared, EmbreeMemoryType::USM_SHARED);43#if defined(EMBREE_SYCL_SUPPORT)44dshared = true;45dptr = ptr;46modified = true;47#endif48}4950Buffer(Device* device, size_t numBytes_in, void* ptr_in, void* dptr_in)51: device(device), numBytes(numBytes_in)52{53device->refInc();5455#if defined(EMBREE_SYCL_SUPPORT)56modified = true;57if (device->is_gpu() && !device->has_unified_memory())58{59ptr = alloc( ptr_in, shared, EmbreeMemoryType::MALLOC);60dptr = alloc(dptr_in, dshared, EmbreeMemoryType::USM_DEVICE);61}62else if (device->is_gpu() && device->has_unified_memory())63{64ptr = alloc(ptr_in, shared, EmbreeMemoryType::USM_SHARED);6566if (device->get_memory_type(ptr) != EmbreeMemoryType::USM_SHARED)67{68dptr = alloc(dptr_in, dshared, EmbreeMemoryType::USM_DEVICE);69}70else71{72dshared = true;73dptr = ptr;74}75}76else77#endif78{79ptr = alloc(ptr_in, shared, EmbreeMemoryType::MALLOC);80#if defined(EMBREE_SYCL_SUPPORT)81dshared = true;82dptr = ptr;83#endif84}85}8687/*! Buffer destruction */88virtual ~Buffer() {89free();90device->refDec();91}9293/*! this class is not copyable */94private:95Buffer(const Buffer& other) DELETED; // do not implement96Buffer& operator =(const Buffer& other) DELETED; // do not implement9798public:99100/*! frees the buffer */101virtual void free()102{103if (!shared && ptr) {104#if defined(EMBREE_SYCL_SUPPORT)105if (dptr == ptr) {106dptr = nullptr;107}108#endif109device->free(ptr);110device->memoryMonitor(-ssize_t(this->bytes()), true);111ptr = nullptr;112}113#if defined(EMBREE_SYCL_SUPPORT)114if (!dshared && dptr) {115device->free(dptr);116device->memoryMonitor(-ssize_t(this->bytes()), true);117dptr = nullptr;118}119#endif120}121122/*! gets buffer pointer */123void* data()124{125/* report error if buffer is not existing */126if (!device)127throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer specified");128129/* return buffer */130return ptr;131}132133/*! gets buffer pointer */134void* dataDevice()135{136/* report error if buffer is not existing */137if (!device)138throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer specified");139140/* return buffer */141#if defined(EMBREE_SYCL_SUPPORT)142return dptr;143#else144return ptr;145#endif146}147148/*! returns pointer to first element */149__forceinline char* getPtr(BufferDataPointerType type) const150{151if (type == BufferDataPointerType::HOST) return getHostPtr();152else if (type == BufferDataPointerType::DEVICE) return getDevicePtr();153154throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer data pointer type specified");155return nullptr;156}157158/*! returns pointer to first element */159__forceinline virtual char* getHostPtr() const {160return ptr;161}162163/*! returns pointer to first element */164__forceinline virtual char* getDevicePtr() const {165#if defined(EMBREE_SYCL_SUPPORT)166return dptr;167#else168return ptr;169#endif170}171172/*! returns the number of bytes of the buffer */173__forceinline size_t bytes() const {174return numBytes;175}176177/*! returns true of the buffer is not empty */178__forceinline operator bool() const {179return ptr;180}181182__forceinline void commit() {183#if defined(EMBREE_SYCL_SUPPORT)184DeviceGPU* gpu_device = dynamic_cast<DeviceGPU*>(device);185if (gpu_device) {186sycl::queue queue(gpu_device->getGPUDevice());187commit(queue);188queue.wait_and_throw();189}190modified = false;191#endif192}193194#if defined(EMBREE_SYCL_SUPPORT)195__forceinline sycl::event commit(sycl::queue queue) {196if (dptr == ptr)197return sycl::event();198199modified = false;200return queue.memcpy(dptr, ptr, numBytes);201}202#endif203204__forceinline bool needsCommit() const {205#if defined(EMBREE_SYCL_SUPPORT)206return (dptr == ptr) ? false : modified;207#else208return false;209#endif210}211212__forceinline void setNeedsCommit(bool isModified = true) {213#if defined(EMBREE_SYCL_SUPPORT)214modified = isModified;215#endif216}217218__forceinline void commitIfNeeded() {219if (needsCommit()) {220commit();221}222}223224public:225Device* device; //!< device to report memory usage to226size_t numBytes; //!< number of bytes in the buffer227char* ptr; //!< pointer to buffer data228#if defined(EMBREE_SYCL_SUPPORT)229char* dptr; //!< pointer to buffer data on device230#endif231bool shared; //!< set if memory is shared with application232#if defined(EMBREE_SYCL_SUPPORT)233bool dshared; //!< set if device memory is shared with application234bool modified; //!< to be set when host memory has been modified and dev needs update235#endif236};237238/*! An untyped contiguous range of a buffer. This class does not own the buffer content. */239class RawBufferView240{241public:242/*! Buffer construction */243RawBufferView()244: ptr_ofs(nullptr), dptr_ofs(nullptr), stride(0), num(0), format(RTC_FORMAT_UNDEFINED), modCounter(1), modified(true), userData(0) {}245246public:247/*! sets the buffer view */248void set(const Ref<Buffer>& buffer_in, size_t offset_in, size_t stride_in, size_t num_in, RTCFormat format_in)249{250if ((offset_in + stride_in * num_in) > (stride_in * buffer_in->numBytes))251throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "buffer range out of bounds");252253ptr_ofs = buffer_in->getHostPtr() + offset_in;254dptr_ofs = buffer_in->getDevicePtr() + offset_in;255stride = stride_in;256num = num_in;257format = format_in;258modCounter++;259modified = true;260buffer = buffer_in;261}262263/*! returns pointer to the i'th element */264__forceinline char* getPtr(BufferDataPointerType pointerType) const265{266if (pointerType == BufferDataPointerType::HOST)267return ptr_ofs;268else if (pointerType == BufferDataPointerType::DEVICE)269return dptr_ofs;270271throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer data pointer type specified");272return nullptr;273}274275/*! returns pointer to the first element */276__forceinline char* getPtr() const {277#if defined(__SYCL_DEVICE_ONLY__)278return dptr_ofs;279#else280return ptr_ofs;281#endif282}283284/*! returns pointer to the i'th element */285__forceinline char* getPtr(size_t i) const286{287#if defined(__SYCL_DEVICE_ONLY__)288assert(i<num);289return dptr_ofs + i*stride;290#else291return ptr_ofs + i*stride;292#endif293}294295/*! returns the number of elements of the buffer */296__forceinline size_t size() const {297return num;298}299300/*! returns the number of bytes of the buffer */301__forceinline size_t bytes() const {302return num*stride;303}304305/*! returns the buffer stride */306__forceinline unsigned getStride() const307{308assert(stride <= unsigned(inf));309return unsigned(stride);310}311312/*! return the buffer format */313__forceinline RTCFormat getFormat() const {314return format;315}316317/*! mark buffer as modified or unmodified */318__forceinline void setModified() {319modCounter++;320modified = true;321if (buffer) buffer->setNeedsCommit();322}323324/*! mark buffer as modified or unmodified */325__forceinline bool isModified(unsigned int otherModCounter) const {326return modCounter > otherModCounter;327}328329/*! mark buffer as modified or unmodified */330__forceinline bool isLocalModified() const {331return modified;332}333334/*! clear local modified flag */335__forceinline void clearLocalModified() {336modified = false;337}338339/*! returns true of the buffer is not empty */340__forceinline operator bool() const {341return ptr_ofs;342}343344/*! checks padding to 16 byte check, fails hard */345__forceinline void checkPadding16() const346{347if (ptr_ofs && num)348volatile int MAYBE_UNUSED w = *((int*)getPtr(size()-1)+3); // FIXME: is failing hard avoidable?349}350351public:352char* ptr_ofs; //!< base pointer plus offset353char* dptr_ofs; //!< base pointer plus offset in device memory354size_t stride; //!< stride of the buffer in bytes355size_t num; //!< number of elements in the buffer356RTCFormat format; //!< format of the buffer357unsigned int modCounter; //!< version ID of this buffer358bool modified; //!< local modified data359int userData; //!< special data360Ref<Buffer> buffer; //!< reference to the parent buffer361};362363/*! A typed contiguous range of a buffer. This class does not own the buffer content. */364template<typename T>365class BufferView : public RawBufferView366{367public:368typedef T value_type;369370#if defined(__SYCL_DEVICE_ONLY__)371/*! access to the ith element of the buffer */372__forceinline T& operator [](size_t i) { assert(i<num); return *(T*)(dptr_ofs + i*stride); }373__forceinline const T& operator [](size_t i) const { assert(i<num); return *(T*)(dptr_ofs + i*stride); }374#else375/*! access to the ith element of the buffer */376__forceinline T& operator [](size_t i) { assert(i<num); return *(T*)(ptr_ofs + i*stride); }377__forceinline const T& operator [](size_t i) const { assert(i<num); return *(T*)(ptr_ofs + i*stride); }378#endif379};380381template<>382class BufferView<Vec3fa> : public RawBufferView383{384public:385typedef Vec3fa value_type;386387#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)388389/*! access to the ith element of the buffer */390__forceinline const Vec3fa operator [](size_t i) const391{392assert(i<num);393return Vec3fa::loadu(dptr_ofs + i*stride);394}395396/*! writes the i'th element */397__forceinline void store(size_t i, const Vec3fa& v)398{399assert(i<num);400Vec3fa::storeu(dptr_ofs + i*stride, v);401}402403#else404405/*! access to the ith element of the buffer */406__forceinline const Vec3fa operator [](size_t i) const407{408assert(i<num);409return Vec3fa(vfloat4::loadu((float*)(ptr_ofs + i*stride)));410}411412/*! writes the i'th element */413__forceinline void store(size_t i, const Vec3fa& v)414{415assert(i<num);416vfloat4::storeu((float*)(ptr_ofs + i*stride), (vfloat4)v);417}418#endif419};420}421422423