// Path: blob/master/thirdparty/embree/kernels/common/device.cpp
// 9905 views
// Copyright 2009-2021 Intel Corporation1// SPDX-License-Identifier: Apache-2.023#include "device.h"45#include "../../common/tasking/taskscheduler.h"67#include "../hash.h"8#include "scene_triangle_mesh.h"9#include "scene_user_geometry.h"10#include "scene_instance.h"11#include "scene_curves.h"12#include "scene_subdiv_mesh.h"1314#include "../subdiv/tessellation_cache.h"1516#include "acceln.h"17#include "geometry.h"1819#include "../geometry/cylinder.h"2021#include "../bvh/bvh4_factory.h"22#include "../bvh/bvh8_factory.h"2324#include "../../common/sys/alloc.h"2526#if defined(EMBREE_SYCL_SUPPORT)27# include "../level_zero/ze_wrapper.h"28#endif2930namespace embree31{32/*! some global variables that can be set via rtcSetParameter1i for debugging purposes */33ssize_t Device::debug_int0 = 0;34ssize_t Device::debug_int1 = 0;35ssize_t Device::debug_int2 = 0;36ssize_t Device::debug_int3 = 0;3738static MutexSys g_mutex;39static std::map<Device*,size_t> g_cache_size_map;40static std::map<Device*,size_t> g_num_threads_map;4142struct TaskArena43{44#if USE_TASK_ARENA45std::unique_ptr<tbb::task_arena> arena;46#endif47};4849Device::Device (const char* cfg) : arena(new TaskArena())50{51/* check that CPU supports lowest ISA */52if (!hasISA(ISA)) {53throw_RTCError(RTC_ERROR_UNSUPPORTED_CPU,"CPU does not support " ISA_STR);54}5556/* set default frequency level for detected CPU */57switch (getCPUModel()) {58case CPU::UNKNOWN: frequency_level = FREQUENCY_SIMD256; break;59case CPU::XEON_ICE_LAKE: frequency_level = FREQUENCY_SIMD256; break;60case CPU::CORE_ICE_LAKE: frequency_level = FREQUENCY_SIMD256; break;61case CPU::CORE_TIGER_LAKE: frequency_level = FREQUENCY_SIMD256; break;62case CPU::CORE_COMET_LAKE: frequency_level = FREQUENCY_SIMD256; break;63case CPU::CORE_CANNON_LAKE:frequency_level = FREQUENCY_SIMD256; break;64case CPU::CORE_KABY_LAKE: frequency_level = FREQUENCY_SIMD256; break;65case CPU::XEON_SKY_LAKE: frequency_level = FREQUENCY_SIMD128; break;66case CPU::CORE_SKY_LAKE: 
frequency_level = FREQUENCY_SIMD256; break;67case CPU::XEON_BROADWELL: frequency_level = FREQUENCY_SIMD256; break;68case CPU::CORE_BROADWELL: frequency_level = FREQUENCY_SIMD256; break;69case CPU::XEON_HASWELL: frequency_level = FREQUENCY_SIMD256; break;70case CPU::CORE_HASWELL: frequency_level = FREQUENCY_SIMD256; break;71case CPU::XEON_IVY_BRIDGE: frequency_level = FREQUENCY_SIMD256; break;72case CPU::CORE_IVY_BRIDGE: frequency_level = FREQUENCY_SIMD256; break;73case CPU::SANDY_BRIDGE: frequency_level = FREQUENCY_SIMD256; break;74case CPU::NEHALEM: frequency_level = FREQUENCY_SIMD128; break;75case CPU::CORE2: frequency_level = FREQUENCY_SIMD128; break;76case CPU::CORE1: frequency_level = FREQUENCY_SIMD128; break;77case CPU::XEON_PHI_KNIGHTS_MILL : frequency_level = FREQUENCY_SIMD512; break;78case CPU::XEON_PHI_KNIGHTS_LANDING: frequency_level = FREQUENCY_SIMD512; break;79case CPU::ARM: frequency_level = FREQUENCY_SIMD256; break;80}8182/* initialize global state */83#if defined(EMBREE_CONFIG)84State::parseString(EMBREE_CONFIG);85#endif86State::parseString(cfg);87State::verify();8889/* check whether selected ISA is supported by the HW, as the user could have forced an unsupported ISA */90if (!checkISASupport()) {91throw_RTCError(RTC_ERROR_UNSUPPORTED_CPU,"CPU does not support selected ISA");92}9394/*! do some internal tests */95assert(isa::Cylinder::verify());9697/*! enable huge page support if desired */98#if defined(__WIN32__)99if (State::enable_selockmemoryprivilege)100State::hugepages_success &= win_enable_selockmemoryprivilege(State::verbosity(3));101#endif102State::hugepages_success &= os_init(State::hugepages,State::verbosity(3));103104/*! set tessellation cache size */105setCacheSize( State::tessellation_cache_size );106107/*! 
enable some floating point exceptions to catch bugs */108if (State::float_exceptions)109{110int exceptions = _MM_MASK_MASK;111//exceptions &= ~_MM_MASK_INVALID;112exceptions &= ~_MM_MASK_DENORM;113exceptions &= ~_MM_MASK_DIV_ZERO;114//exceptions &= ~_MM_MASK_OVERFLOW;115//exceptions &= ~_MM_MASK_UNDERFLOW;116//exceptions &= ~_MM_MASK_INEXACT;117_MM_SET_EXCEPTION_MASK(exceptions);118}119120/* print info header */121if (State::verbosity(1))122print();123if (State::verbosity(2))124State::print();125126/* register all algorithms */127bvh4_factory = make_unique(new BVH4Factory(enabled_builder_cpu_features, enabled_cpu_features));128129#if defined(EMBREE_TARGET_SIMD8)130bvh8_factory = make_unique(new BVH8Factory(enabled_builder_cpu_features, enabled_cpu_features));131#endif132133/* setup tasking system */134initTaskingSystem(numThreads);135}136137Device::~Device ()138{139setCacheSize(0);140exitTaskingSystem();141}142143std::string getEnabledTargets()144{145std::string v;146#if defined(EMBREE_TARGET_SSE2)147v += "SSE2 ";148#endif149#if defined(EMBREE_TARGET_SSE42)150v += "SSE4.2 ";151#endif152#if defined(EMBREE_TARGET_AVX)153v += "AVX ";154#endif155#if defined(EMBREE_TARGET_AVX2)156v += "AVX2 ";157#endif158#if defined(EMBREE_TARGET_AVX512)159v += "AVX512 ";160#endif161return v;162}163164std::string getEmbreeFeatures()165{166std::string v;167#if defined(EMBREE_RAY_MASK)168v += "raymasks ";169#endif170#if defined (EMBREE_BACKFACE_CULLING)171v += "backfaceculling ";172#endif173#if defined (EMBREE_BACKFACE_CULLING_CURVES)174v += "backfacecullingcurves ";175#endif176#if defined (EMBREE_BACKFACE_CULLING_SPHERES)177v += "backfacecullingspheres ";178#endif179#if defined(EMBREE_FILTER_FUNCTION)180v += "intersection_filter ";181#endif182#if defined (EMBREE_COMPACT_POLYS)183v += "compact_polys ";184#endif185return v;186}187188void Device::print()189{190const int cpu_features = getCPUFeatures();191std::cout << std::endl;192std::cout << "Embree Ray Tracing Kernels " << 
RTC_VERSION_STRING << " (" << RTC_HASH << ")" << std::endl;193std::cout << " Compiler : " << getCompilerName() << std::endl;194std::cout << " Build : ";195#if defined(DEBUG)196std::cout << "Debug " << std::endl;197#else198std::cout << "Release " << std::endl;199#endif200std::cout << " Platform : " << getPlatformName() << std::endl;201std::cout << " CPU : " << stringOfCPUModel(getCPUModel()) << " (" << getCPUVendor() << ")" << std::endl;202std::cout << " Threads : " << getNumberOfLogicalThreads() << std::endl;203std::cout << " ISA : " << stringOfCPUFeatures(cpu_features) << std::endl;204std::cout << " Targets : " << supportedTargetList(cpu_features) << std::endl;205const bool hasFTZ = _mm_getcsr() & _MM_FLUSH_ZERO_ON;206const bool hasDAZ = _mm_getcsr() & _MM_DENORMALS_ZERO_ON;207std::cout << " MXCSR : " << "FTZ=" << hasFTZ << ", DAZ=" << hasDAZ << std::endl;208std::cout << " Config" << std::endl;209std::cout << " Threads : " << (numThreads ? toString(numThreads) : std::string("default")) << std::endl;210std::cout << " ISA : " << stringOfCPUFeatures(enabled_cpu_features) << std::endl;211std::cout << " Targets : " << supportedTargetList(enabled_cpu_features) << " (supported)" << std::endl;212std::cout << " " << getEnabledTargets() << " (compile time enabled)" << std::endl;213std::cout << " Features: " << getEmbreeFeatures() << std::endl;214std::cout << " Tasking : ";215#if defined(TASKING_TBB)216std::cout << "TBB" << TBB_VERSION_MAJOR << "." 
<< TBB_VERSION_MINOR << " ";217#if TBB_INTERFACE_VERSION >= 12002218std::cout << "TBB_header_interface_" << TBB_INTERFACE_VERSION << " TBB_lib_interface_" << TBB_runtime_interface_version() << " ";219#else220std::cout << "TBB_header_interface_" << TBB_INTERFACE_VERSION << " TBB_lib_interface_" << tbb::TBB_runtime_interface_version() << " ";221#endif222#endif223#if defined(TASKING_INTERNAL)224std::cout << "internal_tasking_system ";225#endif226#if defined(TASKING_PPL)227std::cout << "PPL ";228#endif229std::cout << std::endl;230231#if defined(__X86_64__)232/* check of FTZ and DAZ flags are set in CSR */233if (!hasFTZ || !hasDAZ)234{235#if !defined(_DEBUG)236if (State::verbosity(1))237#endif238{239std::cout << std::endl;240std::cout << "================================================================================" << std::endl;241std::cout << " WARNING: \"Flush to Zero\" or \"Denormals are Zero\" mode not enabled " << std::endl242<< " in the MXCSR control and status register. This can have a severe " << std::endl243<< " performance impact. 
Please enable these modes for each application " << std::endl244<< " thread the following way:" << std::endl245<< std::endl246<< " #include \"xmmintrin.h\"" << std::endl247<< " #include \"pmmintrin.h\"" << std::endl248<< std::endl249<< " _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);" << std::endl250<< " _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);" << std::endl;251std::cout << "================================================================================" << std::endl;252std::cout << std::endl;253}254}255#endif256std::cout << std::endl;257}258259void Device::setDeviceErrorCode(RTCError error, std::string const& msg)260{261RTCErrorMessage* stored_error = errorHandler.error();262if (stored_error->error == RTC_ERROR_NONE) {263stored_error->error = error;264if (msg != "")265stored_error->msg = msg;266}267}268269RTCError Device::getDeviceErrorCode()270{271RTCErrorMessage* stored_error = errorHandler.error();272RTCErrorMessage error = *stored_error;273stored_error->error = RTC_ERROR_NONE;274return error.error;275}276277const char* Device::getDeviceLastErrorMessage()278{279RTCErrorMessage* stored_error = errorHandler.error();280return stored_error->msg.c_str();281}282283void Device::setThreadErrorCode(RTCError error, std::string const& msg)284{285RTCErrorMessage* stored_error = g_errorHandler.error();286if (stored_error->error == RTC_ERROR_NONE) {287stored_error->error = error;288if (msg != "")289stored_error->msg = msg;290}291}292293RTCError Device::getThreadErrorCode()294{295RTCErrorMessage* stored_error = g_errorHandler.error();296RTCErrorMessage error = *stored_error;297stored_error->error = RTC_ERROR_NONE;298return error.error;299}300301const char* Device::getThreadLastErrorMessage()302{303RTCErrorMessage* stored_error = g_errorHandler.error();304return stored_error->msg.c_str();305}306307void Device::process_error(Device* device, RTCError error, const char* str)308{309/* store global error code when device construction failed */310if (!device)311return 
setThreadErrorCode(error, str ? std::string(str) : std::string());312313/* print error when in verbose mode */314if (device->verbosity(1))315{316std::cerr << "Embree: " << getErrorString(error);317if (str) std::cerr << ", (" << str << ")";318std::cerr << std::endl;319}320321/* call user specified error callback */322if (device->error_function)323device->error_function(device->error_function_userptr,error,str);324325/* record error code */326device->setDeviceErrorCode(error, str ? std::string(str) : std::string());327}328329void Device::memoryMonitor(ssize_t bytes, bool post)330{331if (State::memory_monitor_function && bytes != 0) {332if (!State::memory_monitor_function(State::memory_monitor_userptr,bytes,post)) {333if (bytes > 0) { // only throw exception when we allocate memory to never throw inside a destructor334throw_RTCError(RTC_ERROR_OUT_OF_MEMORY,"memory monitor forced termination");335}336}337}338}339340size_t getMaxNumThreads()341{342size_t maxNumThreads = 0;343for (std::map<Device*,size_t>::iterator i=g_num_threads_map.begin(); i != g_num_threads_map.end(); i++)344maxNumThreads = max(maxNumThreads, (*i).second);345if (maxNumThreads == 0)346maxNumThreads = std::numeric_limits<size_t>::max();347return maxNumThreads;348}349350size_t getMaxCacheSize()351{352size_t maxCacheSize = 0;353for (std::map<Device*,size_t>::iterator i=g_cache_size_map.begin(); i!= g_cache_size_map.end(); i++)354maxCacheSize = max(maxCacheSize, (*i).second);355return maxCacheSize;356}357358void Device::setCacheSize(size_t bytes)359{360#if defined(EMBREE_GEOMETRY_SUBDIVISION)361Lock<MutexSys> lock(g_mutex);362if (bytes == 0) g_cache_size_map.erase(this);363else g_cache_size_map[this] = bytes;364365size_t maxCacheSize = getMaxCacheSize();366resizeTessellationCache(maxCacheSize);367#endif368}369370void Device::initTaskingSystem(size_t numThreads)371{372Lock<MutexSys> lock(g_mutex);373if (numThreads == 0)374g_num_threads_map[this] = 
std::numeric_limits<size_t>::max();375else376g_num_threads_map[this] = numThreads;377378/* create task scheduler */379size_t maxNumThreads = getMaxNumThreads();380TaskScheduler::create(maxNumThreads,State::set_affinity,State::start_threads);381#if USE_TASK_ARENA382const size_t nThreads = min(maxNumThreads,TaskScheduler::threadCount());383const size_t uThreads = min(max(numUserThreads,(size_t)1),nThreads);384arena->arena = make_unique(new tbb::task_arena((int)nThreads,(unsigned int)uThreads));385#endif386}387388void Device::exitTaskingSystem()389{390Lock<MutexSys> lock(g_mutex);391g_num_threads_map.erase(this);392393/* terminate tasking system */394if (g_num_threads_map.size() == 0) {395TaskScheduler::destroy();396}397/* or configure new number of threads */398else {399size_t maxNumThreads = getMaxNumThreads();400TaskScheduler::create(maxNumThreads,State::set_affinity,State::start_threads);401}402#if USE_TASK_ARENA403arena->arena.reset();404#endif405}406407void Device::execute(bool join, const std::function<void()>& func)408{409#if USE_TASK_ARENA410if (join) {411arena->arena->execute(func);412}413else414#endif415{416func();417}418}419420void Device::setProperty(const RTCDeviceProperty prop, ssize_t val)421{422/* hidden internal properties */423switch ((size_t)prop)424{425case 1000000: debug_int0 = val; return;426case 1000001: debug_int1 = val; return;427case 1000002: debug_int2 = val; return;428case 1000003: debug_int3 = val; return;429}430431throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown writable property");432}433434ssize_t Device::getProperty(const RTCDeviceProperty prop)435{436size_t iprop = (size_t)prop;437438/* get name of internal regression test */439if (iprop >= 2000000 && iprop < 3000000)440{441RegressionTest* test = getRegressionTest(iprop-2000000);442if (test) return (ssize_t) test->name.c_str();443else return 0;444}445446/* run internal regression test */447if (iprop >= 3000000 && iprop < 4000000)448{449RegressionTest* test = 
getRegressionTest(iprop-3000000);450if (test) return test->run();451else return 0;452}453454/* documented properties */455switch (prop)456{457case RTC_DEVICE_PROPERTY_VERSION_MAJOR: return RTC_VERSION_MAJOR;458case RTC_DEVICE_PROPERTY_VERSION_MINOR: return RTC_VERSION_MINOR;459case RTC_DEVICE_PROPERTY_VERSION_PATCH: return RTC_VERSION_PATCH;460case RTC_DEVICE_PROPERTY_VERSION : return RTC_VERSION;461462#if defined(EMBREE_TARGET_SIMD4) && defined(EMBREE_RAY_PACKETS)463case RTC_DEVICE_PROPERTY_NATIVE_RAY4_SUPPORTED: return hasISA(SSE2);464#else465case RTC_DEVICE_PROPERTY_NATIVE_RAY4_SUPPORTED: return 0;466#endif467468#if defined(EMBREE_TARGET_SIMD8) && defined(EMBREE_RAY_PACKETS)469case RTC_DEVICE_PROPERTY_NATIVE_RAY8_SUPPORTED: return hasISA(AVX);470#else471case RTC_DEVICE_PROPERTY_NATIVE_RAY8_SUPPORTED: return 0;472#endif473474#if defined(EMBREE_TARGET_SIMD16) && defined(EMBREE_RAY_PACKETS)475case RTC_DEVICE_PROPERTY_NATIVE_RAY16_SUPPORTED: return hasISA(AVX512);476#else477case RTC_DEVICE_PROPERTY_NATIVE_RAY16_SUPPORTED: return 0;478#endif479480#if defined(EMBREE_RAY_MASK)481case RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED: return 1;482#else483case RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED: return 0;484#endif485486#if defined(EMBREE_BACKFACE_CULLING)487case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_ENABLED: return 1;488#else489case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_ENABLED: return 0;490#endif491492#if defined(EMBREE_BACKFACE_CULLING_CURVES)493case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_CURVES_ENABLED: return 1;494#else495case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_CURVES_ENABLED: return 0;496#endif497498#if defined(EMBREE_BACKFACE_CULLING_SPHERES)499case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_SPHERES_ENABLED: return 1;500#else501case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_SPHERES_ENABLED: return 0;502#endif503504#if defined(EMBREE_COMPACT_POLYS)505case RTC_DEVICE_PROPERTY_COMPACT_POLYS_ENABLED: return 1;506#else507case RTC_DEVICE_PROPERTY_COMPACT_POLYS_ENABLED: return 
0;508#endif509510#if defined(EMBREE_FILTER_FUNCTION)511case RTC_DEVICE_PROPERTY_FILTER_FUNCTION_SUPPORTED: return 1;512#else513case RTC_DEVICE_PROPERTY_FILTER_FUNCTION_SUPPORTED: return 0;514#endif515516#if defined(EMBREE_IGNORE_INVALID_RAYS)517case RTC_DEVICE_PROPERTY_IGNORE_INVALID_RAYS_ENABLED: return 1;518#else519case RTC_DEVICE_PROPERTY_IGNORE_INVALID_RAYS_ENABLED: return 0;520#endif521522#if defined(TASKING_INTERNAL)523case RTC_DEVICE_PROPERTY_TASKING_SYSTEM: return 0;524#endif525526#if defined(TASKING_TBB)527case RTC_DEVICE_PROPERTY_TASKING_SYSTEM: return 1;528#endif529530#if defined(TASKING_PPL)531case RTC_DEVICE_PROPERTY_TASKING_SYSTEM: return 2;532#endif533534#if defined(EMBREE_GEOMETRY_TRIANGLE)535case RTC_DEVICE_PROPERTY_TRIANGLE_GEOMETRY_SUPPORTED: return 1;536#else537case RTC_DEVICE_PROPERTY_TRIANGLE_GEOMETRY_SUPPORTED: return 0;538#endif539540#if defined(EMBREE_GEOMETRY_QUAD)541case RTC_DEVICE_PROPERTY_QUAD_GEOMETRY_SUPPORTED: return 1;542#else543case RTC_DEVICE_PROPERTY_QUAD_GEOMETRY_SUPPORTED: return 0;544#endif545546#if defined(EMBREE_GEOMETRY_CURVE)547case RTC_DEVICE_PROPERTY_CURVE_GEOMETRY_SUPPORTED: return 1;548#else549case RTC_DEVICE_PROPERTY_CURVE_GEOMETRY_SUPPORTED: return 0;550#endif551552#if defined(EMBREE_GEOMETRY_SUBDIVISION)553case RTC_DEVICE_PROPERTY_SUBDIVISION_GEOMETRY_SUPPORTED: return 1;554#else555case RTC_DEVICE_PROPERTY_SUBDIVISION_GEOMETRY_SUPPORTED: return 0;556#endif557558#if defined(EMBREE_GEOMETRY_USER)559case RTC_DEVICE_PROPERTY_USER_GEOMETRY_SUPPORTED: return 1;560#else561case RTC_DEVICE_PROPERTY_USER_GEOMETRY_SUPPORTED: return 0;562#endif563564#if defined(EMBREE_GEOMETRY_POINT)565case RTC_DEVICE_PROPERTY_POINT_GEOMETRY_SUPPORTED: return 1;566#else567case RTC_DEVICE_PROPERTY_POINT_GEOMETRY_SUPPORTED: return 0;568#endif569570#if defined(TASKING_PPL)571case RTC_DEVICE_PROPERTY_JOIN_COMMIT_SUPPORTED: return 0;572#elif defined(TASKING_TBB) && (TBB_INTERFACE_VERSION_MAJOR < 8)573case RTC_DEVICE_PROPERTY_JOIN_COMMIT_SUPPORTED: 
return 0;574#else575case RTC_DEVICE_PROPERTY_JOIN_COMMIT_SUPPORTED: return 1;576#endif577578#if defined(TASKING_TBB) && TASKING_TBB_USE_TASK_ISOLATION579case RTC_DEVICE_PROPERTY_PARALLEL_COMMIT_SUPPORTED: return 1;580#else581case RTC_DEVICE_PROPERTY_PARALLEL_COMMIT_SUPPORTED: return 0;582#endif583584#if defined(EMBREE_SYCL_SUPPORT)585case RTC_DEVICE_PROPERTY_CPU_DEVICE: {586if (!dynamic_cast<DeviceGPU*>(this))587return 1;588return 0;589};590case RTC_DEVICE_PROPERTY_SYCL_DEVICE: {591if (!dynamic_cast<DeviceGPU*>(this))592return 0;593return 1;594};595#else596case RTC_DEVICE_PROPERTY_CPU_DEVICE: return 1;597case RTC_DEVICE_PROPERTY_SYCL_DEVICE: return 0;598#endif599600default: throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown readable property"); break;601};602}603604void* Device::malloc(size_t size, size_t align) {605return alignedMalloc(size,align);606}607608void* Device::malloc(size_t size, size_t align, EmbreeMemoryType type) {609return alignedMalloc(size,align);610}611612void Device::free(void* ptr) {613alignedFree(ptr);614}615616const std::vector<std::string> Device::error_strings = {617"No Error",618"Unknown error",619"Invalid argument",620"Invalid operation",621"Out of Memory",622"Unsupported CPU",623"Build cancelled",624"Level Zero raytracing support missing"625};626627const char* Device::getErrorString(RTCError error) {628if (error >= 0 && error < error_strings.size()) {629return error_strings.at(error).c_str();630}631return "Invalid error code";632}633634#if defined(EMBREE_SYCL_SUPPORT)635636DeviceGPU::DeviceGPU(sycl::context sycl_context, const char* cfg)637: Device(cfg), gpu_context(sycl_context)638{639/* initialize ZeWrapper */640if (ZeWrapper::init() != ZE_RESULT_SUCCESS)641throw_RTCError(RTC_ERROR_UNKNOWN, "cannot initialize ZeWrapper");642643/* take first device as default device */644auto devices = gpu_context.get_devices();645if (devices.size() == 0)646throw_RTCError(RTC_ERROR_UNKNOWN, "SYCL context contains no device");647gpu_device = 
devices[0];648649/* check if RTAS build extension is available */650sycl::platform platform = gpu_device.get_platform();651ze_driver_handle_t hDriver = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(platform);652653uint32_t count = 0;654std::vector<ze_driver_extension_properties_t> extensions;655ze_result_t result = ZeWrapper::zeDriverGetExtensionProperties(hDriver,&count,extensions.data());656if (result != ZE_RESULT_SUCCESS)657throw_RTCError(RTC_ERROR_UNKNOWN, "zeDriverGetExtensionProperties failed");658659extensions.resize(count);660result = ZeWrapper::zeDriverGetExtensionProperties(hDriver,&count,extensions.data());661if (result != ZE_RESULT_SUCCESS)662throw_RTCError(RTC_ERROR_UNKNOWN, "zeDriverGetExtensionProperties failed");663664bool ze_rtas_builder = false;665for (uint32_t i=0; i<extensions.size(); i++)666{667if (strncmp("ZE_experimental_rtas_builder",extensions[i].name,sizeof(extensions[i].name)) == 0)668ze_rtas_builder = true;669}670if (!ze_rtas_builder)671throw_RTCError(RTC_ERROR_LEVEL_ZERO_RAYTRACING_SUPPORT_MISSING, "ZE_experimental_rtas_builder extension not found. Please install a recent driver. On Linux, make sure that the package intel-level-zero-gpu-raytracing is installed");672673result = ZeWrapper::initRTASBuilder(hDriver);674if (result == ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE) {675throw_RTCError(RTC_ERROR_LEVEL_ZERO_RAYTRACING_SUPPORT_MISSING, "cannot load ZE_experimental_rtas_builder extension. Please install a recent driver. 
On Linux, make sure that the package intel-level-zero-gpu-raytracing is installed");676}677if (result != ZE_RESULT_SUCCESS)678throw_RTCError(RTC_ERROR_UNKNOWN, "cannot initialize ZE_experimental_rtas_builder extension");679680if (State::verbosity(1))681{682std::cout << " Level Zero RTAS Builder" << std::endl;683}684685/* check if extension library can get loaded */686ze_rtas_parallel_operation_exp_handle_t hParallelOperation;687result = ZeWrapper::zeRTASParallelOperationCreateExp(hDriver, &hParallelOperation);688if (result == ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE)689throw_RTCError(RTC_ERROR_UNKNOWN, "Level Zero RTAS Build Extension cannot get loaded");690if (result == ZE_RESULT_SUCCESS)691ZeWrapper::zeRTASParallelOperationDestroyExp(hParallelOperation);692693gpu_maxWorkGroupSize = getGPUDevice().get_info<sycl::info::device::max_work_group_size>();694gpu_maxComputeUnits = getGPUDevice().get_info<sycl::info::device::max_compute_units>();695696if (State::verbosity(1))697{698sycl::platform platform = gpu_context.get_platform();699std::cout << " Platform : " << platform.get_info<sycl::info::platform::name>() << std::endl;700std::cout << " Device : " << getGPUDevice().get_info<sycl::info::device::name>() << std::endl;701std::cout << " Max Work Group Size : " << gpu_maxWorkGroupSize << std::endl;702std::cout << " Max Compute Units : " << gpu_maxComputeUnits << std::endl;703std::cout << std::endl;704}705706dispatchGlobalsPtr = zeRTASInitExp(gpu_device, gpu_context);707}708709DeviceGPU::~DeviceGPU()710{711rthwifCleanup(this,dispatchGlobalsPtr,gpu_context);712}713714void DeviceGPU::enter() {715}716717void DeviceGPU::leave() {718}719720void* DeviceGPU::malloc(size_t size, size_t align) {721return alignedSYCLMalloc(&gpu_context,&gpu_device,size,align,EmbreeUSMMode::DEVICE_READ_ONLY);722}723724void* DeviceGPU::malloc(size_t size, size_t align, EmbreeMemoryType type) {725return 
alignedSYCLMalloc(&gpu_context,&gpu_device,size,align,EmbreeUSMMode::DEVICE_READ_ONLY,type);726}727728void DeviceGPU::free(void* ptr) {729alignedSYCLFree(&gpu_context,ptr);730}731732void DeviceGPU::setSYCLDevice(const sycl::device sycl_device_in) {733gpu_device = sycl_device_in;734}735736// turn off deprecation warning for host_unified_memory property usage.737// there is currently no equivalent SYCL aspect that replaces this property.738#pragma GCC diagnostic push739#pragma GCC diagnostic ignored "-Wdeprecated-declarations"740bool DeviceGPU::has_unified_memory() const {741return gpu_device.get_info<sycl::info::device::host_unified_memory>();742}743#pragma GCC diagnostic pop744745#endif746747DeviceEnterLeave::DeviceEnterLeave (RTCDevice hdevice)748: device((Device*)hdevice)749{750assert(device);751device->refInc();752device->enter();753}754755DeviceEnterLeave::DeviceEnterLeave (RTCScene hscene)756: device(((Scene*)hscene)->device)757{758assert(device);759device->refInc();760device->enter();761}762763DeviceEnterLeave::DeviceEnterLeave (RTCGeometry hgeometry)764: device(((Geometry*)hgeometry)->device)765{766assert(device);767device->refInc();768device->enter();769}770771DeviceEnterLeave::DeviceEnterLeave (RTCBuffer hbuffer)772: device(((Buffer*)hbuffer)->device)773{774assert(device);775device->refInc();776device->enter();777}778779DeviceEnterLeave::~DeviceEnterLeave() {780device->leave();781device->refDec();782}783}784785786