Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/embree/kernels/common/device.cpp
9905 views
1
// Copyright 2009-2021 Intel Corporation
2
// SPDX-License-Identifier: Apache-2.0
3
4
#include "device.h"
5
6
#include "../../common/tasking/taskscheduler.h"
7
8
#include "../hash.h"
9
#include "scene_triangle_mesh.h"
10
#include "scene_user_geometry.h"
11
#include "scene_instance.h"
12
#include "scene_curves.h"
13
#include "scene_subdiv_mesh.h"
14
15
#include "../subdiv/tessellation_cache.h"
16
17
#include "acceln.h"
18
#include "geometry.h"
19
20
#include "../geometry/cylinder.h"
21
22
#include "../bvh/bvh4_factory.h"
23
#include "../bvh/bvh8_factory.h"
24
25
#include "../../common/sys/alloc.h"
26
27
#if defined(EMBREE_SYCL_SUPPORT)
28
# include "../level_zero/ze_wrapper.h"
29
#endif
30
31
namespace embree
32
{
33
/*! some global variables that can be set via rtcSetParameter1i for debugging purposes */
34
ssize_t Device::debug_int0 = 0;
35
ssize_t Device::debug_int1 = 0;
36
ssize_t Device::debug_int2 = 0;
37
ssize_t Device::debug_int3 = 0;
38
39
static MutexSys g_mutex;
40
static std::map<Device*,size_t> g_cache_size_map;
41
static std::map<Device*,size_t> g_num_threads_map;
42
43
/*! Holder for the device's TBB task arena. The struct has no members when
 *  USE_TASK_ARENA is disabled. */
struct TaskArena
{
#if USE_TASK_ARENA
  std::unique_ptr<tbb::task_arena> arena;
#endif
};
49
50
/*! Constructs a device: selects a default frequency level for the detected
 *  CPU, parses and verifies the configuration, validates the selected ISA,
 *  applies huge page / cache / floating point settings, creates the BVH
 *  factories and finally initializes the tasking system.
 *
 *  \param cfg configuration string handed to State::parseString */
Device::Device (const char* cfg) : arena(new TaskArena())
{
  /* the CPU has to support the lowest ISA */
  if (!hasISA(ISA)) {
    throw_RTCError(RTC_ERROR_UNSUPPORTED_CPU,"CPU does not support " ISA_STR);
  }

  /* default frequency level, cases grouped by the level they select */
  switch (getCPUModel()) {
  case CPU::XEON_SKY_LAKE:
  case CPU::NEHALEM:
  case CPU::CORE2:
  case CPU::CORE1:
    frequency_level = FREQUENCY_SIMD128;
    break;

  case CPU::XEON_PHI_KNIGHTS_MILL:
  case CPU::XEON_PHI_KNIGHTS_LANDING:
    frequency_level = FREQUENCY_SIMD512;
    break;

  case CPU::UNKNOWN:
  case CPU::XEON_ICE_LAKE:
  case CPU::CORE_ICE_LAKE:
  case CPU::CORE_TIGER_LAKE:
  case CPU::CORE_COMET_LAKE:
  case CPU::CORE_CANNON_LAKE:
  case CPU::CORE_KABY_LAKE:
  case CPU::CORE_SKY_LAKE:
  case CPU::XEON_BROADWELL:
  case CPU::CORE_BROADWELL:
  case CPU::XEON_HASWELL:
  case CPU::CORE_HASWELL:
  case CPU::XEON_IVY_BRIDGE:
  case CPU::CORE_IVY_BRIDGE:
  case CPU::SANDY_BRIDGE:
  case CPU::ARM:
    frequency_level = FREQUENCY_SIMD256;
    break;
  }

  /* initialize global state: compile-time configuration first, then the
     configuration string of the caller */
#if defined(EMBREE_CONFIG)
  State::parseString(EMBREE_CONFIG);
#endif
  State::parseString(cfg);
  State::verify();

  /* the user could have forced an ISA that the hardware does not support */
  if (!checkISASupport()) {
    throw_RTCError(RTC_ERROR_UNSUPPORTED_CPU,"CPU does not support selected ISA");
  }

  /*! internal self test (only evaluated when assertions are enabled) */
  assert(isa::Cylinder::verify());

  /*! enable huge page support if desired */
#if defined(__WIN32__)
  if (State::enable_selockmemoryprivilege) {
    State::hugepages_success &= win_enable_selockmemoryprivilege(State::verbosity(3));
  }
#endif
  State::hugepages_success &= os_init(State::hugepages,State::verbosity(3));

  /*! set tessellation cache size */
  setCacheSize( State::tessellation_cache_size );

  /*! enable some floating point exceptions to catch bugs */
  if (State::float_exceptions)
  {
    /* start from the full mask and clear only the denormal and divide-by-zero
       bits; the invalid, overflow, underflow and inexact bits stay set */
    int exceptions = _MM_MASK_MASK;
    exceptions &= ~(_MM_MASK_DENORM | _MM_MASK_DIV_ZERO);
    _MM_SET_EXCEPTION_MASK(exceptions);
  }

  /* print info header */
  if (State::verbosity(1)) {
    print();
  }
  if (State::verbosity(2)) {
    State::print();
  }

  /* register all algorithms */
  bvh4_factory = make_unique(new BVH4Factory(enabled_builder_cpu_features, enabled_cpu_features));

#if defined(EMBREE_TARGET_SIMD8)
  bvh8_factory = make_unique(new BVH8Factory(enabled_builder_cpu_features, enabled_cpu_features));
#endif

  /* setup tasking system */
  initTaskingSystem(numThreads);
}
137
138
/*! Withdraws this device's tessellation cache request and shuts down its
 *  share of the tasking system. */
Device::~Device ()
{
  setCacheSize(0);      // a size of 0 makes setCacheSize drop this device's entry
  exitTaskingSystem();
}
143
144
/*! Returns the names of the ISA targets enabled at compile time; each name
 *  is followed by a single space. */
std::string getEnabledTargets()
{
  std::string targets;
#if defined(EMBREE_TARGET_SSE2)
  targets.append("SSE2 ");
#endif
#if defined(EMBREE_TARGET_SSE42)
  targets.append("SSE4.2 ");
#endif
#if defined(EMBREE_TARGET_AVX)
  targets.append("AVX ");
#endif
#if defined(EMBREE_TARGET_AVX2)
  targets.append("AVX2 ");
#endif
#if defined(EMBREE_TARGET_AVX512)
  targets.append("AVX512 ");
#endif
  return targets;
}
164
165
/*! Returns the names of the optional features enabled at compile time; each
 *  name is followed by a single space. */
std::string getEmbreeFeatures()
{
  std::string features;
#if defined(EMBREE_RAY_MASK)
  features.append("raymasks ");
#endif
#if defined (EMBREE_BACKFACE_CULLING)
  features.append("backfaceculling ");
#endif
#if defined (EMBREE_BACKFACE_CULLING_CURVES)
  features.append("backfacecullingcurves ");
#endif
#if defined (EMBREE_BACKFACE_CULLING_SPHERES)
  features.append("backfacecullingspheres ");
#endif
#if defined(EMBREE_FILTER_FUNCTION)
  features.append("intersection_filter ");
#endif
#if defined (EMBREE_COMPACT_POLYS)
  features.append("compact_polys ");
#endif
  return features;
}
188
189
void Device::print()
190
{
191
const int cpu_features = getCPUFeatures();
192
std::cout << std::endl;
193
std::cout << "Embree Ray Tracing Kernels " << RTC_VERSION_STRING << " (" << RTC_HASH << ")" << std::endl;
194
std::cout << " Compiler : " << getCompilerName() << std::endl;
195
std::cout << " Build : ";
196
#if defined(DEBUG)
197
std::cout << "Debug " << std::endl;
198
#else
199
std::cout << "Release " << std::endl;
200
#endif
201
std::cout << " Platform : " << getPlatformName() << std::endl;
202
std::cout << " CPU : " << stringOfCPUModel(getCPUModel()) << " (" << getCPUVendor() << ")" << std::endl;
203
std::cout << " Threads : " << getNumberOfLogicalThreads() << std::endl;
204
std::cout << " ISA : " << stringOfCPUFeatures(cpu_features) << std::endl;
205
std::cout << " Targets : " << supportedTargetList(cpu_features) << std::endl;
206
const bool hasFTZ = _mm_getcsr() & _MM_FLUSH_ZERO_ON;
207
const bool hasDAZ = _mm_getcsr() & _MM_DENORMALS_ZERO_ON;
208
std::cout << " MXCSR : " << "FTZ=" << hasFTZ << ", DAZ=" << hasDAZ << std::endl;
209
std::cout << " Config" << std::endl;
210
std::cout << " Threads : " << (numThreads ? toString(numThreads) : std::string("default")) << std::endl;
211
std::cout << " ISA : " << stringOfCPUFeatures(enabled_cpu_features) << std::endl;
212
std::cout << " Targets : " << supportedTargetList(enabled_cpu_features) << " (supported)" << std::endl;
213
std::cout << " " << getEnabledTargets() << " (compile time enabled)" << std::endl;
214
std::cout << " Features: " << getEmbreeFeatures() << std::endl;
215
std::cout << " Tasking : ";
216
#if defined(TASKING_TBB)
217
std::cout << "TBB" << TBB_VERSION_MAJOR << "." << TBB_VERSION_MINOR << " ";
218
#if TBB_INTERFACE_VERSION >= 12002
219
std::cout << "TBB_header_interface_" << TBB_INTERFACE_VERSION << " TBB_lib_interface_" << TBB_runtime_interface_version() << " ";
220
#else
221
std::cout << "TBB_header_interface_" << TBB_INTERFACE_VERSION << " TBB_lib_interface_" << tbb::TBB_runtime_interface_version() << " ";
222
#endif
223
#endif
224
#if defined(TASKING_INTERNAL)
225
std::cout << "internal_tasking_system ";
226
#endif
227
#if defined(TASKING_PPL)
228
std::cout << "PPL ";
229
#endif
230
std::cout << std::endl;
231
232
#if defined(__X86_64__)
233
/* check of FTZ and DAZ flags are set in CSR */
234
if (!hasFTZ || !hasDAZ)
235
{
236
#if !defined(_DEBUG)
237
if (State::verbosity(1))
238
#endif
239
{
240
std::cout << std::endl;
241
std::cout << "================================================================================" << std::endl;
242
std::cout << " WARNING: \"Flush to Zero\" or \"Denormals are Zero\" mode not enabled " << std::endl
243
<< " in the MXCSR control and status register. This can have a severe " << std::endl
244
<< " performance impact. Please enable these modes for each application " << std::endl
245
<< " thread the following way:" << std::endl
246
<< std::endl
247
<< " #include \"xmmintrin.h\"" << std::endl
248
<< " #include \"pmmintrin.h\"" << std::endl
249
<< std::endl
250
<< " _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);" << std::endl
251
<< " _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);" << std::endl;
252
std::cout << "================================================================================" << std::endl;
253
std::cout << std::endl;
254
}
255
}
256
#endif
257
std::cout << std::endl;
258
}
259
260
/*! Records an error in the slot returned by errorHandler.error(), unless an
 *  error is already pending there. An empty message leaves the previously
 *  stored message untouched.
 *
 *  \param error error code to record
 *  \param msg   message to store alongside the code (may be empty) */
void Device::setDeviceErrorCode(RTCError error, std::string const& msg)
{
  RTCErrorMessage* const slot = errorHandler.error();

  /* the first error wins until it is fetched */
  if (slot->error != RTC_ERROR_NONE)
    return;

  slot->error = error;
  if (!msg.empty())
    slot->msg = msg;
}
269
270
/*! Returns the pending error code from the slot returned by
 *  errorHandler.error() and resets that code to RTC_ERROR_NONE. The stored
 *  message is left in place so getDeviceLastErrorMessage can still return it.
 *
 *  Only the code is read; the whole RTCErrorMessage, including its string,
 *  used to be copied just to return one field. */
RTCError Device::getDeviceErrorCode()
{
  RTCErrorMessage* stored_error = errorHandler.error();
  const RTCError code = stored_error->error;
  stored_error->error = RTC_ERROR_NONE;
  return code;
}
277
278
const char* Device::getDeviceLastErrorMessage()
279
{
280
RTCErrorMessage* stored_error = errorHandler.error();
281
return stored_error->msg.c_str();
282
}
283
284
/*! Records an error in the slot returned by g_errorHandler.error(), unless
 *  an error is already pending there. An empty message leaves the previously
 *  stored message untouched.
 *
 *  \param error error code to record
 *  \param msg   message to store alongside the code (may be empty) */
void Device::setThreadErrorCode(RTCError error, std::string const& msg)
{
  RTCErrorMessage* const slot = g_errorHandler.error();

  /* the first error wins until it is fetched */
  if (slot->error != RTC_ERROR_NONE)
    return;

  slot->error = error;
  if (!msg.empty())
    slot->msg = msg;
}
293
294
/*! Returns the pending error code from the slot returned by
 *  g_errorHandler.error() and resets that code to RTC_ERROR_NONE. The stored
 *  message is left in place so getThreadLastErrorMessage can still return it.
 *
 *  Only the code is read; the whole RTCErrorMessage, including its string,
 *  used to be copied just to return one field. */
RTCError Device::getThreadErrorCode()
{
  RTCErrorMessage* stored_error = g_errorHandler.error();
  const RTCError code = stored_error->error;
  stored_error->error = RTC_ERROR_NONE;
  return code;
}
301
302
const char* Device::getThreadLastErrorMessage()
303
{
304
RTCErrorMessage* stored_error = g_errorHandler.error();
305
return stored_error->msg.c_str();
306
}
307
308
/*! Reports an error. Without a device the error is only stored through
 *  setThreadErrorCode. With a device it is printed to std::cerr at verbosity
 *  level 1, passed to the user's error callback if one is set, and stored
 *  through setDeviceErrorCode.
 *
 *  \param device device the error belongs to, or null
 *  \param error  error code
 *  \param str    optional message, may be null */
void Device::process_error(Device* device, RTCError error, const char* str)
{
  /* store global error code when device construction failed */
  if (!device) {
    setThreadErrorCode(error, std::string(str ? str : ""));
    return;
  }

  /* print error when in verbose mode */
  if (device->verbosity(1))
  {
    std::cerr << "Embree: " << getErrorString(error);
    if (str) {
      std::cerr << ", (" << str << ")";
    }
    std::cerr << std::endl;
  }

  /* call user specified error callback */
  if (device->error_function) {
    device->error_function(device->error_function_userptr,error,str);
  }

  /* record error code */
  device->setDeviceErrorCode(error, std::string(str ? str : ""));
}
329
330
/*! Forwards a memory usage change to the registered memory monitor callback.
 *  Nothing happens without a callback or for a change of 0 bytes. When the
 *  callback returns false for a positive change, RTC_ERROR_OUT_OF_MEMORY is
 *  raised.
 *
 *  \param bytes signed change in bytes
 *  \param post  passed through to the callback unchanged */
void Device::memoryMonitor(ssize_t bytes, bool post)
{
  if (!State::memory_monitor_function || bytes == 0)
    return;

  if (State::memory_monitor_function(State::memory_monitor_userptr,bytes,post))
    return;

  /* only throw exception when we allocate memory to never throw inside a destructor */
  if (bytes > 0) {
    throw_RTCError(RTC_ERROR_OUT_OF_MEMORY,"memory monitor forced termination");
  }
}
340
341
size_t getMaxNumThreads()
342
{
343
size_t maxNumThreads = 0;
344
for (std::map<Device*,size_t>::iterator i=g_num_threads_map.begin(); i != g_num_threads_map.end(); i++)
345
maxNumThreads = max(maxNumThreads, (*i).second);
346
if (maxNumThreads == 0)
347
maxNumThreads = std::numeric_limits<size_t>::max();
348
return maxNumThreads;
349
}
350
351
size_t getMaxCacheSize()
352
{
353
size_t maxCacheSize = 0;
354
for (std::map<Device*,size_t>::iterator i=g_cache_size_map.begin(); i!= g_cache_size_map.end(); i++)
355
maxCacheSize = max(maxCacheSize, (*i).second);
356
return maxCacheSize;
357
}
358
359
/*! Registers the tessellation cache size requested by this device and
 *  resizes the tessellation cache to the largest registered request. A size
 *  of 0 removes this device's request. Does nothing unless
 *  EMBREE_GEOMETRY_SUBDIVISION is defined.
 *
 *  \param bytes requested cache size in bytes */
void Device::setCacheSize(size_t bytes)
{
#if defined(EMBREE_GEOMETRY_SUBDIVISION)
  Lock<MutexSys> lock(g_mutex);

  if (bytes != 0) {
    g_cache_size_map[this] = bytes;
  } else {
    g_cache_size_map.erase(this);
  }

  resizeTessellationCache(getMaxCacheSize());
#endif
}
370
371
void Device::initTaskingSystem(size_t numThreads)
372
{
373
Lock<MutexSys> lock(g_mutex);
374
if (numThreads == 0)
375
g_num_threads_map[this] = std::numeric_limits<size_t>::max();
376
else
377
g_num_threads_map[this] = numThreads;
378
379
/* create task scheduler */
380
size_t maxNumThreads = getMaxNumThreads();
381
TaskScheduler::create(maxNumThreads,State::set_affinity,State::start_threads);
382
#if USE_TASK_ARENA
383
const size_t nThreads = min(maxNumThreads,TaskScheduler::threadCount());
384
const size_t uThreads = min(max(numUserThreads,(size_t)1),nThreads);
385
arena->arena = make_unique(new tbb::task_arena((int)nThreads,(unsigned int)uThreads));
386
#endif
387
}
388
389
void Device::exitTaskingSystem()
390
{
391
Lock<MutexSys> lock(g_mutex);
392
g_num_threads_map.erase(this);
393
394
/* terminate tasking system */
395
if (g_num_threads_map.size() == 0) {
396
TaskScheduler::destroy();
397
}
398
/* or configure new number of threads */
399
else {
400
size_t maxNumThreads = getMaxNumThreads();
401
TaskScheduler::create(maxNumThreads,State::set_affinity,State::start_threads);
402
}
403
#if USE_TASK_ARENA
404
arena->arena.reset();
405
#endif
406
}
407
408
/*! Runs func. With USE_TASK_ARENA and join set, func is executed through the
 *  device's task arena; in every other case it is called directly.
 *
 *  \param join selects execution through the arena (only with USE_TASK_ARENA)
 *  \param func callable to run */
void Device::execute(bool join, const std::function<void()>& func)
{
#if USE_TASK_ARENA
  if (join) {
    arena->arena->execute(func);
    return;
  }
#endif
  func();
}
420
421
void Device::setProperty(const RTCDeviceProperty prop, ssize_t val)
422
{
423
/* hidden internal properties */
424
switch ((size_t)prop)
425
{
426
case 1000000: debug_int0 = val; return;
427
case 1000001: debug_int1 = val; return;
428
case 1000002: debug_int2 = val; return;
429
case 1000003: debug_int3 = val; return;
430
}
431
432
throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown writable property");
433
}
434
435
/*! Reads a device property.
 *
 *  - [2000000,3000000): pointer to the name of the internal regression test
 *    with index prop-2000000, cast to ssize_t (0 if there is no such test)
 *  - [3000000,4000000): result of running the internal regression test with
 *    index prop-3000000 (0 if there is no such test)
 *  - documented properties: version numbers and compile-time / runtime
 *    capabilities
 *
 *  Any other property raises RTC_ERROR_INVALID_ARGUMENT.
 *
 *  \param prop property to query
 *  \return the property value */
ssize_t Device::getProperty(const RTCDeviceProperty prop)
{
  const size_t propId = (size_t)prop;

  /* get name of internal regression test */
  if (propId >= 2000000 && propId < 3000000)
  {
    RegressionTest* const regression = getRegressionTest(propId-2000000);
    if (!regression) return 0;
    return (ssize_t) regression->name.c_str();
  }

  /* run internal regression test */
  if (propId >= 3000000 && propId < 4000000)
  {
    RegressionTest* const regression = getRegressionTest(propId-3000000);
    if (!regression) return 0;
    return regression->run();
  }

  /* documented properties */
  switch (prop)
  {
  case RTC_DEVICE_PROPERTY_VERSION_MAJOR: return RTC_VERSION_MAJOR;
  case RTC_DEVICE_PROPERTY_VERSION_MINOR: return RTC_VERSION_MINOR;
  case RTC_DEVICE_PROPERTY_VERSION_PATCH: return RTC_VERSION_PATCH;
  case RTC_DEVICE_PROPERTY_VERSION      : return RTC_VERSION;

  case RTC_DEVICE_PROPERTY_NATIVE_RAY4_SUPPORTED:
#if defined(EMBREE_TARGET_SIMD4) && defined(EMBREE_RAY_PACKETS)
    return hasISA(SSE2);
#else
    return 0;
#endif

  case RTC_DEVICE_PROPERTY_NATIVE_RAY8_SUPPORTED:
#if defined(EMBREE_TARGET_SIMD8) && defined(EMBREE_RAY_PACKETS)
    return hasISA(AVX);
#else
    return 0;
#endif

  case RTC_DEVICE_PROPERTY_NATIVE_RAY16_SUPPORTED:
#if defined(EMBREE_TARGET_SIMD16) && defined(EMBREE_RAY_PACKETS)
    return hasISA(AVX512);
#else
    return 0;
#endif

  case RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED:
#if defined(EMBREE_RAY_MASK)
    return 1;
#else
    return 0;
#endif

  case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_ENABLED:
#if defined(EMBREE_BACKFACE_CULLING)
    return 1;
#else
    return 0;
#endif

  case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_CURVES_ENABLED:
#if defined(EMBREE_BACKFACE_CULLING_CURVES)
    return 1;
#else
    return 0;
#endif

  case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_SPHERES_ENABLED:
#if defined(EMBREE_BACKFACE_CULLING_SPHERES)
    return 1;
#else
    return 0;
#endif

  case RTC_DEVICE_PROPERTY_COMPACT_POLYS_ENABLED:
#if defined(EMBREE_COMPACT_POLYS)
    return 1;
#else
    return 0;
#endif

  case RTC_DEVICE_PROPERTY_FILTER_FUNCTION_SUPPORTED:
#if defined(EMBREE_FILTER_FUNCTION)
    return 1;
#else
    return 0;
#endif

  case RTC_DEVICE_PROPERTY_IGNORE_INVALID_RAYS_ENABLED:
#if defined(EMBREE_IGNORE_INVALID_RAYS)
    return 1;
#else
    return 0;
#endif

  /* tasking system: 0 = internal, 1 = TBB, 2 = PPL; when none of the macros
     is defined the property falls through to the default case */
#if defined(TASKING_INTERNAL)
  case RTC_DEVICE_PROPERTY_TASKING_SYSTEM: return 0;
#endif

#if defined(TASKING_TBB)
  case RTC_DEVICE_PROPERTY_TASKING_SYSTEM: return 1;
#endif

#if defined(TASKING_PPL)
  case RTC_DEVICE_PROPERTY_TASKING_SYSTEM: return 2;
#endif

  case RTC_DEVICE_PROPERTY_TRIANGLE_GEOMETRY_SUPPORTED:
#if defined(EMBREE_GEOMETRY_TRIANGLE)
    return 1;
#else
    return 0;
#endif

  case RTC_DEVICE_PROPERTY_QUAD_GEOMETRY_SUPPORTED:
#if defined(EMBREE_GEOMETRY_QUAD)
    return 1;
#else
    return 0;
#endif

  case RTC_DEVICE_PROPERTY_CURVE_GEOMETRY_SUPPORTED:
#if defined(EMBREE_GEOMETRY_CURVE)
    return 1;
#else
    return 0;
#endif

  case RTC_DEVICE_PROPERTY_SUBDIVISION_GEOMETRY_SUPPORTED:
#if defined(EMBREE_GEOMETRY_SUBDIVISION)
    return 1;
#else
    return 0;
#endif

  case RTC_DEVICE_PROPERTY_USER_GEOMETRY_SUPPORTED:
#if defined(EMBREE_GEOMETRY_USER)
    return 1;
#else
    return 0;
#endif

  case RTC_DEVICE_PROPERTY_POINT_GEOMETRY_SUPPORTED:
#if defined(EMBREE_GEOMETRY_POINT)
    return 1;
#else
    return 0;
#endif

  case RTC_DEVICE_PROPERTY_JOIN_COMMIT_SUPPORTED:
#if defined(TASKING_PPL)
    return 0;
#elif defined(TASKING_TBB) && (TBB_INTERFACE_VERSION_MAJOR < 8)
    return 0;
#else
    return 1;
#endif

  case RTC_DEVICE_PROPERTY_PARALLEL_COMMIT_SUPPORTED:
#if defined(TASKING_TBB) && TASKING_TBB_USE_TASK_ISOLATION
    return 1;
#else
    return 0;
#endif

  /* with SYCL support the answer depends on whether this is a DeviceGPU */
  case RTC_DEVICE_PROPERTY_CPU_DEVICE:
#if defined(EMBREE_SYCL_SUPPORT)
    return dynamic_cast<DeviceGPU*>(this) ? 0 : 1;
#else
    return 1;
#endif

  case RTC_DEVICE_PROPERTY_SYCL_DEVICE:
#if defined(EMBREE_SYCL_SUPPORT)
    return dynamic_cast<DeviceGPU*>(this) ? 1 : 0;
#else
    return 0;
#endif

  default: throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown readable property"); break;
  }
}
604
605
void* Device::malloc(size_t size, size_t align) {
606
return alignedMalloc(size,align);
607
}
608
609
void* Device::malloc(size_t size, size_t align, EmbreeMemoryType type) {
610
return alignedMalloc(size,align);
611
}
612
613
void Device::free(void* ptr) {
614
alignedFree(ptr);
615
}
616
617
/*! Human readable error descriptions. Device::getErrorString indexes this
 *  table with the numeric value of the RTCError, so the order of the entries
 *  must not change. */
const std::vector<std::string> Device::error_strings = {
  "No Error",                               // index 0
  "Unknown error",                          // index 1
  "Invalid argument",                       // index 2
  "Invalid operation",                      // index 3
  "Out of Memory",                          // index 4
  "Unsupported CPU",                        // index 5
  "Build cancelled",                        // index 6
  "Level Zero raytracing support missing"   // index 7
};
627
628
const char* Device::getErrorString(RTCError error) {
629
if (error >= 0 && error < error_strings.size()) {
630
return error_strings.at(error).c_str();
631
}
632
return "Invalid error code";
633
}
634
635
#if defined(EMBREE_SYCL_SUPPORT)
636
637
/*! Constructs a GPU device on top of a SYCL context: initializes ZeWrapper,
 *  selects the first device of the context, requires the
 *  ZE_experimental_rtas_builder driver extension, initializes the RTAS
 *  builder, queries the device limits and calls zeRTASInitExp.
 *
 *  \param sycl_context SYCL context the device operates in
 *  \param cfg          configuration string forwarded to Device */
DeviceGPU::DeviceGPU(sycl::context sycl_context, const char* cfg)
  : Device(cfg), gpu_context(sycl_context)
{
  /* initialize ZeWrapper */
  if (ZeWrapper::init() != ZE_RESULT_SUCCESS) {
    throw_RTCError(RTC_ERROR_UNKNOWN, "cannot initialize ZeWrapper");
  }

  /* take first device as default device */
  auto devices = gpu_context.get_devices();
  if (devices.empty()) {
    throw_RTCError(RTC_ERROR_UNKNOWN, "SYCL context contains no device");
  }
  gpu_device = devices[0];

  /* check if RTAS build extension is available */
  sycl::platform platform = gpu_device.get_platform();
  ze_driver_handle_t hDriver = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(platform);

  /* first call runs with count == 0 and an empty vector, the second one after
     the vector was resized to the returned count */
  uint32_t count = 0;
  std::vector<ze_driver_extension_properties_t> extensions;
  ze_result_t result = ZeWrapper::zeDriverGetExtensionProperties(hDriver,&count,extensions.data());
  if (result != ZE_RESULT_SUCCESS) {
    throw_RTCError(RTC_ERROR_UNKNOWN, "zeDriverGetExtensionProperties failed");
  }

  extensions.resize(count);
  result = ZeWrapper::zeDriverGetExtensionProperties(hDriver,&count,extensions.data());
  if (result != ZE_RESULT_SUCCESS) {
    throw_RTCError(RTC_ERROR_UNKNOWN, "zeDriverGetExtensionProperties failed");
  }

  bool ze_rtas_builder = false;
  for (const ze_driver_extension_properties_t& extension : extensions)
  {
    if (strncmp("ZE_experimental_rtas_builder",extension.name,sizeof(extension.name)) == 0) {
      ze_rtas_builder = true;
      break;
    }
  }
  if (!ze_rtas_builder) {
    throw_RTCError(RTC_ERROR_LEVEL_ZERO_RAYTRACING_SUPPORT_MISSING, "ZE_experimental_rtas_builder extension not found. Please install a recent driver. On Linux, make sure that the package intel-level-zero-gpu-raytracing is installed");
  }

  result = ZeWrapper::initRTASBuilder(hDriver);
  if (result == ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE) {
    throw_RTCError(RTC_ERROR_LEVEL_ZERO_RAYTRACING_SUPPORT_MISSING, "cannot load ZE_experimental_rtas_builder extension. Please install a recent driver. On Linux, make sure that the package intel-level-zero-gpu-raytracing is installed");
  }
  if (result != ZE_RESULT_SUCCESS) {
    throw_RTCError(RTC_ERROR_UNKNOWN, "cannot initialize ZE_experimental_rtas_builder extension");
  }

  if (State::verbosity(1)) {
    std::cout << " Level Zero RTAS Builder" << std::endl;
  }

  /* check if extension library can get loaded: create a parallel operation
     and destroy it again right away when that succeeded */
  ze_rtas_parallel_operation_exp_handle_t hParallelOperation;
  result = ZeWrapper::zeRTASParallelOperationCreateExp(hDriver, &hParallelOperation);
  if (result == ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE) {
    throw_RTCError(RTC_ERROR_UNKNOWN, "Level Zero RTAS Build Extension cannot get loaded");
  }
  if (result == ZE_RESULT_SUCCESS) {
    ZeWrapper::zeRTASParallelOperationDestroyExp(hParallelOperation);
  }

  gpu_maxWorkGroupSize = getGPUDevice().get_info<sycl::info::device::max_work_group_size>();
  gpu_maxComputeUnits = getGPUDevice().get_info<sycl::info::device::max_compute_units>();

  if (State::verbosity(1))
  {
    sycl::platform contextPlatform = gpu_context.get_platform();
    std::cout << " Platform : " << contextPlatform.get_info<sycl::info::platform::name>() << std::endl;
    std::cout << " Device : " << getGPUDevice().get_info<sycl::info::device::name>() << std::endl;
    std::cout << " Max Work Group Size : " << gpu_maxWorkGroupSize << std::endl;
    std::cout << " Max Compute Units : " << gpu_maxComputeUnits << std::endl;
    std::cout << std::endl;
  }

  dispatchGlobalsPtr = zeRTASInitExp(gpu_device, gpu_context);
}
709
710
/*! Hands the dispatch globals obtained in the constructor to rthwifCleanup. */
DeviceGPU::~DeviceGPU()
{
  rthwifCleanup(this, dispatchGlobalsPtr, gpu_context);
}
714
715
void DeviceGPU::enter() {
716
}
717
718
void DeviceGPU::leave() {
719
}
720
721
void* DeviceGPU::malloc(size_t size, size_t align) {
722
return alignedSYCLMalloc(&gpu_context,&gpu_device,size,align,EmbreeUSMMode::DEVICE_READ_ONLY);
723
}
724
725
void* DeviceGPU::malloc(size_t size, size_t align, EmbreeMemoryType type) {
726
return alignedSYCLMalloc(&gpu_context,&gpu_device,size,align,EmbreeUSMMode::DEVICE_READ_ONLY,type);
727
}
728
729
void DeviceGPU::free(void* ptr) {
730
alignedSYCLFree(&gpu_context,ptr);
731
}
732
733
void DeviceGPU::setSYCLDevice(const sycl::device sycl_device_in) {
734
gpu_device = sycl_device_in;
735
}
736
737
// turn off deprecation warning for host_unified_memory property usage.
738
// there is currently no equivalent SYCL aspect that replaces this property.
739
#pragma GCC diagnostic push
740
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
741
bool DeviceGPU::has_unified_memory() const {
742
return gpu_device.get_info<sycl::info::device::host_unified_memory>();
743
}
744
#pragma GCC diagnostic pop
745
746
#endif
747
748
/*! Scope guard constructors. Each one resolves the owning device from the
 *  given handle, increments its reference count and calls enter(); the
 *  destructor undoes both steps. */

/*! Takes the device directly from a device handle. */
DeviceEnterLeave::DeviceEnterLeave (RTCDevice hdevice)
  : device((Device*)hdevice)
{
  assert(device);
  device->refInc();
  device->enter();
}

/*! Takes the device from the scene behind a scene handle. */
DeviceEnterLeave::DeviceEnterLeave (RTCScene hscene)
  : device(((Scene*)hscene)->device)
{
  assert(device);
  device->refInc();
  device->enter();
}

/*! Takes the device from the geometry behind a geometry handle. */
DeviceEnterLeave::DeviceEnterLeave (RTCGeometry hgeometry)
  : device(((Geometry*)hgeometry)->device)
{
  assert(device);
  device->refInc();
  device->enter();
}

/*! Takes the device from the buffer behind a buffer handle. */
DeviceEnterLeave::DeviceEnterLeave (RTCBuffer hbuffer)
  : device(((Buffer*)hbuffer)->device)
{
  assert(device);
  device->refInc();
  device->enter();
}
779
780
/*! Calls leave() on the device and then drops the reference taken by the
 *  constructor. */
DeviceEnterLeave::~DeviceEnterLeave()
{
  device->leave();
  device->refDec();
}
784
}
785
786