device.cpp 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730
  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #include "device.h"
  4. #include "../../common/tasking/taskscheduler.h"
  5. #include "../hash.h"
  6. #include "scene_triangle_mesh.h"
  7. #include "scene_user_geometry.h"
  8. #include "scene_instance.h"
  9. #include "scene_curves.h"
  10. #include "scene_subdiv_mesh.h"
  11. #include "../subdiv/tessellation_cache.h"
  12. #include "acceln.h"
  13. #include "geometry.h"
  14. #include "../geometry/cylinder.h"
  15. #include "../bvh/bvh4_factory.h"
  16. #include "../bvh/bvh8_factory.h"
  17. #include "../../common/sys/alloc.h"
  18. #if defined(EMBREE_SYCL_SUPPORT)
  19. # include "../level_zero/ze_wrapper.h"
  20. #endif
  21. namespace embree
  22. {
  23. /*! some global variables that can be set via rtcSetParameter1i for debugging purposes */
  24. ssize_t Device::debug_int0 = 0;
  25. ssize_t Device::debug_int1 = 0;
  26. ssize_t Device::debug_int2 = 0;
  27. ssize_t Device::debug_int3 = 0;
  28. static MutexSys g_mutex;
  29. static std::map<Device*,size_t> g_cache_size_map;
  30. static std::map<Device*,size_t> g_num_threads_map;
  /*! Wrapper around the device's TBB task arena. The struct has no members
   *  when USE_TASK_ARENA is not enabled. */
  struct TaskArena
  {
#if USE_TASK_ARENA
    std::unique_ptr<tbb::task_arena> arena;
#endif
  };
  37. Device::Device (const char* cfg) : arena(new TaskArena())
  38. {
  39. /* check that CPU supports lowest ISA */
  40. if (!hasISA(ISA)) {
  41. throw_RTCError(RTC_ERROR_UNSUPPORTED_CPU,"CPU does not support " ISA_STR);
  42. }
  43. /* set default frequency level for detected CPU */
  44. switch (getCPUModel()) {
  45. case CPU::UNKNOWN: frequency_level = FREQUENCY_SIMD256; break;
  46. case CPU::XEON_ICE_LAKE: frequency_level = FREQUENCY_SIMD256; break;
  47. case CPU::CORE_ICE_LAKE: frequency_level = FREQUENCY_SIMD256; break;
  48. case CPU::CORE_TIGER_LAKE: frequency_level = FREQUENCY_SIMD256; break;
  49. case CPU::CORE_COMET_LAKE: frequency_level = FREQUENCY_SIMD256; break;
  50. case CPU::CORE_CANNON_LAKE:frequency_level = FREQUENCY_SIMD256; break;
  51. case CPU::CORE_KABY_LAKE: frequency_level = FREQUENCY_SIMD256; break;
  52. case CPU::XEON_SKY_LAKE: frequency_level = FREQUENCY_SIMD128; break;
  53. case CPU::CORE_SKY_LAKE: frequency_level = FREQUENCY_SIMD256; break;
  54. case CPU::XEON_BROADWELL: frequency_level = FREQUENCY_SIMD256; break;
  55. case CPU::CORE_BROADWELL: frequency_level = FREQUENCY_SIMD256; break;
  56. case CPU::XEON_HASWELL: frequency_level = FREQUENCY_SIMD256; break;
  57. case CPU::CORE_HASWELL: frequency_level = FREQUENCY_SIMD256; break;
  58. case CPU::XEON_IVY_BRIDGE: frequency_level = FREQUENCY_SIMD256; break;
  59. case CPU::CORE_IVY_BRIDGE: frequency_level = FREQUENCY_SIMD256; break;
  60. case CPU::SANDY_BRIDGE: frequency_level = FREQUENCY_SIMD256; break;
  61. case CPU::NEHALEM: frequency_level = FREQUENCY_SIMD128; break;
  62. case CPU::CORE2: frequency_level = FREQUENCY_SIMD128; break;
  63. case CPU::CORE1: frequency_level = FREQUENCY_SIMD128; break;
  64. case CPU::XEON_PHI_KNIGHTS_MILL : frequency_level = FREQUENCY_SIMD512; break;
  65. case CPU::XEON_PHI_KNIGHTS_LANDING: frequency_level = FREQUENCY_SIMD512; break;
  66. case CPU::ARM: frequency_level = FREQUENCY_SIMD256; break;
  67. }
  68. /* initialize global state */
  69. #if defined(EMBREE_CONFIG)
  70. State::parseString(EMBREE_CONFIG);
  71. #endif
  72. State::parseString(cfg);
  73. State::verify();
  74. /* check whether selected ISA is supported by the HW, as the user could have forced an unsupported ISA */
  75. if (!checkISASupport()) {
  76. throw_RTCError(RTC_ERROR_UNSUPPORTED_CPU,"CPU does not support selected ISA");
  77. }
  78. /*! do some internal tests */
  79. assert(isa::Cylinder::verify());
  80. /*! enable huge page support if desired */
  81. #if defined(__WIN32__)
  82. if (State::enable_selockmemoryprivilege)
  83. State::hugepages_success &= win_enable_selockmemoryprivilege(State::verbosity(3));
  84. #endif
  85. State::hugepages_success &= os_init(State::hugepages,State::verbosity(3));
  86. /*! set tessellation cache size */
  87. setCacheSize( State::tessellation_cache_size );
  88. /*! enable some floating point exceptions to catch bugs */
  89. if (State::float_exceptions)
  90. {
  91. int exceptions = _MM_MASK_MASK;
  92. //exceptions &= ~_MM_MASK_INVALID;
  93. exceptions &= ~_MM_MASK_DENORM;
  94. exceptions &= ~_MM_MASK_DIV_ZERO;
  95. //exceptions &= ~_MM_MASK_OVERFLOW;
  96. //exceptions &= ~_MM_MASK_UNDERFLOW;
  97. //exceptions &= ~_MM_MASK_INEXACT;
  98. _MM_SET_EXCEPTION_MASK(exceptions);
  99. }
  100. /* print info header */
  101. if (State::verbosity(1))
  102. print();
  103. if (State::verbosity(2))
  104. State::print();
  105. /* register all algorithms */
  106. bvh4_factory = make_unique(new BVH4Factory(enabled_builder_cpu_features, enabled_cpu_features));
  107. #if defined(EMBREE_TARGET_SIMD8)
  108. bvh8_factory = make_unique(new BVH8Factory(enabled_builder_cpu_features, enabled_cpu_features));
  109. #endif
  110. /* setup tasking system */
  111. initTaskingSystem(numThreads);
  112. }
  113. Device::~Device ()
  114. {
  115. setCacheSize(0);
  116. exitTaskingSystem();
  117. }
  /*! Returns the names of the ISA targets enabled at compile time, each
   *  followed by a single space. Empty if no target macro is defined. */
  std::string getEnabledTargets()
  {
    std::string targets;

#if defined(EMBREE_TARGET_SSE2)
    targets.append("SSE2 ");
#endif

#if defined(EMBREE_TARGET_SSE42)
    targets.append("SSE4.2 ");
#endif

#if defined(EMBREE_TARGET_AVX)
    targets.append("AVX ");
#endif

#if defined(EMBREE_TARGET_AVX2)
    targets.append("AVX2 ");
#endif

#if defined(EMBREE_TARGET_AVX512)
    targets.append("AVX512 ");
#endif

    return targets;
  }
  /*! Returns the names of the optional features enabled at compile time,
   *  each followed by a single space. Empty if no feature macro is defined. */
  std::string getEmbreeFeatures()
  {
    std::string features;

#if defined(EMBREE_RAY_MASK)
    features.append("raymasks ");
#endif

#if defined (EMBREE_BACKFACE_CULLING)
    features.append("backfaceculling ");
#endif

#if defined (EMBREE_BACKFACE_CULLING_CURVES)
    features.append("backfacecullingcurves ");
#endif

#if defined (EMBREE_BACKFACE_CULLING_SPHERES)
    features.append("backfacecullingspheres ");
#endif

#if defined(EMBREE_FILTER_FUNCTION)
    features.append("intersection_filter ");
#endif

#if defined (EMBREE_COMPACT_POLYS)
    features.append("compact_polys ");
#endif

    return features;
  }
  161. void Device::print()
  162. {
  163. const int cpu_features = getCPUFeatures();
  164. std::cout << std::endl;
  165. std::cout << "Embree Ray Tracing Kernels " << RTC_VERSION_STRING << " (" << RTC_HASH << ")" << std::endl;
  166. std::cout << " Compiler : " << getCompilerName() << std::endl;
  167. std::cout << " Build : ";
  168. #if defined(DEBUG)
  169. std::cout << "Debug " << std::endl;
  170. #else
  171. std::cout << "Release " << std::endl;
  172. #endif
  173. std::cout << " Platform : " << getPlatformName() << std::endl;
  174. std::cout << " CPU : " << stringOfCPUModel(getCPUModel()) << " (" << getCPUVendor() << ")" << std::endl;
  175. std::cout << " Threads : " << getNumberOfLogicalThreads() << std::endl;
  176. std::cout << " ISA : " << stringOfCPUFeatures(cpu_features) << std::endl;
  177. std::cout << " Targets : " << supportedTargetList(cpu_features) << std::endl;
  178. const bool hasFTZ = _mm_getcsr() & _MM_FLUSH_ZERO_ON;
  179. const bool hasDAZ = _mm_getcsr() & _MM_DENORMALS_ZERO_ON;
  180. std::cout << " MXCSR : " << "FTZ=" << hasFTZ << ", DAZ=" << hasDAZ << std::endl;
  181. std::cout << " Config" << std::endl;
  182. std::cout << " Threads : " << (numThreads ? toString(numThreads) : std::string("default")) << std::endl;
  183. std::cout << " ISA : " << stringOfCPUFeatures(enabled_cpu_features) << std::endl;
  184. std::cout << " Targets : " << supportedTargetList(enabled_cpu_features) << " (supported)" << std::endl;
  185. std::cout << " " << getEnabledTargets() << " (compile time enabled)" << std::endl;
  186. std::cout << " Features: " << getEmbreeFeatures() << std::endl;
  187. std::cout << " Tasking : ";
  188. #if defined(TASKING_TBB)
  189. std::cout << "TBB" << TBB_VERSION_MAJOR << "." << TBB_VERSION_MINOR << " ";
  190. #if TBB_INTERFACE_VERSION >= 12002
  191. std::cout << "TBB_header_interface_" << TBB_INTERFACE_VERSION << " TBB_lib_interface_" << TBB_runtime_interface_version() << " ";
  192. #else
  193. std::cout << "TBB_header_interface_" << TBB_INTERFACE_VERSION << " TBB_lib_interface_" << tbb::TBB_runtime_interface_version() << " ";
  194. #endif
  195. #endif
  196. #if defined(TASKING_INTERNAL)
  197. std::cout << "internal_tasking_system ";
  198. #endif
  199. #if defined(TASKING_PPL)
  200. std::cout << "PPL ";
  201. #endif
  202. std::cout << std::endl;
  203. /* check of FTZ and DAZ flags are set in CSR */
  204. if (!hasFTZ || !hasDAZ)
  205. {
  206. #if !defined(_DEBUG)
  207. if (State::verbosity(1))
  208. #endif
  209. {
  210. std::cout << std::endl;
  211. std::cout << "================================================================================" << std::endl;
  212. std::cout << " WARNING: \"Flush to Zero\" or \"Denormals are Zero\" mode not enabled " << std::endl
  213. << " in the MXCSR control and status register. This can have a severe " << std::endl
  214. << " performance impact. Please enable these modes for each application " << std::endl
  215. << " thread the following way:" << std::endl
  216. << std::endl
  217. << " #include \"xmmintrin.h\"" << std::endl
  218. << " #include \"pmmintrin.h\"" << std::endl
  219. << std::endl
  220. << " _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);" << std::endl
  221. << " _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);" << std::endl;
  222. std::cout << "================================================================================" << std::endl;
  223. std::cout << std::endl;
  224. }
  225. }
  226. std::cout << std::endl;
  227. }
  228. void Device::setDeviceErrorCode(RTCError error)
  229. {
  230. RTCError* stored_error = errorHandler.error();
  231. if (*stored_error == RTC_ERROR_NONE)
  232. *stored_error = error;
  233. }
  234. RTCError Device::getDeviceErrorCode()
  235. {
  236. RTCError* stored_error = errorHandler.error();
  237. RTCError error = *stored_error;
  238. *stored_error = RTC_ERROR_NONE;
  239. return error;
  240. }
  241. void Device::setThreadErrorCode(RTCError error)
  242. {
  243. RTCError* stored_error = g_errorHandler.error();
  244. if (*stored_error == RTC_ERROR_NONE)
  245. *stored_error = error;
  246. }
  247. RTCError Device::getThreadErrorCode()
  248. {
  249. RTCError* stored_error = g_errorHandler.error();
  250. RTCError error = *stored_error;
  251. *stored_error = RTC_ERROR_NONE;
  252. return error;
  253. }
  254. void Device::process_error(Device* device, RTCError error, const char* str)
  255. {
  256. /* store global error code when device construction failed */
  257. if (!device)
  258. return setThreadErrorCode(error);
  259. /* print error when in verbose mode */
  260. if (device->verbosity(1))
  261. {
  262. switch (error) {
  263. case RTC_ERROR_NONE : std::cerr << "Embree: No error"; break;
  264. case RTC_ERROR_UNKNOWN : std::cerr << "Embree: Unknown error"; break;
  265. case RTC_ERROR_INVALID_ARGUMENT : std::cerr << "Embree: Invalid argument"; break;
  266. case RTC_ERROR_INVALID_OPERATION: std::cerr << "Embree: Invalid operation"; break;
  267. case RTC_ERROR_OUT_OF_MEMORY : std::cerr << "Embree: Out of memory"; break;
  268. case RTC_ERROR_UNSUPPORTED_CPU : std::cerr << "Embree: Unsupported CPU"; break;
  269. default : std::cerr << "Embree: Invalid error code"; break;
  270. };
  271. if (str) std::cerr << ", (" << str << ")";
  272. std::cerr << std::endl;
  273. }
  274. /* call user specified error callback */
  275. if (device->error_function)
  276. device->error_function(device->error_function_userptr,error,str);
  277. /* record error code */
  278. device->setDeviceErrorCode(error);
  279. }
  280. void Device::memoryMonitor(ssize_t bytes, bool post)
  281. {
  282. if (State::memory_monitor_function && bytes != 0) {
  283. if (!State::memory_monitor_function(State::memory_monitor_userptr,bytes,post)) {
  284. if (bytes > 0) { // only throw exception when we allocate memory to never throw inside a destructor
  285. throw_RTCError(RTC_ERROR_OUT_OF_MEMORY,"memory monitor forced termination");
  286. }
  287. }
  288. }
  289. }
  290. size_t getMaxNumThreads()
  291. {
  292. size_t maxNumThreads = 0;
  293. for (std::map<Device*,size_t>::iterator i=g_num_threads_map.begin(); i != g_num_threads_map.end(); i++)
  294. maxNumThreads = max(maxNumThreads, (*i).second);
  295. if (maxNumThreads == 0)
  296. maxNumThreads = std::numeric_limits<size_t>::max();
  297. return maxNumThreads;
  298. }
  299. size_t getMaxCacheSize()
  300. {
  301. size_t maxCacheSize = 0;
  302. for (std::map<Device*,size_t>::iterator i=g_cache_size_map.begin(); i!= g_cache_size_map.end(); i++)
  303. maxCacheSize = max(maxCacheSize, (*i).second);
  304. return maxCacheSize;
  305. }
  306. void Device::setCacheSize(size_t bytes)
  307. {
  308. #if defined(EMBREE_GEOMETRY_SUBDIVISION)
  309. Lock<MutexSys> lock(g_mutex);
  310. if (bytes == 0) g_cache_size_map.erase(this);
  311. else g_cache_size_map[this] = bytes;
  312. size_t maxCacheSize = getMaxCacheSize();
  313. resizeTessellationCache(maxCacheSize);
  314. #endif
  315. }
  316. void Device::initTaskingSystem(size_t numThreads)
  317. {
  318. Lock<MutexSys> lock(g_mutex);
  319. if (numThreads == 0)
  320. g_num_threads_map[this] = std::numeric_limits<size_t>::max();
  321. else
  322. g_num_threads_map[this] = numThreads;
  323. /* create task scheduler */
  324. size_t maxNumThreads = getMaxNumThreads();
  325. TaskScheduler::create(maxNumThreads,State::set_affinity,State::start_threads);
  326. #if USE_TASK_ARENA
  327. const size_t nThreads = min(maxNumThreads,TaskScheduler::threadCount());
  328. const size_t uThreads = min(max(numUserThreads,(size_t)1),nThreads);
  329. arena->arena = make_unique(new tbb::task_arena((int)nThreads,(unsigned int)uThreads));
  330. #endif
  331. }
  332. void Device::exitTaskingSystem()
  333. {
  334. Lock<MutexSys> lock(g_mutex);
  335. g_num_threads_map.erase(this);
  336. /* terminate tasking system */
  337. if (g_num_threads_map.size() == 0) {
  338. TaskScheduler::destroy();
  339. }
  340. /* or configure new number of threads */
  341. else {
  342. size_t maxNumThreads = getMaxNumThreads();
  343. TaskScheduler::create(maxNumThreads,State::set_affinity,State::start_threads);
  344. }
  345. #if USE_TASK_ARENA
  346. arena->arena.reset();
  347. #endif
  348. }
  349. void Device::execute(bool join, const std::function<void()>& func)
  350. {
  351. #if USE_TASK_ARENA
  352. if (join) {
  353. arena->arena->execute(func);
  354. }
  355. else
  356. #endif
  357. {
  358. func();
  359. }
  360. }
  361. void Device::setProperty(const RTCDeviceProperty prop, ssize_t val)
  362. {
  363. /* hidden internal properties */
  364. switch ((size_t)prop)
  365. {
  366. case 1000000: debug_int0 = val; return;
  367. case 1000001: debug_int1 = val; return;
  368. case 1000002: debug_int2 = val; return;
  369. case 1000003: debug_int3 = val; return;
  370. }
  371. throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown writable property");
  372. }
  373. ssize_t Device::getProperty(const RTCDeviceProperty prop)
  374. {
  375. size_t iprop = (size_t)prop;
  376. /* get name of internal regression test */
  377. if (iprop >= 2000000 && iprop < 3000000)
  378. {
  379. RegressionTest* test = getRegressionTest(iprop-2000000);
  380. if (test) return (ssize_t) test->name.c_str();
  381. else return 0;
  382. }
  383. /* run internal regression test */
  384. if (iprop >= 3000000 && iprop < 4000000)
  385. {
  386. RegressionTest* test = getRegressionTest(iprop-3000000);
  387. if (test) return test->run();
  388. else return 0;
  389. }
  390. /* documented properties */
  391. switch (prop)
  392. {
  393. case RTC_DEVICE_PROPERTY_VERSION_MAJOR: return RTC_VERSION_MAJOR;
  394. case RTC_DEVICE_PROPERTY_VERSION_MINOR: return RTC_VERSION_MINOR;
  395. case RTC_DEVICE_PROPERTY_VERSION_PATCH: return RTC_VERSION_PATCH;
  396. case RTC_DEVICE_PROPERTY_VERSION : return RTC_VERSION;
  397. #if defined(EMBREE_TARGET_SIMD4) && defined(EMBREE_RAY_PACKETS)
  398. case RTC_DEVICE_PROPERTY_NATIVE_RAY4_SUPPORTED: return hasISA(SSE2);
  399. #else
  400. case RTC_DEVICE_PROPERTY_NATIVE_RAY4_SUPPORTED: return 0;
  401. #endif
  402. #if defined(EMBREE_TARGET_SIMD8) && defined(EMBREE_RAY_PACKETS)
  403. case RTC_DEVICE_PROPERTY_NATIVE_RAY8_SUPPORTED: return hasISA(AVX);
  404. #else
  405. case RTC_DEVICE_PROPERTY_NATIVE_RAY8_SUPPORTED: return 0;
  406. #endif
  407. #if defined(EMBREE_TARGET_SIMD16) && defined(EMBREE_RAY_PACKETS)
  408. case RTC_DEVICE_PROPERTY_NATIVE_RAY16_SUPPORTED: return hasISA(AVX512);
  409. #else
  410. case RTC_DEVICE_PROPERTY_NATIVE_RAY16_SUPPORTED: return 0;
  411. #endif
  412. #if defined(EMBREE_RAY_MASK)
  413. case RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED: return 1;
  414. #else
  415. case RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED: return 0;
  416. #endif
  417. #if defined(EMBREE_BACKFACE_CULLING)
  418. case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_ENABLED: return 1;
  419. #else
  420. case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_ENABLED: return 0;
  421. #endif
  422. #if defined(EMBREE_BACKFACE_CULLING_CURVES)
  423. case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_CURVES_ENABLED: return 1;
  424. #else
  425. case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_CURVES_ENABLED: return 0;
  426. #endif
  427. #if defined(EMBREE_BACKFACE_CULLING_SPHERES)
  428. case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_SPHERES_ENABLED: return 1;
  429. #else
  430. case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_SPHERES_ENABLED: return 0;
  431. #endif
  432. #if defined(EMBREE_COMPACT_POLYS)
  433. case RTC_DEVICE_PROPERTY_COMPACT_POLYS_ENABLED: return 1;
  434. #else
  435. case RTC_DEVICE_PROPERTY_COMPACT_POLYS_ENABLED: return 0;
  436. #endif
  437. #if defined(EMBREE_FILTER_FUNCTION)
  438. case RTC_DEVICE_PROPERTY_FILTER_FUNCTION_SUPPORTED: return 1;
  439. #else
  440. case RTC_DEVICE_PROPERTY_FILTER_FUNCTION_SUPPORTED: return 0;
  441. #endif
  442. #if defined(EMBREE_IGNORE_INVALID_RAYS)
  443. case RTC_DEVICE_PROPERTY_IGNORE_INVALID_RAYS_ENABLED: return 1;
  444. #else
  445. case RTC_DEVICE_PROPERTY_IGNORE_INVALID_RAYS_ENABLED: return 0;
  446. #endif
  447. #if defined(TASKING_INTERNAL)
  448. case RTC_DEVICE_PROPERTY_TASKING_SYSTEM: return 0;
  449. #endif
  450. #if defined(TASKING_TBB)
  451. case RTC_DEVICE_PROPERTY_TASKING_SYSTEM: return 1;
  452. #endif
  453. #if defined(TASKING_PPL)
  454. case RTC_DEVICE_PROPERTY_TASKING_SYSTEM: return 2;
  455. #endif
  456. #if defined(EMBREE_GEOMETRY_TRIANGLE)
  457. case RTC_DEVICE_PROPERTY_TRIANGLE_GEOMETRY_SUPPORTED: return 1;
  458. #else
  459. case RTC_DEVICE_PROPERTY_TRIANGLE_GEOMETRY_SUPPORTED: return 0;
  460. #endif
  461. #if defined(EMBREE_GEOMETRY_QUAD)
  462. case RTC_DEVICE_PROPERTY_QUAD_GEOMETRY_SUPPORTED: return 1;
  463. #else
  464. case RTC_DEVICE_PROPERTY_QUAD_GEOMETRY_SUPPORTED: return 0;
  465. #endif
  466. #if defined(EMBREE_GEOMETRY_CURVE)
  467. case RTC_DEVICE_PROPERTY_CURVE_GEOMETRY_SUPPORTED: return 1;
  468. #else
  469. case RTC_DEVICE_PROPERTY_CURVE_GEOMETRY_SUPPORTED: return 0;
  470. #endif
  471. #if defined(EMBREE_GEOMETRY_SUBDIVISION)
  472. case RTC_DEVICE_PROPERTY_SUBDIVISION_GEOMETRY_SUPPORTED: return 1;
  473. #else
  474. case RTC_DEVICE_PROPERTY_SUBDIVISION_GEOMETRY_SUPPORTED: return 0;
  475. #endif
  476. #if defined(EMBREE_GEOMETRY_USER)
  477. case RTC_DEVICE_PROPERTY_USER_GEOMETRY_SUPPORTED: return 1;
  478. #else
  479. case RTC_DEVICE_PROPERTY_USER_GEOMETRY_SUPPORTED: return 0;
  480. #endif
  481. #if defined(EMBREE_GEOMETRY_POINT)
  482. case RTC_DEVICE_PROPERTY_POINT_GEOMETRY_SUPPORTED: return 1;
  483. #else
  484. case RTC_DEVICE_PROPERTY_POINT_GEOMETRY_SUPPORTED: return 0;
  485. #endif
  486. #if defined(TASKING_PPL)
  487. case RTC_DEVICE_PROPERTY_JOIN_COMMIT_SUPPORTED: return 0;
  488. #elif defined(TASKING_TBB) && (TBB_INTERFACE_VERSION_MAJOR < 8)
  489. case RTC_DEVICE_PROPERTY_JOIN_COMMIT_SUPPORTED: return 0;
  490. #else
  491. case RTC_DEVICE_PROPERTY_JOIN_COMMIT_SUPPORTED: return 1;
  492. #endif
  493. #if defined(TASKING_TBB) && TASKING_TBB_USE_TASK_ISOLATION
  494. case RTC_DEVICE_PROPERTY_PARALLEL_COMMIT_SUPPORTED: return 1;
  495. #else
  496. case RTC_DEVICE_PROPERTY_PARALLEL_COMMIT_SUPPORTED: return 0;
  497. #endif
  498. default: throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown readable property"); break;
  499. };
  500. }
  501. void* Device::malloc(size_t size, size_t align) {
  502. return alignedMalloc(size,align);
  503. }
  504. void Device::free(void* ptr) {
  505. alignedFree(ptr);
  506. }
  507. #if defined(EMBREE_SYCL_SUPPORT)
  508. DeviceGPU::DeviceGPU(sycl::context sycl_context, const char* cfg)
  509. : Device(cfg), gpu_context(sycl_context)
  510. {
  511. /* initialize ZeWrapper */
  512. if (ZeWrapper::init() != ZE_RESULT_SUCCESS)
  513. throw_RTCError(RTC_ERROR_UNKNOWN, "cannot initialize ZeWrapper");
  514. /* take first device as default device */
  515. auto devices = gpu_context.get_devices();
  516. if (devices.size() == 0)
  517. throw_RTCError(RTC_ERROR_UNKNOWN, "SYCL context contains no device");
  518. gpu_device = devices[0];
  519. /* check if RTAS build extension is available */
  520. sycl::platform platform = gpu_device.get_platform();
  521. ze_driver_handle_t hDriver = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(platform);
  522. uint32_t count = 0;
  523. std::vector<ze_driver_extension_properties_t> extensions;
  524. ze_result_t result = ZeWrapper::zeDriverGetExtensionProperties(hDriver,&count,extensions.data());
  525. if (result != ZE_RESULT_SUCCESS)
  526. throw_RTCError(RTC_ERROR_UNKNOWN, "zeDriverGetExtensionProperties failed");
  527. extensions.resize(count);
  528. result = ZeWrapper::zeDriverGetExtensionProperties(hDriver,&count,extensions.data());
  529. if (result != ZE_RESULT_SUCCESS)
  530. throw_RTCError(RTC_ERROR_UNKNOWN, "zeDriverGetExtensionProperties failed");
  531. #if defined(EMBREE_SYCL_L0_RTAS_BUILDER)
  532. bool ze_rtas_builder = false;
  533. for (uint32_t i=0; i<extensions.size(); i++)
  534. {
  535. if (strncmp("ZE_experimental_rtas_builder",extensions[i].name,sizeof(extensions[i].name)) == 0)
  536. ze_rtas_builder = true;
  537. }
  538. if (!ze_rtas_builder)
  539. throw_RTCError(RTC_ERROR_UNKNOWN, "ZE_experimental_rtas_builder extension not found");
  540. result = ZeWrapper::initRTASBuilder(hDriver,ZeWrapper::LEVEL_ZERO);
  541. if (result == ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE)
  542. throw_RTCError(RTC_ERROR_UNKNOWN, "cannot load ZE_experimental_rtas_builder extension");
  543. if (result != ZE_RESULT_SUCCESS)
  544. throw_RTCError(RTC_ERROR_UNKNOWN, "cannot initialize ZE_experimental_rtas_builder extension");
  545. #else
  546. ZeWrapper::initRTASBuilder(hDriver,ZeWrapper::INTERNAL);
  547. #endif
  548. if (State::verbosity(1))
  549. {
  550. if (ZeWrapper::rtas_builder == ZeWrapper::INTERNAL)
  551. std::cout << " Internal RTAS Builder" << std::endl;
  552. else
  553. std::cout << " Level Zero RTAS Builder" << std::endl;
  554. }
  555. /* check if extension library can get loaded */
  556. ze_rtas_parallel_operation_exp_handle_t hParallelOperation;
  557. result = ZeWrapper::zeRTASParallelOperationCreateExp(hDriver, &hParallelOperation);
  558. if (result == ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE)
  559. throw_RTCError(RTC_ERROR_UNKNOWN, "Level Zero RTAS Build Extension cannot get loaded");
  560. if (result == ZE_RESULT_SUCCESS)
  561. ZeWrapper::zeRTASParallelOperationDestroyExp(hParallelOperation);
  562. gpu_maxWorkGroupSize = getGPUDevice().get_info<sycl::info::device::max_work_group_size>();
  563. gpu_maxComputeUnits = getGPUDevice().get_info<sycl::info::device::max_compute_units>();
  564. if (State::verbosity(1))
  565. {
  566. sycl::platform platform = gpu_context.get_platform();
  567. std::cout << " Platform : " << platform.get_info<sycl::info::platform::name>() << std::endl;
  568. std::cout << " Device : " << getGPUDevice().get_info<sycl::info::device::name>() << std::endl;
  569. std::cout << " Max Work Group Size : " << gpu_maxWorkGroupSize << std::endl;
  570. std::cout << " Max Compute Units : " << gpu_maxComputeUnits << std::endl;
  571. std::cout << std::endl;
  572. }
  573. dispatchGlobalsPtr = zeRTASInitExp(gpu_device, gpu_context);
  574. }
  575. DeviceGPU::~DeviceGPU()
  576. {
  577. rthwifCleanup(this,dispatchGlobalsPtr,gpu_context);
  578. }
  579. void DeviceGPU::enter() {
  580. enableUSMAllocEmbree(&gpu_context,&gpu_device);
  581. }
  582. void DeviceGPU::leave() {
  583. disableUSMAllocEmbree();
  584. }
  585. void* DeviceGPU::malloc(size_t size, size_t align) {
  586. return alignedSYCLMalloc(&gpu_context,&gpu_device,size,align,EMBREE_USM_SHARED_DEVICE_READ_ONLY);
  587. }
  588. void DeviceGPU::free(void* ptr) {
  589. alignedSYCLFree(&gpu_context,ptr);
  590. }
  591. void DeviceGPU::setSYCLDevice(const sycl::device sycl_device_in) {
  592. gpu_device = sycl_device_in;
  593. }
  594. #endif
  595. DeviceEnterLeave::DeviceEnterLeave (RTCDevice hdevice)
  596. : device((Device*)hdevice)
  597. {
  598. assert(device);
  599. device->refInc();
  600. device->enter();
  601. }
  602. DeviceEnterLeave::DeviceEnterLeave (RTCScene hscene)
  603. : device(((Scene*)hscene)->device)
  604. {
  605. assert(device);
  606. device->refInc();
  607. device->enter();
  608. }
  609. DeviceEnterLeave::DeviceEnterLeave (RTCGeometry hgeometry)
  610. : device(((Geometry*)hgeometry)->device)
  611. {
  612. assert(device);
  613. device->refInc();
  614. device->enter();
  615. }
  616. DeviceEnterLeave::DeviceEnterLeave (RTCBuffer hbuffer)
  617. : device(((Buffer*)hbuffer)->device)
  618. {
  619. assert(device);
  620. device->refInc();
  621. device->enter();
  622. }
  623. DeviceEnterLeave::~DeviceEnterLeave() {
  624. device->leave();
  625. device->refDec();
  626. }
  627. }