device.cpp 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784
  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #include "device.h"
  4. #include "../../common/tasking/taskscheduler.h"
  5. #include "../hash.h"
  6. #include "scene_triangle_mesh.h"
  7. #include "scene_user_geometry.h"
  8. #include "scene_instance.h"
  9. #include "scene_curves.h"
  10. #include "scene_subdiv_mesh.h"
  11. #include "../subdiv/tessellation_cache.h"
  12. #include "acceln.h"
  13. #include "geometry.h"
  14. #include "../geometry/cylinder.h"
  15. #include "../bvh/bvh4_factory.h"
  16. #include "../bvh/bvh8_factory.h"
  17. #include "../../common/sys/alloc.h"
  18. #if defined(EMBREE_SYCL_SUPPORT)
  19. # include "../level_zero/ze_wrapper.h"
  20. #endif
  21. namespace embree
  22. {
  23. /*! some global variables that can be set via rtcSetParameter1i for debugging purposes */
  24. ssize_t Device::debug_int0 = 0;
  25. ssize_t Device::debug_int1 = 0;
  26. ssize_t Device::debug_int2 = 0;
  27. ssize_t Device::debug_int3 = 0;
  28. static MutexSys g_mutex;
  29. static std::map<Device*,size_t> g_cache_size_map;
  30. static std::map<Device*,size_t> g_num_threads_map;
  /*! Holder for the per-device TBB task arena. The struct is empty when
   *  USE_TASK_ARENA is not enabled. */
  struct TaskArena
  {
#if USE_TASK_ARENA
    /* created in Device::initTaskingSystem, reset in Device::exitTaskingSystem */
    std::unique_ptr<tbb::task_arena> arena;
#endif
  };
  37. Device::Device (const char* cfg) : arena(new TaskArena())
  38. {
  39. /* check that CPU supports lowest ISA */
  40. if (!hasISA(ISA)) {
  41. throw_RTCError(RTC_ERROR_UNSUPPORTED_CPU,"CPU does not support " ISA_STR);
  42. }
  43. /* set default frequency level for detected CPU */
  44. switch (getCPUModel()) {
  45. case CPU::UNKNOWN: frequency_level = FREQUENCY_SIMD256; break;
  46. case CPU::XEON_ICE_LAKE: frequency_level = FREQUENCY_SIMD256; break;
  47. case CPU::CORE_ICE_LAKE: frequency_level = FREQUENCY_SIMD256; break;
  48. case CPU::CORE_TIGER_LAKE: frequency_level = FREQUENCY_SIMD256; break;
  49. case CPU::CORE_COMET_LAKE: frequency_level = FREQUENCY_SIMD256; break;
  50. case CPU::CORE_CANNON_LAKE:frequency_level = FREQUENCY_SIMD256; break;
  51. case CPU::CORE_KABY_LAKE: frequency_level = FREQUENCY_SIMD256; break;
  52. case CPU::XEON_SKY_LAKE: frequency_level = FREQUENCY_SIMD128; break;
  53. case CPU::CORE_SKY_LAKE: frequency_level = FREQUENCY_SIMD256; break;
  54. case CPU::XEON_BROADWELL: frequency_level = FREQUENCY_SIMD256; break;
  55. case CPU::CORE_BROADWELL: frequency_level = FREQUENCY_SIMD256; break;
  56. case CPU::XEON_HASWELL: frequency_level = FREQUENCY_SIMD256; break;
  57. case CPU::CORE_HASWELL: frequency_level = FREQUENCY_SIMD256; break;
  58. case CPU::XEON_IVY_BRIDGE: frequency_level = FREQUENCY_SIMD256; break;
  59. case CPU::CORE_IVY_BRIDGE: frequency_level = FREQUENCY_SIMD256; break;
  60. case CPU::SANDY_BRIDGE: frequency_level = FREQUENCY_SIMD256; break;
  61. case CPU::NEHALEM: frequency_level = FREQUENCY_SIMD128; break;
  62. case CPU::CORE2: frequency_level = FREQUENCY_SIMD128; break;
  63. case CPU::CORE1: frequency_level = FREQUENCY_SIMD128; break;
  64. case CPU::XEON_PHI_KNIGHTS_MILL : frequency_level = FREQUENCY_SIMD512; break;
  65. case CPU::XEON_PHI_KNIGHTS_LANDING: frequency_level = FREQUENCY_SIMD512; break;
  66. case CPU::ARM: frequency_level = FREQUENCY_SIMD256; break;
  67. }
  68. /* initialize global state */
  69. #if defined(EMBREE_CONFIG)
  70. State::parseString(EMBREE_CONFIG);
  71. #endif
  72. State::parseString(cfg);
  73. State::verify();
  74. /* check whether selected ISA is supported by the HW, as the user could have forced an unsupported ISA */
  75. if (!checkISASupport()) {
  76. throw_RTCError(RTC_ERROR_UNSUPPORTED_CPU,"CPU does not support selected ISA");
  77. }
  78. /*! do some internal tests */
  79. assert(isa::Cylinder::verify());
  80. /*! enable huge page support if desired */
  81. #if defined(__WIN32__)
  82. if (State::enable_selockmemoryprivilege)
  83. State::hugepages_success &= win_enable_selockmemoryprivilege(State::verbosity(3));
  84. #endif
  85. State::hugepages_success &= os_init(State::hugepages,State::verbosity(3));
  86. /*! set tessellation cache size */
  87. setCacheSize( State::tessellation_cache_size );
  88. /*! enable some floating point exceptions to catch bugs */
  89. if (State::float_exceptions)
  90. {
  91. int exceptions = _MM_MASK_MASK;
  92. //exceptions &= ~_MM_MASK_INVALID;
  93. exceptions &= ~_MM_MASK_DENORM;
  94. exceptions &= ~_MM_MASK_DIV_ZERO;
  95. //exceptions &= ~_MM_MASK_OVERFLOW;
  96. //exceptions &= ~_MM_MASK_UNDERFLOW;
  97. //exceptions &= ~_MM_MASK_INEXACT;
  98. _MM_SET_EXCEPTION_MASK(exceptions);
  99. }
  100. /* print info header */
  101. if (State::verbosity(1))
  102. print();
  103. if (State::verbosity(2))
  104. State::print();
  105. /* register all algorithms */
  106. bvh4_factory = make_unique(new BVH4Factory(enabled_builder_cpu_features, enabled_cpu_features));
  107. #if defined(EMBREE_TARGET_SIMD8)
  108. bvh8_factory = make_unique(new BVH8Factory(enabled_builder_cpu_features, enabled_cpu_features));
  109. #endif
  110. /* setup tasking system */
  111. initTaskingSystem(numThreads);
  112. }
  113. Device::~Device ()
  114. {
  115. setCacheSize(0);
  116. exitTaskingSystem();
  117. }
  /*! Returns the ISA targets enabled at compile time as a list of names,
   *  each followed by a single space. The result is empty when no
   *  EMBREE_TARGET_* macro is defined. */
  std::string getEnabledTargets()
  {
    std::string targets;
#if defined(EMBREE_TARGET_SSE2)
    targets.append("SSE2 ");
#endif
#if defined(EMBREE_TARGET_SSE42)
    targets.append("SSE4.2 ");
#endif
#if defined(EMBREE_TARGET_AVX)
    targets.append("AVX ");
#endif
#if defined(EMBREE_TARGET_AVX2)
    targets.append("AVX2 ");
#endif
#if defined(EMBREE_TARGET_AVX512)
    targets.append("AVX512 ");
#endif
    return targets;
  }
  /*! Returns the optional features enabled at compile time as a list of
   *  names, each followed by a single space. The result is empty when none
   *  of the feature macros is defined. */
  std::string getEmbreeFeatures()
  {
    std::string features;
#if defined(EMBREE_RAY_MASK)
    features.append("raymasks ");
#endif
#if defined (EMBREE_BACKFACE_CULLING)
    features.append("backfaceculling ");
#endif
#if defined (EMBREE_BACKFACE_CULLING_CURVES)
    features.append("backfacecullingcurves ");
#endif
#if defined (EMBREE_BACKFACE_CULLING_SPHERES)
    features.append("backfacecullingspheres ");
#endif
#if defined(EMBREE_FILTER_FUNCTION)
    features.append("intersection_filter ");
#endif
#if defined (EMBREE_COMPACT_POLYS)
    features.append("compact_polys ");
#endif
    return features;
  }
  161. void Device::print()
  162. {
  163. const int cpu_features = getCPUFeatures();
  164. std::cout << std::endl;
  165. std::cout << "Embree Ray Tracing Kernels " << RTC_VERSION_STRING << " (" << RTC_HASH << ")" << std::endl;
  166. std::cout << " Compiler : " << getCompilerName() << std::endl;
  167. std::cout << " Build : ";
  168. #if defined(DEBUG)
  169. std::cout << "Debug " << std::endl;
  170. #else
  171. std::cout << "Release " << std::endl;
  172. #endif
  173. std::cout << " Platform : " << getPlatformName() << std::endl;
  174. std::cout << " CPU : " << stringOfCPUModel(getCPUModel()) << " (" << getCPUVendor() << ")" << std::endl;
  175. std::cout << " Threads : " << getNumberOfLogicalThreads() << std::endl;
  176. std::cout << " ISA : " << stringOfCPUFeatures(cpu_features) << std::endl;
  177. std::cout << " Targets : " << supportedTargetList(cpu_features) << std::endl;
  178. const bool hasFTZ = _mm_getcsr() & _MM_FLUSH_ZERO_ON;
  179. const bool hasDAZ = _mm_getcsr() & _MM_DENORMALS_ZERO_ON;
  180. std::cout << " MXCSR : " << "FTZ=" << hasFTZ << ", DAZ=" << hasDAZ << std::endl;
  181. std::cout << " Config" << std::endl;
  182. std::cout << " Threads : " << (numThreads ? toString(numThreads) : std::string("default")) << std::endl;
  183. std::cout << " ISA : " << stringOfCPUFeatures(enabled_cpu_features) << std::endl;
  184. std::cout << " Targets : " << supportedTargetList(enabled_cpu_features) << " (supported)" << std::endl;
  185. std::cout << " " << getEnabledTargets() << " (compile time enabled)" << std::endl;
  186. std::cout << " Features: " << getEmbreeFeatures() << std::endl;
  187. std::cout << " Tasking : ";
  188. #if defined(TASKING_TBB)
  189. std::cout << "TBB" << TBB_VERSION_MAJOR << "." << TBB_VERSION_MINOR << " ";
  190. #if TBB_INTERFACE_VERSION >= 12002
  191. std::cout << "TBB_header_interface_" << TBB_INTERFACE_VERSION << " TBB_lib_interface_" << TBB_runtime_interface_version() << " ";
  192. #else
  193. std::cout << "TBB_header_interface_" << TBB_INTERFACE_VERSION << " TBB_lib_interface_" << tbb::TBB_runtime_interface_version() << " ";
  194. #endif
  195. #endif
  196. #if defined(TASKING_INTERNAL)
  197. std::cout << "internal_tasking_system ";
  198. #endif
  199. #if defined(TASKING_PPL)
  200. std::cout << "PPL ";
  201. #endif
  202. std::cout << std::endl;
  203. #if defined(__X86_64__)
  204. /* check of FTZ and DAZ flags are set in CSR */
  205. if (!hasFTZ || !hasDAZ)
  206. {
  207. #if !defined(_DEBUG)
  208. if (State::verbosity(1))
  209. #endif
  210. {
  211. std::cout << std::endl;
  212. std::cout << "================================================================================" << std::endl;
  213. std::cout << " WARNING: \"Flush to Zero\" or \"Denormals are Zero\" mode not enabled " << std::endl
  214. << " in the MXCSR control and status register. This can have a severe " << std::endl
  215. << " performance impact. Please enable these modes for each application " << std::endl
  216. << " thread the following way:" << std::endl
  217. << std::endl
  218. << " #include \"xmmintrin.h\"" << std::endl
  219. << " #include \"pmmintrin.h\"" << std::endl
  220. << std::endl
  221. << " _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);" << std::endl
  222. << " _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);" << std::endl;
  223. std::cout << "================================================================================" << std::endl;
  224. std::cout << std::endl;
  225. }
  226. }
  227. #endif
  228. std::cout << std::endl;
  229. }
  230. void Device::setDeviceErrorCode(RTCError error, std::string const& msg)
  231. {
  232. RTCErrorMessage* stored_error = errorHandler.error();
  233. if (stored_error->error == RTC_ERROR_NONE) {
  234. stored_error->error = error;
  235. if (msg != "")
  236. stored_error->msg = msg;
  237. }
  238. }
  239. RTCError Device::getDeviceErrorCode()
  240. {
  241. RTCErrorMessage* stored_error = errorHandler.error();
  242. RTCErrorMessage error = *stored_error;
  243. stored_error->error = RTC_ERROR_NONE;
  244. return error.error;
  245. }
  246. const char* Device::getDeviceLastErrorMessage()
  247. {
  248. RTCErrorMessage* stored_error = errorHandler.error();
  249. return stored_error->msg.c_str();
  250. }
  251. void Device::setThreadErrorCode(RTCError error, std::string const& msg)
  252. {
  253. RTCErrorMessage* stored_error = g_errorHandler.error();
  254. if (stored_error->error == RTC_ERROR_NONE) {
  255. stored_error->error = error;
  256. if (msg != "")
  257. stored_error->msg = msg;
  258. }
  259. }
  260. RTCError Device::getThreadErrorCode()
  261. {
  262. RTCErrorMessage* stored_error = g_errorHandler.error();
  263. RTCErrorMessage error = *stored_error;
  264. stored_error->error = RTC_ERROR_NONE;
  265. return error.error;
  266. }
  267. const char* Device::getThreadLastErrorMessage()
  268. {
  269. RTCErrorMessage* stored_error = g_errorHandler.error();
  270. return stored_error->msg.c_str();
  271. }
  272. void Device::process_error(Device* device, RTCError error, const char* str)
  273. {
  274. /* store global error code when device construction failed */
  275. if (!device)
  276. return setThreadErrorCode(error, str ? std::string(str) : std::string());
  277. /* print error when in verbose mode */
  278. if (device->verbosity(1))
  279. {
  280. std::cerr << "Embree: " << getErrorString(error);
  281. if (str) std::cerr << ", (" << str << ")";
  282. std::cerr << std::endl;
  283. }
  284. /* call user specified error callback */
  285. if (device->error_function)
  286. device->error_function(device->error_function_userptr,error,str);
  287. /* record error code */
  288. device->setDeviceErrorCode(error, str ? std::string(str) : std::string());
  289. }
  290. void Device::memoryMonitor(ssize_t bytes, bool post)
  291. {
  292. if (State::memory_monitor_function && bytes != 0) {
  293. if (!State::memory_monitor_function(State::memory_monitor_userptr,bytes,post)) {
  294. if (bytes > 0) { // only throw exception when we allocate memory to never throw inside a destructor
  295. throw_RTCError(RTC_ERROR_OUT_OF_MEMORY,"memory monitor forced termination");
  296. }
  297. }
  298. }
  299. }
  300. size_t getMaxNumThreads()
  301. {
  302. size_t maxNumThreads = 0;
  303. for (std::map<Device*,size_t>::iterator i=g_num_threads_map.begin(); i != g_num_threads_map.end(); i++)
  304. maxNumThreads = max(maxNumThreads, (*i).second);
  305. if (maxNumThreads == 0)
  306. maxNumThreads = std::numeric_limits<size_t>::max();
  307. return maxNumThreads;
  308. }
  309. size_t getMaxCacheSize()
  310. {
  311. size_t maxCacheSize = 0;
  312. for (std::map<Device*,size_t>::iterator i=g_cache_size_map.begin(); i!= g_cache_size_map.end(); i++)
  313. maxCacheSize = max(maxCacheSize, (*i).second);
  314. return maxCacheSize;
  315. }
  316. void Device::setCacheSize(size_t bytes)
  317. {
  318. #if defined(EMBREE_GEOMETRY_SUBDIVISION)
  319. Lock<MutexSys> lock(g_mutex);
  320. if (bytes == 0) g_cache_size_map.erase(this);
  321. else g_cache_size_map[this] = bytes;
  322. size_t maxCacheSize = getMaxCacheSize();
  323. resizeTessellationCache(maxCacheSize);
  324. #endif
  325. }
  326. void Device::initTaskingSystem(size_t numThreads)
  327. {
  328. Lock<MutexSys> lock(g_mutex);
  329. if (numThreads == 0)
  330. g_num_threads_map[this] = std::numeric_limits<size_t>::max();
  331. else
  332. g_num_threads_map[this] = numThreads;
  333. /* create task scheduler */
  334. size_t maxNumThreads = getMaxNumThreads();
  335. TaskScheduler::create(maxNumThreads,State::set_affinity,State::start_threads);
  336. #if USE_TASK_ARENA
  337. const size_t nThreads = min(maxNumThreads,TaskScheduler::threadCount());
  338. const size_t uThreads = min(max(numUserThreads,(size_t)1),nThreads);
  339. arena->arena = make_unique(new tbb::task_arena((int)nThreads,(unsigned int)uThreads));
  340. #endif
  341. }
  342. void Device::exitTaskingSystem()
  343. {
  344. Lock<MutexSys> lock(g_mutex);
  345. g_num_threads_map.erase(this);
  346. /* terminate tasking system */
  347. if (g_num_threads_map.size() == 0) {
  348. TaskScheduler::destroy();
  349. }
  350. /* or configure new number of threads */
  351. else {
  352. size_t maxNumThreads = getMaxNumThreads();
  353. TaskScheduler::create(maxNumThreads,State::set_affinity,State::start_threads);
  354. }
  355. #if USE_TASK_ARENA
  356. arena->arena.reset();
  357. #endif
  358. }
  359. void Device::execute(bool join, const std::function<void()>& func)
  360. {
  361. #if USE_TASK_ARENA
  362. if (join) {
  363. arena->arena->execute(func);
  364. }
  365. else
  366. #endif
  367. {
  368. func();
  369. }
  370. }
  371. void Device::setProperty(const RTCDeviceProperty prop, ssize_t val)
  372. {
  373. /* hidden internal properties */
  374. switch ((size_t)prop)
  375. {
  376. case 1000000: debug_int0 = val; return;
  377. case 1000001: debug_int1 = val; return;
  378. case 1000002: debug_int2 = val; return;
  379. case 1000003: debug_int3 = val; return;
  380. }
  381. throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown writable property");
  382. }
  383. ssize_t Device::getProperty(const RTCDeviceProperty prop)
  384. {
  385. size_t iprop = (size_t)prop;
  386. /* get name of internal regression test */
  387. if (iprop >= 2000000 && iprop < 3000000)
  388. {
  389. RegressionTest* test = getRegressionTest(iprop-2000000);
  390. if (test) return (ssize_t) test->name.c_str();
  391. else return 0;
  392. }
  393. /* run internal regression test */
  394. if (iprop >= 3000000 && iprop < 4000000)
  395. {
  396. RegressionTest* test = getRegressionTest(iprop-3000000);
  397. if (test) return test->run();
  398. else return 0;
  399. }
  400. /* documented properties */
  401. switch (prop)
  402. {
  403. case RTC_DEVICE_PROPERTY_VERSION_MAJOR: return RTC_VERSION_MAJOR;
  404. case RTC_DEVICE_PROPERTY_VERSION_MINOR: return RTC_VERSION_MINOR;
  405. case RTC_DEVICE_PROPERTY_VERSION_PATCH: return RTC_VERSION_PATCH;
  406. case RTC_DEVICE_PROPERTY_VERSION : return RTC_VERSION;
  407. #if defined(EMBREE_TARGET_SIMD4) && defined(EMBREE_RAY_PACKETS)
  408. case RTC_DEVICE_PROPERTY_NATIVE_RAY4_SUPPORTED: return hasISA(SSE2);
  409. #else
  410. case RTC_DEVICE_PROPERTY_NATIVE_RAY4_SUPPORTED: return 0;
  411. #endif
  412. #if defined(EMBREE_TARGET_SIMD8) && defined(EMBREE_RAY_PACKETS)
  413. case RTC_DEVICE_PROPERTY_NATIVE_RAY8_SUPPORTED: return hasISA(AVX);
  414. #else
  415. case RTC_DEVICE_PROPERTY_NATIVE_RAY8_SUPPORTED: return 0;
  416. #endif
  417. #if defined(EMBREE_TARGET_SIMD16) && defined(EMBREE_RAY_PACKETS)
  418. case RTC_DEVICE_PROPERTY_NATIVE_RAY16_SUPPORTED: return hasISA(AVX512);
  419. #else
  420. case RTC_DEVICE_PROPERTY_NATIVE_RAY16_SUPPORTED: return 0;
  421. #endif
  422. #if defined(EMBREE_RAY_MASK)
  423. case RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED: return 1;
  424. #else
  425. case RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED: return 0;
  426. #endif
  427. #if defined(EMBREE_BACKFACE_CULLING)
  428. case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_ENABLED: return 1;
  429. #else
  430. case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_ENABLED: return 0;
  431. #endif
  432. #if defined(EMBREE_BACKFACE_CULLING_CURVES)
  433. case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_CURVES_ENABLED: return 1;
  434. #else
  435. case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_CURVES_ENABLED: return 0;
  436. #endif
  437. #if defined(EMBREE_BACKFACE_CULLING_SPHERES)
  438. case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_SPHERES_ENABLED: return 1;
  439. #else
  440. case RTC_DEVICE_PROPERTY_BACKFACE_CULLING_SPHERES_ENABLED: return 0;
  441. #endif
  442. #if defined(EMBREE_COMPACT_POLYS)
  443. case RTC_DEVICE_PROPERTY_COMPACT_POLYS_ENABLED: return 1;
  444. #else
  445. case RTC_DEVICE_PROPERTY_COMPACT_POLYS_ENABLED: return 0;
  446. #endif
  447. #if defined(EMBREE_FILTER_FUNCTION)
  448. case RTC_DEVICE_PROPERTY_FILTER_FUNCTION_SUPPORTED: return 1;
  449. #else
  450. case RTC_DEVICE_PROPERTY_FILTER_FUNCTION_SUPPORTED: return 0;
  451. #endif
  452. #if defined(EMBREE_IGNORE_INVALID_RAYS)
  453. case RTC_DEVICE_PROPERTY_IGNORE_INVALID_RAYS_ENABLED: return 1;
  454. #else
  455. case RTC_DEVICE_PROPERTY_IGNORE_INVALID_RAYS_ENABLED: return 0;
  456. #endif
  457. #if defined(TASKING_INTERNAL)
  458. case RTC_DEVICE_PROPERTY_TASKING_SYSTEM: return 0;
  459. #endif
  460. #if defined(TASKING_TBB)
  461. case RTC_DEVICE_PROPERTY_TASKING_SYSTEM: return 1;
  462. #endif
  463. #if defined(TASKING_PPL)
  464. case RTC_DEVICE_PROPERTY_TASKING_SYSTEM: return 2;
  465. #endif
  466. #if defined(EMBREE_GEOMETRY_TRIANGLE)
  467. case RTC_DEVICE_PROPERTY_TRIANGLE_GEOMETRY_SUPPORTED: return 1;
  468. #else
  469. case RTC_DEVICE_PROPERTY_TRIANGLE_GEOMETRY_SUPPORTED: return 0;
  470. #endif
  471. #if defined(EMBREE_GEOMETRY_QUAD)
  472. case RTC_DEVICE_PROPERTY_QUAD_GEOMETRY_SUPPORTED: return 1;
  473. #else
  474. case RTC_DEVICE_PROPERTY_QUAD_GEOMETRY_SUPPORTED: return 0;
  475. #endif
  476. #if defined(EMBREE_GEOMETRY_CURVE)
  477. case RTC_DEVICE_PROPERTY_CURVE_GEOMETRY_SUPPORTED: return 1;
  478. #else
  479. case RTC_DEVICE_PROPERTY_CURVE_GEOMETRY_SUPPORTED: return 0;
  480. #endif
  481. #if defined(EMBREE_GEOMETRY_SUBDIVISION)
  482. case RTC_DEVICE_PROPERTY_SUBDIVISION_GEOMETRY_SUPPORTED: return 1;
  483. #else
  484. case RTC_DEVICE_PROPERTY_SUBDIVISION_GEOMETRY_SUPPORTED: return 0;
  485. #endif
  486. #if defined(EMBREE_GEOMETRY_USER)
  487. case RTC_DEVICE_PROPERTY_USER_GEOMETRY_SUPPORTED: return 1;
  488. #else
  489. case RTC_DEVICE_PROPERTY_USER_GEOMETRY_SUPPORTED: return 0;
  490. #endif
  491. #if defined(EMBREE_GEOMETRY_POINT)
  492. case RTC_DEVICE_PROPERTY_POINT_GEOMETRY_SUPPORTED: return 1;
  493. #else
  494. case RTC_DEVICE_PROPERTY_POINT_GEOMETRY_SUPPORTED: return 0;
  495. #endif
  496. #if defined(TASKING_PPL)
  497. case RTC_DEVICE_PROPERTY_JOIN_COMMIT_SUPPORTED: return 0;
  498. #elif defined(TASKING_TBB) && (TBB_INTERFACE_VERSION_MAJOR < 8)
  499. case RTC_DEVICE_PROPERTY_JOIN_COMMIT_SUPPORTED: return 0;
  500. #else
  501. case RTC_DEVICE_PROPERTY_JOIN_COMMIT_SUPPORTED: return 1;
  502. #endif
  503. #if defined(TASKING_TBB) && TASKING_TBB_USE_TASK_ISOLATION
  504. case RTC_DEVICE_PROPERTY_PARALLEL_COMMIT_SUPPORTED: return 1;
  505. #else
  506. case RTC_DEVICE_PROPERTY_PARALLEL_COMMIT_SUPPORTED: return 0;
  507. #endif
  508. #if defined(EMBREE_SYCL_SUPPORT)
  509. case RTC_DEVICE_PROPERTY_CPU_DEVICE: {
  510. if (!dynamic_cast<DeviceGPU*>(this))
  511. return 1;
  512. return 0;
  513. };
  514. case RTC_DEVICE_PROPERTY_SYCL_DEVICE: {
  515. if (!dynamic_cast<DeviceGPU*>(this))
  516. return 0;
  517. return 1;
  518. };
  519. #else
  520. case RTC_DEVICE_PROPERTY_CPU_DEVICE: return 1;
  521. case RTC_DEVICE_PROPERTY_SYCL_DEVICE: return 0;
  522. #endif
  523. default: throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown readable property"); break;
  524. };
  525. }
  526. void* Device::malloc(size_t size, size_t align) {
  527. return alignedMalloc(size,align);
  528. }
  529. void* Device::malloc(size_t size, size_t align, EmbreeMemoryType type) {
  530. return alignedMalloc(size,align);
  531. }
  532. void Device::free(void* ptr) {
  533. alignedFree(ptr);
  534. }
  535. const std::vector<std::string> Device::error_strings = {
  536. "No Error",
  537. "Unknown error",
  538. "Invalid argument",
  539. "Invalid operation",
  540. "Out of Memory",
  541. "Unsupported CPU",
  542. "Build cancelled",
  543. "Level Zero raytracing support missing"
  544. };
  545. const char* Device::getErrorString(RTCError error) {
  546. if (error >= 0 && error < error_strings.size()) {
  547. return error_strings.at(error).c_str();
  548. }
  549. return "Invalid error code";
  550. }
  551. #if defined(EMBREE_SYCL_SUPPORT)
  552. DeviceGPU::DeviceGPU(sycl::context sycl_context, const char* cfg)
  553. : Device(cfg), gpu_context(sycl_context)
  554. {
  555. /* initialize ZeWrapper */
  556. if (ZeWrapper::init() != ZE_RESULT_SUCCESS)
  557. throw_RTCError(RTC_ERROR_UNKNOWN, "cannot initialize ZeWrapper");
  558. /* take first device as default device */
  559. auto devices = gpu_context.get_devices();
  560. if (devices.size() == 0)
  561. throw_RTCError(RTC_ERROR_UNKNOWN, "SYCL context contains no device");
  562. gpu_device = devices[0];
  563. /* check if RTAS build extension is available */
  564. sycl::platform platform = gpu_device.get_platform();
  565. ze_driver_handle_t hDriver = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(platform);
  566. uint32_t count = 0;
  567. std::vector<ze_driver_extension_properties_t> extensions;
  568. ze_result_t result = ZeWrapper::zeDriverGetExtensionProperties(hDriver,&count,extensions.data());
  569. if (result != ZE_RESULT_SUCCESS)
  570. throw_RTCError(RTC_ERROR_UNKNOWN, "zeDriverGetExtensionProperties failed");
  571. extensions.resize(count);
  572. result = ZeWrapper::zeDriverGetExtensionProperties(hDriver,&count,extensions.data());
  573. if (result != ZE_RESULT_SUCCESS)
  574. throw_RTCError(RTC_ERROR_UNKNOWN, "zeDriverGetExtensionProperties failed");
  575. bool ze_rtas_builder = false;
  576. for (uint32_t i=0; i<extensions.size(); i++)
  577. {
  578. if (strncmp("ZE_experimental_rtas_builder",extensions[i].name,sizeof(extensions[i].name)) == 0)
  579. ze_rtas_builder = true;
  580. }
  581. if (!ze_rtas_builder)
  582. throw_RTCError(RTC_ERROR_LEVEL_ZERO_RAYTRACING_SUPPORT_MISSING, "ZE_experimental_rtas_builder extension not found. Please install a recent driver. On Linux, make sure that the package intel-level-zero-gpu-raytracing is installed");
  583. result = ZeWrapper::initRTASBuilder(hDriver);
  584. if (result == ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE) {
  585. throw_RTCError(RTC_ERROR_LEVEL_ZERO_RAYTRACING_SUPPORT_MISSING, "cannot load ZE_experimental_rtas_builder extension. Please install a recent driver. On Linux, make sure that the package intel-level-zero-gpu-raytracing is installed");
  586. }
  587. if (result != ZE_RESULT_SUCCESS)
  588. throw_RTCError(RTC_ERROR_UNKNOWN, "cannot initialize ZE_experimental_rtas_builder extension");
  589. if (State::verbosity(1))
  590. {
  591. std::cout << " Level Zero RTAS Builder" << std::endl;
  592. }
  593. /* check if extension library can get loaded */
  594. ze_rtas_parallel_operation_exp_handle_t hParallelOperation;
  595. result = ZeWrapper::zeRTASParallelOperationCreateExp(hDriver, &hParallelOperation);
  596. if (result == ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE)
  597. throw_RTCError(RTC_ERROR_UNKNOWN, "Level Zero RTAS Build Extension cannot get loaded");
  598. if (result == ZE_RESULT_SUCCESS)
  599. ZeWrapper::zeRTASParallelOperationDestroyExp(hParallelOperation);
  600. gpu_maxWorkGroupSize = getGPUDevice().get_info<sycl::info::device::max_work_group_size>();
  601. gpu_maxComputeUnits = getGPUDevice().get_info<sycl::info::device::max_compute_units>();
  602. if (State::verbosity(1))
  603. {
  604. sycl::platform platform = gpu_context.get_platform();
  605. std::cout << " Platform : " << platform.get_info<sycl::info::platform::name>() << std::endl;
  606. std::cout << " Device : " << getGPUDevice().get_info<sycl::info::device::name>() << std::endl;
  607. std::cout << " Max Work Group Size : " << gpu_maxWorkGroupSize << std::endl;
  608. std::cout << " Max Compute Units : " << gpu_maxComputeUnits << std::endl;
  609. std::cout << std::endl;
  610. }
  611. dispatchGlobalsPtr = zeRTASInitExp(gpu_device, gpu_context);
  612. }
  613. DeviceGPU::~DeviceGPU()
  614. {
  615. rthwifCleanup(this,dispatchGlobalsPtr,gpu_context);
  616. }
  617. void DeviceGPU::enter() {
  618. }
  619. void DeviceGPU::leave() {
  620. }
  621. void* DeviceGPU::malloc(size_t size, size_t align) {
  622. return alignedSYCLMalloc(&gpu_context,&gpu_device,size,align,EmbreeUSMMode::DEVICE_READ_ONLY);
  623. }
  624. void* DeviceGPU::malloc(size_t size, size_t align, EmbreeMemoryType type) {
  625. return alignedSYCLMalloc(&gpu_context,&gpu_device,size,align,EmbreeUSMMode::DEVICE_READ_ONLY,type);
  626. }
  627. void DeviceGPU::free(void* ptr) {
  628. alignedSYCLFree(&gpu_context,ptr);
  629. }
  630. void DeviceGPU::setSYCLDevice(const sycl::device sycl_device_in) {
  631. gpu_device = sycl_device_in;
  632. }
  633. // turn off deprecation warning for host_unified_memory property usage.
  634. // there is currently no equivalent SYCL aspect that replaces this property.
  635. #pragma GCC diagnostic push
  636. #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
  637. bool DeviceGPU::has_unified_memory() const {
  638. return gpu_device.get_info<sycl::info::device::host_unified_memory>();
  639. }
  640. #pragma GCC diagnostic pop
  641. #endif
  642. DeviceEnterLeave::DeviceEnterLeave (RTCDevice hdevice)
  643. : device((Device*)hdevice)
  644. {
  645. assert(device);
  646. device->refInc();
  647. device->enter();
  648. }
  649. DeviceEnterLeave::DeviceEnterLeave (RTCScene hscene)
  650. : device(((Scene*)hscene)->device)
  651. {
  652. assert(device);
  653. device->refInc();
  654. device->enter();
  655. }
  656. DeviceEnterLeave::DeviceEnterLeave (RTCGeometry hgeometry)
  657. : device(((Geometry*)hgeometry)->device)
  658. {
  659. assert(device);
  660. device->refInc();
  661. device->enter();
  662. }
  663. DeviceEnterLeave::DeviceEnterLeave (RTCBuffer hbuffer)
  664. : device(((Buffer*)hbuffer)->device)
  665. {
  666. assert(device);
  667. device->refInc();
  668. device->enter();
  669. }
  670. DeviceEnterLeave::~DeviceEnterLeave() {
  671. device->leave();
  672. device->refDec();
  673. }
  674. }