device.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518
  1. // ======================================================================== //
  2. // Copyright 2009-2017 Intel Corporation //
  3. // //
  4. // Licensed under the Apache License, Version 2.0 (the "License"); //
  5. // you may not use this file except in compliance with the License. //
  6. // You may obtain a copy of the License at //
  7. // //
  8. // http://www.apache.org/licenses/LICENSE-2.0 //
  9. // //
  10. // Unless required by applicable law or agreed to in writing, software //
  11. // distributed under the License is distributed on an "AS IS" BASIS, //
  12. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
  13. // See the License for the specific language governing permissions and //
  14. // limitations under the License. //
  15. // ======================================================================== //
  16. #include "device.h"
  17. #include "version.h"
  18. #include "scene_triangle_mesh.h"
  19. #include "scene_user_geometry.h"
  20. #include "scene_instance.h"
  21. #include "scene_bezier_curves.h"
  22. #include "scene_subdiv_mesh.h"
  23. #include "../subdiv/tessellation_cache.h"
  24. #include "acceln.h"
  25. #include "geometry.h"
  26. #include "../geometry/cylinder.h"
  27. #include "../bvh/bvh4_factory.h"
  28. #include "../bvh/bvh8_factory.h"
  29. #include "../common/tasking/taskscheduler.h"
  30. namespace embree
  31. {
  32. /*! some global variables that can be set via rtcSetParameter1i for debugging purposes */
  33. ssize_t Device::debug_int0 = 0;
  34. ssize_t Device::debug_int1 = 0;
  35. ssize_t Device::debug_int2 = 0;
  36. ssize_t Device::debug_int3 = 0;
  37. DECLARE_SYMBOL2(RayStreamFilterFuncs,rayStreamFilterFuncs);
  38. static MutexSys g_mutex;
  39. static std::map<Device*,size_t> g_cache_size_map;
  40. static std::map<Device*,size_t> g_num_threads_map;
  41. Device::Device (const char* cfg, bool singledevice)
  42. : State(singledevice)
  43. {
  44. /* initialize global state */
  45. State::parseString(cfg);
  46. if (!ignore_config_files && FileName::executableFolder() != FileName(""))
  47. State::parseFile(FileName::executableFolder()+FileName(".embree" TOSTRING(__EMBREE_VERSION_MAJOR__)));
  48. if (!ignore_config_files && FileName::homeFolder() != FileName(""))
  49. State::parseFile(FileName::homeFolder()+FileName(".embree" TOSTRING(__EMBREE_VERSION_MAJOR__)));
  50. State::verify();
  51. /*! do some internal tests */
  52. assert(isa::Cylinder::verify());
  53. /*! set tessellation cache size */
  54. setCacheSize( State::tessellation_cache_size );
  55. /*! enable some floating point exceptions to catch bugs */
  56. if (State::float_exceptions)
  57. {
  58. int exceptions = _MM_MASK_MASK;
  59. //exceptions &= ~_MM_MASK_INVALID;
  60. exceptions &= ~_MM_MASK_DENORM;
  61. exceptions &= ~_MM_MASK_DIV_ZERO;
  62. //exceptions &= ~_MM_MASK_OVERFLOW;
  63. //exceptions &= ~_MM_MASK_UNDERFLOW;
  64. //exceptions &= ~_MM_MASK_INEXACT;
  65. _MM_SET_EXCEPTION_MASK(exceptions);
  66. }
  67. /* print info header */
  68. if (State::verbosity(1))
  69. print();
  70. if (State::verbosity(2))
  71. State::print();
  72. /* register all algorithms */
  73. instance_factory = make_unique(new InstanceFactory(enabled_cpu_features));
  74. bvh4_factory = make_unique(new BVH4Factory(enabled_builder_cpu_features, enabled_cpu_features));
  75. #if defined(__TARGET_AVX__)
  76. bvh8_factory = make_unique(new BVH8Factory(enabled_builder_cpu_features, enabled_cpu_features));
  77. #endif
  78. /* setup tasking system */
  79. initTaskingSystem(numThreads);
  80. /* ray stream SOA to AOS conversion */
  81. #if defined(EMBREE_RAY_PACKETS)
  82. RayStreamFilterFuncsType rayStreamFilterFuncs;
  83. SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512KNL_AVX512SKX(enabled_cpu_features,rayStreamFilterFuncs);
  84. rayStreamFilters = rayStreamFilterFuncs();
  85. #endif
  86. }
  87. Device::~Device ()
  88. {
  89. setCacheSize(0);
  90. exitTaskingSystem();
  91. }
  92. std::string getEnabledTargets()
  93. {
  94. std::string v = std::string(ISA_STR) + " ";
  95. #if defined(__TARGET_SSE41__)
  96. v += "SSE4.1 ";
  97. #endif
  98. #if defined(__TARGET_SSE42__)
  99. v += "SSE4.2 ";
  100. #endif
  101. #if defined(__TARGET_AVX__)
  102. v += "AVX ";
  103. #endif
  104. #if defined(__TARGET_AVX2__)
  105. v += "AVX2 ";
  106. #endif
  107. #if defined(__TARGET_AVX512KNL__)
  108. v += "AVX512KNL ";
  109. #endif
  110. #if defined(__TARGET_AVX512SKX__)
  111. v += "AVX512SKX ";
  112. #endif
  113. return v;
  114. }
  /*! Returns the names of the optional features compiled into this build.
   *  Each name is followed by a single space; the string is empty when none
   *  of the feature macros is defined. */
  std::string getEmbreeFeatures()
  {
    std::string features;
#if defined(EMBREE_RAY_MASK)
    features.append("raymasks ");
#endif
#if defined (EMBREE_BACKFACE_CULLING)
    features.append("backfaceculling ");
#endif
#if defined(EMBREE_INTERSECTION_FILTER)
    features.append("intersection_filter ");
#endif
    return features;
  }
  129. void Device::print()
  130. {
  131. const int cpu_features = getCPUFeatures();
  132. std::cout << "Embree Ray Tracing Kernels " << __EMBREE_VERSION__ << " (" << __EMBREE_HASH__ << ")" << std::endl;
  133. std::cout << " Compiler : " << getCompilerName() << std::endl;
  134. std::cout << " Build : ";
  135. #if defined(DEBUG)
  136. std::cout << "Debug " << std::endl;
  137. #else
  138. std::cout << "Release " << std::endl;
  139. #endif
  140. std::cout << " Platform : " << getPlatformName() << std::endl;
  141. std::cout << " CPU : " << stringOfCPUModel(getCPUModel()) << " (" << getCPUVendor() << ")" << std::endl;
  142. std::cout << " Threads : " << getNumberOfLogicalThreads() << std::endl;
  143. std::cout << " ISA : " << stringOfCPUFeatures(cpu_features) << std::endl;
  144. std::cout << " Targets : " << supportedTargetList(cpu_features) << std::endl;
  145. const bool hasFTZ = _mm_getcsr() & _MM_FLUSH_ZERO_ON;
  146. const bool hasDAZ = _mm_getcsr() & _MM_DENORMALS_ZERO_ON;
  147. std::cout << " MXCSR : " << "FTZ=" << hasFTZ << ", DAZ=" << hasDAZ << std::endl;
  148. std::cout << " Config" << std::endl;
  149. std::cout << " Threads : " << (numThreads ? toString(numThreads) : std::string("default")) << std::endl;
  150. std::cout << " ISA : " << stringOfCPUFeatures(enabled_cpu_features) << std::endl;
  151. std::cout << " Targets : " << supportedTargetList(enabled_cpu_features) << " (supported)" << std::endl;
  152. std::cout << " " << getEnabledTargets() << " (compile time enabled)" << std::endl;
  153. std::cout << " Features: " << getEmbreeFeatures() << std::endl;
  154. std::cout << " Tasking : ";
  155. #if defined(TASKING_TBB)
  156. std::cout << "TBB" << TBB_VERSION_MAJOR << "." << TBB_VERSION_MINOR << " ";
  157. std::cout << "TBB_header_interface_" << TBB_INTERFACE_VERSION << " TBB_lib_interface_" << tbb::TBB_runtime_interface_version() << " ";
  158. #endif
  159. #if defined(TASKING_INTERNAL)
  160. std::cout << "internal_tasking_system ";
  161. #endif
  162. #if defined(TASKING_PPL)
  163. std::cout << "PPL ";
  164. #endif
  165. std::cout << std::endl;
  166. /* check of FTZ and DAZ flags are set in CSR */
  167. if (!hasFTZ || !hasDAZ)
  168. {
  169. #if !defined(_DEBUG)
  170. if (State::verbosity(1))
  171. #endif
  172. {
  173. std::cout << std::endl;
  174. std::cout << "================================================================================" << std::endl;
  175. std::cout << " WARNING: \"Flush to Zero\" or \"Denormals are Zero\" mode not enabled " << std::endl
  176. << " in the MXCSR control and status register. This can have a severe " << std::endl
  177. << " performance impact. Please enable these modes for each application " << std::endl
  178. << " thread the following way:" << std::endl
  179. << std::endl
  180. << " #include \"xmmintrin.h\"" << std::endl
  181. << " #include \"pmmintrin.h\"" << std::endl
  182. << std::endl
  183. << " _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);" << std::endl
  184. << " _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);" << std::endl;
  185. std::cout << "================================================================================" << std::endl;
  186. std::cout << std::endl;
  187. }
  188. }
  189. std::cout << std::endl;
  190. }
  191. void Device::setDeviceErrorCode(RTCError error)
  192. {
  193. RTCError* stored_error = errorHandler.error();
  194. if (*stored_error == RTC_NO_ERROR)
  195. *stored_error = error;
  196. }
  197. RTCError Device::getDeviceErrorCode()
  198. {
  199. RTCError* stored_error = errorHandler.error();
  200. RTCError error = *stored_error;
  201. *stored_error = RTC_NO_ERROR;
  202. return error;
  203. }
  204. void Device::setThreadErrorCode(RTCError error)
  205. {
  206. RTCError* stored_error = g_errorHandler.error();
  207. if (*stored_error == RTC_NO_ERROR)
  208. *stored_error = error;
  209. }
  210. RTCError Device::getThreadErrorCode()
  211. {
  212. RTCError* stored_error = g_errorHandler.error();
  213. RTCError error = *stored_error;
  214. *stored_error = RTC_NO_ERROR;
  215. return error;
  216. }
  217. void Device::process_error(Device* device, RTCError error, const char* str)
  218. {
  219. /* store global error code when device construction failed */
  220. if (!device)
  221. return setThreadErrorCode(error);
  222. /* print error when in verbose mode */
  223. if (device->verbosity(1))
  224. {
  225. switch (error) {
  226. case RTC_NO_ERROR : std::cerr << "Embree: No error"; break;
  227. case RTC_UNKNOWN_ERROR : std::cerr << "Embree: Unknown error"; break;
  228. case RTC_INVALID_ARGUMENT : std::cerr << "Embree: Invalid argument"; break;
  229. case RTC_INVALID_OPERATION: std::cerr << "Embree: Invalid operation"; break;
  230. case RTC_OUT_OF_MEMORY : std::cerr << "Embree: Out of memory"; break;
  231. case RTC_UNSUPPORTED_CPU : std::cerr << "Embree: Unsupported CPU"; break;
  232. default : std::cerr << "Embree: Invalid error code"; break;
  233. };
  234. if (str) std::cerr << ", (" << str << ")";
  235. std::cerr << std::endl;
  236. }
  237. /* call user specified error callback */
  238. if (device->error_function)
  239. device->error_function(error,str);
  240. if (device->error_function2)
  241. device->error_function2(device->error_function_userptr,error,str);
  242. /* record error code */
  243. device->setDeviceErrorCode(error);
  244. }
  245. void Device::memoryMonitor(ssize_t bytes, bool post)
  246. {
  247. if (State::memory_monitor_function && bytes != 0) {
  248. if (!State::memory_monitor_function(bytes,post)) {
  249. if (bytes > 0) { // only throw exception when we allocate memory to never throw inside a destructor
  250. throw_RTCError(RTC_OUT_OF_MEMORY,"memory monitor forced termination");
  251. }
  252. }
  253. }
  254. if (State::memory_monitor_function2 && bytes != 0) {
  255. if (!State::memory_monitor_function2(State::memory_monitor_userptr,bytes,post)) {
  256. if (bytes > 0) { // only throw exception when we allocate memory to never throw inside a destructor
  257. throw_RTCError(RTC_OUT_OF_MEMORY,"memory monitor forced termination");
  258. }
  259. }
  260. }
  261. }
  262. size_t getMaxNumThreads()
  263. {
  264. size_t maxNumThreads = 0;
  265. for (std::map<Device*,size_t>::iterator i=g_num_threads_map.begin(); i != g_num_threads_map.end(); i++)
  266. maxNumThreads = max(maxNumThreads, (*i).second);
  267. if (maxNumThreads == 0)
  268. maxNumThreads = std::numeric_limits<size_t>::max();
  269. return maxNumThreads;
  270. }
  271. size_t getMaxCacheSize()
  272. {
  273. size_t maxCacheSize = 0;
  274. for (std::map<Device*,size_t>::iterator i=g_cache_size_map.begin(); i!= g_cache_size_map.end(); i++)
  275. maxCacheSize = max(maxCacheSize, (*i).second);
  276. return maxCacheSize;
  277. }
  278. void Device::setCacheSize(size_t bytes)
  279. {
  280. #if defined(EMBREE_GEOMETRY_SUBDIV)
  281. Lock<MutexSys> lock(g_mutex);
  282. if (bytes == 0) g_cache_size_map.erase(this);
  283. else g_cache_size_map[this] = bytes;
  284. size_t maxCacheSize = getMaxCacheSize();
  285. resizeTessellationCache(maxCacheSize);
  286. #endif
  287. }
  288. void Device::initTaskingSystem(size_t numThreads)
  289. {
  290. Lock<MutexSys> lock(g_mutex);
  291. if (numThreads == 0)
  292. g_num_threads_map[this] = std::numeric_limits<size_t>::max();
  293. else
  294. g_num_threads_map[this] = numThreads;
  295. /* create task scheduler */
  296. size_t maxNumThreads = getMaxNumThreads();
  297. TaskScheduler::create(maxNumThreads,State::set_affinity,State::start_threads);
  298. #if USE_TASK_ARENA
  299. arena = make_unique(new tbb::task_arena((int)min(maxNumThreads,TaskScheduler::threadCount())));
  300. #endif
  301. }
  302. void Device::exitTaskingSystem()
  303. {
  304. Lock<MutexSys> lock(g_mutex);
  305. g_num_threads_map.erase(this);
  306. /* terminate tasking system */
  307. if (g_num_threads_map.size() == 0) {
  308. TaskScheduler::destroy();
  309. }
  310. /* or configure new number of threads */
  311. else {
  312. size_t maxNumThreads = getMaxNumThreads();
  313. TaskScheduler::create(maxNumThreads,State::set_affinity,State::start_threads);
  314. }
  315. #if USE_TASK_ARENA
  316. arena.reset();
  317. #endif
  318. }
  319. void Device::setParameter1i(const RTCParameter parm, ssize_t val)
  320. {
  321. /* hidden internal parameters */
  322. switch ((size_t)parm)
  323. {
  324. case 1000000: debug_int0 = val; return;
  325. case 1000001: debug_int1 = val; return;
  326. case 1000002: debug_int2 = val; return;
  327. case 1000003: debug_int3 = val; return;
  328. }
  329. switch (parm) {
  330. case RTC_SOFTWARE_CACHE_SIZE: setCacheSize(val); break;
  331. default: throw_RTCError(RTC_INVALID_ARGUMENT, "unknown writable parameter"); break;
  332. };
  333. }
  334. ssize_t Device::getParameter1i(const RTCParameter parm)
  335. {
  336. size_t iparm = (size_t)parm;
  337. /* get name of internal regression test */
  338. if (iparm >= 2000000 && iparm < 3000000)
  339. {
  340. RegressionTest* test = getRegressionTest(iparm-2000000);
  341. if (test) return (ssize_t) test->name.c_str();
  342. else return 0;
  343. }
  344. /* run internal regression test */
  345. if (iparm >= 3000000 && iparm < 4000000)
  346. {
  347. RegressionTest* test = getRegressionTest(iparm-3000000);
  348. if (test) return test->run();
  349. else return 0;
  350. }
  351. /* documented parameters */
  352. switch (parm)
  353. {
  354. case RTC_CONFIG_VERSION_MAJOR: return __EMBREE_VERSION_MAJOR__;
  355. case RTC_CONFIG_VERSION_MINOR: return __EMBREE_VERSION_MINOR__;
  356. case RTC_CONFIG_VERSION_PATCH: return __EMBREE_VERSION_PATCH__;
  357. case RTC_CONFIG_VERSION : return __EMBREE_VERSION_NUMBER__;
  358. case RTC_CONFIG_INTERSECT1: return 1;
  359. #if defined(__TARGET_SIMD4__) && defined(EMBREE_RAY_PACKETS)
  360. case RTC_CONFIG_INTERSECT4: return hasISA(SSE2);
  361. #else
  362. case RTC_CONFIG_INTERSECT4: return 0;
  363. #endif
  364. #if defined(__TARGET_SIMD8__) && defined(EMBREE_RAY_PACKETS)
  365. case RTC_CONFIG_INTERSECT8: return hasISA(AVX);
  366. #else
  367. case RTC_CONFIG_INTERSECT8: return 0;
  368. #endif
  369. #if defined(__TARGET_SIMD16__) && defined(EMBREE_RAY_PACKETS)
  370. case RTC_CONFIG_INTERSECT16: return hasISA(AVX512KNL) | hasISA(AVX512SKX);
  371. #else
  372. case RTC_CONFIG_INTERSECT16: return 0;
  373. #endif
  374. #if defined(EMBREE_RAY_PACKETS)
  375. case RTC_CONFIG_INTERSECT_STREAM: return 1;
  376. #else
  377. case RTC_CONFIG_INTERSECT_STREAM: return 0;
  378. #endif
  379. #if defined(EMBREE_RAY_MASK)
  380. case RTC_CONFIG_RAY_MASK: return 1;
  381. #else
  382. case RTC_CONFIG_RAY_MASK: return 0;
  383. #endif
  384. #if defined(EMBREE_BACKFACE_CULLING)
  385. case RTC_CONFIG_BACKFACE_CULLING: return 1;
  386. #else
  387. case RTC_CONFIG_BACKFACE_CULLING: return 0;
  388. #endif
  389. #if defined(EMBREE_INTERSECTION_FILTER)
  390. case RTC_CONFIG_INTERSECTION_FILTER: return 1;
  391. #else
  392. case RTC_CONFIG_INTERSECTION_FILTER: return 0;
  393. #endif
  394. #if defined(EMBREE_INTERSECTION_FILTER_RESTORE)
  395. case RTC_CONFIG_INTERSECTION_FILTER_RESTORE: return 1;
  396. #else
  397. case RTC_CONFIG_INTERSECTION_FILTER_RESTORE: return 0;
  398. #endif
  399. #if defined(EMBREE_IGNORE_INVALID_RAYS)
  400. case RTC_CONFIG_IGNORE_INVALID_RAYS: return 1;
  401. #else
  402. case RTC_CONFIG_IGNORE_INVALID_RAYS: return 0;
  403. #endif
  404. #if defined(TASKING_INTERNAL)
  405. case RTC_CONFIG_TASKING_SYSTEM: return 0;
  406. #endif
  407. #if defined(TASKING_TBB)
  408. case RTC_CONFIG_TASKING_SYSTEM: return 1;
  409. #endif
  410. #if defined(EMBREE_GEOMETRY_TRIANGLES)
  411. case RTC_CONFIG_TRIANGLE_GEOMETRY: return 1;
  412. #else
  413. case RTC_CONFIG_TRIANGLE_GEOMETRY: return 0;
  414. #endif
  415. #if defined(EMBREE_GEOMETRY_QUADS)
  416. case RTC_CONFIG_QUAD_GEOMETRY: return 1;
  417. #else
  418. case RTC_CONFIG_QUAD_GEOMETRY: return 0;
  419. #endif
  420. #if defined(EMBREE_GEOMETRY_LINES)
  421. case RTC_CONFIG_LINE_GEOMETRY: return 1;
  422. #else
  423. case RTC_CONFIG_LINE_GEOMETRY: return 0;
  424. #endif
  425. #if defined(EMBREE_GEOMETRY_HAIR)
  426. case RTC_CONFIG_HAIR_GEOMETRY: return 1;
  427. #else
  428. case RTC_CONFIG_HAIR_GEOMETRY: return 0;
  429. #endif
  430. #if defined(EMBREE_GEOMETRY_SUBDIV)
  431. case RTC_CONFIG_SUBDIV_GEOMETRY: return 1;
  432. #else
  433. case RTC_CONFIG_SUBDIV_GEOMETRY: return 0;
  434. #endif
  435. #if defined(EMBREE_GEOMETRY_USER)
  436. case RTC_CONFIG_USER_GEOMETRY: return 1;
  437. #else
  438. case RTC_CONFIG_USER_GEOMETRY: return 0;
  439. #endif
  440. #if defined(TASKING_TBB) && (TBB_INTERFACE_VERSION_MAJOR < 8)
  441. case RTC_CONFIG_COMMIT_JOIN: return 0;
  442. case RTC_CONFIG_COMMIT_THREAD: return 0;
  443. #else
  444. case RTC_CONFIG_COMMIT_JOIN: return 1;
  445. case RTC_CONFIG_COMMIT_THREAD: return 1;
  446. #endif
  447. default: throw_RTCError(RTC_INVALID_ARGUMENT, "unknown readable parameter"); break;
  448. };
  449. }
  450. }