tessellation_cache.cpp

// ======================================================================== //
// Copyright 2009-2017 Intel Corporation                                    //
//                                                                          //
// Licensed under the Apache License, Version 2.0 (the "License");          //
// you may not use this file except in compliance with the License.         //
// You may obtain a copy of the License at                                  //
//                                                                          //
//     http://www.apache.org/licenses/LICENSE-2.0                           //
//                                                                          //
// Unless required by applicable law or agreed to in writing, software      //
// distributed under the License is distributed on an "AS IS" BASIS,        //
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
// See the License for the specific language governing permissions and      //
// limitations under the License.                                           //
// ======================================================================== //

#include "tessellation_cache.h"

namespace embree
{
  SharedLazyTessellationCache SharedLazyTessellationCache::sharedLazyTessellationCache;

  __thread ThreadWorkState* SharedLazyTessellationCache::init_t_state = nullptr;
  ThreadWorkState* SharedLazyTessellationCache::current_t_state = nullptr;
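
  /* Note on the two statics above: init_t_state is the calling render
     thread's own work state (hence __thread), while current_t_state is the
     head of a global singly linked list chaining all registered work states
     together. The flush/reset/realloc paths below walk this list to block
     every render thread before touching the cache. */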

  void resizeTessellationCache(size_t new_size)
  {
    if (new_size >= SharedLazyTessellationCache::MAX_TESSELLATION_CACHE_SIZE)
      new_size = SharedLazyTessellationCache::MAX_TESSELLATION_CACHE_SIZE;
    if (SharedLazyTessellationCache::sharedLazyTessellationCache.getSize() != new_size)
      SharedLazyTessellationCache::sharedLazyTessellationCache.realloc(new_size);
  }

  void resetTessellationCache()
  {
    //SharedLazyTessellationCache::sharedLazyTessellationCache.addCurrentIndex(SharedLazyTessellationCache::NUM_CACHE_SEGMENTS);
    SharedLazyTessellationCache::sharedLazyTessellationCache.reset();
  }

  SharedLazyTessellationCache::SharedLazyTessellationCache()
  {
    size                   = 0;
    data                   = nullptr;
    maxBlocks              = size/BLOCK_SIZE;
    localTime              = NUM_CACHE_SEGMENTS;
    next_block             = 0;
    numRenderThreads       = 0;
#if FORCE_SIMPLE_FLUSH == 1
    switch_block_threshold = maxBlocks;
#else
    switch_block_threshold = maxBlocks/NUM_CACHE_SEGMENTS;
#endif
    threadWorkState        = new ThreadWorkState[NUM_PREALLOC_THREAD_WORK_STATES];
    //reset_state.reset();
    //linkedlist_mtx.reset();
  }

  SharedLazyTessellationCache::~SharedLazyTessellationCache()
  {
    /* free heap-allocated thread work states; states taken from the
       preallocated array are released with the array itself below */
    for (ThreadWorkState* t=current_t_state; t!=nullptr; )
    {
      ThreadWorkState* next = t->next;
      if (t->allocated) delete t;
      t = next;
    }
    delete[] threadWorkState;
  }

  void SharedLazyTessellationCache::getNextRenderThreadWorkState()
  {
    const size_t id = numRenderThreads.fetch_add(1);
    if (id >= NUM_PREALLOC_THREAD_WORK_STATES) init_t_state = new ThreadWorkState(true);
    else                                       init_t_state = &threadWorkState[id];

    /* critical section for updating link list with new thread state */
    linkedlist_mtx.lock();
    init_t_state->next = current_t_state;
    current_t_state    = init_t_state;
    linkedlist_mtx.unlock();
  }

  void SharedLazyTessellationCache::waitForUsersLessEqual(ThreadWorkState* const t_state,
                                                          const unsigned int users)
  {
    while( !(t_state->counter <= users) )
    {
      _mm_pause();
      _mm_pause();
      _mm_pause();
      _mm_pause();
    }
  }
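
  /* Blocking protocol used by allocNextSegment/reset/realloc below (this
     summary is inferred from the call sites in this file; lockThread and
     unlockThread themselves live in the header): each render thread holds
     its t_state->counter above zero while it reads from the cache.
     lockThread() adds the large THREAD_BLOCK_ATOMIC_ADD bias to the counter
     so new readers back off, and waitForUsersLessEqual() spins until the
     remaining readers have drained, i.e. until the counter has fallen back
     to the bias alone. unlockThread() with the negative bias then releases
     the thread again. */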

  void SharedLazyTessellationCache::allocNextSegment()
  {
    if (reset_state.try_lock())
    {
      if (next_block >= switch_block_threshold)
      {
        /* lock the linked list of thread states */
        linkedlist_mtx.lock();

        /* block all threads */
        for (ThreadWorkState* t=current_t_state; t!=nullptr; t=t->next)
          if (lockThread(t,THREAD_BLOCK_ATOMIC_ADD) != 0)
            waitForUsersLessEqual(t,THREAD_BLOCK_ATOMIC_ADD);

        /* switch to the next segment */
        addCurrentIndex();
        CACHE_STATS(PRINT("RESET TESS CACHE"));

#if FORCE_SIMPLE_FLUSH == 1
        next_block = 0;
        switch_block_threshold = maxBlocks;
#else
        const size_t region = localTime % NUM_CACHE_SEGMENTS;
        next_block             = region * (maxBlocks/NUM_CACHE_SEGMENTS);
        switch_block_threshold = next_block + (maxBlocks/NUM_CACHE_SEGMENTS);
        assert( switch_block_threshold <= maxBlocks );
#endif

        CACHE_STATS(SharedTessellationCacheStats::cache_flushes++);

        /* release all blocked threads */
        for (ThreadWorkState* t=current_t_state; t!=nullptr; t=t->next)
          unlockThread(t,-THREAD_BLOCK_ATOMIC_ADD);

        /* unlock the linked list of thread states */
        linkedlist_mtx.unlock();
      }
      reset_state.unlock();
    }
    else
      reset_state.wait_until_unlocked();
  }
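
  /* Illustration of the segment arithmetic above (the numbers are
     hypothetical): with NUM_CACHE_SEGMENTS == 4 and maxBlocks == 1024, each
     segment spans 256 blocks. As localTime advances, region cycles
     0,1,2,3,0,..., so the allocator rotates through the block windows
     [0,256), [256,512), [512,768), [768,1024), overwriting the oldest
     segment each time the current window fills up. */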

  void SharedLazyTessellationCache::reset()
  {
    /* lock the reset_state */
    reset_state.lock();

    /* lock the linked list of thread states */
    linkedlist_mtx.lock();

    /* block all threads */
    for (ThreadWorkState* t=current_t_state; t!=nullptr; t=t->next)
      if (lockThread(t,THREAD_BLOCK_ATOMIC_ADD) != 0)
        waitForUsersLessEqual(t,THREAD_BLOCK_ATOMIC_ADD);

    /* reset to the first segment */
    next_block = 0;
#if FORCE_SIMPLE_FLUSH == 1
    switch_block_threshold = maxBlocks;
#else
    switch_block_threshold = maxBlocks/NUM_CACHE_SEGMENTS;
#endif

    /* reset local time */
    localTime = NUM_CACHE_SEGMENTS;

    /* release all blocked threads */
    for (ThreadWorkState* t=current_t_state; t!=nullptr; t=t->next)
      unlockThread(t,-THREAD_BLOCK_ATOMIC_ADD);

    /* unlock the linked list of thread states */
    linkedlist_mtx.unlock();

    /* unlock the reset_state */
    reset_state.unlock();
  }

  void SharedLazyTessellationCache::realloc(const size_t new_size)
  {
    /* lock the reset_state */
    reset_state.lock();

    /* lock the linked list of thread states */
    linkedlist_mtx.lock();

    /* block all threads */
    for (ThreadWorkState* t=current_t_state; t!=nullptr; t=t->next)
      if (lockThread(t,THREAD_BLOCK_ATOMIC_ADD) != 0)
        waitForUsersLessEqual(t,THREAD_BLOCK_ATOMIC_ADD);

    /* reallocate data */
    if (data) os_free(data,size);
    size      = new_size;
    data      = nullptr;
    if (size) data = (float*)os_malloc(size); // FIXME: do os_reserve under linux
    maxBlocks = size/BLOCK_SIZE;

    /* invalidate entire cache */
    localTime += NUM_CACHE_SEGMENTS;

    /* reset to the first segment */
#if FORCE_SIMPLE_FLUSH == 1
    next_block = 0;
    switch_block_threshold = maxBlocks;
#else
    const size_t region = localTime % NUM_CACHE_SEGMENTS;
    next_block             = region * (maxBlocks/NUM_CACHE_SEGMENTS);
    switch_block_threshold = next_block + (maxBlocks/NUM_CACHE_SEGMENTS);
    assert( switch_block_threshold <= maxBlocks );
#endif

    /* release all blocked threads */
    for (ThreadWorkState* t=current_t_state; t!=nullptr; t=t->next)
      unlockThread(t,-THREAD_BLOCK_ATOMIC_ADD);

    /* unlock the linked list of thread states */
    linkedlist_mtx.unlock();

    /* unlock the reset_state */
    reset_state.unlock();
  }
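
  /* Why "localTime += NUM_CACHE_SEGMENTS" invalidates the entire cache
     (an inference from the call above; the tag/validity check itself is in
     the header): cache entries appear to be tagged with the time at which
     they were allocated, and a tag is only considered valid while it falls
     within the most recent NUM_CACHE_SEGMENTS time steps. Jumping localTime
     forward by a full window therefore ages every existing tag out at once,
     without touching the cache data itself. */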

  ////////////////////////////////////////////////////////////////////////////////////////////////////////////
  ////////////////////////////////////////////////////////////////////////////////////////////////////////////
  ////////////////////////////////////////////////////////////////////////////////////////////////////////////

  std::atomic<size_t> SharedTessellationCacheStats::cache_accesses(0);
  std::atomic<size_t> SharedTessellationCacheStats::cache_hits(0);
  std::atomic<size_t> SharedTessellationCacheStats::cache_misses(0);
  std::atomic<size_t> SharedTessellationCacheStats::cache_flushes(0);
  SpinLock SharedTessellationCacheStats::mtx;
  size_t SharedTessellationCacheStats::cache_num_patches(0);

  void SharedTessellationCacheStats::printStats()
  {
    PRINT(cache_accesses);
    PRINT(cache_misses);
    PRINT(cache_hits);
    PRINT(cache_flushes);
    PRINT(100.0f * cache_hits / cache_accesses);
    assert(cache_hits + cache_misses == cache_accesses);
    PRINT(cache_num_patches);
  }

  void SharedTessellationCacheStats::clearStats()
  {
    SharedTessellationCacheStats::cache_accesses = 0;
    SharedTessellationCacheStats::cache_hits     = 0;
    SharedTessellationCacheStats::cache_misses   = 0;
    SharedTessellationCacheStats::cache_flushes  = 0;
  }
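
  /* Sketch of the caller-side contract exercised by the regression test
     below (inferred from this file; lookup() itself is in the header):

       auto* d = SharedLazyTessellationCache::lookup(entry, time, [&] () {
         return sharedLazyTessellationCache.malloc(bytes); // build on miss
       });
       if (d == nullptr) {                       // allocation failed mid-lookup;
         sharedLazyTessellationCache.unlock();   // the thread still holds its lock
         // retry later
       } else {
         // ... use d; while locked, the flush path waits for this thread ...
         sharedLazyTessellationCache.unlock();
       }
  */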

  struct cache_regression_test : public RegressionTest
  {
    BarrierSys barrier;
    std::atomic<size_t> numFailed;
    std::atomic<int> threadIDCounter;
    static const size_t numEntries = 4*1024;
    SharedLazyTessellationCache::CacheEntry entry[numEntries];

    cache_regression_test()
      : RegressionTest("cache_regression_test"), numFailed(0), threadIDCounter(0)
    {
      registerRegressionTest(this);
    }

    static void thread_alloc(cache_regression_test* This)
    {
      int threadID = This->threadIDCounter++;
      size_t maxN = SharedLazyTessellationCache::sharedLazyTessellationCache.maxAllocSize()/4;
      This->barrier.wait();

      for (size_t j=0; j<100000; j++)
      {
        size_t elt = (threadID+j)%numEntries;
        size_t N = min(1+10*(elt%1000),maxN);
        volatile int* data = (volatile int*) SharedLazyTessellationCache::lookup(This->entry[elt],0,[&] () {
          int* data = (int*) SharedLazyTessellationCache::sharedLazyTessellationCache.malloc(4*N);
          for (size_t k=0; k<N; k++) data[k] = (int)elt;
          return data;
        });

        /* allocation inside lookup failed: release the thread lock and move on */
        if (data == nullptr) {
          SharedLazyTessellationCache::sharedLazyTessellationCache.unlock();
          This->numFailed++;
          continue;
        }

        /* check memory block */
        for (size_t k=0; k<N; k++) {
          if (data[k] != (int)elt) {
            This->numFailed++;
            break;
          }
        }
        SharedLazyTessellationCache::sharedLazyTessellationCache.unlock();
      }
      This->barrier.wait();
    }

    bool run ()
    {
      numFailed.store(0);

      size_t numThreads = getNumberOfLogicalThreads();
      barrier.init(numThreads+1);

      /* create threads */
      std::vector<thread_t> threads;
      for (size_t i=0; i<numThreads; i++)
        threads.push_back(createThread((thread_func)thread_alloc,this,0,i));

      /* run test */
      barrier.wait();
      barrier.wait();

      /* destroy threads */
      for (size_t i=0; i<numThreads; i++)
        join(threads[i]);

      return numFailed == 0;
    }
  };

  cache_regression_test cache_regression;
}

extern "C" void printTessCacheStats()
{
  PRINT("SHARED TESSELLATION CACHE");
  embree::SharedTessellationCacheStats::printStats();
  embree::SharedTessellationCacheStats::clearStats();
}