astcenc_internal_entry.h 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330
  1. // SPDX-License-Identifier: Apache-2.0
  2. // ----------------------------------------------------------------------------
  3. // Copyright 2011-2024 Arm Limited
  4. //
  5. // Licensed under the Apache License, Version 2.0 (the "License"); you may not
  6. // use this file except in compliance with the License. You may obtain a copy
  7. // of the License at:
  8. //
  9. // http://www.apache.org/licenses/LICENSE-2.0
  10. //
  11. // Unless required by applicable law or agreed to in writing, software
  12. // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  13. // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  14. // License for the specific language governing permissions and limitations
  15. // under the License.
  16. // ----------------------------------------------------------------------------
  17. /**
  18. * @brief Functions and data declarations for the outer context.
  19. *
  20. * The outer context includes thread-pool management, which is slower to
  21. * compile due to increased use of C++ stdlib. The inner context used in the
  22. * majority of the codec library does not include this.
  23. */
  24. #ifndef ASTCENC_INTERNAL_ENTRY_INCLUDED
  25. #define ASTCENC_INTERNAL_ENTRY_INCLUDED
  26. #include <atomic>
  27. #include <condition_variable>
  28. #include <functional>
  29. #include <mutex>
  30. #include "astcenc_internal.h"
  31. /* ============================================================================
  32. Parallel execution control
  33. ============================================================================ */
  34. /**
  35. * @brief A simple counter-based manager for parallel task execution.
  36. *
  37. * The task processing execution consists of:
  38. *
  39. * * A single-threaded init stage.
  40. * * A multi-threaded processing stage.
  41. * * A condition variable so threads can wait for processing completion.
  42. *
  43. * The init stage will be executed by the first thread to arrive in the critical section, there is
  44. * no main thread in the thread pool.
  45. *
 * The processing stage uses dynamic dispatch to assign task tickets to threads on an on-demand
 * basis. Threads may therefore each execute different numbers of tasks, depending on their
 * processing complexity. The task queue and the task tickets are just counters; the caller must map
 * these integers to an actual processing partition in a specific problem domain.
  50. *
 * The exit wait condition is needed to ensure processing has finished before a worker thread can
 * progress to the next stage of the pipeline. Specifically a worker may exit the processing stage
 * because there are no new tasks to assign to it while other worker threads are still processing.
 * Calling @c wait() will ensure that all other workers have finished before the thread can proceed.
  55. *
  56. * The basic usage model:
  57. *
  58. * // --------- From single-threaded code ---------
  59. *
  60. * // Reset the tracker state
  61. * manager->reset()
  62. *
  63. * // --------- From multi-threaded code ---------
  64. *
  65. * // Run the stage init; only first thread actually runs the lambda
  66. * manager->init(<lambda>)
  67. *
 *     // Declared outside the loop body so the loop condition can read it
 *     unsigned int task_count;
 *     do
 *     {
 *         // Request a task assignment
 *         unsigned int base_index = manager->get_task_assignment(<granule>, task_count);
 *
 *         // Process any tasks we were given (task_count <= granule size)
 *         if (task_count)
 *         {
 *             // Run the user task processing code for N tasks here
 *             ...
 *
 *             // Flag these tasks as complete
 *             manager->complete_task_assignment(task_count);
 *         }
 *     } while (task_count);
  84. *
  85. * // Wait for all threads to complete tasks before progressing
  86. * manager->wait()
  87. *
  88. * // Run the stage term; only first thread actually runs the lambda
  89. * manager->term(<lambda>)
  90. */
  91. class ParallelManager
  92. {
  93. private:
  94. /** @brief Lock used for critical section and condition synchronization. */
  95. std::mutex m_lock;
  96. /** @brief True if the stage init() step has been executed. */
  97. bool m_init_done;
  98. /** @brief True if the stage term() step has been executed. */
  99. bool m_term_done;
  100. /** @brief Condition variable for tracking stage processing completion. */
  101. std::condition_variable m_complete;
  102. /** @brief Number of tasks started, but not necessarily finished. */
  103. std::atomic<unsigned int> m_start_count;
  104. /** @brief Number of tasks finished. */
  105. unsigned int m_done_count;
  106. /** @brief Number of tasks that need to be processed. */
  107. unsigned int m_task_count;
  108. /** @brief Progress callback (optional). */
  109. astcenc_progress_callback m_callback;
  110. /** @brief Lock used for callback synchronization. */
  111. std::mutex m_callback_lock;
  112. /** @brief Minimum progress before making a callback. */
  113. float m_callback_min_diff;
  114. /** @brief Last progress callback value. */
  115. float m_callback_last_value;
  116. public:
  117. /** @brief Create a new ParallelManager. */
  118. ParallelManager()
  119. {
  120. reset();
  121. }
  122. /**
  123. * @brief Reset the tracker for a new processing batch.
  124. *
  125. * This must be called from single-threaded code before starting the multi-threaded processing
  126. * operations.
  127. */
  128. void reset()
  129. {
  130. m_init_done = false;
  131. m_term_done = false;
  132. m_start_count = 0;
  133. m_done_count = 0;
  134. m_task_count = 0;
  135. m_callback_last_value = 0.0f;
  136. m_callback_min_diff = 1.0f;
  137. }
  138. /**
  139. * @brief Trigger the pipeline stage init step.
  140. *
  141. * This can be called from multi-threaded code. The first thread to hit this will process the
  142. * initialization. Other threads will block and wait for it to complete.
  143. *
  144. * @param init_func Callable which executes the stage initialization. It must return the
  145. * total number of tasks in the stage.
  146. */
  147. void init(std::function<unsigned int(void)> init_func)
  148. {
  149. std::lock_guard<std::mutex> lck(m_lock);
  150. if (!m_init_done)
  151. {
  152. m_task_count = init_func();
  153. m_init_done = true;
  154. }
  155. }
  156. /**
  157. * @brief Trigger the pipeline stage init step.
  158. *
  159. * This can be called from multi-threaded code. The first thread to hit this will process the
  160. * initialization. Other threads will block and wait for it to complete.
  161. *
  162. * @param task_count Total number of tasks needing processing.
  163. * @param callback Function pointer for progress status callbacks.
  164. */
  165. void init(unsigned int task_count, astcenc_progress_callback callback)
  166. {
  167. std::lock_guard<std::mutex> lck(m_lock);
  168. if (!m_init_done)
  169. {
  170. m_callback = callback;
  171. m_task_count = task_count;
  172. m_init_done = true;
  173. // Report every 1% or 4096 blocks, whichever is larger, to avoid callback overhead
  174. float min_diff = (4096.0f / static_cast<float>(task_count)) * 100.0f;
  175. m_callback_min_diff = astc::max(min_diff, 1.0f);
  176. }
  177. }
  178. /**
  179. * @brief Request a task assignment.
  180. *
  181. * Assign up to @c granule tasks to the caller for processing.
  182. *
  183. * @param granule Maximum number of tasks that can be assigned.
  184. * @param[out] count Actual number of tasks assigned, or zero if no tasks were assigned.
  185. *
  186. * @return Task index of the first assigned task; assigned tasks increment from this.
  187. */
  188. unsigned int get_task_assignment(unsigned int granule, unsigned int& count)
  189. {
  190. unsigned int base = m_start_count.fetch_add(granule, std::memory_order_relaxed);
  191. if (base >= m_task_count)
  192. {
  193. count = 0;
  194. return 0;
  195. }
  196. count = astc::min(m_task_count - base, granule);
  197. return base;
  198. }
  199. /**
  200. * @brief Complete a task assignment.
  201. *
  202. * Mark @c count tasks as complete. This will notify all threads blocked on @c wait() if this
  203. * completes the processing of the stage.
  204. *
  205. * @param count The number of completed tasks.
  206. */
  207. void complete_task_assignment(unsigned int count)
  208. {
  209. // Note: m_done_count cannot use an atomic without the mutex; this has a race between the
  210. // update here and the wait() for other threads
  211. unsigned int local_count;
  212. float local_last_value;
  213. {
  214. std::unique_lock<std::mutex> lck(m_lock);
  215. m_done_count += count;
  216. local_count = m_done_count;
  217. local_last_value = m_callback_last_value;
  218. if (m_done_count == m_task_count)
  219. {
  220. // Ensure the progress bar hits 100%
  221. if (m_callback)
  222. {
  223. std::unique_lock<std::mutex> cblck(m_callback_lock);
  224. m_callback(100.0f);
  225. m_callback_last_value = 100.0f;
  226. }
  227. lck.unlock();
  228. m_complete.notify_all();
  229. }
  230. }
  231. // Process progress callback if we have one
  232. if (m_callback)
  233. {
  234. // Initial lockless test - have we progressed enough to emit?
  235. float num = static_cast<float>(local_count);
  236. float den = static_cast<float>(m_task_count);
  237. float this_value = (num / den) * 100.0f;
  238. bool report_test = (this_value - local_last_value) > m_callback_min_diff;
  239. // Recheck under lock, because another thread might report first
  240. if (report_test)
  241. {
  242. std::unique_lock<std::mutex> cblck(m_callback_lock);
  243. bool report_retest = (this_value - m_callback_last_value) > m_callback_min_diff;
  244. if (report_retest)
  245. {
  246. m_callback(this_value);
  247. m_callback_last_value = this_value;
  248. }
  249. }
  250. }
  251. }
  252. /**
  253. * @brief Wait for stage processing to complete.
  254. */
  255. void wait()
  256. {
  257. std::unique_lock<std::mutex> lck(m_lock);
  258. m_complete.wait(lck, [this]{ return m_done_count == m_task_count; });
  259. }
  260. /**
  261. * @brief Trigger the pipeline stage term step.
  262. *
  263. * This can be called from multi-threaded code. The first thread to hit this will process the
  264. * work pool termination. Caller must have called @c wait() prior to calling this function to
  265. * ensure that processing is complete.
  266. *
  267. * @param term_func Callable which executes the stage termination.
  268. */
  269. void term(std::function<void(void)> term_func)
  270. {
  271. std::lock_guard<std::mutex> lck(m_lock);
  272. if (!m_term_done)
  273. {
  274. term_func();
  275. m_term_done = true;
  276. }
  277. }
  278. };
  279. /**
  280. * @brief The astcenc compression context.
  281. */
  282. struct astcenc_context
  283. {
  284. /** @brief The context internal state. */
  285. astcenc_contexti context;
  286. #if !defined(ASTCENC_DECOMPRESS_ONLY)
  287. /** @brief The parallel manager for averages computation. */
  288. ParallelManager manage_avg;
  289. /** @brief The parallel manager for compression. */
  290. ParallelManager manage_compress;
  291. #endif
  292. /** @brief The parallel manager for decompression. */
  293. ParallelManager manage_decompress;
  294. };
  295. #endif