vp9_ethread.c 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663
  1. /*
  2. * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include "vp9/encoder/vp9_encodeframe.h"
  11. #include "vp9/encoder/vp9_encoder.h"
  12. #include "vp9/encoder/vp9_ethread.h"
  13. #include "vp9/encoder/vp9_firstpass.h"
  14. #include "vp9/encoder/vp9_multi_thread.h"
  15. #include "vp9/encoder/vp9_temporal_filter.h"
  16. #include "vpx_dsp/vpx_dsp_common.h"
  17. static void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) {
  18. int i, j, k, l, m, n;
  19. for (i = 0; i < REFERENCE_MODES; i++)
  20. td->rd_counts.comp_pred_diff[i] += td_t->rd_counts.comp_pred_diff[i];
  21. for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
  22. td->rd_counts.filter_diff[i] += td_t->rd_counts.filter_diff[i];
  23. for (i = 0; i < TX_SIZES; i++)
  24. for (j = 0; j < PLANE_TYPES; j++)
  25. for (k = 0; k < REF_TYPES; k++)
  26. for (l = 0; l < COEF_BANDS; l++)
  27. for (m = 0; m < COEFF_CONTEXTS; m++)
  28. for (n = 0; n < ENTROPY_TOKENS; n++)
  29. td->rd_counts.coef_counts[i][j][k][l][m][n] +=
  30. td_t->rd_counts.coef_counts[i][j][k][l][m][n];
  31. }
  32. static int enc_worker_hook(EncWorkerData *const thread_data, void *unused) {
  33. VP9_COMP *const cpi = thread_data->cpi;
  34. const VP9_COMMON *const cm = &cpi->common;
  35. const int tile_cols = 1 << cm->log2_tile_cols;
  36. const int tile_rows = 1 << cm->log2_tile_rows;
  37. int t;
  38. (void)unused;
  39. for (t = thread_data->start; t < tile_rows * tile_cols;
  40. t += cpi->num_workers) {
  41. int tile_row = t / tile_cols;
  42. int tile_col = t % tile_cols;
  43. vp9_encode_tile(cpi, thread_data->td, tile_row, tile_col);
  44. }
  45. return 0;
  46. }
  47. static int get_max_tile_cols(VP9_COMP *cpi) {
  48. const int aligned_width = ALIGN_POWER_OF_TWO(cpi->oxcf.width, MI_SIZE_LOG2);
  49. int mi_cols = aligned_width >> MI_SIZE_LOG2;
  50. int min_log2_tile_cols, max_log2_tile_cols;
  51. int log2_tile_cols;
  52. vp9_get_tile_n_bits(mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
  53. log2_tile_cols =
  54. clamp(cpi->oxcf.tile_columns, min_log2_tile_cols, max_log2_tile_cols);
  55. if (cpi->oxcf.target_level == LEVEL_AUTO) {
  56. const uint32_t pic_size = cpi->common.width * cpi->common.height;
  57. const int level_tile_cols = log_tile_cols_from_picsize_level(pic_size);
  58. if (log2_tile_cols > level_tile_cols) {
  59. log2_tile_cols = VPXMAX(level_tile_cols, min_log2_tile_cols);
  60. }
  61. }
  62. return (1 << log2_tile_cols);
  63. }
  64. static void create_enc_workers(VP9_COMP *cpi, int num_workers) {
  65. VP9_COMMON *const cm = &cpi->common;
  66. const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
  67. int i;
  68. // Only run once to create threads and allocate thread data.
  69. if (cpi->num_workers == 0) {
  70. int allocated_workers = num_workers;
  71. // While using SVC, we need to allocate threads according to the highest
  72. // resolution. When row based multithreading is enabled, it is OK to
  73. // allocate more threads than the number of max tile columns.
  74. if (cpi->use_svc && !cpi->row_mt) {
  75. int max_tile_cols = get_max_tile_cols(cpi);
  76. allocated_workers = VPXMIN(cpi->oxcf.max_threads, max_tile_cols);
  77. }
  78. CHECK_MEM_ERROR(cm, cpi->workers,
  79. vpx_malloc(allocated_workers * sizeof(*cpi->workers)));
  80. CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
  81. vpx_calloc(allocated_workers, sizeof(*cpi->tile_thr_data)));
  82. for (i = 0; i < allocated_workers; i++) {
  83. VPxWorker *const worker = &cpi->workers[i];
  84. EncWorkerData *thread_data = &cpi->tile_thr_data[i];
  85. ++cpi->num_workers;
  86. winterface->init(worker);
  87. if (i < allocated_workers - 1) {
  88. thread_data->cpi = cpi;
  89. // Allocate thread data.
  90. CHECK_MEM_ERROR(cm, thread_data->td,
  91. vpx_memalign(32, sizeof(*thread_data->td)));
  92. vp9_zero(*thread_data->td);
  93. // Set up pc_tree.
  94. thread_data->td->leaf_tree = NULL;
  95. thread_data->td->pc_tree = NULL;
  96. vp9_setup_pc_tree(cm, thread_data->td);
  97. // Allocate frame counters in thread data.
  98. CHECK_MEM_ERROR(cm, thread_data->td->counts,
  99. vpx_calloc(1, sizeof(*thread_data->td->counts)));
  100. // Create threads
  101. if (!winterface->reset(worker))
  102. vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
  103. "Tile encoder thread creation failed");
  104. } else {
  105. // Main thread acts as a worker and uses the thread data in cpi.
  106. thread_data->cpi = cpi;
  107. thread_data->td = &cpi->td;
  108. }
  109. winterface->sync(worker);
  110. }
  111. }
  112. }
  113. static void launch_enc_workers(VP9_COMP *cpi, VPxWorkerHook hook, void *data2,
  114. int num_workers) {
  115. const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
  116. int i;
  117. for (i = 0; i < num_workers; i++) {
  118. VPxWorker *const worker = &cpi->workers[i];
  119. worker->hook = (VPxWorkerHook)hook;
  120. worker->data1 = &cpi->tile_thr_data[i];
  121. worker->data2 = data2;
  122. }
  123. // Encode a frame
  124. for (i = 0; i < num_workers; i++) {
  125. VPxWorker *const worker = &cpi->workers[i];
  126. EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;
  127. // Set the starting tile for each thread.
  128. thread_data->start = i;
  129. if (i == cpi->num_workers - 1)
  130. winterface->execute(worker);
  131. else
  132. winterface->launch(worker);
  133. }
  134. // Encoding ends.
  135. for (i = 0; i < num_workers; i++) {
  136. VPxWorker *const worker = &cpi->workers[i];
  137. winterface->sync(worker);
  138. }
  139. }
  140. void vp9_encode_tiles_mt(VP9_COMP *cpi) {
  141. VP9_COMMON *const cm = &cpi->common;
  142. const int tile_cols = 1 << cm->log2_tile_cols;
  143. const int num_workers = VPXMIN(cpi->oxcf.max_threads, tile_cols);
  144. int i;
  145. vp9_init_tile_data(cpi);
  146. create_enc_workers(cpi, num_workers);
  147. for (i = 0; i < num_workers; i++) {
  148. EncWorkerData *thread_data;
  149. thread_data = &cpi->tile_thr_data[i];
  150. // Before encoding a frame, copy the thread data from cpi.
  151. if (thread_data->td != &cpi->td) {
  152. thread_data->td->mb = cpi->td.mb;
  153. thread_data->td->rd_counts = cpi->td.rd_counts;
  154. }
  155. if (thread_data->td->counts != &cpi->common.counts) {
  156. memcpy(thread_data->td->counts, &cpi->common.counts,
  157. sizeof(cpi->common.counts));
  158. }
  159. // Handle use_nonrd_pick_mode case.
  160. if (cpi->sf.use_nonrd_pick_mode) {
  161. MACROBLOCK *const x = &thread_data->td->mb;
  162. MACROBLOCKD *const xd = &x->e_mbd;
  163. struct macroblock_plane *const p = x->plane;
  164. struct macroblockd_plane *const pd = xd->plane;
  165. PICK_MODE_CONTEXT *ctx = &thread_data->td->pc_root->none;
  166. int j;
  167. for (j = 0; j < MAX_MB_PLANE; ++j) {
  168. p[j].coeff = ctx->coeff_pbuf[j][0];
  169. p[j].qcoeff = ctx->qcoeff_pbuf[j][0];
  170. pd[j].dqcoeff = ctx->dqcoeff_pbuf[j][0];
  171. p[j].eobs = ctx->eobs_pbuf[j][0];
  172. }
  173. }
  174. }
  175. launch_enc_workers(cpi, (VPxWorkerHook)enc_worker_hook, NULL, num_workers);
  176. for (i = 0; i < num_workers; i++) {
  177. VPxWorker *const worker = &cpi->workers[i];
  178. EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;
  179. // Accumulate counters.
  180. if (i < cpi->num_workers - 1) {
  181. vp9_accumulate_frame_counts(&cm->counts, thread_data->td->counts, 0);
  182. accumulate_rd_opt(&cpi->td, thread_data->td);
  183. }
  184. }
  185. }
  186. #if !CONFIG_REALTIME_ONLY
  187. static void accumulate_fp_tile_stat(TileDataEnc *tile_data,
  188. TileDataEnc *tile_data_t) {
  189. tile_data->fp_data.intra_factor += tile_data_t->fp_data.intra_factor;
  190. tile_data->fp_data.brightness_factor +=
  191. tile_data_t->fp_data.brightness_factor;
  192. tile_data->fp_data.coded_error += tile_data_t->fp_data.coded_error;
  193. tile_data->fp_data.sr_coded_error += tile_data_t->fp_data.sr_coded_error;
  194. tile_data->fp_data.frame_noise_energy +=
  195. tile_data_t->fp_data.frame_noise_energy;
  196. tile_data->fp_data.intra_error += tile_data_t->fp_data.intra_error;
  197. tile_data->fp_data.intercount += tile_data_t->fp_data.intercount;
  198. tile_data->fp_data.second_ref_count += tile_data_t->fp_data.second_ref_count;
  199. tile_data->fp_data.neutral_count += tile_data_t->fp_data.neutral_count;
  200. tile_data->fp_data.intra_count_low += tile_data_t->fp_data.intra_count_low;
  201. tile_data->fp_data.intra_count_high += tile_data_t->fp_data.intra_count_high;
  202. tile_data->fp_data.intra_skip_count += tile_data_t->fp_data.intra_skip_count;
  203. tile_data->fp_data.mvcount += tile_data_t->fp_data.mvcount;
  204. tile_data->fp_data.sum_mvr += tile_data_t->fp_data.sum_mvr;
  205. tile_data->fp_data.sum_mvr_abs += tile_data_t->fp_data.sum_mvr_abs;
  206. tile_data->fp_data.sum_mvc += tile_data_t->fp_data.sum_mvc;
  207. tile_data->fp_data.sum_mvc_abs += tile_data_t->fp_data.sum_mvc_abs;
  208. tile_data->fp_data.sum_mvrs += tile_data_t->fp_data.sum_mvrs;
  209. tile_data->fp_data.sum_mvcs += tile_data_t->fp_data.sum_mvcs;
  210. tile_data->fp_data.sum_in_vectors += tile_data_t->fp_data.sum_in_vectors;
  211. tile_data->fp_data.intra_smooth_count +=
  212. tile_data_t->fp_data.intra_smooth_count;
  213. tile_data->fp_data.image_data_start_row =
  214. VPXMIN(tile_data->fp_data.image_data_start_row,
  215. tile_data_t->fp_data.image_data_start_row) == INVALID_ROW
  216. ? VPXMAX(tile_data->fp_data.image_data_start_row,
  217. tile_data_t->fp_data.image_data_start_row)
  218. : VPXMIN(tile_data->fp_data.image_data_start_row,
  219. tile_data_t->fp_data.image_data_start_row);
  220. }
  221. #endif // !CONFIG_REALTIME_ONLY
  222. // Allocate memory for row synchronization
  223. void vp9_row_mt_sync_mem_alloc(VP9RowMTSync *row_mt_sync, VP9_COMMON *cm,
  224. int rows) {
  225. row_mt_sync->rows = rows;
  226. #if CONFIG_MULTITHREAD
  227. {
  228. int i;
  229. CHECK_MEM_ERROR(cm, row_mt_sync->mutex_,
  230. vpx_malloc(sizeof(*row_mt_sync->mutex_) * rows));
  231. if (row_mt_sync->mutex_) {
  232. for (i = 0; i < rows; ++i) {
  233. pthread_mutex_init(&row_mt_sync->mutex_[i], NULL);
  234. }
  235. }
  236. CHECK_MEM_ERROR(cm, row_mt_sync->cond_,
  237. vpx_malloc(sizeof(*row_mt_sync->cond_) * rows));
  238. if (row_mt_sync->cond_) {
  239. for (i = 0; i < rows; ++i) {
  240. pthread_cond_init(&row_mt_sync->cond_[i], NULL);
  241. }
  242. }
  243. }
  244. #endif // CONFIG_MULTITHREAD
  245. CHECK_MEM_ERROR(cm, row_mt_sync->cur_col,
  246. vpx_malloc(sizeof(*row_mt_sync->cur_col) * rows));
  247. // Set up nsync.
  248. row_mt_sync->sync_range = 1;
  249. }
  250. // Deallocate row based multi-threading synchronization related mutex and data
  251. void vp9_row_mt_sync_mem_dealloc(VP9RowMTSync *row_mt_sync) {
  252. if (row_mt_sync != NULL) {
  253. #if CONFIG_MULTITHREAD
  254. int i;
  255. if (row_mt_sync->mutex_ != NULL) {
  256. for (i = 0; i < row_mt_sync->rows; ++i) {
  257. pthread_mutex_destroy(&row_mt_sync->mutex_[i]);
  258. }
  259. vpx_free(row_mt_sync->mutex_);
  260. }
  261. if (row_mt_sync->cond_ != NULL) {
  262. for (i = 0; i < row_mt_sync->rows; ++i) {
  263. pthread_cond_destroy(&row_mt_sync->cond_[i]);
  264. }
  265. vpx_free(row_mt_sync->cond_);
  266. }
  267. #endif // CONFIG_MULTITHREAD
  268. vpx_free(row_mt_sync->cur_col);
  269. // clear the structure as the source of this call may be dynamic change
  270. // in tiles in which case this call will be followed by an _alloc()
  271. // which may fail.
  272. vp9_zero(*row_mt_sync);
  273. }
  274. }
  275. void vp9_row_mt_sync_read(VP9RowMTSync *const row_mt_sync, int r, int c) {
  276. #if CONFIG_MULTITHREAD
  277. const int nsync = row_mt_sync->sync_range;
  278. if (r && !(c & (nsync - 1))) {
  279. pthread_mutex_t *const mutex = &row_mt_sync->mutex_[r - 1];
  280. pthread_mutex_lock(mutex);
  281. while (c > row_mt_sync->cur_col[r - 1] - nsync + 1) {
  282. pthread_cond_wait(&row_mt_sync->cond_[r - 1], mutex);
  283. }
  284. pthread_mutex_unlock(mutex);
  285. }
  286. #else
  287. (void)row_mt_sync;
  288. (void)r;
  289. (void)c;
  290. #endif // CONFIG_MULTITHREAD
  291. }
  292. void vp9_row_mt_sync_read_dummy(VP9RowMTSync *const row_mt_sync, int r, int c) {
  293. (void)row_mt_sync;
  294. (void)r;
  295. (void)c;
  296. return;
  297. }
  298. void vp9_row_mt_sync_write(VP9RowMTSync *const row_mt_sync, int r, int c,
  299. const int cols) {
  300. #if CONFIG_MULTITHREAD
  301. const int nsync = row_mt_sync->sync_range;
  302. int cur;
  303. // Only signal when there are enough encoded blocks for next row to run.
  304. int sig = 1;
  305. if (c < cols - 1) {
  306. cur = c;
  307. if (c % nsync != nsync - 1) sig = 0;
  308. } else {
  309. cur = cols + nsync;
  310. }
  311. if (sig) {
  312. pthread_mutex_lock(&row_mt_sync->mutex_[r]);
  313. row_mt_sync->cur_col[r] = cur;
  314. pthread_cond_signal(&row_mt_sync->cond_[r]);
  315. pthread_mutex_unlock(&row_mt_sync->mutex_[r]);
  316. }
  317. #else
  318. (void)row_mt_sync;
  319. (void)r;
  320. (void)c;
  321. (void)cols;
  322. #endif // CONFIG_MULTITHREAD
  323. }
  324. void vp9_row_mt_sync_write_dummy(VP9RowMTSync *const row_mt_sync, int r, int c,
  325. const int cols) {
  326. (void)row_mt_sync;
  327. (void)r;
  328. (void)c;
  329. (void)cols;
  330. return;
  331. }
  332. #if !CONFIG_REALTIME_ONLY
  333. static int first_pass_worker_hook(EncWorkerData *const thread_data,
  334. MultiThreadHandle *multi_thread_ctxt) {
  335. VP9_COMP *const cpi = thread_data->cpi;
  336. const VP9_COMMON *const cm = &cpi->common;
  337. const int tile_cols = 1 << cm->log2_tile_cols;
  338. int tile_row, tile_col;
  339. TileDataEnc *this_tile;
  340. int end_of_frame;
  341. int thread_id = thread_data->thread_id;
  342. int cur_tile_id = multi_thread_ctxt->thread_id_to_tile_id[thread_id];
  343. JobNode *proc_job = NULL;
  344. FIRSTPASS_DATA fp_acc_data;
  345. MV zero_mv = { 0, 0 };
  346. MV best_ref_mv;
  347. int mb_row;
  348. end_of_frame = 0;
  349. while (0 == end_of_frame) {
  350. // Get the next job in the queue
  351. proc_job =
  352. (JobNode *)vp9_enc_grp_get_next_job(multi_thread_ctxt, cur_tile_id);
  353. if (NULL == proc_job) {
  354. // Query for the status of other tiles
  355. end_of_frame = vp9_get_tiles_proc_status(
  356. multi_thread_ctxt, thread_data->tile_completion_status, &cur_tile_id,
  357. tile_cols);
  358. } else {
  359. tile_col = proc_job->tile_col_id;
  360. tile_row = proc_job->tile_row_id;
  361. this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
  362. mb_row = proc_job->vert_unit_row_num;
  363. best_ref_mv = zero_mv;
  364. vp9_zero(fp_acc_data);
  365. fp_acc_data.image_data_start_row = INVALID_ROW;
  366. vp9_first_pass_encode_tile_mb_row(cpi, thread_data->td, &fp_acc_data,
  367. this_tile, &best_ref_mv, mb_row);
  368. }
  369. }
  370. return 0;
  371. }
  372. void vp9_encode_fp_row_mt(VP9_COMP *cpi) {
  373. VP9_COMMON *const cm = &cpi->common;
  374. const int tile_cols = 1 << cm->log2_tile_cols;
  375. const int tile_rows = 1 << cm->log2_tile_rows;
  376. MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
  377. TileDataEnc *first_tile_col;
  378. int num_workers = VPXMAX(cpi->oxcf.max_threads, 1);
  379. int i;
  380. if (multi_thread_ctxt->allocated_tile_cols < tile_cols ||
  381. multi_thread_ctxt->allocated_tile_rows < tile_rows ||
  382. multi_thread_ctxt->allocated_vert_unit_rows < cm->mb_rows) {
  383. vp9_row_mt_mem_dealloc(cpi);
  384. vp9_init_tile_data(cpi);
  385. vp9_row_mt_mem_alloc(cpi);
  386. } else {
  387. vp9_init_tile_data(cpi);
  388. }
  389. create_enc_workers(cpi, num_workers);
  390. vp9_assign_tile_to_thread(multi_thread_ctxt, tile_cols, cpi->num_workers);
  391. vp9_prepare_job_queue(cpi, FIRST_PASS_JOB);
  392. vp9_multi_thread_tile_init(cpi);
  393. for (i = 0; i < num_workers; i++) {
  394. EncWorkerData *thread_data;
  395. thread_data = &cpi->tile_thr_data[i];
  396. // Before encoding a frame, copy the thread data from cpi.
  397. if (thread_data->td != &cpi->td) {
  398. thread_data->td->mb = cpi->td.mb;
  399. }
  400. }
  401. launch_enc_workers(cpi, (VPxWorkerHook)first_pass_worker_hook,
  402. multi_thread_ctxt, num_workers);
  403. first_tile_col = &cpi->tile_data[0];
  404. for (i = 1; i < tile_cols; i++) {
  405. TileDataEnc *this_tile = &cpi->tile_data[i];
  406. accumulate_fp_tile_stat(first_tile_col, this_tile);
  407. }
  408. }
  409. static int temporal_filter_worker_hook(EncWorkerData *const thread_data,
  410. MultiThreadHandle *multi_thread_ctxt) {
  411. VP9_COMP *const cpi = thread_data->cpi;
  412. const VP9_COMMON *const cm = &cpi->common;
  413. const int tile_cols = 1 << cm->log2_tile_cols;
  414. int tile_row, tile_col;
  415. int mb_col_start, mb_col_end;
  416. TileDataEnc *this_tile;
  417. int end_of_frame;
  418. int thread_id = thread_data->thread_id;
  419. int cur_tile_id = multi_thread_ctxt->thread_id_to_tile_id[thread_id];
  420. JobNode *proc_job = NULL;
  421. int mb_row;
  422. end_of_frame = 0;
  423. while (0 == end_of_frame) {
  424. // Get the next job in the queue
  425. proc_job =
  426. (JobNode *)vp9_enc_grp_get_next_job(multi_thread_ctxt, cur_tile_id);
  427. if (NULL == proc_job) {
  428. // Query for the status of other tiles
  429. end_of_frame = vp9_get_tiles_proc_status(
  430. multi_thread_ctxt, thread_data->tile_completion_status, &cur_tile_id,
  431. tile_cols);
  432. } else {
  433. tile_col = proc_job->tile_col_id;
  434. tile_row = proc_job->tile_row_id;
  435. this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
  436. mb_col_start = (this_tile->tile_info.mi_col_start) >> 1;
  437. mb_col_end = (this_tile->tile_info.mi_col_end + 1) >> 1;
  438. mb_row = proc_job->vert_unit_row_num;
  439. vp9_temporal_filter_iterate_row_c(cpi, thread_data->td, mb_row,
  440. mb_col_start, mb_col_end);
  441. }
  442. }
  443. return 0;
  444. }
  445. void vp9_temporal_filter_row_mt(VP9_COMP *cpi) {
  446. VP9_COMMON *const cm = &cpi->common;
  447. const int tile_cols = 1 << cm->log2_tile_cols;
  448. const int tile_rows = 1 << cm->log2_tile_rows;
  449. MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
  450. int num_workers = cpi->num_workers ? cpi->num_workers : 1;
  451. int i;
  452. if (multi_thread_ctxt->allocated_tile_cols < tile_cols ||
  453. multi_thread_ctxt->allocated_tile_rows < tile_rows ||
  454. multi_thread_ctxt->allocated_vert_unit_rows < cm->mb_rows) {
  455. vp9_row_mt_mem_dealloc(cpi);
  456. vp9_init_tile_data(cpi);
  457. vp9_row_mt_mem_alloc(cpi);
  458. } else {
  459. vp9_init_tile_data(cpi);
  460. }
  461. create_enc_workers(cpi, num_workers);
  462. vp9_assign_tile_to_thread(multi_thread_ctxt, tile_cols, cpi->num_workers);
  463. vp9_prepare_job_queue(cpi, ARNR_JOB);
  464. for (i = 0; i < num_workers; i++) {
  465. EncWorkerData *thread_data;
  466. thread_data = &cpi->tile_thr_data[i];
  467. // Before encoding a frame, copy the thread data from cpi.
  468. if (thread_data->td != &cpi->td) {
  469. thread_data->td->mb = cpi->td.mb;
  470. }
  471. }
  472. launch_enc_workers(cpi, (VPxWorkerHook)temporal_filter_worker_hook,
  473. multi_thread_ctxt, num_workers);
  474. }
  475. #endif // !CONFIG_REALTIME_ONLY
  476. static int enc_row_mt_worker_hook(EncWorkerData *const thread_data,
  477. MultiThreadHandle *multi_thread_ctxt) {
  478. VP9_COMP *const cpi = thread_data->cpi;
  479. const VP9_COMMON *const cm = &cpi->common;
  480. const int tile_cols = 1 << cm->log2_tile_cols;
  481. int tile_row, tile_col;
  482. int end_of_frame;
  483. int thread_id = thread_data->thread_id;
  484. int cur_tile_id = multi_thread_ctxt->thread_id_to_tile_id[thread_id];
  485. JobNode *proc_job = NULL;
  486. int mi_row;
  487. end_of_frame = 0;
  488. while (0 == end_of_frame) {
  489. // Get the next job in the queue
  490. proc_job =
  491. (JobNode *)vp9_enc_grp_get_next_job(multi_thread_ctxt, cur_tile_id);
  492. if (NULL == proc_job) {
  493. // Query for the status of other tiles
  494. end_of_frame = vp9_get_tiles_proc_status(
  495. multi_thread_ctxt, thread_data->tile_completion_status, &cur_tile_id,
  496. tile_cols);
  497. } else {
  498. tile_col = proc_job->tile_col_id;
  499. tile_row = proc_job->tile_row_id;
  500. mi_row = proc_job->vert_unit_row_num * MI_BLOCK_SIZE;
  501. vp9_encode_sb_row(cpi, thread_data->td, tile_row, tile_col, mi_row);
  502. }
  503. }
  504. return 0;
  505. }
  506. void vp9_encode_tiles_row_mt(VP9_COMP *cpi) {
  507. VP9_COMMON *const cm = &cpi->common;
  508. const int tile_cols = 1 << cm->log2_tile_cols;
  509. const int tile_rows = 1 << cm->log2_tile_rows;
  510. MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
  511. int num_workers = VPXMAX(cpi->oxcf.max_threads, 1);
  512. int i;
  513. if (multi_thread_ctxt->allocated_tile_cols < tile_cols ||
  514. multi_thread_ctxt->allocated_tile_rows < tile_rows ||
  515. multi_thread_ctxt->allocated_vert_unit_rows < cm->mb_rows) {
  516. vp9_row_mt_mem_dealloc(cpi);
  517. vp9_init_tile_data(cpi);
  518. vp9_row_mt_mem_alloc(cpi);
  519. } else {
  520. vp9_init_tile_data(cpi);
  521. }
  522. create_enc_workers(cpi, num_workers);
  523. vp9_assign_tile_to_thread(multi_thread_ctxt, tile_cols, cpi->num_workers);
  524. vp9_prepare_job_queue(cpi, ENCODE_JOB);
  525. vp9_multi_thread_tile_init(cpi);
  526. for (i = 0; i < num_workers; i++) {
  527. EncWorkerData *thread_data;
  528. thread_data = &cpi->tile_thr_data[i];
  529. // Before encoding a frame, copy the thread data from cpi.
  530. if (thread_data->td != &cpi->td) {
  531. thread_data->td->mb = cpi->td.mb;
  532. thread_data->td->rd_counts = cpi->td.rd_counts;
  533. }
  534. if (thread_data->td->counts != &cpi->common.counts) {
  535. memcpy(thread_data->td->counts, &cpi->common.counts,
  536. sizeof(cpi->common.counts));
  537. }
  538. // Handle use_nonrd_pick_mode case.
  539. if (cpi->sf.use_nonrd_pick_mode) {
  540. MACROBLOCK *const x = &thread_data->td->mb;
  541. MACROBLOCKD *const xd = &x->e_mbd;
  542. struct macroblock_plane *const p = x->plane;
  543. struct macroblockd_plane *const pd = xd->plane;
  544. PICK_MODE_CONTEXT *ctx = &thread_data->td->pc_root->none;
  545. int j;
  546. for (j = 0; j < MAX_MB_PLANE; ++j) {
  547. p[j].coeff = ctx->coeff_pbuf[j][0];
  548. p[j].qcoeff = ctx->qcoeff_pbuf[j][0];
  549. pd[j].dqcoeff = ctx->dqcoeff_pbuf[j][0];
  550. p[j].eobs = ctx->eobs_pbuf[j][0];
  551. }
  552. }
  553. }
  554. launch_enc_workers(cpi, (VPxWorkerHook)enc_row_mt_worker_hook,
  555. multi_thread_ctxt, num_workers);
  556. for (i = 0; i < num_workers; i++) {
  557. VPxWorker *const worker = &cpi->workers[i];
  558. EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;
  559. // Accumulate counters.
  560. if (i < cpi->num_workers - 1) {
  561. vp9_accumulate_frame_counts(&cm->counts, thread_data->td->counts, 0);
  562. accumulate_rd_opt(&cpi->td, thread_data->td);
  563. }
  564. }
  565. }