vp8_multi_resolution_encoder.c 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670
  1. /*
  2. * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. /*
  11. * This is an example demonstrating multi-resolution encoding in VP8.
  12. * High-resolution input video is down-sampled to lower-resolutions. The
  13. * encoder then encodes the video and outputs multiple bitstreams with
  14. * different resolutions.
  15. *
 * This test also allows setting temporal layers for each spatial layer.
 * A different number of temporal layers may be used per spatial stream.
  18. * Currently up to 3 temporal layers per spatial stream (encoder) are supported
  19. * in this test.
  20. */
  21. #include "./vpx_config.h"
  22. #include <stdio.h>
  23. #include <stdlib.h>
  24. #include <stdarg.h>
  25. #include <string.h>
  26. #include <math.h>
  27. #include <assert.h>
  28. #include <sys/time.h>
  29. #include "vpx_ports/vpx_timer.h"
  30. #include "vpx/vpx_encoder.h"
  31. #include "vpx/vp8cx.h"
  32. #include "vpx_ports/mem_ops.h"
  33. #include "../tools_common.h"
/* Codec interface handed to every libvpx call in this example: whatever
 * vpx_codec_vp8_cx() returns. */
#define interface (vpx_codec_vp8_cx())
/* FourCC stored in the IVF file header. Written little-endian, the bytes of
 * 0x30385056 are 'V' 'P' '8' '0'. */
#define fourcc 0x30385056
  36. void usage_exit(void) { exit(EXIT_FAILURE); }
  37. /*
  38. * The input video frame is downsampled several times to generate a multi-level
  39. * hierarchical structure. NUM_ENCODERS is defined as the number of encoding
  40. * levels required. For example, if the size of input video is 1280x720,
  41. * NUM_ENCODERS is 3, and down-sampling factor is 2, the encoder outputs 3
  42. * bitstreams with resolution of 1280x720(level 0), 640x360(level 1), and
  43. * 320x180(level 2) respectively.
  44. */
  45. /* Number of encoders (spatial resolutions) used in this test. */
  46. #define NUM_ENCODERS 3
  47. /* Maximum number of temporal layers allowed for this test. */
  48. #define MAX_NUM_TEMPORAL_LAYERS 3
  49. /* This example uses the scaler function in libyuv. */
  50. #include "third_party/libyuv/include/libyuv/basic_types.h"
  51. #include "third_party/libyuv/include/libyuv/scale.h"
  52. #include "third_party/libyuv/include/libyuv/cpu_id.h"
/* Frame reader chosen at run time. main() points this at read_frame() when
 * the luma stride of the input image equals its display width, and at
 * read_frame_by_row() otherwise. */
int (*read_frame_p)(FILE *f, vpx_image_t *img);
  54. static int read_frame(FILE *f, vpx_image_t *img) {
  55. size_t nbytes, to_read;
  56. int res = 1;
  57. to_read = img->w * img->h * 3 / 2;
  58. nbytes = fread(img->planes[0], 1, to_read, f);
  59. if (nbytes != to_read) {
  60. res = 0;
  61. if (nbytes > 0)
  62. printf("Warning: Read partial frame. Check your width & height!\n");
  63. }
  64. return res;
  65. }
  66. static int read_frame_by_row(FILE *f, vpx_image_t *img) {
  67. size_t nbytes, to_read;
  68. int res = 1;
  69. int plane;
  70. for (plane = 0; plane < 3; plane++) {
  71. unsigned char *ptr;
  72. int w = (plane ? (1 + img->d_w) / 2 : img->d_w);
  73. int h = (plane ? (1 + img->d_h) / 2 : img->d_h);
  74. int r;
  75. /* Determine the correct plane based on the image format. The for-loop
  76. * always counts in Y,U,V order, but this may not match the order of
  77. * the data on disk.
  78. */
  79. switch (plane) {
  80. case 1:
  81. ptr = img->planes[img->fmt == VPX_IMG_FMT_YV12 ? VPX_PLANE_V
  82. : VPX_PLANE_U];
  83. break;
  84. case 2:
  85. ptr = img->planes[img->fmt == VPX_IMG_FMT_YV12 ? VPX_PLANE_U
  86. : VPX_PLANE_V];
  87. break;
  88. default: ptr = img->planes[plane];
  89. }
  90. for (r = 0; r < h; r++) {
  91. to_read = w;
  92. nbytes = fread(ptr, 1, to_read, f);
  93. if (nbytes != to_read) {
  94. res = 0;
  95. if (nbytes > 0)
  96. printf("Warning: Read partial frame. Check your width & height!\n");
  97. break;
  98. }
  99. ptr += img->stride[plane];
  100. }
  101. if (!res) break;
  102. }
  103. return res;
  104. }
  105. static void write_ivf_file_header(FILE *outfile, const vpx_codec_enc_cfg_t *cfg,
  106. int frame_cnt) {
  107. char header[32];
  108. if (cfg->g_pass != VPX_RC_ONE_PASS && cfg->g_pass != VPX_RC_LAST_PASS) return;
  109. header[0] = 'D';
  110. header[1] = 'K';
  111. header[2] = 'I';
  112. header[3] = 'F';
  113. mem_put_le16(header + 4, 0); /* version */
  114. mem_put_le16(header + 6, 32); /* headersize */
  115. mem_put_le32(header + 8, fourcc); /* headersize */
  116. mem_put_le16(header + 12, cfg->g_w); /* width */
  117. mem_put_le16(header + 14, cfg->g_h); /* height */
  118. mem_put_le32(header + 16, cfg->g_timebase.den); /* rate */
  119. mem_put_le32(header + 20, cfg->g_timebase.num); /* scale */
  120. mem_put_le32(header + 24, frame_cnt); /* length */
  121. mem_put_le32(header + 28, 0); /* unused */
  122. (void)fwrite(header, 1, 32, outfile);
  123. }
  124. static void write_ivf_frame_header(FILE *outfile,
  125. const vpx_codec_cx_pkt_t *pkt) {
  126. char header[12];
  127. vpx_codec_pts_t pts;
  128. if (pkt->kind != VPX_CODEC_CX_FRAME_PKT) return;
  129. pts = pkt->data.frame.pts;
  130. mem_put_le32(header, pkt->data.frame.sz);
  131. mem_put_le32(header + 4, pts & 0xFFFFFFFF);
  132. mem_put_le32(header + 8, pts >> 32);
  133. (void)fwrite(header, 1, 12, outfile);
  134. }
  135. /* Temporal scaling parameters */
  136. /* This sets all the temporal layer parameters given |num_temporal_layers|,
  137. * including the target bit allocation across temporal layers. Bit allocation
  138. * parameters will be passed in as user parameters in another version.
  139. */
  140. static void set_temporal_layer_pattern(int num_temporal_layers,
  141. vpx_codec_enc_cfg_t *cfg, int bitrate,
  142. int *layer_flags) {
  143. assert(num_temporal_layers <= MAX_NUM_TEMPORAL_LAYERS);
  144. switch (num_temporal_layers) {
  145. case 1: {
  146. /* 1-layer */
  147. cfg->ts_number_layers = 1;
  148. cfg->ts_periodicity = 1;
  149. cfg->ts_rate_decimator[0] = 1;
  150. cfg->ts_layer_id[0] = 0;
  151. cfg->ts_target_bitrate[0] = bitrate;
  152. // Update L only.
  153. layer_flags[0] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
  154. break;
  155. }
  156. case 2: {
  157. /* 2-layers, with sync point at first frame of layer 1. */
  158. cfg->ts_number_layers = 2;
  159. cfg->ts_periodicity = 2;
  160. cfg->ts_rate_decimator[0] = 2;
  161. cfg->ts_rate_decimator[1] = 1;
  162. cfg->ts_layer_id[0] = 0;
  163. cfg->ts_layer_id[1] = 1;
  164. // Use 60/40 bit allocation as example.
  165. cfg->ts_target_bitrate[0] = 0.6f * bitrate;
  166. cfg->ts_target_bitrate[1] = bitrate;
  167. /* 0=L, 1=GF */
  168. // ARF is used as predictor for all frames, and is only updated on
  169. // key frame. Sync point every 8 frames.
  170. // Layer 0: predict from L and ARF, update L and G.
  171. layer_flags[0] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_ARF;
  172. // Layer 1: sync point: predict from L and ARF, and update G.
  173. layer_flags[1] =
  174. VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF;
  175. // Layer 0, predict from L and ARF, update L.
  176. layer_flags[2] =
  177. VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
  178. // Layer 1: predict from L, G and ARF, and update G.
  179. layer_flags[3] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
  180. VP8_EFLAG_NO_UPD_ENTROPY;
  181. // Layer 0
  182. layer_flags[4] = layer_flags[2];
  183. // Layer 1
  184. layer_flags[5] = layer_flags[3];
  185. // Layer 0
  186. layer_flags[6] = layer_flags[4];
  187. // Layer 1
  188. layer_flags[7] = layer_flags[5];
  189. break;
  190. }
  191. case 3:
  192. default: {
  193. // 3-layers structure where ARF is used as predictor for all frames,
  194. // and is only updated on key frame.
  195. // Sync points for layer 1 and 2 every 8 frames.
  196. cfg->ts_number_layers = 3;
  197. cfg->ts_periodicity = 4;
  198. cfg->ts_rate_decimator[0] = 4;
  199. cfg->ts_rate_decimator[1] = 2;
  200. cfg->ts_rate_decimator[2] = 1;
  201. cfg->ts_layer_id[0] = 0;
  202. cfg->ts_layer_id[1] = 2;
  203. cfg->ts_layer_id[2] = 1;
  204. cfg->ts_layer_id[3] = 2;
  205. // Use 45/20/35 bit allocation as example.
  206. cfg->ts_target_bitrate[0] = 0.45f * bitrate;
  207. cfg->ts_target_bitrate[1] = 0.65f * bitrate;
  208. cfg->ts_target_bitrate[2] = bitrate;
  209. /* 0=L, 1=GF, 2=ARF */
  210. // Layer 0: predict from L and ARF; update L and G.
  211. layer_flags[0] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF;
  212. // Layer 2: sync point: predict from L and ARF; update none.
  213. layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF |
  214. VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
  215. VP8_EFLAG_NO_UPD_ENTROPY;
  216. // Layer 1: sync point: predict from L and ARF; update G.
  217. layer_flags[2] =
  218. VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
  219. // Layer 2: predict from L, G, ARF; update none.
  220. layer_flags[3] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
  221. VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ENTROPY;
  222. // Layer 0: predict from L and ARF; update L.
  223. layer_flags[4] =
  224. VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF;
  225. // Layer 2: predict from L, G, ARF; update none.
  226. layer_flags[5] = layer_flags[3];
  227. // Layer 1: predict from L, G, ARF; update G.
  228. layer_flags[6] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
  229. // Layer 2: predict from L, G, ARF; update none.
  230. layer_flags[7] = layer_flags[3];
  231. break;
  232. }
  233. }
  234. }
/* Length of the repeating per-frame flag pattern, indexed by
 * (number of temporal layers - 1): 1 entry for one layer, 8 entries for two
 * or three layers. main() uses it as the modulus when selecting the current
 * frame's entry from its layer_flags array. */
static int periodicity_to_num_layers[MAX_NUM_TEMPORAL_LAYERS] = { 1, 8, 8 };
  237. int main(int argc, char **argv) {
  238. FILE *infile, *outfile[NUM_ENCODERS];
  239. FILE *downsampled_input[NUM_ENCODERS - 1];
  240. char filename[50];
  241. vpx_codec_ctx_t codec[NUM_ENCODERS];
  242. vpx_codec_enc_cfg_t cfg[NUM_ENCODERS];
  243. int frame_cnt = 0;
  244. vpx_image_t raw[NUM_ENCODERS];
  245. vpx_codec_err_t res[NUM_ENCODERS];
  246. int i;
  247. long width;
  248. long height;
  249. int length_frame;
  250. int frame_avail;
  251. int got_data;
  252. int flags = 0;
  253. int layer_id = 0;
  254. int layer_flags[VPX_TS_MAX_PERIODICITY * NUM_ENCODERS] = { 0 };
  255. int flag_periodicity;
  256. /*Currently, only realtime mode is supported in multi-resolution encoding.*/
  257. int arg_deadline = VPX_DL_REALTIME;
  258. /* Set show_psnr to 1/0 to show/not show PSNR. Choose show_psnr=0 if you
  259. don't need to know PSNR, which will skip PSNR calculation and save
  260. encoding time. */
  261. int show_psnr = 0;
  262. int key_frame_insert = 0;
  263. uint64_t psnr_sse_total[NUM_ENCODERS] = { 0 };
  264. uint64_t psnr_samples_total[NUM_ENCODERS] = { 0 };
  265. double psnr_totals[NUM_ENCODERS][4] = { { 0, 0 } };
  266. int psnr_count[NUM_ENCODERS] = { 0 };
  267. int64_t cx_time = 0;
  268. /* Set the required target bitrates for each resolution level.
  269. * If target bitrate for highest-resolution level is set to 0,
  270. * (i.e. target_bitrate[0]=0), we skip encoding at that level.
  271. */
  272. unsigned int target_bitrate[NUM_ENCODERS] = { 1000, 500, 100 };
  273. /* Enter the frame rate of the input video */
  274. int framerate = 30;
  275. /* Set down-sampling factor for each resolution level.
  276. dsf[0] controls down sampling from level 0 to level 1;
  277. dsf[1] controls down sampling from level 1 to level 2;
  278. dsf[2] is not used. */
  279. vpx_rational_t dsf[NUM_ENCODERS] = { { 2, 1 }, { 2, 1 }, { 1, 1 } };
  280. /* Set the number of temporal layers for each encoder/resolution level,
  281. * starting from highest resoln down to lowest resoln. */
  282. unsigned int num_temporal_layers[NUM_ENCODERS] = { 3, 3, 3 };
  283. if (argc != (7 + 3 * NUM_ENCODERS))
  284. die("Usage: %s <width> <height> <frame_rate> <infile> <outfile(s)> "
  285. "<rate_encoder(s)> <temporal_layer(s)> <key_frame_insert> <output "
  286. "psnr?> \n",
  287. argv[0]);
  288. printf("Using %s\n", vpx_codec_iface_name(interface));
  289. width = strtol(argv[1], NULL, 0);
  290. height = strtol(argv[2], NULL, 0);
  291. framerate = strtol(argv[3], NULL, 0);
  292. if (width < 16 || width % 2 || height < 16 || height % 2)
  293. die("Invalid resolution: %ldx%ld", width, height);
  294. /* Open input video file for encoding */
  295. if (!(infile = fopen(argv[4], "rb")))
  296. die("Failed to open %s for reading", argv[4]);
  297. /* Open output file for each encoder to output bitstreams */
  298. for (i = 0; i < NUM_ENCODERS; i++) {
  299. if (!target_bitrate[i]) {
  300. outfile[i] = NULL;
  301. continue;
  302. }
  303. if (!(outfile[i] = fopen(argv[i + 5], "wb")))
  304. die("Failed to open %s for writing", argv[i + 4]);
  305. }
  306. // Bitrates per spatial layer: overwrite default rates above.
  307. for (i = 0; i < NUM_ENCODERS; i++) {
  308. target_bitrate[i] = strtol(argv[NUM_ENCODERS + 5 + i], NULL, 0);
  309. }
  310. // Temporal layers per spatial layers: overwrite default settings above.
  311. for (i = 0; i < NUM_ENCODERS; i++) {
  312. num_temporal_layers[i] = strtol(argv[2 * NUM_ENCODERS + 5 + i], NULL, 0);
  313. if (num_temporal_layers[i] < 1 || num_temporal_layers[i] > 3)
  314. die("Invalid temporal layers: %d, Must be 1, 2, or 3. \n",
  315. num_temporal_layers);
  316. }
  317. /* Open file to write out each spatially downsampled input stream. */
  318. for (i = 0; i < NUM_ENCODERS - 1; i++) {
  319. // Highest resoln is encoder 0.
  320. if (sprintf(filename, "ds%d.yuv", NUM_ENCODERS - i) < 0) {
  321. return EXIT_FAILURE;
  322. }
  323. downsampled_input[i] = fopen(filename, "wb");
  324. }
  325. key_frame_insert = strtol(argv[3 * NUM_ENCODERS + 5], NULL, 0);
  326. show_psnr = strtol(argv[3 * NUM_ENCODERS + 6], NULL, 0);
  327. /* Populate default encoder configuration */
  328. for (i = 0; i < NUM_ENCODERS; i++) {
  329. res[i] = vpx_codec_enc_config_default(interface, &cfg[i], 0);
  330. if (res[i]) {
  331. printf("Failed to get config: %s\n", vpx_codec_err_to_string(res[i]));
  332. return EXIT_FAILURE;
  333. }
  334. }
  335. /*
  336. * Update the default configuration according to needs of the application.
  337. */
  338. /* Highest-resolution encoder settings */
  339. cfg[0].g_w = width;
  340. cfg[0].g_h = height;
  341. cfg[0].rc_dropframe_thresh = 0;
  342. cfg[0].rc_end_usage = VPX_CBR;
  343. cfg[0].rc_resize_allowed = 0;
  344. cfg[0].rc_min_quantizer = 2;
  345. cfg[0].rc_max_quantizer = 56;
  346. cfg[0].rc_undershoot_pct = 100;
  347. cfg[0].rc_overshoot_pct = 15;
  348. cfg[0].rc_buf_initial_sz = 500;
  349. cfg[0].rc_buf_optimal_sz = 600;
  350. cfg[0].rc_buf_sz = 1000;
  351. cfg[0].g_error_resilient = 1; /* Enable error resilient mode */
  352. cfg[0].g_lag_in_frames = 0;
  353. /* Disable automatic keyframe placement */
  354. /* Note: These 3 settings are copied to all levels. But, except the lowest
  355. * resolution level, all other levels are set to VPX_KF_DISABLED internally.
  356. */
  357. cfg[0].kf_mode = VPX_KF_AUTO;
  358. cfg[0].kf_min_dist = 3000;
  359. cfg[0].kf_max_dist = 3000;
  360. cfg[0].rc_target_bitrate = target_bitrate[0]; /* Set target bitrate */
  361. cfg[0].g_timebase.num = 1; /* Set fps */
  362. cfg[0].g_timebase.den = framerate;
  363. /* Other-resolution encoder settings */
  364. for (i = 1; i < NUM_ENCODERS; i++) {
  365. memcpy(&cfg[i], &cfg[0], sizeof(vpx_codec_enc_cfg_t));
  366. cfg[i].rc_target_bitrate = target_bitrate[i];
  367. /* Note: Width & height of other-resolution encoders are calculated
  368. * from the highest-resolution encoder's size and the corresponding
  369. * down_sampling_factor.
  370. */
  371. {
  372. unsigned int iw = cfg[i - 1].g_w * dsf[i - 1].den + dsf[i - 1].num - 1;
  373. unsigned int ih = cfg[i - 1].g_h * dsf[i - 1].den + dsf[i - 1].num - 1;
  374. cfg[i].g_w = iw / dsf[i - 1].num;
  375. cfg[i].g_h = ih / dsf[i - 1].num;
  376. }
  377. /* Make width & height to be multiplier of 2. */
  378. // Should support odd size ???
  379. if ((cfg[i].g_w) % 2) cfg[i].g_w++;
  380. if ((cfg[i].g_h) % 2) cfg[i].g_h++;
  381. }
  382. // Set the number of threads per encode/spatial layer.
  383. // (1, 1, 1) means no encoder threading.
  384. cfg[0].g_threads = 1;
  385. cfg[1].g_threads = 1;
  386. cfg[2].g_threads = 1;
  387. /* Allocate image for each encoder */
  388. for (i = 0; i < NUM_ENCODERS; i++)
  389. if (!vpx_img_alloc(&raw[i], VPX_IMG_FMT_I420, cfg[i].g_w, cfg[i].g_h, 32))
  390. die("Failed to allocate image", cfg[i].g_w, cfg[i].g_h);
  391. if (raw[0].stride[VPX_PLANE_Y] == (int)raw[0].d_w)
  392. read_frame_p = read_frame;
  393. else
  394. read_frame_p = read_frame_by_row;
  395. for (i = 0; i < NUM_ENCODERS; i++)
  396. if (outfile[i]) write_ivf_file_header(outfile[i], &cfg[i], 0);
  397. /* Temporal layers settings */
  398. for (i = 0; i < NUM_ENCODERS; i++) {
  399. set_temporal_layer_pattern(num_temporal_layers[i], &cfg[i],
  400. cfg[i].rc_target_bitrate,
  401. &layer_flags[i * VPX_TS_MAX_PERIODICITY]);
  402. }
  403. /* Initialize multi-encoder */
  404. if (vpx_codec_enc_init_multi(&codec[0], interface, &cfg[0], NUM_ENCODERS,
  405. (show_psnr ? VPX_CODEC_USE_PSNR : 0), &dsf[0]))
  406. die_codec(&codec[0], "Failed to initialize encoder");
  407. /* The extra encoding configuration parameters can be set as follows. */
  408. /* Set encoding speed */
  409. for (i = 0; i < NUM_ENCODERS; i++) {
  410. int speed = -6;
  411. /* Lower speed for the lowest resolution. */
  412. if (i == NUM_ENCODERS - 1) speed = -4;
  413. if (vpx_codec_control(&codec[i], VP8E_SET_CPUUSED, speed))
  414. die_codec(&codec[i], "Failed to set cpu_used");
  415. }
  416. /* Set static threshold = 1 for all encoders */
  417. for (i = 0; i < NUM_ENCODERS; i++) {
  418. if (vpx_codec_control(&codec[i], VP8E_SET_STATIC_THRESHOLD, 1))
  419. die_codec(&codec[i], "Failed to set static threshold");
  420. }
  421. /* Set NOISE_SENSITIVITY to do TEMPORAL_DENOISING */
  422. /* Enable denoising for the highest-resolution encoder. */
  423. if (vpx_codec_control(&codec[0], VP8E_SET_NOISE_SENSITIVITY, 1))
  424. die_codec(&codec[0], "Failed to set noise_sensitivity");
  425. if (vpx_codec_control(&codec[1], VP8E_SET_NOISE_SENSITIVITY, 1))
  426. die_codec(&codec[1], "Failed to set noise_sensitivity");
  427. for (i = 2; i < NUM_ENCODERS; i++) {
  428. if (vpx_codec_control(&codec[i], VP8E_SET_NOISE_SENSITIVITY, 0))
  429. die_codec(&codec[i], "Failed to set noise_sensitivity");
  430. }
  431. /* Set the number of token partitions */
  432. for (i = 0; i < NUM_ENCODERS; i++) {
  433. if (vpx_codec_control(&codec[i], VP8E_SET_TOKEN_PARTITIONS, 1))
  434. die_codec(&codec[i], "Failed to set static threshold");
  435. }
  436. /* Set the max intra target bitrate */
  437. for (i = 0; i < NUM_ENCODERS; i++) {
  438. unsigned int max_intra_size_pct =
  439. (int)(((double)cfg[0].rc_buf_optimal_sz * 0.5) * framerate / 10);
  440. if (vpx_codec_control(&codec[i], VP8E_SET_MAX_INTRA_BITRATE_PCT,
  441. max_intra_size_pct))
  442. die_codec(&codec[i], "Failed to set static threshold");
  443. // printf("%d %d \n",i,max_intra_size_pct);
  444. }
  445. frame_avail = 1;
  446. got_data = 0;
  447. while (frame_avail || got_data) {
  448. struct vpx_usec_timer timer;
  449. vpx_codec_iter_t iter[NUM_ENCODERS] = { NULL };
  450. const vpx_codec_cx_pkt_t *pkt[NUM_ENCODERS];
  451. flags = 0;
  452. frame_avail = read_frame_p(infile, &raw[0]);
  453. if (frame_avail) {
  454. for (i = 1; i < NUM_ENCODERS; i++) {
  455. /*Scale the image down a number of times by downsampling factor*/
  456. /* FilterMode 1 or 2 give better psnr than FilterMode 0. */
  457. I420Scale(
  458. raw[i - 1].planes[VPX_PLANE_Y], raw[i - 1].stride[VPX_PLANE_Y],
  459. raw[i - 1].planes[VPX_PLANE_U], raw[i - 1].stride[VPX_PLANE_U],
  460. raw[i - 1].planes[VPX_PLANE_V], raw[i - 1].stride[VPX_PLANE_V],
  461. raw[i - 1].d_w, raw[i - 1].d_h, raw[i].planes[VPX_PLANE_Y],
  462. raw[i].stride[VPX_PLANE_Y], raw[i].planes[VPX_PLANE_U],
  463. raw[i].stride[VPX_PLANE_U], raw[i].planes[VPX_PLANE_V],
  464. raw[i].stride[VPX_PLANE_V], raw[i].d_w, raw[i].d_h, 1);
  465. /* Write out down-sampled input. */
  466. length_frame = cfg[i].g_w * cfg[i].g_h * 3 / 2;
  467. if (fwrite(raw[i].planes[0], 1, length_frame,
  468. downsampled_input[NUM_ENCODERS - i - 1]) !=
  469. (unsigned int)length_frame) {
  470. return EXIT_FAILURE;
  471. }
  472. }
  473. }
  474. /* Set the flags (reference and update) for all the encoders.*/
  475. for (i = 0; i < NUM_ENCODERS; i++) {
  476. layer_id = cfg[i].ts_layer_id[frame_cnt % cfg[i].ts_periodicity];
  477. flags = 0;
  478. flag_periodicity = periodicity_to_num_layers[num_temporal_layers[i] - 1];
  479. flags = layer_flags[i * VPX_TS_MAX_PERIODICITY +
  480. frame_cnt % flag_periodicity];
  481. // Key frame flag for first frame.
  482. if (frame_cnt == 0) {
  483. flags |= VPX_EFLAG_FORCE_KF;
  484. }
  485. if (frame_cnt > 0 && frame_cnt == key_frame_insert) {
  486. flags = VPX_EFLAG_FORCE_KF;
  487. }
  488. vpx_codec_control(&codec[i], VP8E_SET_FRAME_FLAGS, flags);
  489. vpx_codec_control(&codec[i], VP8E_SET_TEMPORAL_LAYER_ID, layer_id);
  490. }
  491. /* Encode each frame at multi-levels */
  492. /* Note the flags must be set to 0 in the encode call if they are set
  493. for each frame with the vpx_codec_control(), as done above. */
  494. vpx_usec_timer_start(&timer);
  495. if (vpx_codec_encode(&codec[0], frame_avail ? &raw[0] : NULL, frame_cnt, 1,
  496. 0, arg_deadline)) {
  497. die_codec(&codec[0], "Failed to encode frame");
  498. }
  499. vpx_usec_timer_mark(&timer);
  500. cx_time += vpx_usec_timer_elapsed(&timer);
  501. for (i = NUM_ENCODERS - 1; i >= 0; i--) {
  502. got_data = 0;
  503. while ((pkt[i] = vpx_codec_get_cx_data(&codec[i], &iter[i]))) {
  504. got_data = 1;
  505. switch (pkt[i]->kind) {
  506. case VPX_CODEC_CX_FRAME_PKT:
  507. write_ivf_frame_header(outfile[i], pkt[i]);
  508. (void)fwrite(pkt[i]->data.frame.buf, 1, pkt[i]->data.frame.sz,
  509. outfile[i]);
  510. break;
  511. case VPX_CODEC_PSNR_PKT:
  512. if (show_psnr) {
  513. int j;
  514. psnr_sse_total[i] += pkt[i]->data.psnr.sse[0];
  515. psnr_samples_total[i] += pkt[i]->data.psnr.samples[0];
  516. for (j = 0; j < 4; j++) {
  517. psnr_totals[i][j] += pkt[i]->data.psnr.psnr[j];
  518. }
  519. psnr_count[i]++;
  520. }
  521. break;
  522. default: break;
  523. }
  524. printf(pkt[i]->kind == VPX_CODEC_CX_FRAME_PKT &&
  525. (pkt[i]->data.frame.flags & VPX_FRAME_IS_KEY)
  526. ? "K"
  527. : "");
  528. fflush(stdout);
  529. }
  530. }
  531. frame_cnt++;
  532. }
  533. printf("\n");
  534. printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n",
  535. frame_cnt, 1000 * (float)cx_time / (double)(frame_cnt * 1000000),
  536. 1000000 * (double)frame_cnt / (double)cx_time);
  537. fclose(infile);
  538. printf("Processed %ld frames.\n", (long int)frame_cnt - 1);
  539. for (i = 0; i < NUM_ENCODERS; i++) {
  540. /* Calculate PSNR and print it out */
  541. if ((show_psnr) && (psnr_count[i] > 0)) {
  542. int j;
  543. double ovpsnr =
  544. sse_to_psnr(psnr_samples_total[i], 255.0, psnr_sse_total[i]);
  545. fprintf(stderr, "\n ENC%d PSNR (Overall/Avg/Y/U/V)", i);
  546. fprintf(stderr, " %.3lf", ovpsnr);
  547. for (j = 0; j < 4; j++) {
  548. fprintf(stderr, " %.3lf", psnr_totals[i][j] / psnr_count[i]);
  549. }
  550. }
  551. if (vpx_codec_destroy(&codec[i]))
  552. die_codec(&codec[i], "Failed to destroy codec");
  553. vpx_img_free(&raw[i]);
  554. if (!outfile[i]) continue;
  555. /* Try to rewrite the file header with the actual frame count */
  556. if (!fseek(outfile[i], 0, SEEK_SET))
  557. write_ivf_file_header(outfile[i], &cfg[i], frame_cnt - 1);
  558. fclose(outfile[i]);
  559. }
  560. printf("\n");
  561. return EXIT_SUCCESS;
  562. }