
Merge pull request #6490 from zaps166/webm-pr

Add WebM support
Rémi Verschelde, 8 years ago
parent commit c6c13eb8fc
100 changed files with 26173 additions and 3 deletions
  1. SConstruct (+1 -0)
  2. modules/theora/yuv2rgb.h (+3 -3)
  3. modules/webm/SCsub (+34 -0)
  4. modules/webm/config.py (+6 -0)
  5. modules/webm/libvpx/SCsub (+390 -0)
  6. modules/webm/libvpx/yasm_osx_fat.py (+38 -0)
  7. modules/webm/register_types.cpp (+45 -0)
  8. modules/webm/register_types.h (+30 -0)
  9. modules/webm/video_stream_webm.cpp (+446 -0)
  10. modules/webm/video_stream_webm.h (+128 -0)
  11. platform/x11/detect.py (+3 -0)
  12. thirdparty/README.md (+13 -0)
  13. thirdparty/libsimplewebm/LICENSE (+21 -0)
  14. thirdparty/libsimplewebm/OpusVorbisDecoder.cpp (+224 -0)
  15. thirdparty/libsimplewebm/OpusVorbisDecoder.hpp (+63 -0)
  16. thirdparty/libsimplewebm/VPXDecoder.cpp (+142 -0)
  17. thirdparty/libsimplewebm/VPXDecoder.hpp (+80 -0)
  18. thirdparty/libsimplewebm/WebMDemuxer.cpp (+241 -0)
  19. thirdparty/libsimplewebm/WebMDemuxer.hpp (+125 -0)
  20. thirdparty/libsimplewebm/libwebm/AUTHORS.TXT (+4 -0)
  21. thirdparty/libsimplewebm/libwebm/LICENSE.TXT (+30 -0)
  22. thirdparty/libsimplewebm/libwebm/PATENTS.TXT (+23 -0)
  23. thirdparty/libsimplewebm/libwebm/README.libvpx (+11 -0)
  24. thirdparty/libsimplewebm/libwebm/common/webmids.h (+184 -0)
  25. thirdparty/libsimplewebm/libwebm/mkvmuxer/mkvmuxertypes.h (+28 -0)
  26. thirdparty/libsimplewebm/libwebm/mkvparser/mkvparser.cc (+7831 -0)
  27. thirdparty/libsimplewebm/libwebm/mkvparser/mkvparser.h (+1111 -0)
  28. thirdparty/libvpx/AUTHORS (+142 -0)
  29. thirdparty/libvpx/CHANGELOG (+654 -0)
  30. thirdparty/libvpx/LICENSE (+31 -0)
  31. thirdparty/libvpx/PATENTS (+23 -0)
  32. thirdparty/libvpx/rtcd/vp8_rtcd_arm.h (+240 -0)
  33. thirdparty/libvpx/rtcd/vp8_rtcd_c.h (+117 -0)
  34. thirdparty/libvpx/rtcd/vp8_rtcd_x86.h (+247 -0)
  35. thirdparty/libvpx/rtcd/vp9_rtcd_arm.h (+54 -0)
  36. thirdparty/libvpx/rtcd/vp9_rtcd_c.h (+41 -0)
  37. thirdparty/libvpx/rtcd/vp9_rtcd_x86.h (+55 -0)
  38. thirdparty/libvpx/rtcd/vpx_dsp_rtcd_arm.h (+678 -0)
  39. thirdparty/libvpx/rtcd/vpx_dsp_rtcd_c.h (+355 -0)
  40. thirdparty/libvpx/rtcd/vpx_dsp_rtcd_x86.h (+614 -0)
  41. thirdparty/libvpx/third_party/x86inc/LICENSE (+18 -0)
  42. thirdparty/libvpx/third_party/x86inc/README.libvpx (+20 -0)
  43. thirdparty/libvpx/third_party/x86inc/x86inc.asm (+1649 -0)
  44. thirdparty/libvpx/vp8/common/alloccommon.c (+190 -0)
  45. thirdparty/libvpx/vp8/common/alloccommon.h (+31 -0)
  46. thirdparty/libvpx/vp8/common/arm/loopfilter_arm.c (+181 -0)
  47. thirdparty/libvpx/vp8/common/arm/neon/bilinearpredict_neon.c (+591 -0)
  48. thirdparty/libvpx/vp8/common/arm/neon/copymem_neon.c (+59 -0)
  49. thirdparty/libvpx/vp8/common/arm/neon/dc_only_idct_add_neon.c (+42 -0)
  50. thirdparty/libvpx/vp8/common/arm/neon/dequant_idct_neon.c (+142 -0)
  51. thirdparty/libvpx/vp8/common/arm/neon/dequantizeb_neon.c (+25 -0)
  52. thirdparty/libvpx/vp8/common/arm/neon/idct_blk_neon.c (+96 -0)
  53. thirdparty/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.c (+63 -0)
  54. thirdparty/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.c (+185 -0)
  55. thirdparty/libvpx/vp8/common/arm/neon/iwalsh_neon.c (+102 -0)
  56. thirdparty/libvpx/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c (+111 -0)
  57. thirdparty/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c (+283 -0)
  58. thirdparty/libvpx/vp8/common/arm/neon/mbloopfilter_neon.c (+625 -0)
  59. thirdparty/libvpx/vp8/common/arm/neon/shortidct4x4llm_neon.c (+123 -0)
  60. thirdparty/libvpx/vp8/common/arm/neon/sixtappredict_neon.c (+1377 -0)
  61. thirdparty/libvpx/vp8/common/arm/neon/vp8_loopfilter_neon.c (+550 -0)
  62. thirdparty/libvpx/vp8/common/blockd.c (+22 -0)
  63. thirdparty/libvpx/vp8/common/blockd.h (+312 -0)
  64. thirdparty/libvpx/vp8/common/coefupdateprobs.h (+197 -0)
  65. thirdparty/libvpx/vp8/common/common.h (+48 -0)
  66. thirdparty/libvpx/vp8/common/copy_c.c (+32 -0)
  67. thirdparty/libvpx/vp8/common/debugmodes.c (+155 -0)
  68. thirdparty/libvpx/vp8/common/default_coef_probs.h (+200 -0)
  69. thirdparty/libvpx/vp8/common/dequantize.c (+43 -0)
  70. thirdparty/libvpx/vp8/common/entropy.c (+188 -0)
  71. thirdparty/libvpx/vp8/common/entropy.h (+109 -0)
  72. thirdparty/libvpx/vp8/common/entropymode.c (+171 -0)
  73. thirdparty/libvpx/vp8/common/entropymode.h (+88 -0)
  74. thirdparty/libvpx/vp8/common/entropymv.c (+49 -0)
  75. thirdparty/libvpx/vp8/common/entropymv.h (+52 -0)
  76. thirdparty/libvpx/vp8/common/extend.c (+188 -0)
  77. thirdparty/libvpx/vp8/common/extend.h (+33 -0)
  78. thirdparty/libvpx/vp8/common/filter.c (+493 -0)
  79. thirdparty/libvpx/vp8/common/filter.h (+32 -0)
  80. thirdparty/libvpx/vp8/common/findnearmv.c (+193 -0)
  81. thirdparty/libvpx/vp8/common/findnearmv.h (+195 -0)
  82. thirdparty/libvpx/vp8/common/generic/systemdependent.c (+106 -0)
  83. thirdparty/libvpx/vp8/common/header.h (+51 -0)
  84. thirdparty/libvpx/vp8/common/idct_blk.c (+90 -0)
  85. thirdparty/libvpx/vp8/common/idctllm.c (+205 -0)
  86. thirdparty/libvpx/vp8/common/invtrans.h (+70 -0)
  87. thirdparty/libvpx/vp8/common/loopfilter.h (+113 -0)
  88. thirdparty/libvpx/vp8/common/loopfilter_filters.c (+430 -0)
  89. thirdparty/libvpx/vp8/common/mbpitch.c (+68 -0)
  90. thirdparty/libvpx/vp8/common/modecont.c (+40 -0)
  91. thirdparty/libvpx/vp8/common/modecont.h (+25 -0)
  92. thirdparty/libvpx/vp8/common/mv.h (+36 -0)
  93. thirdparty/libvpx/vp8/common/onyxc_int.h (+185 -0)
  94. thirdparty/libvpx/vp8/common/onyxd.h (+63 -0)
  95. thirdparty/libvpx/vp8/common/ppflags.h (+49 -0)
  96. thirdparty/libvpx/vp8/common/quant_common.c (+135 -0)
  97. thirdparty/libvpx/vp8/common/quant_common.h (+34 -0)
  98. thirdparty/libvpx/vp8/common/reconinter.c (+544 -0)
  99. thirdparty/libvpx/vp8/common/reconinter.h (+43 -0)
  100. thirdparty/libvpx/vp8/common/reconintra.c (+117 -0)

+ 1 - 0
SConstruct

@@ -126,6 +126,7 @@ opts.Add('gdscript','Build GDSCript support: (yes/no)','yes')
 opts.Add('libogg','Ogg library for ogg container support (system/builtin)','builtin')
 opts.Add('libvorbis','Ogg Vorbis library for vorbis support (system/builtin)','builtin')
 opts.Add('libtheora','Theora library for theora module (system/builtin)','builtin')
+opts.Add('libvpx','VPX library for webm module (system/builtin)','builtin')
 opts.Add('opus','Opus and opusfile library for Opus format support: (system/builtin)','builtin')
 opts.Add('minizip','Build Minizip Archive Support: (yes/no)','yes')
 opts.Add('squish','Squish library for BC Texture Compression in editor (system/builtin)','builtin')

+ 3 - 3
modules/theora/yuv2rgb.h

@@ -801,7 +801,7 @@ do {                            \
     *(DSTPTR)++ = 255;            \
 } while (0 == 1)
 
-void yuv422_2_rgb8888(uint8_t  *dst_ptr,
+static void yuv422_2_rgb8888(uint8_t  *dst_ptr,
 		const uint8_t  *y_ptr,
 		const uint8_t  *u_ptr,
 		const uint8_t  *v_ptr,
@@ -912,7 +912,7 @@ do {                        \
     (DSTPTR) = 0xFF000000 | (Y & 0xFF) | (0xFF00 & (Y>>14)) | (0xFF0000 & (Y<<5));\
 } while (0 == 1)
 
-void yuv420_2_rgb8888(uint8_t  *dst_ptr_,
+static void yuv420_2_rgb8888(uint8_t  *dst_ptr_,
 		const uint8_t  *y_ptr,
 		const uint8_t  *u_ptr,
 		const uint8_t  *v_ptr,
@@ -1034,7 +1034,7 @@ do {                            \
 	*(DSTPTR)++ = 255;           \
 } while (0 == 1)
 
-void yuv444_2_rgb8888(uint8_t  *dst_ptr,
+static void yuv444_2_rgb8888(uint8_t  *dst_ptr,
 		const uint8_t  *y_ptr,
 		const uint8_t  *u_ptr,
 		const uint8_t  *v_ptr,

+ 34 - 0
modules/webm/SCsub

@@ -0,0 +1,34 @@
+#!/usr/bin/env python
+
+Import('env')
+Import('env_modules')
+
+env_webm = env_modules.Clone()
+
+# Thirdparty source files
+thirdparty_libsimplewebm_dir = "#thirdparty/libsimplewebm/"
+thirdparty_libsimplewebm_sources = [
+	"libwebm/mkvparser/mkvparser.cc",
+	"OpusVorbisDecoder.cpp",
+	"VPXDecoder.cpp",
+	"WebMDemuxer.cpp",
+]
+thirdparty_libsimplewebm_sources = [thirdparty_libsimplewebm_dir + file for file in thirdparty_libsimplewebm_sources]
+
+env_webm.add_source_files(env.modules_sources, thirdparty_libsimplewebm_sources)
+env_webm.Append(CPPPATH = [thirdparty_libsimplewebm_dir, thirdparty_libsimplewebm_dir + "libwebm/"])
+
+# also requires libogg, libvorbis and libopus
+if (env["libogg"] != "system"): # builtin
+	env_webm.Append(CPPPATH = ["#thirdparty/libogg"])
+if (env["libvorbis"] != "system"): # builtin
+	env_webm.Append(CPPPATH = ["#thirdparty/libvorbis"])
+if (env["opus"] != "system"): # builtin
+	env_webm.Append(CPPPATH = ["#thirdparty"])
+
+if (env["libvpx"] != "system"): # builtin
+	Export('env_webm')
+	SConscript("libvpx/SCsub")
+
+# Godot source files
+env_webm.add_source_files(env.modules_sources, "*.cpp")

+ 6 - 0
modules/webm/config.py

@@ -0,0 +1,6 @@
+
+def can_build(platform):
+	return True
+
+def configure(env):
+	pass

+ 390 - 0
modules/webm/libvpx/SCsub

@@ -0,0 +1,390 @@
+#!/usr/bin/env python
+
+libvpx_dir = "#thirdparty/libvpx/"
+
+libvpx_sources = [
+	"vp8/vp8_dx_iface.c",
+
+	"vp8/common/generic/systemdependent.c",
+
+	"vp8/common/alloccommon.c",
+	"vp8/common/blockd.c",
+	"vp8/common/copy_c.c",
+	"vp8/common/debugmodes.c",
+	"vp8/common/dequantize.c",
+	"vp8/common/entropy.c",
+	"vp8/common/entropymode.c",
+	"vp8/common/entropymv.c",
+	"vp8/common/extend.c",
+	"vp8/common/filter.c",
+	"vp8/common/findnearmv.c",
+	"vp8/common/idct_blk.c",
+	"vp8/common/idctllm.c",
+	"vp8/common/loopfilter_filters.c",
+	"vp8/common/mbpitch.c",
+	"vp8/common/modecont.c",
+	"vp8/common/quant_common.c",
+	"vp8/common/reconinter.c",
+	"vp8/common/reconintra.c",
+	"vp8/common/reconintra4x4.c",
+	"vp8/common/rtcd.c",
+	"vp8/common/setupintrarecon.c",
+	"vp8/common/swapyv12buffer.c",
+	"vp8/common/treecoder.c",
+	"vp8/common/vp8_loopfilter.c",
+
+	"vp8/decoder/dboolhuff.c",
+	"vp8/decoder/decodeframe.c",
+	"vp8/decoder/decodemv.c",
+	"vp8/decoder/detokenize.c",
+	"vp8/decoder/onyxd_if.c",
+	"vp8/decoder/threading.c",
+
+
+	"vp9/vp9_dx_iface.c",
+
+	"vp9/common/vp9_alloccommon.c",
+	"vp9/common/vp9_blockd.c",
+	"vp9/common/vp9_common_data.c",
+	"vp9/common/vp9_debugmodes.c",
+	"vp9/common/vp9_entropy.c",
+	"vp9/common/vp9_entropymode.c",
+	"vp9/common/vp9_entropymv.c",
+	"vp9/common/vp9_filter.c",
+	"vp9/common/vp9_frame_buffers.c",
+	"vp9/common/vp9_idct.c",
+	"vp9/common/vp9_loopfilter.c",
+	"vp9/common/vp9_mvref_common.c",
+	"vp9/common/vp9_pred_common.c",
+	"vp9/common/vp9_quant_common.c",
+	"vp9/common/vp9_reconinter.c",
+	"vp9/common/vp9_reconintra.c",
+	"vp9/common/vp9_rtcd.c",
+	"vp9/common/vp9_scale.c",
+	"vp9/common/vp9_scan.c",
+	"vp9/common/vp9_seg_common.c",
+	"vp9/common/vp9_thread_common.c",
+	"vp9/common/vp9_tile_common.c",
+
+	"vp9/decoder/vp9_decodeframe.c",
+	"vp9/decoder/vp9_decodemv.c",
+	"vp9/decoder/vp9_decoder.c",
+	"vp9/decoder/vp9_detokenize.c",
+	"vp9/decoder/vp9_dsubexp.c",
+	"vp9/decoder/vp9_dthread.c",
+
+
+	"vpx/src/vpx_codec.c",
+	"vpx/src/vpx_decoder.c",
+	"vpx/src/vpx_image.c",
+	"vpx/src/vpx_psnr.c",
+
+
+	"vpx_dsp/bitreader.c",
+	"vpx_dsp/bitreader_buffer.c",
+	"vpx_dsp/intrapred.c",
+	"vpx_dsp/inv_txfm.c",
+	"vpx_dsp/loopfilter.c",
+	"vpx_dsp/prob.c",
+	"vpx_dsp/vpx_convolve.c",
+	"vpx_dsp/vpx_dsp_rtcd.c",
+
+
+	"vpx_mem/vpx_mem.c",
+
+
+	"vpx_scale/vpx_scale_rtcd.c",
+
+	"vpx_scale/generic/yv12config.c",
+	"vpx_scale/generic/yv12extend.c",
+
+
+	"vpx_util/vpx_thread.c"
+]
+
+libvpx_sources_intrin_x86 = [
+	"vp8/common/x86/filter_x86.c",
+	"vp8/common/x86/loopfilter_x86.c",
+	"vp8/common/x86/vp8_asm_stubs.c",
+
+
+	"vpx_dsp/x86/vpx_asm_stubs.c"
+]
+libvpx_sources_intrin_x86_mmx = [
+	"vp8/common/x86/idct_blk_mmx.c",
+]
+libvpx_sources_intrin_x86_sse2 = [
+	"vp8/common/x86/idct_blk_sse2.c",
+
+
+	"vp9/common/x86/vp9_idct_intrin_sse2.c",
+
+
+	"vpx_dsp/x86/inv_txfm_sse2.c",
+	"vpx_dsp/x86/loopfilter_sse2.c",
+]
+libvpx_sources_intrin_x86_ssse3 = [
+	"vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c"
+]
+libvpx_sources_intrin_x86_avx2 = [
+	"vpx_dsp/x86/loopfilter_avx2.c",
+	"vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c"
+]
+libvpx_sources_x86asm = [
+	"vp8/common/x86/copy_sse2.asm",
+	"vp8/common/x86/copy_sse3.asm",
+	"vp8/common/x86/dequantize_mmx.asm",
+	"vp8/common/x86/idctllm_mmx.asm",
+	"vp8/common/x86/idctllm_sse2.asm",
+	"vp8/common/x86/iwalsh_mmx.asm",
+	"vp8/common/x86/iwalsh_sse2.asm",
+	"vp8/common/x86/loopfilter_sse2.asm",
+	"vp8/common/x86/recon_mmx.asm",
+	"vp8/common/x86/recon_sse2.asm",
+	"vp8/common/x86/subpixel_mmx.asm",
+	"vp8/common/x86/subpixel_sse2.asm",
+	"vp8/common/x86/subpixel_ssse3.asm",
+	"vp8/common/x86/vp8_loopfilter_mmx.asm",
+
+
+	"vpx_dsp/x86/intrapred_sse2.asm",
+	"vpx_dsp/x86/intrapred_ssse3.asm",
+	"vpx_dsp/x86/inv_wht_sse2.asm",
+	"vpx_dsp/x86/vpx_convolve_copy_sse2.asm",
+	"vpx_dsp/x86/vpx_subpixel_8t_sse2.asm",
+	"vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm",
+	"vpx_dsp/x86/vpx_subpixel_bilinear_sse2.asm",
+	"vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm",
+
+
+	"vpx_ports/emms.asm"
+]
+libvpx_sources_x86_64asm = [
+	"vp8/common/x86/loopfilter_block_sse2_x86_64.asm",
+
+
+	"vpx_dsp/x86/inv_txfm_ssse3_x86_64.asm"
+]
+
+libvpx_sources_arm = [
+	"vpx_ports/arm_cpudetect.c",
+
+
+	"vp8/common/arm/loopfilter_arm.c",
+]
+libvpx_sources_arm_neon = [
+	"vp8/common/arm/neon/bilinearpredict_neon.c",
+	"vp8/common/arm/neon/copymem_neon.c",
+	"vp8/common/arm/neon/dc_only_idct_add_neon.c",
+	"vp8/common/arm/neon/dequant_idct_neon.c",
+	"vp8/common/arm/neon/dequantizeb_neon.c",
+	"vp8/common/arm/neon/idct_blk_neon.c",
+	"vp8/common/arm/neon/idct_dequant_0_2x_neon.c",
+	"vp8/common/arm/neon/idct_dequant_full_2x_neon.c",
+	"vp8/common/arm/neon/iwalsh_neon.c",
+	"vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c",
+	"vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c",
+	"vp8/common/arm/neon/mbloopfilter_neon.c",
+	"vp8/common/arm/neon/shortidct4x4llm_neon.c",
+	"vp8/common/arm/neon/sixtappredict_neon.c",
+	"vp8/common/arm/neon/vp8_loopfilter_neon.c",
+
+
+	"vp9/common/arm/neon/vp9_iht4x4_add_neon.c",
+	"vp9/common/arm/neon/vp9_iht8x8_add_neon.c",
+
+
+	"vpx_dsp/arm/idct16x16_1_add_neon.c",
+	"vpx_dsp/arm/idct16x16_add_neon.c",
+	"vpx_dsp/arm/idct16x16_neon.c",
+	"vpx_dsp/arm/idct32x32_1_add_neon.c",
+	"vpx_dsp/arm/idct32x32_add_neon.c",
+	"vpx_dsp/arm/idct4x4_1_add_neon.c",
+	"vpx_dsp/arm/idct4x4_add_neon.c",
+	"vpx_dsp/arm/idct8x8_1_add_neon.c",
+	"vpx_dsp/arm/idct8x8_add_neon.c",
+	"vpx_dsp/arm/intrapred_neon.c",
+	"vpx_dsp/arm/loopfilter_16_neon.c",
+	"vpx_dsp/arm/loopfilter_4_neon.c",
+	"vpx_dsp/arm/loopfilter_8_neon.c",
+	"vpx_dsp/arm/loopfilter_neon.c",
+	"vpx_dsp/arm/vpx_convolve8_avg_neon.c",
+	"vpx_dsp/arm/vpx_convolve8_neon.c",
+	"vpx_dsp/arm/vpx_convolve_avg_neon.c",
+	"vpx_dsp/arm/vpx_convolve_copy_neon.c",
+	"vpx_dsp/arm/vpx_convolve_neon.c"
+]
+libvpx_sources_arm_neon_gas = [
+	"vpx_dsp/arm/gas/intrapred_neon_asm.s",
+	"vpx_dsp/arm/gas/loopfilter_mb_neon.s",
+	"vpx_dsp/arm/gas/save_reg_neon.s"
+]
+libvpx_sources_arm_neon_armasm_ms = [
+	"vpx_dsp/arm/armasm_ms/intrapred_neon_asm.asm",
+	"vpx_dsp/arm/armasm_ms/loopfilter_mb_neon.asm",
+	"vpx_dsp/arm/armasm_ms/save_reg_neon.asm"
+]
+libvpx_sources_arm_neon_gas_apple = [
+	"vpx_dsp/arm/gas_apple/intrapred_neon_asm.s",
+	"vpx_dsp/arm/gas_apple/loopfilter_mb_neon.s",
+	"vpx_dsp/arm/gas_apple/save_reg_neon.s"
+]
+
+libvpx_sources = [libvpx_dir + file for file in libvpx_sources]
+libvpx_sources_intrin_x86 = [libvpx_dir + file for file in libvpx_sources_intrin_x86]
+libvpx_sources_intrin_x86_mmx = [libvpx_dir + file for file in libvpx_sources_intrin_x86_mmx]
+libvpx_sources_intrin_x86_sse2 = [libvpx_dir + file for file in libvpx_sources_intrin_x86_sse2]
+libvpx_sources_intrin_x86_ssse3 = [libvpx_dir + file for file in libvpx_sources_intrin_x86_ssse3]
+libvpx_sources_intrin_x86_avx2 = [libvpx_dir + file for file in libvpx_sources_intrin_x86_avx2]
+libvpx_sources_x86asm = [libvpx_dir + file for file in libvpx_sources_x86asm]
+libvpx_sources_x86_64asm = [libvpx_dir + file for file in libvpx_sources_x86_64asm]
+libvpx_sources_arm = [libvpx_dir + file for file in libvpx_sources_arm]
+libvpx_sources_arm_neon = [libvpx_dir + file for file in libvpx_sources_arm_neon]
+libvpx_sources_arm_neon_gas = [libvpx_dir + file for file in libvpx_sources_arm_neon_gas]
+libvpx_sources_arm_neon_armasm_ms = [libvpx_dir + file for file in libvpx_sources_arm_neon_armasm_ms]
+libvpx_sources_arm_neon_gas_apple = [libvpx_dir + file for file in libvpx_sources_arm_neon_gas_apple]
+
+
+Import('env')
+Import('env_webm')
+
+env_webm.Append(CPPPATH = [libvpx_dir])
+
+env_libvpx = env.Clone()
+env_libvpx.Append(CPPPATH = [libvpx_dir])
+
+cpu_bits = env["bits"]
+osx_fat = (env["platform"] == 'osx' and cpu_bits == 'fat')
+webm_cpu_x86 = False
+webm_cpu_arm = False
+if env["platform"] == 'winrt':
+	if 'arm' in env["PROGSUFFIX"]:
+		webm_cpu_arm = True
+	else:
+		webm_cpu_x86 = True
+else:
+	is_android_x86 = (env["platform"] == 'android' and env["android_arch"] == 'x86')
+	if is_android_x86:
+		cpu_bits = '32'
+	if osx_fat:
+		webm_cpu_x86 = True
+	else:
+		webm_cpu_x86 = (cpu_bits == '32' or cpu_bits == '64') and (env["platform"] == 'windows' or env["platform"] == 'x11' or env["platform"] == 'osx' or env["platform"] == 'haiku' or is_android_x86)
+		webm_cpu_arm = env["platform"] == 'iphone' or env["platform"] == 'bb10' or (env["platform"] == 'android' and env["android_arch"] != 'x86')
+
+if webm_cpu_x86:
+	import subprocess
+	import os
+
+	yasm_paths = [
+		"yasm",
+		"../../../yasm",
+	]
+
+	yasm_found = False
+
+	devnull = open(os.devnull)
+	for yasm_path in yasm_paths:
+		try:
+			yasm_found = True
+			subprocess.Popen([yasm_path, "--version"], stdout=devnull, stderr=devnull).communicate()
+		except:
+			yasm_found = False
+		if yasm_found:
+			break
+
+	if not yasm_found:
+		webm_cpu_x86 = False
+		print "YASM is necessary for WebM SIMD optimizations."
+
+webm_simd_optimizations = False
+
+if webm_cpu_x86:
+	if osx_fat:
+		#'osx' platform only: run python script which will compile using 'yasm' command and then merge 32-bit and 64-bit using 'lipo' command
+		env_libvpx["AS"] = 'python modules/webm/libvpx/yasm_osx_fat.py'
+		env_libvpx["ASFLAGS"] = '-I' + libvpx_dir[1:]
+		env_libvpx["ASCOM"] = '$AS $ASFLAGS $TARGET $SOURCES'
+	else:
+		if env["platform"] == 'windows' or env["platform"] == 'winrt':
+			env_libvpx["ASFORMAT"] = 'win'
+		elif env["platform"] == 'osx':
+			env_libvpx["ASFORMAT"] = 'macho'
+		else:
+			env_libvpx["ASFORMAT"] = 'elf'
+		env_libvpx["ASFORMAT"] += cpu_bits
+
+		env_libvpx["AS"] = 'yasm'
+		env_libvpx["ASFLAGS"] = '-I' + libvpx_dir[1:] + ' -f $ASFORMAT -D $ASCPU'
+		env_libvpx["ASCOM"] = '$AS $ASFLAGS -o $TARGET $SOURCES'
+
+		if cpu_bits == '32':
+			env_libvpx["ASCPU"] = 'X86_32'
+		elif cpu_bits == '64':
+			env_libvpx["ASCPU"] = 'X86_64'
+
+	env_libvpx.Append(CCFLAGS=['-DWEBM_X86ASM'])
+
+	webm_simd_optimizations = True
+
+if webm_cpu_arm:
+	if env["platform"] == 'iphone':
+		env_libvpx["ASFLAGS"] = '-arch armv7'
+	elif env["platform"] == 'android':
+		env_libvpx["ASFLAGS"] = '-mfpu=neon'
+	elif env["platform"] == 'winrt':
+		env_libvpx["AS"] = 'armasm'
+		env_libvpx["ASFLAGS"] = ''
+		env_libvpx["ASCOM"] = '$AS $ASFLAGS -o $TARGET $SOURCES'
+
+	env_libvpx.Append(CCFLAGS=['-DWEBM_ARMASM'])
+
+	webm_simd_optimizations = True
+
+if webm_simd_optimizations == False:
+	print "WebM SIMD optimizations are disabled. Check if your CPU architecture, CPU bits or platform are supported!"
+
+
+env_libvpx.add_source_files(env.modules_sources, libvpx_sources)
+if webm_cpu_x86:
+	is_clang_or_gcc = ('gcc' in env["CC"]) or ('clang' in env["CC"])
+
+	env_libvpx_mmx = env_libvpx.Clone()
+	if cpu_bits == '32' and is_clang_or_gcc:
+		env_libvpx_mmx.Append(CCFLAGS=['-mmmx'])
+	env_libvpx_mmx.add_source_files(env.modules_sources, libvpx_sources_intrin_x86_mmx)
+
+	env_libvpx_sse2 = env_libvpx.Clone()
+	if cpu_bits == '32' and is_clang_or_gcc:
+		env_libvpx_sse2.Append(CCFLAGS=['-msse2'])
+	env_libvpx_sse2.add_source_files(env.modules_sources, libvpx_sources_intrin_x86_sse2)
+
+	env_libvpx_ssse3 = env_libvpx.Clone()
+	if is_clang_or_gcc:
+		env_libvpx_ssse3.Append(CCFLAGS=['-mssse3'])
+	env_libvpx_ssse3.add_source_files(env.modules_sources, libvpx_sources_intrin_x86_ssse3)
+
+	env_libvpx_avx2 = env_libvpx.Clone()
+	if is_clang_or_gcc:
+		env_libvpx_avx2.Append(CCFLAGS=['-mavx2'])
+	env_libvpx_avx2.add_source_files(env.modules_sources, libvpx_sources_intrin_x86_avx2)
+
+	env_libvpx.add_source_files(env.modules_sources, libvpx_sources_intrin_x86)
+
+	env_libvpx.add_source_files(env.modules_sources, libvpx_sources_x86asm)
+	if cpu_bits == '64' or osx_fat:
+		env_libvpx.add_source_files(env.modules_sources, libvpx_sources_x86_64asm)
+elif webm_cpu_arm:
+	env_libvpx.add_source_files(env.modules_sources, libvpx_sources_arm)
+
+	env_libvpx_neon = env_libvpx.Clone()
+	if env["platform"] == 'android' and env["android_arch"] == 'armv6':
+		env_libvpx_neon.Append(CCFLAGS=['-mfpu=neon'])
+	env_libvpx_neon.add_source_files(env.modules_sources, libvpx_sources_arm_neon)
+
+	if env["platform"] == 'winrt':
+		env_libvpx.add_source_files(env.modules_sources, libvpx_sources_arm_neon_armasm_ms)
+	elif env["platform"] == 'iphone':
+		env_libvpx.add_source_files(env.modules_sources, libvpx_sources_arm_neon_gas_apple)
+	else:
+		env_libvpx.add_source_files(env.modules_sources, libvpx_sources_arm_neon_gas)

+ 38 - 0
modules/webm/libvpx/yasm_osx_fat.py

@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+
+import sys
+import os
+
+includes    = sys.argv[1]
+output_file = sys.argv[2]
+input_file  = sys.argv[3]
+
+can_remove = {}
+
+lipo_command = ''
+
+exit_code = 1
+
+for arch in ['32', '64']:
+	if arch == '32' and input_file.endswith('x86_64.asm'):
+		can_remove[arch] = False
+	else:
+		command = 'yasm ' + includes + ' -f macho' + arch + ' -D X86_' + arch + ' -o ' + output_file + '.' + arch + ' ' + input_file
+		print(command)
+		if os.system(command) == 0:
+			lipo_command += output_file + '.' + arch + ' '
+			can_remove[arch] = True
+		else:
+			can_remove[arch] = False
+
+if lipo_command != '':
+	lipo_command = 'lipo -create ' + lipo_command + '-output ' + output_file
+	print(lipo_command)
+	if os.system(lipo_command) == 0:
+		exit_code = 0
+
+for arch in ['32', '64']:
+	if can_remove[arch]:
+		os.remove(output_file + '.' + arch)
+
+sys.exit(exit_code)

+ 45 - 0
modules/webm/register_types.cpp

@@ -0,0 +1,45 @@
+/*************************************************************************/
+/*  register_types.cpp                                                   */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                    http://www.godotengine.org                         */
+/*************************************************************************/
+/* Copyright (c) 2007-2016 Juan Linietsky, Ariel Manzur.                 */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include "register_types.h"
+
+#include "video_stream_webm.h"
+
+static ResourceFormatLoaderVideoStreamWebm *webm_stream_loader = NULL;
+
+void register_webm_types() {
+
+	webm_stream_loader = memnew(ResourceFormatLoaderVideoStreamWebm);
+	ResourceLoader::add_resource_format_loader(webm_stream_loader);
+	ObjectTypeDB::register_type<VideoStreamWebm>();
+}
+
+void unregister_webm_types() {
+
+	memdelete(webm_stream_loader);
+}
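
Note (usage sketch, not part of the diff): once register_webm_types() has run, the loader registered above makes ".webm" files loadable like any other resource, and ObjectTypeDB::register_type exposes VideoStreamWebm to the rest of the engine. A minimal sketch of what that enables, assuming the usual Godot 2.x ResourceLoader API; the resource path is hypothetical.

// Sketch only, not from the PR: loading a WebM stream through the registered loader.
#include "io/resource_loader.h"
#include "modules/webm/video_stream_webm.h"

Ref<VideoStreamWebm> load_webm_stream_example() {

	// ResourceFormatLoaderVideoStreamWebm, registered above, recognizes the extension.
	Ref<VideoStreamWebm> stream = ResourceLoader::load("res://intro.webm"); // hypothetical path
	ERR_FAIL_COND_V(stream.is_null(), Ref<VideoStreamWebm>());
	return stream;
}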

+ 30 - 0
modules/webm/register_types.h

@@ -0,0 +1,30 @@
+/*************************************************************************/
+/*  register_types.h                                                     */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                    http://www.godotengine.org                         */
+/*************************************************************************/
+/* Copyright (c) 2007-2016 Juan Linietsky, Ariel Manzur.                 */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+void register_webm_types();
+void unregister_webm_types();

+ 446 - 0
modules/webm/video_stream_webm.cpp

@@ -0,0 +1,446 @@
+/*************************************************************************/
+/*  video_stream_webm.cpp                                                */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                    http://www.godotengine.org                         */
+/*************************************************************************/
+/* Copyright (c) 2007-2016 Juan Linietsky, Ariel Manzur.                 */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include "video_stream_webm.h"
+
+#include "VPXDecoder.hpp"
+#include "OpusVorbisDecoder.hpp"
+
+#include "mkvparser/mkvparser.h"
+#include "../theora/yuv2rgb.h"
+
+#include "os/file_access.h"
+#include "globals.h"
+
+#include <string.h>
+
+class MkvReader : public mkvparser::IMkvReader {
+
+public:
+	MkvReader(const String &p_file) {
+
+		file = FileAccess::open(p_file, FileAccess::READ);
+		ERR_FAIL_COND(!file);
+	}
+	~MkvReader() {
+
+		if (file)
+			memdelete(file);
+	}
+
+	virtual int Read(long long pos, long len, unsigned char *buf) {
+
+		if (file) {
+
+			if (file->get_pos() != (size_t)pos)
+				file->seek(pos);
+			if (file->get_buffer(buf, len) == len)
+				return 0;
+		}
+		return -1;
+	}
+
+	virtual int Length(long long *total, long long *available) {
+
+		if (file) {
+
+			const size_t len = file->get_len();
+			if (total)
+				*total = len;
+			if (available)
+				*available = len;
+			return 0;
+		}
+		return -1;
+	}
+
+private:
+	FileAccess *file;
+};
+
+/**/
+
+VideoStreamPlaybackWebm::VideoStreamPlaybackWebm() :
+	audio_track(0),
+	webm(NULL),
+	video(NULL),
+	audio(NULL),
+	video_frames(NULL), audio_frame(NULL),
+	video_frames_pos(0), video_frames_capacity(0),
+	num_decoded_samples(0), samples_offset(-1),
+	mix_callback(NULL),
+	mix_udata(NULL),
+	playing(false), paused(false),
+	delay_compensation(0.0),
+	time(0.0), video_frame_delay(0.0), video_pos(0.0),
+	texture(memnew(ImageTexture)),
+	pcm(NULL)
+{}
+VideoStreamPlaybackWebm::~VideoStreamPlaybackWebm() {
+
+	delete_pointers();
+}
+
+bool VideoStreamPlaybackWebm::open_file(const String &p_file) {
+
+	file_name = p_file;
+	webm = memnew(WebMDemuxer(new MkvReader(file_name), 0, audio_track));
+	if (webm->isOpen()) {
+
+		video = memnew(VPXDecoder(*webm, 8)); //TODO: Detect CPU threads
+		if (video->isOpen()) {
+
+			audio = memnew(OpusVorbisDecoder(*webm));
+			if (audio->isOpen()) {
+
+				audio_frame = memnew(WebMFrame);
+				pcm = (int16_t *)memalloc(sizeof(int16_t) * audio->getBufferSamples() * webm->getChannels());
+			} else {
+
+				memdelete(audio);
+				audio = NULL;
+			}
+
+			frame_data.resize((webm->getWidth() * webm->getHeight()) << 2);
+			texture->create(webm->getWidth(), webm->getHeight(), Image::FORMAT_RGBA, Texture::FLAG_FILTER | Texture::FLAG_VIDEO_SURFACE);
+
+			return true;
+		}
+		memdelete(video);
+		video = NULL;
+	}
+	memdelete(webm);
+	webm = NULL;
+	return false;
+}
+
+void VideoStreamPlaybackWebm::stop() {
+
+	if (playing) {
+
+		delete_pointers();
+
+		pcm = NULL;
+
+		audio_frame = NULL;
+		video_frames = NULL;
+
+		video = NULL;
+		audio = NULL;
+
+		open_file(file_name); //Should not fail here...
+
+		video_frames_capacity = video_frames_pos = 0;
+		num_decoded_samples = 0;
+		samples_offset = -1;
+		video_frame_delay = video_pos = 0.0;
+	}
+	time = 0.0;
+	playing = false;
+}
+void VideoStreamPlaybackWebm::play() {
+
+	stop();
+
+	delay_compensation = Globals::get_singleton()->get("audio/video_delay_compensation_ms");
+	delay_compensation /= 1000.0;
+
+	playing = true;
+}
+
+bool VideoStreamPlaybackWebm::is_playing() const {
+
+	return playing;
+}
+
+void VideoStreamPlaybackWebm::set_paused(bool p_paused) {
+
+	paused = p_paused;
+}
+bool VideoStreamPlaybackWebm::is_paused(bool p_paused) const {
+
+	return paused;
+}
+
+void VideoStreamPlaybackWebm::set_loop(bool p_enable) {
+
+	//Empty
+}
+bool VideoStreamPlaybackWebm::has_loop() const {
+
+	return false;
+}
+
+float VideoStreamPlaybackWebm::get_length() const {
+
+	if (webm)
+		return webm->getLength();
+	return 0.0f;
+}
+
+float VideoStreamPlaybackWebm::get_pos() const {
+
+	return video_pos;
+}
+void VideoStreamPlaybackWebm::seek_pos(float p_time) {
+
+	//Not implemented
+}
+
+void VideoStreamPlaybackWebm::set_audio_track(int p_idx) {
+
+	audio_track = p_idx;
+}
+
+Ref<Texture> VideoStreamPlaybackWebm::get_texture() {
+
+	return texture;
+}
+void VideoStreamPlaybackWebm::update(float p_delta) {
+
+	if ((!playing || paused) || !video)
+		return;
+
+	bool audio_buffer_full = false;
+
+	if (samples_offset > -1) {
+
+		//Mix remaining samples
+		const int to_read = num_decoded_samples - samples_offset;
+		const int mixed = mix_callback(mix_udata, pcm + samples_offset * webm->getChannels(), to_read);
+		if (mixed != to_read) {
+
+			samples_offset += mixed;
+			audio_buffer_full = true;
+		} else {
+
+			samples_offset = -1;
+		}
+	}
+
+	const bool hasAudio = (audio && mix_callback);
+	while ((hasAudio && (!audio_buffer_full || !has_enough_video_frames())) || (!hasAudio && video_frames_pos == 0)) {
+
+		if (hasAudio && !audio_buffer_full && audio_frame->isValid() && audio->getPCMS16(*audio_frame, pcm, num_decoded_samples) && num_decoded_samples > 0) {
+
+			const int mixed = mix_callback(mix_udata, pcm, num_decoded_samples);
+			if (mixed != num_decoded_samples) {
+
+				samples_offset = mixed;
+				audio_buffer_full = true;
+			}
+		}
+
+		WebMFrame *video_frame;
+		if (video_frames_pos >= video_frames_capacity) {
+
+			WebMFrame **video_frames_new = (WebMFrame **)memrealloc(video_frames, ++video_frames_capacity * sizeof(void *));
+			ERR_FAIL_COND(!video_frames_new); //Out of memory
+			(video_frames = video_frames_new)[video_frames_capacity - 1] = memnew(WebMFrame);
+		}
+		video_frame = video_frames[video_frames_pos];
+
+		if (!webm->readFrame(video_frame, audio_frame)) //This will invalidate frames
+			break; //Can't demux, EOS?
+
+		if (video_frame->isValid())
+			++video_frames_pos;
+	};
+
+	const double video_delay = video->getFramesDelay() * video_frame_delay;
+
+	bool want_this_frame = false;
+	while (video_frames_pos > 0 && !want_this_frame) {
+
+		WebMFrame *video_frame = video_frames[0];
+		if (video_frame->time <= time + video_delay) {
+
+			if (video->decode(*video_frame)) {
+
+				VPXDecoder::IMAGE_ERROR err;
+				VPXDecoder::Image image;
+
+				while ((err = video->getImage(image)) != VPXDecoder::NO_FRAME) {
+
+					want_this_frame = (time - video_frame->time <= video_frame_delay);
+
+					if (want_this_frame) {
+
+						if (err == VPXDecoder::NO_ERROR && image.w == webm->getWidth() && image.h == webm->getHeight()) {
+
+							DVector<uint8_t>::Write w = frame_data.write();
+							bool converted = false;
+
+							if (image.chromaShiftW == 1 && image.chromaShiftH == 1) {
+
+								yuv420_2_rgb8888(w.ptr(), image.planes[0], image.planes[2], image.planes[1], image.w, image.h, image.linesize[0], image.linesize[1], image.w << 2, 0);
+// 								libyuv::I420ToARGB(image.planes[0], image.linesize[0], image.planes[2], image.linesize[2], image.planes[1], image.linesize[1], w.ptr(), image.w << 2, image.w, image.h);
+								converted = true;
+							} else if (image.chromaShiftW == 1 && image.chromaShiftH == 0) {
+
+								yuv422_2_rgb8888(w.ptr(), image.planes[0], image.planes[2], image.planes[1], image.w, image.h, image.linesize[0], image.linesize[1], image.w << 2, 0);
+// 								libyuv::I422ToARGB(image.planes[0], image.linesize[0], image.planes[2], image.linesize[2], image.planes[1], image.linesize[1], w.ptr(), image.w << 2, image.w, image.h);
+								converted = true;
+							} else if (image.chromaShiftW == 0 && image.chromaShiftH == 0) {
+
+								yuv444_2_rgb8888(w.ptr(), image.planes[0], image.planes[2], image.planes[1], image.w, image.h, image.linesize[0], image.linesize[1], image.w << 2, 0);
+// 								libyuv::I444ToARGB(image.planes[0], image.linesize[0], image.planes[2], image.linesize[2], image.planes[1], image.linesize[1], w.ptr(), image.w << 2, image.w, image.h);
+								converted = true;
+							} else if (image.chromaShiftW == 2 && image.chromaShiftH == 0) {
+
+// 								libyuv::I411ToARGB(image.planes[0], image.linesize[0], image.planes[2], image.linesize[2], image.planes[1], image.linesize[1], w.ptr(), image.w << 2, image.w, image.h);
+// 								converted = true;
+							}
+
+							if (converted)
+								texture->set_data(Image(image.w, image.h, 0, Image::FORMAT_RGBA, frame_data)); //Zero copy send to visual server
+						}
+
+						break;
+					}
+				}
+			}
+
+			video_frame_delay = video_frame->time - video_pos;
+			video_pos = video_frame->time;
+
+			memmove(video_frames, video_frames + 1, (--video_frames_pos) * sizeof(void *));
+			video_frames[video_frames_pos] = video_frame;
+		} else {
+
+			break;
+		}
+	}
+
+	time += p_delta;
+
+	if (video_frames_pos == 0 && webm->isEOS())
+		stop();
+}
+
+void VideoStreamPlaybackWebm::set_mix_callback(VideoStreamPlayback::AudioMixCallback p_callback, void *p_userdata) {
+
+	mix_callback = p_callback;
+	mix_udata = p_userdata;
+}
+int VideoStreamPlaybackWebm::get_channels() const {
+
+	if (audio)
+		return webm->getChannels();
+	return 0;
+}
+int VideoStreamPlaybackWebm::get_mix_rate() const {
+
+	if (audio)
+		return webm->getSampleRate();
+	return 0;
+}
+
+inline bool VideoStreamPlaybackWebm::has_enough_video_frames() const
+{
+	if (video_frames_pos > 0) {
+
+		const double audio_delay = AudioServer::get_singleton()->get_output_delay();
+		const double video_time = video_frames[video_frames_pos - 1]->time;
+		return video_time >= time + audio_delay + delay_compensation;
+	}
+	return false;
+}
+
+void VideoStreamPlaybackWebm::delete_pointers() {
+
+	if (pcm)
+		memfree(pcm);
+
+	if (audio_frame)
+		memdelete(audio_frame);
+	for (int i = 0; i < video_frames_capacity; ++i)
+		memdelete(video_frames[i]);
+	if (video_frames)
+		memfree(video_frames);
+
+	if (video)
+		memdelete(video);
+	if (audio)
+		memdelete(audio);
+
+	if (webm)
+		memdelete(webm);
+}
+
+/**/
+
+RES ResourceFormatLoaderVideoStreamWebm::load(const String &p_path, const String &p_original_path, Error *r_error) {
+
+	Ref<VideoStreamWebm> stream = memnew(VideoStreamWebm);
+	stream->set_file(p_path);
+	if (r_error)
+		*r_error = OK;
+	return stream;
+}
+
+void ResourceFormatLoaderVideoStreamWebm::get_recognized_extensions(List<String> *p_extensions) const {
+
+	p_extensions->push_back("webm");
+}
+bool ResourceFormatLoaderVideoStreamWebm::handles_type(const String &p_type) const {
+
+	return (p_type == "VideoStream" || p_type == "VideoStreamWebm");
+}
+
+String ResourceFormatLoaderVideoStreamWebm::get_resource_type(const String &p_path) const {
+
+	const String exl = p_path.extension().to_lower();
+	if (exl == "webm")
+		return "VideoStreamWebm";
+	return "";
+}
+
+/**/
+
+VideoStreamWebm::VideoStreamWebm() :
+	audio_track(0)
+{}
+
+Ref<VideoStreamPlayback> VideoStreamWebm::instance_playback() {
+
+	Ref<VideoStreamPlaybackWebm> pb = memnew(VideoStreamPlaybackWebm);
+	pb->set_audio_track(audio_track);
+	if (pb->open_file(file))
+		return pb;
+	return NULL;
+}
+
+void VideoStreamWebm::set_file(const String &p_file) {
+
+	file = p_file;
+}
+void VideoStreamWebm::set_audio_track(int p_track) {
+
+	audio_track = p_track;
+}
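
Note (usage sketch, not part of the diff): inside the engine this playback object is driven by the VideoPlayer node, but the methods implemented above are enough to exercise it directly. A minimal manual-drive sketch based only on the API shown in this file; the path and the mix callback are hypothetical, and the callback signature is inferred from how mix_callback is invoked in update().

// Sketch only, not from the PR: driving VideoStreamPlaybackWebm by hand.
static int example_mix_callback(void *p_udata, const int16_t *p_pcm, int p_frames) {

	// A real callback would hand p_frames frames of interleaved S16 PCM to the mixer.
	return p_frames; // pretend everything was consumed
}

void manual_playback_example() {

	Ref<VideoStreamPlaybackWebm> pb = memnew(VideoStreamPlaybackWebm);
	pb->set_audio_track(0);
	if (!pb->open_file("res://movie.webm")) // hypothetical path
		return;

	pb->set_mix_callback(&example_mix_callback, NULL);
	pb->play();

	// The engine calls update() once per frame; 60 FPS is assumed here.
	while (pb->is_playing()) {
		pb->update(1.0 / 60.0);
		Ref<Texture> frame = pb->get_texture(); // latest decoded frame, RGBA
	}
}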

+ 128 - 0
modules/webm/video_stream_webm.h

@@ -0,0 +1,128 @@
+/*************************************************************************/
+/*  video_stream_webm.h                                                  */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                    http://www.godotengine.org                         */
+/*************************************************************************/
+/* Copyright (c) 2007-2016 Juan Linietsky, Ariel Manzur.                 */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include "scene/resources/video_stream.h"
+#include "io/resource_loader.h"
+
+class WebMFrame;
+class WebMDemuxer;
+class VPXDecoder;
+class OpusVorbisDecoder;
+
+class VideoStreamPlaybackWebm : public VideoStreamPlayback {
+
+	OBJ_TYPE(VideoStreamPlaybackWebm, VideoStreamPlayback)
+
+	String file_name;
+	int audio_track;
+
+	WebMDemuxer *webm;
+	VPXDecoder *video;
+	OpusVorbisDecoder *audio;
+
+	WebMFrame **video_frames, *audio_frame;
+	int video_frames_pos, video_frames_capacity;
+
+	int num_decoded_samples, samples_offset;
+	AudioMixCallback mix_callback;
+	void *mix_udata;
+
+	bool playing, paused;
+	double delay_compensation;
+	double time, video_frame_delay, video_pos;
+
+	DVector<uint8_t> frame_data;
+	Ref<ImageTexture> texture;
+
+	int16_t *pcm;
+
+public:
+	VideoStreamPlaybackWebm();
+	~VideoStreamPlaybackWebm();
+
+	bool open_file(const String &p_file);
+
+	virtual void stop();
+	virtual void play();
+
+	virtual bool is_playing() const;
+
+	virtual void set_paused(bool p_paused);
+	virtual bool is_paused(bool p_paused) const;
+
+	virtual void set_loop(bool p_enable);
+	virtual bool has_loop() const;
+
+	virtual float get_length() const;
+
+	virtual float get_pos() const;
+	virtual void seek_pos(float p_time);
+
+	virtual void set_audio_track(int p_idx);
+
+	virtual Ref<Texture> get_texture();
+	virtual void update(float p_delta);
+
+	virtual void set_mix_callback(AudioMixCallback p_callback, void *p_userdata);
+	virtual int get_channels() const;
+	virtual int get_mix_rate() const;
+
+private:
+	inline bool has_enough_video_frames() const;
+
+	void delete_pointers();
+};
+
+/**/
+
+class VideoStreamWebm : public VideoStream {
+
+	OBJ_TYPE(VideoStreamWebm, VideoStream)
+
+	String file;
+	int audio_track;
+
+public:
+	VideoStreamWebm();
+
+	virtual Ref<VideoStreamPlayback> instance_playback();
+
+	virtual void set_file(const String &p_file);
+	virtual void set_audio_track(int p_track);
+};
+
+/**/
+
+class ResourceFormatLoaderVideoStreamWebm : public ResourceFormatLoader {
+
+public:
+	virtual RES load(const String &p_path, const String &p_original_path, Error *r_error);
+	virtual void get_recognized_extensions(List<String> *p_extensions) const;
+	virtual bool handles_type(const String &p_type) const;
+	virtual String get_resource_type(const String &p_path) const;
+};

+ 3 - 0
platform/x11/detect.py

@@ -160,6 +160,9 @@ def configure(env):
 		env["libvorbis"] = "system"  # Needed to link against system libtheora
 		env.ParseConfig('pkg-config theora theoradec --cflags --libs')
 
+	if (env["libvpx"] == "system"):
+		env.ParseConfig('pkg-config vpx --cflags --libs')
+
 	if (env["libvorbis"] == "system"):
 		env["libogg"] = "system"  # Needed to link against system libvorbis
 		env.ParseConfig('pkg-config vorbis vorbisfile --cflags --libs')

+ 13 - 0
thirdparty/README.md

@@ -95,6 +95,12 @@ Files extracted from upstream source:
 - `scripts/pnglibconf.h.prebuilt` as `pnglibconf.h`
 
 
+## libsimplewebm
+
+- Upstream: https://github.com/zaps166/libsimplewebm
+- License: MIT, BSD-3-Clause
+
+
 ## libvorbis
 
 - Upstream: https://www.xiph.org/vorbis
@@ -108,6 +114,13 @@ Files extracted from upstream source:
 - COPYING
 
 
+## libvpx
+
+- Upstream: http://www.webmproject.org/code/
+- Version: 1.6.0
+- License: BSD-3-Clause
+
+
 ## libwebp
 
 - Upstream: https://chromium.googlesource.com/webm/libwebp/

+ 21 - 0
thirdparty/libsimplewebm/LICENSE

@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2016 Błażej Szczygieł
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

+ 224 - 0
thirdparty/libsimplewebm/OpusVorbisDecoder.cpp

@@ -0,0 +1,224 @@
+/*
+	MIT License
+
+	Copyright (c) 2016 Błażej Szczygieł
+
+	Permission is hereby granted, free of charge, to any person obtaining a copy
+	of this software and associated documentation files (the "Software"), to deal
+	in the Software without restriction, including without limitation the rights
+	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+	copies of the Software, and to permit persons to whom the Software is
+	furnished to do so, subject to the following conditions:
+
+	The above copyright notice and this permission notice shall be included in all
+	copies or substantial portions of the Software.
+
+	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+	SOFTWARE.
+*/
+
+#include "OpusVorbisDecoder.hpp"
+
+#include <vorbis/codec.h>
+#include <opus/opus.h>
+
+#include <string.h>
+
+struct VorbisDecoder
+{
+	vorbis_info info;
+	vorbis_dsp_state dspState;
+	vorbis_block block;
+	ogg_packet op;
+
+	bool hasDSPState, hasBlock;
+};
+
+/**/
+
+OpusVorbisDecoder::OpusVorbisDecoder(const WebMDemuxer &demuxer) :
+	m_vorbis(NULL), m_opus(NULL),
+	m_numSamples(0),
+	m_channels(demuxer.getChannels())
+{
+	switch (demuxer.getAudioCodec())
+	{
+		case WebMDemuxer::AUDIO_VORBIS:
+			if (openVorbis(demuxer))
+				return;
+			break;
+		case WebMDemuxer::AUDIO_OPUS:
+			if (openOpus(demuxer))
+				return;
+			break;
+		default:
+			return;
+	}
+	close();
+}
+OpusVorbisDecoder::~OpusVorbisDecoder()
+{
+	close();
+}
+
+bool OpusVorbisDecoder::isOpen() const
+{
+	return (m_vorbis || m_opus);
+}
+
+bool OpusVorbisDecoder::getPCMS16(WebMFrame &frame, short *buffer, int &numOutSamples)
+{
+	if (m_vorbis)
+	{
+		m_vorbis->op.packet = frame.buffer;
+		m_vorbis->op.bytes = frame.bufferSize;
+
+		if (vorbis_synthesis(&m_vorbis->block, &m_vorbis->op))
+			return false;
+		if (vorbis_synthesis_blockin(&m_vorbis->dspState, &m_vorbis->block))
+			return false;
+
+		const int maxSamples = getBufferSamples();
+		int samplesCount, count = 0;
+		float **pcm;
+		while ((samplesCount = vorbis_synthesis_pcmout(&m_vorbis->dspState, &pcm)))
+		{
+			const int toConvert = samplesCount <= maxSamples ? samplesCount : maxSamples;
+			for (int c = 0; c < m_channels; ++c)
+			{
+				float *samples = pcm[c];
+				for (int i = 0, j = c; i < toConvert; ++i, j += m_channels)
+				{
+					int sample = samples[i] * 32767.0f;
+					if (sample > 32767)
+						sample = 32767;
+					else if (sample < -32768)
+						sample = -32768;
+					buffer[count + j] = sample;
+				}
+			}
+			vorbis_synthesis_read(&m_vorbis->dspState, toConvert);
+			count += toConvert;
+		}
+
+		numOutSamples = count;
+		return true;
+	}
+	else if (m_opus)
+	{
+		const int samples = opus_decode(m_opus, frame.buffer, frame.bufferSize, buffer, m_numSamples, 0);
+		if (samples >= 0)
+		{
+			numOutSamples = samples;
+			return true;
+		}
+	}
+	return false;
+}
+
+bool OpusVorbisDecoder::openVorbis(const WebMDemuxer &demuxer)
+{
+	size_t extradataSize = 0;
+	const unsigned char *extradata = demuxer.getAudioExtradata(extradataSize);
+
+	if (extradataSize < 3 || !extradata || extradata[0] != 2)
+		return false;
+
+	size_t headerSize[3] = {0};
+	size_t offset = 1;
+
+	/* Calculate three headers sizes */
+	for (int i = 0; i < 2; ++i)
+	{
+		for (;;)
+		{
+			if (offset >= extradataSize)
+				return false;
+			headerSize[i] += extradata[offset];
+			if (extradata[offset++] < 0xFF)
+				break;
+		}
+	}
+	headerSize[2] = extradataSize - (headerSize[0] + headerSize[1] + offset);
+
+	if (headerSize[0] + headerSize[1] + headerSize[2] + offset != extradataSize)
+		return false;
+
+	ogg_packet op[3];
+	memset(op, 0, sizeof op);
+
+	op[0].packet = (unsigned char *)extradata + offset;
+	op[0].bytes = headerSize[0];
+	op[0].b_o_s = 1;
+
+	op[1].packet = (unsigned char *)extradata + offset + headerSize[0];
+	op[1].bytes = headerSize[1];
+
+	op[2].packet = (unsigned char *)extradata + offset + headerSize[0] + headerSize[1];
+	op[2].bytes = headerSize[2];
+
+	m_vorbis = new VorbisDecoder;
+	m_vorbis->hasDSPState = m_vorbis->hasBlock = false;
+	vorbis_info_init(&m_vorbis->info);
+
+	/* Upload three Vorbis headers into libvorbis */
+	vorbis_comment vc;
+	vorbis_comment_init(&vc);
+	for (int i = 0; i < 3; ++i)
+	{
+		if (vorbis_synthesis_headerin(&m_vorbis->info, &vc, &op[i]))
+		{
+			vorbis_comment_clear(&vc);
+			return false;
+		}
+	}
+	vorbis_comment_clear(&vc);
+
+	if (vorbis_synthesis_init(&m_vorbis->dspState, &m_vorbis->info))
+		return false;
+	m_vorbis->hasDSPState = true;
+
+	if (m_vorbis->info.channels != m_channels || m_vorbis->info.rate != demuxer.getSampleRate())
+		return false;
+
+	if (vorbis_block_init(&m_vorbis->dspState, &m_vorbis->block))
+		return false;
+	m_vorbis->hasBlock = true;
+
+	memset(&m_vorbis->op, 0, sizeof m_vorbis->op);
+
+	m_numSamples = 4096 / m_channels;
+
+	return true;
+}
+bool OpusVorbisDecoder::openOpus(const WebMDemuxer &demuxer)
+{
+	int opusErr = 0;
+	m_opus = opus_decoder_create(demuxer.getSampleRate(), m_channels, &opusErr);
+	if (!opusErr)
+	{
+		m_numSamples = demuxer.getSampleRate() * 0.06 + 0.5; //Maximum frame size (for 60 ms frame)
+		return true;
+	}
+	return false;
+}
+
+void OpusVorbisDecoder::close()
+{
+	if (m_vorbis)
+	{
+		if (m_vorbis->hasBlock)
+			vorbis_block_clear(&m_vorbis->block);
+		if (m_vorbis->hasDSPState)
+			vorbis_dsp_clear(&m_vorbis->dspState);
+		vorbis_info_clear(&m_vorbis->info);
+		delete m_vorbis;
+	}
+	if (m_opus)
+		opus_decoder_destroy(m_opus);
+}
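
Note (usage sketch, not part of the diff): the decoder above emits interleaved signed 16-bit PCM and the caller owns the output buffer. A short sketch of the intended calling pattern, pieced together from the WebMDemuxer, WebMFrame and OpusVorbisDecoder interfaces used in this PR; the buffer sizing mirrors how video_stream_webm.cpp allocates its pcm buffer.

// Sketch only, not from the PR: pulling PCM out of a WebM audio track.
#include "OpusVorbisDecoder.hpp"

void decode_audio_example(WebMDemuxer &demuxer) {

	OpusVorbisDecoder audio(demuxer);
	if (!audio.isOpen())
		return;

	// Worst case per packet: getBufferSamples() frames, interleaved per channel.
	short *pcm = new short[audio.getBufferSamples() * demuxer.getChannels()];

	WebMFrame videoFrame, audioFrame;
	while (demuxer.readFrame(&videoFrame, &audioFrame)) {
		int numOutSamples = 0;
		if (audioFrame.isValid() && audio.getPCMS16(audioFrame, pcm, numOutSamples)) {
			// numOutSamples frames of interleaved S16 PCM are now in pcm.
		}
	}

	delete[] pcm;
}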

+ 63 - 0
thirdparty/libsimplewebm/OpusVorbisDecoder.hpp

@@ -0,0 +1,63 @@
+/*
+	MIT License
+
+	Copyright (c) 2016 Błażej Szczygieł
+
+	Permission is hereby granted, free of charge, to any person obtaining a copy
+	of this software and associated documentation files (the "Software"), to deal
+	in the Software without restriction, including without limitation the rights
+	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+	copies of the Software, and to permit persons to whom the Software is
+	furnished to do so, subject to the following conditions:
+
+	The above copyright notice and this permission notice shall be included in all
+	copies or substantial portions of the Software.
+
+	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+	SOFTWARE.
+*/
+
+#ifndef OPUSVORBISDECODER_HPP
+#define OPUSVORBISDECODER_HPP
+
+#include "WebMDemuxer.hpp"
+
+struct VorbisDecoder;
+struct OpusDecoder;
+
+class OpusVorbisDecoder
+{
+	OpusVorbisDecoder(const OpusVorbisDecoder &);
+	void operator =(const OpusVorbisDecoder &);
+public:
+	OpusVorbisDecoder(const WebMDemuxer &demuxer);
+	~OpusVorbisDecoder();
+
+	bool isOpen() const;
+
+	inline int getBufferSamples() const
+	{
+		return m_numSamples;
+	}
+
+	bool getPCMS16(WebMFrame &frame, short *buffer, int &numOutSamples);
+
+private:
+	bool openVorbis(const WebMDemuxer &demuxer);
+	bool openOpus(const WebMDemuxer &demuxer);
+
+	void close();
+
+	VorbisDecoder *m_vorbis;
+	OpusDecoder *m_opus;
+	int m_numSamples;
+	int m_channels;
+
+};
+
+#endif // OPUSVORBISDECODER_HPP

+ 142 - 0
thirdparty/libsimplewebm/VPXDecoder.cpp

@@ -0,0 +1,142 @@
+/*
+	MIT License
+
+	Copyright (c) 2016 Błażej Szczygieł
+
+	Permission is hereby granted, free of charge, to any person obtaining a copy
+	of this software and associated documentation files (the "Software"), to deal
+	in the Software without restriction, including without limitation the rights
+	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+	copies of the Software, and to permit persons to whom the Software is
+	furnished to do so, subject to the following conditions:
+
+	The above copyright notice and this permission notice shall be included in all
+	copies or substantial portions of the Software.
+
+	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+	SOFTWARE.
+*/
+
+#include "VPXDecoder.hpp"
+
+#include <vpx/vpx_decoder.h>
+#include <vpx/vp8dx.h>
+
+#include <stdlib.h>
+#include <string.h>
+
+VPXDecoder::VPXDecoder(const WebMDemuxer &demuxer, unsigned threads) :
+	m_ctx(NULL),
+	m_iter(NULL),
+	m_delay(0)
+{
+	if (threads > 8)
+		threads = 8;
+	else if (threads < 1)
+		threads = 1;
+
+	const vpx_codec_dec_cfg_t codecCfg = {
+		threads,
+		0,
+		0
+	};
+	vpx_codec_iface_t *codecIface = NULL;
+
+	switch (demuxer.getVideoCodec())
+	{
+		case WebMDemuxer::VIDEO_VP8:
+			codecIface = vpx_codec_vp8_dx();
+			break;
+		case WebMDemuxer::VIDEO_VP9:
+			codecIface = vpx_codec_vp9_dx();
+			m_delay = threads - 1;
+			break;
+		default:
+			return;
+	}
+
+	m_ctx = new vpx_codec_ctx_t;
+	if (vpx_codec_dec_init(m_ctx, codecIface, &codecCfg, m_delay > 0 ? VPX_CODEC_USE_FRAME_THREADING : 0))
+	{
+		delete m_ctx;
+		m_ctx = NULL;
+	}
+}
+VPXDecoder::~VPXDecoder()
+{
+	if (m_ctx)
+	{
+		vpx_codec_destroy(m_ctx);
+		delete m_ctx;
+	}
+}
+
+bool VPXDecoder::decode(const WebMFrame &frame)
+{
+	m_iter = NULL;
+	return !vpx_codec_decode(m_ctx, frame.buffer, frame.bufferSize, NULL, 0);
+}
+VPXDecoder::IMAGE_ERROR VPXDecoder::getImage(Image &image)
+{
+	IMAGE_ERROR err = NO_FRAME;
+	if (vpx_image_t *img = vpx_codec_get_frame(m_ctx, &m_iter))
+	{
+		if ((img->fmt & VPX_IMG_FMT_PLANAR) && !(img->fmt & (VPX_IMG_FMT_HAS_ALPHA | VPX_IMG_FMT_HIGHBITDEPTH)))
+		{
+			if (img->stride[0] && img->stride[1] && img->stride[2])
+			{
+				const int uPlane = !!(img->fmt & VPX_IMG_FMT_UV_FLIP) + 1;
+				const int vPlane =  !(img->fmt & VPX_IMG_FMT_UV_FLIP) + 1;
+
+				image.w = img->d_w;
+				image.h = img->d_h;
+				image.chromaShiftW = img->x_chroma_shift;
+				image.chromaShiftH = img->y_chroma_shift;
+
+				image.planes[0] = img->planes[0];
+				image.planes[1] = img->planes[uPlane];
+				image.planes[2] = img->planes[vPlane];
+
+				image.linesize[0] = img->stride[0];
+				image.linesize[1] = img->stride[uPlane];
+				image.linesize[2] = img->stride[vPlane];
+
+				err = NO_ERROR;
+			}
+		}
+		else
+		{
+			err = UNSUPPORTED_FRAME;
+		}
+	}
+	return err;
+}
+
+/**/
+
+#if 0
+
+static inline int ceilRshift(int val, int shift)
+{
+	return (val + (1 << shift) - 1) >> shift;
+}
+
+int VPXDecoder::Image::getWidth(int plane) const
+{
+	if (!plane)
+		return w;
+	return ceilRshift(w, chromaShiftW);
+}
+int VPXDecoder::Image::getHeight(int plane) const
+{
+	if (!plane)
+		return h;
+	return ceilRshift(h, chromaShiftH);
+}
+
+#endif

+ 80 - 0
thirdparty/libsimplewebm/VPXDecoder.hpp

@@ -0,0 +1,80 @@
+/*
+	MIT License
+
+	Copyright (c) 2016 Błażej Szczygieł
+
+	Permission is hereby granted, free of charge, to any person obtaining a copy
+	of this software and associated documentation files (the "Software"), to deal
+	in the Software without restriction, including without limitation the rights
+	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+	copies of the Software, and to permit persons to whom the Software is
+	furnished to do so, subject to the following conditions:
+
+	The above copyright notice and this permission notice shall be included in all
+	copies or substantial portions of the Software.
+
+	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+	SOFTWARE.
+*/
+
+#ifndef VPXDECODER_HPP
+#define VPXDECODER_HPP
+
+#include "WebMDemuxer.hpp"
+
+struct vpx_codec_ctx;
+
+class VPXDecoder
+{
+	VPXDecoder(const VPXDecoder &);
+	void operator =(const VPXDecoder &);
+public:
+	class Image
+	{
+	public:
+#if 0
+		int getWidth(int plane) const;
+		int getHeight(int plane) const;
+#endif
+
+		int w, h;
+		int chromaShiftW, chromaShiftH;
+		unsigned char *planes[3];
+		int linesize[3];
+	};
+
+	enum IMAGE_ERROR
+	{
+		UNSUPPORTED_FRAME = -1,
+		NO_ERROR,
+		NO_FRAME
+	};
+
+	VPXDecoder(const WebMDemuxer &demuxer, unsigned threads = 1);
+	~VPXDecoder();
+
+	inline bool isOpen() const
+	{
+		return (bool)m_ctx;
+	}
+
+	inline int getFramesDelay() const
+	{
+		return m_delay;
+	}
+
+	bool decode(const WebMFrame &frame);
+	IMAGE_ERROR getImage(Image &image); //The data is NOT copied! Only 3-plane, 8-bit images are supported.
+
+private:
+	vpx_codec_ctx *m_ctx;
+	const void *m_iter;
+	int m_delay;
+};
+
+#endif // VPXDECODER_HPP
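
Reviewer note: getImage() above hands out pointers into the decoder's own buffers, so callers normally copy the planes out before the next decode() call. Below is an illustrative sketch, not part of the patch (copy_plane/copy_image are made-up names), that packs the three planes tightly and computes chroma dimensions the same way as the disabled getWidth()/getHeight() helpers in VPXDecoder.cpp above.

#include "VPXDecoder.hpp"

#include <string.h>

// Hypothetical helpers, not part of libsimplewebm.
static void copy_plane(const unsigned char *src, int srcStride, unsigned char *dst, int w, int h)
{
	for (int y = 0; y < h; ++y)
		memcpy(dst + y * w, src + y * srcStride, w);
}
static void copy_image(const VPXDecoder::Image &img, unsigned char *y, unsigned char *u, unsigned char *v)
{
	const int chromaW = (img.w + (1 << img.chromaShiftW) - 1) >> img.chromaShiftW;
	const int chromaH = (img.h + (1 << img.chromaShiftH) - 1) >> img.chromaShiftH;
	copy_plane(img.planes[0], img.linesize[0], y, img.w, img.h);
	copy_plane(img.planes[1], img.linesize[1], u, chromaW, chromaH);
	copy_plane(img.planes[2], img.linesize[2], v, chromaW, chromaH);
}
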

+ 241 - 0
thirdparty/libsimplewebm/WebMDemuxer.cpp

@@ -0,0 +1,241 @@
+/*
+	MIT License
+
+	Copyright (c) 2016 Błażej Szczygieł
+
+	Permission is hereby granted, free of charge, to any person obtaining a copy
+	of this software and associated documentation files (the "Software"), to deal
+	in the Software without restriction, including without limitation the rights
+	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+	copies of the Software, and to permit persons to whom the Software is
+	furnished to do so, subject to the following conditions:
+
+	The above copyright notice and this permission notice shall be included in all
+	copies or substantial portions of the Software.
+
+	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+	SOFTWARE.
+*/
+
+#include "WebMDemuxer.hpp"
+
+#include "mkvparser/mkvparser.h"
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+WebMFrame::WebMFrame() :
+	bufferSize(0), bufferCapacity(0),
+	buffer(NULL),
+	time(0),
+	key(false)
+{}
+WebMFrame::~WebMFrame()
+{
+	free(buffer);
+}
+
+/**/
+
+WebMDemuxer::WebMDemuxer(mkvparser::IMkvReader *reader, int videoTrack, int audioTrack) :
+	m_reader(reader),
+	m_segment(NULL),
+	m_cluster(NULL), m_block(NULL), m_blockEntry(NULL),
+	m_blockFrameIndex(0),
+	m_videoTrack(NULL), m_vCodec(NO_VIDEO),
+	m_audioTrack(NULL), m_aCodec(NO_AUDIO),
+	m_isOpen(false),
+	m_eos(false)
+{
+	long long pos = 0;
+	if (mkvparser::EBMLHeader().Parse(m_reader, pos))
+		return;
+
+	if (mkvparser::Segment::CreateInstance(m_reader, pos, m_segment))
+		return;
+
+	if (m_segment->Load() < 0)
+		return;
+
+	const mkvparser::Tracks *tracks = m_segment->GetTracks();
+	const unsigned long tracksCount = tracks->GetTracksCount();
+	int currVideoTrack = -1, currAudioTrack = -1;
+	for (unsigned long i = 0; i < tracksCount; ++i)
+	{
+		const mkvparser::Track *track = tracks->GetTrackByIndex(i);
+		if (const char *codecId = track->GetCodecId())
+		{
+			if ((!m_videoTrack || currVideoTrack != videoTrack) && track->GetType() == mkvparser::Track::kVideo)
+			{
+				if (!strcmp(codecId, "V_VP8"))
+					m_vCodec = VIDEO_VP8;
+				else if (!strcmp(codecId, "V_VP9"))
+					m_vCodec = VIDEO_VP9;
+				if (m_vCodec != NO_VIDEO)
+					m_videoTrack = static_cast<const mkvparser::VideoTrack *>(track);
+				++currVideoTrack;
+			}
+			if ((!m_audioTrack || currAudioTrack != audioTrack) && track->GetType() == mkvparser::Track::kAudio)
+			{
+				if (!strcmp(codecId, "A_VORBIS"))
+					m_aCodec = AUDIO_VORBIS;
+				else if (!strcmp(codecId, "A_OPUS"))
+					m_aCodec = AUDIO_OPUS;
+				if (m_aCodec != NO_AUDIO)
+					m_audioTrack = static_cast<const mkvparser::AudioTrack *>(track);
+				++currAudioTrack;
+			}
+		}
+	}
+	if (!m_videoTrack && !m_audioTrack)
+		return;
+
+	m_isOpen = true;
+}
+WebMDemuxer::~WebMDemuxer()
+{
+	delete m_segment;
+	delete m_reader;
+}
+
+double WebMDemuxer::getLength() const
+{
+	return m_segment->GetDuration() / 1e9;
+}
+
+WebMDemuxer::VIDEO_CODEC WebMDemuxer::getVideoCodec() const
+{
+	return m_vCodec;
+}
+int WebMDemuxer::getWidth() const
+{
+	return m_videoTrack->GetWidth();
+}
+int WebMDemuxer::getHeight() const
+{
+	return m_videoTrack->GetHeight();
+}
+
+WebMDemuxer::AUDIO_CODEC WebMDemuxer::getAudioCodec() const
+{
+	return m_aCodec;
+}
+const unsigned char *WebMDemuxer::getAudioExtradata(size_t &size) const
+{
+	return m_audioTrack->GetCodecPrivate(size);
+}
+double WebMDemuxer::getSampleRate() const
+{
+	return m_audioTrack->GetSamplingRate();
+}
+int WebMDemuxer::getChannels() const
+{
+	return m_audioTrack->GetChannels();
+}
+int WebMDemuxer::getAudioDepth() const
+{
+	return m_audioTrack->GetBitDepth();
+}
+
+bool WebMDemuxer::readFrame(WebMFrame *videoFrame, WebMFrame *audioFrame)
+{
+	const long videoTrackNumber = (videoFrame && m_videoTrack) ? m_videoTrack->GetNumber() : 0;
+	const long audioTrackNumber = (audioFrame && m_audioTrack) ? m_audioTrack->GetNumber() : 0;
+	bool blockEntryEOS = false;
+
+	if (videoFrame)
+		videoFrame->bufferSize = 0;
+	if (audioFrame)
+		audioFrame->bufferSize = 0;
+
+	if (videoTrackNumber == 0 && audioTrackNumber == 0)
+		return false;
+
+	if (m_eos)
+		return false;
+
+	if (!m_cluster)
+		m_cluster = m_segment->GetFirst();
+
+	do
+	{
+		bool getNewBlock = false;
+		long status = 0;
+		if (!m_blockEntry && !blockEntryEOS)
+		{
+			status = m_cluster->GetFirst(m_blockEntry);
+			getNewBlock = true;
+		}
+		else if (blockEntryEOS || m_blockEntry->EOS())
+		{
+			m_cluster = m_segment->GetNext(m_cluster);
+			if (!m_cluster || m_cluster->EOS())
+			{
+				m_eos = true;
+				return false;
+			}
+			status = m_cluster->GetFirst(m_blockEntry);
+			blockEntryEOS = false;
+			getNewBlock = true;
+		}
+		else if (!m_block || m_blockFrameIndex == m_block->GetFrameCount() || notSupportedTrackNumber(videoTrackNumber, audioTrackNumber))
+		{
+			status = m_cluster->GetNext(m_blockEntry, m_blockEntry);
+			if (!m_blockEntry  || m_blockEntry->EOS())
+			{
+				blockEntryEOS = true;
+				continue;
+			}
+			getNewBlock = true;
+		}
+		if (status || !m_blockEntry)
+			return false;
+		if (getNewBlock)
+		{
+			m_block = m_blockEntry->GetBlock();
+			m_blockFrameIndex = 0;
+		}
+	} while (blockEntryEOS || notSupportedTrackNumber(videoTrackNumber, audioTrackNumber));
+
+	WebMFrame *frame = NULL;
+
+	const long trackNumber = m_block->GetTrackNumber();
+	if (trackNumber == videoTrackNumber)
+		frame = videoFrame;
+	else if (trackNumber == audioTrackNumber)
+		frame = audioFrame;
+	else
+	{
+		//Should not be possible
+		assert(trackNumber == videoTrackNumber || trackNumber == audioTrackNumber);
+		return false;
+	}
+
+	const mkvparser::Block::Frame &blockFrame = m_block->GetFrame(m_blockFrameIndex++);
+	if (blockFrame.len > frame->bufferCapacity)
+	{
+		unsigned char *newBuff = (unsigned char *)realloc(frame->buffer, frame->bufferCapacity = blockFrame.len);
+		if (newBuff)
+			frame->buffer = newBuff;
+		else // Out of memory
+			return false;
+	}
+	frame->bufferSize = blockFrame.len;
+
+	frame->time = m_block->GetTime(m_cluster) / 1e9;
+	frame->key  = m_block->IsKey();
+
+	return !blockFrame.Read(m_reader, frame->buffer);
+}
+
+inline bool WebMDemuxer::notSupportedTrackNumber(long videoTrackNumber, long audioTrackNumber) const
+{
+	const long trackNumber = m_block->GetTrackNumber();
+	return (trackNumber != videoTrackNumber && trackNumber != audioTrackNumber);
+}

+ 125 - 0
thirdparty/libsimplewebm/WebMDemuxer.hpp

@@ -0,0 +1,125 @@
+/*
+	MIT License
+
+	Copyright (c) 2016 Błażej Szczygieł
+
+	Permission is hereby granted, free of charge, to any person obtaining a copy
+	of this software and associated documentation files (the "Software"), to deal
+	in the Software without restriction, including without limitation the rights
+	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+	copies of the Software, and to permit persons to whom the Software is
+	furnished to do so, subject to the following conditions:
+
+	The above copyright notice and this permission notice shall be included in all
+	copies or substantial portions of the Software.
+
+	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+	SOFTWARE.
+*/
+
+#ifndef WEBMDEMUXER_HPP
+#define WEBMDEMUXER_HPP
+
+#include <stddef.h>
+
+namespace mkvparser {
+	class IMkvReader;
+	class Segment;
+	class Cluster;
+	class Block;
+	class BlockEntry;
+	class VideoTrack;
+	class AudioTrack;
+}
+
+class WebMFrame
+{
+	WebMFrame(const WebMFrame &);
+	void operator =(const WebMFrame &);
+public:
+	WebMFrame();
+	~WebMFrame();
+
+	inline bool isValid() const
+	{
+		return bufferSize > 0;
+	}
+
+	long bufferSize, bufferCapacity;
+	unsigned char *buffer;
+	double time;
+	bool key;
+};
+
+class WebMDemuxer
+{
+	WebMDemuxer(const WebMDemuxer &);
+	void operator =(const WebMDemuxer &);
+public:
+	enum VIDEO_CODEC
+	{
+		NO_VIDEO,
+		VIDEO_VP8,
+		VIDEO_VP9
+	};
+	enum AUDIO_CODEC
+	{
+		NO_AUDIO,
+		AUDIO_VORBIS,
+		AUDIO_OPUS
+	};
+
+	WebMDemuxer(mkvparser::IMkvReader *reader, int videoTrack = 0, int audioTrack = 0);
+	~WebMDemuxer();
+
+	inline bool isOpen() const
+	{
+		return m_isOpen;
+	}
+	inline bool isEOS() const
+	{
+		return m_eos;
+	}
+
+	double getLength() const;
+
+	VIDEO_CODEC getVideoCodec() const;
+	int getWidth() const;
+	int getHeight() const;
+
+	AUDIO_CODEC getAudioCodec() const;
+	const unsigned char *getAudioExtradata(size_t &size) const; // Needed for Vorbis
+	double getSampleRate() const;
+	int getChannels() const;
+	int getAudioDepth() const;
+
+	bool readFrame(WebMFrame *videoFrame, WebMFrame *audioFrame);
+
+private:
+	inline bool notSupportedTrackNumber(long videoTrackNumber, long audioTrackNumber) const;
+
+	mkvparser::IMkvReader *m_reader;
+	mkvparser::Segment *m_segment;
+
+	const mkvparser::Cluster *m_cluster;
+	const mkvparser::Block *m_block;
+	const mkvparser::BlockEntry *m_blockEntry;
+
+	int m_blockFrameIndex;
+
+	const mkvparser::VideoTrack *m_videoTrack;
+	VIDEO_CODEC m_vCodec;
+
+	const mkvparser::AudioTrack *m_audioTrack;
+	AUDIO_CODEC m_aCodec;
+
+	bool m_isOpen;
+	bool m_eos;
+};
+
+#endif // WEBMDEMUXER_HPP
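
Reviewer note: a sketch of how the classes declared in this header fit together, illustrative only and not part of the patch. The reader is assumed to be an application-provided mkvparser::IMkvReader implementation; the demuxer takes ownership of it and deletes it in its destructor. The function name play_webm and the thread count are made up for the example.

#include "WebMDemuxer.hpp"
#include "VPXDecoder.hpp"

// Hypothetical driver loop, not part of libsimplewebm.
static void play_webm(mkvparser::IMkvReader *reader)
{
	WebMDemuxer demuxer(reader); // takes ownership of "reader"
	if (!demuxer.isOpen())
		return;

	VPXDecoder videoDec(demuxer, 4); // up to 4 decoding threads
	WebMFrame videoFrame, audioFrame;
	VPXDecoder::Image image;

	while (demuxer.readFrame(&videoFrame, &audioFrame))
	{
		if (videoFrame.isValid() && videoDec.isOpen() && videoDec.decode(videoFrame))
		{
			while (videoDec.getImage(image) == VPXDecoder::NO_ERROR)
			{
				// image.planes/linesize point into the decoder's buffers;
				// copy or display them before the next decode() call.
			}
		}
		// audioFrame would be fed to OpusVorbisDecoder::getPCMS16() here.
	}
}
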

+ 4 - 0
thirdparty/libsimplewebm/libwebm/AUTHORS.TXT

@@ -0,0 +1,4 @@
+# Names should be added to this file like so:
+# Name or Organization <email address>
+
+Google Inc.

+ 30 - 0
thirdparty/libsimplewebm/libwebm/LICENSE.TXT

@@ -0,0 +1,30 @@
+Copyright (c) 2010, Google Inc. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+  * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+
+  * Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in
+    the documentation and/or other materials provided with the
+    distribution.
+
+  * Neither the name of Google nor the names of its contributors may
+    be used to endorse or promote products derived from this software
+    without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+

+ 23 - 0
thirdparty/libsimplewebm/libwebm/PATENTS.TXT

@@ -0,0 +1,23 @@
+Additional IP Rights Grant (Patents)
+------------------------------------
+
+"These implementations" means the copyrightable works that implement the WebM
+codecs distributed by Google as part of the WebM Project.
+
+Google hereby grants to you a perpetual, worldwide, non-exclusive, no-charge,
+royalty-free, irrevocable (except as stated in this section) patent license to
+make, have made, use, offer to sell, sell, import, transfer, and otherwise
+run, modify and propagate the contents of these implementations of WebM, where
+such license applies only to those patent claims, both currently owned by
+Google and acquired in the future, licensable by Google that are necessarily
+infringed by these implementations of WebM. This grant does not include claims
+that would be infringed only as a consequence of further modification of these
+implementations. If you or your agent or exclusive licensee institute or order
+or agree to the institution of patent litigation or any other patent
+enforcement activity against any entity (including a cross-claim or
+counterclaim in a lawsuit) alleging that any of these implementations of WebM
+or any code incorporated within any of these implementations of WebM
+constitute direct or contributory patent infringement, or inducement of
+patent infringement, then any patent rights granted to you under this License
+for these implementations of WebM shall terminate as of the date such
+litigation is filed.

+ 11 - 0
thirdparty/libsimplewebm/libwebm/README.libvpx

@@ -0,0 +1,11 @@
+URL: https://chromium.googlesource.com/webm/libwebm
+Version: 32d5ac49414a8914ec1e1f285f3f927c6e8ec29d
+License: BSD
+License File: LICENSE.TXT
+
+Description:
+libwebm is used to handle WebM container I/O.
+
+Local Changes:
+* Removed: "mkvmuxer", "hdr_util", "file_util", "mkv_reader".
+* Make "~IMkvReader()" public.

+ 184 - 0
thirdparty/libsimplewebm/libwebm/common/webmids.h

@@ -0,0 +1,184 @@
+// Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS.  All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+
+#ifndef COMMON_WEBMIDS_H_
+#define COMMON_WEBMIDS_H_
+
+namespace libwebm {
+
+enum MkvId {
+  kMkvEBML = 0x1A45DFA3,
+  kMkvEBMLVersion = 0x4286,
+  kMkvEBMLReadVersion = 0x42F7,
+  kMkvEBMLMaxIDLength = 0x42F2,
+  kMkvEBMLMaxSizeLength = 0x42F3,
+  kMkvDocType = 0x4282,
+  kMkvDocTypeVersion = 0x4287,
+  kMkvDocTypeReadVersion = 0x4285,
+  kMkvVoid = 0xEC,
+  kMkvSignatureSlot = 0x1B538667,
+  kMkvSignatureAlgo = 0x7E8A,
+  kMkvSignatureHash = 0x7E9A,
+  kMkvSignaturePublicKey = 0x7EA5,
+  kMkvSignature = 0x7EB5,
+  kMkvSignatureElements = 0x7E5B,
+  kMkvSignatureElementList = 0x7E7B,
+  kMkvSignedElement = 0x6532,
+  // segment
+  kMkvSegment = 0x18538067,
+  // Meta Seek Information
+  kMkvSeekHead = 0x114D9B74,
+  kMkvSeek = 0x4DBB,
+  kMkvSeekID = 0x53AB,
+  kMkvSeekPosition = 0x53AC,
+  // Segment Information
+  kMkvInfo = 0x1549A966,
+  kMkvTimecodeScale = 0x2AD7B1,
+  kMkvDuration = 0x4489,
+  kMkvDateUTC = 0x4461,
+  kMkvTitle = 0x7BA9,
+  kMkvMuxingApp = 0x4D80,
+  kMkvWritingApp = 0x5741,
+  // Cluster
+  kMkvCluster = 0x1F43B675,
+  kMkvTimecode = 0xE7,
+  kMkvPrevSize = 0xAB,
+  kMkvBlockGroup = 0xA0,
+  kMkvBlock = 0xA1,
+  kMkvBlockDuration = 0x9B,
+  kMkvReferenceBlock = 0xFB,
+  kMkvLaceNumber = 0xCC,
+  kMkvSimpleBlock = 0xA3,
+  kMkvBlockAdditions = 0x75A1,
+  kMkvBlockMore = 0xA6,
+  kMkvBlockAddID = 0xEE,
+  kMkvBlockAdditional = 0xA5,
+  kMkvDiscardPadding = 0x75A2,
+  // Track
+  kMkvTracks = 0x1654AE6B,
+  kMkvTrackEntry = 0xAE,
+  kMkvTrackNumber = 0xD7,
+  kMkvTrackUID = 0x73C5,
+  kMkvTrackType = 0x83,
+  kMkvFlagEnabled = 0xB9,
+  kMkvFlagDefault = 0x88,
+  kMkvFlagForced = 0x55AA,
+  kMkvFlagLacing = 0x9C,
+  kMkvDefaultDuration = 0x23E383,
+  kMkvMaxBlockAdditionID = 0x55EE,
+  kMkvName = 0x536E,
+  kMkvLanguage = 0x22B59C,
+  kMkvCodecID = 0x86,
+  kMkvCodecPrivate = 0x63A2,
+  kMkvCodecName = 0x258688,
+  kMkvCodecDelay = 0x56AA,
+  kMkvSeekPreRoll = 0x56BB,
+  // video
+  kMkvVideo = 0xE0,
+  kMkvFlagInterlaced = 0x9A,
+  kMkvStereoMode = 0x53B8,
+  kMkvAlphaMode = 0x53C0,
+  kMkvPixelWidth = 0xB0,
+  kMkvPixelHeight = 0xBA,
+  kMkvPixelCropBottom = 0x54AA,
+  kMkvPixelCropTop = 0x54BB,
+  kMkvPixelCropLeft = 0x54CC,
+  kMkvPixelCropRight = 0x54DD,
+  kMkvDisplayWidth = 0x54B0,
+  kMkvDisplayHeight = 0x54BA,
+  kMkvDisplayUnit = 0x54B2,
+  kMkvAspectRatioType = 0x54B3,
+  kMkvFrameRate = 0x2383E3,
+  // end video
+  // colour
+  kMkvColour = 0x55B0,
+  kMkvMatrixCoefficients = 0x55B1,
+  kMkvBitsPerChannel = 0x55B2,
+  kMkvChromaSubsamplingHorz = 0x55B3,
+  kMkvChromaSubsamplingVert = 0x55B4,
+  kMkvCbSubsamplingHorz = 0x55B5,
+  kMkvCbSubsamplingVert = 0x55B6,
+  kMkvChromaSitingHorz = 0x55B7,
+  kMkvChromaSitingVert = 0x55B8,
+  kMkvRange = 0x55B9,
+  kMkvTransferCharacteristics = 0x55BA,
+  kMkvPrimaries = 0x55BB,
+  kMkvMaxCLL = 0x55BC,
+  kMkvMaxFALL = 0x55BD,
+  // mastering metadata
+  kMkvMasteringMetadata = 0x55D0,
+  kMkvPrimaryRChromaticityX = 0x55D1,
+  kMkvPrimaryRChromaticityY = 0x55D2,
+  kMkvPrimaryGChromaticityX = 0x55D3,
+  kMkvPrimaryGChromaticityY = 0x55D4,
+  kMkvPrimaryBChromaticityX = 0x55D5,
+  kMkvPrimaryBChromaticityY = 0x55D6,
+  kMkvWhitePointChromaticityX = 0x55D7,
+  kMkvWhitePointChromaticityY = 0x55D8,
+  kMkvLuminanceMax = 0x55D9,
+  kMkvLuminanceMin = 0x55DA,
+  // end mastering metadata
+  // end colour
+  // audio
+  kMkvAudio = 0xE1,
+  kMkvSamplingFrequency = 0xB5,
+  kMkvOutputSamplingFrequency = 0x78B5,
+  kMkvChannels = 0x9F,
+  kMkvBitDepth = 0x6264,
+  // end audio
+  // ContentEncodings
+  kMkvContentEncodings = 0x6D80,
+  kMkvContentEncoding = 0x6240,
+  kMkvContentEncodingOrder = 0x5031,
+  kMkvContentEncodingScope = 0x5032,
+  kMkvContentEncodingType = 0x5033,
+  kMkvContentCompression = 0x5034,
+  kMkvContentCompAlgo = 0x4254,
+  kMkvContentCompSettings = 0x4255,
+  kMkvContentEncryption = 0x5035,
+  kMkvContentEncAlgo = 0x47E1,
+  kMkvContentEncKeyID = 0x47E2,
+  kMkvContentSignature = 0x47E3,
+  kMkvContentSigKeyID = 0x47E4,
+  kMkvContentSigAlgo = 0x47E5,
+  kMkvContentSigHashAlgo = 0x47E6,
+  kMkvContentEncAESSettings = 0x47E7,
+  kMkvAESSettingsCipherMode = 0x47E8,
+  kMkvAESSettingsCipherInitData = 0x47E9,
+  // end ContentEncodings
+  // Cueing Data
+  kMkvCues = 0x1C53BB6B,
+  kMkvCuePoint = 0xBB,
+  kMkvCueTime = 0xB3,
+  kMkvCueTrackPositions = 0xB7,
+  kMkvCueTrack = 0xF7,
+  kMkvCueClusterPosition = 0xF1,
+  kMkvCueBlockNumber = 0x5378,
+  // Chapters
+  kMkvChapters = 0x1043A770,
+  kMkvEditionEntry = 0x45B9,
+  kMkvChapterAtom = 0xB6,
+  kMkvChapterUID = 0x73C4,
+  kMkvChapterStringUID = 0x5654,
+  kMkvChapterTimeStart = 0x91,
+  kMkvChapterTimeEnd = 0x92,
+  kMkvChapterDisplay = 0x80,
+  kMkvChapString = 0x85,
+  kMkvChapLanguage = 0x437C,
+  kMkvChapCountry = 0x437E,
+  // Tags
+  kMkvTags = 0x1254C367,
+  kMkvTag = 0x7373,
+  kMkvSimpleTag = 0x67C8,
+  kMkvTagName = 0x45A3,
+  kMkvTagString = 0x4487
+};
+
+}  // namespace libwebm
+
+#endif  // COMMON_WEBMIDS_H_

+ 28 - 0
thirdparty/libsimplewebm/libwebm/mkvmuxer/mkvmuxertypes.h

@@ -0,0 +1,28 @@
+// Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS.  All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+
+#ifndef MKVMUXER_MKVMUXERTYPES_H_
+#define MKVMUXER_MKVMUXERTYPES_H_
+
+namespace mkvmuxer {
+typedef unsigned char uint8;
+typedef short int16;
+typedef int int32;
+typedef unsigned int uint32;
+typedef long long int64;
+typedef unsigned long long uint64;
+}  // namespace mkvmuxer
+
+// Copied from Chromium basictypes.h
+// A macro to disallow the copy constructor and operator= functions
+// This should be used in the private: declarations for a class
+#define LIBWEBM_DISALLOW_COPY_AND_ASSIGN(TypeName) \
+  TypeName(const TypeName&);                       \
+  void operator=(const TypeName&)
+
+#endif  // MKVMUXER_MKVMUXERTYPES_H_
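
Reviewer note: a minimal example of how the macro above is meant to be used, illustrative only and not part of the file; the class name FrameBuffer is made up.

// Hypothetical class, not part of libwebm.
class FrameBuffer {
 public:
  FrameBuffer();

 private:
  // Copying is disallowed; the macro declares the members but never defines them.
  LIBWEBM_DISALLOW_COPY_AND_ASSIGN(FrameBuffer);
};
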

+ 7831 - 0
thirdparty/libsimplewebm/libwebm/mkvparser/mkvparser.cc

@@ -0,0 +1,7831 @@
+// Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS.  All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+#include "mkvparser/mkvparser.h"
+
+#if defined(_MSC_VER) && _MSC_VER < 1800
+#include <float.h>  // _isnan() / _finite()
+#define MSC_COMPAT
+#endif
+
+#include <assert.h>
+#include <float.h>
+#include <limits.h>
+#include <math.h>
+#include <string.h>
+
+#include "common/webmids.h"
+
+namespace mkvparser {
+const float MasteringMetadata::kValueNotPresent = FLT_MAX;
+const long long Colour::kValueNotPresent = LLONG_MAX;
+
+#ifdef MSC_COMPAT
+inline bool isnan(double val) { return !!_isnan(val); }
+inline bool isinf(double val) { return !_finite(val); }
+#endif  // MSC_COMPAT
+
+template<typename T>
+class my_auto_ptr {
+  my_auto_ptr(const my_auto_ptr &);
+  T *operator =(const my_auto_ptr &);
+
+  T *m_ptr;
+public:
+  my_auto_ptr(T *ptr) :
+    m_ptr(ptr)
+  {}
+  my_auto_ptr() :
+    m_ptr(NULL)
+  {}
+  ~my_auto_ptr() {
+    delete m_ptr;
+  }
+
+  T *release() {
+    T *ptr = m_ptr;
+    m_ptr = NULL;
+    return ptr;
+  }
+
+  T *operator ->() const {
+    return m_ptr;
+  }
+};
+
+IMkvReader::~IMkvReader() {}
+
+template <typename Type>
+Type* SafeArrayAlloc(unsigned long long num_elements,
+                     unsigned long long element_size) {
+  if (num_elements == 0 || element_size == 0)
+    return NULL;
+
+  const size_t kMaxAllocSize = 0x80000000;  // 2GiB
+  const unsigned long long num_bytes = num_elements * element_size;
+  if (element_size > (kMaxAllocSize / num_elements))
+    return NULL;
+  if (num_bytes != static_cast<size_t>(num_bytes))
+    return NULL;
+
+  return new Type[static_cast<size_t>(num_bytes)];
+}
+
+void GetVersion(int& major, int& minor, int& build, int& revision) {
+  major = 1;
+  minor = 0;
+  build = 0;
+  revision = 30;
+}
+
+long long ReadUInt(IMkvReader* pReader, long long pos, long& len) {
+  if (!pReader || pos < 0)
+    return E_FILE_FORMAT_INVALID;
+
+  len = 1;
+  unsigned char b;
+  int status = pReader->Read(pos, 1, &b);
+
+  if (status < 0)  // error or underflow
+    return status;
+
+  if (status > 0)  // interpreted as "underflow"
+    return E_BUFFER_NOT_FULL;
+
+  if (b == 0)  // we can't handle u-int values larger than 8 bytes
+    return E_FILE_FORMAT_INVALID;
+
+  unsigned char m = 0x80;
+
+  while (!(b & m)) {
+    m >>= 1;
+    ++len;
+  }
+
+  long long result = b & (~m);
+  ++pos;
+
+  for (int i = 1; i < len; ++i) {
+    status = pReader->Read(pos, 1, &b);
+
+    if (status < 0) {
+      len = 1;
+      return status;
+    }
+
+    if (status > 0) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
+    }
+
+    result <<= 8;
+    result |= b;
+
+    ++pos;
+  }
+
+  return result;
+}
+
+// Reads an EBML ID and returns it.
+// An ID must be at least 1 byte long, cannot exceed 4 bytes, and its value must be
+// greater than 0.
+// See known EBML values and EBMLMaxIDLength:
+// http://www.matroska.org/technical/specs/index.html
+// Returns the ID, or a value less than 0 to report an error while reading the
+// ID.
+long long ReadID(IMkvReader* pReader, long long pos, long& len) {
+  if (pReader == NULL || pos < 0)
+    return E_FILE_FORMAT_INVALID;
+
+  // Read the first byte. The length in bytes of the ID is determined by
+  // finding the first set bit in the first byte of the ID.
+  unsigned char temp_byte = 0;
+  int read_status = pReader->Read(pos, 1, &temp_byte);
+
+  if (read_status < 0)
+    return E_FILE_FORMAT_INVALID;
+  else if (read_status > 0)  // No data to read.
+    return E_BUFFER_NOT_FULL;
+
+  if (temp_byte == 0)  // ID length > 8 bytes; invalid file.
+    return E_FILE_FORMAT_INVALID;
+
+  int bit_pos = 0;
+  const int kMaxIdLengthInBytes = 4;
+  const int kCheckByte = 0x80;
+
+  // Find the first bit that's set.
+  bool found_bit = false;
+  for (; bit_pos < kMaxIdLengthInBytes; ++bit_pos) {
+    if ((kCheckByte >> bit_pos) & temp_byte) {
+      found_bit = true;
+      break;
+    }
+  }
+
+  if (!found_bit) {
+    // The value is too large to be a valid ID.
+    return E_FILE_FORMAT_INVALID;
+  }
+
+  // Read the remaining bytes of the ID (if any).
+  const int id_length = bit_pos + 1;
+  long long ebml_id = temp_byte;
+  for (int i = 1; i < id_length; ++i) {
+    ebml_id <<= 8;
+    read_status = pReader->Read(pos + i, 1, &temp_byte);
+
+    if (read_status < 0)
+      return E_FILE_FORMAT_INVALID;
+    else if (read_status > 0)
+      return E_BUFFER_NOT_FULL;
+
+    ebml_id |= temp_byte;
+  }
+
+  len = id_length;
+  return ebml_id;
+}
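
Reviewer note on the encoding that ReadUInt() and ReadID() above both rely on, illustrative only and not part of the file: the count of leading zero bits in the first byte, plus one, gives the total length of the field. ReadID() keeps the marker bit as part of the ID, while ReadUInt() strips it; a size field with all of its value bits set is the EBML "unknown size" marker handled later during Segment parsing.

// Hypothetical helper, not part of libwebm.
static int EbmlFieldLength(unsigned char first_byte) {
  int len = 1;
  for (unsigned char m = 0x80; m && !(first_byte & m); m >>= 1)
    ++len;
  return len;  // 0x1A (start of the EBML header ID 0x1A45DFA3) -> 4
               // 0x81 -> 1 (ReadUInt() strips the marker bit: value 1)
               // a first byte of 0 would mean more than 8 bytes; the parser rejects it
}
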
+
+long long GetUIntLength(IMkvReader* pReader, long long pos, long& len) {
+  if (!pReader || pos < 0)
+    return E_FILE_FORMAT_INVALID;
+
+  long long total, available;
+
+  int status = pReader->Length(&total, &available);
+  if (status < 0 || (total >= 0 && available > total))
+    return E_FILE_FORMAT_INVALID;
+
+  len = 1;
+
+  if (pos >= available)
+    return pos;  // too few bytes available
+
+  unsigned char b;
+
+  status = pReader->Read(pos, 1, &b);
+
+  if (status != 0)
+    return status;
+
+  if (b == 0)  // we can't handle u-int values larger than 8 bytes
+    return E_FILE_FORMAT_INVALID;
+
+  unsigned char m = 0x80;
+
+  while (!(b & m)) {
+    m >>= 1;
+    ++len;
+  }
+
+  return 0;  // success
+}
+
+// TODO(vigneshv): This function assumes that unsigned values never have their
+// high bit set.
+long long UnserializeUInt(IMkvReader* pReader, long long pos, long long size) {
+  if (!pReader || pos < 0 || (size <= 0) || (size > 8))
+    return E_FILE_FORMAT_INVALID;
+
+  long long result = 0;
+
+  for (long long i = 0; i < size; ++i) {
+    unsigned char b;
+
+    const long status = pReader->Read(pos, 1, &b);
+
+    if (status < 0)
+      return status;
+
+    result <<= 8;
+    result |= b;
+
+    ++pos;
+  }
+
+  return result;
+}
+
+long UnserializeFloat(IMkvReader* pReader, long long pos, long long size_,
+                      double& result) {
+  if (!pReader || pos < 0 || ((size_ != 4) && (size_ != 8)))
+    return E_FILE_FORMAT_INVALID;
+
+  const long size = static_cast<long>(size_);
+
+  unsigned char buf[8];
+
+  const int status = pReader->Read(pos, size, buf);
+
+  if (status < 0)  // error
+    return status;
+
+  if (size == 4) {
+    union {
+      float f;
+      unsigned long ff;
+    };
+
+    ff = 0;
+
+    for (int i = 0;;) {
+      ff |= buf[i];
+
+      if (++i >= 4)
+        break;
+
+      ff <<= 8;
+    }
+
+    result = f;
+  } else {
+    union {
+      double d;
+      unsigned long long dd;
+    };
+
+    dd = 0;
+
+    for (int i = 0;;) {
+      dd |= buf[i];
+
+      if (++i >= 8)
+        break;
+
+      dd <<= 8;
+    }
+
+    result = d;
+  }
+
+  if (isinf(result) || isnan(result))
+    return E_FILE_FORMAT_INVALID;
+
+  return 0;
+}
+
+long UnserializeInt(IMkvReader* pReader, long long pos, long long size,
+                    long long& result_ref) {
+  if (!pReader || pos < 0 || size < 1 || size > 8)
+    return E_FILE_FORMAT_INVALID;
+
+  signed char first_byte = 0;
+  const long status = pReader->Read(pos, 1, (unsigned char*)&first_byte);
+
+  if (status < 0)
+    return status;
+
+  unsigned long long result = first_byte;
+  ++pos;
+
+  for (long i = 1; i < size; ++i) {
+    unsigned char b;
+
+    const long status = pReader->Read(pos, 1, &b);
+
+    if (status < 0)
+      return status;
+
+    result <<= 8;
+    result |= b;
+
+    ++pos;
+  }
+
+  result_ref = static_cast<long long>(result);
+  return 0;
+}
+
+long UnserializeString(IMkvReader* pReader, long long pos, long long size,
+                       char*& str) {
+  delete[] str;
+  str = NULL;
+
+  if (size >= LONG_MAX || size < 0)
+    return E_FILE_FORMAT_INVALID;
+
+  // +1 for '\0' terminator
+  const long required_size = static_cast<long>(size) + 1;
+
+  str = SafeArrayAlloc<char>(1, required_size);
+  if (str == NULL)
+    return E_FILE_FORMAT_INVALID;
+
+  unsigned char* const buf = reinterpret_cast<unsigned char*>(str);
+
+  const long status = pReader->Read(pos, static_cast<long>(size), buf);
+
+  if (status) {
+    delete[] str;
+    str = NULL;
+
+    return status;
+  }
+
+  str[required_size - 1] = '\0';
+  return 0;
+}
+
+long ParseElementHeader(IMkvReader* pReader, long long& pos, long long stop,
+                        long long& id, long long& size) {
+  if (stop >= 0 && pos >= stop)
+    return E_FILE_FORMAT_INVALID;
+
+  long len;
+
+  id = ReadID(pReader, pos, len);
+
+  if (id < 0)
+    return E_FILE_FORMAT_INVALID;
+
+  pos += len;  // consume id
+
+  if (stop >= 0 && pos >= stop)
+    return E_FILE_FORMAT_INVALID;
+
+  size = ReadUInt(pReader, pos, len);
+
+  if (size < 0 || len < 1 || len > 8) {
+    // Invalid: Negative payload size, negative or 0 length integer, or integer
+    // larger than 64 bits (libwebm cannot handle them).
+    return E_FILE_FORMAT_INVALID;
+  }
+
+  // Avoid rolling over pos when very close to LLONG_MAX.
+  const unsigned long long rollover_check =
+      static_cast<unsigned long long>(pos) + len;
+  if (rollover_check > LLONG_MAX)
+    return E_FILE_FORMAT_INVALID;
+
+  pos += len;  // consume length of size
+
+  // pos now designates payload
+
+  if (stop >= 0 && pos > stop)
+    return E_FILE_FORMAT_INVALID;
+
+  return 0;  // success
+}
+
+bool Match(IMkvReader* pReader, long long& pos, unsigned long expected_id,
+           long long& val) {
+  if (!pReader || pos < 0)
+    return false;
+
+  long long total = 0;
+  long long available = 0;
+
+  const long status = pReader->Length(&total, &available);
+  if (status < 0 || (total >= 0 && available > total))
+    return false;
+
+  long len = 0;
+
+  const long long id = ReadID(pReader, pos, len);
+  if (id < 0 || (available - pos) > len)
+    return false;
+
+  if (static_cast<unsigned long>(id) != expected_id)
+    return false;
+
+  pos += len;  // consume id
+
+  const long long size = ReadUInt(pReader, pos, len);
+  if (size < 0 || size > 8 || len < 1 || len > 8 || (available - pos) > len)
+    return false;
+
+  pos += len;  // consume length of size of payload
+
+  val = UnserializeUInt(pReader, pos, size);
+  if (val < 0)
+    return false;
+
+  pos += size;  // consume size of payload
+
+  return true;
+}
+
+bool Match(IMkvReader* pReader, long long& pos, unsigned long expected_id,
+           unsigned char*& buf, size_t& buflen) {
+  if (!pReader || pos < 0)
+    return false;
+
+  long long total = 0;
+  long long available = 0;
+
+  long status = pReader->Length(&total, &available);
+  if (status < 0 || (total >= 0 && available > total))
+    return false;
+
+  long len = 0;
+  const long long id = ReadID(pReader, pos, len);
+  if (id < 0 || (available - pos) > len)
+    return false;
+
+  if (static_cast<unsigned long>(id) != expected_id)
+    return false;
+
+  pos += len;  // consume id
+
+  const long long size = ReadUInt(pReader, pos, len);
+  if (size < 0 || len <= 0 || len > 8 || (available - pos) > len)
+    return false;
+
+  unsigned long long rollover_check =
+      static_cast<unsigned long long>(pos) + len;
+  if (rollover_check > LLONG_MAX)
+    return false;
+
+  pos += len;  // consume length of size of payload
+
+  rollover_check = static_cast<unsigned long long>(pos) + size;
+  if (rollover_check > LLONG_MAX)
+    return false;
+
+  if ((pos + size) > available)
+    return false;
+
+  if (size >= LONG_MAX)
+    return false;
+
+  const long buflen_ = static_cast<long>(size);
+
+  buf = SafeArrayAlloc<unsigned char>(1, buflen_);
+  if (!buf)
+    return false;
+
+  status = pReader->Read(pos, buflen_, buf);
+  if (status != 0)
+    return false;
+
+  buflen = buflen_;
+
+  pos += size;  // consume size of payload
+  return true;
+}
+
+EBMLHeader::EBMLHeader() : m_docType(NULL) { Init(); }
+
+EBMLHeader::~EBMLHeader() { delete[] m_docType; }
+
+void EBMLHeader::Init() {
+  m_version = 1;
+  m_readVersion = 1;
+  m_maxIdLength = 4;
+  m_maxSizeLength = 8;
+
+  if (m_docType) {
+    delete[] m_docType;
+    m_docType = NULL;
+  }
+
+  m_docTypeVersion = 1;
+  m_docTypeReadVersion = 1;
+}
+
+long long EBMLHeader::Parse(IMkvReader* pReader, long long& pos) {
+  if (!pReader)
+    return E_FILE_FORMAT_INVALID;
+
+  long long total, available;
+
+  long status = pReader->Length(&total, &available);
+
+  if (status < 0)  // error
+    return status;
+
+  pos = 0;
+
+  // Scan until we find what looks like the first byte of the EBML header.
+  const long long kMaxScanBytes = (available >= 1024) ? 1024 : available;
+  const unsigned char kEbmlByte0 = 0x1A;
+  unsigned char scan_byte = 0;
+
+  while (pos < kMaxScanBytes) {
+    status = pReader->Read(pos, 1, &scan_byte);
+
+    if (status < 0)  // error
+      return status;
+    else if (status > 0)
+      return E_BUFFER_NOT_FULL;
+
+    if (scan_byte == kEbmlByte0)
+      break;
+
+    ++pos;
+  }
+
+  long len = 0;
+  const long long ebml_id = ReadID(pReader, pos, len);
+
+  if (ebml_id == E_BUFFER_NOT_FULL)
+    return E_BUFFER_NOT_FULL;
+
+  if (len != 4 || ebml_id != libwebm::kMkvEBML)
+    return E_FILE_FORMAT_INVALID;
+
+  // Move read pos forward to the EBML header size field.
+  pos += 4;
+
+  // Read length of size field.
+  long long result = GetUIntLength(pReader, pos, len);
+
+  if (result < 0)  // error
+    return E_FILE_FORMAT_INVALID;
+  else if (result > 0)  // need more data
+    return E_BUFFER_NOT_FULL;
+
+  if (len < 1 || len > 8)
+    return E_FILE_FORMAT_INVALID;
+
+  if ((total >= 0) && ((total - pos) < len))
+    return E_FILE_FORMAT_INVALID;
+
+  if ((available - pos) < len)
+    return pos + len;  // try again later
+
+  // Read the EBML header size.
+  result = ReadUInt(pReader, pos, len);
+
+  if (result < 0)  // error
+    return result;
+
+  pos += len;  // consume size field
+
+  // pos now designates start of payload
+
+  if ((total >= 0) && ((total - pos) < result))
+    return E_FILE_FORMAT_INVALID;
+
+  if ((available - pos) < result)
+    return pos + result;
+
+  const long long end = pos + result;
+
+  Init();
+
+  while (pos < end) {
+    long long id, size;
+
+    status = ParseElementHeader(pReader, pos, end, id, size);
+
+    if (status < 0)  // error
+      return status;
+
+    if (size == 0)
+      return E_FILE_FORMAT_INVALID;
+
+    if (id == libwebm::kMkvEBMLVersion) {
+      m_version = UnserializeUInt(pReader, pos, size);
+
+      if (m_version <= 0)
+        return E_FILE_FORMAT_INVALID;
+    } else if (id == libwebm::kMkvEBMLReadVersion) {
+      m_readVersion = UnserializeUInt(pReader, pos, size);
+
+      if (m_readVersion <= 0)
+        return E_FILE_FORMAT_INVALID;
+    } else if (id == libwebm::kMkvEBMLMaxIDLength) {
+      m_maxIdLength = UnserializeUInt(pReader, pos, size);
+
+      if (m_maxIdLength <= 0)
+        return E_FILE_FORMAT_INVALID;
+    } else if (id == libwebm::kMkvEBMLMaxSizeLength) {
+      m_maxSizeLength = UnserializeUInt(pReader, pos, size);
+
+      if (m_maxSizeLength <= 0)
+        return E_FILE_FORMAT_INVALID;
+    } else if (id == libwebm::kMkvDocType) {
+      if (m_docType)
+        return E_FILE_FORMAT_INVALID;
+
+      status = UnserializeString(pReader, pos, size, m_docType);
+
+      if (status)  // error
+        return status;
+    } else if (id == libwebm::kMkvDocTypeVersion) {
+      m_docTypeVersion = UnserializeUInt(pReader, pos, size);
+
+      if (m_docTypeVersion <= 0)
+        return E_FILE_FORMAT_INVALID;
+    } else if (id == libwebm::kMkvDocTypeReadVersion) {
+      m_docTypeReadVersion = UnserializeUInt(pReader, pos, size);
+
+      if (m_docTypeReadVersion <= 0)
+        return E_FILE_FORMAT_INVALID;
+    }
+
+    pos += size;
+  }
+
+  if (pos != end)
+    return E_FILE_FORMAT_INVALID;
+
+  // Make sure DocType, DocTypeReadVersion, and DocTypeVersion are valid.
+  if (m_docType == NULL || m_docTypeReadVersion <= 0 || m_docTypeVersion <= 0)
+    return E_FILE_FORMAT_INVALID;
+
+  // Make sure EBMLMaxIDLength and EBMLMaxSizeLength are valid.
+  if (m_maxIdLength <= 0 || m_maxIdLength > 4 || m_maxSizeLength <= 0 ||
+      m_maxSizeLength > 8)
+    return E_FILE_FORMAT_INVALID;
+
+  return 0;
+}
+
+Segment::Segment(IMkvReader* pReader, long long elem_start,
+                 // long long elem_size,
+                 long long start, long long size)
+    : m_pReader(pReader),
+      m_element_start(elem_start),
+      // m_element_size(elem_size),
+      m_start(start),
+      m_size(size),
+      m_pos(start),
+      m_pUnknownSize(0),
+      m_pSeekHead(NULL),
+      m_pInfo(NULL),
+      m_pTracks(NULL),
+      m_pCues(NULL),
+      m_pChapters(NULL),
+      m_pTags(NULL),
+      m_clusters(NULL),
+      m_clusterCount(0),
+      m_clusterPreloadCount(0),
+      m_clusterSize(0) {}
+
+Segment::~Segment() {
+  const long count = m_clusterCount + m_clusterPreloadCount;
+
+  Cluster** i = m_clusters;
+  Cluster** j = m_clusters + count;
+
+  while (i != j) {
+    Cluster* const p = *i++;
+    delete p;
+  }
+
+  delete[] m_clusters;
+
+  delete m_pTracks;
+  delete m_pInfo;
+  delete m_pCues;
+  delete m_pChapters;
+  delete m_pTags;
+  delete m_pSeekHead;
+}
+
+long long Segment::CreateInstance(IMkvReader* pReader, long long pos,
+                                  Segment*& pSegment) {
+  if (pReader == NULL || pos < 0)
+    return E_PARSE_FAILED;
+
+  pSegment = NULL;
+
+  long long total, available;
+
+  const long status = pReader->Length(&total, &available);
+
+  if (status < 0)  // error
+    return status;
+
+  if (available < 0)
+    return -1;
+
+  if ((total >= 0) && (available > total))
+    return -1;
+
+  // I would assume that in practice this loop would execute
+  // exactly once, but we allow for other elements (e.g. Void)
+  // to immediately follow the EBML header.  This is fine for
+  // the source filter case (since the entire file is available),
+  // but in the splitter case over a network we should probably
+  // just give up early.  We could for example decide only to
+  // execute this loop a maximum of, say, 10 times.
+  // TODO:
+  // There is an implied "give up early" by only parsing up
+  // to the available limit.  We do do that, but only if the
+  // total file size is unknown.  We could decide to always
+  // use what's available as our limit (irrespective of whether
+  // we happen to know the total file length).  This would have
+  // as its sense "parse this much of the file before giving up",
+  // which is a slightly different sense from "try to parse up to
+  // 10 EBML elements before giving up".
+
+  for (;;) {
+    if ((total >= 0) && (pos >= total))
+      return E_FILE_FORMAT_INVALID;
+
+    // Read ID
+    long len;
+    long long result = GetUIntLength(pReader, pos, len);
+
+    if (result)  // error, or too few available bytes
+      return result;
+
+    if ((total >= 0) && ((pos + len) > total))
+      return E_FILE_FORMAT_INVALID;
+
+    if ((pos + len) > available)
+      return pos + len;
+
+    const long long idpos = pos;
+    const long long id = ReadID(pReader, pos, len);
+
+    if (id < 0)
+      return E_FILE_FORMAT_INVALID;
+
+    pos += len;  // consume ID
+
+    // Read Size
+
+    result = GetUIntLength(pReader, pos, len);
+
+    if (result)  // error, or too few available bytes
+      return result;
+
+    if ((total >= 0) && ((pos + len) > total))
+      return E_FILE_FORMAT_INVALID;
+
+    if ((pos + len) > available)
+      return pos + len;
+
+    long long size = ReadUInt(pReader, pos, len);
+
+    if (size < 0)  // error
+      return size;
+
+    pos += len;  // consume length of size of element
+
+    // Pos now points to start of payload
+
+    // Handle "unknown size" for live streaming of webm files.
+    const long long unknown_size = (1LL << (7 * len)) - 1;
+
+    if (id == libwebm::kMkvSegment) {
+      if (size == unknown_size)
+        size = -1;
+
+      else if (total < 0)
+        size = -1;
+
+      else if ((pos + size) > total)
+        size = -1;
+
+      pSegment = new Segment(pReader, idpos, pos, size);
+
+      return 0;  // success
+    }
+
+    if (size == unknown_size)
+      return E_FILE_FORMAT_INVALID;
+
+    if ((total >= 0) && ((pos + size) > total))
+      return E_FILE_FORMAT_INVALID;
+
+    if ((pos + size) > available)
+      return pos + size;
+
+    pos += size;  // consume payload
+  }
+}
+
+long long Segment::ParseHeaders() {
+  // Outermost (level 0) segment object has been constructed,
+  // and pos designates start of payload.  We need to find the
+  // inner (level 1) elements.
+  long long total, available;
+
+  const int status = m_pReader->Length(&total, &available);
+
+  if (status < 0)  // error
+    return status;
+
+  if (total > 0 && available > total)
+    return E_FILE_FORMAT_INVALID;
+
+  const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size;
+
+  if ((segment_stop >= 0 && total >= 0 && segment_stop > total) ||
+      (segment_stop >= 0 && m_pos > segment_stop)) {
+    return E_FILE_FORMAT_INVALID;
+  }
+
+  for (;;) {
+    if ((total >= 0) && (m_pos >= total))
+      break;
+
+    if ((segment_stop >= 0) && (m_pos >= segment_stop))
+      break;
+
+    long long pos = m_pos;
+    const long long element_start = pos;
+
+    // Avoid rolling over pos when very close to LLONG_MAX.
+    unsigned long long rollover_check = pos + 1ULL;
+    if (rollover_check > LLONG_MAX)
+      return E_FILE_FORMAT_INVALID;
+
+    if ((pos + 1) > available)
+      return (pos + 1);
+
+    long len;
+    long long result = GetUIntLength(m_pReader, pos, len);
+
+    if (result < 0)  // error
+      return result;
+
+    if (result > 0) {
+      // MkvReader doesn't have enough data to satisfy this read attempt.
+      return (pos + 1);
+    }
+
+    if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    if ((pos + len) > available)
+      return pos + len;
+
+    const long long idpos = pos;
+    const long long id = ReadID(m_pReader, idpos, len);
+
+    if (id < 0)
+      return E_FILE_FORMAT_INVALID;
+
+    if (id == libwebm::kMkvCluster)
+      break;
+
+    pos += len;  // consume ID
+
+    if ((pos + 1) > available)
+      return (pos + 1);
+
+    // Read Size
+    result = GetUIntLength(m_pReader, pos, len);
+
+    if (result < 0)  // error
+      return result;
+
+    if (result > 0) {
+      // MkvReader doesn't have enough data to satisfy this read attempt.
+      return (pos + 1);
+    }
+
+    if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    if ((pos + len) > available)
+      return pos + len;
+
+    const long long size = ReadUInt(m_pReader, pos, len);
+
+    if (size < 0 || len < 1 || len > 8) {
+      // TODO(tomfinegan): ReadUInt should return an error when len is < 1 or
+      // len > 8 is true instead of checking this _everywhere_.
+      return size;
+    }
+
+    pos += len;  // consume length of size of element
+
+    // Avoid rolling over pos when very close to LLONG_MAX.
+    rollover_check = static_cast<unsigned long long>(pos) + size;
+    if (rollover_check > LLONG_MAX)
+      return E_FILE_FORMAT_INVALID;
+
+    const long long element_size = size + pos - element_start;
+
+    // Pos now points to start of payload
+
+    if ((segment_stop >= 0) && ((pos + size) > segment_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    // We read EBML elements either in total or nothing at all.
+
+    if ((pos + size) > available)
+      return pos + size;
+
+    if (id == libwebm::kMkvInfo) {
+      if (m_pInfo)
+        return E_FILE_FORMAT_INVALID;
+
+      m_pInfo = new SegmentInfo(this, pos, size, element_start, element_size);
+
+      const long status = m_pInfo->Parse();
+
+      if (status)
+        return status;
+    } else if (id == libwebm::kMkvTracks) {
+      if (m_pTracks)
+        return E_FILE_FORMAT_INVALID;
+
+      m_pTracks = new Tracks(this, pos, size, element_start, element_size);
+
+      const long status = m_pTracks->Parse();
+
+      if (status)
+        return status;
+    } else if (id == libwebm::kMkvCues) {
+      if (m_pCues == NULL) {
+        m_pCues = new Cues(this, pos, size, element_start, element_size);
+      }
+    } else if (id == libwebm::kMkvSeekHead) {
+      if (m_pSeekHead == NULL) {
+        m_pSeekHead = new SeekHead(this, pos, size, element_start, element_size);
+
+        const long status = m_pSeekHead->Parse();
+
+        if (status)
+          return status;
+      }
+    } else if (id == libwebm::kMkvChapters) {
+      if (m_pChapters == NULL) {
+        m_pChapters = new Chapters(this, pos, size, element_start, element_size);
+
+        const long status = m_pChapters->Parse();
+
+        if (status)
+          return status;
+      }
+    } else if (id == libwebm::kMkvTags) {
+      if (m_pTags == NULL) {
+        m_pTags = new Tags(this, pos, size, element_start, element_size);
+
+        const long status = m_pTags->Parse();
+
+        if (status)
+          return status;
+      }
+    }
+
+    m_pos = pos + size;  // consume payload
+  }
+
+  if (segment_stop >= 0 && m_pos > segment_stop)
+    return E_FILE_FORMAT_INVALID;
+
+  if (m_pInfo == NULL)  // TODO: liberalize this behavior
+    return E_FILE_FORMAT_INVALID;
+
+  if (m_pTracks == NULL)
+    return E_FILE_FORMAT_INVALID;
+
+  return 0;  // success
+}
+
+long Segment::LoadCluster(long long& pos, long& len) {
+  for (;;) {
+    const long result = DoLoadCluster(pos, len);
+
+    if (result <= 1)
+      return result;
+  }
+}
+
+long Segment::DoLoadCluster(long long& pos, long& len) {
+  if (m_pos < 0)
+    return DoLoadClusterUnknownSize(pos, len);
+
+  long long total, avail;
+
+  long status = m_pReader->Length(&total, &avail);
+
+  if (status < 0)  // error
+    return status;
+
+  if (total >= 0 && avail > total)
+    return E_FILE_FORMAT_INVALID;
+
+  const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size;
+
+  long long cluster_off = -1;  // offset relative to start of segment
+  long long cluster_size = -1;  // size of cluster payload
+
+  for (;;) {
+    if ((total >= 0) && (m_pos >= total))
+      return 1;  // no more clusters
+
+    if ((segment_stop >= 0) && (m_pos >= segment_stop))
+      return 1;  // no more clusters
+
+    pos = m_pos;
+
+    // Read ID
+
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
+    }
+
+    long long result = GetUIntLength(m_pReader, pos, len);
+
+    if (result < 0)  // error
+      return static_cast<long>(result);
+
+    if (result > 0)
+      return E_BUFFER_NOT_FULL;
+
+    if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    if ((pos + len) > avail)
+      return E_BUFFER_NOT_FULL;
+
+    const long long idpos = pos;
+    const long long id = ReadID(m_pReader, idpos, len);
+
+    if (id < 0)
+      return E_FILE_FORMAT_INVALID;
+
+    pos += len;  // consume ID
+
+    // Read Size
+
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
+    }
+
+    result = GetUIntLength(m_pReader, pos, len);
+
+    if (result < 0)  // error
+      return static_cast<long>(result);
+
+    if (result > 0)
+      return E_BUFFER_NOT_FULL;
+
+    if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    if ((pos + len) > avail)
+      return E_BUFFER_NOT_FULL;
+
+    const long long size = ReadUInt(m_pReader, pos, len);
+
+    if (size < 0)  // error
+      return static_cast<long>(size);
+
+    pos += len;  // consume length of size of element
+
+    // pos now points to start of payload
+
+    if (size == 0) {
+      // Missing element payload: move on.
+      m_pos = pos;
+      continue;
+    }
+
+    const long long unknown_size = (1LL << (7 * len)) - 1;
+
+    if ((segment_stop >= 0) && (size != unknown_size) &&
+        ((pos + size) > segment_stop)) {
+      return E_FILE_FORMAT_INVALID;
+    }
+
+    if (id == libwebm::kMkvCues) {
+      if (size == unknown_size) {
+        // Cues element of unknown size: Not supported.
+        return E_FILE_FORMAT_INVALID;
+      }
+
+      if (m_pCues == NULL) {
+        const long long element_size = (pos - idpos) + size;
+
+        m_pCues = new Cues(this, pos, size, idpos, element_size);
+      }
+
+      m_pos = pos + size;  // consume payload
+      continue;
+    }
+
+    if (id != libwebm::kMkvCluster) {
+      // Besides the Segment, Libwebm allows only cluster elements of unknown
+      // size. Fail the parse upon encountering a non-cluster element reporting
+      // unknown size.
+      if (size == unknown_size)
+        return E_FILE_FORMAT_INVALID;
+
+      m_pos = pos + size;  // consume payload
+      continue;
+    }
+
+    // We have a cluster.
+
+    cluster_off = idpos - m_start;  // relative pos
+
+    if (size != unknown_size)
+      cluster_size = size;
+
+    break;
+  }
+
+  if (cluster_off < 0) {
+    // No cluster, die.
+    return E_FILE_FORMAT_INVALID;
+  }
+
+  long long pos_;
+  long len_;
+
+  status = Cluster::HasBlockEntries(this, cluster_off, pos_, len_);
+
+  if (status < 0) {  // error, or underflow
+    pos = pos_;
+    len = len_;
+
+    return status;
+  }
+
+  // status == 0 means "no block entries found"
+  // status > 0 means "found at least one block entry"
+
+  // TODO:
+  // The issue here is that the segment increments its own
+  // pos ptr past the most recent cluster parsed, and then
+  // starts from there to parse the next cluster.  If we
+  // don't know the size of the current cluster, then we
+  // must either parse its payload (as we do below), looking
+  // for the cluster (or cues) ID to terminate the parse.
+  // This isn't really what we want: rather, we really need
+  // a way to create the curr cluster object immediately.
+  // The pity is that cluster::parse can determine its own
+  // boundary, and we largely duplicate that same logic here.
+  //
+  // Maybe we need to get rid of our look-ahead preloading
+  // in source::parse???
+  //
+  // As we're parsing the blocks in the curr cluster
+  //(in cluster::parse), we should have some way to signal
+  // to the segment that we have determined the boundary,
+  // so it can adjust its own segment::m_pos member.
+  //
+  // The problem is that we're asserting in asyncreadinit,
+  // because we adjust the pos down to the curr seek pos,
+  // and the resulting adjusted len is > 2GB.  I'm suspicious
+  // that this is even correct, but even if it is, we can't
+  // be loading that much data in the cache anyway.
+
+  const long idx = m_clusterCount;
+
+  if (m_clusterPreloadCount > 0) {
+    if (idx >= m_clusterSize)
+      return E_FILE_FORMAT_INVALID;
+
+    Cluster* const pCluster = m_clusters[idx];
+    if (pCluster == NULL || pCluster->m_index >= 0)
+      return E_FILE_FORMAT_INVALID;
+
+    const long long off = pCluster->GetPosition();
+    if (off < 0)
+      return E_FILE_FORMAT_INVALID;
+
+    if (off == cluster_off) {  // preloaded already
+      if (status == 0)  // no entries found
+        return E_FILE_FORMAT_INVALID;
+
+      if (cluster_size >= 0)
+        pos += cluster_size;
+      else {
+        const long long element_size = pCluster->GetElementSize();
+
+        if (element_size <= 0)
+          return E_FILE_FORMAT_INVALID;  // TODO: handle this case
+
+        pos = pCluster->m_element_start + element_size;
+      }
+
+      pCluster->m_index = idx;  // move from preloaded to loaded
+      ++m_clusterCount;
+      --m_clusterPreloadCount;
+
+      m_pos = pos;  // consume payload
+      if (segment_stop >= 0 && m_pos > segment_stop)
+        return E_FILE_FORMAT_INVALID;
+
+      return 0;  // success
+    }
+  }
+
+  if (status == 0) {  // no entries found
+    if (cluster_size >= 0)
+      pos += cluster_size;
+
+    if ((total >= 0) && (pos >= total)) {
+      m_pos = total;
+      return 1;  // no more clusters
+    }
+
+    if ((segment_stop >= 0) && (pos >= segment_stop)) {
+      m_pos = segment_stop;
+      return 1;  // no more clusters
+    }
+
+    m_pos = pos;
+    return 2;  // try again
+  }
+
+  // status > 0 means we have an entry
+
+  Cluster* const pCluster = Cluster::Create(this, idx, cluster_off);
+  if (pCluster == NULL)
+    return -1;
+
+  if (!AppendCluster(pCluster)) {
+    delete pCluster;
+    return -1;
+  }
+
+  if (cluster_size >= 0) {
+    pos += cluster_size;
+
+    m_pos = pos;
+
+    if (segment_stop > 0 && m_pos > segment_stop)
+      return E_FILE_FORMAT_INVALID;
+
+    return 0;
+  }
+
+  m_pUnknownSize = pCluster;
+  m_pos = -pos;
+
+  return 0;  // partial success, since we have a new cluster
+
+  // At this point pos designates the start of the cluster payload, and
+  // m_pos has NOT been advanced past it: it holds -pos, so that we can
+  // come back here later and finish parsing this unknown-size cluster.
+}
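+
+// Note on the unknown-size path above: when a cluster's size is not encoded
+// in the stream, DoLoadCluster records the new cluster in m_pUnknownSize and
+// stores the *negated* payload position in m_pos.  A negative m_pos therefore
+// means "still inside an unknown-size cluster whose payload starts at -m_pos",
+// which is exactly the precondition DoLoadClusterUnknownSize checks below
+// before resuming the parse.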
+
+long Segment::DoLoadClusterUnknownSize(long long& pos, long& len) {
+  if (m_pos >= 0 || m_pUnknownSize == NULL)
+    return E_PARSE_FAILED;
+
+  const long status = m_pUnknownSize->Parse(pos, len);
+
+  if (status < 0)  // error or underflow
+    return status;
+
+  if (status == 0)  // parsed a block
+    return 2;  // continue parsing
+
+  const long long start = m_pUnknownSize->m_element_start;
+  const long long size = m_pUnknownSize->GetElementSize();
+
+  if (size < 0)
+    return E_FILE_FORMAT_INVALID;
+
+  pos = start + size;
+  m_pos = pos;
+
+  m_pUnknownSize = 0;
+
+  return 2;  // continue parsing
+}
+
+bool Segment::AppendCluster(Cluster* pCluster) {
+  if (pCluster == NULL || pCluster->m_index < 0)
+    return false;
+
+  const long count = m_clusterCount + m_clusterPreloadCount;
+
+  long& size = m_clusterSize;
+  const long idx = pCluster->m_index;
+
+  if (size < count || idx != m_clusterCount)
+    return false;
+
+  if (count >= size) {
+    const long n = (size <= 0) ? 2048 : 2 * size;
+
+    Cluster** const qq = new Cluster*[n];
+
+    Cluster** q = qq;
+    Cluster** p = m_clusters;
+    Cluster** const pp = p + count;
+
+    while (p != pp)
+      *q++ = *p++;
+
+    delete[] m_clusters;
+
+    m_clusters = qq;
+    size = n;
+  }
+
+  if (m_clusterPreloadCount > 0) {
+    Cluster** const p = m_clusters + m_clusterCount;
+    if (*p == NULL || (*p)->m_index >= 0)
+      return false;
+
+    Cluster** q = p + m_clusterPreloadCount;
+    if (q >= (m_clusters + size))
+      return false;
+
+    for (;;) {
+      Cluster** const qq = q - 1;
+      if ((*qq)->m_index >= 0)
+        return false;
+
+      *q = *qq;
+      q = qq;
+
+      if (q == p)
+        break;
+    }
+  }
+
+  m_clusters[idx] = pCluster;
+  ++m_clusterCount;
+  return true;
+}
+
+bool Segment::PreloadCluster(Cluster* pCluster, ptrdiff_t idx) {
+  if (pCluster == NULL || pCluster->m_index >= 0 || idx < m_clusterCount)
+    return false;
+
+  const long count = m_clusterCount + m_clusterPreloadCount;
+
+  long& size = m_clusterSize;
+  if (size < count)
+    return false;
+
+  if (count >= size) {
+    const long n = (size <= 0) ? 2048 : 2 * size;
+
+    Cluster** const qq = new Cluster*[n];
+    Cluster** q = qq;
+
+    Cluster** p = m_clusters;
+    Cluster** const pp = p + count;
+
+    while (p != pp)
+      *q++ = *p++;
+
+    delete[] m_clusters;
+
+    m_clusters = qq;
+    size = n;
+  }
+
+  if (m_clusters == NULL)
+    return false;
+
+  Cluster** const p = m_clusters + idx;
+
+  Cluster** q = m_clusters + count;
+  if (q < p || q >= (m_clusters + size))
+    return false;
+
+  while (q > p) {
+    Cluster** const qq = q - 1;
+
+    if ((*qq)->m_index >= 0)
+      return false;
+
+    *q = *qq;
+    q = qq;
+  }
+
+  m_clusters[idx] = pCluster;
+  ++m_clusterPreloadCount;
+  return true;
+}
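+
+// Layout of the m_clusters array, as maintained by AppendCluster and
+// PreloadCluster above:
+//
+//   [0, m_clusterCount)              -- fully loaded clusters, in file
+//                                       order, each with m_index >= 0
+//   [m_clusterCount,
+//    m_clusterCount +
+//    m_clusterPreloadCount)          -- preloaded clusters (m_index < 0),
+//                                       kept sorted by segment-relative
+//                                       position
+//
+// AppendCluster appends a newly loaded cluster at index m_clusterCount and
+// shifts the preloaded block right by one slot; PreloadCluster inserts into
+// the preloaded block at the caller-supplied (already sorted) index.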
+
+long Segment::Load() {
+  if (m_clusters != NULL || m_clusterSize != 0 || m_clusterCount != 0)
+    return E_PARSE_FAILED;
+
+  // Outermost (level 0) segment object has been constructed,
+  // and pos designates start of payload.  We need to find the
+  // inner (level 1) elements.
+
+  const long long header_status = ParseHeaders();
+
+  if (header_status < 0)  // error
+    return static_cast<long>(header_status);
+
+  if (header_status > 0)  // underflow
+    return E_BUFFER_NOT_FULL;
+
+  if (m_pInfo == NULL || m_pTracks == NULL)
+    return E_FILE_FORMAT_INVALID;
+
+  for (;;) {
+    const long status = LoadCluster();
+
+    if (status < 0)  // error
+      return status;
+
+    if (status >= 1)  // no more clusters
+      return 0;
+  }
+}
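+
+// Illustrative sketch: a typical blocking-load driver for this parser,
+// assuming some mkvparser::IMkvReader implementation is available as
+// `reader` (the concrete reader type is the embedder's choice and is
+// hypothetical here):
+//
+//   long long pos = 0;
+//
+//   mkvparser::EBMLHeader ebml_header;
+//   if (ebml_header.Parse(&reader, pos) < 0)
+//     return;  // not a WebM/Matroska stream
+//
+//   mkvparser::Segment* segment = NULL;
+//   if (mkvparser::Segment::CreateInstance(&reader, pos, segment) != 0)
+//     return;
+//
+//   if (segment->Load() < 0) {  // parses headers and all clusters
+//     delete segment;
+//     return;
+//   }
+//
+//   const mkvparser::Cluster* cluster = segment->GetFirst();
+//   while (cluster != NULL && !cluster->EOS()) {
+//     // ... walk the cluster's block entries here ...
+//     cluster = segment->GetNext(cluster);
+//   }
+//
+//   delete segment;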
+
+SeekHead::SeekHead(Segment* pSegment, long long start, long long size_,
+                   long long element_start, long long element_size)
+    : m_pSegment(pSegment),
+      m_start(start),
+      m_size(size_),
+      m_element_start(element_start),
+      m_element_size(element_size),
+      m_entries(0),
+      m_entry_count(0),
+      m_void_elements(0),
+      m_void_element_count(0) {}
+
+SeekHead::~SeekHead() {
+  delete[] m_entries;
+  delete[] m_void_elements;
+}
+
+long SeekHead::Parse() {
+  IMkvReader* const pReader = m_pSegment->m_pReader;
+
+  long long pos = m_start;
+  const long long stop = m_start + m_size;
+
+  // first count the seek head entries
+
+  int entry_count = 0;
+  int void_element_count = 0;
+
+  while (pos < stop) {
+    long long id, size;
+
+    const long status = ParseElementHeader(pReader, pos, stop, id, size);
+
+    if (status < 0)  // error
+      return status;
+
+    if (id == libwebm::kMkvSeek)
+      ++entry_count;
+    else if (id == libwebm::kMkvVoid)
+      ++void_element_count;
+
+    pos += size;  // consume payload
+
+    if (pos > stop)
+      return E_FILE_FORMAT_INVALID;
+  }
+
+  if (pos != stop)
+    return E_FILE_FORMAT_INVALID;
+
+  m_entries = new Entry[entry_count];
+
+  m_void_elements = new VoidElement[void_element_count];
+
+  // now parse the entries and void elements
+
+  Entry* pEntry = m_entries;
+  VoidElement* pVoidElement = m_void_elements;
+
+  pos = m_start;
+
+  while (pos < stop) {
+    const long long idpos = pos;
+
+    long long id, size;
+
+    const long status = ParseElementHeader(pReader, pos, stop, id, size);
+
+    if (status < 0)  // error
+      return status;
+
+    if (id == libwebm::kMkvSeek) {
+      if (ParseEntry(pReader, pos, size, pEntry)) {
+        Entry& e = *pEntry++;
+
+        e.element_start = idpos;
+        e.element_size = (pos + size) - idpos;
+      }
+    } else if (id == libwebm::kMkvVoid) {
+      VoidElement& e = *pVoidElement++;
+
+      e.element_start = idpos;
+      e.element_size = (pos + size) - idpos;
+    }
+
+    pos += size;  // consume payload
+    if (pos > stop)
+      return E_FILE_FORMAT_INVALID;
+  }
+
+  if (pos != stop)
+    return E_FILE_FORMAT_INVALID;
+
+  ptrdiff_t count_ = ptrdiff_t(pEntry - m_entries);
+  assert(count_ >= 0);
+  assert(count_ <= entry_count);
+
+  m_entry_count = static_cast<int>(count_);
+
+  count_ = ptrdiff_t(pVoidElement - m_void_elements);
+  assert(count_ >= 0);
+  assert(count_ <= void_element_count);
+
+  m_void_element_count = static_cast<int>(count_);
+
+  return 0;
+}
+
+int SeekHead::GetCount() const { return m_entry_count; }
+
+const SeekHead::Entry* SeekHead::GetEntry(int idx) const {
+  if (idx < 0)
+    return 0;
+
+  if (idx >= m_entry_count)
+    return 0;
+
+  return m_entries + idx;
+}
+
+int SeekHead::GetVoidElementCount() const { return m_void_element_count; }
+
+const SeekHead::VoidElement* SeekHead::GetVoidElement(int idx) const {
+  if (idx < 0)
+    return 0;
+
+  if (idx >= m_void_element_count)
+    return 0;
+
+  return m_void_elements + idx;
+}
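+
+// Illustrative sketch: the SeekHead maps level-1 element IDs to their
+// segment-relative positions, which is how the Cues can be located without
+// scanning the whole file.  Assuming `segment` has already parsed its
+// headers:
+//
+//   const mkvparser::SeekHead* seek_head = segment->GetSeekHead();
+//
+//   if (seek_head) {
+//     for (int i = 0; i < seek_head->GetCount(); ++i) {
+//       const mkvparser::SeekHead::Entry* e = seek_head->GetEntry(i);
+//
+//       if (e && e->id == libwebm::kMkvCues) {
+//         long long pos;
+//         long len;
+//
+//         segment->ParseCues(e->pos, pos, len);  // may report E_BUFFER_NOT_FULL
+//         break;
+//       }
+//     }
+//   }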
+
+long Segment::ParseCues(long long off, long long& pos, long& len) {
+  if (m_pCues)
+    return 0;  // success
+
+  if (off < 0)
+    return -1;
+
+  long long total, avail;
+
+  const int status = m_pReader->Length(&total, &avail);
+
+  if (status < 0)  // error
+    return status;
+
+  assert((total < 0) || (avail <= total));
+
+  pos = m_start + off;
+
+  if ((total < 0) || (pos >= total))
+    return 1;  // don't bother parsing cues
+
+  const long long element_start = pos;
+  const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size;
+
+  if ((pos + 1) > avail) {
+    len = 1;
+    return E_BUFFER_NOT_FULL;
+  }
+
+  long long result = GetUIntLength(m_pReader, pos, len);
+
+  if (result < 0)  // error
+    return static_cast<long>(result);
+
+  if (result > 0)  // underflow (weird)
+  {
+    len = 1;
+    return E_BUFFER_NOT_FULL;
+  }
+
+  if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+    return E_FILE_FORMAT_INVALID;
+
+  if ((pos + len) > avail)
+    return E_BUFFER_NOT_FULL;
+
+  const long long idpos = pos;
+
+  const long long id = ReadID(m_pReader, idpos, len);
+
+  if (id != libwebm::kMkvCues)
+    return E_FILE_FORMAT_INVALID;
+
+  pos += len;  // consume ID
+  assert((segment_stop < 0) || (pos <= segment_stop));
+
+  // Read Size
+
+  if ((pos + 1) > avail) {
+    len = 1;
+    return E_BUFFER_NOT_FULL;
+  }
+
+  result = GetUIntLength(m_pReader, pos, len);
+
+  if (result < 0)  // error
+    return static_cast<long>(result);
+
+  if (result > 0)  // underflow (weird)
+  {
+    len = 1;
+    return E_BUFFER_NOT_FULL;
+  }
+
+  if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+    return E_FILE_FORMAT_INVALID;
+
+  if ((pos + len) > avail)
+    return E_BUFFER_NOT_FULL;
+
+  const long long size = ReadUInt(m_pReader, pos, len);
+
+  if (size < 0)  // error
+    return static_cast<long>(size);
+
+  if (size == 0)  // weird, although technically not illegal
+    return 1;  // done
+
+  pos += len;  // consume length of size of element
+  assert((segment_stop < 0) || (pos <= segment_stop));
+
+  // Pos now points to start of payload
+
+  const long long element_stop = pos + size;
+
+  if ((segment_stop >= 0) && (element_stop > segment_stop))
+    return E_FILE_FORMAT_INVALID;
+
+  if ((total >= 0) && (element_stop > total))
+    return 1;  // don't bother parsing anymore
+
+  len = static_cast<long>(size);
+
+  if (element_stop > avail)
+    return E_BUFFER_NOT_FULL;
+
+  const long long element_size = element_stop - element_start;
+
+  m_pCues = new Cues(this, pos, size, element_start, element_size);
+
+  return 0;  // success
+}
+
+bool SeekHead::ParseEntry(IMkvReader* pReader, long long start, long long size_,
+                          Entry* pEntry) {
+  if (size_ <= 0)
+    return false;
+
+  long long pos = start;
+  const long long stop = start + size_;
+
+  long len;
+
+  // parse the container for the level-1 element ID
+
+  const long long seekIdId = ReadID(pReader, pos, len);
+  if (seekIdId < 0)
+    return false;
+
+  if (seekIdId != libwebm::kMkvSeekID)
+    return false;
+
+  if ((pos + len) > stop)
+    return false;
+
+  pos += len;  // consume SeekID id
+
+  const long long seekIdSize = ReadUInt(pReader, pos, len);
+
+  if (seekIdSize <= 0)
+    return false;
+
+  if ((pos + len) > stop)
+    return false;
+
+  pos += len;  // consume size of field
+
+  if ((pos + seekIdSize) > stop)
+    return false;
+
+  // Note that the SeekId payload really is serialized
+  // as a "Matroska integer", not as a plain binary value.
+  // In fact, Matroska requires that ID values in the
+  // stream exactly match the binary representation as listed
+  // in the Matroska specification.
+  //
+  // This parser is more liberal, and permits IDs to have
+  // any width.  (This could make the representation in the stream
+  // different from what's in the spec, but it doesn't matter here,
+  // since we always normalize "Matroska integer" values.)
+
+  pEntry->id = ReadUInt(pReader, pos, len);  // payload
+
+  if (pEntry->id <= 0)
+    return false;
+
+  if (len != seekIdSize)
+    return false;
+
+  pos += seekIdSize;  // consume SeekID payload
+
+  const long long seekPosId = ReadID(pReader, pos, len);
+
+  if (seekPosId != libwebm::kMkvSeekPosition)
+    return false;
+
+  if ((pos + len) > stop)
+    return false;
+
+  pos += len;  // consume id
+
+  const long long seekPosSize = ReadUInt(pReader, pos, len);
+
+  if (seekPosSize <= 0)
+    return false;
+
+  if ((pos + len) > stop)
+    return false;
+
+  pos += len;  // consume size
+
+  if ((pos + seekPosSize) > stop)
+    return false;
+
+  pEntry->pos = UnserializeUInt(pReader, pos, seekPosSize);
+
+  if (pEntry->pos < 0)
+    return false;
+
+  pos += seekPosSize;  // consume payload
+
+  if (pos != stop)
+    return false;
+
+  return true;
+}
+
+Cues::Cues(Segment* pSegment, long long start_, long long size_,
+           long long element_start, long long element_size)
+    : m_pSegment(pSegment),
+      m_start(start_),
+      m_size(size_),
+      m_element_start(element_start),
+      m_element_size(element_size),
+      m_cue_points(NULL),
+      m_count(0),
+      m_preload_count(0),
+      m_pos(start_) {}
+
+Cues::~Cues() {
+  const long n = m_count + m_preload_count;
+
+  CuePoint** p = m_cue_points;
+  CuePoint** const q = p + n;
+
+  while (p != q) {
+    CuePoint* const pCP = *p++;
+    assert(pCP);
+
+    delete pCP;
+  }
+
+  delete[] m_cue_points;
+}
+
+long Cues::GetCount() const {
+  if (m_cue_points == NULL)
+    return -1;
+
+  return m_count;  // TODO: really ignore preload count?
+}
+
+bool Cues::DoneParsing() const {
+  const long long stop = m_start + m_size;
+  return (m_pos >= stop);
+}
+
+bool Cues::Init() const {
+  if (m_cue_points)
+    return true;
+
+  if (m_count != 0 || m_preload_count != 0)
+    return false;
+
+  IMkvReader* const pReader = m_pSegment->m_pReader;
+
+  const long long stop = m_start + m_size;
+  long long pos = m_start;
+
+  long cue_points_size = 0;
+
+  while (pos < stop) {
+    const long long idpos = pos;
+
+    long len;
+
+    const long long id = ReadID(pReader, pos, len);
+    if (id < 0 || (pos + len) > stop) {
+      return false;
+    }
+
+    pos += len;  // consume ID
+
+    const long long size = ReadUInt(pReader, pos, len);
+    if (size < 0 || (pos + len > stop)) {
+      return false;
+    }
+
+    pos += len;  // consume Size field
+    if (pos + size > stop) {
+      return false;
+    }
+
+    if (id == libwebm::kMkvCuePoint) {
+      if (!PreloadCuePoint(cue_points_size, idpos))
+        return false;
+    }
+
+    pos += size;  // skip payload
+  }
+  return true;
+}
+
+bool Cues::PreloadCuePoint(long& cue_points_size, long long pos) const {
+  if (m_count != 0)
+    return false;
+
+  if (m_preload_count >= cue_points_size) {
+    const long n = (cue_points_size <= 0) ? 2048 : 2 * cue_points_size;
+
+    CuePoint** const qq = new CuePoint*[n];
+
+    CuePoint** q = qq;  // beginning of target
+
+    CuePoint** p = m_cue_points;  // beginning of source
+    CuePoint** const pp = p + m_preload_count;  // end of source
+
+    while (p != pp)
+      *q++ = *p++;
+
+    delete[] m_cue_points;
+
+    m_cue_points = qq;
+    cue_points_size = n;
+  }
+
+  CuePoint* const pCP = new CuePoint(m_preload_count, pos);
+
+  m_cue_points[m_preload_count++] = pCP;
+  return true;
+}
+
+bool Cues::LoadCuePoint() const {
+  const long long stop = m_start + m_size;
+
+  if (m_pos >= stop)
+    return false;  // nothing else to do
+
+  if (!Init()) {
+    m_pos = stop;
+    return false;
+  }
+
+  IMkvReader* const pReader = m_pSegment->m_pReader;
+
+  while (m_pos < stop) {
+    const long long idpos = m_pos;
+
+    long len;
+
+    const long long id = ReadID(pReader, m_pos, len);
+    if (id < 0 || (m_pos + len) > stop)
+      return false;
+
+    m_pos += len;  // consume ID
+
+    const long long size = ReadUInt(pReader, m_pos, len);
+    if (size < 0 || (m_pos + len) > stop)
+      return false;
+
+    m_pos += len;  // consume Size field
+    if ((m_pos + size) > stop)
+      return false;
+
+    if (id != libwebm::kMkvCuePoint) {
+      m_pos += size;  // consume payload
+      if (m_pos > stop)
+        return false;
+
+      continue;
+    }
+
+    if (m_preload_count < 1)
+      return false;
+
+    CuePoint* const pCP = m_cue_points[m_count];
+    if (!pCP || (pCP->GetTimeCode() < 0 && (-pCP->GetTimeCode() != idpos)))
+      return false;
+
+    if (!pCP->Load(pReader)) {
+      m_pos = stop;
+      return false;
+    }
+    ++m_count;
+    --m_preload_count;
+
+    m_pos += size;  // consume payload
+    if (m_pos > stop)
+      return false;
+
+    return true;  // yes, we loaded a cue point
+  }
+
+  return false;  // no, we did not load a cue point
+}
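+
+// Illustrative sketch: cue points are loaded lazily, one per call to
+// LoadCuePoint.  An embedder that wants them all up front can simply drain
+// the parser:
+//
+//   const mkvparser::Cues* cues = segment->GetCues();
+//
+//   if (cues) {
+//     while (!cues->DoneParsing())
+//       cues->LoadCuePoint();
+//   }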
+
+bool Cues::Find(long long time_ns, const Track* pTrack, const CuePoint*& pCP,
+                const CuePoint::TrackPosition*& pTP) const {
+  if (time_ns < 0 || pTrack == NULL || m_cue_points == NULL || m_count == 0)
+    return false;
+
+  CuePoint** const ii = m_cue_points;
+  CuePoint** i = ii;
+
+  CuePoint** const jj = ii + m_count;
+  CuePoint** j = jj;
+
+  pCP = *i;
+  if (pCP == NULL)
+    return false;
+
+  if (time_ns <= pCP->GetTime(m_pSegment)) {
+    pTP = pCP->Find(pTrack);
+    return (pTP != NULL);
+  }
+
+  while (i < j) {
+    // INVARIANT:
+    // [ii, i) <= time_ns
+    // [i, j)  ?
+    // [j, jj) > time_ns
+
+    CuePoint** const k = i + (j - i) / 2;
+    if (k >= jj)
+      return false;
+
+    CuePoint* const pCP = *k;
+    if (pCP == NULL)
+      return false;
+
+    const long long t = pCP->GetTime(m_pSegment);
+
+    if (t <= time_ns)
+      i = k + 1;
+    else
+      j = k;
+
+    if (i > j)
+      return false;
+  }
+
+  if (i != j || i > jj || i <= ii)
+    return false;
+
+  pCP = *--i;
+
+  if (pCP == NULL || pCP->GetTime(m_pSegment) > time_ns)
+    return false;
+
+  // TODO: here and elsewhere, it's probably not correct to search
+  // for the cue point with this time, and then search for a matching
+  // track.  In principle, the matching track could be on some earlier
+  // cue point, and with our current algorithm, we'd miss it.  To make
+  // this bullet-proof, we'd need to create a secondary structure,
+  // with a list of cue points that apply to a track, and then search
+  // that track-based structure for a matching cue point.
+
+  pTP = pCP->Find(pTrack);
+  return (pTP != NULL);
+}
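+
+// Illustrative sketch: a time-based seek on a given track, using the cue
+// point / track-position pair found above.  `cues`, `time_ns` and
+// `video_track` (a track obtained from segment->GetTracks()) are assumed
+// to exist in the caller:
+//
+//   const mkvparser::CuePoint* cue_point = NULL;
+//   const mkvparser::CuePoint::TrackPosition* track_pos = NULL;
+//
+//   if (cues->Find(time_ns, video_track, cue_point, track_pos)) {
+//     const mkvparser::BlockEntry* block = cues->GetBlock(cue_point, track_pos);
+//     // If non-NULL, `block` designates the cued block at or before time_ns
+//     // on that track; decoding should resume from there.
+//   }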
+
+const CuePoint* Cues::GetFirst() const {
+  if (m_cue_points == NULL || m_count == 0)
+    return NULL;
+
+  CuePoint* const* const pp = m_cue_points;
+  if (pp == NULL)
+    return NULL;
+
+  CuePoint* const pCP = pp[0];
+  if (pCP == NULL || pCP->GetTimeCode() < 0)
+    return NULL;
+
+  return pCP;
+}
+
+const CuePoint* Cues::GetLast() const {
+  if (m_cue_points == NULL || m_count <= 0)
+    return NULL;
+
+  const long index = m_count - 1;
+
+  CuePoint* const* const pp = m_cue_points;
+  if (pp == NULL)
+    return NULL;
+
+  CuePoint* const pCP = pp[index];
+  if (pCP == NULL || pCP->GetTimeCode() < 0)
+    return NULL;
+
+  return pCP;
+}
+
+const CuePoint* Cues::GetNext(const CuePoint* pCurr) const {
+  if (pCurr == NULL || pCurr->GetTimeCode() < 0 || m_cue_points == NULL ||
+      m_count < 1) {
+    return NULL;
+  }
+
+  long index = pCurr->m_index;
+  if (index >= m_count)
+    return NULL;
+
+  CuePoint* const* const pp = m_cue_points;
+  if (pp == NULL || pp[index] != pCurr)
+    return NULL;
+
+  ++index;
+
+  if (index >= m_count)
+    return NULL;
+
+  CuePoint* const pNext = pp[index];
+
+  if (pNext == NULL || pNext->GetTimeCode() < 0)
+    return NULL;
+
+  return pNext;
+}
+
+const BlockEntry* Cues::GetBlock(const CuePoint* pCP,
+                                 const CuePoint::TrackPosition* pTP) const {
+  if (pCP == NULL || pTP == NULL)
+    return NULL;
+
+  return m_pSegment->GetBlock(*pCP, *pTP);
+}
+
+const BlockEntry* Segment::GetBlock(const CuePoint& cp,
+                                    const CuePoint::TrackPosition& tp) {
+  Cluster** const ii = m_clusters;
+  Cluster** i = ii;
+
+  const long count = m_clusterCount + m_clusterPreloadCount;
+
+  Cluster** const jj = ii + count;
+  Cluster** j = jj;
+
+  while (i < j) {
+    // INVARIANT:
+    // [ii, i) < tp.m_pos
+    // [i, j)  ?
+    // [j, jj) > tp.m_pos
+
+    Cluster** const k = i + (j - i) / 2;
+    assert(k < jj);
+
+    Cluster* const pCluster = *k;
+    assert(pCluster);
+
+    // const long long pos_ = pCluster->m_pos;
+    // assert(pos_);
+    // const long long pos = pos_ * ((pos_ < 0) ? -1 : 1);
+
+    const long long pos = pCluster->GetPosition();
+    assert(pos >= 0);
+
+    if (pos < tp.m_pos)
+      i = k + 1;
+    else if (pos > tp.m_pos)
+      j = k;
+    else
+      return pCluster->GetEntry(cp, tp);
+  }
+
+  assert(i == j);
+  // assert(Cluster::HasBlockEntries(this, tp.m_pos));
+
+  Cluster* const pCluster = Cluster::Create(this, -1, tp.m_pos);  //, -1);
+  if (pCluster == NULL)
+    return NULL;
+
+  const ptrdiff_t idx = i - m_clusters;
+
+  if (!PreloadCluster(pCluster, idx)) {
+    delete pCluster;
+    return NULL;
+  }
+  assert(m_clusters);
+  assert(m_clusterPreloadCount > 0);
+  assert(m_clusters[idx] == pCluster);
+
+  return pCluster->GetEntry(cp, tp);
+}
+
+const Cluster* Segment::FindOrPreloadCluster(long long requested_pos) {
+  if (requested_pos < 0)
+    return 0;
+
+  Cluster** const ii = m_clusters;
+  Cluster** i = ii;
+
+  const long count = m_clusterCount + m_clusterPreloadCount;
+
+  Cluster** const jj = ii + count;
+  Cluster** j = jj;
+
+  while (i < j) {
+    // INVARIANT:
+    // [ii, i) < requested_pos
+    // [i, j)  ?
+    // [j, jj) > requested_pos
+
+    Cluster** const k = i + (j - i) / 2;
+    assert(k < jj);
+
+    Cluster* const pCluster = *k;
+    assert(pCluster);
+
+    // const long long pos_ = pCluster->m_pos;
+    // assert(pos_);
+    // const long long pos = pos_ * ((pos_ < 0) ? -1 : 1);
+
+    const long long pos = pCluster->GetPosition();
+    assert(pos >= 0);
+
+    if (pos < requested_pos)
+      i = k + 1;
+    else if (pos > requested_pos)
+      j = k;
+    else
+      return pCluster;
+  }
+
+  assert(i == j);
+  // assert(Cluster::HasBlockEntries(this, tp.m_pos));
+
+  Cluster* const pCluster = Cluster::Create(this, -1, requested_pos);
+  if (pCluster == NULL)
+    return NULL;
+
+  const ptrdiff_t idx = i - m_clusters;
+
+  if (!PreloadCluster(pCluster, idx)) {
+    delete pCluster;
+    return NULL;
+  }
+  assert(m_clusters);
+  assert(m_clusterPreloadCount > 0);
+  assert(m_clusters[idx] == pCluster);
+
+  return pCluster;
+}
+
+CuePoint::CuePoint(long idx, long long pos)
+    : m_element_start(0),
+      m_element_size(0),
+      m_index(idx),
+      m_timecode(-1 * pos),
+      m_track_positions(NULL),
+      m_track_positions_count(0) {
+  assert(pos > 0);
+}
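+
+// Note on the constructor above: a freshly constructed CuePoint stores the
+// *negated* file position of its CuePoint element in m_timecode.  A negative
+// timecode therefore means "not loaded yet"; CuePoint::Load (below) recovers
+// the position as -m_timecode, parses the element, and only then overwrites
+// m_timecode with the real CueTime value.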
+
+CuePoint::~CuePoint() { delete[] m_track_positions; }
+
+bool CuePoint::Load(IMkvReader* pReader) {
+  // odbgstream os;
+  // os << "CuePoint::Load(begin): timecode=" << m_timecode << endl;
+
+  if (m_timecode >= 0)  // already loaded
+    return true;
+
+  assert(m_track_positions == NULL);
+  assert(m_track_positions_count == 0);
+
+  long long pos_ = -m_timecode;
+  const long long element_start = pos_;
+
+  long long stop;
+
+  {
+    long len;
+
+    const long long id = ReadID(pReader, pos_, len);
+    if (id != libwebm::kMkvCuePoint)
+      return false;
+
+    pos_ += len;  // consume ID
+
+    const long long size = ReadUInt(pReader, pos_, len);
+    assert(size >= 0);
+
+    pos_ += len;  // consume Size field
+    // pos_ now points to start of payload
+
+    stop = pos_ + size;
+  }
+
+  const long long element_size = stop - element_start;
+
+  long long pos = pos_;
+
+  // First count number of track positions
+
+  while (pos < stop) {
+    long len;
+
+    const long long id = ReadID(pReader, pos, len);
+    if ((id < 0) || (pos + len > stop)) {
+      return false;
+    }
+
+    pos += len;  // consume ID
+
+    const long long size = ReadUInt(pReader, pos, len);
+    if ((size < 0) || (pos + len > stop)) {
+      return false;
+    }
+
+    pos += len;  // consume Size field
+    if ((pos + size) > stop) {
+      return false;
+    }
+
+    if (id == libwebm::kMkvCueTime)
+      m_timecode = UnserializeUInt(pReader, pos, size);
+
+    else if (id == libwebm::kMkvCueTrackPositions)
+      ++m_track_positions_count;
+
+    pos += size;  // consume payload
+  }
+
+  if (m_timecode < 0 || m_track_positions_count <= 0) {
+    return false;
+  }
+
+  // os << "CuePoint::Load(cont'd): idpos=" << idpos
+  //   << " timecode=" << m_timecode
+  //   << endl;
+
+  m_track_positions = new TrackPosition[m_track_positions_count];
+
+  // Now parse track positions
+
+  TrackPosition* p = m_track_positions;
+  pos = pos_;
+
+  while (pos < stop) {
+    long len;
+
+    const long long id = ReadID(pReader, pos, len);
+    if (id < 0 || (pos + len) > stop)
+      return false;
+
+    pos += len;  // consume ID
+
+    const long long size = ReadUInt(pReader, pos, len);
+    assert(size >= 0);
+    assert((pos + len) <= stop);
+
+    pos += len;  // consume Size field
+    assert((pos + size) <= stop);
+
+    if (id == libwebm::kMkvCueTrackPositions) {
+      TrackPosition& tp = *p++;
+      if (!tp.Parse(pReader, pos, size)) {
+        return false;
+      }
+    }
+
+    pos += size;  // consume payload
+    if (pos > stop)
+      return false;
+  }
+
+  assert(size_t(p - m_track_positions) == m_track_positions_count);
+
+  m_element_start = element_start;
+  m_element_size = element_size;
+
+  return true;
+}
+
+bool CuePoint::TrackPosition::Parse(IMkvReader* pReader, long long start_,
+                                    long long size_) {
+  const long long stop = start_ + size_;
+  long long pos = start_;
+
+  m_track = -1;
+  m_pos = -1;
+  m_block = 1;  // default
+
+  while (pos < stop) {
+    long len;
+
+    const long long id = ReadID(pReader, pos, len);
+    if ((id < 0) || ((pos + len) > stop)) {
+      return false;
+    }
+
+    pos += len;  // consume ID
+
+    const long long size = ReadUInt(pReader, pos, len);
+    if ((size < 0) || ((pos + len) > stop)) {
+      return false;
+    }
+
+    pos += len;  // consume Size field
+    if ((pos + size) > stop) {
+      return false;
+    }
+
+    if (id == libwebm::kMkvCueTrack)
+      m_track = UnserializeUInt(pReader, pos, size);
+    else if (id == libwebm::kMkvCueClusterPosition)
+      m_pos = UnserializeUInt(pReader, pos, size);
+    else if (id == libwebm::kMkvCueBlockNumber)
+      m_block = UnserializeUInt(pReader, pos, size);
+
+    pos += size;  // consume payload
+  }
+
+  if ((m_pos < 0) || (m_track <= 0)) {
+    return false;
+  }
+
+  return true;
+}
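+
+// Note: CueClusterPosition (m_pos above) is stored relative to the start of
+// the segment payload, not as an absolute file offset.  It therefore matches
+// Cluster::GetPosition(), which is why Segment::GetBlock (earlier in this
+// file) can compare the two directly and pass tp.m_pos to Cluster::Create.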
+
+const CuePoint::TrackPosition* CuePoint::Find(const Track* pTrack) const {
+  assert(pTrack);
+
+  const long long n = pTrack->GetNumber();
+
+  const TrackPosition* i = m_track_positions;
+  const TrackPosition* const j = i + m_track_positions_count;
+
+  while (i != j) {
+    const TrackPosition& p = *i++;
+
+    if (p.m_track == n)
+      return &p;
+  }
+
+  return NULL;  // no matching track number found
+}
+
+long long CuePoint::GetTimeCode() const { return m_timecode; }
+
+long long CuePoint::GetTime(const Segment* pSegment) const {
+  assert(pSegment);
+  assert(m_timecode >= 0);
+
+  const SegmentInfo* const pInfo = pSegment->GetInfo();
+  assert(pInfo);
+
+  const long long scale = pInfo->GetTimeCodeScale();
+  assert(scale >= 1);
+
+  const long long time = scale * m_timecode;
+
+  return time;
+}
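+
+// For example, with the Matroska default TimecodeScale of 1,000,000 ns per
+// tick, a cue timecode of 2000 yields 2000 * 1,000,000 ns = 2,000,000,000 ns,
+// i.e. a cue point two seconds into the presentation.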
+
+bool Segment::DoneParsing() const {
+  if (m_size < 0) {
+    long long total, avail;
+
+    const int status = m_pReader->Length(&total, &avail);
+
+    if (status < 0)  // error
+      return true;  // must assume done
+
+    if (total < 0)
+      return false;  // assume live stream
+
+    return (m_pos >= total);
+  }
+
+  const long long stop = m_start + m_size;
+
+  return (m_pos >= stop);
+}
+
+const Cluster* Segment::GetFirst() const {
+  if ((m_clusters == NULL) || (m_clusterCount <= 0))
+    return &m_eos;
+
+  Cluster* const pCluster = m_clusters[0];
+  assert(pCluster);
+
+  return pCluster;
+}
+
+const Cluster* Segment::GetLast() const {
+  if ((m_clusters == NULL) || (m_clusterCount <= 0))
+    return &m_eos;
+
+  const long idx = m_clusterCount - 1;
+
+  Cluster* const pCluster = m_clusters[idx];
+  assert(pCluster);
+
+  return pCluster;
+}
+
+unsigned long Segment::GetCount() const { return m_clusterCount; }
+
+const Cluster* Segment::GetNext(const Cluster* pCurr) {
+  assert(pCurr);
+  assert(pCurr != &m_eos);
+  assert(m_clusters);
+
+  long idx = pCurr->m_index;
+
+  if (idx >= 0) {
+    assert(m_clusterCount > 0);
+    assert(idx < m_clusterCount);
+    assert(pCurr == m_clusters[idx]);
+
+    ++idx;
+
+    if (idx >= m_clusterCount)
+      return &m_eos;  // caller will LoadCluster as desired
+
+    Cluster* const pNext = m_clusters[idx];
+    assert(pNext);
+    assert(pNext->m_index >= 0);
+    assert(pNext->m_index == idx);
+
+    return pNext;
+  }
+
+  assert(m_clusterPreloadCount > 0);
+
+  long long pos = pCurr->m_element_start;
+
+  assert(m_size >= 0);  // TODO
+  const long long stop = m_start + m_size;  // end of segment
+
+  {
+    long len;
+
+    long long result = GetUIntLength(m_pReader, pos, len);
+    assert(result == 0);
+    assert((pos + len) <= stop);  // TODO
+    if (result != 0)
+      return NULL;
+
+    const long long id = ReadID(m_pReader, pos, len);
+    if (id != libwebm::kMkvCluster)
+      return NULL;
+
+    pos += len;  // consume ID
+
+    // Read Size
+    result = GetUIntLength(m_pReader, pos, len);
+    assert(result == 0);  // TODO
+    assert((pos + len) <= stop);  // TODO
+
+    const long long size = ReadUInt(m_pReader, pos, len);
+    assert(size > 0);  // TODO
+    // assert((pCurr->m_size <= 0) || (pCurr->m_size == size));
+
+    pos += len;  // consume length of size of element
+    assert((pos + size) <= stop);  // TODO
+
+    // Pos now points to start of payload
+
+    pos += size;  // consume payload
+  }
+
+  long long off_next = 0;
+
+  while (pos < stop) {
+    long len;
+
+    long long result = GetUIntLength(m_pReader, pos, len);
+    assert(result == 0);
+    assert((pos + len) <= stop);  // TODO
+    if (result != 0)
+      return NULL;
+
+    const long long idpos = pos;  // pos of next (potential) cluster
+
+    const long long id = ReadID(m_pReader, idpos, len);
+    if (id < 0)
+      return NULL;
+
+    pos += len;  // consume ID
+
+    // Read Size
+    result = GetUIntLength(m_pReader, pos, len);
+    assert(result == 0);  // TODO
+    assert((pos + len) <= stop);  // TODO
+
+    const long long size = ReadUInt(m_pReader, pos, len);
+    assert(size >= 0);  // TODO
+
+    pos += len;  // consume length of size of element
+    assert((pos + size) <= stop);  // TODO
+
+    // Pos now points to start of payload
+
+    if (size == 0)  // weird
+      continue;
+
+    if (id == libwebm::kMkvCluster) {
+      const long long off_next_ = idpos - m_start;
+
+      long long pos_;
+      long len_;
+
+      const long status = Cluster::HasBlockEntries(this, off_next_, pos_, len_);
+
+      assert(status >= 0);
+
+      if (status > 0) {
+        off_next = off_next_;
+        break;
+      }
+    }
+
+    pos += size;  // consume payload
+  }
+
+  if (off_next <= 0)
+    return 0;
+
+  Cluster** const ii = m_clusters + m_clusterCount;
+  Cluster** i = ii;
+
+  Cluster** const jj = ii + m_clusterPreloadCount;
+  Cluster** j = jj;
+
+  while (i < j) {
+    // INVARIANT:
+    // [ii, i) < off_next
+    // [i, j)  ?
+    // [j, jj) > off_next
+
+    Cluster** const k = i + (j - i) / 2;
+    assert(k < jj);
+
+    Cluster* const pNext = *k;
+    assert(pNext);
+    assert(pNext->m_index < 0);
+
+    // const long long pos_ = pNext->m_pos;
+    // assert(pos_);
+    // pos = pos_ * ((pos_ < 0) ? -1 : 1);
+
+    pos = pNext->GetPosition();
+
+    if (pos < off_next)
+      i = k + 1;
+    else if (pos > off_next)
+      j = k;
+    else
+      return pNext;
+  }
+
+  assert(i == j);
+
+  Cluster* const pNext = Cluster::Create(this, -1, off_next);
+  if (pNext == NULL)
+    return NULL;
+
+  const ptrdiff_t idx_next = i - m_clusters;  // insertion position
+
+  if (!PreloadCluster(pNext, idx_next)) {
+    delete pNext;
+    return NULL;
+  }
+  assert(m_clusters);
+  assert(idx_next < m_clusterSize);
+  assert(m_clusters[idx_next] == pNext);
+
+  return pNext;
+}
+
+long Segment::ParseNext(const Cluster* pCurr, const Cluster*& pResult,
+                        long long& pos, long& len) {
+  assert(pCurr);
+  assert(!pCurr->EOS());
+  assert(m_clusters);
+
+  pResult = 0;
+
+  if (pCurr->m_index >= 0) {  // loaded (not merely preloaded)
+    assert(m_clusters[pCurr->m_index] == pCurr);
+
+    const long next_idx = pCurr->m_index + 1;
+
+    if (next_idx < m_clusterCount) {
+      pResult = m_clusters[next_idx];
+      return 0;  // success
+    }
+
+    // curr cluster is last among loaded
+
+    const long result = LoadCluster(pos, len);
+
+    if (result < 0)  // error or underflow
+      return result;
+
+    if (result > 0)  // no more clusters
+    {
+      // pResult = &m_eos;
+      return 1;
+    }
+
+    pResult = GetLast();
+    return 0;  // success
+  }
+
+  assert(m_pos > 0);
+
+  long long total, avail;
+
+  long status = m_pReader->Length(&total, &avail);
+
+  if (status < 0)  // error
+    return status;
+
+  assert((total < 0) || (avail <= total));
+
+  const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size;
+
+  // interrogate curr cluster
+
+  pos = pCurr->m_element_start;
+
+  if (pCurr->m_element_size >= 0)
+    pos += pCurr->m_element_size;
+  else {
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
+    }
+
+    long long result = GetUIntLength(m_pReader, pos, len);
+
+    if (result < 0)  // error
+      return static_cast<long>(result);
+
+    if (result > 0)  // weird
+      return E_BUFFER_NOT_FULL;
+
+    if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    if ((pos + len) > avail)
+      return E_BUFFER_NOT_FULL;
+
+    const long long id = ReadUInt(m_pReader, pos, len);
+
+    if (id != libwebm::kMkvCluster)
+      return -1;
+
+    pos += len;  // consume ID
+
+    // Read Size
+
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
+    }
+
+    result = GetUIntLength(m_pReader, pos, len);
+
+    if (result < 0)  // error
+      return static_cast<long>(result);
+
+    if (result > 0)  // weird
+      return E_BUFFER_NOT_FULL;
+
+    if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    if ((pos + len) > avail)
+      return E_BUFFER_NOT_FULL;
+
+    const long long size = ReadUInt(m_pReader, pos, len);
+
+    if (size < 0)  // error
+      return static_cast<long>(size);
+
+    pos += len;  // consume size field
+
+    const long long unknown_size = (1LL << (7 * len)) - 1;
+
+    if (size == unknown_size)  // TODO: should never happen
+      return E_FILE_FORMAT_INVALID;  // TODO: resolve this
+
+    // assert((pCurr->m_size <= 0) || (pCurr->m_size == size));
+
+    if ((segment_stop >= 0) && ((pos + size) > segment_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    // Pos now points to start of payload
+
+    pos += size;  // consume payload (that is, the current cluster)
+    if (segment_stop >= 0 && pos > segment_stop)
+      return E_FILE_FORMAT_INVALID;
+
+    // By consuming the payload, we are assuming that the curr
+    // cluster isn't interesting.  That is, we don't bother checking
+    // whether the end of the curr cluster's payload lies beyond what
+    // happens to be available (obtained via IMkvReader::Length).
+    // Presumably the caller has already dispensed with the current
+    // cluster, and really does want the next cluster.
+  }
+
+  // pos now points to just beyond the last fully-loaded cluster
+
+  for (;;) {
+    const long status = DoParseNext(pResult, pos, len);
+
+    if (status <= 1)
+      return status;
+  }
+}
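+
+// Illustrative sketch: ParseNext is the incremental (non-blocking)
+// counterpart of GetNext.  Given the current (non-EOS) `cluster`, a typical
+// driver keeps supplying data until the reader can satisfy the parser:
+//
+//   const mkvparser::Cluster* next = NULL;
+//   long long pos;
+//   long len;
+//
+//   for (;;) {
+//     const long status = segment->ParseNext(cluster, next, pos, len);
+//
+//     if (status == 0)  // success: `next` is valid
+//       break;
+//
+//     if (status > 0) {  // no more clusters
+//       next = NULL;
+//       break;
+//     }
+//
+//     if (status == mkvparser::E_BUFFER_NOT_FULL) {
+//       // The reader needs roughly `len` more bytes available at `pos`;
+//       // fetch them (e.g. from the network) and call ParseNext again.
+//       continue;
+//     }
+//
+//     break;  // hard error (E_FILE_FORMAT_INVALID, etc.)
+//   }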
+
+long Segment::DoParseNext(const Cluster*& pResult, long long& pos, long& len) {
+  long long total, avail;
+
+  long status = m_pReader->Length(&total, &avail);
+
+  if (status < 0)  // error
+    return status;
+
+  assert((total < 0) || (avail <= total));
+
+  const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size;
+
+  // Parse next cluster.  This is strictly a parsing activity.
+  // Creation of a new cluster object happens later, after the
+  // parsing is done.
+
+  long long off_next = 0;
+  long long cluster_size = -1;
+
+  for (;;) {
+    if ((total >= 0) && (pos >= total))
+      return 1;  // EOF
+
+    if ((segment_stop >= 0) && (pos >= segment_stop))
+      return 1;  // EOF
+
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
+    }
+
+    long long result = GetUIntLength(m_pReader, pos, len);
+
+    if (result < 0)  // error
+      return static_cast<long>(result);
+
+    if (result > 0)  // weird
+      return E_BUFFER_NOT_FULL;
+
+    if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    if ((pos + len) > avail)
+      return E_BUFFER_NOT_FULL;
+
+    const long long idpos = pos;  // absolute
+    const long long idoff = pos - m_start;  // relative
+
+    const long long id = ReadID(m_pReader, idpos, len);  // absolute
+
+    if (id < 0)  // error
+      return static_cast<long>(id);
+
+    if (id == 0)  // weird
+      return -1;  // generic error
+
+    pos += len;  // consume ID
+
+    // Read Size
+
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
+    }
+
+    result = GetUIntLength(m_pReader, pos, len);
+
+    if (result < 0)  // error
+      return static_cast<long>(result);
+
+    if (result > 0)  // weird
+      return E_BUFFER_NOT_FULL;
+
+    if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    if ((pos + len) > avail)
+      return E_BUFFER_NOT_FULL;
+
+    const long long size = ReadUInt(m_pReader, pos, len);
+
+    if (size < 0)  // error
+      return static_cast<long>(size);
+
+    pos += len;  // consume length of size of element
+
+    // Pos now points to start of payload
+
+    if (size == 0)  // weird
+      continue;
+
+    const long long unknown_size = (1LL << (7 * len)) - 1;
+
+    if ((segment_stop >= 0) && (size != unknown_size) &&
+        ((pos + size) > segment_stop)) {
+      return E_FILE_FORMAT_INVALID;
+    }
+
+    if (id == libwebm::kMkvCues) {
+      if (size == unknown_size)
+        return E_FILE_FORMAT_INVALID;
+
+      const long long element_stop = pos + size;
+
+      if ((segment_stop >= 0) && (element_stop > segment_stop))
+        return E_FILE_FORMAT_INVALID;
+
+      const long long element_start = idpos;
+      const long long element_size = element_stop - element_start;
+
+      if (m_pCues == NULL) {
+        m_pCues = new Cues(this, pos, size, element_start, element_size);
+      }
+
+      pos += size;  // consume payload
+      if (segment_stop >= 0 && pos > segment_stop)
+        return E_FILE_FORMAT_INVALID;
+
+      continue;
+    }
+
+    if (id != libwebm::kMkvCluster) {  // not a Cluster ID
+      if (size == unknown_size)
+        return E_FILE_FORMAT_INVALID;
+
+      pos += size;  // consume payload
+      if (segment_stop >= 0 && pos > segment_stop)
+        return E_FILE_FORMAT_INVALID;
+
+      continue;
+    }
+
+    // We have a cluster.
+    off_next = idoff;
+
+    if (size != unknown_size)
+      cluster_size = size;
+
+    break;
+  }
+
+  assert(off_next > 0);  // have cluster
+
+  // We have parsed the next cluster.
+  // We have not created a cluster object yet.  What we need
+  // to do now is determine whether it has already been preloaded
+  // (in which case an object for this cluster has already been
+  // created), and if not, create a new cluster object.
+
+  Cluster** const ii = m_clusters + m_clusterCount;
+  Cluster** i = ii;
+
+  Cluster** const jj = ii + m_clusterPreloadCount;
+  Cluster** j = jj;
+
+  while (i < j) {
+    // INVARIANT:
+    // [ii, i) < off_next
+    // [i, j)  ?
+    // [j, jj) > off_next
+
+    Cluster** const k = i + (j - i) / 2;
+    assert(k < jj);
+
+    const Cluster* const pNext = *k;
+    assert(pNext);
+    assert(pNext->m_index < 0);
+
+    pos = pNext->GetPosition();
+    assert(pos >= 0);
+
+    if (pos < off_next)
+      i = k + 1;
+    else if (pos > off_next)
+      j = k;
+    else {
+      pResult = pNext;
+      return 0;  // success
+    }
+  }
+
+  assert(i == j);
+
+  long long pos_;
+  long len_;
+
+  status = Cluster::HasBlockEntries(this, off_next, pos_, len_);
+
+  if (status < 0) {  // error or underflow
+    pos = pos_;
+    len = len_;
+
+    return status;
+  }
+
+  if (status > 0) {  // means "found at least one block entry"
+    Cluster* const pNext = Cluster::Create(this,
+                                           -1,  // preloaded
+                                           off_next);
+    if (pNext == NULL)
+      return -1;
+
+    const ptrdiff_t idx_next = i - m_clusters;  // insertion position
+
+    if (!PreloadCluster(pNext, idx_next)) {
+      delete pNext;
+      return -1;
+    }
+    assert(m_clusters);
+    assert(idx_next < m_clusterSize);
+    assert(m_clusters[idx_next] == pNext);
+
+    pResult = pNext;
+    return 0;  // success
+  }
+
+  // status == 0 means "no block entries found"
+
+  if (cluster_size < 0) {  // unknown size
+    const long long payload_pos = pos;  // absolute pos of cluster payload
+
+    for (;;) {  // determine cluster size
+      if ((total >= 0) && (pos >= total))
+        break;
+
+      if ((segment_stop >= 0) && (pos >= segment_stop))
+        break;  // no more clusters
+
+      // Read ID
+
+      if ((pos + 1) > avail) {
+        len = 1;
+        return E_BUFFER_NOT_FULL;
+      }
+
+      long long result = GetUIntLength(m_pReader, pos, len);
+
+      if (result < 0)  // error
+        return static_cast<long>(result);
+
+      if (result > 0)  // weird
+        return E_BUFFER_NOT_FULL;
+
+      if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+        return E_FILE_FORMAT_INVALID;
+
+      if ((pos + len) > avail)
+        return E_BUFFER_NOT_FULL;
+
+      const long long idpos = pos;
+      const long long id = ReadID(m_pReader, idpos, len);
+
+      if (id < 0)  // error (or underflow)
+        return static_cast<long>(id);
+
+      // This is the distinguished set of IDs we use to determine
+      // that we have exhausted the sub-elements inside the cluster
+      // whose ID we parsed earlier.
+
+      if (id == libwebm::kMkvCluster || id == libwebm::kMkvCues)
+        break;
+
+      pos += len;  // consume ID (of sub-element)
+
+      // Read Size
+
+      if ((pos + 1) > avail) {
+        len = 1;
+        return E_BUFFER_NOT_FULL;
+      }
+
+      result = GetUIntLength(m_pReader, pos, len);
+
+      if (result < 0)  // error
+        return static_cast<long>(result);
+
+      if (result > 0)  // weird
+        return E_BUFFER_NOT_FULL;
+
+      if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+        return E_FILE_FORMAT_INVALID;
+
+      if ((pos + len) > avail)
+        return E_BUFFER_NOT_FULL;
+
+      const long long size = ReadUInt(m_pReader, pos, len);
+
+      if (size < 0)  // error
+        return static_cast<long>(size);
+
+      pos += len;  // consume size field of element
+
+      // pos now points to start of sub-element's payload
+
+      if (size == 0)  // weird
+        continue;
+
+      const long long unknown_size = (1LL << (7 * len)) - 1;
+
+      if (size == unknown_size)
+        return E_FILE_FORMAT_INVALID;  // not allowed for sub-elements
+
+      if ((segment_stop >= 0) && ((pos + size) > segment_stop))  // weird
+        return E_FILE_FORMAT_INVALID;
+
+      pos += size;  // consume payload of sub-element
+      if (segment_stop >= 0 && pos > segment_stop)
+        return E_FILE_FORMAT_INVALID;
+    }  // determine cluster size
+
+    cluster_size = pos - payload_pos;
+    assert(cluster_size >= 0);  // TODO: handle cluster_size = 0
+
+    pos = payload_pos;  // reset and re-parse original cluster
+  }
+
+  pos += cluster_size;  // consume payload
+  if (segment_stop >= 0 && pos > segment_stop)
+    return E_FILE_FORMAT_INVALID;
+
+  return 2;  // try to find a cluster that follows next
+}
+
+const Cluster* Segment::FindCluster(long long time_ns) const {
+  if ((m_clusters == NULL) || (m_clusterCount <= 0))
+    return &m_eos;
+
+  {
+    Cluster* const pCluster = m_clusters[0];
+    assert(pCluster);
+    assert(pCluster->m_index == 0);
+
+    if (time_ns <= pCluster->GetTime())
+      return pCluster;
+  }
+
+  // Binary search of cluster array
+
+  long i = 0;
+  long j = m_clusterCount;
+
+  while (i < j) {
+    // INVARIANT:
+    // [0, i) <= time_ns
+    // [i, j)  ?
+    // [j, m_clusterCount) > time_ns
+
+    const long k = i + (j - i) / 2;
+    assert(k < m_clusterCount);
+
+    Cluster* const pCluster = m_clusters[k];
+    assert(pCluster);
+    assert(pCluster->m_index == k);
+
+    const long long t = pCluster->GetTime();
+
+    if (t <= time_ns)
+      i = k + 1;
+    else
+      j = k;
+
+    assert(i <= j);
+  }
+
+  assert(i == j);
+  assert(i > 0);
+  assert(i <= m_clusterCount);
+
+  const long k = i - 1;
+
+  Cluster* const pCluster = m_clusters[k];
+  assert(pCluster);
+  assert(pCluster->m_index == k);
+  assert(pCluster->GetTime() <= time_ns);
+
+  return pCluster;
+}
+
+const Tracks* Segment::GetTracks() const { return m_pTracks; }
+const SegmentInfo* Segment::GetInfo() const { return m_pInfo; }
+const Cues* Segment::GetCues() const { return m_pCues; }
+const Chapters* Segment::GetChapters() const { return m_pChapters; }
+const Tags* Segment::GetTags() const { return m_pTags; }
+const SeekHead* Segment::GetSeekHead() const { return m_pSeekHead; }
+
+long long Segment::GetDuration() const {
+  assert(m_pInfo);
+  return m_pInfo->GetDuration();
+}
+
+Chapters::Chapters(Segment* pSegment, long long payload_start,
+                   long long payload_size, long long element_start,
+                   long long element_size)
+    : m_pSegment(pSegment),
+      m_start(payload_start),
+      m_size(payload_size),
+      m_element_start(element_start),
+      m_element_size(element_size),
+      m_editions(NULL),
+      m_editions_size(0),
+      m_editions_count(0) {}
+
+Chapters::~Chapters() {
+  while (m_editions_count > 0) {
+    Edition& e = m_editions[--m_editions_count];
+    e.Clear();
+  }
+  delete[] m_editions;
+}
+
+long Chapters::Parse() {
+  IMkvReader* const pReader = m_pSegment->m_pReader;
+
+  long long pos = m_start;  // payload start
+  const long long stop = pos + m_size;  // payload stop
+
+  while (pos < stop) {
+    long long id, size;
+
+    long status = ParseElementHeader(pReader, pos, stop, id, size);
+
+    if (status < 0)  // error
+      return status;
+
+    if (size == 0)  // weird
+      continue;
+
+    if (id == libwebm::kMkvEditionEntry) {
+      status = ParseEdition(pos, size);
+
+      if (status < 0)  // error
+        return status;
+    }
+
+    pos += size;
+    if (pos > stop)
+      return E_FILE_FORMAT_INVALID;
+  }
+
+  if (pos != stop)
+    return E_FILE_FORMAT_INVALID;
+  return 0;
+}
+
+int Chapters::GetEditionCount() const { return m_editions_count; }
+
+const Chapters::Edition* Chapters::GetEdition(int idx) const {
+  if (idx < 0)
+    return NULL;
+
+  if (idx >= m_editions_count)
+    return NULL;
+
+  return m_editions + idx;
+}
+
+bool Chapters::ExpandEditionsArray() {
+  if (m_editions_size > m_editions_count)
+    return true;  // nothing else to do
+
+  const int size = (m_editions_size == 0) ? 1 : 2 * m_editions_size;
+
+  Edition* const editions = new Edition[size];
+
+  for (int idx = 0; idx < m_editions_count; ++idx) {
+    m_editions[idx].ShallowCopy(editions[idx]);
+  }
+
+  delete[] m_editions;
+  m_editions = editions;
+
+  m_editions_size = size;
+  return true;
+}
+
+long Chapters::ParseEdition(long long pos, long long size) {
+  if (!ExpandEditionsArray())
+    return -1;
+
+  Edition& e = m_editions[m_editions_count++];
+  e.Init();
+
+  return e.Parse(m_pSegment->m_pReader, pos, size);
+}
+
+Chapters::Edition::Edition() {}
+
+Chapters::Edition::~Edition() {}
+
+int Chapters::Edition::GetAtomCount() const { return m_atoms_count; }
+
+const Chapters::Atom* Chapters::Edition::GetAtom(int index) const {
+  if (index < 0)
+    return NULL;
+
+  if (index >= m_atoms_count)
+    return NULL;
+
+  return m_atoms + index;
+}
+
+void Chapters::Edition::Init() {
+  m_atoms = NULL;
+  m_atoms_size = 0;
+  m_atoms_count = 0;
+}
+
+void Chapters::Edition::ShallowCopy(Edition& rhs) const {
+  rhs.m_atoms = m_atoms;
+  rhs.m_atoms_size = m_atoms_size;
+  rhs.m_atoms_count = m_atoms_count;
+}
+
+void Chapters::Edition::Clear() {
+  while (m_atoms_count > 0) {
+    Atom& a = m_atoms[--m_atoms_count];
+    a.Clear();
+  }
+
+  delete[] m_atoms;
+  m_atoms = NULL;
+
+  m_atoms_size = 0;
+}
+
+long Chapters::Edition::Parse(IMkvReader* pReader, long long pos,
+                              long long size) {
+  const long long stop = pos + size;
+
+  while (pos < stop) {
+    long long id, size;
+
+    long status = ParseElementHeader(pReader, pos, stop, id, size);
+
+    if (status < 0)  // error
+      return status;
+
+    if (size == 0)
+      continue;
+
+    if (id == libwebm::kMkvChapterAtom) {
+      status = ParseAtom(pReader, pos, size);
+
+      if (status < 0)  // error
+        return status;
+    }
+
+    pos += size;
+    if (pos > stop)
+      return E_FILE_FORMAT_INVALID;
+  }
+
+  if (pos != stop)
+    return E_FILE_FORMAT_INVALID;
+  return 0;
+}
+
+long Chapters::Edition::ParseAtom(IMkvReader* pReader, long long pos,
+                                  long long size) {
+  if (!ExpandAtomsArray())
+    return -1;
+
+  Atom& a = m_atoms[m_atoms_count++];
+  a.Init();
+
+  return a.Parse(pReader, pos, size);
+}
+
+bool Chapters::Edition::ExpandAtomsArray() {
+  if (m_atoms_size > m_atoms_count)
+    return true;  // nothing else to do
+
+  const int size = (m_atoms_size == 0) ? 1 : 2 * m_atoms_size;
+
+  Atom* const atoms = new Atom[size];
+
+  for (int idx = 0; idx < m_atoms_count; ++idx) {
+    m_atoms[idx].ShallowCopy(atoms[idx]);
+  }
+
+  delete[] m_atoms;
+  m_atoms = atoms;
+
+  m_atoms_size = size;
+  return true;
+}
+
+Chapters::Atom::Atom() {}
+
+Chapters::Atom::~Atom() {}
+
+unsigned long long Chapters::Atom::GetUID() const { return m_uid; }
+
+const char* Chapters::Atom::GetStringUID() const { return m_string_uid; }
+
+long long Chapters::Atom::GetStartTimecode() const { return m_start_timecode; }
+
+long long Chapters::Atom::GetStopTimecode() const { return m_stop_timecode; }
+
+long long Chapters::Atom::GetStartTime(const Chapters* pChapters) const {
+  return GetTime(pChapters, m_start_timecode);
+}
+
+long long Chapters::Atom::GetStopTime(const Chapters* pChapters) const {
+  return GetTime(pChapters, m_stop_timecode);
+}
+
+int Chapters::Atom::GetDisplayCount() const { return m_displays_count; }
+
+const Chapters::Display* Chapters::Atom::GetDisplay(int index) const {
+  if (index < 0)
+    return NULL;
+
+  if (index >= m_displays_count)
+    return NULL;
+
+  return m_displays + index;
+}
+
+void Chapters::Atom::Init() {
+  m_string_uid = NULL;
+  m_uid = 0;
+  m_start_timecode = -1;
+  m_stop_timecode = -1;
+
+  m_displays = NULL;
+  m_displays_size = 0;
+  m_displays_count = 0;
+}
+
+void Chapters::Atom::ShallowCopy(Atom& rhs) const {
+  rhs.m_string_uid = m_string_uid;
+  rhs.m_uid = m_uid;
+  rhs.m_start_timecode = m_start_timecode;
+  rhs.m_stop_timecode = m_stop_timecode;
+
+  rhs.m_displays = m_displays;
+  rhs.m_displays_size = m_displays_size;
+  rhs.m_displays_count = m_displays_count;
+}
+
+void Chapters::Atom::Clear() {
+  delete[] m_string_uid;
+  m_string_uid = NULL;
+
+  while (m_displays_count > 0) {
+    Display& d = m_displays[--m_displays_count];
+    d.Clear();
+  }
+
+  delete[] m_displays;
+  m_displays = NULL;
+
+  m_displays_size = 0;
+}
+
+long Chapters::Atom::Parse(IMkvReader* pReader, long long pos, long long size) {
+  const long long stop = pos + size;
+
+  while (pos < stop) {
+    long long id, size;
+
+    long status = ParseElementHeader(pReader, pos, stop, id, size);
+
+    if (status < 0)  // error
+      return status;
+
+    if (size == 0)  // 0 length payload, skip.
+      continue;
+
+    if (id == libwebm::kMkvChapterDisplay) {
+      status = ParseDisplay(pReader, pos, size);
+
+      if (status < 0)  // error
+        return status;
+    } else if (id == libwebm::kMkvChapterStringUID) {
+      status = UnserializeString(pReader, pos, size, m_string_uid);
+
+      if (status < 0)  // error
+        return status;
+    } else if (id == libwebm::kMkvChapterUID) {
+      long long val;
+      status = UnserializeInt(pReader, pos, size, val);
+
+      if (status < 0)  // error
+        return status;
+
+      m_uid = static_cast<unsigned long long>(val);
+    } else if (id == libwebm::kMkvChapterTimeStart) {
+      const long long val = UnserializeUInt(pReader, pos, size);
+
+      if (val < 0)  // error
+        return static_cast<long>(val);
+
+      m_start_timecode = val;
+    } else if (id == libwebm::kMkvChapterTimeEnd) {
+      const long long val = UnserializeUInt(pReader, pos, size);
+
+      if (val < 0)  // error
+        return static_cast<long>(val);
+
+      m_stop_timecode = val;
+    }
+
+    pos += size;
+    if (pos > stop)
+      return E_FILE_FORMAT_INVALID;
+  }
+
+  if (pos != stop)
+    return E_FILE_FORMAT_INVALID;
+  return 0;
+}
+
+long long Chapters::Atom::GetTime(const Chapters* pChapters,
+                                  long long timecode) {
+  if (pChapters == NULL)
+    return -1;
+
+  Segment* const pSegment = pChapters->m_pSegment;
+
+  if (pSegment == NULL)  // weird
+    return -1;
+
+  const SegmentInfo* const pInfo = pSegment->GetInfo();
+
+  if (pInfo == NULL)
+    return -1;
+
+  const long long timecode_scale = pInfo->GetTimeCodeScale();
+
+  if (timecode_scale < 1)  // weird
+    return -1;
+
+  if (timecode < 0)
+    return -1;
+
+  const long long result = timecode_scale * timecode;
+
+  return result;
+}
+
+long Chapters::Atom::ParseDisplay(IMkvReader* pReader, long long pos,
+                                  long long size) {
+  if (!ExpandDisplaysArray())
+    return -1;
+
+  Display& d = m_displays[m_displays_count++];
+  d.Init();
+
+  return d.Parse(pReader, pos, size);
+}
+
+bool Chapters::Atom::ExpandDisplaysArray() {
+  if (m_displays_size > m_displays_count)
+    return true;  // nothing else to do
+
+  const int size = (m_displays_size == 0) ? 1 : 2 * m_displays_size;
+
+  Display* const displays = new Display[size];
+
+  for (int idx = 0; idx < m_displays_count; ++idx) {
+    m_displays[idx].ShallowCopy(displays[idx]);
+  }
+
+  delete[] m_displays;
+  m_displays = displays;
+
+  m_displays_size = size;
+  return true;
+}
+
+Chapters::Display::Display() {}
+
+Chapters::Display::~Display() {}
+
+const char* Chapters::Display::GetString() const { return m_string; }
+
+const char* Chapters::Display::GetLanguage() const { return m_language; }
+
+const char* Chapters::Display::GetCountry() const { return m_country; }
+
+void Chapters::Display::Init() {
+  m_string = NULL;
+  m_language = NULL;
+  m_country = NULL;
+}
+
+void Chapters::Display::ShallowCopy(Display& rhs) const {
+  rhs.m_string = m_string;
+  rhs.m_language = m_language;
+  rhs.m_country = m_country;
+}
+
+void Chapters::Display::Clear() {
+  delete[] m_string;
+  m_string = NULL;
+
+  delete[] m_language;
+  m_language = NULL;
+
+  delete[] m_country;
+  m_country = NULL;
+}
+
+long Chapters::Display::Parse(IMkvReader* pReader, long long pos,
+                              long long size) {
+  const long long stop = pos + size;
+
+  while (pos < stop) {
+    long long id, size;
+
+    long status = ParseElementHeader(pReader, pos, stop, id, size);
+
+    if (status < 0)  // error
+      return status;
+
+    if (size == 0)  // No payload.
+      continue;
+
+    if (id == libwebm::kMkvChapString) {
+      status = UnserializeString(pReader, pos, size, m_string);
+
+      if (status)
+        return status;
+    } else if (id == libwebm::kMkvChapLanguage) {
+      status = UnserializeString(pReader, pos, size, m_language);
+
+      if (status)
+        return status;
+    } else if (id == libwebm::kMkvChapCountry) {
+      status = UnserializeString(pReader, pos, size, m_country);
+
+      if (status)
+        return status;
+    }
+
+    pos += size;
+    if (pos > stop)
+      return E_FILE_FORMAT_INVALID;
+  }
+
+  if (pos != stop)
+    return E_FILE_FORMAT_INVALID;
+  return 0;
+}
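+
+// Illustrative sketch: walking the parsed chapter tree
+// (editions -> atoms -> displays), assuming `segment` has been loaded:
+//
+//   const mkvparser::Chapters* chapters = segment->GetChapters();
+//
+//   if (chapters) {
+//     for (int e = 0; e < chapters->GetEditionCount(); ++e) {
+//       const mkvparser::Chapters::Edition* edition = chapters->GetEdition(e);
+//
+//       for (int a = 0; a < edition->GetAtomCount(); ++a) {
+//         const mkvparser::Chapters::Atom* atom = edition->GetAtom(a);
+//         const long long start_ns = atom->GetStartTime(chapters);  // in ns
+//
+//         for (int d = 0; d < atom->GetDisplayCount(); ++d) {
+//           const mkvparser::Chapters::Display* display = atom->GetDisplay(d);
+//           // display->GetString() is the chapter title (may be NULL).
+//         }
+//       }
+//     }
+//   }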
+
+Tags::Tags(Segment* pSegment, long long payload_start, long long payload_size,
+           long long element_start, long long element_size)
+    : m_pSegment(pSegment),
+      m_start(payload_start),
+      m_size(payload_size),
+      m_element_start(element_start),
+      m_element_size(element_size),
+      m_tags(NULL),
+      m_tags_size(0),
+      m_tags_count(0) {}
+
+Tags::~Tags() {
+  while (m_tags_count > 0) {
+    Tag& t = m_tags[--m_tags_count];
+    t.Clear();
+  }
+  delete[] m_tags;
+}
+
+long Tags::Parse() {
+  IMkvReader* const pReader = m_pSegment->m_pReader;
+
+  long long pos = m_start;  // payload start
+  const long long stop = pos + m_size;  // payload stop
+
+  while (pos < stop) {
+    long long id, size;
+
+    long status = ParseElementHeader(pReader, pos, stop, id, size);
+
+    if (status < 0)
+      return status;
+
+    if (size == 0)  // 0 length tag, read another
+      continue;
+
+    if (id == libwebm::kMkvTag) {
+      status = ParseTag(pos, size);
+
+      if (status < 0)
+        return status;
+    }
+
+    pos += size;
+    if (pos > stop)
+      return E_FILE_FORMAT_INVALID;
+  }
+
+  if (pos != stop)
+    return E_FILE_FORMAT_INVALID;
+
+  return 0;
+}
+
+int Tags::GetTagCount() const { return m_tags_count; }
+
+const Tags::Tag* Tags::GetTag(int idx) const {
+  if (idx < 0)
+    return NULL;
+
+  if (idx >= m_tags_count)
+    return NULL;
+
+  return m_tags + idx;
+}
+
+bool Tags::ExpandTagsArray() {
+  if (m_tags_size > m_tags_count)
+    return true;  // nothing else to do
+
+  const int size = (m_tags_size == 0) ? 1 : 2 * m_tags_size;
+
+  Tag* const tags = new Tag[size];
+
+  for (int idx = 0; idx < m_tags_count; ++idx) {
+    m_tags[idx].ShallowCopy(tags[idx]);
+  }
+
+  delete[] m_tags;
+  m_tags = tags;
+
+  m_tags_size = size;
+  return true;
+}
+
+long Tags::ParseTag(long long pos, long long size) {
+  if (!ExpandTagsArray())
+    return -1;
+
+  Tag& t = m_tags[m_tags_count++];
+  t.Init();
+
+  return t.Parse(m_pSegment->m_pReader, pos, size);
+}
+
+Tags::Tag::Tag() {}
+
+Tags::Tag::~Tag() {}
+
+int Tags::Tag::GetSimpleTagCount() const { return m_simple_tags_count; }
+
+const Tags::SimpleTag* Tags::Tag::GetSimpleTag(int index) const {
+  if (index < 0)
+    return NULL;
+
+  if (index >= m_simple_tags_count)
+    return NULL;
+
+  return m_simple_tags + index;
+}
+
+void Tags::Tag::Init() {
+  m_simple_tags = NULL;
+  m_simple_tags_size = 0;
+  m_simple_tags_count = 0;
+}
+
+void Tags::Tag::ShallowCopy(Tag& rhs) const {
+  rhs.m_simple_tags = m_simple_tags;
+  rhs.m_simple_tags_size = m_simple_tags_size;
+  rhs.m_simple_tags_count = m_simple_tags_count;
+}
+
+void Tags::Tag::Clear() {
+  while (m_simple_tags_count > 0) {
+    SimpleTag& d = m_simple_tags[--m_simple_tags_count];
+    d.Clear();
+  }
+
+  delete[] m_simple_tags;
+  m_simple_tags = NULL;
+
+  m_simple_tags_size = 0;
+}
+
+long Tags::Tag::Parse(IMkvReader* pReader, long long pos, long long size) {
+  const long long stop = pos + size;
+
+  while (pos < stop) {
+    long long id, size;
+
+    long status = ParseElementHeader(pReader, pos, stop, id, size);
+
+    if (status < 0)
+      return status;
+
+    if (size == 0)  // 0 length tag, read another
+      continue;
+
+    if (id == libwebm::kMkvSimpleTag) {
+      status = ParseSimpleTag(pReader, pos, size);
+
+      if (status < 0)
+        return status;
+    }
+
+    pos += size;
+    if (pos > stop)
+      return E_FILE_FORMAT_INVALID;
+  }
+
+  if (pos != stop)
+    return E_FILE_FORMAT_INVALID;
+  return 0;
+}
+
+long Tags::Tag::ParseSimpleTag(IMkvReader* pReader, long long pos,
+                               long long size) {
+  if (!ExpandSimpleTagsArray())
+    return -1;
+
+  SimpleTag& st = m_simple_tags[m_simple_tags_count++];
+  st.Init();
+
+  return st.Parse(pReader, pos, size);
+}
+
+bool Tags::Tag::ExpandSimpleTagsArray() {
+  if (m_simple_tags_size > m_simple_tags_count)
+    return true;  // nothing else to do
+
+  const int size = (m_simple_tags_size == 0) ? 1 : 2 * m_simple_tags_size;
+
+  SimpleTag* const simple_tags = new SimpleTag[size];
+
+  for (int idx = 0; idx < m_simple_tags_count; ++idx) {
+    m_simple_tags[idx].ShallowCopy(simple_tags[idx]);
+  }
+
+  delete[] m_simple_tags;
+  m_simple_tags = simple_tags;
+
+  m_simple_tags_size = size;
+  return true;
+}
+
+Tags::SimpleTag::SimpleTag() {}
+
+Tags::SimpleTag::~SimpleTag() {}
+
+const char* Tags::SimpleTag::GetTagName() const { return m_tag_name; }
+
+const char* Tags::SimpleTag::GetTagString() const { return m_tag_string; }
+
+void Tags::SimpleTag::Init() {
+  m_tag_name = NULL;
+  m_tag_string = NULL;
+}
+
+void Tags::SimpleTag::ShallowCopy(SimpleTag& rhs) const {
+  rhs.m_tag_name = m_tag_name;
+  rhs.m_tag_string = m_tag_string;
+}
+
+void Tags::SimpleTag::Clear() {
+  delete[] m_tag_name;
+  m_tag_name = NULL;
+
+  delete[] m_tag_string;
+  m_tag_string = NULL;
+}
+
+long Tags::SimpleTag::Parse(IMkvReader* pReader, long long pos,
+                            long long size) {
+  const long long stop = pos + size;
+
+  while (pos < stop) {
+    long long id, size;
+
+    long status = ParseElementHeader(pReader, pos, stop, id, size);
+
+    if (status < 0)  // error
+      return status;
+
+    if (size == 0)  // weird
+      continue;
+
+    if (id == libwebm::kMkvTagName) {
+      status = UnserializeString(pReader, pos, size, m_tag_name);
+
+      if (status)
+        return status;
+    } else if (id == libwebm::kMkvTagString) {
+      status = UnserializeString(pReader, pos, size, m_tag_string);
+
+      if (status)
+        return status;
+    }
+
+    pos += size;
+    if (pos > stop)
+      return E_FILE_FORMAT_INVALID;
+  }
+
+  if (pos != stop)
+    return E_FILE_FORMAT_INVALID;
+  return 0;
+}
+
+SegmentInfo::SegmentInfo(Segment* pSegment, long long start, long long size_,
+                         long long element_start, long long element_size)
+    : m_pSegment(pSegment),
+      m_start(start),
+      m_size(size_),
+      m_element_start(element_start),
+      m_element_size(element_size),
+      m_pMuxingAppAsUTF8(NULL),
+      m_pWritingAppAsUTF8(NULL),
+      m_pTitleAsUTF8(NULL) {}
+
+SegmentInfo::~SegmentInfo() {
+  delete[] m_pMuxingAppAsUTF8;
+  m_pMuxingAppAsUTF8 = NULL;
+
+  delete[] m_pWritingAppAsUTF8;
+  m_pWritingAppAsUTF8 = NULL;
+
+  delete[] m_pTitleAsUTF8;
+  m_pTitleAsUTF8 = NULL;
+}
+
+long SegmentInfo::Parse() {
+  assert(m_pMuxingAppAsUTF8 == NULL);
+  assert(m_pWritingAppAsUTF8 == NULL);
+  assert(m_pTitleAsUTF8 == NULL);
+
+  IMkvReader* const pReader = m_pSegment->m_pReader;
+
+  long long pos = m_start;
+  const long long stop = m_start + m_size;
+
+  m_timecodeScale = 1000000;
+  m_duration = -1;
+
+  while (pos < stop) {
+    long long id, size;
+
+    const long status = ParseElementHeader(pReader, pos, stop, id, size);
+
+    if (status < 0)  // error
+      return status;
+
+    if (id == libwebm::kMkvTimecodeScale) {
+      m_timecodeScale = UnserializeUInt(pReader, pos, size);
+
+      if (m_timecodeScale <= 0)
+        return E_FILE_FORMAT_INVALID;
+    } else if (id == libwebm::kMkvDuration) {
+      const long status = UnserializeFloat(pReader, pos, size, m_duration);
+
+      if (status < 0)
+        return status;
+
+      if (m_duration < 0)
+        return E_FILE_FORMAT_INVALID;
+    } else if (id == libwebm::kMkvMuxingApp) {
+      const long status =
+          UnserializeString(pReader, pos, size, m_pMuxingAppAsUTF8);
+
+      if (status)
+        return status;
+    } else if (id == libwebm::kMkvWritingApp) {
+      const long status =
+          UnserializeString(pReader, pos, size, m_pWritingAppAsUTF8);
+
+      if (status)
+        return status;
+    } else if (id == libwebm::kMkvTitle) {
+      const long status = UnserializeString(pReader, pos, size, m_pTitleAsUTF8);
+
+      if (status)
+        return status;
+    }
+
+    pos += size;
+
+    if (pos > stop)
+      return E_FILE_FORMAT_INVALID;
+  }
+
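+  // The duration is stored in timecode ticks; once scaled to nanoseconds it
+  // must still fit in a signed 64-bit value, otherwise GetDuration() would
+  // overflow.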
+  const double rollover_check = m_duration * m_timecodeScale;
+  if (rollover_check > LLONG_MAX)
+    return E_FILE_FORMAT_INVALID;
+
+  if (pos != stop)
+    return E_FILE_FORMAT_INVALID;
+
+  return 0;
+}
+
+long long SegmentInfo::GetTimeCodeScale() const { return m_timecodeScale; }
+
+long long SegmentInfo::GetDuration() const {
+  if (m_duration < 0)
+    return -1;
+
+  assert(m_timecodeScale >= 1);
+
+  const double dd = double(m_duration) * double(m_timecodeScale);
+  const long long d = static_cast<long long>(dd);
+
+  return d;
+}
+
+const char* SegmentInfo::GetMuxingAppAsUTF8() const {
+  return m_pMuxingAppAsUTF8;
+}
+
+const char* SegmentInfo::GetWritingAppAsUTF8() const {
+  return m_pWritingAppAsUTF8;
+}
+
+const char* SegmentInfo::GetTitleAsUTF8() const { return m_pTitleAsUTF8; }
+
+///////////////////////////////////////////////////////////////
+// ContentEncoding element
+ContentEncoding::ContentCompression::ContentCompression()
+    : algo(0), settings(NULL), settings_len(0) {}
+
+ContentEncoding::ContentCompression::~ContentCompression() {
+  delete[] settings;
+}
+
+ContentEncoding::ContentEncryption::ContentEncryption()
+    : algo(0),
+      key_id(NULL),
+      key_id_len(0),
+      signature(NULL),
+      signature_len(0),
+      sig_key_id(NULL),
+      sig_key_id_len(0),
+      sig_algo(0),
+      sig_hash_algo(0) {}
+
+ContentEncoding::ContentEncryption::~ContentEncryption() {
+  delete[] key_id;
+  delete[] signature;
+  delete[] sig_key_id;
+}
+
+ContentEncoding::ContentEncoding()
+    : compression_entries_(NULL),
+      compression_entries_end_(NULL),
+      encryption_entries_(NULL),
+      encryption_entries_end_(NULL),
+      encoding_order_(0),
+      encoding_scope_(1),
+      encoding_type_(0) {}
+
+ContentEncoding::~ContentEncoding() {
+  ContentCompression** comp_i = compression_entries_;
+  ContentCompression** const comp_j = compression_entries_end_;
+
+  while (comp_i != comp_j) {
+    ContentCompression* const comp = *comp_i++;
+    delete comp;
+  }
+
+  delete[] compression_entries_;
+
+  ContentEncryption** enc_i = encryption_entries_;
+  ContentEncryption** const enc_j = encryption_entries_end_;
+
+  while (enc_i != enc_j) {
+    ContentEncryption* const enc = *enc_i++;
+    delete enc;
+  }
+
+  delete[] encryption_entries_;
+}
+
+const ContentEncoding::ContentCompression*
+    ContentEncoding::GetCompressionByIndex(unsigned long idx) const {
+  const ptrdiff_t count = compression_entries_end_ - compression_entries_;
+  assert(count >= 0);
+
+  if (idx >= static_cast<unsigned long>(count))
+    return NULL;
+
+  return compression_entries_[idx];
+}
+
+unsigned long ContentEncoding::GetCompressionCount() const {
+  const ptrdiff_t count = compression_entries_end_ - compression_entries_;
+  assert(count >= 0);
+
+  return static_cast<unsigned long>(count);
+}
+
+const ContentEncoding::ContentEncryption* ContentEncoding::GetEncryptionByIndex(
+    unsigned long idx) const {
+  const ptrdiff_t count = encryption_entries_end_ - encryption_entries_;
+  assert(count >= 0);
+
+  if (idx >= static_cast<unsigned long>(count))
+    return NULL;
+
+  return encryption_entries_[idx];
+}
+
+unsigned long ContentEncoding::GetEncryptionCount() const {
+  const ptrdiff_t count = encryption_entries_end_ - encryption_entries_;
+  assert(count >= 0);
+
+  return static_cast<unsigned long>(count);
+}
+
+long ContentEncoding::ParseContentEncAESSettingsEntry(
+    long long start, long long size, IMkvReader* pReader,
+    ContentEncAESSettings* aes) {
+  assert(pReader);
+  assert(aes);
+
+  long long pos = start;
+  const long long stop = start + size;
+
+  while (pos < stop) {
+    long long id, size;
+    const long status = ParseElementHeader(pReader, pos, stop, id, size);
+    if (status < 0)  // error
+      return status;
+
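+    // The WebM encryption spec only defines cipher mode 1 (AES-CTR).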
+    if (id == libwebm::kMkvAESSettingsCipherMode) {
+      aes->cipher_mode = UnserializeUInt(pReader, pos, size);
+      if (aes->cipher_mode != 1)
+        return E_FILE_FORMAT_INVALID;
+    }
+
+    pos += size;  // consume payload
+    if (pos > stop)
+      return E_FILE_FORMAT_INVALID;
+  }
+
+  return 0;
+}
+
+long ContentEncoding::ParseContentEncodingEntry(long long start, long long size,
+                                                IMkvReader* pReader) {
+  assert(pReader);
+
+  long long pos = start;
+  const long long stop = start + size;
+
+  // Count ContentCompression and ContentEncryption elements.
+  int compression_count = 0;
+  int encryption_count = 0;
+
+  while (pos < stop) {
+    long long id, size;
+    const long status = ParseElementHeader(pReader, pos, stop, id, size);
+    if (status < 0)  // error
+      return status;
+
+    if (id == libwebm::kMkvContentCompression)
+      ++compression_count;
+
+    if (id == libwebm::kMkvContentEncryption)
+      ++encryption_count;
+
+    pos += size;  // consume payload
+    if (pos > stop)
+      return E_FILE_FORMAT_INVALID;
+  }
+
+  if (compression_count <= 0 && encryption_count <= 0)
+    return -1;
+
+  if (compression_count > 0) {
+    compression_entries_ = new ContentCompression*[compression_count];
+    compression_entries_end_ = compression_entries_;
+  }
+
+  if (encryption_count > 0) {
+    encryption_entries_ = new ContentEncryption*[encryption_count];
+    encryption_entries_end_ = encryption_entries_;
+  }
+
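+  // Second pass: parse the child elements and populate the arrays
+  // allocated above.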
+  pos = start;
+  while (pos < stop) {
+    long long id, size;
+    long status = ParseElementHeader(pReader, pos, stop, id, size);
+    if (status < 0)  // error
+      return status;
+
+    if (id == libwebm::kMkvContentEncodingOrder) {
+      encoding_order_ = UnserializeUInt(pReader, pos, size);
+    } else if (id == libwebm::kMkvContentEncodingScope) {
+      encoding_scope_ = UnserializeUInt(pReader, pos, size);
+      if (encoding_scope_ < 1)
+        return -1;
+    } else if (id == libwebm::kMkvContentEncodingType) {
+      encoding_type_ = UnserializeUInt(pReader, pos, size);
+    } else if (id == libwebm::kMkvContentCompression) {
+      ContentCompression* const compression = new ContentCompression();
+
+      status = ParseCompressionEntry(pos, size, pReader, compression);
+      if (status) {
+        delete compression;
+        return status;
+      }
+      *compression_entries_end_++ = compression;
+    } else if (id == libwebm::kMkvContentEncryption) {
+      ContentEncryption* const encryption = new ContentEncryption();
+
+      status = ParseEncryptionEntry(pos, size, pReader, encryption);
+      if (status) {
+        delete encryption;
+        return status;
+      }
+      *encryption_entries_end_++ = encryption;
+    }
+
+    pos += size;  // consume payload
+    if (pos > stop)
+      return E_FILE_FORMAT_INVALID;
+  }
+
+  if (pos != stop)
+    return E_FILE_FORMAT_INVALID;
+  return 0;
+}
+
+long ContentEncoding::ParseCompressionEntry(long long start, long long size,
+                                            IMkvReader* pReader,
+                                            ContentCompression* compression) {
+  assert(pReader);
+  assert(compression);
+
+  long long pos = start;
+  const long long stop = start + size;
+
+  bool valid = false;
+
+  while (pos < stop) {
+    long long id, size;
+    const long status = ParseElementHeader(pReader, pos, stop, id, size);
+    if (status < 0)  // error
+      return status;
+
+    if (id == libwebm::kMkvContentCompAlgo) {
+      long long algo = UnserializeUInt(pReader, pos, size);
+      if (algo < 0)
+        return E_FILE_FORMAT_INVALID;
+      compression->algo = algo;
+      valid = true;
+    } else if (id == libwebm::kMkvContentCompSettings) {
+      if (size <= 0)
+        return E_FILE_FORMAT_INVALID;
+
+      const size_t buflen = static_cast<size_t>(size);
+      unsigned char* buf = SafeArrayAlloc<unsigned char>(1, buflen);
+      if (buf == NULL)
+        return -1;
+
+      const int read_status =
+          pReader->Read(pos, static_cast<long>(buflen), buf);
+      if (read_status) {
+        delete[] buf;
+        return read_status;
+      }
+
+      compression->settings = buf;
+      compression->settings_len = buflen;
+    }
+
+    pos += size;  // consume payload
+    if (pos > stop)
+      return E_FILE_FORMAT_INVALID;
+  }
+
+  // ContentCompAlgo is mandatory
+  if (!valid)
+    return E_FILE_FORMAT_INVALID;
+
+  return 0;
+}
+
+long ContentEncoding::ParseEncryptionEntry(long long start, long long size,
+                                           IMkvReader* pReader,
+                                           ContentEncryption* encryption) {
+  assert(pReader);
+  assert(encryption);
+
+  long long pos = start;
+  const long long stop = start + size;
+
+  while (pos < stop) {
+    long long id, size;
+    const long status = ParseElementHeader(pReader, pos, stop, id, size);
+    if (status < 0)  // error
+      return status;
+
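+    // ContentEncAlgo 5 is AES, the only encryption algorithm WebM allows.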
+    if (id == libwebm::kMkvContentEncAlgo) {
+      encryption->algo = UnserializeUInt(pReader, pos, size);
+      if (encryption->algo != 5)
+        return E_FILE_FORMAT_INVALID;
+    } else if (id == libwebm::kMkvContentEncKeyID) {
+      delete[] encryption->key_id;
+      encryption->key_id = NULL;
+      encryption->key_id_len = 0;
+
+      if (size <= 0)
+        return E_FILE_FORMAT_INVALID;
+
+      const size_t buflen = static_cast<size_t>(size);
+      unsigned char* buf = SafeArrayAlloc<unsigned char>(1, buflen);
+      if (buf == NULL)
+        return -1;
+
+      const int read_status =
+          pReader->Read(pos, static_cast<long>(buflen), buf);
+      if (read_status) {
+        delete[] buf;
+        return read_status;
+      }
+
+      encryption->key_id = buf;
+      encryption->key_id_len = buflen;
+    } else if (id == libwebm::kMkvContentSignature) {
+      delete[] encryption->signature;
+      encryption->signature = NULL;
+      encryption->signature_len = 0;
+
+      if (size <= 0)
+        return E_FILE_FORMAT_INVALID;
+
+      const size_t buflen = static_cast<size_t>(size);
+      unsigned char* buf = SafeArrayAlloc<unsigned char>(1, buflen);
+      if (buf == NULL)
+        return -1;
+
+      const int read_status =
+          pReader->Read(pos, static_cast<long>(buflen), buf);
+      if (read_status) {
+        delete[] buf;
+        return read_status;
+      }
+
+      encryption->signature = buf;
+      encryption->signature_len = buflen;
+    } else if (id == libwebm::kMkvContentSigKeyID) {
+      delete[] encryption->sig_key_id;
+      encryption->sig_key_id = NULL;
+      encryption->sig_key_id_len = 0;
+
+      if (size <= 0)
+        return E_FILE_FORMAT_INVALID;
+
+      const size_t buflen = static_cast<size_t>(size);
+      unsigned char* buf = SafeArrayAlloc<unsigned char>(1, buflen);
+      if (buf == NULL)
+        return -1;
+
+      const int read_status =
+          pReader->Read(pos, static_cast<long>(buflen), buf);
+      if (read_status) {
+        delete[] buf;
+        return read_status;
+      }
+
+      encryption->sig_key_id = buf;
+      encryption->sig_key_id_len = buflen;
+    } else if (id == libwebm::kMkvContentSigAlgo) {
+      encryption->sig_algo = UnserializeUInt(pReader, pos, size);
+    } else if (id == libwebm::kMkvContentSigHashAlgo) {
+      encryption->sig_hash_algo = UnserializeUInt(pReader, pos, size);
+    } else if (id == libwebm::kMkvContentEncAESSettings) {
+      const long status = ParseContentEncAESSettingsEntry(
+          pos, size, pReader, &encryption->aes_settings);
+      if (status)
+        return status;
+    }
+
+    pos += size;  // consume payload
+    if (pos > stop)
+      return E_FILE_FORMAT_INVALID;
+  }
+
+  return 0;
+}
+
+Track::Track(Segment* pSegment, long long element_start, long long element_size)
+    : m_pSegment(pSegment),
+      m_element_start(element_start),
+      m_element_size(element_size),
+      content_encoding_entries_(NULL),
+      content_encoding_entries_end_(NULL) {}
+
+Track::~Track() {
+  Info& info = const_cast<Info&>(m_info);
+  info.Clear();
+
+  ContentEncoding** i = content_encoding_entries_;
+  ContentEncoding** const j = content_encoding_entries_end_;
+
+  while (i != j) {
+    ContentEncoding* const encoding = *i++;
+    delete encoding;
+  }
+
+  delete[] content_encoding_entries_;
+}
+
+long Track::Create(Segment* pSegment, const Info& info, long long element_start,
+                   long long element_size, Track*& pResult) {
+  if (pResult)
+    return -1;
+
+  Track* const pTrack = new Track(pSegment, element_start, element_size);
+
+  const int status = info.Copy(pTrack->m_info);
+
+  if (status) {  // error
+    delete pTrack;
+    return status;
+  }
+
+  pResult = pTrack;
+  return 0;  // success
+}
+
+Track::Info::Info()
+    : uid(0),
+      defaultDuration(0),
+      codecDelay(0),
+      seekPreRoll(0),
+      nameAsUTF8(NULL),
+      language(NULL),
+      codecId(NULL),
+      codecNameAsUTF8(NULL),
+      codecPrivate(NULL),
+      codecPrivateSize(0),
+      lacing(false) {}
+
+Track::Info::~Info() { Clear(); }
+
+void Track::Info::Clear() {
+  delete[] nameAsUTF8;
+  nameAsUTF8 = NULL;
+
+  delete[] language;
+  language = NULL;
+
+  delete[] codecId;
+  codecId = NULL;
+
+  delete[] codecPrivate;
+  codecPrivate = NULL;
+  codecPrivateSize = 0;
+
+  delete[] codecNameAsUTF8;
+  codecNameAsUTF8 = NULL;
+}
+
+int Track::Info::CopyStr(char* Info::*str, Info& dst_) const {
+  if (str == static_cast<char * Info::*>(NULL))
+    return -1;
+
+  char*& dst = dst_.*str;
+
+  if (dst)  // should be NULL already
+    return -1;
+
+  const char* const src = this->*str;
+
+  if (src == NULL)
+    return 0;
+
+  const size_t len = strlen(src);
+
+  dst = SafeArrayAlloc<char>(1, len + 1);
+
+  if (dst == NULL)
+    return -1;
+
+  strcpy(dst, src);
+
+  return 0;
+}
+
+int Track::Info::Copy(Info& dst) const {
+  if (&dst == this)
+    return 0;
+
+  dst.type = type;
+  dst.number = number;
+  dst.defaultDuration = defaultDuration;
+  dst.codecDelay = codecDelay;
+  dst.seekPreRoll = seekPreRoll;
+  dst.uid = uid;
+  dst.lacing = lacing;
+  dst.settings = settings;
+
+  // We now copy the string member variables from src to dst.
+  // This involves memory allocation so in principle the operation
+  // can fail (indeed, that's why we have Info::Copy), so we must
+  // report this to the caller.  An error return from this function
+  // therefore implies that the copy was only partially successful.
+
+  if (int status = CopyStr(&Info::nameAsUTF8, dst))
+    return status;
+
+  if (int status = CopyStr(&Info::language, dst))
+    return status;
+
+  if (int status = CopyStr(&Info::codecId, dst))
+    return status;
+
+  if (int status = CopyStr(&Info::codecNameAsUTF8, dst))
+    return status;
+
+  if (codecPrivateSize > 0) {
+    if (codecPrivate == NULL)
+      return -1;
+
+    if (dst.codecPrivate)
+      return -1;
+
+    if (dst.codecPrivateSize != 0)
+      return -1;
+
+    dst.codecPrivate = SafeArrayAlloc<unsigned char>(1, codecPrivateSize);
+
+    if (dst.codecPrivate == NULL)
+      return -1;
+
+    memcpy(dst.codecPrivate, codecPrivate, codecPrivateSize);
+    dst.codecPrivateSize = codecPrivateSize;
+  }
+
+  return 0;
+}
+
+const BlockEntry* Track::GetEOS() const { return &m_eos; }
+
+long Track::GetType() const { return m_info.type; }
+
+long Track::GetNumber() const { return m_info.number; }
+
+unsigned long long Track::GetUid() const { return m_info.uid; }
+
+const char* Track::GetNameAsUTF8() const { return m_info.nameAsUTF8; }
+
+const char* Track::GetLanguage() const { return m_info.language; }
+
+const char* Track::GetCodecNameAsUTF8() const { return m_info.codecNameAsUTF8; }
+
+const char* Track::GetCodecId() const { return m_info.codecId; }
+
+const unsigned char* Track::GetCodecPrivate(size_t& size) const {
+  size = m_info.codecPrivateSize;
+  return m_info.codecPrivate;
+}
+
+bool Track::GetLacing() const { return m_info.lacing; }
+
+unsigned long long Track::GetDefaultDuration() const {
+  return m_info.defaultDuration;
+}
+
+unsigned long long Track::GetCodecDelay() const { return m_info.codecDelay; }
+
+unsigned long long Track::GetSeekPreRoll() const { return m_info.seekPreRoll; }
+
+long Track::GetFirst(const BlockEntry*& pBlockEntry) const {
+  const Cluster* pCluster = m_pSegment->GetFirst();
+
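+  // Search at most 100 non-empty clusters for the first block that belongs
+  // to this track before giving up.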
+  for (int i = 0;;) {
+    if (pCluster == NULL) {
+      pBlockEntry = GetEOS();
+      return 1;
+    }
+
+    if (pCluster->EOS()) {
+      if (m_pSegment->DoneParsing()) {
+        pBlockEntry = GetEOS();
+        return 1;
+      }
+
+      pBlockEntry = 0;
+      return E_BUFFER_NOT_FULL;
+    }
+
+    long status = pCluster->GetFirst(pBlockEntry);
+
+    if (status < 0)  // error
+      return status;
+
+    if (pBlockEntry == 0) {  // empty cluster
+      pCluster = m_pSegment->GetNext(pCluster);
+      continue;
+    }
+
+    for (;;) {
+      const Block* const pBlock = pBlockEntry->GetBlock();
+      assert(pBlock);
+
+      const long long tn = pBlock->GetTrackNumber();
+
+      if ((tn == m_info.number) && VetEntry(pBlockEntry))
+        return 0;
+
+      const BlockEntry* pNextEntry;
+
+      status = pCluster->GetNext(pBlockEntry, pNextEntry);
+
+      if (status < 0)  // error
+        return status;
+
+      if (pNextEntry == 0)
+        break;
+
+      pBlockEntry = pNextEntry;
+    }
+
+    ++i;
+
+    if (i >= 100)
+      break;
+
+    pCluster = m_pSegment->GetNext(pCluster);
+  }
+
+  // NOTE: if we get here, it means that we didn't find a block with
+  // a matching track number.  We interpret that as an error (which
+  // might be too conservative).
+
+  pBlockEntry = GetEOS();  // so we can return a non-NULL value
+  return 1;
+}
+
+long Track::GetNext(const BlockEntry* pCurrEntry,
+                    const BlockEntry*& pNextEntry) const {
+  assert(pCurrEntry);
+  assert(!pCurrEntry->EOS());  //?
+
+  const Block* const pCurrBlock = pCurrEntry->GetBlock();
+  assert(pCurrBlock && pCurrBlock->GetTrackNumber() == m_info.number);
+  if (!pCurrBlock || pCurrBlock->GetTrackNumber() != m_info.number)
+    return -1;
+
+  const Cluster* pCluster = pCurrEntry->GetCluster();
+  assert(pCluster);
+  assert(!pCluster->EOS());
+
+  long status = pCluster->GetNext(pCurrEntry, pNextEntry);
+
+  if (status < 0)  // error
+    return status;
+
+  for (int i = 0;;) {
+    while (pNextEntry) {
+      const Block* const pNextBlock = pNextEntry->GetBlock();
+      assert(pNextBlock);
+
+      if (pNextBlock->GetTrackNumber() == m_info.number)
+        return 0;
+
+      pCurrEntry = pNextEntry;
+
+      status = pCluster->GetNext(pCurrEntry, pNextEntry);
+
+      if (status < 0)  // error
+        return status;
+    }
+
+    pCluster = m_pSegment->GetNext(pCluster);
+
+    if (pCluster == NULL) {
+      pNextEntry = GetEOS();
+      return 1;
+    }
+
+    if (pCluster->EOS()) {
+      if (m_pSegment->DoneParsing()) {
+        pNextEntry = GetEOS();
+        return 1;
+      }
+
+      // TODO: there is a potential O(n^2) problem here: we tell the
+      // caller to (pre)load another cluster, which he does, but then he
+      // calls GetNext again, which repeats the same search.  This is
+      // a pathological case, since the only way it can happen is if
+      // there exists a long sequence of clusters none of which contain a
+      // block from this track.  One way around this problem is for the
+      // caller to be smarter when he loads another cluster: don't call
+      // us back until you have a cluster that contains a block from this
+      // track. (Of course, that's not cheap either, since our caller
+      // would have to scan each cluster as it's loaded, so that
+      // would just push back the problem.)
+
+      pNextEntry = NULL;
+      return E_BUFFER_NOT_FULL;
+    }
+
+    status = pCluster->GetFirst(pNextEntry);
+
+    if (status < 0)  // error
+      return status;
+
+    if (pNextEntry == NULL)  // empty cluster
+      continue;
+
+    ++i;
+
+    if (i >= 100)
+      break;
+  }
+
+  // NOTE: if we get here, it means that we didn't find a block with
+  // a matching track number after lots of searching, so we give
+  // up trying.
+
+  pNextEntry = GetEOS();  // so we can return a non-NULL value
+  return 1;
+}
+
+bool Track::VetEntry(const BlockEntry* pBlockEntry) const {
+  assert(pBlockEntry);
+  const Block* const pBlock = pBlockEntry->GetBlock();
+  assert(pBlock);
+  assert(pBlock->GetTrackNumber() == m_info.number);
+  if (!pBlock || pBlock->GetTrackNumber() != m_info.number)
+    return false;
+
+  // This function is used during a seek to determine whether the
+  // frame is a valid seek target.  This default function simply
+  // returns true, which means all frames are valid seek targets.
+  // It gets overridden by the VideoTrack class, because only video
+  // keyframes can be used as seek targets.
+
+  return true;
+}
+
+long Track::Seek(long long time_ns, const BlockEntry*& pResult) const {
+  const long status = GetFirst(pResult);
+
+  if (status < 0)  // buffer underflow, etc
+    return status;
+
+  assert(pResult);
+
+  if (pResult->EOS())
+    return 0;
+
+  const Cluster* pCluster = pResult->GetCluster();
+  assert(pCluster);
+  assert(pCluster->GetIndex() >= 0);
+
+  if (time_ns <= pResult->GetBlock()->GetTime(pCluster))
+    return 0;
+
+  Cluster** const clusters = m_pSegment->m_clusters;
+  assert(clusters);
+
+  const long count = m_pSegment->GetCount();  // loaded only, not preloaded
+  assert(count > 0);
+
+  Cluster** const i = clusters + pCluster->GetIndex();
+  assert(i);
+  assert(*i == pCluster);
+  assert(pCluster->GetTime() <= time_ns);
+
+  Cluster** const j = clusters + count;
+
+  Cluster** lo = i;
+  Cluster** hi = j;
+
+  while (lo < hi) {
+    // INVARIANT:
+    //[i, lo) <= time_ns
+    //[lo, hi) ?
+    //[hi, j)  > time_ns
+
+    Cluster** const mid = lo + (hi - lo) / 2;
+    assert(mid < hi);
+
+    pCluster = *mid;
+    assert(pCluster);
+    assert(pCluster->GetIndex() >= 0);
+    assert(pCluster->GetIndex() == long(mid - m_pSegment->m_clusters));
+
+    const long long t = pCluster->GetTime();
+
+    if (t <= time_ns)
+      lo = mid + 1;
+    else
+      hi = mid;
+
+    assert(lo <= hi);
+  }
+
+  assert(lo == hi);
+  assert(lo > i);
+  assert(lo <= j);
+
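+  // Walk backwards from the insertion point until we reach a cluster that
+  // actually contains an entry for this track.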
+  while (lo > i) {
+    pCluster = *--lo;
+    assert(pCluster);
+    assert(pCluster->GetTime() <= time_ns);
+
+    pResult = pCluster->GetEntry(this);
+
+    if ((pResult != 0) && !pResult->EOS())
+      return 0;
+
+    // landed on empty cluster (no entries)
+  }
+
+  pResult = GetEOS();  // weird
+  return 0;
+}
+
+const ContentEncoding* Track::GetContentEncodingByIndex(
+    unsigned long idx) const {
+  const ptrdiff_t count =
+      content_encoding_entries_end_ - content_encoding_entries_;
+  assert(count >= 0);
+
+  if (idx >= static_cast<unsigned long>(count))
+    return NULL;
+
+  return content_encoding_entries_[idx];
+}
+
+unsigned long Track::GetContentEncodingCount() const {
+  const ptrdiff_t count =
+      content_encoding_entries_end_ - content_encoding_entries_;
+  assert(count >= 0);
+
+  return static_cast<unsigned long>(count);
+}
+
+long Track::ParseContentEncodingsEntry(long long start, long long size) {
+  IMkvReader* const pReader = m_pSegment->m_pReader;
+  assert(pReader);
+
+  long long pos = start;
+  const long long stop = start + size;
+
+  // Count ContentEncoding elements.
+  int count = 0;
+  while (pos < stop) {
+    long long id, size;
+    const long status = ParseElementHeader(pReader, pos, stop, id, size);
+    if (status < 0)  // error
+      return status;
+
+    // pos now designates start of element
+    if (id == libwebm::kMkvContentEncoding)
+      ++count;
+
+    pos += size;  // consume payload
+    if (pos > stop)
+      return E_FILE_FORMAT_INVALID;
+  }
+
+  if (count <= 0)
+    return -1;
+
+  content_encoding_entries_ = new ContentEncoding*[count];
+  content_encoding_entries_end_ = content_encoding_entries_;
+
+  pos = start;
+  while (pos < stop) {
+    long long id, size;
+    long status = ParseElementHeader(pReader, pos, stop, id, size);
+    if (status < 0)  // error
+      return status;
+
+    // pos now designates start of element
+    if (id == libwebm::kMkvContentEncoding) {
+      ContentEncoding* const content_encoding = new ContentEncoding();
+
+      status = content_encoding->ParseContentEncodingEntry(pos, size, pReader);
+      if (status) {
+        delete content_encoding;
+        return status;
+      }
+
+      *content_encoding_entries_end_++ = content_encoding;
+    }
+
+    pos += size;  // consume payload
+    if (pos > stop)
+      return E_FILE_FORMAT_INVALID;
+  }
+
+  if (pos != stop)
+    return E_FILE_FORMAT_INVALID;
+
+  return 0;
+}
+
+Track::EOSBlock::EOSBlock() : BlockEntry(NULL, LONG_MIN) {}
+
+BlockEntry::Kind Track::EOSBlock::GetKind() const { return kBlockEOS; }
+
+const Block* Track::EOSBlock::GetBlock() const { return NULL; }
+
+bool PrimaryChromaticity::Parse(IMkvReader* reader, long long read_pos,
+                                long long value_size, bool is_x,
+                                PrimaryChromaticity** chromaticity) {
+  if (!reader)
+    return false;
+
+  my_auto_ptr<PrimaryChromaticity> chromaticity_ptr(
+      *chromaticity ? *chromaticity : new PrimaryChromaticity());
+
+  float* value = is_x ? &chromaticity_ptr->x : &chromaticity_ptr->y;
+
+  double parser_value = 0;
+  const long long value_parse_status =
+      UnserializeFloat(reader, read_pos, value_size, parser_value);
+
+  *value = static_cast<float>(parser_value);
+
+  if (value_parse_status < 0 || *value < 0.0 || *value > 1.0)
+    return false;
+
+  *chromaticity = chromaticity_ptr.release();
+  return true;
+}
+
+bool MasteringMetadata::Parse(IMkvReader* reader, long long mm_start,
+                              long long mm_size, MasteringMetadata** mm) {
+  if (!reader || *mm)
+    return false;
+
+  my_auto_ptr<MasteringMetadata> mm_ptr(new MasteringMetadata());
+
+  const long long mm_end = mm_start + mm_size;
+  long long read_pos = mm_start;
+
+  while (read_pos < mm_end) {
+    long long child_id = 0;
+    long long child_size = 0;
+
+    const long long status =
+        ParseElementHeader(reader, read_pos, mm_end, child_id, child_size);
+    if (status < 0)
+      return false;
+
+    if (child_id == libwebm::kMkvLuminanceMax) {
+      double value = 0;
+      const long long value_parse_status =
+          UnserializeFloat(reader, read_pos, child_size, value);
+      mm_ptr->luminance_max = static_cast<float>(value);
+      if (value_parse_status < 0 || mm_ptr->luminance_max < 0.0 ||
+          mm_ptr->luminance_max > 9999.99) {
+        return false;
+      }
+    } else if (child_id == libwebm::kMkvLuminanceMin) {
+      double value = 0;
+      const long long value_parse_status =
+          UnserializeFloat(reader, read_pos, child_size, value);
+      mm_ptr->luminance_min = static_cast<float>(value);
+      if (value_parse_status < 0 || mm_ptr->luminance_min < 0.0 ||
+          mm_ptr->luminance_min > 999.9999) {
+        return false;
+      }
+    } else {
+      bool is_x = false;
+      PrimaryChromaticity** chromaticity;
+      switch (child_id) {
+        case libwebm::kMkvPrimaryRChromaticityX:
+        case libwebm::kMkvPrimaryRChromaticityY:
+          is_x = child_id == libwebm::kMkvPrimaryRChromaticityX;
+          chromaticity = &mm_ptr->r;
+          break;
+        case libwebm::kMkvPrimaryGChromaticityX:
+        case libwebm::kMkvPrimaryGChromaticityY:
+          is_x = child_id == libwebm::kMkvPrimaryGChromaticityX;
+          chromaticity = &mm_ptr->g;
+          break;
+        case libwebm::kMkvPrimaryBChromaticityX:
+        case libwebm::kMkvPrimaryBChromaticityY:
+          is_x = child_id == libwebm::kMkvPrimaryBChromaticityX;
+          chromaticity = &mm_ptr->b;
+          break;
+        case libwebm::kMkvWhitePointChromaticityX:
+        case libwebm::kMkvWhitePointChromaticityY:
+          is_x = child_id == libwebm::kMkvWhitePointChromaticityX;
+          chromaticity = &mm_ptr->white_point;
+          break;
+        default:
+          return false;
+      }
+      const bool value_parse_status = PrimaryChromaticity::Parse(
+          reader, read_pos, child_size, is_x, chromaticity);
+      if (!value_parse_status)
+        return false;
+    }
+
+    read_pos += child_size;
+    if (read_pos > mm_end)
+      return false;
+  }
+
+  *mm = mm_ptr.release();
+  return true;
+}
+
+bool Colour::Parse(IMkvReader* reader, long long colour_start,
+                   long long colour_size, Colour** colour) {
+  if (!reader || *colour)
+    return false;
+
+  my_auto_ptr<Colour> colour_ptr(new Colour());
+
+  const long long colour_end = colour_start + colour_size;
+  long long read_pos = colour_start;
+
+  while (read_pos < colour_end) {
+    long long child_id = 0;
+    long long child_size = 0;
+
+    const long status =
+        ParseElementHeader(reader, read_pos, colour_end, child_id, child_size);
+    if (status < 0)
+      return false;
+
+    if (child_id == libwebm::kMkvMatrixCoefficients) {
+      colour_ptr->matrix_coefficients =
+          UnserializeUInt(reader, read_pos, child_size);
+      if (colour_ptr->matrix_coefficients < 0)
+        return false;
+    } else if (child_id == libwebm::kMkvBitsPerChannel) {
+      colour_ptr->bits_per_channel =
+          UnserializeUInt(reader, read_pos, child_size);
+      if (colour_ptr->bits_per_channel < 0)
+        return false;
+    } else if (child_id == libwebm::kMkvChromaSubsamplingHorz) {
+      colour_ptr->chroma_subsampling_horz =
+          UnserializeUInt(reader, read_pos, child_size);
+      if (colour_ptr->chroma_subsampling_horz < 0)
+        return false;
+    } else if (child_id == libwebm::kMkvChromaSubsamplingVert) {
+      colour_ptr->chroma_subsampling_vert =
+          UnserializeUInt(reader, read_pos, child_size);
+      if (colour_ptr->chroma_subsampling_vert < 0)
+        return false;
+    } else if (child_id == libwebm::kMkvCbSubsamplingHorz) {
+      colour_ptr->cb_subsampling_horz =
+          UnserializeUInt(reader, read_pos, child_size);
+      if (colour_ptr->cb_subsampling_horz < 0)
+        return false;
+    } else if (child_id == libwebm::kMkvCbSubsamplingVert) {
+      colour_ptr->cb_subsampling_vert =
+          UnserializeUInt(reader, read_pos, child_size);
+      if (colour_ptr->cb_subsampling_vert < 0)
+        return false;
+    } else if (child_id == libwebm::kMkvChromaSitingHorz) {
+      colour_ptr->chroma_siting_horz =
+          UnserializeUInt(reader, read_pos, child_size);
+      if (colour_ptr->chroma_siting_horz < 0)
+        return false;
+    } else if (child_id == libwebm::kMkvChromaSitingVert) {
+      colour_ptr->chroma_siting_vert =
+          UnserializeUInt(reader, read_pos, child_size);
+      if (colour_ptr->chroma_siting_vert < 0)
+        return false;
+    } else if (child_id == libwebm::kMkvRange) {
+      colour_ptr->range = UnserializeUInt(reader, read_pos, child_size);
+      if (colour_ptr->range < 0)
+        return false;
+    } else if (child_id == libwebm::kMkvTransferCharacteristics) {
+      colour_ptr->transfer_characteristics =
+          UnserializeUInt(reader, read_pos, child_size);
+      if (colour_ptr->transfer_characteristics < 0)
+        return false;
+    } else if (child_id == libwebm::kMkvPrimaries) {
+      colour_ptr->primaries = UnserializeUInt(reader, read_pos, child_size);
+      if (colour_ptr->primaries < 0)
+        return false;
+    } else if (child_id == libwebm::kMkvMaxCLL) {
+      colour_ptr->max_cll = UnserializeUInt(reader, read_pos, child_size);
+      if (colour_ptr->max_cll < 0)
+        return false;
+    } else if (child_id == libwebm::kMkvMaxFALL) {
+      colour_ptr->max_fall = UnserializeUInt(reader, read_pos, child_size);
+      if (colour_ptr->max_fall < 0)
+        return false;
+    } else if (child_id == libwebm::kMkvMasteringMetadata) {
+      if (!MasteringMetadata::Parse(reader, read_pos, child_size,
+                                    &colour_ptr->mastering_metadata))
+        return false;
+    } else {
+      return false;
+    }
+
+    read_pos += child_size;
+    if (read_pos > colour_end)
+      return false;
+  }
+  *colour = colour_ptr.release();
+  return true;
+}
+
+VideoTrack::VideoTrack(Segment* pSegment, long long element_start,
+                       long long element_size)
+    : Track(pSegment, element_start, element_size), m_colour(NULL) {}
+
+VideoTrack::~VideoTrack() { delete m_colour; }
+
+long VideoTrack::Parse(Segment* pSegment, const Info& info,
+                       long long element_start, long long element_size,
+                       VideoTrack*& pResult) {
+  if (pResult)
+    return -1;
+
+  if (info.type != Track::kVideo)
+    return -1;
+
+  long long width = 0;
+  long long height = 0;
+  long long display_width = 0;
+  long long display_height = 0;
+  long long display_unit = 0;
+  long long stereo_mode = 0;
+
+  double rate = 0.0;
+
+  IMkvReader* const pReader = pSegment->m_pReader;
+
+  const Settings& s = info.settings;
+  assert(s.start >= 0);
+  assert(s.size >= 0);
+
+  long long pos = s.start;
+  assert(pos >= 0);
+
+  const long long stop = pos + s.size;
+
+  Colour* colour = NULL;
+
+  while (pos < stop) {
+    long long id, size;
+
+    const long status = ParseElementHeader(pReader, pos, stop, id, size);
+
+    if (status < 0)  // error
+      return status;
+
+    if (id == libwebm::kMkvPixelWidth) {
+      width = UnserializeUInt(pReader, pos, size);
+
+      if (width <= 0)
+        return E_FILE_FORMAT_INVALID;
+    } else if (id == libwebm::kMkvPixelHeight) {
+      height = UnserializeUInt(pReader, pos, size);
+
+      if (height <= 0)
+        return E_FILE_FORMAT_INVALID;
+    } else if (id == libwebm::kMkvDisplayWidth) {
+      display_width = UnserializeUInt(pReader, pos, size);
+
+      if (display_width <= 0)
+        return E_FILE_FORMAT_INVALID;
+    } else if (id == libwebm::kMkvDisplayHeight) {
+      display_height = UnserializeUInt(pReader, pos, size);
+
+      if (display_height <= 0)
+        return E_FILE_FORMAT_INVALID;
+    } else if (id == libwebm::kMkvDisplayUnit) {
+      display_unit = UnserializeUInt(pReader, pos, size);
+
+      if (display_unit < 0)
+        return E_FILE_FORMAT_INVALID;
+    } else if (id == libwebm::kMkvStereoMode) {
+      stereo_mode = UnserializeUInt(pReader, pos, size);
+
+      if (stereo_mode < 0)
+        return E_FILE_FORMAT_INVALID;
+    } else if (id == libwebm::kMkvFrameRate) {
+      const long status = UnserializeFloat(pReader, pos, size, rate);
+
+      if (status < 0)
+        return status;
+
+      if (rate <= 0)
+        return E_FILE_FORMAT_INVALID;
+    } else if (id == libwebm::kMkvColour) {
+      if (!Colour::Parse(pReader, pos, size, &colour))
+        return E_FILE_FORMAT_INVALID;
+    }
+
+    pos += size;  // consume payload
+    if (pos > stop)
+      return E_FILE_FORMAT_INVALID;
+  }
+
+  if (pos != stop)
+    return E_FILE_FORMAT_INVALID;
+
+  VideoTrack* const pTrack =
+      new VideoTrack(pSegment, element_start, element_size);
+
+  const int status = info.Copy(pTrack->m_info);
+
+  if (status) {  // error
+    delete pTrack;
+    return status;
+  }
+
+  pTrack->m_width = width;
+  pTrack->m_height = height;
+  pTrack->m_display_width = display_width;
+  pTrack->m_display_height = display_height;
+  pTrack->m_display_unit = display_unit;
+  pTrack->m_stereo_mode = stereo_mode;
+  pTrack->m_rate = rate;
+  pTrack->m_colour = colour;
+
+  pResult = pTrack;
+  return 0;  // success
+}
+
+bool VideoTrack::VetEntry(const BlockEntry* pBlockEntry) const {
+  return Track::VetEntry(pBlockEntry) && pBlockEntry->GetBlock()->IsKey();
+}
+
+long VideoTrack::Seek(long long time_ns, const BlockEntry*& pResult) const {
+  const long status = GetFirst(pResult);
+
+  if (status < 0)  // buffer underflow, etc
+    return status;
+
+  assert(pResult);
+
+  if (pResult->EOS())
+    return 0;
+
+  const Cluster* pCluster = pResult->GetCluster();
+  assert(pCluster);
+  assert(pCluster->GetIndex() >= 0);
+
+  if (time_ns <= pResult->GetBlock()->GetTime(pCluster))
+    return 0;
+
+  Cluster** const clusters = m_pSegment->m_clusters;
+  assert(clusters);
+
+  const long count = m_pSegment->GetCount();  // loaded only, not pre-loaded
+  assert(count > 0);
+
+  Cluster** const i = clusters + pCluster->GetIndex();
+  assert(i);
+  assert(*i == pCluster);
+  assert(pCluster->GetTime() <= time_ns);
+
+  Cluster** const j = clusters + count;
+
+  Cluster** lo = i;
+  Cluster** hi = j;
+
+  while (lo < hi) {
+    // INVARIANT:
+    //[i, lo) <= time_ns
+    //[lo, hi) ?
+    //[hi, j)  > time_ns
+
+    Cluster** const mid = lo + (hi - lo) / 2;
+    assert(mid < hi);
+
+    pCluster = *mid;
+    assert(pCluster);
+    assert(pCluster->GetIndex() >= 0);
+    assert(pCluster->GetIndex() == long(mid - m_pSegment->m_clusters));
+
+    const long long t = pCluster->GetTime();
+
+    if (t <= time_ns)
+      lo = mid + 1;
+    else
+      hi = mid;
+
+    assert(lo <= hi);
+  }
+
+  assert(lo == hi);
+  assert(lo > i);
+  assert(lo <= j);
+
+  pCluster = *--lo;
+  assert(pCluster);
+  assert(pCluster->GetTime() <= time_ns);
+
+  pResult = pCluster->GetEntry(this, time_ns);
+
+  if ((pResult != 0) && !pResult->EOS())  // found a keyframe
+    return 0;
+
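+  // No keyframe in that cluster; keep walking backwards until an earlier
+  // cluster yields a keyframe for this track.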
+  while (lo != i) {
+    pCluster = *--lo;
+    assert(pCluster);
+    assert(pCluster->GetTime() <= time_ns);
+
+    pResult = pCluster->GetEntry(this, time_ns);
+
+    if ((pResult != 0) && !pResult->EOS())
+      return 0;
+  }
+
+  // weird: we're on the first cluster, but no keyframe found
+  // should never happen but we must return something anyway
+
+  pResult = GetEOS();
+  return 0;
+}
+
+Colour* VideoTrack::GetColour() const { return m_colour; }
+
+long long VideoTrack::GetWidth() const { return m_width; }
+
+long long VideoTrack::GetHeight() const { return m_height; }
+
+long long VideoTrack::GetDisplayWidth() const {
+  return m_display_width > 0 ? m_display_width : GetWidth();
+}
+
+long long VideoTrack::GetDisplayHeight() const {
+  return m_display_height > 0 ? m_display_height : GetHeight();
+}
+
+long long VideoTrack::GetDisplayUnit() const { return m_display_unit; }
+
+long long VideoTrack::GetStereoMode() const { return m_stereo_mode; }
+
+double VideoTrack::GetFrameRate() const { return m_rate; }
+
+AudioTrack::AudioTrack(Segment* pSegment, long long element_start,
+                       long long element_size)
+    : Track(pSegment, element_start, element_size) {}
+
+long AudioTrack::Parse(Segment* pSegment, const Info& info,
+                       long long element_start, long long element_size,
+                       AudioTrack*& pResult) {
+  if (pResult)
+    return -1;
+
+  if (info.type != Track::kAudio)
+    return -1;
+
+  IMkvReader* const pReader = pSegment->m_pReader;
+
+  const Settings& s = info.settings;
+  assert(s.start >= 0);
+  assert(s.size >= 0);
+
+  long long pos = s.start;
+  assert(pos >= 0);
+
+  const long long stop = pos + s.size;
+
+  double rate = 8000.0;  // MKV default
+  long long channels = 1;
+  long long bit_depth = 0;
+
+  while (pos < stop) {
+    long long id, size;
+
+    long status = ParseElementHeader(pReader, pos, stop, id, size);
+
+    if (status < 0)  // error
+      return status;
+
+    if (id == libwebm::kMkvSamplingFrequency) {
+      status = UnserializeFloat(pReader, pos, size, rate);
+
+      if (status < 0)
+        return status;
+
+      if (rate <= 0)
+        return E_FILE_FORMAT_INVALID;
+    } else if (id == libwebm::kMkvChannels) {
+      channels = UnserializeUInt(pReader, pos, size);
+
+      if (channels <= 0)
+        return E_FILE_FORMAT_INVALID;
+    } else if (id == libwebm::kMkvBitDepth) {
+      bit_depth = UnserializeUInt(pReader, pos, size);
+
+      if (bit_depth <= 0)
+        return E_FILE_FORMAT_INVALID;
+    }
+
+    pos += size;  // consume payload
+    if (pos > stop)
+      return E_FILE_FORMAT_INVALID;
+  }
+
+  if (pos != stop)
+    return E_FILE_FORMAT_INVALID;
+
+  AudioTrack* const pTrack =
+      new AudioTrack(pSegment, element_start, element_size);
+
+  const int status = info.Copy(pTrack->m_info);
+
+  if (status) {
+    delete pTrack;
+    return status;
+  }
+
+  pTrack->m_rate = rate;
+  pTrack->m_channels = channels;
+  pTrack->m_bitDepth = bit_depth;
+
+  pResult = pTrack;
+  return 0;  // success
+}
+
+double AudioTrack::GetSamplingRate() const { return m_rate; }
+
+long long AudioTrack::GetChannels() const { return m_channels; }
+
+long long AudioTrack::GetBitDepth() const { return m_bitDepth; }
+
+Tracks::Tracks(Segment* pSegment, long long start, long long size_,
+               long long element_start, long long element_size)
+    : m_pSegment(pSegment),
+      m_start(start),
+      m_size(size_),
+      m_element_start(element_start),
+      m_element_size(element_size),
+      m_trackEntries(NULL),
+      m_trackEntriesEnd(NULL) {}
+
+long Tracks::Parse() {
+  assert(m_trackEntries == NULL);
+  assert(m_trackEntriesEnd == NULL);
+
+  const long long stop = m_start + m_size;
+  IMkvReader* const pReader = m_pSegment->m_pReader;
+
+  int count = 0;
+  long long pos = m_start;
+
+  while (pos < stop) {
+    long long id, size;
+
+    const long status = ParseElementHeader(pReader, pos, stop, id, size);
+
+    if (status < 0)  // error
+      return status;
+
+    if (size == 0)  // weird
+      continue;
+
+    if (id == libwebm::kMkvTrackEntry)
+      ++count;
+
+    pos += size;  // consume payload
+    if (pos > stop)
+      return E_FILE_FORMAT_INVALID;
+  }
+
+  if (pos != stop)
+    return E_FILE_FORMAT_INVALID;
+
+  if (count <= 0)
+    return 0;  // success
+
+  m_trackEntries = new Track*[count];
+  m_trackEntriesEnd = m_trackEntries;
+
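+  // Second pass: parse each TrackEntry element now that the track array has
+  // been allocated.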
+  pos = m_start;
+
+  while (pos < stop) {
+    const long long element_start = pos;
+
+    long long id, payload_size;
+
+    const long status =
+        ParseElementHeader(pReader, pos, stop, id, payload_size);
+
+    if (status < 0)  // error
+      return status;
+
+    if (payload_size == 0)  // weird
+      continue;
+
+    const long long payload_stop = pos + payload_size;
+    assert(payload_stop <= stop);  // checked in ParseElement
+
+    const long long element_size = payload_stop - element_start;
+
+    if (id == libwebm::kMkvTrackEntry) {
+      Track*& pTrack = *m_trackEntriesEnd;
+      pTrack = NULL;
+
+      const long status = ParseTrackEntry(pos, payload_size, element_start,
+                                          element_size, pTrack);
+      if (status)
+        return status;
+
+      if (pTrack)
+        ++m_trackEntriesEnd;
+    }
+
+    pos = payload_stop;
+    if (pos > stop)
+      return E_FILE_FORMAT_INVALID;
+  }
+
+  if (pos != stop)
+    return E_FILE_FORMAT_INVALID;
+
+  return 0;  // success
+}
+
+unsigned long Tracks::GetTracksCount() const {
+  const ptrdiff_t result = m_trackEntriesEnd - m_trackEntries;
+  assert(result >= 0);
+
+  return static_cast<unsigned long>(result);
+}
+
+long Tracks::ParseTrackEntry(long long track_start, long long track_size,
+                             long long element_start, long long element_size,
+                             Track*& pResult) const {
+  if (pResult)
+    return -1;
+
+  IMkvReader* const pReader = m_pSegment->m_pReader;
+
+  long long pos = track_start;
+  const long long track_stop = track_start + track_size;
+
+  Track::Info info;
+
+  info.type = 0;
+  info.number = 0;
+  info.uid = 0;
+  info.defaultDuration = 0;
+
+  Track::Settings v;
+  v.start = -1;
+  v.size = -1;
+
+  Track::Settings a;
+  a.start = -1;
+  a.size = -1;
+
+  Track::Settings e;  // content_encodings_settings;
+  e.start = -1;
+  e.size = -1;
+
+  long long lacing = 1;  // default is true
+
+  while (pos < track_stop) {
+    long long id, size;
+
+    const long status = ParseElementHeader(pReader, pos, track_stop, id, size);
+
+    if (status < 0)  // error
+      return status;
+
+    if (size < 0)
+      return E_FILE_FORMAT_INVALID;
+
+    const long long start = pos;
+
+    if (id == libwebm::kMkvVideo) {
+      v.start = start;
+      v.size = size;
+    } else if (id == libwebm::kMkvAudio) {
+      a.start = start;
+      a.size = size;
+    } else if (id == libwebm::kMkvContentEncodings) {
+      e.start = start;
+      e.size = size;
+    } else if (id == libwebm::kMkvTrackUID) {
+      if (size > 8)
+        return E_FILE_FORMAT_INVALID;
+
+      info.uid = 0;
+
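+      // Accumulate the UID one byte at a time (big-endian), which also
+      // handles payloads shorter than 8 bytes.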
+      long long pos_ = start;
+      const long long pos_end = start + size;
+
+      while (pos_ != pos_end) {
+        unsigned char b;
+
+        const int status = pReader->Read(pos_, 1, &b);
+
+        if (status)
+          return status;
+
+        info.uid <<= 8;
+        info.uid |= b;
+
+        ++pos_;
+      }
+    } else if (id == libwebm::kMkvTrackNumber) {
+      const long long num = UnserializeUInt(pReader, pos, size);
+
+      if ((num <= 0) || (num > 127))
+        return E_FILE_FORMAT_INVALID;
+
+      info.number = static_cast<long>(num);
+    } else if (id == libwebm::kMkvTrackType) {
+      const long long type = UnserializeUInt(pReader, pos, size);
+
+      if ((type <= 0) || (type > 254))
+        return E_FILE_FORMAT_INVALID;
+
+      info.type = static_cast<long>(type);
+    } else if (id == libwebm::kMkvName) {
+      const long status =
+          UnserializeString(pReader, pos, size, info.nameAsUTF8);
+
+      if (status)
+        return status;
+    } else if (id == libwebm::kMkvLanguage) {
+      const long status = UnserializeString(pReader, pos, size, info.language);
+
+      if (status)
+        return status;
+    } else if (id == libwebm::kMkvDefaultDuration) {
+      const long long duration = UnserializeUInt(pReader, pos, size);
+
+      if (duration < 0)
+        return E_FILE_FORMAT_INVALID;
+
+      info.defaultDuration = static_cast<unsigned long long>(duration);
+    } else if (id == libwebm::kMkvCodecID) {
+      const long status = UnserializeString(pReader, pos, size, info.codecId);
+
+      if (status)
+        return status;
+    } else if (id == libwebm::kMkvFlagLacing) {
+      lacing = UnserializeUInt(pReader, pos, size);
+
+      if ((lacing < 0) || (lacing > 1))
+        return E_FILE_FORMAT_INVALID;
+    } else if (id == libwebm::kMkvCodecPrivate) {
+      delete[] info.codecPrivate;
+      info.codecPrivate = NULL;
+      info.codecPrivateSize = 0;
+
+      const size_t buflen = static_cast<size_t>(size);
+
+      if (buflen) {
+        unsigned char* buf = SafeArrayAlloc<unsigned char>(1, buflen);
+
+        if (buf == NULL)
+          return -1;
+
+        const int status = pReader->Read(pos, static_cast<long>(buflen), buf);
+
+        if (status) {
+          delete[] buf;
+          return status;
+        }
+
+        info.codecPrivate = buf;
+        info.codecPrivateSize = buflen;
+      }
+    } else if (id == libwebm::kMkvCodecName) {
+      const long status =
+          UnserializeString(pReader, pos, size, info.codecNameAsUTF8);
+
+      if (status)
+        return status;
+    } else if (id == libwebm::kMkvCodecDelay) {
+      info.codecDelay = UnserializeUInt(pReader, pos, size);
+    } else if (id == libwebm::kMkvSeekPreRoll) {
+      info.seekPreRoll = UnserializeUInt(pReader, pos, size);
+    }
+
+    pos += size;  // consume payload
+    if (pos > track_stop)
+      return E_FILE_FORMAT_INVALID;
+  }
+
+  if (pos != track_stop)
+    return E_FILE_FORMAT_INVALID;
+
+  if (info.number <= 0)  // not specified
+    return E_FILE_FORMAT_INVALID;
+
+  if (GetTrackByNumber(info.number))
+    return E_FILE_FORMAT_INVALID;
+
+  if (info.type <= 0)  // not specified
+    return E_FILE_FORMAT_INVALID;
+
+  info.lacing = (lacing > 0) ? true : false;
+
+  if (info.type == Track::kVideo) {
+    if (v.start < 0)
+      return E_FILE_FORMAT_INVALID;
+
+    if (a.start >= 0)
+      return E_FILE_FORMAT_INVALID;
+
+    info.settings = v;
+
+    VideoTrack* pTrack = NULL;
+
+    const long status = VideoTrack::Parse(m_pSegment, info, element_start,
+                                          element_size, pTrack);
+
+    if (status)
+      return status;
+
+    pResult = pTrack;
+    assert(pResult);
+
+    if (e.start >= 0)
+      pResult->ParseContentEncodingsEntry(e.start, e.size);
+  } else if (info.type == Track::kAudio) {
+    if (a.start < 0)
+      return E_FILE_FORMAT_INVALID;
+
+    if (v.start >= 0)
+      return E_FILE_FORMAT_INVALID;
+
+    info.settings = a;
+
+    AudioTrack* pTrack = NULL;
+
+    const long status = AudioTrack::Parse(m_pSegment, info, element_start,
+                                          element_size, pTrack);
+
+    if (status)
+      return status;
+
+    pResult = pTrack;
+    assert(pResult);
+
+    if (e.start >= 0)
+      pResult->ParseContentEncodingsEntry(e.start, e.size);
+  } else {
+    // neither video nor audio - probably metadata or subtitles
+
+    if (a.start >= 0)
+      return E_FILE_FORMAT_INVALID;
+
+    if (v.start >= 0)
+      return E_FILE_FORMAT_INVALID;
+
+    if (info.type == Track::kMetadata && e.start >= 0)
+      return E_FILE_FORMAT_INVALID;
+
+    info.settings.start = -1;
+    info.settings.size = 0;
+
+    Track* pTrack = NULL;
+
+    const long status =
+        Track::Create(m_pSegment, info, element_start, element_size, pTrack);
+
+    if (status)
+      return status;
+
+    pResult = pTrack;
+    assert(pResult);
+  }
+
+  return 0;  // success
+}
+
+Tracks::~Tracks() {
+  Track** i = m_trackEntries;
+  Track** const j = m_trackEntriesEnd;
+
+  while (i != j) {
+    Track* const pTrack = *i++;
+    delete pTrack;
+  }
+
+  delete[] m_trackEntries;
+}
+
+const Track* Tracks::GetTrackByNumber(long tn) const {
+  if (tn < 0)
+    return NULL;
+
+  Track** i = m_trackEntries;
+  Track** const j = m_trackEntriesEnd;
+
+  while (i != j) {
+    Track* const pTrack = *i++;
+
+    if (pTrack == NULL)
+      continue;
+
+    if (tn == pTrack->GetNumber())
+      return pTrack;
+  }
+
+  return NULL;  // not found
+}
+
+const Track* Tracks::GetTrackByIndex(unsigned long idx) const {
+  const ptrdiff_t count = m_trackEntriesEnd - m_trackEntries;
+
+  if (idx >= static_cast<unsigned long>(count))
+    return NULL;
+
+  return m_trackEntries[idx];
+}
+
+long Cluster::Load(long long& pos, long& len) const {
+  if (m_pSegment == NULL)
+    return E_PARSE_FAILED;
+
+  if (m_timecode >= 0)  // at least partially loaded
+    return 0;
+
+  if (m_pos != m_element_start || m_element_size >= 0)
+    return E_PARSE_FAILED;
+
+  IMkvReader* const pReader = m_pSegment->m_pReader;
+  long long total, avail;
+  const int status = pReader->Length(&total, &avail);
+
+  if (status < 0)  // error
+    return status;
+
+  if (total >= 0 && (avail > total || m_pos > total))
+    return E_FILE_FORMAT_INVALID;
+
+  pos = m_pos;
+
+  long long cluster_size = -1;
+
+  if ((pos + 1) > avail) {
+    len = 1;
+    return E_BUFFER_NOT_FULL;
+  }
+
+  long long result = GetUIntLength(pReader, pos, len);
+
+  if (result < 0)  // error or underflow
+    return static_cast<long>(result);
+
+  if (result > 0)
+    return E_BUFFER_NOT_FULL;
+
+  if ((pos + len) > avail)
+    return E_BUFFER_NOT_FULL;
+
+  const long long id_ = ReadID(pReader, pos, len);
+
+  if (id_ < 0)  // error
+    return static_cast<long>(id_);
+
+  if (id_ != libwebm::kMkvCluster)
+    return E_FILE_FORMAT_INVALID;
+
+  pos += len;  // consume id
+
+  // read cluster size
+
+  if ((pos + 1) > avail) {
+    len = 1;
+    return E_BUFFER_NOT_FULL;
+  }
+
+  result = GetUIntLength(pReader, pos, len);
+
+  if (result < 0)  // error
+    return static_cast<long>(result);
+
+  if (result > 0)
+    return E_BUFFER_NOT_FULL;
+
+  if ((pos + len) > avail)
+    return E_BUFFER_NOT_FULL;
+
+  const long long size = ReadUInt(pReader, pos, len);
+
+  if (size < 0)  // error
+    return static_cast<long>(size);
+
+  if (size == 0)
+    return E_FILE_FORMAT_INVALID;
+
+  pos += len;  // consume length of size of element
+
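+  // An EBML size whose value bits are all set (2^(7*len) - 1) means the
+  // element size is unknown; in that case cluster_size is left as -1.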
+  const long long unknown_size = (1LL << (7 * len)) - 1;
+
+  if (size != unknown_size)
+    cluster_size = size;
+
+  // pos points to start of payload
+  long long timecode = -1;
+  long long new_pos = -1;
+  bool bBlock = false;
+
+  long long cluster_stop = (cluster_size < 0) ? -1 : pos + cluster_size;
+
+  for (;;) {
+    if ((cluster_stop >= 0) && (pos >= cluster_stop))
+      break;
+
+    // Parse ID
+
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
+    }
+
+    long long result = GetUIntLength(pReader, pos, len);
+
+    if (result < 0)  // error
+      return static_cast<long>(result);
+
+    if (result > 0)
+      return E_BUFFER_NOT_FULL;
+
+    if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    if ((pos + len) > avail)
+      return E_BUFFER_NOT_FULL;
+
+    const long long id = ReadID(pReader, pos, len);
+
+    if (id < 0)  // error
+      return static_cast<long>(id);
+
+    if (id == 0)
+      return E_FILE_FORMAT_INVALID;
+
+    // This is the distinguished set of IDs we use to determine
+    // that we have exhausted the sub-elements inside the cluster
+    // whose ID we parsed earlier.
+
+    if (id == libwebm::kMkvCluster)
+      break;
+
+    if (id == libwebm::kMkvCues)
+      break;
+
+    pos += len;  // consume ID field
+
+    // Parse Size
+
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
+    }
+
+    result = GetUIntLength(pReader, pos, len);
+
+    if (result < 0)  // error
+      return static_cast<long>(result);
+
+    if (result > 0)
+      return E_BUFFER_NOT_FULL;
+
+    if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    if ((pos + len) > avail)
+      return E_BUFFER_NOT_FULL;
+
+    const long long size = ReadUInt(pReader, pos, len);
+
+    if (size < 0)  // error
+      return static_cast<long>(size);
+
+    const long long unknown_size = (1LL << (7 * len)) - 1;
+
+    if (size == unknown_size)
+      return E_FILE_FORMAT_INVALID;
+
+    pos += len;  // consume size field
+
+    if ((cluster_stop >= 0) && (pos > cluster_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    // pos now points to start of payload
+
+    if (size == 0)
+      continue;
+
+    if ((cluster_stop >= 0) && ((pos + size) > cluster_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    if (id == libwebm::kMkvTimecode) {
+      len = static_cast<long>(size);
+
+      if ((pos + size) > avail)
+        return E_BUFFER_NOT_FULL;
+
+      timecode = UnserializeUInt(pReader, pos, size);
+
+      if (timecode < 0)  // error (or underflow)
+        return static_cast<long>(timecode);
+
+      new_pos = pos + size;
+
+      if (bBlock)
+        break;
+    } else if (id == libwebm::kMkvBlockGroup) {
+      bBlock = true;
+      break;
+    } else if (id == libwebm::kMkvSimpleBlock) {
+      bBlock = true;
+      break;
+    }
+
+    pos += size;  // consume payload
+    if (cluster_stop >= 0 && pos > cluster_stop)
+      return E_FILE_FORMAT_INVALID;
+  }
+
+  if (cluster_stop >= 0 && pos > cluster_stop)
+    return E_FILE_FORMAT_INVALID;
+
+  if (timecode < 0)  // no timecode found
+    return E_FILE_FORMAT_INVALID;
+
+  if (!bBlock)
+    return E_FILE_FORMAT_INVALID;
+
+  m_pos = new_pos;  // designates position just beyond timecode payload
+  m_timecode = timecode;  // m_timecode >= 0 means we're partially loaded
+
+  if (cluster_size >= 0)
+    m_element_size = cluster_stop - m_element_start;
+
+  return 0;
+}
+
+long Cluster::Parse(long long& pos, long& len) const {
+  long status = Load(pos, len);
+
+  if (status < 0)
+    return status;
+
+  if (m_pos < m_element_start || m_timecode < 0)
+    return E_PARSE_FAILED;
+
+  const long long cluster_stop =
+      (m_element_size < 0) ? -1 : m_element_start + m_element_size;
+
+  if ((cluster_stop >= 0) && (m_pos >= cluster_stop))
+    return 1;  // nothing else to do
+
+  IMkvReader* const pReader = m_pSegment->m_pReader;
+
+  long long total, avail;
+
+  status = pReader->Length(&total, &avail);
+
+  if (status < 0)  // error
+    return status;
+
+  if (total >= 0 && avail > total)
+    return E_FILE_FORMAT_INVALID;
+
+  pos = m_pos;
+
+  for (;;) {
+    if ((cluster_stop >= 0) && (pos >= cluster_stop))
+      break;
+
+    if ((total >= 0) && (pos >= total)) {
+      if (m_element_size < 0)
+        m_element_size = pos - m_element_start;
+
+      break;
+    }
+
+    // Parse ID
+
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
+    }
+
+    long long result = GetUIntLength(pReader, pos, len);
+
+    if (result < 0)  // error
+      return static_cast<long>(result);
+
+    if (result > 0)
+      return E_BUFFER_NOT_FULL;
+
+    if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    if ((pos + len) > avail)
+      return E_BUFFER_NOT_FULL;
+
+    const long long id = ReadID(pReader, pos, len);
+
+    if (id < 0)
+      return E_FILE_FORMAT_INVALID;
+
+    // This is the distinguished set of IDs we use to determine
+    // that we have exhausted the sub-elements inside the cluster
+    // whose ID we parsed earlier.
+
+    if ((id == libwebm::kMkvCluster) || (id == libwebm::kMkvCues)) {
+      if (m_element_size < 0)
+        m_element_size = pos - m_element_start;
+
+      break;
+    }
+
+    pos += len;  // consume ID field
+
+    // Parse Size
+
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
+    }
+
+    result = GetUIntLength(pReader, pos, len);
+
+    if (result < 0)  // error
+      return static_cast<long>(result);
+
+    if (result > 0)
+      return E_BUFFER_NOT_FULL;
+
+    if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    if ((pos + len) > avail)
+      return E_BUFFER_NOT_FULL;
+
+    const long long size = ReadUInt(pReader, pos, len);
+
+    if (size < 0)  // error
+      return static_cast<long>(size);
+
+    const long long unknown_size = (1LL << (7 * len)) - 1;
+
+    if (size == unknown_size)
+      return E_FILE_FORMAT_INVALID;
+
+    pos += len;  // consume size field
+
+    if ((cluster_stop >= 0) && (pos > cluster_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    // pos now points to start of payload
+
+    if (size == 0)
+      continue;
+
+    // const long long block_start = pos;
+    const long long block_stop = pos + size;
+
+    if (cluster_stop >= 0) {
+      if (block_stop > cluster_stop) {
+        if (id == libwebm::kMkvBlockGroup || id == libwebm::kMkvSimpleBlock) {
+          return E_FILE_FORMAT_INVALID;
+        }
+
+        pos = cluster_stop;
+        break;
+      }
+    } else if ((total >= 0) && (block_stop > total)) {
+      m_element_size = total - m_element_start;
+      pos = total;
+      break;
+    } else if (block_stop > avail) {
+      len = static_cast<long>(size);
+      return E_BUFFER_NOT_FULL;
+    }
+
+    Cluster* const this_ = const_cast<Cluster*>(this);
+
+    if (id == libwebm::kMkvBlockGroup)
+      return this_->ParseBlockGroup(size, pos, len);
+
+    if (id == libwebm::kMkvSimpleBlock)
+      return this_->ParseSimpleBlock(size, pos, len);
+
+    pos += size;  // consume payload
+    if (cluster_stop >= 0 && pos > cluster_stop)
+      return E_FILE_FORMAT_INVALID;
+  }
+
+  if (m_element_size < 1)
+    return E_FILE_FORMAT_INVALID;
+
+  m_pos = pos;
+  if (cluster_stop >= 0 && m_pos > cluster_stop)
+    return E_FILE_FORMAT_INVALID;
+
+  if (m_entries_count > 0) {
+    const long idx = m_entries_count - 1;
+
+    const BlockEntry* const pLast = m_entries[idx];
+    if (pLast == NULL)
+      return E_PARSE_FAILED;
+
+    const Block* const pBlock = pLast->GetBlock();
+    if (pBlock == NULL)
+      return E_PARSE_FAILED;
+
+    const long long start = pBlock->m_start;
+
+    if ((total >= 0) && (start > total))
+      return E_PARSE_FAILED;  // defend against truncated stream
+
+    const long long size = pBlock->m_size;
+
+    const long long stop = start + size;
+    if (cluster_stop >= 0 && stop > cluster_stop)
+      return E_FILE_FORMAT_INVALID;
+
+    if ((total >= 0) && (stop > total))
+      return E_PARSE_FAILED;  // defend against truncated stream
+  }
+
+  return 1;  // no more entries
+}
+
+long Cluster::ParseSimpleBlock(long long block_size, long long& pos,
+                               long& len) {
+  const long long block_start = pos;
+  const long long block_stop = pos + block_size;
+
+  IMkvReader* const pReader = m_pSegment->m_pReader;
+
+  long long total, avail;
+
+  long status = pReader->Length(&total, &avail);
+
+  if (status < 0)  // error
+    return status;
+
+  assert((total < 0) || (avail <= total));
+
+  // parse track number
+
+  if ((pos + 1) > avail) {
+    len = 1;
+    return E_BUFFER_NOT_FULL;
+  }
+
+  long long result = GetUIntLength(pReader, pos, len);
+
+  if (result < 0)  // error
+    return static_cast<long>(result);
+
+  if (result > 0)  // weird
+    return E_BUFFER_NOT_FULL;
+
+  if ((pos + len) > block_stop)
+    return E_FILE_FORMAT_INVALID;
+
+  if ((pos + len) > avail)
+    return E_BUFFER_NOT_FULL;
+
+  const long long track = ReadUInt(pReader, pos, len);
+
+  if (track < 0)  // error
+    return static_cast<long>(track);
+
+  if (track == 0)
+    return E_FILE_FORMAT_INVALID;
+
+  pos += len;  // consume track number
+
+  if ((pos + 2) > block_stop)
+    return E_FILE_FORMAT_INVALID;
+
+  if ((pos + 2) > avail) {
+    len = 2;
+    return E_BUFFER_NOT_FULL;
+  }
+
+  pos += 2;  // consume timecode
+
+  if ((pos + 1) > block_stop)
+    return E_FILE_FORMAT_INVALID;
+
+  if ((pos + 1) > avail) {
+    len = 1;
+    return E_BUFFER_NOT_FULL;
+  }
+
+  unsigned char flags;
+
+  status = pReader->Read(pos, 1, &flags);
+
+  if (status < 0) {  // error or underflow
+    len = 1;
+    return status;
+  }
+
+  ++pos;  // consume flags byte
+  assert(pos <= avail);
+
+  if (pos >= block_stop)
+    return E_FILE_FORMAT_INVALID;
+
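+  // Bits 1-2 of the flags byte select the lacing mode:
+  // 0 = none, 1 = Xiph, 2 = fixed-size, 3 = EBML.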
+  const int lacing = int(flags & 0x06) >> 1;
+
+  if ((lacing != 0) && (block_stop > avail)) {
+    len = static_cast<long>(block_stop - pos);
+    return E_BUFFER_NOT_FULL;
+  }
+
+  status = CreateBlock(libwebm::kMkvSimpleBlock, block_start, block_size,
+                       0);  // DiscardPadding
+
+  if (status != 0)
+    return status;
+
+  m_pos = block_stop;
+
+  return 0;  // success
+}
+
+long Cluster::ParseBlockGroup(long long payload_size, long long& pos,
+                              long& len) {
+  const long long payload_start = pos;
+  const long long payload_stop = pos + payload_size;
+
+  IMkvReader* const pReader = m_pSegment->m_pReader;
+
+  long long total, avail;
+
+  long status = pReader->Length(&total, &avail);
+
+  if (status < 0)  // error
+    return status;
+
+  assert((total < 0) || (avail <= total));
+
+  if ((total >= 0) && (payload_stop > total))
+    return E_FILE_FORMAT_INVALID;
+
+  if (payload_stop > avail) {
+    len = static_cast<long>(payload_size);
+    return E_BUFFER_NOT_FULL;
+  }
+
+  long long discard_padding = 0;
+
+  while (pos < payload_stop) {
+    // parse sub-block element ID
+
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
+    }
+
+    long long result = GetUIntLength(pReader, pos, len);
+
+    if (result < 0)  // error
+      return static_cast<long>(result);
+
+    if (result > 0)  // weird
+      return E_BUFFER_NOT_FULL;
+
+    if ((pos + len) > payload_stop)
+      return E_FILE_FORMAT_INVALID;
+
+    if ((pos + len) > avail)
+      return E_BUFFER_NOT_FULL;
+
+    const long long id = ReadID(pReader, pos, len);
+
+    if (id < 0)  // error
+      return static_cast<long>(id);
+
+    if (id == 0)  // not a valid ID
+      return E_FILE_FORMAT_INVALID;
+
+    pos += len;  // consume ID field
+
+    // Parse Size
+
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
+    }
+
+    result = GetUIntLength(pReader, pos, len);
+
+    if (result < 0)  // error
+      return static_cast<long>(result);
+
+    if (result > 0)  // weird
+      return E_BUFFER_NOT_FULL;
+
+    if ((pos + len) > payload_stop)
+      return E_FILE_FORMAT_INVALID;
+
+    if ((pos + len) > avail)
+      return E_BUFFER_NOT_FULL;
+
+    const long long size = ReadUInt(pReader, pos, len);
+
+    if (size < 0)  // error
+      return static_cast<long>(size);
+
+    pos += len;  // consume size field
+
+    // pos now points to start of sub-block group payload
+
+    if (pos > payload_stop)
+      return E_FILE_FORMAT_INVALID;
+
+    if (size == 0)  // weird
+      continue;
+
+    const long long unknown_size = (1LL << (7 * len)) - 1;
+
+    if (size == unknown_size)
+      return E_FILE_FORMAT_INVALID;
+
+    if (id == libwebm::kMkvDiscardPadding) {
+      status = UnserializeInt(pReader, pos, size, discard_padding);
+
+      if (status < 0)  // error
+        return status;
+    }
+
+    if (id != libwebm::kMkvBlock) {
+      pos += size;  // consume sub-part of block group
+
+      if (pos > payload_stop)
+        return E_FILE_FORMAT_INVALID;
+
+      continue;
+    }
+
+    const long long block_stop = pos + size;
+
+    if (block_stop > payload_stop)
+      return E_FILE_FORMAT_INVALID;
+
+    // parse track number
+
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
+    }
+
+    result = GetUIntLength(pReader, pos, len);
+
+    if (result < 0)  // error
+      return static_cast<long>(result);
+
+    if (result > 0)  // weird
+      return E_BUFFER_NOT_FULL;
+
+    if ((pos + len) > block_stop)
+      return E_FILE_FORMAT_INVALID;
+
+    if ((pos + len) > avail)
+      return E_BUFFER_NOT_FULL;
+
+    const long long track = ReadUInt(pReader, pos, len);
+
+    if (track < 0)  // error
+      return static_cast<long>(track);
+
+    if (track == 0)
+      return E_FILE_FORMAT_INVALID;
+
+    pos += len;  // consume track number
+
+    if ((pos + 2) > block_stop)
+      return E_FILE_FORMAT_INVALID;
+
+    if ((pos + 2) > avail) {
+      len = 2;
+      return E_BUFFER_NOT_FULL;
+    }
+
+    pos += 2;  // consume timecode
+
+    if ((pos + 1) > block_stop)
+      return E_FILE_FORMAT_INVALID;
+
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
+    }
+
+    unsigned char flags;
+
+    status = pReader->Read(pos, 1, &flags);
+
+    if (status < 0) {  // error or underflow
+      len = 1;
+      return status;
+    }
+
+    ++pos;  // consume flags byte
+    assert(pos <= avail);
+
+    if (pos >= block_stop)
+      return E_FILE_FORMAT_INVALID;
+
+    const int lacing = int(flags & 0x06) >> 1;
+
+    if ((lacing != 0) && (block_stop > avail)) {
+      len = static_cast<long>(block_stop - pos);
+      return E_BUFFER_NOT_FULL;
+    }
+
+    pos = block_stop;  // consume block-part of block group
+    if (pos > payload_stop)
+      return E_FILE_FORMAT_INVALID;
+  }
+
+  if (pos != payload_stop)
+    return E_FILE_FORMAT_INVALID;
+
+  status = CreateBlock(libwebm::kMkvBlockGroup, payload_start, payload_size,
+                       discard_padding);
+  if (status != 0)
+    return status;
+
+  m_pos = payload_stop;
+
+  return 0;  // success
+}
+
+long Cluster::GetEntry(long index, const mkvparser::BlockEntry*& pEntry) const {
+  assert(m_pos >= m_element_start);
+
+  pEntry = NULL;
+
+  if (index < 0)
+    return -1;  // generic error
+
+  if (m_entries_count < 0)
+    return E_BUFFER_NOT_FULL;
+
+  assert(m_entries);
+  assert(m_entries_size > 0);
+  assert(m_entries_count <= m_entries_size);
+
+  if (index < m_entries_count) {
+    pEntry = m_entries[index];
+    assert(pEntry);
+
+    return 1;  // found entry
+  }
+
+  if (m_element_size < 0)  // we don't know cluster end yet
+    return E_BUFFER_NOT_FULL;  // underflow
+
+  const long long element_stop = m_element_start + m_element_size;
+
+  if (m_pos >= element_stop)
+    return 0;  // nothing left to parse
+
+  return E_BUFFER_NOT_FULL;  // underflow, since more remains to be parsed
+}
+
+Cluster* Cluster::Create(Segment* pSegment, long idx, long long off) {
+  if (!pSegment || off < 0)
+    return NULL;
+
+  const long long element_start = pSegment->m_start + off;
+
+  return new Cluster(pSegment, idx, element_start);
+}
+
+Cluster::Cluster()
+    : m_pSegment(NULL),
+      m_element_start(0),
+      m_index(0),
+      m_pos(0),
+      m_element_size(0),
+      m_timecode(0),
+      m_entries(NULL),
+      m_entries_size(0),
+      m_entries_count(0)  // means "no entries"
+{}
+
+Cluster::Cluster(Segment* pSegment, long idx, long long element_start
+                 /* long long element_size */)
+    : m_pSegment(pSegment),
+      m_element_start(element_start),
+      m_index(idx),
+      m_pos(element_start),
+      m_element_size(-1 /* element_size */),
+      m_timecode(-1),
+      m_entries(NULL),
+      m_entries_size(0),
+      m_entries_count(-1)  // means "has not been parsed yet"
+{}
+
+Cluster::~Cluster() {
+  if (m_entries_count <= 0)
+    return;
+
+  BlockEntry** i = m_entries;
+  BlockEntry** const j = m_entries + m_entries_count;
+
+  while (i != j) {
+    BlockEntry* p = *i++;
+    assert(p);
+
+    delete p;
+  }
+
+  delete[] m_entries;
+}
+
+bool Cluster::EOS() const { return (m_pSegment == NULL); }
+
+long Cluster::GetIndex() const { return m_index; }
+
+long long Cluster::GetPosition() const {
+  const long long pos = m_element_start - m_pSegment->m_start;
+  assert(pos >= 0);
+
+  return pos;
+}
+
+long long Cluster::GetElementSize() const { return m_element_size; }
+
+long Cluster::HasBlockEntries(
+    const Segment* pSegment,
+    long long off,  // relative to start of segment payload
+    long long& pos, long& len) {
+  assert(pSegment);
+  assert(off >= 0);  // relative to segment
+
+  IMkvReader* const pReader = pSegment->m_pReader;
+
+  long long total, avail;
+
+  long status = pReader->Length(&total, &avail);
+
+  if (status < 0)  // error
+    return status;
+
+  assert((total < 0) || (avail <= total));
+
+  pos = pSegment->m_start + off;  // absolute
+
+  if ((total >= 0) && (pos >= total))
+    return 0;  // we don't even have a complete cluster
+
+  const long long segment_stop =
+      (pSegment->m_size < 0) ? -1 : pSegment->m_start + pSegment->m_size;
+
+  long long cluster_stop = -1;  // interpreted later to mean "unknown size"
+
+  {
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
+    }
+
+    long long result = GetUIntLength(pReader, pos, len);
+
+    if (result < 0)  // error
+      return static_cast<long>(result);
+
+    if (result > 0)  // need more data
+      return E_BUFFER_NOT_FULL;
+
+    if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    if ((total >= 0) && ((pos + len) > total))
+      return 0;
+
+    if ((pos + len) > avail)
+      return E_BUFFER_NOT_FULL;
+
+    const long long id = ReadID(pReader, pos, len);
+
+    if (id < 0)  // error
+      return static_cast<long>(id);
+
+    if (id != libwebm::kMkvCluster)
+      return E_PARSE_FAILED;
+
+    pos += len;  // consume Cluster ID field
+
+    // read size field
+
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
+    }
+
+    result = GetUIntLength(pReader, pos, len);
+
+    if (result < 0)  // error
+      return static_cast<long>(result);
+
+    if (result > 0)  // weird
+      return E_BUFFER_NOT_FULL;
+
+    if ((segment_stop >= 0) && ((pos + len) > segment_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    if ((total >= 0) && ((pos + len) > total))
+      return 0;
+
+    if ((pos + len) > avail)
+      return E_BUFFER_NOT_FULL;
+
+    const long long size = ReadUInt(pReader, pos, len);
+
+    if (size < 0)  // error
+      return static_cast<long>(size);
+
+    if (size == 0)
+      return 0;  // cluster does not have entries
+
+    pos += len;  // consume size field
+
+    // pos now points to start of payload
+
+    const long long unknown_size = (1LL << (7 * len)) - 1;
+
+    if (size != unknown_size) {
+      cluster_stop = pos + size;
+      assert(cluster_stop >= 0);
+
+      if ((segment_stop >= 0) && (cluster_stop > segment_stop))
+        return E_FILE_FORMAT_INVALID;
+
+      if ((total >= 0) && (cluster_stop > total))
+        // return E_FILE_FORMAT_INVALID;  //too conservative
+        return 0;  // cluster does not have any entries
+    }
+  }
+
+  for (;;) {
+    if ((cluster_stop >= 0) && (pos >= cluster_stop))
+      return 0;  // no entries detected
+
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
+    }
+
+    long long result = GetUIntLength(pReader, pos, len);
+
+    if (result < 0)  // error
+      return static_cast<long>(result);
+
+    if (result > 0)  // need more data
+      return E_BUFFER_NOT_FULL;
+
+    if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    if ((pos + len) > avail)
+      return E_BUFFER_NOT_FULL;
+
+    const long long id = ReadID(pReader, pos, len);
+
+    if (id < 0)  // error
+      return static_cast<long>(id);
+
+    // This is the distinguished set of IDs we use to determine
+    // that we have exhausted the sub-elements inside the cluster
+    // whose ID we parsed earlier.
+
+    if (id == libwebm::kMkvCluster)
+      return 0;  // no entries found
+
+    if (id == libwebm::kMkvCues)
+      return 0;  // no entries found
+
+    pos += len;  // consume id field
+
+    if ((cluster_stop >= 0) && (pos >= cluster_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    // read size field
+
+    if ((pos + 1) > avail) {
+      len = 1;
+      return E_BUFFER_NOT_FULL;
+    }
+
+    result = GetUIntLength(pReader, pos, len);
+
+    if (result < 0)  // error
+      return static_cast<long>(result);
+
+    if (result > 0)  // underflow
+      return E_BUFFER_NOT_FULL;
+
+    if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    if ((pos + len) > avail)
+      return E_BUFFER_NOT_FULL;
+
+    const long long size = ReadUInt(pReader, pos, len);
+
+    if (size < 0)  // error
+      return static_cast<long>(size);
+
+    pos += len;  // consume size field
+
+    // pos now points to start of payload
+
+    if ((cluster_stop >= 0) && (pos > cluster_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    if (size == 0)  // weird
+      continue;
+
+    const long long unknown_size = (1LL << (7 * len)) - 1;
+
+    if (size == unknown_size)
+      return E_FILE_FORMAT_INVALID;  // not supported inside cluster
+
+    if ((cluster_stop >= 0) && ((pos + size) > cluster_stop))
+      return E_FILE_FORMAT_INVALID;
+
+    if (id == libwebm::kMkvBlockGroup)
+      return 1;  // have at least one entry
+
+    if (id == libwebm::kMkvSimpleBlock)
+      return 1;  // have at least one entry
+
+    pos += size;  // consume payload
+    if (cluster_stop >= 0 && pos > cluster_stop)
+      return E_FILE_FORMAT_INVALID;
+  }
+}
+
+long long Cluster::GetTimeCode() const {
+  long long pos;
+  long len;
+
+  const long status = Load(pos, len);
+
+  if (status < 0)  // error
+    return status;
+
+  return m_timecode;
+}
+
+long long Cluster::GetTime() const {
+  const long long tc = GetTimeCode();
+
+  if (tc < 0)
+    return tc;
+
+  const SegmentInfo* const pInfo = m_pSegment->GetInfo();
+  assert(pInfo);
+
+  const long long scale = pInfo->GetTimeCodeScale();
+  assert(scale >= 1);
+
+  const long long t = m_timecode * scale;
+
+  return t;
+}
+
+long long Cluster::GetFirstTime() const {
+  const BlockEntry* pEntry;
+
+  const long status = GetFirst(pEntry);
+
+  if (status < 0)  // error
+    return status;
+
+  if (pEntry == NULL)  // empty cluster
+    return GetTime();
+
+  const Block* const pBlock = pEntry->GetBlock();
+  assert(pBlock);
+
+  return pBlock->GetTime(this);
+}
+
+long long Cluster::GetLastTime() const {
+  const BlockEntry* pEntry;
+
+  const long status = GetLast(pEntry);
+
+  if (status < 0)  // error
+    return status;
+
+  if (pEntry == NULL)  // empty cluster
+    return GetTime();
+
+  const Block* const pBlock = pEntry->GetBlock();
+  assert(pBlock);
+
+  return pBlock->GetTime(this);
+}
+
+long Cluster::CreateBlock(long long id,
+                          long long pos,  // absolute pos of payload
+                          long long size, long long discard_padding) {
+  if (id != libwebm::kMkvBlockGroup && id != libwebm::kMkvSimpleBlock)
+    return E_PARSE_FAILED;
+
+  if (m_entries_count < 0) {  // haven't parsed anything yet
+    assert(m_entries == NULL);
+    assert(m_entries_size == 0);
+
+    m_entries_size = 1024;
+    m_entries = new BlockEntry*[m_entries_size];
+
+    m_entries_count = 0;
+  } else {
+    assert(m_entries);
+    assert(m_entries_size > 0);
+    assert(m_entries_count <= m_entries_size);
+
+    if (m_entries_count >= m_entries_size) {
+      const long entries_size = 2 * m_entries_size;
+
+      BlockEntry** const entries = new BlockEntry*[entries_size];
+
+      BlockEntry** src = m_entries;
+      BlockEntry** const src_end = src + m_entries_count;
+
+      BlockEntry** dst = entries;
+
+      while (src != src_end)
+        *dst++ = *src++;
+
+      delete[] m_entries;
+
+      m_entries = entries;
+      m_entries_size = entries_size;
+    }
+  }
+
+  if (id == libwebm::kMkvBlockGroup)
+    return CreateBlockGroup(pos, size, discard_padding);
+  else
+    return CreateSimpleBlock(pos, size);
+}
+
+long Cluster::CreateBlockGroup(long long start_offset, long long size,
+                               long long discard_padding) {
+  assert(m_entries);
+  assert(m_entries_size > 0);
+  assert(m_entries_count >= 0);
+  assert(m_entries_count < m_entries_size);
+
+  IMkvReader* const pReader = m_pSegment->m_pReader;
+
+  long long pos = start_offset;
+  const long long stop = start_offset + size;
+
+  // For WebM files, there is a bias towards previous reference times
+  // (in order to support alt-ref frames, which refer back to the previous
+  // keyframe).  Normally a 0 value is not possible, but here we tentatively
+  // allow 0 as the value of a reference frame, with the interpretation
+  // that this is a "previous" reference time.
+
+  long long prev = 1;  // nonce
+  long long next = 0;  // nonce
+  long long duration = -1;  // really, this is unsigned
+
+  long long bpos = -1;
+  long long bsize = -1;
+
+  while (pos < stop) {
+    long len;
+    const long long id = ReadID(pReader, pos, len);
+    if (id < 0 || (pos + len) > stop)
+      return E_FILE_FORMAT_INVALID;
+
+    pos += len;  // consume ID
+
+    const long long size = ReadUInt(pReader, pos, len);
+    assert(size >= 0);  // TODO
+    assert((pos + len) <= stop);
+
+    pos += len;  // consume size
+
+    if (id == libwebm::kMkvBlock) {
+      if (bpos < 0) {  // Block ID
+        bpos = pos;
+        bsize = size;
+      }
+    } else if (id == libwebm::kMkvBlockDuration) {
+      if (size > 8)
+        return E_FILE_FORMAT_INVALID;
+
+      duration = UnserializeUInt(pReader, pos, size);
+
+      if (duration < 0)
+        return E_FILE_FORMAT_INVALID;
+    } else if (id == libwebm::kMkvReferenceBlock) {
+      if (size > 8 || size <= 0)
+        return E_FILE_FORMAT_INVALID;
+      const long size_ = static_cast<long>(size);
+
+      long long time;
+
+      long status = UnserializeInt(pReader, pos, size_, time);
+      assert(status == 0);
+      if (status != 0)
+        return -1;
+
+      if (time <= 0)  // see note above
+        prev = time;
+      else
+        next = time;
+    }
+
+    pos += size;  // consume payload
+    if (pos > stop)
+      return E_FILE_FORMAT_INVALID;
+  }
+  if (bpos < 0)
+    return E_FILE_FORMAT_INVALID;
+
+  if (pos != stop)
+    return E_FILE_FORMAT_INVALID;
+  assert(bsize >= 0);
+
+  const long idx = m_entries_count;
+
+  BlockEntry** const ppEntry = m_entries + idx;
+  BlockEntry*& pEntry = *ppEntry;
+
+  pEntry = new BlockGroup(this, idx, bpos, bsize, prev, next, duration,
+                          discard_padding);
+
+  BlockGroup* const p = static_cast<BlockGroup*>(pEntry);
+
+  const long status = p->Parse();
+
+  if (status == 0) {  // success
+    ++m_entries_count;
+    return 0;
+  }
+
+  delete pEntry;
+  pEntry = 0;
+
+  return status;
+}
+
+long Cluster::CreateSimpleBlock(long long st, long long sz) {
+  assert(m_entries);
+  assert(m_entries_size > 0);
+  assert(m_entries_count >= 0);
+  assert(m_entries_count < m_entries_size);
+
+  const long idx = m_entries_count;
+
+  BlockEntry** const ppEntry = m_entries + idx;
+  BlockEntry*& pEntry = *ppEntry;
+
+  pEntry = new SimpleBlock(this, idx, st, sz);
+
+  SimpleBlock* const p = static_cast<SimpleBlock*>(pEntry);
+
+  const long status = p->Parse();
+
+  if (status == 0) {
+    ++m_entries_count;
+    return 0;
+  }
+
+  delete pEntry;
+  pEntry = 0;
+
+  return status;
+}
+
+long Cluster::GetFirst(const BlockEntry*& pFirst) const {
+  if (m_entries_count <= 0) {
+    long long pos;
+    long len;
+
+    const long status = Parse(pos, len);
+
+    if (status < 0) {  // error
+      pFirst = NULL;
+      return status;
+    }
+
+    if (m_entries_count <= 0) {  // empty cluster
+      pFirst = NULL;
+      return 0;
+    }
+  }
+
+  assert(m_entries);
+
+  pFirst = m_entries[0];
+  assert(pFirst);
+
+  return 0;  // success
+}
+
+long Cluster::GetLast(const BlockEntry*& pLast) const {
+  for (;;) {
+    long long pos;
+    long len;
+
+    const long status = Parse(pos, len);
+
+    if (status < 0) {  // error
+      pLast = NULL;
+      return status;
+    }
+
+    if (status > 0)  // no new block
+      break;
+  }
+
+  if (m_entries_count <= 0) {
+    pLast = NULL;
+    return 0;
+  }
+
+  assert(m_entries);
+
+  const long idx = m_entries_count - 1;
+
+  pLast = m_entries[idx];
+  assert(pLast);
+
+  return 0;
+}
+
+long Cluster::GetNext(const BlockEntry* pCurr, const BlockEntry*& pNext) const {
+  assert(pCurr);
+  assert(m_entries);
+  assert(m_entries_count > 0);
+
+  size_t idx = pCurr->GetIndex();
+  assert(idx < size_t(m_entries_count));
+  assert(m_entries[idx] == pCurr);
+
+  ++idx;
+
+  if (idx >= size_t(m_entries_count)) {
+    long long pos;
+    long len;
+
+    const long status = Parse(pos, len);
+
+    if (status < 0) {  // error
+      pNext = NULL;
+      return status;
+    }
+
+    if (status > 0) {
+      pNext = NULL;
+      return 0;
+    }
+
+    assert(m_entries);
+    assert(m_entries_count > 0);
+    assert(idx < size_t(m_entries_count));
+  }
+
+  pNext = m_entries[idx];
+  assert(pNext);
+
+  return 0;
+}
+
+long Cluster::GetEntryCount() const { return m_entries_count; }
+
+const BlockEntry* Cluster::GetEntry(const Track* pTrack,
+                                    long long time_ns) const {
+  assert(pTrack);
+
+  if (m_pSegment == NULL)  // this is the special EOS cluster
+    return pTrack->GetEOS();
+
+  const BlockEntry* pResult = pTrack->GetEOS();
+
+  long index = 0;
+
+  for (;;) {
+    if (index >= m_entries_count) {
+      long long pos;
+      long len;
+
+      const long status = Parse(pos, len);
+      assert(status >= 0);
+
+      if (status > 0)  // completely parsed, and no more entries
+        return pResult;
+
+      if (status < 0)  // should never happen
+        return 0;
+
+      assert(m_entries);
+      assert(index < m_entries_count);
+    }
+
+    const BlockEntry* const pEntry = m_entries[index];
+    assert(pEntry);
+    assert(!pEntry->EOS());
+
+    const Block* const pBlock = pEntry->GetBlock();
+    assert(pBlock);
+
+    if (pBlock->GetTrackNumber() != pTrack->GetNumber()) {
+      ++index;
+      continue;
+    }
+
+    if (pTrack->VetEntry(pEntry)) {
+      if (time_ns < 0)  // just want first candidate block
+        return pEntry;
+
+      const long long ns = pBlock->GetTime(this);
+
+      if (ns > time_ns)
+        return pResult;
+
+      pResult = pEntry;  // have a candidate
+    } else if (time_ns >= 0) {
+      const long long ns = pBlock->GetTime(this);
+
+      if (ns > time_ns)
+        return pResult;
+    }
+
+    ++index;
+  }
+}
+
+const BlockEntry* Cluster::GetEntry(const CuePoint& cp,
+                                    const CuePoint::TrackPosition& tp) const {
+  assert(m_pSegment);
+  const long long tc = cp.GetTimeCode();
+
+  if (tp.m_block > 0) {
+    const long block = static_cast<long>(tp.m_block);
+    const long index = block - 1;
+
+    while (index >= m_entries_count) {
+      long long pos;
+      long len;
+
+      const long status = Parse(pos, len);
+
+      if (status < 0)  // TODO: can this happen?
+        return NULL;
+
+      if (status > 0)  // nothing remains to be parsed
+        return NULL;
+    }
+
+    const BlockEntry* const pEntry = m_entries[index];
+    assert(pEntry);
+    assert(!pEntry->EOS());
+
+    const Block* const pBlock = pEntry->GetBlock();
+    assert(pBlock);
+
+    if ((pBlock->GetTrackNumber() == tp.m_track) &&
+        (pBlock->GetTimeCode(this) == tc)) {
+      return pEntry;
+    }
+  }
+
+  long index = 0;
+
+  for (;;) {
+    if (index >= m_entries_count) {
+      long long pos;
+      long len;
+
+      const long status = Parse(pos, len);
+
+      if (status < 0)  // TODO: can this happen?
+        return NULL;
+
+      if (status > 0)  // nothing remains to be parsed
+        return NULL;
+
+      assert(m_entries);
+      assert(index < m_entries_count);
+    }
+
+    const BlockEntry* const pEntry = m_entries[index];
+    assert(pEntry);
+    assert(!pEntry->EOS());
+
+    const Block* const pBlock = pEntry->GetBlock();
+    assert(pBlock);
+
+    if (pBlock->GetTrackNumber() != tp.m_track) {
+      ++index;
+      continue;
+    }
+
+    const long long tc_ = pBlock->GetTimeCode(this);
+
+    if (tc_ < tc) {
+      ++index;
+      continue;
+    }
+
+    if (tc_ > tc)
+      return NULL;
+
+    const Tracks* const pTracks = m_pSegment->GetTracks();
+    assert(pTracks);
+
+    const long tn = static_cast<long>(tp.m_track);
+    const Track* const pTrack = pTracks->GetTrackByNumber(tn);
+
+    if (pTrack == NULL)
+      return NULL;
+
+    const long long type = pTrack->GetType();
+
+    if (type == 2)  // audio
+      return pEntry;
+
+    if (type != 1)  // not video
+      return NULL;
+
+    if (!pBlock->IsKey())
+      return NULL;
+
+    return pEntry;
+  }
+}
+
+BlockEntry::BlockEntry(Cluster* p, long idx) : m_pCluster(p), m_index(idx) {}
+BlockEntry::~BlockEntry() {}
+const Cluster* BlockEntry::GetCluster() const { return m_pCluster; }
+long BlockEntry::GetIndex() const { return m_index; }
+
+SimpleBlock::SimpleBlock(Cluster* pCluster, long idx, long long start,
+                         long long size)
+    : BlockEntry(pCluster, idx), m_block(start, size, 0) {}
+
+long SimpleBlock::Parse() { return m_block.Parse(m_pCluster); }
+BlockEntry::Kind SimpleBlock::GetKind() const { return kBlockSimple; }
+const Block* SimpleBlock::GetBlock() const { return &m_block; }
+
+BlockGroup::BlockGroup(Cluster* pCluster, long idx, long long block_start,
+                       long long block_size, long long prev, long long next,
+                       long long duration, long long discard_padding)
+    : BlockEntry(pCluster, idx),
+      m_block(block_start, block_size, discard_padding),
+      m_prev(prev),
+      m_next(next),
+      m_duration(duration) {}
+
+long BlockGroup::Parse() {
+  const long status = m_block.Parse(m_pCluster);
+
+  if (status)
+    return status;
+
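+  // Flag the block as a key frame only when the group carried no
+  // ReferenceBlock at all, i.e. m_prev and m_next both still hold the
+  // nonce values assigned in Cluster::CreateBlockGroup().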
+  m_block.SetKey((m_prev > 0) && (m_next <= 0));
+
+  return 0;
+}
+
+BlockEntry::Kind BlockGroup::GetKind() const { return kBlockGroup; }
+const Block* BlockGroup::GetBlock() const { return &m_block; }
+long long BlockGroup::GetPrevTimeCode() const { return m_prev; }
+long long BlockGroup::GetNextTimeCode() const { return m_next; }
+long long BlockGroup::GetDurationTimeCode() const { return m_duration; }
+
+Block::Block(long long start, long long size_, long long discard_padding)
+    : m_start(start),
+      m_size(size_),
+      m_track(0),
+      m_timecode(-1),
+      m_flags(0),
+      m_frames(NULL),
+      m_frame_count(-1),
+      m_discard_padding(discard_padding) {}
+
+Block::~Block() { delete[] m_frames; }
+
+long Block::Parse(const Cluster* pCluster) {
+  if (pCluster == NULL)
+    return -1;
+
+  if (pCluster->m_pSegment == NULL)
+    return -1;
+
+  assert(m_start >= 0);
+  assert(m_size >= 0);
+  assert(m_track <= 0);
+  assert(m_frames == NULL);
+  assert(m_frame_count <= 0);
+
+  long long pos = m_start;
+  const long long stop = m_start + m_size;
+
+  long len;
+
+  IMkvReader* const pReader = pCluster->m_pSegment->m_pReader;
+
+  m_track = ReadUInt(pReader, pos, len);
+
+  if (m_track <= 0)
+    return E_FILE_FORMAT_INVALID;
+
+  if ((pos + len) > stop)
+    return E_FILE_FORMAT_INVALID;
+
+  pos += len;  // consume track number
+
+  if ((stop - pos) < 2)
+    return E_FILE_FORMAT_INVALID;
+
+  long status;
+  long long value;
+
+  status = UnserializeInt(pReader, pos, 2, value);
+
+  if (status)
+    return E_FILE_FORMAT_INVALID;
+
+  if (value < SHRT_MIN)
+    return E_FILE_FORMAT_INVALID;
+
+  if (value > SHRT_MAX)
+    return E_FILE_FORMAT_INVALID;
+
+  m_timecode = static_cast<short>(value);
+
+  pos += 2;
+
+  if ((stop - pos) <= 0)
+    return E_FILE_FORMAT_INVALID;
+
+  status = pReader->Read(pos, 1, &m_flags);
+
+  if (status)
+    return E_FILE_FORMAT_INVALID;
+
+  const int lacing = int(m_flags & 0x06) >> 1;
+
+  ++pos;  // consume flags byte
+
+  if (lacing == 0) {  // no lacing
+    if (pos > stop)
+      return E_FILE_FORMAT_INVALID;
+
+    m_frame_count = 1;
+    m_frames = new Frame[m_frame_count];
+
+    Frame& f = m_frames[0];
+    f.pos = pos;
+
+    const long long frame_size = stop - pos;
+
+    if (frame_size > LONG_MAX || frame_size <= 0)
+      return E_FILE_FORMAT_INVALID;
+
+    f.len = static_cast<long>(frame_size);
+
+    return 0;  // success
+  }
+
+  if (pos >= stop)
+    return E_FILE_FORMAT_INVALID;
+
+  unsigned char biased_count;
+
+  status = pReader->Read(pos, 1, &biased_count);
+
+  if (status)
+    return E_FILE_FORMAT_INVALID;
+
+  ++pos;  // consume frame count
+  if (pos > stop)
+    return E_FILE_FORMAT_INVALID;
+
+  m_frame_count = int(biased_count) + 1;
+
+  m_frames = new Frame[m_frame_count];
+
+  if (!m_frames)
+    return E_FILE_FORMAT_INVALID;
+
+  if (lacing == 1) {  // Xiph
+    Frame* pf = m_frames;
+    Frame* const pf_end = pf + m_frame_count;
+
+    long long size = 0;
+    int frame_count = m_frame_count;
+
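+    // Xiph lacing: each lace size is stored as a run of 255-valued bytes
+    // terminated by a byte < 255 and summed together; the final frame
+    // takes whatever payload remains.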
+    while (frame_count > 1) {
+      long frame_size = 0;
+
+      for (;;) {
+        unsigned char val;
+
+        if (pos >= stop)
+          return E_FILE_FORMAT_INVALID;
+
+        status = pReader->Read(pos, 1, &val);
+
+        if (status)
+          return E_FILE_FORMAT_INVALID;
+
+        ++pos;  // consume xiph size byte
+
+        frame_size += val;
+
+        if (val < 255)
+          break;
+      }
+
+      Frame& f = *pf++;
+      assert(pf < pf_end);
+      if (pf >= pf_end)
+        return E_FILE_FORMAT_INVALID;
+
+      f.pos = 0;  // patch later
+
+      if (frame_size <= 0)
+        return E_FILE_FORMAT_INVALID;
+
+      f.len = frame_size;
+      size += frame_size;  // contribution of this frame
+
+      --frame_count;
+    }
+
+    if (pf >= pf_end || pos > stop)
+      return E_FILE_FORMAT_INVALID;
+
+    {
+      Frame& f = *pf++;
+
+      if (pf != pf_end)
+        return E_FILE_FORMAT_INVALID;
+
+      f.pos = 0;  // patch later
+
+      const long long total_size = stop - pos;
+
+      if (total_size < size)
+        return E_FILE_FORMAT_INVALID;
+
+      const long long frame_size = total_size - size;
+
+      if (frame_size > LONG_MAX || frame_size <= 0)
+        return E_FILE_FORMAT_INVALID;
+
+      f.len = static_cast<long>(frame_size);
+    }
+
+    pf = m_frames;
+    while (pf != pf_end) {
+      Frame& f = *pf++;
+      assert((pos + f.len) <= stop);
+
+      if ((pos + f.len) > stop)
+        return E_FILE_FORMAT_INVALID;
+
+      f.pos = pos;
+      pos += f.len;
+    }
+
+    assert(pos == stop);
+    if (pos != stop)
+      return E_FILE_FORMAT_INVALID;
+
+  } else if (lacing == 2) {  // fixed-size lacing
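+    // Fixed-size lacing: no sizes are stored; the remaining payload must
+    // divide evenly among the frames.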
+    if (pos >= stop)
+      return E_FILE_FORMAT_INVALID;
+
+    const long long total_size = stop - pos;
+
+    if ((total_size % m_frame_count) != 0)
+      return E_FILE_FORMAT_INVALID;
+
+    const long long frame_size = total_size / m_frame_count;
+
+    if (frame_size > LONG_MAX || frame_size <= 0)
+      return E_FILE_FORMAT_INVALID;
+
+    Frame* pf = m_frames;
+    Frame* const pf_end = pf + m_frame_count;
+
+    while (pf != pf_end) {
+      assert((pos + frame_size) <= stop);
+      if ((pos + frame_size) > stop)
+        return E_FILE_FORMAT_INVALID;
+
+      Frame& f = *pf++;
+
+      f.pos = pos;
+      f.len = static_cast<long>(frame_size);
+
+      pos += frame_size;
+    }
+
+    assert(pos == stop);
+    if (pos != stop)
+      return E_FILE_FORMAT_INVALID;
+
+  } else {
+    assert(lacing == 3);  // EBML lacing
+
+    if (pos >= stop)
+      return E_FILE_FORMAT_INVALID;
+
+    long long size = 0;
+    int frame_count = m_frame_count;
+
+    long long frame_size = ReadUInt(pReader, pos, len);
+
+    if (frame_size <= 0)
+      return E_FILE_FORMAT_INVALID;
+
+    if (frame_size > LONG_MAX)
+      return E_FILE_FORMAT_INVALID;
+
+    if ((pos + len) > stop)
+      return E_FILE_FORMAT_INVALID;
+
+    pos += len;  // consume length of size of first frame
+
+    if ((pos + frame_size) > stop)
+      return E_FILE_FORMAT_INVALID;
+
+    Frame* pf = m_frames;
+    Frame* const pf_end = pf + m_frame_count;
+
+    {
+      Frame& curr = *pf;
+
+      curr.pos = 0;  // patch later
+
+      curr.len = static_cast<long>(frame_size);
+      size += curr.len;  // contribution of this frame
+    }
+
+    --frame_count;
+
+    while (frame_count > 1) {
+      if (pos >= stop)
+        return E_FILE_FORMAT_INVALID;
+
+      assert(pf < pf_end);
+      if (pf >= pf_end)
+        return E_FILE_FORMAT_INVALID;
+
+      const Frame& prev = *pf++;
+      assert(prev.len == frame_size);
+      if (prev.len != frame_size)
+        return E_FILE_FORMAT_INVALID;
+
+      assert(pf < pf_end);
+      if (pf >= pf_end)
+        return E_FILE_FORMAT_INVALID;
+
+      Frame& curr = *pf;
+
+      curr.pos = 0;  // patch later
+
+      const long long delta_size_ = ReadUInt(pReader, pos, len);
+
+      if (delta_size_ < 0)
+        return E_FILE_FORMAT_INVALID;
+
+      if ((pos + len) > stop)
+        return E_FILE_FORMAT_INVALID;
+
+      pos += len;  // consume length of (delta) size
+      if (pos > stop)
+        return E_FILE_FORMAT_INVALID;
+
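+      // EBML lacing stores each size after the first as a signed delta from
+      // the previous size, biased by 2^(7*len - 1) - 1.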
+      const long exp = 7 * len - 1;
+      const long long bias = (1LL << exp) - 1LL;
+      const long long delta_size = delta_size_ - bias;
+
+      frame_size += delta_size;
+
+      if (frame_size <= 0)
+        return E_FILE_FORMAT_INVALID;
+
+      if (frame_size > LONG_MAX)
+        return E_FILE_FORMAT_INVALID;
+
+      curr.len = static_cast<long>(frame_size);
+      size += curr.len;  // contribution of this frame
+
+      --frame_count;
+    }
+
+    // parse last frame
+    if (frame_count > 0) {
+      if (pos > stop || pf >= pf_end)
+        return E_FILE_FORMAT_INVALID;
+
+      const Frame& prev = *pf++;
+      assert(prev.len == frame_size);
+      if (prev.len != frame_size)
+        return E_FILE_FORMAT_INVALID;
+
+      if (pf >= pf_end)
+        return E_FILE_FORMAT_INVALID;
+
+      Frame& curr = *pf++;
+      if (pf != pf_end)
+        return E_FILE_FORMAT_INVALID;
+
+      curr.pos = 0;  // patch later
+
+      const long long total_size = stop - pos;
+
+      if (total_size < size)
+        return E_FILE_FORMAT_INVALID;
+
+      frame_size = total_size - size;
+
+      if (frame_size > LONG_MAX || frame_size <= 0)
+        return E_FILE_FORMAT_INVALID;
+
+      curr.len = static_cast<long>(frame_size);
+    }
+
+    pf = m_frames;
+    while (pf != pf_end) {
+      Frame& f = *pf++;
+      assert((pos + f.len) <= stop);
+      if ((pos + f.len) > stop)
+        return E_FILE_FORMAT_INVALID;
+
+      f.pos = pos;
+      pos += f.len;
+    }
+
+    if (pos != stop)
+      return E_FILE_FORMAT_INVALID;
+  }
+
+  return 0;  // success
+}
+
+long long Block::GetTimeCode(const Cluster* pCluster) const {
+  if (pCluster == 0)
+    return m_timecode;
+
+  const long long tc0 = pCluster->GetTimeCode();
+  assert(tc0 >= 0);
+
+  const long long tc = tc0 + m_timecode;
+
+  return tc;  // unscaled timecode units
+}
+
+long long Block::GetTime(const Cluster* pCluster) const {
+  assert(pCluster);
+
+  const long long tc = GetTimeCode(pCluster);
+
+  const Segment* const pSegment = pCluster->m_pSegment;
+  const SegmentInfo* const pInfo = pSegment->GetInfo();
+  assert(pInfo);
+
+  const long long scale = pInfo->GetTimeCodeScale();
+  assert(scale >= 1);
+
+  const long long ns = tc * scale;
+
+  return ns;
+}
+
+long long Block::GetTrackNumber() const { return m_track; }
+
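+// The high bit of the flags byte serves as the key-frame flag; SimpleBlocks
+// carry it in the stream, while blocks inside a BlockGroup have it set via
+// SetKey() during parsing.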
+bool Block::IsKey() const {
+  return ((m_flags & static_cast<unsigned char>(1 << 7)) != 0);
+}
+
+void Block::SetKey(bool bKey) {
+  if (bKey)
+    m_flags |= static_cast<unsigned char>(1 << 7);
+  else
+    m_flags &= 0x7F;
+}
+
+bool Block::IsInvisible() const { return bool(int(m_flags & 0x08) != 0); }
+
+Block::Lacing Block::GetLacing() const {
+  const int value = int(m_flags & 0x06) >> 1;
+  return static_cast<Lacing>(value);
+}
+
+int Block::GetFrameCount() const { return m_frame_count; }
+
+const Block::Frame& Block::GetFrame(int idx) const {
+  assert(idx >= 0);
+  assert(idx < m_frame_count);
+
+  const Frame& f = m_frames[idx];
+  assert(f.pos > 0);
+  assert(f.len > 0);
+
+  return f;
+}
+
+long Block::Frame::Read(IMkvReader* pReader, unsigned char* buf) const {
+  assert(pReader);
+  assert(buf);
+
+  const long status = pReader->Read(pos, len, buf);
+  return status;
+}
+
+long long Block::GetDiscardPadding() const { return m_discard_padding; }
+
+}  // namespace mkvparser

+ 1111 - 0
thirdparty/libsimplewebm/libwebm/mkvparser/mkvparser.h

@@ -0,0 +1,1111 @@
+// Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS.  All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+#ifndef MKVPARSER_MKVPARSER_H_
+#define MKVPARSER_MKVPARSER_H_
+
+#include <stddef.h>
+
+namespace mkvparser {
+
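+// Status codes returned by the parsing functions; negative values indicate
+// errors.  E_BUFFER_NOT_FULL means more data is needed before parsing can
+// continue: the caller should make more of the stream available and retry.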
+const int E_PARSE_FAILED = -1;
+const int E_FILE_FORMAT_INVALID = -2;
+const int E_BUFFER_NOT_FULL = -3;
+
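+// Client-supplied I/O abstraction.  Read() copies |len| bytes starting at
+// absolute offset |pos| into |buf| and returns 0 on success; Length()
+// reports the total stream size and the number of bytes currently
+// available (the total may be negative when it is not yet known).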
+class IMkvReader {
+ public:
+  virtual int Read(long long pos, long len, unsigned char* buf) = 0;
+  virtual int Length(long long* total, long long* available) = 0;
+
+  virtual ~IMkvReader();
+};
+
+template <typename Type>
+Type* SafeArrayAlloc(unsigned long long num_elements,
+                     unsigned long long element_size);
+long long GetUIntLength(IMkvReader*, long long, long&);
+long long ReadUInt(IMkvReader*, long long, long&);
+long long ReadID(IMkvReader* pReader, long long pos, long& len);
+long long UnserializeUInt(IMkvReader*, long long pos, long long size);
+
+long UnserializeFloat(IMkvReader*, long long pos, long long size, double&);
+long UnserializeInt(IMkvReader*, long long pos, long long size,
+                    long long& result);
+
+long UnserializeString(IMkvReader*, long long pos, long long size, char*& str);
+
+long ParseElementHeader(IMkvReader* pReader,
+                        long long& pos,  // consume id and size fields
+                        long long stop,  // if you know size of element's parent
+                        long long& id, long long& size);
+
+bool Match(IMkvReader*, long long&, unsigned long, long long&);
+bool Match(IMkvReader*, long long&, unsigned long, unsigned char*&, size_t&);
+
+void GetVersion(int& major, int& minor, int& build, int& revision);
+
+struct EBMLHeader {
+  EBMLHeader();
+  ~EBMLHeader();
+  long long m_version;
+  long long m_readVersion;
+  long long m_maxIdLength;
+  long long m_maxSizeLength;
+  char* m_docType;
+  long long m_docTypeVersion;
+  long long m_docTypeReadVersion;
+
+  long long Parse(IMkvReader*, long long&);
+  void Init();
+};
+
+class Segment;
+class Track;
+class Cluster;
+
+class Block {
+  Block(const Block&);
+  Block& operator=(const Block&);
+
+ public:
+  const long long m_start;
+  const long long m_size;
+
+  Block(long long start, long long size, long long discard_padding);
+  ~Block();
+
+  long Parse(const Cluster*);
+
+  long long GetTrackNumber() const;
+  long long GetTimeCode(const Cluster*) const;  // absolute, but not scaled
+  long long GetTime(const Cluster*) const;  // absolute, and scaled (ns)
+  bool IsKey() const;
+  void SetKey(bool);
+  bool IsInvisible() const;
+
+  enum Lacing { kLacingNone, kLacingXiph, kLacingFixed, kLacingEbml };
+  Lacing GetLacing() const;
+
+  int GetFrameCount() const;  // to index frames: [0, count)
+
+  struct Frame {
+    long long pos;  // absolute offset
+    long len;
+
+    long Read(IMkvReader*, unsigned char*) const;
+  };
+
+  const Frame& GetFrame(int frame_index) const;
+
+  long long GetDiscardPadding() const;
+
+ private:
+  long long m_track;  // Track::Number()
+  short m_timecode;  // relative to cluster
+  unsigned char m_flags;
+
+  Frame* m_frames;
+  int m_frame_count;
+
+ protected:
+  const long long m_discard_padding;
+};
+
+class BlockEntry {
+  BlockEntry(const BlockEntry&);
+  BlockEntry& operator=(const BlockEntry&);
+
+ protected:
+  BlockEntry(Cluster*, long index);
+
+ public:
+  virtual ~BlockEntry();
+
+  bool EOS() const { return (GetKind() == kBlockEOS); }
+  const Cluster* GetCluster() const;
+  long GetIndex() const;
+  virtual const Block* GetBlock() const = 0;
+
+  enum Kind { kBlockEOS, kBlockSimple, kBlockGroup };
+  virtual Kind GetKind() const = 0;
+
+ protected:
+  Cluster* const m_pCluster;
+  const long m_index;
+};
+
+class SimpleBlock : public BlockEntry {
+  SimpleBlock(const SimpleBlock&);
+  SimpleBlock& operator=(const SimpleBlock&);
+
+ public:
+  SimpleBlock(Cluster*, long index, long long start, long long size);
+  long Parse();
+
+  Kind GetKind() const;
+  const Block* GetBlock() const;
+
+ protected:
+  Block m_block;
+};
+
+class BlockGroup : public BlockEntry {
+  BlockGroup(const BlockGroup&);
+  BlockGroup& operator=(const BlockGroup&);
+
+ public:
+  BlockGroup(Cluster*, long index,
+             long long block_start,  // absolute pos of block's payload
+             long long block_size,  // size of block's payload
+             long long prev, long long next, long long duration,
+             long long discard_padding);
+
+  long Parse();
+
+  Kind GetKind() const;
+  const Block* GetBlock() const;
+
+  long long GetPrevTimeCode() const;  // relative to block's time
+  long long GetNextTimeCode() const;  // as above
+  long long GetDurationTimeCode() const;
+
+ private:
+  Block m_block;
+  const long long m_prev;
+  const long long m_next;
+  const long long m_duration;
+};
+
+///////////////////////////////////////////////////////////////
+// ContentEncoding element
+// Elements used to describe if the track data has been encrypted or
+// compressed with zlib or header stripping.
+class ContentEncoding {
+ public:
+  enum { kCTR = 1 };
+
+  ContentEncoding();
+  ~ContentEncoding();
+
+  // ContentCompression element names
+  struct ContentCompression {
+    ContentCompression();
+    ~ContentCompression();
+
+    unsigned long long algo;
+    unsigned char* settings;
+    long long settings_len;
+  };
+
+  // ContentEncAESSettings element names
+  struct ContentEncAESSettings {
+    ContentEncAESSettings() : cipher_mode(kCTR) {}
+    ~ContentEncAESSettings() {}
+
+    unsigned long long cipher_mode;
+  };
+
+  // ContentEncryption element names
+  struct ContentEncryption {
+    ContentEncryption();
+    ~ContentEncryption();
+
+    unsigned long long algo;
+    unsigned char* key_id;
+    long long key_id_len;
+    unsigned char* signature;
+    long long signature_len;
+    unsigned char* sig_key_id;
+    long long sig_key_id_len;
+    unsigned long long sig_algo;
+    unsigned long long sig_hash_algo;
+
+    ContentEncAESSettings aes_settings;
+  };
+
+  // Returns ContentCompression represented by |idx|. Returns NULL if |idx|
+  // is out of bounds.
+  const ContentCompression* GetCompressionByIndex(unsigned long idx) const;
+
+  // Returns number of ContentCompression elements in this ContentEncoding
+  // element.
+  unsigned long GetCompressionCount() const;
+
+  // Parses the ContentCompression element from |pReader|. |start| is the
+  // starting offset of the ContentCompression payload. |size| is the size in
+  // bytes of the ContentCompression payload. |compression| is where the parsed
+  // values will be stored.
+  long ParseCompressionEntry(long long start, long long size,
+                             IMkvReader* pReader,
+                             ContentCompression* compression);
+
+  // Returns ContentEncryption represented by |idx|. Returns NULL if |idx|
+  // is out of bounds.
+  const ContentEncryption* GetEncryptionByIndex(unsigned long idx) const;
+
+  // Returns number of ContentEncryption elements in this ContentEncoding
+  // element.
+  unsigned long GetEncryptionCount() const;
+
+  // Parses the ContentEncAESSettings element from |pReader|. |start| is the
+  // starting offset of the ContentEncAESSettings payload. |size| is the
+  // size in bytes of the ContentEncAESSettings payload. |encryption| is
+  // where the parsed values will be stored.
+  long ParseContentEncAESSettingsEntry(long long start, long long size,
+                                       IMkvReader* pReader,
+                                       ContentEncAESSettings* aes);
+
+  // Parses the ContentEncoding element from |pReader|. |start| is the
+  // starting offset of the ContentEncoding payload. |size| is the size in
+  // bytes of the ContentEncoding payload. Returns true on success.
+  long ParseContentEncodingEntry(long long start, long long size,
+                                 IMkvReader* pReader);
+
+  // Parses the ContentEncryption element from |pReader|. |start| is the
+  // starting offset of the ContentEncryption payload. |size| is the size in
+  // bytes of the ContentEncryption payload. |encryption| is where the parsed
+  // values will be stored.
+  long ParseEncryptionEntry(long long start, long long size,
+                            IMkvReader* pReader, ContentEncryption* encryption);
+
+  unsigned long long encoding_order() const { return encoding_order_; }
+  unsigned long long encoding_scope() const { return encoding_scope_; }
+  unsigned long long encoding_type() const { return encoding_type_; }
+
+ private:
+  // Member variables for list of ContentCompression elements.
+  ContentCompression** compression_entries_;
+  ContentCompression** compression_entries_end_;
+
+  // Member variables for list of ContentEncryption elements.
+  ContentEncryption** encryption_entries_;
+  ContentEncryption** encryption_entries_end_;
+
+  // ContentEncoding element names
+  unsigned long long encoding_order_;
+  unsigned long long encoding_scope_;
+  unsigned long long encoding_type_;
+
+  // LIBWEBM_DISALLOW_COPY_AND_ASSIGN(ContentEncoding);
+  ContentEncoding(const ContentEncoding&);
+  ContentEncoding& operator=(const ContentEncoding&);
+};
+
+class Track {
+  Track(const Track&);
+  Track& operator=(const Track&);
+
+ public:
+  class Info;
+  static long Create(Segment*, const Info&, long long element_start,
+                     long long element_size, Track*&);
+
+  enum Type { kVideo = 1, kAudio = 2, kSubtitle = 0x11, kMetadata = 0x21 };
+
+  Segment* const m_pSegment;
+  const long long m_element_start;
+  const long long m_element_size;
+  virtual ~Track();
+
+  long GetType() const;
+  long GetNumber() const;
+  unsigned long long GetUid() const;
+  const char* GetNameAsUTF8() const;
+  const char* GetLanguage() const;
+  const char* GetCodecNameAsUTF8() const;
+  const char* GetCodecId() const;
+  const unsigned char* GetCodecPrivate(size_t&) const;
+  bool GetLacing() const;
+  unsigned long long GetDefaultDuration() const;
+  unsigned long long GetCodecDelay() const;
+  unsigned long long GetSeekPreRoll() const;
+
+  const BlockEntry* GetEOS() const;
+
+  struct Settings {
+    long long start;
+    long long size;
+  };
+
+  class Info {
+   public:
+    Info();
+    ~Info();
+    int Copy(Info&) const;
+    void Clear();
+    long type;
+    long number;
+    unsigned long long uid;
+    unsigned long long defaultDuration;
+    unsigned long long codecDelay;
+    unsigned long long seekPreRoll;
+    char* nameAsUTF8;
+    char* language;
+    char* codecId;
+    char* codecNameAsUTF8;
+    unsigned char* codecPrivate;
+    size_t codecPrivateSize;
+    bool lacing;
+    Settings settings;
+
+   private:
+    Info(const Info&);
+    Info& operator=(const Info&);
+    int CopyStr(char* Info::*str, Info&) const;
+  };
+
+  long GetFirst(const BlockEntry*&) const;
+  long GetNext(const BlockEntry* pCurr, const BlockEntry*& pNext) const;
+  virtual bool VetEntry(const BlockEntry*) const;
+  virtual long Seek(long long time_ns, const BlockEntry*&) const;
+
+  const ContentEncoding* GetContentEncodingByIndex(unsigned long idx) const;
+  unsigned long GetContentEncodingCount() const;
+
+  long ParseContentEncodingsEntry(long long start, long long size);
+
+ protected:
+  Track(Segment*, long long element_start, long long element_size);
+
+  Info m_info;
+
+  class EOSBlock : public BlockEntry {
+   public:
+    EOSBlock();
+
+    Kind GetKind() const;
+    const Block* GetBlock() const;
+  };
+
+  EOSBlock m_eos;
+
+ private:
+  ContentEncoding** content_encoding_entries_;
+  ContentEncoding** content_encoding_entries_end_;
+};
+
+struct PrimaryChromaticity {
+  PrimaryChromaticity() : x(0), y(0) {}
+  ~PrimaryChromaticity() {}
+  static bool Parse(IMkvReader* reader, long long read_pos,
+                    long long value_size, bool is_x,
+                    PrimaryChromaticity** chromaticity);
+  float x;
+  float y;
+};
+
+struct MasteringMetadata {
+  static const float kValueNotPresent;
+
+  MasteringMetadata()
+      : r(NULL),
+        g(NULL),
+        b(NULL),
+        white_point(NULL),
+        luminance_max(kValueNotPresent),
+        luminance_min(kValueNotPresent) {}
+  ~MasteringMetadata() {
+    delete r;
+    delete g;
+    delete b;
+    delete white_point;
+  }
+
+  static bool Parse(IMkvReader* reader, long long element_start,
+                    long long element_size,
+                    MasteringMetadata** mastering_metadata);
+
+  PrimaryChromaticity* r;
+  PrimaryChromaticity* g;
+  PrimaryChromaticity* b;
+  PrimaryChromaticity* white_point;
+  float luminance_max;
+  float luminance_min;
+};
+
+struct Colour {
+  static const long long kValueNotPresent;
+
+  // Unless otherwise noted all values assigned upon construction are the
+  // equivalent of unspecified/default.
+  Colour()
+      : matrix_coefficients(kValueNotPresent),
+        bits_per_channel(kValueNotPresent),
+        chroma_subsampling_horz(kValueNotPresent),
+        chroma_subsampling_vert(kValueNotPresent),
+        cb_subsampling_horz(kValueNotPresent),
+        cb_subsampling_vert(kValueNotPresent),
+        chroma_siting_horz(kValueNotPresent),
+        chroma_siting_vert(kValueNotPresent),
+        range(kValueNotPresent),
+        transfer_characteristics(kValueNotPresent),
+        primaries(kValueNotPresent),
+        max_cll(kValueNotPresent),
+        max_fall(kValueNotPresent),
+        mastering_metadata(NULL) {}
+  ~Colour() {
+    delete mastering_metadata;
+    mastering_metadata = NULL;
+  }
+
+  static bool Parse(IMkvReader* reader, long long element_start,
+                    long long element_size, Colour** colour);
+
+  long long matrix_coefficients;
+  long long bits_per_channel;
+  long long chroma_subsampling_horz;
+  long long chroma_subsampling_vert;
+  long long cb_subsampling_horz;
+  long long cb_subsampling_vert;
+  long long chroma_siting_horz;
+  long long chroma_siting_vert;
+  long long range;
+  long long transfer_characteristics;
+  long long primaries;
+  long long max_cll;
+  long long max_fall;
+
+  MasteringMetadata* mastering_metadata;
+};
+
+class VideoTrack : public Track {
+  VideoTrack(const VideoTrack&);
+  VideoTrack& operator=(const VideoTrack&);
+
+  VideoTrack(Segment*, long long element_start, long long element_size);
+
+ public:
+  virtual ~VideoTrack();
+  static long Parse(Segment*, const Info&, long long element_start,
+                    long long element_size, VideoTrack*&);
+
+  long long GetWidth() const;
+  long long GetHeight() const;
+  long long GetDisplayWidth() const;
+  long long GetDisplayHeight() const;
+  long long GetDisplayUnit() const;
+  long long GetStereoMode() const;
+  double GetFrameRate() const;
+
+  bool VetEntry(const BlockEntry*) const;
+  long Seek(long long time_ns, const BlockEntry*&) const;
+
+  Colour* GetColour() const;
+
+ private:
+  long long m_width;
+  long long m_height;
+  long long m_display_width;
+  long long m_display_height;
+  long long m_display_unit;
+  long long m_stereo_mode;
+
+  double m_rate;
+
+  Colour* m_colour;
+};
+
+class AudioTrack : public Track {
+  AudioTrack(const AudioTrack&);
+  AudioTrack& operator=(const AudioTrack&);
+
+  AudioTrack(Segment*, long long element_start, long long element_size);
+
+ public:
+  static long Parse(Segment*, const Info&, long long element_start,
+                    long long element_size, AudioTrack*&);
+
+  double GetSamplingRate() const;
+  long long GetChannels() const;
+  long long GetBitDepth() const;
+
+ private:
+  double m_rate;
+  long long m_channels;
+  long long m_bitDepth;
+};
+
+class Tracks {
+  Tracks(const Tracks&);
+  Tracks& operator=(const Tracks&);
+
+ public:
+  Segment* const m_pSegment;
+  const long long m_start;
+  const long long m_size;
+  const long long m_element_start;
+  const long long m_element_size;
+
+  Tracks(Segment*, long long start, long long size, long long element_start,
+         long long element_size);
+
+  ~Tracks();
+
+  long Parse();
+
+  unsigned long GetTracksCount() const;
+
+  const Track* GetTrackByNumber(long tn) const;
+  const Track* GetTrackByIndex(unsigned long idx) const;
+
+ private:
+  Track** m_trackEntries;
+  Track** m_trackEntriesEnd;
+
+  long ParseTrackEntry(long long payload_start, long long payload_size,
+                       long long element_start, long long element_size,
+                       Track*&) const;
+};
+
+class Chapters {
+  Chapters(const Chapters&);
+  Chapters& operator=(const Chapters&);
+
+ public:
+  Segment* const m_pSegment;
+  const long long m_start;
+  const long long m_size;
+  const long long m_element_start;
+  const long long m_element_size;
+
+  Chapters(Segment*, long long payload_start, long long payload_size,
+           long long element_start, long long element_size);
+
+  ~Chapters();
+
+  long Parse();
+
+  class Atom;
+  class Edition;
+
+  class Display {
+    friend class Atom;
+    Display();
+    Display(const Display&);
+    ~Display();
+    Display& operator=(const Display&);
+
+   public:
+    const char* GetString() const;
+    const char* GetLanguage() const;
+    const char* GetCountry() const;
+
+   private:
+    void Init();
+    void ShallowCopy(Display&) const;
+    void Clear();
+    long Parse(IMkvReader*, long long pos, long long size);
+
+    char* m_string;
+    char* m_language;
+    char* m_country;
+  };
+
+  class Atom {
+    friend class Edition;
+    Atom();
+    Atom(const Atom&);
+    ~Atom();
+    Atom& operator=(const Atom&);
+
+   public:
+    unsigned long long GetUID() const;
+    const char* GetStringUID() const;
+
+    long long GetStartTimecode() const;
+    long long GetStopTimecode() const;
+
+    long long GetStartTime(const Chapters*) const;
+    long long GetStopTime(const Chapters*) const;
+
+    int GetDisplayCount() const;
+    const Display* GetDisplay(int index) const;
+
+   private:
+    void Init();
+    void ShallowCopy(Atom&) const;
+    void Clear();
+    long Parse(IMkvReader*, long long pos, long long size);
+    static long long GetTime(const Chapters*, long long timecode);
+
+    long ParseDisplay(IMkvReader*, long long pos, long long size);
+    bool ExpandDisplaysArray();
+
+    char* m_string_uid;
+    unsigned long long m_uid;
+    long long m_start_timecode;
+    long long m_stop_timecode;
+
+    Display* m_displays;
+    int m_displays_size;
+    int m_displays_count;
+  };
+
+  class Edition {
+    friend class Chapters;
+    Edition();
+    Edition(const Edition&);
+    ~Edition();
+    Edition& operator=(const Edition&);
+
+   public:
+    int GetAtomCount() const;
+    const Atom* GetAtom(int index) const;
+
+   private:
+    void Init();
+    void ShallowCopy(Edition&) const;
+    void Clear();
+    long Parse(IMkvReader*, long long pos, long long size);
+
+    long ParseAtom(IMkvReader*, long long pos, long long size);
+    bool ExpandAtomsArray();
+
+    Atom* m_atoms;
+    int m_atoms_size;
+    int m_atoms_count;
+  };
+
+  int GetEditionCount() const;
+  const Edition* GetEdition(int index) const;
+
+ private:
+  long ParseEdition(long long pos, long long size);
+  bool ExpandEditionsArray();
+
+  Edition* m_editions;
+  int m_editions_size;
+  int m_editions_count;
+};
+
+class Tags {
+  Tags(const Tags&);
+  Tags& operator=(const Tags&);
+
+ public:
+  Segment* const m_pSegment;
+  const long long m_start;
+  const long long m_size;
+  const long long m_element_start;
+  const long long m_element_size;
+
+  Tags(Segment*, long long payload_start, long long payload_size,
+       long long element_start, long long element_size);
+
+  ~Tags();
+
+  long Parse();
+
+  class Tag;
+  class SimpleTag;
+
+  class SimpleTag {
+    friend class Tag;
+    SimpleTag();
+    SimpleTag(const SimpleTag&);
+    ~SimpleTag();
+    SimpleTag& operator=(const SimpleTag&);
+
+   public:
+    const char* GetTagName() const;
+    const char* GetTagString() const;
+
+   private:
+    void Init();
+    void ShallowCopy(SimpleTag&) const;
+    void Clear();
+    long Parse(IMkvReader*, long long pos, long long size);
+
+    char* m_tag_name;
+    char* m_tag_string;
+  };
+
+  class Tag {
+    friend class Tags;
+    Tag();
+    Tag(const Tag&);
+    ~Tag();
+    Tag& operator=(const Tag&);
+
+   public:
+    int GetSimpleTagCount() const;
+    const SimpleTag* GetSimpleTag(int index) const;
+
+   private:
+    void Init();
+    void ShallowCopy(Tag&) const;
+    void Clear();
+    long Parse(IMkvReader*, long long pos, long long size);
+
+    long ParseSimpleTag(IMkvReader*, long long pos, long long size);
+    bool ExpandSimpleTagsArray();
+
+    SimpleTag* m_simple_tags;
+    int m_simple_tags_size;
+    int m_simple_tags_count;
+  };
+
+  int GetTagCount() const;
+  const Tag* GetTag(int index) const;
+
+ private:
+  long ParseTag(long long pos, long long size);
+  bool ExpandTagsArray();
+
+  Tag* m_tags;
+  int m_tags_size;
+  int m_tags_count;
+};
+
+class SegmentInfo {
+  SegmentInfo(const SegmentInfo&);
+  SegmentInfo& operator=(const SegmentInfo&);
+
+ public:
+  Segment* const m_pSegment;
+  const long long m_start;
+  const long long m_size;
+  const long long m_element_start;
+  const long long m_element_size;
+
+  SegmentInfo(Segment*, long long start, long long size,
+              long long element_start, long long element_size);
+
+  ~SegmentInfo();
+
+  long Parse();
+
+  long long GetTimeCodeScale() const;
+  long long GetDuration() const;  // scaled
+  const char* GetMuxingAppAsUTF8() const;
+  const char* GetWritingAppAsUTF8() const;
+  const char* GetTitleAsUTF8() const;
+
+ private:
+  long long m_timecodeScale;
+  double m_duration;
+  char* m_pMuxingAppAsUTF8;
+  char* m_pWritingAppAsUTF8;
+  char* m_pTitleAsUTF8;
+};
+
+class SeekHead {
+  SeekHead(const SeekHead&);
+  SeekHead& operator=(const SeekHead&);
+
+ public:
+  Segment* const m_pSegment;
+  const long long m_start;
+  const long long m_size;
+  const long long m_element_start;
+  const long long m_element_size;
+
+  SeekHead(Segment*, long long start, long long size, long long element_start,
+           long long element_size);
+
+  ~SeekHead();
+
+  long Parse();
+
+  struct Entry {
+    // the SeekHead entry payload
+    long long id;
+    long long pos;
+
+    // absolute pos of SeekEntry ID
+    long long element_start;
+
+    // SeekEntry ID size + size size + payload
+    long long element_size;
+  };
+
+  int GetCount() const;
+  const Entry* GetEntry(int idx) const;
+
+  struct VoidElement {
+    // absolute pos of Void ID
+    long long element_start;
+
+    // ID size + size size + payload size
+    long long element_size;
+  };
+
+  int GetVoidElementCount() const;
+  const VoidElement* GetVoidElement(int idx) const;
+
+ private:
+  Entry* m_entries;
+  int m_entry_count;
+
+  VoidElement* m_void_elements;
+  int m_void_element_count;
+
+  static bool ParseEntry(IMkvReader*,
+                         long long pos,  // payload
+                         long long size, Entry*);
+};
+
+class Cues;
+class CuePoint {
+  friend class Cues;
+
+  CuePoint(long, long long);
+  ~CuePoint();
+
+  CuePoint(const CuePoint&);
+  CuePoint& operator=(const CuePoint&);
+
+ public:
+  long long m_element_start;
+  long long m_element_size;
+
+  bool Load(IMkvReader*);
+
+  long long GetTimeCode() const;  // absolute but unscaled
+  long long GetTime(const Segment*) const;  // absolute and scaled (ns units)
+
+  struct TrackPosition {
+    long long m_track;
+    long long m_pos;  // of cluster
+    long long m_block;
+    // codec_state  //defaults to 0
+    // reference = clusters containing req'd referenced blocks
+    //  reftime = timecode of the referenced block
+
+    bool Parse(IMkvReader*, long long, long long);
+  };
+
+  const TrackPosition* Find(const Track*) const;
+
+ private:
+  const long m_index;
+  long long m_timecode;
+  TrackPosition* m_track_positions;
+  size_t m_track_positions_count;
+};
+
+class Cues {
+  friend class Segment;
+
+  Cues(Segment*, long long start, long long size, long long element_start,
+       long long element_size);
+  ~Cues();
+
+  Cues(const Cues&);
+  Cues& operator=(const Cues&);
+
+ public:
+  Segment* const m_pSegment;
+  const long long m_start;
+  const long long m_size;
+  const long long m_element_start;
+  const long long m_element_size;
+
+  bool Find(  // lower bound of time_ns
+      long long time_ns, const Track*, const CuePoint*&,
+      const CuePoint::TrackPosition*&) const;
+
+  const CuePoint* GetFirst() const;
+  const CuePoint* GetLast() const;
+  const CuePoint* GetNext(const CuePoint*) const;
+
+  const BlockEntry* GetBlock(const CuePoint*,
+                             const CuePoint::TrackPosition*) const;
+
+  bool LoadCuePoint() const;
+  long GetCount() const;  // loaded only
+  // long GetTotal() const;  //loaded + preloaded
+  bool DoneParsing() const;
+
+ private:
+  bool Init() const;
+  bool PreloadCuePoint(long&, long long) const;
+
+  mutable CuePoint** m_cue_points;
+  mutable long m_count;
+  mutable long m_preload_count;
+  mutable long long m_pos;
+};
+
+class Cluster {
+  friend class Segment;
+
+  Cluster(const Cluster&);
+  Cluster& operator=(const Cluster&);
+
+ public:
+  Segment* const m_pSegment;
+
+ public:
+  static Cluster* Create(Segment*,
+                         long index,  // index in segment
+                         long long off);  // offset relative to segment
+  // long long element_size);
+
+  Cluster();  // EndOfStream
+  ~Cluster();
+
+  bool EOS() const;
+
+  long long GetTimeCode() const;  // absolute, but not scaled
+  long long GetTime() const;  // absolute, and scaled (nanosecond units)
+  long long GetFirstTime() const;  // time (ns) of first (earliest) block
+  long long GetLastTime() const;  // time (ns) of last (latest) block
+
+  long GetFirst(const BlockEntry*&) const;
+  long GetLast(const BlockEntry*&) const;
+  long GetNext(const BlockEntry* curr, const BlockEntry*& next) const;
+
+  const BlockEntry* GetEntry(const Track*, long long ns = -1) const;
+  const BlockEntry* GetEntry(const CuePoint&,
+                             const CuePoint::TrackPosition&) const;
+  // const BlockEntry* GetMaxKey(const VideoTrack*) const;
+
+  //    static bool HasBlockEntries(const Segment*, long long);
+
+  static long HasBlockEntries(const Segment*, long long idoff, long long& pos,
+                              long& size);
+
+  long GetEntryCount() const;
+
+  long Load(long long& pos, long& size) const;
+
+  long Parse(long long& pos, long& size) const;
+  long GetEntry(long index, const mkvparser::BlockEntry*&) const;
+
+ protected:
+  Cluster(Segment*, long index, long long element_start);
+  // long long element_size);
+
+ public:
+  const long long m_element_start;
+  long long GetPosition() const;  // offset relative to segment
+
+  long GetIndex() const;
+  long long GetElementSize() const;
+  // long long GetPayloadSize() const;
+
+  // long long Unparsed() const;
+
+ private:
+  long m_index;
+  mutable long long m_pos;
+  // mutable long long m_size;
+  mutable long long m_element_size;
+  mutable long long m_timecode;
+  mutable BlockEntry** m_entries;
+  mutable long m_entries_size;
+  mutable long m_entries_count;
+
+  long ParseSimpleBlock(long long, long long&, long&);
+  long ParseBlockGroup(long long, long long&, long&);
+
+  long CreateBlock(long long id, long long pos, long long size,
+                   long long discard_padding);
+  long CreateBlockGroup(long long start_offset, long long size,
+                        long long discard_padding);
+  long CreateSimpleBlock(long long, long long);
+};
+
+class Segment {
+  friend class Cues;
+  friend class Track;
+  friend class VideoTrack;
+
+  Segment(const Segment&);
+  Segment& operator=(const Segment&);
+
+ private:
+  Segment(IMkvReader*, long long elem_start,
+          // long long elem_size,
+          long long pos, long long size);
+
+ public:
+  IMkvReader* const m_pReader;
+  const long long m_element_start;
+  // const long long m_element_size;
+  const long long m_start;  // posn of segment payload
+  const long long m_size;  // size of segment payload
+  Cluster m_eos;  // TODO: make private?
+
+  static long long CreateInstance(IMkvReader*, long long, Segment*&);
+  ~Segment();
+
+  long Load();  // loads headers and all clusters
+
+  // for incremental loading
+  // long long Unparsed() const;
+  bool DoneParsing() const;
+  long long ParseHeaders();  // stops when first cluster is found
+  // long FindNextCluster(long long& pos, long& size) const;
+  long LoadCluster(long long& pos, long& size);  // load one cluster
+  long LoadCluster();
+
+  long ParseNext(const Cluster* pCurr, const Cluster*& pNext, long long& pos,
+                 long& size);
+
+  const SeekHead* GetSeekHead() const;
+  const Tracks* GetTracks() const;
+  const SegmentInfo* GetInfo() const;
+  const Cues* GetCues() const;
+  const Chapters* GetChapters() const;
+  const Tags* GetTags() const;
+
+  long long GetDuration() const;
+
+  unsigned long GetCount() const;
+  const Cluster* GetFirst() const;
+  const Cluster* GetLast() const;
+  const Cluster* GetNext(const Cluster*);
+
+  const Cluster* FindCluster(long long time_nanoseconds) const;
+  // const BlockEntry* Seek(long long time_nanoseconds, const Track*) const;
+
+  const Cluster* FindOrPreloadCluster(long long pos);
+
+  long ParseCues(long long cues_off,  // offset relative to start of segment
+                 long long& parse_pos, long& parse_len);
+
+ private:
+  long long m_pos;  // absolute file posn; what has been consumed so far
+  Cluster* m_pUnknownSize;
+
+  SeekHead* m_pSeekHead;
+  SegmentInfo* m_pInfo;
+  Tracks* m_pTracks;
+  Cues* m_pCues;
+  Chapters* m_pChapters;
+  Tags* m_pTags;
+  Cluster** m_clusters;
+  long m_clusterCount;  // number of entries for which m_index >= 0
+  long m_clusterPreloadCount;  // number of entries for which m_index < 0
+  long m_clusterSize;  // array size
+
+  long DoLoadCluster(long long&, long&);
+  long DoLoadClusterUnknownSize(long long&, long&);
+  long DoParseNext(const Cluster*&, long long&, long&);
+
+  bool AppendCluster(Cluster*);
+  bool PreloadCluster(Cluster*, ptrdiff_t);
+
+  // void ParseSeekHead(long long pos, long long size);
+  // void ParseSeekEntry(long long pos, long long size);
+  // void ParseCues(long long);
+
+  const BlockEntry* GetBlock(const CuePoint&, const CuePoint::TrackPosition&);
+};
+
+}  // namespace mkvparser
+
+inline long mkvparser::Segment::LoadCluster() {
+  long long pos;
+  long size;
+
+  return LoadCluster(pos, size);
+}
+
+#endif  // MKVPARSER_MKVPARSER_H_
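For orientation, the parser API declared above is typically driven by creating a Segment from an IMkvReader, loading it, and then walking the tracks and clusters. Below is a minimal sketch, assuming the caller already has an IMkvReader implementation and the byte offset of the Segment element (normally found by parsing the EBML header first); the function name and include path are illustrative, and error handling is simplified.

```cpp
#include <cstdio>
#include "mkvparser/mkvparser.h"  // path as vendored; adjust to the build's include dirs

// Prints basic track information for a WebM/Matroska segment.
// |reader| is any IMkvReader implementation; |segment_pos| is the offset of
// the Segment element within the stream.
void DumpTracks(mkvparser::IMkvReader *reader, long long segment_pos) {
  mkvparser::Segment *segment = NULL;
  if (mkvparser::Segment::CreateInstance(reader, segment_pos, segment) != 0)
    return;  // treat any non-zero status as failure in this sketch

  if (segment->Load() < 0) {  // loads headers and all clusters
    delete segment;
    return;
  }

  const mkvparser::Tracks *tracks = segment->GetTracks();
  for (unsigned long i = 0; i < tracks->GetTracksCount(); ++i) {
    const mkvparser::Track *track = tracks->GetTrackByIndex(i);
    if (track == NULL)
      continue;

    if (track->GetType() == mkvparser::Track::kVideo) {
      const mkvparser::VideoTrack *video =
          static_cast<const mkvparser::VideoTrack *>(track);
      std::printf("video %lldx%lld (%s)\n", video->GetWidth(),
                  video->GetHeight(), video->GetCodecId());
    } else if (track->GetType() == mkvparser::Track::kAudio) {
      const mkvparser::AudioTrack *audio =
          static_cast<const mkvparser::AudioTrack *>(track);
      std::printf("audio %lld ch @ %.0f Hz\n", audio->GetChannels(),
                  audio->GetSamplingRate());
    }
  }

  delete segment;
}
```

Seeking would additionally go through the Cues/CuePoint classes declared above (Cues::Find returns the lower-bound cue point for a given time and track).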

+ 142 - 0
thirdparty/libvpx/AUTHORS

@@ -0,0 +1,142 @@
+# This file is automatically generated from the git commit history
+# by tools/gen_authors.sh.
+
+Aaron Watry <[email protected]>
+Abo Talib Mahfoodh <[email protected]>
+Adam Xu <[email protected]>
+Adrian Grange <[email protected]>
+Aℓex Converse <[email protected]>
+Ahmad Sharif <[email protected]>
+Alexander Voronov <[email protected]>
+Alexis Ballier <[email protected]>
+Alok Ahuja <[email protected]>
+Alpha Lam <[email protected]>
+A.Mahfoodh <[email protected]>
+Ami Fischman <[email protected]>
+Andoni Morales Alastruey <[email protected]>
+Andres Mejia <[email protected]>
+Andrew Russell <[email protected]>
+Angie Chiang <[email protected]>
+Aron Rosenberg <[email protected]>
+Attila Nagy <[email protected]>
+Brion Vibber <[email protected]>
+changjun.yang <[email protected]>
+Charles 'Buck' Krasic <[email protected]>
+chm <[email protected]>
+Christian Duvivier <[email protected]>
+Daniele Castagna <[email protected]>
+Daniel Kang <[email protected]>
+Deb Mukherjee <[email protected]>
+Dim Temp <[email protected]>
+Dmitry Kovalev <[email protected]>
+Dragan Mrdjan <[email protected]>
+Ed Baker <[email protected]>
+Ehsan Akhgari <[email protected]>
+Erik Niemeyer <[email protected]>
+Fabio Pedretti <[email protected]>
+Frank Galligan <[email protected]>
+Fredrik Söderquist <[email protected]>
+Fritz Koenig <[email protected]>
+Gaute Strokkenes <[email protected]>
+Geza Lore <[email protected]>
+Ghislain MARY <[email protected]>
+Giuseppe Scrivano <[email protected]>
+Gordana Cmiljanovic <[email protected]>
+Guillaume Martres <[email protected]>
+Guillermo Ballester Valor <[email protected]>
+Hangyu Kuang <[email protected]>
+Hanno Böck <[email protected]>
+Henrik Lundin <[email protected]>
+Hui Su <[email protected]>
+Ivan Maltz <[email protected]>
+Jacek Caban <[email protected]>
+Jacky Chen <[email protected]>
+James Berry <[email protected]>
+James Yu <[email protected]>
+James Zern <[email protected]>
+Jan Gerber <[email protected]>
+Jan Kratochvil <[email protected]>
+Janne Salonen <[email protected]>
+Jean-Yves Avenard <[email protected]>
+Jeff Faust <[email protected]>
+Jeff Muizelaar <[email protected]>
+Jeff Petkau <[email protected]>
+Jia Jia <[email protected]>
+Jian Zhou <[email protected]>
+Jim Bankoski <[email protected]>
+Jingning Han <[email protected]>
+Joey Parrish <[email protected]>
+Johann Koenig <[email protected]>
+John Koleszar <[email protected]>
+Johnny Klonaris <[email protected]>
+John Stark <[email protected]>
+Joshua Bleecher Snyder <[email protected]>
+Joshua Litt <[email protected]>
+Julia Robson <[email protected]>
+Justin Clift <[email protected]>
+Justin Lebar <[email protected]>
+KO Myung-Hun <[email protected]>
+Lawrence Velázquez <[email protected]>
+Linfeng Zhang <[email protected]>
+Lou Quillio <[email protected]>
+Luca Barbato <[email protected]>
+Makoto Kato <[email protected]>
+Mans Rullgard <[email protected]>
+Marco Paniconi <[email protected]>
+Mark Mentovai <[email protected]>
+Martin Ettl <[email protected]>
+Martin Storsjo <[email protected]>
+Matthew Heaney <[email protected]>
+Michael Kohler <[email protected]>
+Mike Frysinger <[email protected]>
+Mike Hommey <[email protected]>
+Mikhal Shemer <[email protected]>
+Minghai Shang <[email protected]>
+Morton Jonuschat <[email protected]>
+Nico Weber <[email protected]>
+Parag Salasakar <[email protected]>
+Pascal Massimino <[email protected]>
+Patrik Westin <[email protected]>
+Paul Wilkins <[email protected]>
+Pavol Rusnak <[email protected]>
+Paweł Hajdan <[email protected]>
+Pengchong Jin <[email protected]>
+Peter de Rivaz <[email protected]>
+Philip Jägenstedt <[email protected]>
+Priit Laes <[email protected]>
+Rafael Ávila de Espíndola <[email protected]>
+Rafaël Carré <[email protected]>
+Ralph Giles <[email protected]>
+Rob Bradford <[email protected]>
+Ronald S. Bultje <[email protected]>
+Rui Ueyama <[email protected]>
+Sami Pietilä <[email protected]>
+Sasi Inguva <[email protected]>
+Scott Graham <[email protected]>
+Scott LaVarnway <[email protected]>
+Sean McGovern <[email protected]>
+Sergey Kolomenkin <[email protected]>
+Sergey Ulanov <[email protected]>
+Shimon Doodkin <[email protected]>
+Shunyao Li <[email protected]>
+Stefan Holmer <[email protected]>
+Suman Sunkara <[email protected]>
+Taekhyun Kim <[email protected]>
+Takanori MATSUURA <[email protected]>
+Tamar Levy <[email protected]>
+Tao Bai <[email protected]>
+Tero Rintaluoma <[email protected]>
+Thijs Vermeir <[email protected]>
+Tim Kopp <[email protected]>
+Timothy B. Terriberry <[email protected]>
+Tom Finegan <[email protected]>
+Vignesh Venkatasubramanian <[email protected]>
+Yaowu Xu <[email protected]>
+Yi Luo <[email protected]>
+Yongzhe Wang <[email protected]>
+Yunqing Wang <[email protected]>
+Yury Gitman <[email protected]>
+Zoe Liu <[email protected]>
+Google Inc.
+The Mozilla Foundation
+The Xiph.Org Foundation

+ 654 - 0
thirdparty/libvpx/CHANGELOG

@@ -0,0 +1,654 @@
+2016-07-20 v1.6.0 "Khaki Campbell Duck"
+  This release improves upon the VP9 encoder and speeds up the encoding and
+  decoding processes.
+
+  - Upgrading:
+    This release is ABI incompatible with 1.5.0 due to a new 'color_range' enum
+    in vpx_image and some minor changes to the VP8_COMP structure.
+
+    The default key frame interval for VP9 has changed from 128 to 9999.
+
+  - Enhancement:
+    A core focus has been performance for low end Intel processors. SSSE3
+    instructions such as 'pshufb' have been avoided and instructions have been
+    reordered to better accommodate the more constrained pipelines.
+
+    As a result, devices based on Celeron processors have seen substantial
+    decoding improvements. From Indian Runner Duck to Javan Whistling Duck,
+    decoding speed improved between 10 and 30%. Between Javan Whistling Duck
+    and Khaki Campbell Duck, it improved another 10 to 15%.
+
+    While Celeron benefited most, Core-i5 also improved 5% and 10% between the
+    respective releases.
+
+    Realtime performance for WebRTC for both speed and quality has received a
+    lot of attention.
+
+  - Bug Fixes:
+    A number of fuzzing issues, found variously by Mozilla, Chromium and others,
+    have been fixed and we strongly recommend updating.
+
+2015-11-09 v1.5.0 "Javan Whistling Duck"
+  This release improves upon the VP9 encoder and speeds up the encoding and
+  decoding processes.
+
+  - Upgrading:
+    This release is ABI incompatible with 1.4.0. It drops deprecated VP8
+    controls and adds a variety of VP9 controls for testing.
+
+    The vpxenc utility now prefers VP9 by default.
+
+  - Enhancements:
+    Faster VP9 encoding and decoding
+    Smaller library size by combining functions used by VP8 and VP9
+
+  - Bug Fixes:
+    A variety of fuzzing issues
+
+2015-04-03 v1.4.0 "Indian Runner Duck"
+  This release includes significant improvements to the VP9 codec.
+
+  - Upgrading:
+    This release is ABI incompatible with 1.3.0. It drops the compatibility
+    layer, requiring VPX_IMG_FMT_* instead of IMG_FMT_*, and adds several codec
+    controls for VP9.
+
+  - Enhancements:
+    Faster VP9 encoding and decoding
+    Multithreaded VP9 decoding (tile and frame-based)
+    Multithreaded VP9 encoding - on by default
+    YUV 4:2:2 and 4:4:4 support in VP9
+    10 and 12bit support in VP9
+    64bit ARM support by replacing ARM assembly with intrinsics
+
+  - Bug Fixes:
+    Fixes a VP9 bitstream issue in Profile 1. This only affected non-YUV 4:2:0
+    files.
+
+  - Known Issues:
+    Frame Parallel decoding fails for segmented and non-420 files.
+
+2013-11-15 v1.3.0 "Forest"
+  This release introduces the VP9 codec in a backward-compatible way.
+  All existing users of VP8 can continue to use the library without
+  modification. However, some VP8 options do not map to VP9 in the same manner.
+
+  The VP9 encoder in this release is not feature complete. Users interested in
+  the encoder are advised to use the git master branch and discuss issues on
+  libvpx mailing lists.
+
+  - Upgrading:
+    This release is ABI and API compatible with Duclair (v1.0.0). Users
+    of older releases should refer to the Upgrading notes in this document
+    for that release.
+
+  - Enhancements:
+      Get rid of bashisms in the main build scripts
+      Added usage info on command line options
+      Add lossless compression mode
+      Dll build of libvpx
+      Add additional Mac OS X targets: 10.7, 10.8 and 10.9 (darwin11-13)
+      Add option to disable documentation
+      configure: add --enable-external-build support
+      make: support V=1 as short form of verbose=yes
+      configure: support mingw-w64
+      configure: support hardfloat armv7 CHOSTS
+      configure: add support for android x86
+      Add estimated completion time to vpxenc
+      Don't exit on decode errors in vpxenc
+      vpxenc: support scaling prior to encoding
+      vpxdec: support scaling output
+      vpxenc: improve progress indicators with --skip
+      msvs: Don't link to winmm.lib
+      Add a new script for producing vcxproj files
+      Produce Visual Studio 10 and 11 project files
+      Produce Windows Phone project files
+      msvs-build: use msbuild for vs >= 2005
+      configure: default configure log to config.log
+      Add encoding option --static-thresh
+
+  - Speed:
+      Miscellaneous speed optimizations for VP8 and VP9.
+
+  - Quality:
+      In general, quality is consistent with the Eider release.
+
+  - Bug Fixes:
+      This release represents approximately a year of engineering effort,
+      and contains multiple bug fixes. Please refer to git history for details.
+
+
+2012-12-21 v1.2.0
+  This release acts as a checkpoint for a large amount of internal refactoring
+  and testing. It also contains a number of small bugfixes, so all users are
+  encouraged to upgrade.
+
+  - Upgrading:
+    This release is ABI and API compatible with Duclair (v1.0.0). Users
+    of older releases should refer to the Upgrading notes in this
+    document for that release.
+
+  - Enhancements:
+      VP8 optimizations for MIPS dspr2
+      vpxenc: add -quiet option
+
+  - Speed:
+      Encoder and decoder speed is consistent with the Eider release.
+
+  - Quality:
+      In general, quality is consistent with the Eider release.
+
+      Minor tweaks to ARNR filtering
+      Minor improvements to real time encoding with multiple temporal layers
+
+  - Bug Fixes:
+      Fixes multithreaded encoder race condition in loopfilter
+      Fixes multi-resolution threaded encoding
+      Fix potential encoder dead-lock after picture resize
+
+
+2012-05-09 v1.1.0 "Eider"
+  This introduces a number of enhancements, mostly focused on real-time
+  encoding. In addition, it fixes a decoder bug (first introduced in
+  Duclair) so all users of that release are encouraged to upgrade.
+
+  - Upgrading:
+    This release is ABI and API compatible with Duclair (v1.0.0). Users
+    of older releases should refer to the Upgrading notes in this
+    document for that release.
+
+    This release introduces a new temporal denoiser, controlled by the
+    VP8E_SET_NOISE_SENSITIVITY control. The temporal denoiser does not
+    currently take a strength parameter, so the control is effectively
+    a boolean - zero (off) or non-zero (on). For compatibility with
+    existing applications, the values accepted are the same as those
+    for the spatial denoiser (0-6). The temporal denoiser is enabled
+    by default, and the older spatial denoiser may be restored by
+    configuring with --disable-temporal-denoising. The temporal denoiser
+    is more computationally intensive than the spatial one.
+
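As an illustration of the control described above, toggling the temporal denoiser from application code looks roughly like this. This is a sketch against the public libvpx encoder API; the helper name is hypothetical.

```cpp
#include "vpx/vpx_encoder.h"
#include "vpx/vp8cx.h"  // VP8E_SET_NOISE_SENSITIVITY

// |codec| must be an already-initialized VP8 encoder context.
// 0 disables denoising; any non-zero value (the spatial range 0-6 is still
// accepted for compatibility) enables the temporal denoiser in this release.
vpx_codec_err_t set_denoiser(vpx_codec_ctx_t *codec, unsigned int level) {
  return vpx_codec_control(codec, VP8E_SET_NOISE_SENSITIVITY, level);
}
```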
+    This release removes support for a legacy, decode only API that was
+    supported, but deprecated, at the initial release of libvpx
+    (v0.9.0). This is not expected to have any impact. If you are
+    impacted, you can apply a reversion to commit 2bf8fb58 locally.
+    Please update to the latest libvpx API if you are affected.
+
+  - Enhancements:
+      Adds a motion compensated temporal denoiser to the encoder, which
+      gives higher quality than the older spatial denoiser. (See above
+      for notes on upgrading).
+
+      In addition, support for new compilers and platforms were added,
+      including:
+        improved support for XCode
+        Android x86 NDK build
+        OS/2 support
+        SunCC support
+
+      Changing resolution with vpx_codec_enc_config_set() is now
+      supported. Previously, reinitializing the codec was required to
+      change the input resolution.
+
+      The vpxenc application has initial support for producing multiple
+      encodes from the same input in one call. Resizing is not yet
+      supported, but varying other codec parameters is. Use -- to
+      delineate output streams. Options persist from one stream to the
+      next.
+
+      Also, the vpxenc application will now use a keyframe interval of
+      5 seconds by default. Use the --kf-max-dist option to override.
+
+  - Speed:
+      Decoder performance improved 2.5% versus Duclair. Encoder speed is
+      consistent with Duclair for most material. Two pass encoding of
+      slideshow-like material will see significant improvements.
+
+      Large realtime encoding speed gains at a small quality expense are
+      possible by configuring the on-the-fly bitpacking experiment with
+      --enable-onthefly-bitpacking. Realtime encoder can be up to 13%
+      faster (ARM) depending on the number of threads and bitrate
+      settings. This technique sees constant gain over the 5-16 speed
+      range. For VC style input the loss seen is up to 0.2dB. See commit
+      52cf4dca for further details.
+
+  - Quality:
+      On the whole, quality is consistent with the Duclair release. Some
+      tweaks:
+
+        Reduced blockiness in easy sections by applying a penalty to
+        intra modes.
+
+        Improved quality of static sections (like slideshows) with
+        two pass encoding.
+
+        Improved keyframe sizing with multiple temporal layers
+
+  - Bug Fixes:
+      Corrected alt-ref contribution to frame rate for visible updates
+      to the alt-ref buffer. This affected applications making manual
+      usage of the frame reference flags, or temporal layers.
+
+      Additional constraints were added to disable multi-frame quality
+      enhancement (MFQE) in sections of the frame where there is motion.
+      (#392)
+
+      Fixed corruption issues when vpx_codec_enc_config_set() was called
+      with spatial resampling enabled.
+
+      Fixed a decoder error introduced in Duclair where the segmentation
+      map was not being reinitialized on keyframes (#378)
+
+
+2012-01-27 v1.0.0 "Duclair"
+  Our fourth named release, focused on performance and features related to
+  real-time encoding. It also fixes a decoder crash bug introduced in
+  v0.9.7, so all users of that release are encouraged to upgrade.
+
+  - Upgrading:
+      This release is ABI incompatible with prior releases of libvpx, so the
+      "major" version number has been bumped to 1. You must recompile your
+      applications against the latest version of the libvpx headers. The
+      API remains compatible, and this should not require code changes in most
+      applications.
+
+  - Enhancements:
+      This release introduces several substantial new features to the encoder,
+      of particular interest to real time streaming applications.
+
+      Temporal scalability allows the encoder to produce a stream that can
+    be decimated to different frame rates, with independent rate targeting
+      for each substream.
+
+      Multiframe quality enhancement postprocessing can make visual quality
+      more consistent in the presence of frames that are substantially
+      different quality than the surrounding frames, as in the temporal
+      scalability case and in some forced keyframe scenarios.
+
+      Multiple-resolution encoding support allows the encoding of the
+      same content at different resolutions faster than encoding them
+      separately.
+
+  - Speed:
+      Optimization targets for this release included the decoder and the real-
+      time modes of the encoder. Decoder speed on x86 has improved 10.5% with
+      this release. Encoder improvements followed a curve where speeds 1-3
+      improved 4.0%-1.5%, speeds 4-8 improved <1%, and speeds 9-16 improved
+      1.5% to 10.5%, respectively. "Best" mode speed is consistent with the
+      Cayuga release.
+
+  - Quality:
+      Encoder quality in the single stream case is consistent with the Cayuga
+      release.
+
+  - Bug Fixes:
+      This release fixes an OOB read decoder crash bug present in v0.9.7
+      related to the clamping of motion vectors in SPLITMV blocks. This
+      behavior could be triggered by corrupt input or by starting
+      decoding from a P-frame.
+
+
+2011-08-15 v0.9.7-p1 "Cayuga" patch 1
+  This is an incremental bugfix release against Cayuga. All users of that
+  release are strongly encouraged to upgrade.
+
+    - Fix potential OOB reads (cdae03a)
+
+          An unbounded out of bounds read was discovered when the
+          decoder was requested to perform error concealment (new in
+          Cayuga) given a frame with corrupt partition sizes.
+
+          A bounded out of bounds read was discovered affecting all
+          versions of libvpx. Given a multipartition input frame that
+          is truncated between the mode/mv partition and the first
+          residual partition (in the block of partition offsets), up
+          to 3 extra bytes could have been read from the source buffer.
+          The code will not take any action regardless of the contents
+          of these undefined bytes, as the truncated buffer is detected
+          immediately following the read based on the calculated
+          starting position of the coefficient partition.
+
+    - Fix potential error concealment crash when the very first frame
+      is missing or corrupt (a609be5)
+
+    - Fix significant artifacts in error concealment (a4c2211, 99d870a)
+
+    - Revert 1-pass CBR rate control changes (e961317)
+      Further testing showed this change produced undesirable visual
+      artifacts, rolling back for now.
+
+
+2011-08-02 v0.9.7 "Cayuga"
+  Our third named release, focused on a faster, higher quality, encoder.
+
+  - Upgrading:
+    This release is backwards compatible with Aylesbury (v0.9.5) and
+    Bali (v0.9.6). Users of older releases should refer to the Upgrading
+    notes in this document for that release.
+
+  - Enhancements:
+          Stereo 3D format support for vpxenc
+          Runtime detection of available processor cores.
+          Allow specifying --end-usage by enum name
+          vpxdec: test for frame corruption
+          vpxenc: add quantizer histogram display
+          vpxenc: add rate histogram display
+          Set VPX_FRAME_IS_DROPPABLE
+          update configure for ios sdk 4.3
+          Avoid text relocations in ARM vp8 decoder
+          Generate a vpx.pc file for pkg-config.
+          New ways of passing encoded data between encoder and decoder.
+
+  - Speed:
+      This release includes across-the-board speed improvements to the
+      encoder. On x86, these measure at approximately 11.5% in Best mode,
+      21.5% in Good mode (speed 0), and 22.5% in Realtime mode (speed 6).
+      On ARM Cortex A9 with Neon extensions, real-time encoding of video
+      telephony content is 35% faster than Bali on single core and 48%
+      faster on multi-core. On the NVidia Tegra2 platform, real time
+      encoding is 40% faster than Bali.
+
+      Decoder speed was not a priority for this release, but improved
+      approximately 8.4% on x86.
+
+          Reduce motion vector search on alt-ref frame.
+          Encoder loopfilter running in its own thread
+          Reworked loopfilter to precalculate more parameters
+          SSE2/SSSE3 optimizations for build_predictors_mbuv{,_s}().
+          Make hor UV predict ~2x faster (73 vs 132 cycles) using SSSE3.
+          Removed redundant checks
+          Reduced structure sizes
+          utilize preload in ARMv6 MC/LPF/Copy routines
+          ARM optimized quantization, fdct, variance, subtract
+          Increase chroma row alignment to 16 bytes.
+          disable trellis optimization for first pass
+          Write SSSE3 sub-pixel filter function
+          Improve SSE2 half-pixel filter functions
+          Add vp8_sub_pixel_variance16x8_ssse3 function
+          Reduce unnecessary distortion computation
+          Use diamond search to replace full search
+          Preload reference area in sub-pixel motion search (real-time mode)
+
+  - Quality:
+      This release focused primarily on one-pass use cases, including
+      video conferencing. Low latency data rate control was significantly
+      improved, improving streamability over bandwidth constrained links.
+      Added support for error concealment, allowing frames to maintain
+      visual quality in the presence of substantial packet loss.
+
+          Add rc_max_intra_bitrate_pct control
+          Limit size of initial keyframe in one-pass.
+          Improve framerate adaptation
+          Improved 1-pass CBR rate control
+          Improved KF insertion after fades to still.
+          Improved key frame detection.
+          Improved activity masking (lower PSNR impact for same SSIM boost)
+          Improved interaction between GF and ARFs
+          Adding error-concealment to the decoder.
+          Adding support for independent partitions
+          Adjusted rate-distortion constants
+
+
+  - Bug Fixes:
+          Removed firstpass motion map
+          Fix parallel make install
+          Fix multithreaded encoding for 1 MB wide frame
+          Fixed iwalsh_neon build problems with RVDS4.1
+          Fix semaphore emulation, spin-wait intrinsics on Windows
+          Fix build with xcode4 and simplify GLOBAL.
+          Mark ARM asm objects as allowing a non-executable stack.
+          Fix vpxenc encoding incorrect webm file header on big endian
+
+
+2011-03-07 v0.9.6 "Bali"
+  Our second named release, focused on a faster, higher quality, encoder.
+
+  - Upgrading:
+    This release is backwards compatible with Aylesbury (v0.9.5). Users
+    of older releases should refer to the Upgrading notes in this
+    document for that release.
+
+  - Enhancements:
+      vpxenc --psnr shows a summary when encode completes
+      --tune=ssim option to enable activity masking
+      improved postproc visualizations for development
+      updated support for Apple iOS to SDK 4.2
+      query decoder to determine which reference frames were updated
+      implemented error tracking in the decoder
+      fix pipe support on windows
+
+  - Speed:
+      Primary focus was on good quality mode, speed 0. Average improvement
+      on x86 about 40%, up to 100% on user-generated content at that speed.
+      Best quality mode speed improved 35%, and realtime speed 10-20%. This
+      release also saw significant improvement in realtime encoding speed
+      on ARM platforms.
+
+        Improved encoder threading
+        Don't pick encoder filter level when loopfilter is disabled.
+        Avoid double copying of key frames into alt and golden buffer
+        FDCT optimizations.
+        x86 sse2 temporal filter
+        SSSE3 version of fast quantizer
+        vp8_rd_pick_best_mbsegmentation code restructure
+        Adjusted breakout RD for SPLITMV
+        Changed segmentation check order
+        Improved rd_pick_intra4x4block
+        Adds armv6 optimized variance calculation
+        ARMv6 optimized sad16x16
+        ARMv6 optimized half pixel variance calculations
+        Full search SAD function optimization in SSE4.1
+        Improve MV prediction accuracy to achieve performance gain
+        Improve MV prediction in vp8_pick_inter_mode() for speed>3
+
+  - Quality:
+      Best quality mode improved PSNR 6.3%, and SSIM 6.1%. This release
+      also includes support for "activity masking," which greatly improves
+      SSIM at the expense of PSNR. For now, this feature is available with
+      the --tune=ssim option. Further experimentation in this area
+      is ongoing. This release also introduces a new rate control mode
+      called "CQ," which changes the allocation of bits within a clip to
+      the sections where they will have the most visual impact.
+
+        Tuning for the more exact quantizer.
+        Relax rate control for last few frames
+        CQ Mode
+        Limit key frame quantizer for forced key frames.
+        KF/GF Pulsing
+        Add simple version of activity masking.
+        make rdmult adaptive for intra in quantizer RDO
+        cap the best quantizer for 2nd order DC
+        change the threshold of DC check for encode breakout
+
+  - Bug Fixes:
+      Fix crash on Sparc Solaris.
+      Fix counter of fixed keyframe distance
+      ARNR filter pointer update bug fix
+      Fixed use of motion percentage in KF/GF group calc
+      Changed condition for using RD in Intra Mode
+      Fix encoder real-time only configuration.
+      Fix ARM encoder crash with multiple token partitions
+      Fixed bug where the first cluster timecode of a webm file was wrong.
+      Fixed various encoder bugs with odd-sized images
+      vp8e_get_preview fixed when spatial resampling enabled
+      quantizer: fix assertion in fast quantizer path
+      Allocate source buffers to be multiples of 16
+      Fix for manual Golden frame frequency
+      Fix drastic undershoot in long form content
+
+
+2010-10-28 v0.9.5 "Aylesbury"
+  Our first named release, focused on a faster decoder, and a better encoder.
+
+  - Upgrading:
+    This release incorporates backwards-incompatible changes to the
+    ivfenc and ivfdec tools. These tools are now called vpxenc and vpxdec.
+
+    vpxdec
+      * the -q (quiet) option has been removed, and replaced with
+        -v (verbose). the output is quiet by default. Use -v to see
+        the version number of the binary.
+
+      * The default behavior is now to write output to a single file
+        instead of individual frames. The -y option has been removed.
+        Y4M output is the default.
+
+      * For raw I420/YV12 output instead of Y4M, the --i420 or --yv12
+        options must be specified.
+
+          $ ivfdec -o OUTPUT INPUT
+          $ vpxdec --i420 -o OUTPUT INPUT
+
+      * If an output file is not specified, the default is to write
+        Y4M to stdout. This makes piping more natural.
+
+          $ ivfdec -y -o - INPUT | ...
+          $ vpxdec INPUT | ...
+
+      * The output file has additional flexibility for formatting the
+        filename. It supports escape characters for constructing a
+        filename from the width, height, and sequence number. This
+        replaces the -p option. To get the equivalent:
+
+          $ ivfdec -p frame INPUT
+          $ vpxdec --i420 -o frame-%wx%h-%4.i420 INPUT
+
+    vpxenc
+      * The output file must be specified with -o, rather than as the
+        last argument.
+
+          $ ivfenc <options> INPUT OUTPUT
+          $ vpxenc <options> -o OUTPUT INPUT
+
+      * The output defaults to webm. To get IVF output, use the --ivf
+        option.
+
+          $ ivfenc <options> INPUT OUTPUT.ivf
+          $ vpxenc <options> -o OUTPUT.ivf --ivf INPUT
+
+
+  - Enhancements:
+      ivfenc and ivfdec have been renamed to vpxenc, vpxdec.
+      vpxdec supports .webm input
+      vpxdec writes .y4m by default
+      vpxenc writes .webm output by default
+      vpxenc --psnr now shows the average/overall PSNR at the end
+      ARM platforms now support runtime cpu detection
+      vpxdec visualizations added for motion vectors, block modes, references
+      vpxdec now silent by default
+      vpxdec --progress shows frame-by-frame timing information
+      vpxenc supports the distinction between --fps and --timebase
+      NASM is now a supported assembler
+      configure: enable PIC for shared libs by default
+      configure: add --enable-small
+      configure: support for ppc32-linux-gcc
+      configure: support for sparc-solaris-gcc
+
+  - Bugs:
+      Improve handling of invalid frames
+      Fix valgrind errors in the NEON loop filters.
+      Fix loopfilter delta zero transitions
+      Fix valgrind errors in vp8_sixtap_predict8x4_armv6().
+      Build fixes for darwin-icc
+
+  - Speed:
+      20-40% (average 28%) improvement in libvpx decoder speed,
+      including:
+        Rewrite vp8_short_walsh4x4_sse2()
+        Optimizations on the loopfilters.
+        Miscellaneous improvements for Atom
+        Add 4-tap version of 2nd-pass ARMv6 MC filter.
+        Improved multithread utilization
+        Better instruction choices on x86
+        reorder data to use wider instructions
+        Update NEON wide idcts
+        Make block access to frame buffer sequential
+        Improved subset block search
+        Bilinear subpixel optimizations for ssse3.
+        Decrease memory footprint
+
+      Encoder speed improvements (percentage gain not measured):
+        Skip unnecessary search of identical frames
+        Add SSE2 subtract functions
+        Improve bounds checking in vp8_diamond_search_sadx4()
+        Added vp8_fast_quantize_b_sse2
+
+  - Quality:
+      Over 7% overall PSNR improvement (6.3% SSIM) in "best" quality
+      encoding mode, and up to 60% improvement on very noisy, still
+      or slow moving source video
+
+        Motion compensated temporal filter for Alt-Ref Noise Reduction
+        Improved use of trellis quantization on 2nd order Y blocks
+        Tune effect of motion on KF/GF boost in two pass
+        Allow coefficient optimization for good quality speed 0.
+        Improved control of active min quantizer for two pass.
+        Enable ARFs for non-lagged compress
+
+2010-09-02 v0.9.2
+  - Enhancements:
+      Disable frame dropping by default
+      Improved multithreaded performance
+      Improved Force Key Frame Behaviour
+      Increased rate control buffer level precision
+      Fix bug in 1st pass motion compensation
+      ivfenc: correct fixed kf interval, --disable-kf
+  - Speed:
+      Changed above and left context data layout
+      Rework idct calling structure.
+      Removed unnecessary MB_MODE_INFO copies
+      x86: SSSE3 sixtap prediction
+      Reworked IDCT to include reconstruction (add) step
+      Swap alt/gold/new/last frame buffer ptrs instead of copying.
+      Improve SSE2 loopfilter functions
+      Change bitreader to use a larger window.
+      Avoid loopfilter reinitialization when possible
+  - Quality:
+      Normalize quantizer's zero bin and rounding factors
+      Add trellis quantization.
+      Make the quantizer exact.
+      Updates to ARNR filtering algorithm
+      Fix breakout thresh computation for golden & AltRef frames
+      Redo the forward 4x4 dct
+      Improve the accuracy of forward walsh-hadamard transform
+      Further adjustment of RD behaviour with Q and Zbin.
+  - Build System:
+      Allow linking of libs built with MinGW to MSVC
+      Fix target auto-detection on mingw32
+      Allow --cpu= to work for x86.
+      configure: pass original arguments through to make dist
+      Fix builds without runtime CPU detection
+      msvs: fix install of codec sources
+      msvs: Change devenv.com command line for better msys support
+      msvs: Add vs9 targets.
+      Add x86_64-linux-icc target
+  - Bugs:
+      Potential crashes on older MinGW builds
+      Fix two-pass framerate for Y4M input.
+      Fixed simple loop filter, other crashes on ARM v6
+      arm: fix missing dependency with --enable-shared
+      configure: support directories containing .o
+      Replace pinsrw (SSE) with MMX instructions
+      apple: include proper mach primitives
+      Fixed rate control bug with long key frame interval.
+      Fix DSO link errors on x86-64 when not using a version script
+      Fixed buffer selection for UV in AltRef filtering
+
+
+2010-06-17 v0.9.1
+  - Enhancements:
+      * ivfenc/ivfdec now support YUV4MPEG2 input and pipe I/O
+      * Speed optimizations
+  - Bugfixes:
+      * Rate control
+      * Prevent out-of-bounds accesses on invalid data
+  - Build system updates:
+      * Detect toolchain to be used automatically for native builds
+      * Support building shared libraries
+      * Better autotools emulation (--prefix, --libdir, DESTDIR)
+  - Updated LICENSE
+      * http://webmproject.blogspot.com/2010/06/changes-to-webm-open-source-license.html
+
+
+2010-05-18 v0.9.0
+  - Initial open source release. Welcome to WebM and VP8!
+

+ 31 - 0
thirdparty/libvpx/LICENSE

@@ -0,0 +1,31 @@
+Copyright (c) 2010, The WebM Project authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+  * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+
+  * Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in
+    the documentation and/or other materials provided with the
+    distribution.
+
+  * Neither the name of Google, nor the WebM Project, nor the names
+    of its contributors may be used to endorse or promote products
+    derived from this software without specific prior written
+    permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+

+ 23 - 0
thirdparty/libvpx/PATENTS

@@ -0,0 +1,23 @@
+Additional IP Rights Grant (Patents)
+------------------------------------
+
+"These implementations" means the copyrightable works that implement the WebM
+codecs distributed by Google as part of the WebM Project.
+
+Google hereby grants to you a perpetual, worldwide, non-exclusive, no-charge,
+royalty-free, irrevocable (except as stated in this section) patent license to
+make, have made, use, offer to sell, sell, import, transfer, and otherwise
+run, modify and propagate the contents of these implementations of WebM, where
+such license applies only to those patent claims, both currently owned by
+Google and acquired in the future, licensable by Google that are necessarily
+infringed by these implementations of WebM. This grant does not include claims
+that would be infringed only as a consequence of further modification of these
+implementations. If you or your agent or exclusive licensee institute or order
+or agree to the institution of patent litigation or any other patent
+enforcement activity against any entity (including a cross-claim or
+counterclaim in a lawsuit) alleging that any of these implementations of WebM
+or any code incorporated within any of these implementations of WebM
+constitute direct or contributory patent infringement, or inducement of
+patent infringement, then any patent rights granted to you under this License
+for these implementations of WebM shall terminate as of the date such
+litigation is filed.

+ 240 - 0
thirdparty/libvpx/rtcd/vp8_rtcd_arm.h

@@ -0,0 +1,240 @@
+#ifndef VP8_RTCD_H_
+#define VP8_RTCD_H_
+
+#ifdef RTCD_C
+#define RTCD_EXTERN
+#else
+#define RTCD_EXTERN extern
+#endif
+
+/*
+ * VP8
+ */
+
+struct blockd;
+struct loop_filter_info;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp8_bilinear_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+void vp8_bilinear_predict16x16_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+RTCD_EXTERN void (*vp8_bilinear_predict16x16)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+
+void vp8_bilinear_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+#define vp8_bilinear_predict4x4 vp8_bilinear_predict4x4_c
+
+void vp8_bilinear_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+void vp8_bilinear_predict8x4_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+RTCD_EXTERN void (*vp8_bilinear_predict8x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+
+void vp8_bilinear_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+void vp8_bilinear_predict8x8_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+RTCD_EXTERN void (*vp8_bilinear_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+
+void vp8_clear_system_state_c();
+#define vp8_clear_system_state vp8_clear_system_state_c
+
+void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
+void vp8_copy_mem16x16_neon(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
+RTCD_EXTERN void (*vp8_copy_mem16x16)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
+
+void vp8_copy_mem8x4_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
+void vp8_copy_mem8x4_neon(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
+RTCD_EXTERN void (*vp8_copy_mem8x4)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
+
+void vp8_copy_mem8x8_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
+void vp8_copy_mem8x8_neon(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
+RTCD_EXTERN void (*vp8_copy_mem8x8)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
+
+void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
+void vp8_dc_only_idct_add_neon(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
+RTCD_EXTERN void (*vp8_dc_only_idct_add)(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
+
+void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
+void vp8_dequant_idct_add_neon(short *input, short *dq, unsigned char *output, int stride);
+RTCD_EXTERN void (*vp8_dequant_idct_add)(short *input, short *dq, unsigned char *output, int stride);
+
+void vp8_dequant_idct_add_uv_block_c(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs);
+void vp8_dequant_idct_add_uv_block_neon(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs);
+RTCD_EXTERN void (*vp8_dequant_idct_add_uv_block)(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs);
+
+void vp8_dequant_idct_add_y_block_c(short *q, short *dq, unsigned char *dst, int stride, char *eobs);
+void vp8_dequant_idct_add_y_block_neon(short *q, short *dq, unsigned char *dst, int stride, char *eobs);
+RTCD_EXTERN void (*vp8_dequant_idct_add_y_block)(short *q, short *dq, unsigned char *dst, int stride, char *eobs);
+
+void vp8_dequantize_b_c(struct blockd*, short *dqc);
+void vp8_dequantize_b_neon(struct blockd*, short *dqc);
+RTCD_EXTERN void (*vp8_dequantize_b)(struct blockd*, short *dqc);
+
+void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+void vp8_loop_filter_bh_neon(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+RTCD_EXTERN void (*vp8_loop_filter_bh)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+
+void vp8_loop_filter_bv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+void vp8_loop_filter_bv_neon(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+RTCD_EXTERN void (*vp8_loop_filter_bv)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+
+void vp8_loop_filter_mbh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+void vp8_loop_filter_mbh_neon(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+RTCD_EXTERN void (*vp8_loop_filter_mbh)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+
+void vp8_loop_filter_mbv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+void vp8_loop_filter_mbv_neon(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+RTCD_EXTERN void (*vp8_loop_filter_mbv)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+
+void vp8_loop_filter_bhs_c(unsigned char *y, int ystride, const unsigned char *blimit);
+void vp8_loop_filter_bhs_neon(unsigned char *y, int ystride, const unsigned char *blimit);
+RTCD_EXTERN void (*vp8_loop_filter_simple_bh)(unsigned char *y, int ystride, const unsigned char *blimit);
+
+void vp8_loop_filter_bvs_c(unsigned char *y, int ystride, const unsigned char *blimit);
+void vp8_loop_filter_bvs_neon(unsigned char *y, int ystride, const unsigned char *blimit);
+RTCD_EXTERN void (*vp8_loop_filter_simple_bv)(unsigned char *y, int ystride, const unsigned char *blimit);
+
+void vp8_loop_filter_simple_horizontal_edge_c(unsigned char *y, int ystride, const unsigned char *blimit);
+void vp8_loop_filter_mbhs_neon(unsigned char *y, int ystride, const unsigned char *blimit);
+RTCD_EXTERN void (*vp8_loop_filter_simple_mbh)(unsigned char *y, int ystride, const unsigned char *blimit);
+
+void vp8_loop_filter_simple_vertical_edge_c(unsigned char *y, int ystride, const unsigned char *blimit);
+void vp8_loop_filter_mbvs_neon(unsigned char *y, int ystride, const unsigned char *blimit);
+RTCD_EXTERN void (*vp8_loop_filter_simple_mbv)(unsigned char *y, int ystride, const unsigned char *blimit);
+
+void vp8_short_idct4x4llm_c(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride);
+void vp8_short_idct4x4llm_neon(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride);
+RTCD_EXTERN void (*vp8_short_idct4x4llm)(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride);
+
+void vp8_short_inv_walsh4x4_c(short *input, short *output);
+void vp8_short_inv_walsh4x4_neon(short *input, short *output);
+RTCD_EXTERN void (*vp8_short_inv_walsh4x4)(short *input, short *output);
+
+void vp8_short_inv_walsh4x4_1_c(short *input, short *output);
+#define vp8_short_inv_walsh4x4_1 vp8_short_inv_walsh4x4_1_c
+
+void vp8_sixtap_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+void vp8_sixtap_predict16x16_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+RTCD_EXTERN void (*vp8_sixtap_predict16x16)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+
+void vp8_sixtap_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+#define vp8_sixtap_predict4x4 vp8_sixtap_predict4x4_c
+
+void vp8_sixtap_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+void vp8_sixtap_predict8x4_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+RTCD_EXTERN void (*vp8_sixtap_predict8x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+
+void vp8_sixtap_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+void vp8_sixtap_predict8x8_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+
+void vp8_rtcd(void);
+
+#ifdef RTCD_C
+#include "vpx_ports/arm.h"
+static void setup_rtcd_internal(void)
+{
+    int flags = arm_cpu_caps();
+
+    vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_neon;
+#endif
+    vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_neon;
+#endif
+    vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_neon;
+#endif
+    vp8_copy_mem16x16 = vp8_copy_mem16x16_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vp8_copy_mem16x16 = vp8_copy_mem16x16_neon;
+#endif
+    vp8_copy_mem8x4 = vp8_copy_mem8x4_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vp8_copy_mem8x4 = vp8_copy_mem8x4_neon;
+#endif
+    vp8_copy_mem8x8 = vp8_copy_mem8x8_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vp8_copy_mem8x8 = vp8_copy_mem8x8_neon;
+#endif
+    vp8_dc_only_idct_add = vp8_dc_only_idct_add_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vp8_dc_only_idct_add = vp8_dc_only_idct_add_neon;
+#endif
+    vp8_dequant_idct_add = vp8_dequant_idct_add_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vp8_dequant_idct_add = vp8_dequant_idct_add_neon;
+#endif
+    vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_neon;
+#endif
+    vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_neon;
+#endif
+    vp8_dequantize_b = vp8_dequantize_b_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vp8_dequantize_b = vp8_dequantize_b_neon;
+#endif
+    vp8_loop_filter_bh = vp8_loop_filter_bh_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vp8_loop_filter_bh = vp8_loop_filter_bh_neon;
+#endif
+    vp8_loop_filter_bv = vp8_loop_filter_bv_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vp8_loop_filter_bv = vp8_loop_filter_bv_neon;
+#endif
+    vp8_loop_filter_mbh = vp8_loop_filter_mbh_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vp8_loop_filter_mbh = vp8_loop_filter_mbh_neon;
+#endif
+    vp8_loop_filter_mbv = vp8_loop_filter_mbv_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vp8_loop_filter_mbv = vp8_loop_filter_mbv_neon;
+#endif
+    vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_neon;
+#endif
+    vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_neon;
+#endif
+    vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vp8_loop_filter_simple_mbh = vp8_loop_filter_mbhs_neon;
+#endif
+    vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vp8_loop_filter_simple_mbv = vp8_loop_filter_mbvs_neon;
+#endif
+    vp8_short_idct4x4llm = vp8_short_idct4x4llm_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vp8_short_idct4x4llm = vp8_short_idct4x4llm_neon;
+#endif
+    vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_neon;
+#endif
+    vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_neon;
+#endif
+    vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_neon;
+#endif
+    vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_neon;
+#endif
+}
+#endif
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif
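
Note on the vp8_rtcd_*.h headers added above: they are libvpx's pre-generated run-time CPU detection (RTCD) tables. Each dispatched function has a portable _c fallback, optional SIMD variants, and either a plain #define (when only the C version exists) or an RTCD_EXTERN function pointer that setup_rtcd_internal() points at the best variant the CPU reports. The stand-alone C sketch below illustrates that idiom only; the names scale_c, scale_neon, cpu_caps and the HAS_NEON value are illustrative and not part of this patch.

    #include <stdio.h>

    #define HAS_NEON 0x1   /* illustrative flag bit; libvpx defines its own in vpx_ports/arm.h */

    /* Portable C fallback and a stand-in for a NEON-optimized variant. */
    static void scale_c(int *v, int n)    { for (int i = 0; i < n; ++i) v[i] *= 2; }
    static void scale_neon(int *v, int n) { for (int i = 0; i < n; ++i) v[i] *= 2; /* NEON body in real code */ }

    /* Stand-in for arm_cpu_caps(): report which extensions are available. */
    static int cpu_caps(void) { return HAS_NEON; }

    /* Dispatch pointer, playing the role of the RTCD_EXTERN pointers in the header. */
    static void (*scale)(int *v, int n);

    static void setup_rtcd(void)
    {
        int flags = cpu_caps();
        scale = scale_c;                          /* always start from the safe C fallback */
        if (flags & HAS_NEON) scale = scale_neon; /* upgrade when the CPU supports NEON    */
    }

    int main(void)
    {
        int v[4] = { 1, 2, 3, 4 };
        setup_rtcd();   /* in the real headers, setup_rtcd_internal() is run once via vp8_rtcd()/vp9_rtcd() */
        scale(v, 4);
        printf("%d %d %d %d\n", v[0], v[1], v[2], v[3]);   /* prints: 2 4 6 8 */
        return 0;
    }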

+ 117 - 0
thirdparty/libvpx/rtcd/vp8_rtcd_c.h

@@ -0,0 +1,117 @@
+#ifndef VP8_RTCD_H_
+#define VP8_RTCD_H_
+
+#ifdef RTCD_C
+#define RTCD_EXTERN
+#else
+#define RTCD_EXTERN extern
+#endif
+
+/*
+ * VP8
+ */
+
+struct blockd;
+struct loop_filter_info;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp8_bilinear_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+#define vp8_bilinear_predict16x16 vp8_bilinear_predict16x16_c
+
+void vp8_bilinear_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+#define vp8_bilinear_predict4x4 vp8_bilinear_predict4x4_c
+
+void vp8_bilinear_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+#define vp8_bilinear_predict8x4 vp8_bilinear_predict8x4_c
+
+void vp8_bilinear_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+#define vp8_bilinear_predict8x8 vp8_bilinear_predict8x8_c
+
+void vp8_clear_system_state_c();
+#define vp8_clear_system_state vp8_clear_system_state_c
+
+void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
+#define vp8_copy_mem16x16 vp8_copy_mem16x16_c
+
+void vp8_copy_mem8x4_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
+#define vp8_copy_mem8x4 vp8_copy_mem8x4_c
+
+void vp8_copy_mem8x8_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
+#define vp8_copy_mem8x8 vp8_copy_mem8x8_c
+
+void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
+#define vp8_dc_only_idct_add vp8_dc_only_idct_add_c
+
+void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
+#define vp8_dequant_idct_add vp8_dequant_idct_add_c
+
+void vp8_dequant_idct_add_uv_block_c(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs);
+#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_c
+
+void vp8_dequant_idct_add_y_block_c(short *q, short *dq, unsigned char *dst, int stride, char *eobs);
+#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_c
+
+void vp8_dequantize_b_c(struct blockd*, short *dqc);
+#define vp8_dequantize_b vp8_dequantize_b_c
+
+void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+#define vp8_loop_filter_bh vp8_loop_filter_bh_c
+
+void vp8_loop_filter_bv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+#define vp8_loop_filter_bv vp8_loop_filter_bv_c
+
+void vp8_loop_filter_mbh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+#define vp8_loop_filter_mbh vp8_loop_filter_mbh_c
+
+void vp8_loop_filter_mbv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+#define vp8_loop_filter_mbv vp8_loop_filter_mbv_c
+
+void vp8_loop_filter_bhs_c(unsigned char *y, int ystride, const unsigned char *blimit);
+#define vp8_loop_filter_simple_bh vp8_loop_filter_bhs_c
+
+void vp8_loop_filter_bvs_c(unsigned char *y, int ystride, const unsigned char *blimit);
+#define vp8_loop_filter_simple_bv vp8_loop_filter_bvs_c
+
+void vp8_loop_filter_simple_horizontal_edge_c(unsigned char *y, int ystride, const unsigned char *blimit);
+#define vp8_loop_filter_simple_mbh vp8_loop_filter_simple_horizontal_edge_c
+
+void vp8_loop_filter_simple_vertical_edge_c(unsigned char *y, int ystride, const unsigned char *blimit);
+#define vp8_loop_filter_simple_mbv vp8_loop_filter_simple_vertical_edge_c
+
+void vp8_short_idct4x4llm_c(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride);
+#define vp8_short_idct4x4llm vp8_short_idct4x4llm_c
+
+void vp8_short_inv_walsh4x4_c(short *input, short *output);
+#define vp8_short_inv_walsh4x4 vp8_short_inv_walsh4x4_c
+
+void vp8_short_inv_walsh4x4_1_c(short *input, short *output);
+#define vp8_short_inv_walsh4x4_1 vp8_short_inv_walsh4x4_1_c
+
+void vp8_sixtap_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+#define vp8_sixtap_predict16x16 vp8_sixtap_predict16x16_c
+
+void vp8_sixtap_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+#define vp8_sixtap_predict4x4 vp8_sixtap_predict4x4_c
+
+void vp8_sixtap_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+#define vp8_sixtap_predict8x4 vp8_sixtap_predict8x4_c
+
+void vp8_sixtap_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+#define vp8_sixtap_predict8x8 vp8_sixtap_predict8x8_c
+
+void vp8_rtcd(void);
+
+#ifdef RTCD_C
+static void setup_rtcd_internal(void)
+{
+}
+#endif
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif

+ 247 - 0
thirdparty/libvpx/rtcd/vp8_rtcd_x86.h

@@ -0,0 +1,247 @@
+#ifndef VP8_RTCD_H_
+#define VP8_RTCD_H_
+
+#ifdef RTCD_C
+#define RTCD_EXTERN
+#else
+#define RTCD_EXTERN extern
+#endif
+
+/*
+ * VP8
+ */
+
+struct blockd;
+struct loop_filter_info;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp8_bilinear_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+void vp8_bilinear_predict16x16_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+void vp8_bilinear_predict16x16_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+void vp8_bilinear_predict16x16_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+RTCD_EXTERN void (*vp8_bilinear_predict16x16)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+
+void vp8_bilinear_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+void vp8_bilinear_predict4x4_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+RTCD_EXTERN void (*vp8_bilinear_predict4x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+
+void vp8_bilinear_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+void vp8_bilinear_predict8x4_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+RTCD_EXTERN void (*vp8_bilinear_predict8x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+
+void vp8_bilinear_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+void vp8_bilinear_predict8x8_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+void vp8_bilinear_predict8x8_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+void vp8_bilinear_predict8x8_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+RTCD_EXTERN void (*vp8_bilinear_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+
+void vp8_clear_system_state_c();
+void vpx_reset_mmx_state();
+RTCD_EXTERN void (*vp8_clear_system_state)();
+
+void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
+void vp8_copy_mem16x16_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
+void vp8_copy_mem16x16_sse2(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
+RTCD_EXTERN void (*vp8_copy_mem16x16)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
+
+void vp8_copy_mem8x4_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
+void vp8_copy_mem8x4_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
+RTCD_EXTERN void (*vp8_copy_mem8x4)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
+
+void vp8_copy_mem8x8_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
+void vp8_copy_mem8x8_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
+RTCD_EXTERN void (*vp8_copy_mem8x8)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
+
+void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
+void vp8_dc_only_idct_add_mmx(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
+RTCD_EXTERN void (*vp8_dc_only_idct_add)(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
+
+void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
+void vp8_dequant_idct_add_mmx(short *input, short *dq, unsigned char *output, int stride);
+RTCD_EXTERN void (*vp8_dequant_idct_add)(short *input, short *dq, unsigned char *output, int stride);
+
+void vp8_dequant_idct_add_uv_block_c(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs);
+void vp8_dequant_idct_add_uv_block_mmx(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs);
+void vp8_dequant_idct_add_uv_block_sse2(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs);
+RTCD_EXTERN void (*vp8_dequant_idct_add_uv_block)(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs);
+
+void vp8_dequant_idct_add_y_block_c(short *q, short *dq, unsigned char *dst, int stride, char *eobs);
+void vp8_dequant_idct_add_y_block_mmx(short *q, short *dq, unsigned char *dst, int stride, char *eobs);
+void vp8_dequant_idct_add_y_block_sse2(short *q, short *dq, unsigned char *dst, int stride, char *eobs);
+RTCD_EXTERN void (*vp8_dequant_idct_add_y_block)(short *q, short *dq, unsigned char *dst, int stride, char *eobs);
+
+void vp8_dequantize_b_c(struct blockd*, short *dqc);
+void vp8_dequantize_b_mmx(struct blockd*, short *dqc);
+RTCD_EXTERN void (*vp8_dequantize_b)(struct blockd*, short *dqc);
+
+void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+void vp8_loop_filter_bh_mmx(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+void vp8_loop_filter_bh_sse2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+RTCD_EXTERN void (*vp8_loop_filter_bh)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+
+void vp8_loop_filter_bv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+void vp8_loop_filter_bv_mmx(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+void vp8_loop_filter_bv_sse2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+RTCD_EXTERN void (*vp8_loop_filter_bv)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+
+void vp8_loop_filter_mbh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+void vp8_loop_filter_mbh_mmx(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+void vp8_loop_filter_mbh_sse2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+RTCD_EXTERN void (*vp8_loop_filter_mbh)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+
+void vp8_loop_filter_mbv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+void vp8_loop_filter_mbv_mmx(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+void vp8_loop_filter_mbv_sse2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+RTCD_EXTERN void (*vp8_loop_filter_mbv)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
+
+void vp8_loop_filter_bhs_c(unsigned char *y, int ystride, const unsigned char *blimit);
+void vp8_loop_filter_bhs_mmx(unsigned char *y, int ystride, const unsigned char *blimit);
+void vp8_loop_filter_bhs_sse2(unsigned char *y, int ystride, const unsigned char *blimit);
+RTCD_EXTERN void (*vp8_loop_filter_simple_bh)(unsigned char *y, int ystride, const unsigned char *blimit);
+
+void vp8_loop_filter_bvs_c(unsigned char *y, int ystride, const unsigned char *blimit);
+void vp8_loop_filter_bvs_mmx(unsigned char *y, int ystride, const unsigned char *blimit);
+void vp8_loop_filter_bvs_sse2(unsigned char *y, int ystride, const unsigned char *blimit);
+RTCD_EXTERN void (*vp8_loop_filter_simple_bv)(unsigned char *y, int ystride, const unsigned char *blimit);
+
+void vp8_loop_filter_simple_horizontal_edge_c(unsigned char *y, int ystride, const unsigned char *blimit);
+void vp8_loop_filter_simple_horizontal_edge_mmx(unsigned char *y, int ystride, const unsigned char *blimit);
+void vp8_loop_filter_simple_horizontal_edge_sse2(unsigned char *y, int ystride, const unsigned char *blimit);
+RTCD_EXTERN void (*vp8_loop_filter_simple_mbh)(unsigned char *y, int ystride, const unsigned char *blimit);
+
+void vp8_loop_filter_simple_vertical_edge_c(unsigned char *y, int ystride, const unsigned char *blimit);
+void vp8_loop_filter_simple_vertical_edge_mmx(unsigned char *y, int ystride, const unsigned char *blimit);
+void vp8_loop_filter_simple_vertical_edge_sse2(unsigned char *y, int ystride, const unsigned char *blimit);
+RTCD_EXTERN void (*vp8_loop_filter_simple_mbv)(unsigned char *y, int ystride, const unsigned char *blimit);
+
+void vp8_short_idct4x4llm_c(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride);
+void vp8_short_idct4x4llm_mmx(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride);
+RTCD_EXTERN void (*vp8_short_idct4x4llm)(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride);
+
+void vp8_short_inv_walsh4x4_c(short *input, short *output);
+void vp8_short_inv_walsh4x4_mmx(short *input, short *output);
+void vp8_short_inv_walsh4x4_sse2(short *input, short *output);
+RTCD_EXTERN void (*vp8_short_inv_walsh4x4)(short *input, short *output);
+
+void vp8_short_inv_walsh4x4_1_c(short *input, short *output);
+#define vp8_short_inv_walsh4x4_1 vp8_short_inv_walsh4x4_1_c
+
+void vp8_sixtap_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+void vp8_sixtap_predict16x16_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+void vp8_sixtap_predict16x16_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+void vp8_sixtap_predict16x16_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+RTCD_EXTERN void (*vp8_sixtap_predict16x16)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+
+void vp8_sixtap_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+void vp8_sixtap_predict4x4_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+void vp8_sixtap_predict4x4_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+RTCD_EXTERN void (*vp8_sixtap_predict4x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+
+void vp8_sixtap_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+void vp8_sixtap_predict8x4_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+void vp8_sixtap_predict8x4_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+void vp8_sixtap_predict8x4_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+RTCD_EXTERN void (*vp8_sixtap_predict8x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+
+void vp8_sixtap_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+void vp8_sixtap_predict8x8_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+void vp8_sixtap_predict8x8_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+void vp8_sixtap_predict8x8_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+
+void vp8_rtcd(void);
+
+#ifdef RTCD_C
+#include "vpx_ports/x86.h"
+static void setup_rtcd_internal(void)
+{
+    int flags = x86_simd_caps();
+
+    vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_c;
+    if (flags & HAS_MMX) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_mmx;
+    if (flags & HAS_SSE2) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_sse2;
+    if (flags & HAS_SSSE3) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_ssse3;
+    vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_c;
+    if (flags & HAS_MMX) vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_mmx;
+    vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_c;
+    if (flags & HAS_MMX) vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_mmx;
+    vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_c;
+    if (flags & HAS_MMX) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_mmx;
+    if (flags & HAS_SSE2) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_sse2;
+    if (flags & HAS_SSSE3) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_ssse3;
+    vp8_clear_system_state = vp8_clear_system_state_c;
+    if (flags & HAS_MMX) vp8_clear_system_state = vpx_reset_mmx_state;
+    vp8_copy_mem16x16 = vp8_copy_mem16x16_c;
+    if (flags & HAS_MMX) vp8_copy_mem16x16 = vp8_copy_mem16x16_mmx;
+    if (flags & HAS_SSE2) vp8_copy_mem16x16 = vp8_copy_mem16x16_sse2;
+    vp8_copy_mem8x4 = vp8_copy_mem8x4_c;
+    if (flags & HAS_MMX) vp8_copy_mem8x4 = vp8_copy_mem8x4_mmx;
+    vp8_copy_mem8x8 = vp8_copy_mem8x8_c;
+    if (flags & HAS_MMX) vp8_copy_mem8x8 = vp8_copy_mem8x8_mmx;
+    vp8_dc_only_idct_add = vp8_dc_only_idct_add_c;
+    if (flags & HAS_MMX) vp8_dc_only_idct_add = vp8_dc_only_idct_add_mmx;
+    vp8_dequant_idct_add = vp8_dequant_idct_add_c;
+    if (flags & HAS_MMX) vp8_dequant_idct_add = vp8_dequant_idct_add_mmx;
+    vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_c;
+    if (flags & HAS_MMX) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_mmx;
+    if (flags & HAS_SSE2) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_sse2;
+    vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_c;
+    if (flags & HAS_MMX) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_mmx;
+    if (flags & HAS_SSE2) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_sse2;
+    vp8_dequantize_b = vp8_dequantize_b_c;
+    if (flags & HAS_MMX) vp8_dequantize_b = vp8_dequantize_b_mmx;
+    vp8_loop_filter_bh = vp8_loop_filter_bh_c;
+    if (flags & HAS_MMX) vp8_loop_filter_bh = vp8_loop_filter_bh_mmx;
+    if (flags & HAS_SSE2) vp8_loop_filter_bh = vp8_loop_filter_bh_sse2;
+    vp8_loop_filter_bv = vp8_loop_filter_bv_c;
+    if (flags & HAS_MMX) vp8_loop_filter_bv = vp8_loop_filter_bv_mmx;
+    if (flags & HAS_SSE2) vp8_loop_filter_bv = vp8_loop_filter_bv_sse2;
+    vp8_loop_filter_mbh = vp8_loop_filter_mbh_c;
+    if (flags & HAS_MMX) vp8_loop_filter_mbh = vp8_loop_filter_mbh_mmx;
+    if (flags & HAS_SSE2) vp8_loop_filter_mbh = vp8_loop_filter_mbh_sse2;
+    vp8_loop_filter_mbv = vp8_loop_filter_mbv_c;
+    if (flags & HAS_MMX) vp8_loop_filter_mbv = vp8_loop_filter_mbv_mmx;
+    if (flags & HAS_SSE2) vp8_loop_filter_mbv = vp8_loop_filter_mbv_sse2;
+    vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_c;
+    if (flags & HAS_MMX) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_mmx;
+    if (flags & HAS_SSE2) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_sse2;
+    vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_c;
+    if (flags & HAS_MMX) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_mmx;
+    if (flags & HAS_SSE2) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_sse2;
+    vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_c;
+    if (flags & HAS_MMX) vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_mmx;
+    if (flags & HAS_SSE2) vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_sse2;
+    vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_c;
+    if (flags & HAS_MMX) vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_mmx;
+    if (flags & HAS_SSE2) vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_sse2;
+    vp8_short_idct4x4llm = vp8_short_idct4x4llm_c;
+    if (flags & HAS_MMX) vp8_short_idct4x4llm = vp8_short_idct4x4llm_mmx;
+    vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_c;
+    if (flags & HAS_MMX) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_mmx;
+    if (flags & HAS_SSE2) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_sse2;
+    vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_c;
+    if (flags & HAS_MMX) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_mmx;
+    if (flags & HAS_SSE2) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_sse2;
+    if (flags & HAS_SSSE3) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_ssse3;
+    vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_c;
+    if (flags & HAS_MMX) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_mmx;
+    if (flags & HAS_SSSE3) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_ssse3;
+    vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_c;
+    if (flags & HAS_MMX) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_mmx;
+    if (flags & HAS_SSE2) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_sse2;
+    if (flags & HAS_SSSE3) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_ssse3;
+    vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_c;
+    if (flags & HAS_MMX) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_mmx;
+    if (flags & HAS_SSE2) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_sse2;
+    if (flags & HAS_SSSE3) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_ssse3;
+}
+#endif
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif
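
In the x86 header above, setup_rtcd_internal() tests the SIMD levels in ascending order (MMX, then SSE2, then SSSE3), so each matching test overwrites the previous assignment and the pointer ends up on the highest extension x86_simd_caps() reports. A minimal sketch of that cumulative selection follows; the flag values and function names here are made up purely for illustration and are not part of the patch.

    #define HAS_MMX   0x01   /* illustrative values; the real flags come from vpx_ports/x86.h */
    #define HAS_SSE2  0x02
    #define HAS_SSSE3 0x04

    typedef void (*predict_fn)(void);

    static void predict_c(void)     { /* portable fallback */ }
    static void predict_mmx(void)   { /* MMX path          */ }
    static void predict_sse2(void)  { /* SSE2 path         */ }
    static void predict_ssse3(void) { /* SSSE3 path        */ }

    /* Pick the best implementation for the reported capability flags. */
    static predict_fn pick_predict(int flags)
    {
        predict_fn fn = predict_c;                 /* safe default                      */
        if (flags & HAS_MMX)   fn = predict_mmx;   /* each test may upgrade the choice, */
        if (flags & HAS_SSE2)  fn = predict_sse2;  /* so the checks must stay ordered   */
        if (flags & HAS_SSSE3) fn = predict_ssse3; /* from weakest to strongest         */
        return fn;
    }

    /* e.g. pick_predict(HAS_MMX | HAS_SSE2) returns predict_sse2. */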

+ 54 - 0
thirdparty/libvpx/rtcd/vp9_rtcd_arm.h

@@ -0,0 +1,54 @@
+#ifndef VP9_RTCD_H_
+#define VP9_RTCD_H_
+
+#ifdef RTCD_C
+#define RTCD_EXTERN
+#else
+#define RTCD_EXTERN extern
+#endif
+
+/*
+ * VP9
+ */
+
+#include "vp9/common/vp9_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *output, int pitch, int tx_type);
+#define vp9_iht16x16_256_add vp9_iht16x16_256_add_c
+
+void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
+void vp9_iht4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
+RTCD_EXTERN void (*vp9_iht4x4_16_add)(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
+
+void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
+void vp9_iht8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
+RTCD_EXTERN void (*vp9_iht8x8_64_add)(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
+
+void vp9_rtcd(void);
+
+#ifdef RTCD_C
+#include "vpx_ports/arm.h"
+static void setup_rtcd_internal(void)
+{
+    int flags = arm_cpu_caps();
+
+    vp9_iht4x4_16_add = vp9_iht4x4_16_add_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vp9_iht4x4_16_add = vp9_iht4x4_16_add_neon;
+#endif
+    vp9_iht8x8_64_add = vp9_iht8x8_64_add_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vp9_iht8x8_64_add = vp9_iht8x8_64_add_neon;
+#endif
+}
+#endif
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif

+ 41 - 0
thirdparty/libvpx/rtcd/vp9_rtcd_c.h

@@ -0,0 +1,41 @@
+#ifndef VP9_RTCD_H_
+#define VP9_RTCD_H_
+
+#ifdef RTCD_C
+#define RTCD_EXTERN
+#else
+#define RTCD_EXTERN extern
+#endif
+
+/*
+ * VP9
+ */
+
+#include "vp9/common/vp9_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *output, int pitch, int tx_type);
+#define vp9_iht16x16_256_add vp9_iht16x16_256_add_c
+
+void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
+#define vp9_iht4x4_16_add vp9_iht4x4_16_add_c
+
+void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
+#define vp9_iht8x8_64_add vp9_iht8x8_64_add_c
+
+void vp9_rtcd(void);
+
+#ifdef RTCD_C
+static void setup_rtcd_internal(void)
+{
+}
+#endif
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif

+ 55 - 0
thirdparty/libvpx/rtcd/vp9_rtcd_x86.h

@@ -0,0 +1,55 @@
+#ifndef VP9_RTCD_H_
+#define VP9_RTCD_H_
+
+#ifdef RTCD_C
+#define RTCD_EXTERN
+#else
+#define RTCD_EXTERN extern
+#endif
+
+/*
+ * VP9
+ */
+
+#include "vp9/common/vp9_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *output, int pitch, int tx_type);
+void vp9_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *output, int pitch, int tx_type);
+RTCD_EXTERN void (*vp9_iht16x16_256_add)(const tran_low_t *input, uint8_t *output, int pitch, int tx_type);
+
+void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
+void vp9_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
+RTCD_EXTERN void (*vp9_iht4x4_16_add)(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
+
+void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
+void vp9_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
+RTCD_EXTERN void (*vp9_iht8x8_64_add)(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
+
+void vp9_rtcd(void);
+
+#ifdef RTCD_C
+#include "vpx_ports/x86.h"
+static void setup_rtcd_internal(void)
+{
+    int flags = x86_simd_caps();
+
+    vp9_iht16x16_256_add = vp9_iht16x16_256_add_c;
+    if (flags & HAS_SSE2) vp9_iht16x16_256_add = vp9_iht16x16_256_add_sse2;
+
+    vp9_iht4x4_16_add = vp9_iht4x4_16_add_c;
+    if (flags & HAS_SSE2) vp9_iht4x4_16_add = vp9_iht4x4_16_add_sse2;
+
+    vp9_iht8x8_64_add = vp9_iht8x8_64_add_c;
+    if (flags & HAS_SSE2) vp9_iht8x8_64_add = vp9_iht8x8_64_add_sse2;
+}
+#endif
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif

+ 678 - 0
thirdparty/libvpx/rtcd/vpx_dsp_rtcd_arm.h

@@ -0,0 +1,678 @@
+#ifndef VPX_DSP_RTCD_H_
+#define VPX_DSP_RTCD_H_
+
+#ifdef RTCD_C
+#define RTCD_EXTERN
+#else
+#define RTCD_EXTERN extern
+#endif
+
+/*
+ * DSP
+ */
+
+#include "vpx/vpx_integer.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vpx_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+RTCD_EXTERN void (*vpx_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+
+void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vpx_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+RTCD_EXTERN void (*vpx_convolve8_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+
+void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vpx_convolve8_avg_horiz_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+RTCD_EXTERN void (*vpx_convolve8_avg_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+
+void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vpx_convolve8_avg_vert_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+RTCD_EXTERN void (*vpx_convolve8_avg_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+
+void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vpx_convolve8_horiz_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+RTCD_EXTERN void (*vpx_convolve8_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+
+void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vpx_convolve8_vert_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+RTCD_EXTERN void (*vpx_convolve8_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+
+void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vpx_convolve_avg_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+RTCD_EXTERN void (*vpx_convolve_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+
+void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vpx_convolve_copy_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+RTCD_EXTERN void (*vpx_convolve_copy)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+
+void vpx_d117_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d117_predictor_16x16 vpx_d117_predictor_16x16_c
+
+void vpx_d117_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d117_predictor_32x32 vpx_d117_predictor_32x32_c
+
+void vpx_d117_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d117_predictor_4x4 vpx_d117_predictor_4x4_c
+
+void vpx_d117_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d117_predictor_8x8 vpx_d117_predictor_8x8_c
+
+void vpx_d135_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d135_predictor_16x16 vpx_d135_predictor_16x16_c
+
+void vpx_d135_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d135_predictor_32x32 vpx_d135_predictor_32x32_c
+
+void vpx_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_d135_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_d135_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_d135_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d135_predictor_8x8 vpx_d135_predictor_8x8_c
+
+void vpx_d153_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d153_predictor_16x16 vpx_d153_predictor_16x16_c
+
+void vpx_d153_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d153_predictor_32x32 vpx_d153_predictor_32x32_c
+
+void vpx_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d153_predictor_4x4 vpx_d153_predictor_4x4_c
+
+void vpx_d153_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d153_predictor_8x8 vpx_d153_predictor_8x8_c
+
+void vpx_d207_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207_predictor_16x16 vpx_d207_predictor_16x16_c
+
+void vpx_d207_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207_predictor_32x32 vpx_d207_predictor_32x32_c
+
+void vpx_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207_predictor_4x4 vpx_d207_predictor_4x4_c
+
+void vpx_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207_predictor_8x8 vpx_d207_predictor_8x8_c
+
+void vpx_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_16x16 vpx_d207e_predictor_16x16_c
+
+void vpx_d207e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_32x32 vpx_d207e_predictor_32x32_c
+
+void vpx_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_4x4 vpx_d207e_predictor_4x4_c
+
+void vpx_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_8x8 vpx_d207e_predictor_8x8_c
+
+void vpx_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_d45_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_d45_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_d45_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45_predictor_32x32 vpx_d45_predictor_32x32_c
+
+void vpx_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_d45_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_d45_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_d45_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_d45_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_16x16 vpx_d45e_predictor_16x16_c
+
+void vpx_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_32x32 vpx_d45e_predictor_32x32_c
+
+void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c
+
+void vpx_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_8x8 vpx_d45e_predictor_8x8_c
+
+void vpx_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63_predictor_16x16 vpx_d63_predictor_16x16_c
+
+void vpx_d63_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63_predictor_32x32 vpx_d63_predictor_32x32_c
+
+void vpx_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63_predictor_4x4 vpx_d63_predictor_4x4_c
+
+void vpx_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63_predictor_8x8 vpx_d63_predictor_8x8_c
+
+void vpx_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_16x16 vpx_d63e_predictor_16x16_c
+
+void vpx_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_32x32 vpx_d63e_predictor_32x32_c
+
+void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c
+
+void vpx_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_8x8 vpx_d63e_predictor_8x8_c
+
+void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63f_predictor_4x4 vpx_d63f_predictor_4x4_c
+
+void vpx_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_dc_128_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_dc_128_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_dc_128_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_dc_128_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_dc_128_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_dc_128_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_dc_128_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_dc_128_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_dc_left_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_dc_left_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_dc_left_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_dc_left_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_dc_left_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_dc_left_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_dc_left_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_dc_left_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_dc_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_dc_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_dc_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_dc_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_dc_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_dc_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_dc_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_dc_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_dc_top_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_dc_top_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_dc_top_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_dc_top_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_dc_top_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_dc_top_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_dc_top_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_dc_top_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_h_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_h_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_h_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_h_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c
+
+void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+void vpx_idct16x16_10_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
+RTCD_EXTERN void (*vpx_idct16x16_10_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
+
+void vpx_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+void vpx_idct16x16_1_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
+RTCD_EXTERN void (*vpx_idct16x16_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
+
+void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+void vpx_idct16x16_256_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
+RTCD_EXTERN void (*vpx_idct16x16_256_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
+
+void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+void vpx_idct32x32_1024_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
+RTCD_EXTERN void (*vpx_idct32x32_1024_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
+
+void vpx_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+void vpx_idct32x32_1024_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
+RTCD_EXTERN void (*vpx_idct32x32_135_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
+
+void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+void vpx_idct32x32_1_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
+RTCD_EXTERN void (*vpx_idct32x32_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
+
+void vpx_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+void vpx_idct32x32_1024_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
+RTCD_EXTERN void (*vpx_idct32x32_34_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
+
+void vpx_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+void vpx_idct4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
+RTCD_EXTERN void (*vpx_idct4x4_16_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
+
+void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+void vpx_idct4x4_1_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
+RTCD_EXTERN void (*vpx_idct4x4_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
+
+void vpx_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+void vpx_idct8x8_12_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
+RTCD_EXTERN void (*vpx_idct8x8_12_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
+
+void vpx_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+void vpx_idct8x8_1_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
+RTCD_EXTERN void (*vpx_idct8x8_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
+
+void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+void vpx_idct8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
+RTCD_EXTERN void (*vpx_idct8x8_64_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
+
+void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+#define vpx_iwht4x4_16_add vpx_iwht4x4_16_add_c
+
+void vpx_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+#define vpx_iwht4x4_1_add vpx_iwht4x4_1_add_c
+
+void vpx_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+void vpx_lpf_horizontal_4_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+RTCD_EXTERN void (*vpx_lpf_horizontal_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+
+void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+void vpx_lpf_horizontal_4_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+RTCD_EXTERN void (*vpx_lpf_horizontal_4_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+
+void vpx_lpf_horizontal_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+void vpx_lpf_horizontal_8_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+RTCD_EXTERN void (*vpx_lpf_horizontal_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+
+void vpx_lpf_horizontal_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+void vpx_lpf_horizontal_8_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+RTCD_EXTERN void (*vpx_lpf_horizontal_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+
+void vpx_lpf_horizontal_edge_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+void vpx_lpf_horizontal_edge_16_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+RTCD_EXTERN void (*vpx_lpf_horizontal_edge_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+
+void vpx_lpf_horizontal_edge_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+RTCD_EXTERN void (*vpx_lpf_horizontal_edge_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+
+void vpx_lpf_vertical_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+void vpx_lpf_vertical_16_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+RTCD_EXTERN void (*vpx_lpf_vertical_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+
+void vpx_lpf_vertical_16_dual_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+void vpx_lpf_vertical_16_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+RTCD_EXTERN void (*vpx_lpf_vertical_16_dual)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+
+void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+void vpx_lpf_vertical_4_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+RTCD_EXTERN void (*vpx_lpf_vertical_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+
+void vpx_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+void vpx_lpf_vertical_4_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+RTCD_EXTERN void (*vpx_lpf_vertical_4_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+
+void vpx_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+void vpx_lpf_vertical_8_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+RTCD_EXTERN void (*vpx_lpf_vertical_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+
+void vpx_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+void vpx_lpf_vertical_8_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+RTCD_EXTERN void (*vpx_lpf_vertical_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+
+void vpx_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+#define vpx_scaled_2d vpx_scaled_2d_c
+
+void vpx_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+#define vpx_scaled_avg_2d vpx_scaled_avg_2d_c
+
+void vpx_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+#define vpx_scaled_avg_horiz vpx_scaled_avg_horiz_c
+
+void vpx_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+#define vpx_scaled_avg_vert vpx_scaled_avg_vert_c
+
+void vpx_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+#define vpx_scaled_horiz vpx_scaled_horiz_c
+
+void vpx_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+#define vpx_scaled_vert vpx_scaled_vert_c
+
+void vpx_tm_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_tm_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_tm_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_tm_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_tm_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_tm_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_tm_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_tm_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_tm_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_tm_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_tm_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_tm_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_v_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_v_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_v_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_v_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_v_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_v_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c
+
+void vpx_dsp_rtcd(void);
+
+#ifdef RTCD_C
+#include "vpx_ports/arm.h"
+static void setup_rtcd_internal(void)
+{
+    int flags = arm_cpu_caps();
+
+    vpx_convolve8 = vpx_convolve8_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_convolve8 = vpx_convolve8_neon;
+#endif
+    vpx_convolve8_avg = vpx_convolve8_avg_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_convolve8_avg = vpx_convolve8_avg_neon;
+#endif
+    vpx_convolve8_avg_horiz = vpx_convolve8_avg_horiz_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_convolve8_avg_horiz = vpx_convolve8_avg_horiz_neon;
+#endif
+    vpx_convolve8_avg_vert = vpx_convolve8_avg_vert_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_convolve8_avg_vert = vpx_convolve8_avg_vert_neon;
+#endif
+    vpx_convolve8_horiz = vpx_convolve8_horiz_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_convolve8_horiz = vpx_convolve8_horiz_neon;
+#endif
+    vpx_convolve8_vert = vpx_convolve8_vert_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_convolve8_vert = vpx_convolve8_vert_neon;
+#endif
+    vpx_convolve_avg = vpx_convolve_avg_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_convolve_avg = vpx_convolve_avg_neon;
+#endif
+    vpx_convolve_copy = vpx_convolve_copy_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_convolve_copy = vpx_convolve_copy_neon;
+#endif
+    vpx_d135_predictor_4x4 = vpx_d135_predictor_4x4_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_d135_predictor_4x4 = vpx_d135_predictor_4x4_neon;
+#endif
+    vpx_d45_predictor_16x16 = vpx_d45_predictor_16x16_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_d45_predictor_16x16 = vpx_d45_predictor_16x16_neon;
+#endif
+    vpx_d45_predictor_4x4 = vpx_d45_predictor_4x4_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_d45_predictor_4x4 = vpx_d45_predictor_4x4_neon;
+#endif
+    vpx_d45_predictor_8x8 = vpx_d45_predictor_8x8_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_d45_predictor_8x8 = vpx_d45_predictor_8x8_neon;
+#endif
+    vpx_dc_128_predictor_16x16 = vpx_dc_128_predictor_16x16_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_dc_128_predictor_16x16 = vpx_dc_128_predictor_16x16_neon;
+#endif
+    vpx_dc_128_predictor_32x32 = vpx_dc_128_predictor_32x32_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_dc_128_predictor_32x32 = vpx_dc_128_predictor_32x32_neon;
+#endif
+    vpx_dc_128_predictor_4x4 = vpx_dc_128_predictor_4x4_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_dc_128_predictor_4x4 = vpx_dc_128_predictor_4x4_neon;
+#endif
+    vpx_dc_128_predictor_8x8 = vpx_dc_128_predictor_8x8_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_dc_128_predictor_8x8 = vpx_dc_128_predictor_8x8_neon;
+#endif
+    vpx_dc_left_predictor_16x16 = vpx_dc_left_predictor_16x16_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_dc_left_predictor_16x16 = vpx_dc_left_predictor_16x16_neon;
+#endif
+    vpx_dc_left_predictor_32x32 = vpx_dc_left_predictor_32x32_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_dc_left_predictor_32x32 = vpx_dc_left_predictor_32x32_neon;
+#endif
+    vpx_dc_left_predictor_4x4 = vpx_dc_left_predictor_4x4_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_dc_left_predictor_4x4 = vpx_dc_left_predictor_4x4_neon;
+#endif
+    vpx_dc_left_predictor_8x8 = vpx_dc_left_predictor_8x8_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_dc_left_predictor_8x8 = vpx_dc_left_predictor_8x8_neon;
+#endif
+    vpx_dc_predictor_16x16 = vpx_dc_predictor_16x16_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_dc_predictor_16x16 = vpx_dc_predictor_16x16_neon;
+#endif
+    vpx_dc_predictor_32x32 = vpx_dc_predictor_32x32_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_dc_predictor_32x32 = vpx_dc_predictor_32x32_neon;
+#endif
+    vpx_dc_predictor_4x4 = vpx_dc_predictor_4x4_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_dc_predictor_4x4 = vpx_dc_predictor_4x4_neon;
+#endif
+    vpx_dc_predictor_8x8 = vpx_dc_predictor_8x8_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_dc_predictor_8x8 = vpx_dc_predictor_8x8_neon;
+#endif
+    vpx_dc_top_predictor_16x16 = vpx_dc_top_predictor_16x16_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_dc_top_predictor_16x16 = vpx_dc_top_predictor_16x16_neon;
+#endif
+    vpx_dc_top_predictor_32x32 = vpx_dc_top_predictor_32x32_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_dc_top_predictor_32x32 = vpx_dc_top_predictor_32x32_neon;
+#endif
+    vpx_dc_top_predictor_4x4 = vpx_dc_top_predictor_4x4_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_dc_top_predictor_4x4 = vpx_dc_top_predictor_4x4_neon;
+#endif
+    vpx_dc_top_predictor_8x8 = vpx_dc_top_predictor_8x8_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_dc_top_predictor_8x8 = vpx_dc_top_predictor_8x8_neon;
+#endif
+    vpx_h_predictor_16x16 = vpx_h_predictor_16x16_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_h_predictor_16x16 = vpx_h_predictor_16x16_neon;
+#endif
+    vpx_h_predictor_32x32 = vpx_h_predictor_32x32_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_h_predictor_32x32 = vpx_h_predictor_32x32_neon;
+#endif
+    vpx_h_predictor_4x4 = vpx_h_predictor_4x4_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_h_predictor_4x4 = vpx_h_predictor_4x4_neon;
+#endif
+    vpx_h_predictor_8x8 = vpx_h_predictor_8x8_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_h_predictor_8x8 = vpx_h_predictor_8x8_neon;
+#endif
+    vpx_idct16x16_10_add = vpx_idct16x16_10_add_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_idct16x16_10_add = vpx_idct16x16_10_add_neon;
+#endif
+    vpx_idct16x16_1_add = vpx_idct16x16_1_add_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_idct16x16_1_add = vpx_idct16x16_1_add_neon;
+#endif
+    vpx_idct16x16_256_add = vpx_idct16x16_256_add_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_idct16x16_256_add = vpx_idct16x16_256_add_neon;
+#endif
+    vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_neon;
+#endif
+    vpx_idct32x32_135_add = vpx_idct32x32_135_add_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_idct32x32_135_add = vpx_idct32x32_1024_add_neon;
+#endif
+    vpx_idct32x32_1_add = vpx_idct32x32_1_add_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_idct32x32_1_add = vpx_idct32x32_1_add_neon;
+#endif
+    vpx_idct32x32_34_add = vpx_idct32x32_34_add_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_idct32x32_34_add = vpx_idct32x32_1024_add_neon;
+#endif
+    vpx_idct4x4_16_add = vpx_idct4x4_16_add_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_idct4x4_16_add = vpx_idct4x4_16_add_neon;
+#endif
+    vpx_idct4x4_1_add = vpx_idct4x4_1_add_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_idct4x4_1_add = vpx_idct4x4_1_add_neon;
+#endif
+    vpx_idct8x8_12_add = vpx_idct8x8_12_add_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_idct8x8_12_add = vpx_idct8x8_12_add_neon;
+#endif
+    vpx_idct8x8_1_add = vpx_idct8x8_1_add_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_idct8x8_1_add = vpx_idct8x8_1_add_neon;
+#endif
+    vpx_idct8x8_64_add = vpx_idct8x8_64_add_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_idct8x8_64_add = vpx_idct8x8_64_add_neon;
+#endif
+    vpx_lpf_horizontal_4 = vpx_lpf_horizontal_4_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_lpf_horizontal_4 = vpx_lpf_horizontal_4_neon;
+#endif
+    vpx_lpf_horizontal_4_dual = vpx_lpf_horizontal_4_dual_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_lpf_horizontal_4_dual = vpx_lpf_horizontal_4_dual_neon;
+#endif
+    vpx_lpf_horizontal_8 = vpx_lpf_horizontal_8_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_lpf_horizontal_8 = vpx_lpf_horizontal_8_neon;
+#endif
+    vpx_lpf_horizontal_8_dual = vpx_lpf_horizontal_8_dual_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_lpf_horizontal_8_dual = vpx_lpf_horizontal_8_dual_neon;
+#endif
+    vpx_lpf_horizontal_edge_16 = vpx_lpf_horizontal_edge_16_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_lpf_horizontal_edge_16 = vpx_lpf_horizontal_edge_16_neon;
+#endif
+    vpx_lpf_horizontal_edge_8 = vpx_lpf_horizontal_edge_8_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_lpf_horizontal_edge_8 = vpx_lpf_horizontal_edge_8_neon;
+#endif
+    vpx_lpf_vertical_16 = vpx_lpf_vertical_16_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_lpf_vertical_16 = vpx_lpf_vertical_16_neon;
+#endif
+    vpx_lpf_vertical_16_dual = vpx_lpf_vertical_16_dual_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_lpf_vertical_16_dual = vpx_lpf_vertical_16_dual_neon;
+#endif
+    vpx_lpf_vertical_4 = vpx_lpf_vertical_4_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_lpf_vertical_4 = vpx_lpf_vertical_4_neon;
+#endif
+    vpx_lpf_vertical_4_dual = vpx_lpf_vertical_4_dual_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_lpf_vertical_4_dual = vpx_lpf_vertical_4_dual_neon;
+#endif
+    vpx_lpf_vertical_8 = vpx_lpf_vertical_8_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_lpf_vertical_8 = vpx_lpf_vertical_8_neon;
+#endif
+    vpx_lpf_vertical_8_dual = vpx_lpf_vertical_8_dual_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_lpf_vertical_8_dual = vpx_lpf_vertical_8_dual_neon;
+#endif
+    vpx_tm_predictor_16x16 = vpx_tm_predictor_16x16_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_tm_predictor_16x16 = vpx_tm_predictor_16x16_neon;
+#endif
+    vpx_tm_predictor_32x32 = vpx_tm_predictor_32x32_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_tm_predictor_32x32 = vpx_tm_predictor_32x32_neon;
+#endif
+    vpx_tm_predictor_4x4 = vpx_tm_predictor_4x4_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_tm_predictor_4x4 = vpx_tm_predictor_4x4_neon;
+#endif
+    vpx_tm_predictor_8x8 = vpx_tm_predictor_8x8_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_tm_predictor_8x8 = vpx_tm_predictor_8x8_neon;
+#endif
+    vpx_v_predictor_16x16 = vpx_v_predictor_16x16_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_v_predictor_16x16 = vpx_v_predictor_16x16_neon;
+#endif
+    vpx_v_predictor_32x32 = vpx_v_predictor_32x32_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_v_predictor_32x32 = vpx_v_predictor_32x32_neon;
+#endif
+    vpx_v_predictor_4x4 = vpx_v_predictor_4x4_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_v_predictor_4x4 = vpx_v_predictor_4x4_neon;
+#endif
+    vpx_v_predictor_8x8 = vpx_v_predictor_8x8_c;
+#if HAVE_NEON
+    if (flags & HAS_NEON) vpx_v_predictor_8x8 = vpx_v_predictor_8x8_neon;
+#endif
+}
+#endif
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif
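Note on the ARM header above: it follows libvpx's RTCD (run-time CPU detection) convention. Symbols with only a portable implementation are pinned to their `_c` function via a #define, while symbols with a NEON kernel are exposed as function pointers (RTCD_EXTERN expands to nothing in the one translation unit that defines RTCD_C and to `extern` everywhere else), and setup_rtcd_internal() repoints each of them when arm_cpu_caps() reports HAS_NEON. The standalone sketch below mirrors that pattern for a single made-up function; dsp_add_c, dsp_add_neon and the cpu_caps() query are illustrative only, not libvpx symbols.

    /* rtcd_demo.c -- minimal sketch of the RTCD dispatch pattern used above */
    #include <stdio.h>

    #define HAS_NEON 0x01            /* mirrors the HAS_NEON flag tested above */

    static int cpu_caps(void) {      /* stand-in for arm_cpu_caps(); pretends NEON exists */
        return HAS_NEON;
    }

    static void dsp_add_c(int *dst, const int *src, int n) {     /* portable fallback */
        for (int i = 0; i < n; ++i) dst[i] += src[i];
    }

    static void dsp_add_neon(int *dst, const int *src, int n) {  /* pretend SIMD kernel */
        for (int i = 0; i < n; ++i) dst[i] += src[i];            /* same result, "faster" */
    }

    /* this pointer plays the role of an RTCD_EXTERN dispatch symbol */
    static void (*dsp_add)(int *dst, const int *src, int n);

    static void setup_rtcd_internal(void) {
        int flags = cpu_caps();
        dsp_add = dsp_add_c;                           /* always start from the C path */
        if (flags & HAS_NEON) dsp_add = dsp_add_neon;  /* upgrade when the CPU allows it */
    }

    int main(void) {
        int a[4] = {1, 2, 3, 4}, b[4] = {10, 20, 30, 40};
        setup_rtcd_internal();        /* what the vpx_dsp_rtcd() entry point arranges once */
        dsp_add(a, b, 4);
        printf("%d %d %d %d\n", a[0], a[1], a[2], a[3]);   /* prints 11 22 33 44 */
        return 0;
    }

The design choice to resolve the implementation once at startup (rather than branching on CPU flags inside every call) keeps the per-call overhead to a single indirect call, which matters for kernels invoked per block during decoding.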

+ 355 - 0
thirdparty/libvpx/rtcd/vpx_dsp_rtcd_c.h

@@ -0,0 +1,355 @@
+#ifndef VPX_DSP_RTCD_H_
+#define VPX_DSP_RTCD_H_
+
+#ifdef RTCD_C
+#define RTCD_EXTERN
+#else
+#define RTCD_EXTERN extern
+#endif
+
+/*
+ * DSP
+ */
+
+#include "vpx/vpx_integer.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+#define vpx_convolve8 vpx_convolve8_c
+
+void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+#define vpx_convolve8_avg vpx_convolve8_avg_c
+
+void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+#define vpx_convolve8_avg_horiz vpx_convolve8_avg_horiz_c
+
+void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+#define vpx_convolve8_avg_vert vpx_convolve8_avg_vert_c
+
+void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+#define vpx_convolve8_horiz vpx_convolve8_horiz_c
+
+void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+#define vpx_convolve8_vert vpx_convolve8_vert_c
+
+void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+#define vpx_convolve_avg vpx_convolve_avg_c
+
+void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+#define vpx_convolve_copy vpx_convolve_copy_c
+
+void vpx_d117_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d117_predictor_16x16 vpx_d117_predictor_16x16_c
+
+void vpx_d117_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d117_predictor_32x32 vpx_d117_predictor_32x32_c
+
+void vpx_d117_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d117_predictor_4x4 vpx_d117_predictor_4x4_c
+
+void vpx_d117_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d117_predictor_8x8 vpx_d117_predictor_8x8_c
+
+void vpx_d135_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d135_predictor_16x16 vpx_d135_predictor_16x16_c
+
+void vpx_d135_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d135_predictor_32x32 vpx_d135_predictor_32x32_c
+
+void vpx_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d135_predictor_4x4 vpx_d135_predictor_4x4_c
+
+void vpx_d135_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d135_predictor_8x8 vpx_d135_predictor_8x8_c
+
+void vpx_d153_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d153_predictor_16x16 vpx_d153_predictor_16x16_c
+
+void vpx_d153_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d153_predictor_32x32 vpx_d153_predictor_32x32_c
+
+void vpx_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d153_predictor_4x4 vpx_d153_predictor_4x4_c
+
+void vpx_d153_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d153_predictor_8x8 vpx_d153_predictor_8x8_c
+
+void vpx_d207_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207_predictor_16x16 vpx_d207_predictor_16x16_c
+
+void vpx_d207_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207_predictor_32x32 vpx_d207_predictor_32x32_c
+
+void vpx_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207_predictor_4x4 vpx_d207_predictor_4x4_c
+
+void vpx_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207_predictor_8x8 vpx_d207_predictor_8x8_c
+
+void vpx_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_16x16 vpx_d207e_predictor_16x16_c
+
+void vpx_d207e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_32x32 vpx_d207e_predictor_32x32_c
+
+void vpx_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_4x4 vpx_d207e_predictor_4x4_c
+
+void vpx_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_8x8 vpx_d207e_predictor_8x8_c
+
+void vpx_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45_predictor_16x16 vpx_d45_predictor_16x16_c
+
+void vpx_d45_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45_predictor_32x32 vpx_d45_predictor_32x32_c
+
+void vpx_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45_predictor_4x4 vpx_d45_predictor_4x4_c
+
+void vpx_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45_predictor_8x8 vpx_d45_predictor_8x8_c
+
+void vpx_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_16x16 vpx_d45e_predictor_16x16_c
+
+void vpx_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_32x32 vpx_d45e_predictor_32x32_c
+
+void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c
+
+void vpx_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_8x8 vpx_d45e_predictor_8x8_c
+
+void vpx_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63_predictor_16x16 vpx_d63_predictor_16x16_c
+
+void vpx_d63_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63_predictor_32x32 vpx_d63_predictor_32x32_c
+
+void vpx_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63_predictor_4x4 vpx_d63_predictor_4x4_c
+
+void vpx_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63_predictor_8x8 vpx_d63_predictor_8x8_c
+
+void vpx_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_16x16 vpx_d63e_predictor_16x16_c
+
+void vpx_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_32x32 vpx_d63e_predictor_32x32_c
+
+void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c
+
+void vpx_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_8x8 vpx_d63e_predictor_8x8_c
+
+void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63f_predictor_4x4 vpx_d63f_predictor_4x4_c
+
+void vpx_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_dc_128_predictor_16x16 vpx_dc_128_predictor_16x16_c
+
+void vpx_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_dc_128_predictor_32x32 vpx_dc_128_predictor_32x32_c
+
+void vpx_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_dc_128_predictor_4x4 vpx_dc_128_predictor_4x4_c
+
+void vpx_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_dc_128_predictor_8x8 vpx_dc_128_predictor_8x8_c
+
+void vpx_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_dc_left_predictor_16x16 vpx_dc_left_predictor_16x16_c
+
+void vpx_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_dc_left_predictor_32x32 vpx_dc_left_predictor_32x32_c
+
+void vpx_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_dc_left_predictor_4x4 vpx_dc_left_predictor_4x4_c
+
+void vpx_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_dc_left_predictor_8x8 vpx_dc_left_predictor_8x8_c
+
+void vpx_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_dc_predictor_16x16 vpx_dc_predictor_16x16_c
+
+void vpx_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_dc_predictor_32x32 vpx_dc_predictor_32x32_c
+
+void vpx_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_dc_predictor_4x4 vpx_dc_predictor_4x4_c
+
+void vpx_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_dc_predictor_8x8 vpx_dc_predictor_8x8_c
+
+void vpx_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_dc_top_predictor_16x16 vpx_dc_top_predictor_16x16_c
+
+void vpx_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_dc_top_predictor_32x32 vpx_dc_top_predictor_32x32_c
+
+void vpx_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_dc_top_predictor_4x4 vpx_dc_top_predictor_4x4_c
+
+void vpx_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_dc_top_predictor_8x8 vpx_dc_top_predictor_8x8_c
+
+void vpx_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_h_predictor_16x16 vpx_h_predictor_16x16_c
+
+void vpx_h_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_h_predictor_32x32 vpx_h_predictor_32x32_c
+
+void vpx_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_h_predictor_4x4 vpx_h_predictor_4x4_c
+
+void vpx_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_h_predictor_8x8 vpx_h_predictor_8x8_c
+
+void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c
+
+void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+#define vpx_idct16x16_10_add vpx_idct16x16_10_add_c
+
+void vpx_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+#define vpx_idct16x16_1_add vpx_idct16x16_1_add_c
+
+void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+#define vpx_idct16x16_256_add vpx_idct16x16_256_add_c
+
+void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+#define vpx_idct32x32_1024_add vpx_idct32x32_1024_add_c
+
+void vpx_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+#define vpx_idct32x32_135_add vpx_idct32x32_135_add_c
+
+void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+#define vpx_idct32x32_1_add vpx_idct32x32_1_add_c
+
+void vpx_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+#define vpx_idct32x32_34_add vpx_idct32x32_34_add_c
+
+void vpx_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+#define vpx_idct4x4_16_add vpx_idct4x4_16_add_c
+
+void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+#define vpx_idct4x4_1_add vpx_idct4x4_1_add_c
+
+void vpx_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+#define vpx_idct8x8_12_add vpx_idct8x8_12_add_c
+
+void vpx_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+#define vpx_idct8x8_1_add vpx_idct8x8_1_add_c
+
+void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+#define vpx_idct8x8_64_add vpx_idct8x8_64_add_c
+
+void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+#define vpx_iwht4x4_16_add vpx_iwht4x4_16_add_c
+
+void vpx_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+#define vpx_iwht4x4_1_add vpx_iwht4x4_1_add_c
+
+void vpx_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+#define vpx_lpf_horizontal_4 vpx_lpf_horizontal_4_c
+
+void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+#define vpx_lpf_horizontal_4_dual vpx_lpf_horizontal_4_dual_c
+
+void vpx_lpf_horizontal_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+#define vpx_lpf_horizontal_8 vpx_lpf_horizontal_8_c
+
+void vpx_lpf_horizontal_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+#define vpx_lpf_horizontal_8_dual vpx_lpf_horizontal_8_dual_c
+
+void vpx_lpf_horizontal_edge_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+#define vpx_lpf_horizontal_edge_16 vpx_lpf_horizontal_edge_16_c
+
+void vpx_lpf_horizontal_edge_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+#define vpx_lpf_horizontal_edge_8 vpx_lpf_horizontal_edge_8_c
+
+void vpx_lpf_vertical_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+#define vpx_lpf_vertical_16 vpx_lpf_vertical_16_c
+
+void vpx_lpf_vertical_16_dual_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+#define vpx_lpf_vertical_16_dual vpx_lpf_vertical_16_dual_c
+
+void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+#define vpx_lpf_vertical_4 vpx_lpf_vertical_4_c
+
+void vpx_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+#define vpx_lpf_vertical_4_dual vpx_lpf_vertical_4_dual_c
+
+void vpx_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+#define vpx_lpf_vertical_8 vpx_lpf_vertical_8_c
+
+void vpx_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+#define vpx_lpf_vertical_8_dual vpx_lpf_vertical_8_dual_c
+
+void vpx_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+#define vpx_scaled_2d vpx_scaled_2d_c
+
+void vpx_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+#define vpx_scaled_avg_2d vpx_scaled_avg_2d_c
+
+void vpx_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+#define vpx_scaled_avg_horiz vpx_scaled_avg_horiz_c
+
+void vpx_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+#define vpx_scaled_avg_vert vpx_scaled_avg_vert_c
+
+void vpx_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+#define vpx_scaled_horiz vpx_scaled_horiz_c
+
+void vpx_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+#define vpx_scaled_vert vpx_scaled_vert_c
+
+void vpx_tm_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_tm_predictor_16x16 vpx_tm_predictor_16x16_c
+
+void vpx_tm_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_tm_predictor_32x32 vpx_tm_predictor_32x32_c
+
+void vpx_tm_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_tm_predictor_4x4 vpx_tm_predictor_4x4_c
+
+void vpx_tm_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_tm_predictor_8x8 vpx_tm_predictor_8x8_c
+
+void vpx_v_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_v_predictor_16x16 vpx_v_predictor_16x16_c
+
+void vpx_v_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_v_predictor_32x32 vpx_v_predictor_32x32_c
+
+void vpx_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_v_predictor_4x4 vpx_v_predictor_4x4_c
+
+void vpx_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_v_predictor_8x8 vpx_v_predictor_8x8_c
+
+void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c
+
+void vpx_dsp_rtcd(void);
+
+#ifdef RTCD_C
+static void setup_rtcd_internal(void)
+{
+}
+#endif
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif
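The generic header above is the no-SIMD counterpart of the ARM and x86 variants: every vpx_dsp symbol is pinned to its `_c` implementation with a #define, so a call such as vpx_convolve8(...) resolves to vpx_convolve8_c(...) at compile time and setup_rtcd_internal() is left empty. The short sketch below, using the same hypothetical dsp_add symbol as the earlier example, shows why a call site compiles unchanged against either flavor of header; PLAIN_C_HEADER is an illustrative switch, not a libvpx macro.

    /* call_site.c -- one call site, two header flavors (illustrative only) */
    #include <stdio.h>

    static void dsp_add_c(int *dst, const int *src, int n) {   /* portable fallback */
        for (int i = 0; i < n; ++i) dst[i] += src[i];
    }

    #ifdef PLAIN_C_HEADER
      #define dsp_add dsp_add_c      /* generic header: compile-time alias, no pointer */
    #else
      static void (*dsp_add)(int *dst, const int *src, int n) = dsp_add_c;  /* RTCD: pointer */
    #endif

    int main(void) {
        int a[2] = {1, 2}, b[2] = {5, 5};
        dsp_add(a, b, 2);              /* identical call site in both builds */
        printf("%d %d\n", a[0], a[1]); /* prints 6 7 */
        return 0;
    }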

+ 614 - 0
thirdparty/libvpx/rtcd/vpx_dsp_rtcd_x86.h

@@ -0,0 +1,614 @@
+#ifndef VPX_DSP_RTCD_H_
+#define VPX_DSP_RTCD_H_
+
+#ifdef RTCD_C
+#define RTCD_EXTERN
+#else
+#define RTCD_EXTERN extern
+#endif
+
+/*
+ * DSP
+ */
+
+#include "vpx/vpx_integer.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vpx_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vpx_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vpx_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+RTCD_EXTERN void (*vpx_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+
+void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vpx_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vpx_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+RTCD_EXTERN void (*vpx_convolve8_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+
+void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vpx_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vpx_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+RTCD_EXTERN void (*vpx_convolve8_avg_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+
+void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vpx_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vpx_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+RTCD_EXTERN void (*vpx_convolve8_avg_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+
+void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vpx_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vpx_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vpx_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+RTCD_EXTERN void (*vpx_convolve8_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+
+void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vpx_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vpx_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vpx_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+RTCD_EXTERN void (*vpx_convolve8_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+
+void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vpx_convolve_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+RTCD_EXTERN void (*vpx_convolve_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+
+void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vpx_convolve_copy_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+RTCD_EXTERN void (*vpx_convolve_copy)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+
+void vpx_d117_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d117_predictor_16x16 vpx_d117_predictor_16x16_c
+
+void vpx_d117_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d117_predictor_32x32 vpx_d117_predictor_32x32_c
+
+void vpx_d117_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d117_predictor_4x4 vpx_d117_predictor_4x4_c
+
+void vpx_d117_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d117_predictor_8x8 vpx_d117_predictor_8x8_c
+
+void vpx_d135_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d135_predictor_16x16 vpx_d135_predictor_16x16_c
+
+void vpx_d135_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d135_predictor_32x32 vpx_d135_predictor_32x32_c
+
+void vpx_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d135_predictor_4x4 vpx_d135_predictor_4x4_c
+
+void vpx_d135_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d135_predictor_8x8 vpx_d135_predictor_8x8_c
+
+void vpx_d153_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_d153_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_d153_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_d153_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_d153_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_d153_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_d153_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_d153_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_d153_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_d153_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_d153_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_d207_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_d207_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_d207_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_d207_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_d207_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_d207_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_d207_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_d207_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_d207_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_d207_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_16x16 vpx_d207e_predictor_16x16_c
+
+void vpx_d207e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_32x32 vpx_d207e_predictor_32x32_c
+
+void vpx_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_4x4 vpx_d207e_predictor_4x4_c
+
+void vpx_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_8x8 vpx_d207e_predictor_8x8_c
+
+void vpx_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_d45_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_d45_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_d45_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_d45_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_d45_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_d45_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_d45_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_d45_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_d45_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_16x16 vpx_d45e_predictor_16x16_c
+
+void vpx_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_32x32 vpx_d45e_predictor_32x32_c
+
+void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c
+
+void vpx_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_8x8 vpx_d45e_predictor_8x8_c
+
+void vpx_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_d63_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_d63_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_d63_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_d63_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_d63_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_d63_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_d63_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_d63_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_d63_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_16x16 vpx_d63e_predictor_16x16_c
+
+void vpx_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_32x32 vpx_d63e_predictor_32x32_c
+
+void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c
+
+void vpx_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_8x8 vpx_d63e_predictor_8x8_c
+
+void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63f_predictor_4x4 vpx_d63f_predictor_4x4_c
+
+void vpx_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_dc_128_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_dc_128_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_dc_128_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_dc_128_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_dc_128_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_dc_128_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_dc_128_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_dc_128_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_dc_left_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_dc_left_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_dc_left_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_dc_left_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_dc_left_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_dc_left_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_dc_left_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_dc_left_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_dc_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_dc_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_dc_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_dc_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_dc_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_dc_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_dc_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_dc_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_dc_top_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_dc_top_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_dc_top_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_dc_top_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_dc_top_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_dc_top_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_dc_top_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_dc_top_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_h_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_h_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_h_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_h_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_h_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_h_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_h_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_h_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c
+
+void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+void vpx_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
+RTCD_EXTERN void (*vpx_idct16x16_10_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
+
+void vpx_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+void vpx_idct16x16_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
+RTCD_EXTERN void (*vpx_idct16x16_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
+
+void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+void vpx_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
+RTCD_EXTERN void (*vpx_idct16x16_256_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
+
+void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+void vpx_idct32x32_1024_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
+RTCD_EXTERN void (*vpx_idct32x32_1024_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
+
+void vpx_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+void vpx_idct32x32_1024_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
+RTCD_EXTERN void (*vpx_idct32x32_135_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
+
+void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+void vpx_idct32x32_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
+RTCD_EXTERN void (*vpx_idct32x32_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
+
+void vpx_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+void vpx_idct32x32_34_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
+RTCD_EXTERN void (*vpx_idct32x32_34_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
+
+void vpx_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+void vpx_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
+RTCD_EXTERN void (*vpx_idct4x4_16_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
+
+void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+void vpx_idct4x4_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
+RTCD_EXTERN void (*vpx_idct4x4_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
+
+void vpx_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+void vpx_idct8x8_12_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
+RTCD_EXTERN void (*vpx_idct8x8_12_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
+
+void vpx_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+void vpx_idct8x8_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
+RTCD_EXTERN void (*vpx_idct8x8_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
+
+void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+void vpx_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
+RTCD_EXTERN void (*vpx_idct8x8_64_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
+
+void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+void vpx_iwht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
+RTCD_EXTERN void (*vpx_iwht4x4_16_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
+
+void vpx_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
+#define vpx_iwht4x4_1_add vpx_iwht4x4_1_add_c
+
+void vpx_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+void vpx_lpf_horizontal_4_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+RTCD_EXTERN void (*vpx_lpf_horizontal_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+
+void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+void vpx_lpf_horizontal_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+RTCD_EXTERN void (*vpx_lpf_horizontal_4_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+
+void vpx_lpf_horizontal_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+void vpx_lpf_horizontal_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+RTCD_EXTERN void (*vpx_lpf_horizontal_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+
+void vpx_lpf_horizontal_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+void vpx_lpf_horizontal_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+RTCD_EXTERN void (*vpx_lpf_horizontal_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+
+void vpx_lpf_horizontal_edge_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+void vpx_lpf_horizontal_edge_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+void vpx_lpf_horizontal_edge_16_avx2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+RTCD_EXTERN void (*vpx_lpf_horizontal_edge_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+
+void vpx_lpf_horizontal_edge_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+void vpx_lpf_horizontal_edge_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+void vpx_lpf_horizontal_edge_8_avx2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+RTCD_EXTERN void (*vpx_lpf_horizontal_edge_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+
+void vpx_lpf_vertical_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+void vpx_lpf_vertical_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+RTCD_EXTERN void (*vpx_lpf_vertical_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+
+void vpx_lpf_vertical_16_dual_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+void vpx_lpf_vertical_16_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+RTCD_EXTERN void (*vpx_lpf_vertical_16_dual)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+
+void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+void vpx_lpf_vertical_4_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+RTCD_EXTERN void (*vpx_lpf_vertical_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+
+void vpx_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+void vpx_lpf_vertical_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+RTCD_EXTERN void (*vpx_lpf_vertical_4_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+
+void vpx_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+void vpx_lpf_vertical_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+RTCD_EXTERN void (*vpx_lpf_vertical_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
+
+void vpx_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+void vpx_lpf_vertical_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+RTCD_EXTERN void (*vpx_lpf_vertical_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
+
+void vpx_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+void vpx_scaled_2d_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+RTCD_EXTERN void (*vpx_scaled_2d)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+
+void vpx_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+#define vpx_scaled_avg_2d vpx_scaled_avg_2d_c
+
+void vpx_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+#define vpx_scaled_avg_horiz vpx_scaled_avg_horiz_c
+
+void vpx_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+#define vpx_scaled_avg_vert vpx_scaled_avg_vert_c
+
+void vpx_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+#define vpx_scaled_horiz vpx_scaled_horiz_c
+
+void vpx_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
+#define vpx_scaled_vert vpx_scaled_vert_c
+
+void vpx_tm_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_tm_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_tm_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_tm_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_tm_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_tm_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_tm_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_tm_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_tm_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_tm_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_tm_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_tm_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_v_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_v_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_v_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_v_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_v_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_v_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_v_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_v_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+void vpx_v_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vpx_v_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+
+void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c
+
+void vpx_dsp_rtcd(void);
+
+#ifdef RTCD_C
+#include "vpx_ports/x86.h"
+static void setup_rtcd_internal(void)
+{
+    int flags = x86_simd_caps();
+
+    vpx_convolve8 = vpx_convolve8_c;
+    if (flags & HAS_SSE2) vpx_convolve8 = vpx_convolve8_sse2;
+    if (flags & HAS_SSSE3) vpx_convolve8 = vpx_convolve8_ssse3;
+    if (flags & HAS_AVX2) vpx_convolve8 = vpx_convolve8_avx2;
+    vpx_convolve8_avg = vpx_convolve8_avg_c;
+    if (flags & HAS_SSE2) vpx_convolve8_avg = vpx_convolve8_avg_sse2;
+    if (flags & HAS_SSSE3) vpx_convolve8_avg = vpx_convolve8_avg_ssse3;
+    vpx_convolve8_avg_horiz = vpx_convolve8_avg_horiz_c;
+    if (flags & HAS_SSE2) vpx_convolve8_avg_horiz = vpx_convolve8_avg_horiz_sse2;
+    if (flags & HAS_SSSE3) vpx_convolve8_avg_horiz = vpx_convolve8_avg_horiz_ssse3;
+    vpx_convolve8_avg_vert = vpx_convolve8_avg_vert_c;
+    if (flags & HAS_SSE2) vpx_convolve8_avg_vert = vpx_convolve8_avg_vert_sse2;
+    if (flags & HAS_SSSE3) vpx_convolve8_avg_vert = vpx_convolve8_avg_vert_ssse3;
+    vpx_convolve8_horiz = vpx_convolve8_horiz_c;
+    if (flags & HAS_SSE2) vpx_convolve8_horiz = vpx_convolve8_horiz_sse2;
+    if (flags & HAS_SSSE3) vpx_convolve8_horiz = vpx_convolve8_horiz_ssse3;
+    if (flags & HAS_AVX2) vpx_convolve8_horiz = vpx_convolve8_horiz_avx2;
+    vpx_convolve8_vert = vpx_convolve8_vert_c;
+    if (flags & HAS_SSE2) vpx_convolve8_vert = vpx_convolve8_vert_sse2;
+    if (flags & HAS_SSSE3) vpx_convolve8_vert = vpx_convolve8_vert_ssse3;
+    if (flags & HAS_AVX2) vpx_convolve8_vert = vpx_convolve8_vert_avx2;
+    vpx_convolve_avg = vpx_convolve_avg_c;
+    if (flags & HAS_SSE2) vpx_convolve_avg = vpx_convolve_avg_sse2;
+    vpx_convolve_copy = vpx_convolve_copy_c;
+    if (flags & HAS_SSE2) vpx_convolve_copy = vpx_convolve_copy_sse2;
+    vpx_d153_predictor_16x16 = vpx_d153_predictor_16x16_c;
+    if (flags & HAS_SSSE3) vpx_d153_predictor_16x16 = vpx_d153_predictor_16x16_ssse3;
+    vpx_d153_predictor_32x32 = vpx_d153_predictor_32x32_c;
+    if (flags & HAS_SSSE3) vpx_d153_predictor_32x32 = vpx_d153_predictor_32x32_ssse3;
+    vpx_d153_predictor_4x4 = vpx_d153_predictor_4x4_c;
+    if (flags & HAS_SSSE3) vpx_d153_predictor_4x4 = vpx_d153_predictor_4x4_ssse3;
+    vpx_d153_predictor_8x8 = vpx_d153_predictor_8x8_c;
+    if (flags & HAS_SSSE3) vpx_d153_predictor_8x8 = vpx_d153_predictor_8x8_ssse3;
+    vpx_d207_predictor_16x16 = vpx_d207_predictor_16x16_c;
+    if (flags & HAS_SSSE3) vpx_d207_predictor_16x16 = vpx_d207_predictor_16x16_ssse3;
+    vpx_d207_predictor_32x32 = vpx_d207_predictor_32x32_c;
+    if (flags & HAS_SSSE3) vpx_d207_predictor_32x32 = vpx_d207_predictor_32x32_ssse3;
+    vpx_d207_predictor_4x4 = vpx_d207_predictor_4x4_c;
+    if (flags & HAS_SSE2) vpx_d207_predictor_4x4 = vpx_d207_predictor_4x4_sse2;
+    vpx_d207_predictor_8x8 = vpx_d207_predictor_8x8_c;
+    if (flags & HAS_SSSE3) vpx_d207_predictor_8x8 = vpx_d207_predictor_8x8_ssse3;
+    vpx_d45_predictor_16x16 = vpx_d45_predictor_16x16_c;
+    if (flags & HAS_SSSE3) vpx_d45_predictor_16x16 = vpx_d45_predictor_16x16_ssse3;
+    vpx_d45_predictor_32x32 = vpx_d45_predictor_32x32_c;
+    if (flags & HAS_SSSE3) vpx_d45_predictor_32x32 = vpx_d45_predictor_32x32_ssse3;
+    vpx_d45_predictor_4x4 = vpx_d45_predictor_4x4_c;
+    if (flags & HAS_SSE2) vpx_d45_predictor_4x4 = vpx_d45_predictor_4x4_sse2;
+    vpx_d45_predictor_8x8 = vpx_d45_predictor_8x8_c;
+    if (flags & HAS_SSE2) vpx_d45_predictor_8x8 = vpx_d45_predictor_8x8_sse2;
+    vpx_d63_predictor_16x16 = vpx_d63_predictor_16x16_c;
+    if (flags & HAS_SSSE3) vpx_d63_predictor_16x16 = vpx_d63_predictor_16x16_ssse3;
+    vpx_d63_predictor_32x32 = vpx_d63_predictor_32x32_c;
+    if (flags & HAS_SSSE3) vpx_d63_predictor_32x32 = vpx_d63_predictor_32x32_ssse3;
+    vpx_d63_predictor_4x4 = vpx_d63_predictor_4x4_c;
+    if (flags & HAS_SSSE3) vpx_d63_predictor_4x4 = vpx_d63_predictor_4x4_ssse3;
+    vpx_d63_predictor_8x8 = vpx_d63_predictor_8x8_c;
+    if (flags & HAS_SSSE3) vpx_d63_predictor_8x8 = vpx_d63_predictor_8x8_ssse3;
+    vpx_dc_128_predictor_16x16 = vpx_dc_128_predictor_16x16_c;
+    if (flags & HAS_SSE2) vpx_dc_128_predictor_16x16 = vpx_dc_128_predictor_16x16_sse2;
+    vpx_dc_128_predictor_32x32 = vpx_dc_128_predictor_32x32_c;
+    if (flags & HAS_SSE2) vpx_dc_128_predictor_32x32 = vpx_dc_128_predictor_32x32_sse2;
+    vpx_dc_128_predictor_4x4 = vpx_dc_128_predictor_4x4_c;
+    if (flags & HAS_SSE2) vpx_dc_128_predictor_4x4 = vpx_dc_128_predictor_4x4_sse2;
+    vpx_dc_128_predictor_8x8 = vpx_dc_128_predictor_8x8_c;
+    if (flags & HAS_SSE2) vpx_dc_128_predictor_8x8 = vpx_dc_128_predictor_8x8_sse2;
+    vpx_dc_left_predictor_16x16 = vpx_dc_left_predictor_16x16_c;
+    if (flags & HAS_SSE2) vpx_dc_left_predictor_16x16 = vpx_dc_left_predictor_16x16_sse2;
+    vpx_dc_left_predictor_32x32 = vpx_dc_left_predictor_32x32_c;
+    if (flags & HAS_SSE2) vpx_dc_left_predictor_32x32 = vpx_dc_left_predictor_32x32_sse2;
+    vpx_dc_left_predictor_4x4 = vpx_dc_left_predictor_4x4_c;
+    if (flags & HAS_SSE2) vpx_dc_left_predictor_4x4 = vpx_dc_left_predictor_4x4_sse2;
+    vpx_dc_left_predictor_8x8 = vpx_dc_left_predictor_8x8_c;
+    if (flags & HAS_SSE2) vpx_dc_left_predictor_8x8 = vpx_dc_left_predictor_8x8_sse2;
+    vpx_dc_predictor_16x16 = vpx_dc_predictor_16x16_c;
+    if (flags & HAS_SSE2) vpx_dc_predictor_16x16 = vpx_dc_predictor_16x16_sse2;
+    vpx_dc_predictor_32x32 = vpx_dc_predictor_32x32_c;
+    if (flags & HAS_SSE2) vpx_dc_predictor_32x32 = vpx_dc_predictor_32x32_sse2;
+    vpx_dc_predictor_4x4 = vpx_dc_predictor_4x4_c;
+    if (flags & HAS_SSE2) vpx_dc_predictor_4x4 = vpx_dc_predictor_4x4_sse2;
+    vpx_dc_predictor_8x8 = vpx_dc_predictor_8x8_c;
+    if (flags & HAS_SSE2) vpx_dc_predictor_8x8 = vpx_dc_predictor_8x8_sse2;
+    vpx_dc_top_predictor_16x16 = vpx_dc_top_predictor_16x16_c;
+    if (flags & HAS_SSE2) vpx_dc_top_predictor_16x16 = vpx_dc_top_predictor_16x16_sse2;
+    vpx_dc_top_predictor_32x32 = vpx_dc_top_predictor_32x32_c;
+    if (flags & HAS_SSE2) vpx_dc_top_predictor_32x32 = vpx_dc_top_predictor_32x32_sse2;
+    vpx_dc_top_predictor_4x4 = vpx_dc_top_predictor_4x4_c;
+    if (flags & HAS_SSE2) vpx_dc_top_predictor_4x4 = vpx_dc_top_predictor_4x4_sse2;
+    vpx_dc_top_predictor_8x8 = vpx_dc_top_predictor_8x8_c;
+    if (flags & HAS_SSE2) vpx_dc_top_predictor_8x8 = vpx_dc_top_predictor_8x8_sse2;
+    vpx_h_predictor_16x16 = vpx_h_predictor_16x16_c;
+    if (flags & HAS_SSE2) vpx_h_predictor_16x16 = vpx_h_predictor_16x16_sse2;
+    vpx_h_predictor_32x32 = vpx_h_predictor_32x32_c;
+    if (flags & HAS_SSE2) vpx_h_predictor_32x32 = vpx_h_predictor_32x32_sse2;
+    vpx_h_predictor_4x4 = vpx_h_predictor_4x4_c;
+    if (flags & HAS_SSE2) vpx_h_predictor_4x4 = vpx_h_predictor_4x4_sse2;
+    vpx_h_predictor_8x8 = vpx_h_predictor_8x8_c;
+    if (flags & HAS_SSE2) vpx_h_predictor_8x8 = vpx_h_predictor_8x8_sse2;
+    vpx_idct16x16_10_add = vpx_idct16x16_10_add_c;
+    if (flags & HAS_SSE2) vpx_idct16x16_10_add = vpx_idct16x16_10_add_sse2;
+    vpx_idct16x16_1_add = vpx_idct16x16_1_add_c;
+    if (flags & HAS_SSE2) vpx_idct16x16_1_add = vpx_idct16x16_1_add_sse2;
+    vpx_idct16x16_256_add = vpx_idct16x16_256_add_c;
+    if (flags & HAS_SSE2) vpx_idct16x16_256_add = vpx_idct16x16_256_add_sse2;
+    vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_c;
+    if (flags & HAS_SSE2) vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_sse2;
+    vpx_idct32x32_135_add = vpx_idct32x32_135_add_c;
+    if (flags & HAS_SSE2) vpx_idct32x32_135_add = vpx_idct32x32_1024_add_sse2;
+    vpx_idct32x32_1_add = vpx_idct32x32_1_add_c;
+    if (flags & HAS_SSE2) vpx_idct32x32_1_add = vpx_idct32x32_1_add_sse2;
+    vpx_idct32x32_34_add = vpx_idct32x32_34_add_c;
+    if (flags & HAS_SSE2) vpx_idct32x32_34_add = vpx_idct32x32_34_add_sse2;
+    vpx_idct4x4_16_add = vpx_idct4x4_16_add_c;
+    if (flags & HAS_SSE2) vpx_idct4x4_16_add = vpx_idct4x4_16_add_sse2;
+    vpx_idct4x4_1_add = vpx_idct4x4_1_add_c;
+    if (flags & HAS_SSE2) vpx_idct4x4_1_add = vpx_idct4x4_1_add_sse2;
+    vpx_idct8x8_12_add = vpx_idct8x8_12_add_c;
+    if (flags & HAS_SSE2) vpx_idct8x8_12_add = vpx_idct8x8_12_add_sse2;
+    vpx_idct8x8_1_add = vpx_idct8x8_1_add_c;
+    if (flags & HAS_SSE2) vpx_idct8x8_1_add = vpx_idct8x8_1_add_sse2;
+    vpx_idct8x8_64_add = vpx_idct8x8_64_add_c;
+    if (flags & HAS_SSE2) vpx_idct8x8_64_add = vpx_idct8x8_64_add_sse2;
+    vpx_iwht4x4_16_add = vpx_iwht4x4_16_add_c;
+    if (flags & HAS_SSE2) vpx_iwht4x4_16_add = vpx_iwht4x4_16_add_sse2;
+    vpx_lpf_horizontal_4 = vpx_lpf_horizontal_4_c;
+    if (flags & HAS_SSE2) vpx_lpf_horizontal_4 = vpx_lpf_horizontal_4_sse2;
+    vpx_lpf_horizontal_4_dual = vpx_lpf_horizontal_4_dual_c;
+    if (flags & HAS_SSE2) vpx_lpf_horizontal_4_dual = vpx_lpf_horizontal_4_dual_sse2;
+    vpx_lpf_horizontal_8 = vpx_lpf_horizontal_8_c;
+    if (flags & HAS_SSE2) vpx_lpf_horizontal_8 = vpx_lpf_horizontal_8_sse2;
+    vpx_lpf_horizontal_8_dual = vpx_lpf_horizontal_8_dual_c;
+    if (flags & HAS_SSE2) vpx_lpf_horizontal_8_dual = vpx_lpf_horizontal_8_dual_sse2;
+    vpx_lpf_horizontal_edge_16 = vpx_lpf_horizontal_edge_16_c;
+    if (flags & HAS_SSE2) vpx_lpf_horizontal_edge_16 = vpx_lpf_horizontal_edge_16_sse2;
+    if (flags & HAS_AVX2) vpx_lpf_horizontal_edge_16 = vpx_lpf_horizontal_edge_16_avx2;
+    vpx_lpf_horizontal_edge_8 = vpx_lpf_horizontal_edge_8_c;
+    if (flags & HAS_SSE2) vpx_lpf_horizontal_edge_8 = vpx_lpf_horizontal_edge_8_sse2;
+    if (flags & HAS_AVX2) vpx_lpf_horizontal_edge_8 = vpx_lpf_horizontal_edge_8_avx2;
+    vpx_lpf_vertical_16 = vpx_lpf_vertical_16_c;
+    if (flags & HAS_SSE2) vpx_lpf_vertical_16 = vpx_lpf_vertical_16_sse2;
+    vpx_lpf_vertical_16_dual = vpx_lpf_vertical_16_dual_c;
+    if (flags & HAS_SSE2) vpx_lpf_vertical_16_dual = vpx_lpf_vertical_16_dual_sse2;
+    vpx_lpf_vertical_4 = vpx_lpf_vertical_4_c;
+    if (flags & HAS_SSE2) vpx_lpf_vertical_4 = vpx_lpf_vertical_4_sse2;
+    vpx_lpf_vertical_4_dual = vpx_lpf_vertical_4_dual_c;
+    if (flags & HAS_SSE2) vpx_lpf_vertical_4_dual = vpx_lpf_vertical_4_dual_sse2;
+    vpx_lpf_vertical_8 = vpx_lpf_vertical_8_c;
+    if (flags & HAS_SSE2) vpx_lpf_vertical_8 = vpx_lpf_vertical_8_sse2;
+    vpx_lpf_vertical_8_dual = vpx_lpf_vertical_8_dual_c;
+    if (flags & HAS_SSE2) vpx_lpf_vertical_8_dual = vpx_lpf_vertical_8_dual_sse2;
+    vpx_scaled_2d = vpx_scaled_2d_c;
+    if (flags & HAS_SSSE3) vpx_scaled_2d = vpx_scaled_2d_ssse3;
+    vpx_tm_predictor_16x16 = vpx_tm_predictor_16x16_c;
+    if (flags & HAS_SSE2) vpx_tm_predictor_16x16 = vpx_tm_predictor_16x16_sse2;
+    vpx_tm_predictor_32x32 = vpx_tm_predictor_32x32_c;
+    if (flags & HAS_SSE2) vpx_tm_predictor_32x32 = vpx_tm_predictor_32x32_sse2;
+    vpx_tm_predictor_4x4 = vpx_tm_predictor_4x4_c;
+    if (flags & HAS_SSE2) vpx_tm_predictor_4x4 = vpx_tm_predictor_4x4_sse2;
+    vpx_tm_predictor_8x8 = vpx_tm_predictor_8x8_c;
+    if (flags & HAS_SSE2) vpx_tm_predictor_8x8 = vpx_tm_predictor_8x8_sse2;
+    vpx_v_predictor_16x16 = vpx_v_predictor_16x16_c;
+    if (flags & HAS_SSE2) vpx_v_predictor_16x16 = vpx_v_predictor_16x16_sse2;
+    vpx_v_predictor_32x32 = vpx_v_predictor_32x32_c;
+    if (flags & HAS_SSE2) vpx_v_predictor_32x32 = vpx_v_predictor_32x32_sse2;
+    vpx_v_predictor_4x4 = vpx_v_predictor_4x4_c;
+    if (flags & HAS_SSE2) vpx_v_predictor_4x4 = vpx_v_predictor_4x4_sse2;
+    vpx_v_predictor_8x8 = vpx_v_predictor_8x8_c;
+    if (flags & HAS_SSE2) vpx_v_predictor_8x8 = vpx_v_predictor_8x8_sse2;
+}
+#endif
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif
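Note on the pattern above (editorial aside, not part of the patch): every routine in this generated header follows the same shape — a portable `_c` baseline, zero or more SIMD variants (`_sse2`, `_ssse3`, `_avx2`), and an `RTCD_EXTERN` function pointer that `setup_rtcd_internal()` rebinds once at startup based on the flags returned by `x86_simd_caps()`. The sketch below shows that dispatch pattern in isolation; the names (`fast_add_*`, `has_sse2`) are hypothetical stand-ins and are not libvpx symbols.

    /* Minimal sketch of the runtime-dispatch (RTCD) pattern used in the
     * header above. Hypothetical names; libvpx derives its feature flags
     * from x86_simd_caps() inside setup_rtcd_internal(). */
    #include <stdio.h>

    static int has_sse2(void) { return 1; /* stand-in for a real CPUID probe */ }

    static void fast_add_c(int *dst, const int *src, int n) {
        for (int i = 0; i < n; ++i) dst[i] += src[i];   /* portable baseline */
    }
    static void fast_add_sse2(int *dst, const int *src, int n) {
        fast_add_c(dst, src, n);                        /* pretend-optimized variant */
    }

    /* The header exposes this as an RTCD_EXTERN pointer; callers always go
     * through the pointer and never name a specific variant directly. */
    static void (*fast_add)(int *dst, const int *src, int n) = fast_add_c;

    static void setup_rtcd(void) {
        fast_add = fast_add_c;                          /* safe default */
        if (has_sse2()) fast_add = fast_add_sse2;       /* rebind once at init */
    }

    int main(void) {
        int a[4] = {1, 2, 3, 4}, b[4] = {10, 20, 30, 40};
        setup_rtcd();
        fast_add(a, b, 4);
        printf("%d %d %d %d\n", a[0], a[1], a[2], a[3]); /* 11 22 33 44 */
        return 0;
    }

The same reasoning explains why some entries above intentionally reuse another kernel (for example, the 135-coefficient 32x32 IDCT pointer falling back to the 1024-coefficient SSE2 routine): any variant with a compatible signature and a superset of the work can stand in when no dedicated kernel exists.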

+ 18 - 0
thirdparty/libvpx/third_party/x86inc/LICENSE

@@ -0,0 +1,18 @@
+Copyright (C) 2005-2012 x264 project
+
+Authors: Loren Merritt <[email protected]>
+         Anton Mitrofanov <[email protected]>
+         Jason Garrett-Glaser <[email protected]>
+         Henrik Gramner <[email protected]>
+
+Permission to use, copy, modify, and/or distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

+ 20 - 0
thirdparty/libvpx/third_party/x86inc/README.libvpx

@@ -0,0 +1,20 @@
+URL: https://git.videolan.org/git/x264.git
+Version: d23d18655249944c1ca894b451e2c82c7a584c62
+License: ISC
+License File: LICENSE
+
+Description:
+x264/libav's framework for x86 assembly. Contains a variety of macros and
+defines that help automatically allow assembly to work cross-platform.
+
+Local Modifications:
+Get configuration from vpx_config.asm.
+Prefix functions with vpx by default.
+Manage name mangling (prefixing with '_') manually because 'PREFIX' does not
+  exist in libvpx.
+Expand PIC default to macho64 and respect CONFIG_PIC from libvpx
+Set 'private_extern' visibility for macho targets.
+Copy PIC 'GLOBAL' macros from x86_abi_support.asm
+Use .text instead of .rodata on macho to avoid broken tables in PIC mode.
+Use .text with no alignment for aout
+Only use 'hidden' visibility with Chromium
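Editorial aside on the "prefix functions with vpx" and manual '_' name-mangling items above: x86inc.asm does this with NASM macros (see the `private_prefix` and `mangle(x)` definitions in the file that follows). The C-preprocessor analogue below is only an illustration of the same symbol-prefixing idea, assuming made-up names; it is not the libvpx mechanism itself.

    /* C analogue of the symbol prefixing described in README.libvpx.
     * The macro names and vpx_say_hello are illustrative only. */
    #include <stdio.h>

    #define PASTE(a, b) a##b
    #define MANGLE(name) PASTE(vpx_, name)   /* "prefix functions with vpx" */

    static void MANGLE(say_hello)(void) {    /* expands to vpx_say_hello */
        puts("hello from vpx_say_hello");
    }

    int main(void) {
        vpx_say_hello();                     /* the prefixed name is the real symbol */
        return 0;
    }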

+ 1649 - 0
thirdparty/libvpx/third_party/x86inc/x86inc.asm

@@ -0,0 +1,1649 @@
+;*****************************************************************************
+;* x86inc.asm: x264asm abstraction layer
+;*****************************************************************************
+;* Copyright (C) 2005-2016 x264 project
+;*
+;* Authors: Loren Merritt <[email protected]>
+;*          Anton Mitrofanov <[email protected]>
+;*          Fiona Glaser <[email protected]>
+;*          Henrik Gramner <[email protected]>
+;*
+;* Permission to use, copy, modify, and/or distribute this software for any
+;* purpose with or without fee is hereby granted, provided that the above
+;* copyright notice and this permission notice appear in all copies.
+;*
+;* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+;* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+;* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+;* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+;* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+;* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+;* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+;*****************************************************************************
+
+; This is a header file for the x264ASM assembly language, which uses
+; NASM/YASM syntax combined with a large number of macros to provide easy
+; abstraction between different calling conventions (x86_32, win64, linux64).
+; It also has various other useful features to simplify writing the kind of
+; DSP functions that are most often used in x264.
+
+; Unlike the rest of x264, this file is available under an ISC license, as it
+; has significant usefulness outside of x264 and we want it to be available
+; to the largest audience possible.  Of course, if you modify it for your own
+; purposes to add a new feature, we strongly encourage contributing a patch
+; as this feature might be useful for others as well.  Send patches or ideas
+; to [email protected] .
+
+%include "vpx_config.asm"
+
+%ifndef private_prefix
+    %define private_prefix vpx
+%endif
+
+%ifndef public_prefix
+    %define public_prefix private_prefix
+%endif
+
+%ifndef STACK_ALIGNMENT
+    %if ARCH_X86_64
+        %define STACK_ALIGNMENT 16
+    %else
+        %define STACK_ALIGNMENT 4
+    %endif
+%endif
+
+%define WIN64  0
+%define UNIX64 0
+%if ARCH_X86_64
+    %ifidn __OUTPUT_FORMAT__,win32
+        %define WIN64  1
+    %elifidn __OUTPUT_FORMAT__,win64
+        %define WIN64  1
+    %elifidn __OUTPUT_FORMAT__,x64
+        %define WIN64  1
+    %else
+        %define UNIX64 1
+    %endif
+%endif
+
+%define FORMAT_ELF 0
+%ifidn __OUTPUT_FORMAT__,elf
+    %define FORMAT_ELF 1
+%elifidn __OUTPUT_FORMAT__,elf32
+    %define FORMAT_ELF 1
+%elifidn __OUTPUT_FORMAT__,elf64
+    %define FORMAT_ELF 1
+%endif
+
+%define FORMAT_MACHO 0
+%ifidn __OUTPUT_FORMAT__,macho32
+     %define FORMAT_MACHO 1
+%elifidn __OUTPUT_FORMAT__,macho64
+     %define FORMAT_MACHO 1
+%endif
+
+; Set PREFIX for libvpx builds.
+%if FORMAT_ELF
+    %undef PREFIX
+%elif WIN64
+    %undef PREFIX
+%else
+    %define PREFIX
+%endif
+
+%ifdef PREFIX
+    %define mangle(x) _ %+ x
+%else
+    %define mangle(x) x
+%endif
+
+; In some instances macho32 tables get misaligned when using .rodata.
+; When looking at the disassembly it appears that the offset is either
+; correct or consistently off by 90. Placing them in the .text section
+; works around the issue. It appears to be specific to the way libvpx
+; handles the tables.
+%macro SECTION_RODATA 0-1 16
+    %ifidn __OUTPUT_FORMAT__,macho32
+        SECTION .text align=%1
+        fakegot:
+    %elifidn __OUTPUT_FORMAT__,aout
+        SECTION .text
+    %else
+        SECTION .rodata align=%1
+    %endif
+%endmacro
+
+; PIC macros are copied from vpx_ports/x86_abi_support.asm. The "define PIC"
+; from original code is added in for 64bit.
+%ifidn __OUTPUT_FORMAT__,elf32
+%define ABI_IS_32BIT 1
+%elifidn __OUTPUT_FORMAT__,macho32
+%define ABI_IS_32BIT 1
+%elifidn __OUTPUT_FORMAT__,win32
+%define ABI_IS_32BIT 1
+%elifidn __OUTPUT_FORMAT__,aout
+%define ABI_IS_32BIT 1
+%else
+%define ABI_IS_32BIT 0
+%endif
+
+%if ABI_IS_32BIT
+    %if CONFIG_PIC=1
+        %ifidn __OUTPUT_FORMAT__,elf32
+            %define GET_GOT_DEFINED 1
+            %define WRT_PLT wrt ..plt
+            %macro GET_GOT 1
+                extern _GLOBAL_OFFSET_TABLE_
+                push %1
+                call %%get_got
+                %%sub_offset:
+                jmp %%exitGG
+                %%get_got:
+                mov %1, [esp]
+                add %1, _GLOBAL_OFFSET_TABLE_ + $$ - %%sub_offset wrt ..gotpc
+                ret
+                %%exitGG:
+                %undef GLOBAL
+                %define GLOBAL(x) x + %1 wrt ..gotoff
+                %undef RESTORE_GOT
+                %define RESTORE_GOT pop %1
+            %endmacro
+        %elifidn __OUTPUT_FORMAT__,macho32
+            %define GET_GOT_DEFINED 1
+            %macro GET_GOT 1
+                push %1
+                call %%get_got
+                %%get_got:
+                pop  %1
+                %undef GLOBAL
+                %define GLOBAL(x) x + %1 - %%get_got
+                %undef RESTORE_GOT
+                %define RESTORE_GOT pop %1
+            %endmacro
+        %else
+            %define GET_GOT_DEFINED 0
+        %endif
+    %endif
+
+    %if ARCH_X86_64 == 0
+        %undef PIC
+    %endif
+
+%else
+    %macro GET_GOT 1
+    %endmacro
+    %define GLOBAL(x) rel x
+    %define WRT_PLT wrt ..plt
+
+    %if WIN64
+        %define PIC
+    %elifidn __OUTPUT_FORMAT__,macho64
+        %define PIC
+    %elif CONFIG_PIC
+        %define PIC
+    %endif
+%endif
+
+%ifnmacro GET_GOT
+    %macro GET_GOT 1
+    %endmacro
+    %define GLOBAL(x) x
+%endif
+%ifndef RESTORE_GOT
+    %define RESTORE_GOT
+%endif
+%ifndef WRT_PLT
+    %define WRT_PLT
+%endif
+
+%ifdef PIC
+    default rel
+%endif
+
+%ifndef GET_GOT_DEFINED
+    %define GET_GOT_DEFINED 0
+%endif
+; Done with PIC macros
+
+%ifdef __NASM_VER__
+    %use smartalign
+%endif
+
+; Macros to eliminate most code duplication between x86_32 and x86_64:
+; Currently this works only for leaf functions which load all their arguments
+; into registers at the start, and make no other use of the stack. Luckily that
+; covers most of x264's asm.
+
+; PROLOGUE:
+; %1 = number of arguments. loads them from stack if needed.
+; %2 = number of registers used. pushes callee-saved regs if needed.
+; %3 = number of xmm registers used. pushes callee-saved xmm regs if needed.
+; %4 = (optional) stack size to be allocated. The stack will be aligned before
+;      allocating the specified stack size. If the required stack alignment is
+;      larger than the known stack alignment the stack will be manually aligned
+;      and an extra register will be allocated to hold the original stack
+;      pointer (to not invalidate r0m etc.). To prevent the use of an extra
+;      register as stack pointer, request a negative stack size.
+; %4+/%5+ = list of names to define to registers
+; PROLOGUE can also be invoked by adding the same options to cglobal
+
+; e.g.
+; cglobal foo, 2,3,7,0x40, dst, src, tmp
+; declares a function (foo) that automatically loads two arguments (dst and
+; src) into registers, uses one additional register (tmp) plus 7 vector
+; registers (m0-m6) and allocates 0x40 bytes of stack space.
+
+; TODO Some functions can use some args directly from the stack. If they're the
+; last args then you can just not declare them, but if they're in the middle
+; we need a more flexible macro.
+
+; RET:
+; Pops anything that was pushed by PROLOGUE, and returns.
+
+; REP_RET:
+; Use this instead of RET if it's a branch target.
+
+; registers:
+; rN and rNq are the native-size register holding function argument N
+; rNd, rNw, rNb are dword, word, and byte size
+; rNh is the high 8 bits of the word size
+; rNm is the original location of arg N (a register or on the stack), dword
+; rNmp is native size
+
+%macro DECLARE_REG 2-3
+    %define r%1q %2
+    %define r%1d %2d
+    %define r%1w %2w
+    %define r%1b %2b
+    %define r%1h %2h
+    %define %2q %2
+    %if %0 == 2
+        %define r%1m  %2d
+        %define r%1mp %2
+    %elif ARCH_X86_64 ; memory
+        %define r%1m [rstk + stack_offset + %3]
+        %define r%1mp qword r %+ %1 %+ m
+    %else
+        %define r%1m [rstk + stack_offset + %3]
+        %define r%1mp dword r %+ %1 %+ m
+    %endif
+    %define r%1  %2
+%endmacro
+
+%macro DECLARE_REG_SIZE 3
+    %define r%1q r%1
+    %define e%1q r%1
+    %define r%1d e%1
+    %define e%1d e%1
+    %define r%1w %1
+    %define e%1w %1
+    %define r%1h %3
+    %define e%1h %3
+    %define r%1b %2
+    %define e%1b %2
+    %if ARCH_X86_64 == 0
+        %define r%1 e%1
+    %endif
+%endmacro
+
+DECLARE_REG_SIZE ax, al, ah
+DECLARE_REG_SIZE bx, bl, bh
+DECLARE_REG_SIZE cx, cl, ch
+DECLARE_REG_SIZE dx, dl, dh
+DECLARE_REG_SIZE si, sil, null
+DECLARE_REG_SIZE di, dil, null
+DECLARE_REG_SIZE bp, bpl, null
+
+; t# defines for when per-arch register allocation is more complex than just function arguments
+
+%macro DECLARE_REG_TMP 1-*
+    %assign %%i 0
+    %rep %0
+        CAT_XDEFINE t, %%i, r%1
+        %assign %%i %%i+1
+        %rotate 1
+    %endrep
+%endmacro
+
+%macro DECLARE_REG_TMP_SIZE 0-*
+    %rep %0
+        %define t%1q t%1 %+ q
+        %define t%1d t%1 %+ d
+        %define t%1w t%1 %+ w
+        %define t%1h t%1 %+ h
+        %define t%1b t%1 %+ b
+        %rotate 1
+    %endrep
+%endmacro
+
+DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
+
+%if ARCH_X86_64
+    %define gprsize 8
+%else
+    %define gprsize 4
+%endif
+
+%macro PUSH 1
+    push %1
+    %ifidn rstk, rsp
+        %assign stack_offset stack_offset+gprsize
+    %endif
+%endmacro
+
+%macro POP 1
+    pop %1
+    %ifidn rstk, rsp
+        %assign stack_offset stack_offset-gprsize
+    %endif
+%endmacro
+
+%macro PUSH_IF_USED 1-*
+    %rep %0
+        %if %1 < regs_used
+            PUSH r%1
+        %endif
+        %rotate 1
+    %endrep
+%endmacro
+
+%macro POP_IF_USED 1-*
+    %rep %0
+        %if %1 < regs_used
+            pop r%1
+        %endif
+        %rotate 1
+    %endrep
+%endmacro
+
+%macro LOAD_IF_USED 1-*
+    %rep %0
+        %if %1 < num_args
+            mov r%1, r %+ %1 %+ mp
+        %endif
+        %rotate 1
+    %endrep
+%endmacro
+
+%macro SUB 2
+    sub %1, %2
+    %ifidn %1, rstk
+        %assign stack_offset stack_offset+(%2)
+    %endif
+%endmacro
+
+%macro ADD 2
+    add %1, %2
+    %ifidn %1, rstk
+        %assign stack_offset stack_offset-(%2)
+    %endif
+%endmacro
+
+%macro movifnidn 2
+    %ifnidn %1, %2
+        mov %1, %2
+    %endif
+%endmacro
+
+%macro movsxdifnidn 2
+    %ifnidn %1, %2
+        movsxd %1, %2
+    %endif
+%endmacro
+
+%macro ASSERT 1
+    %if (%1) == 0
+        %error assertion ``%1'' failed
+    %endif
+%endmacro
+
+%macro DEFINE_ARGS 0-*
+    %ifdef n_arg_names
+        %assign %%i 0
+        %rep n_arg_names
+            CAT_UNDEF arg_name %+ %%i, q
+            CAT_UNDEF arg_name %+ %%i, d
+            CAT_UNDEF arg_name %+ %%i, w
+            CAT_UNDEF arg_name %+ %%i, h
+            CAT_UNDEF arg_name %+ %%i, b
+            CAT_UNDEF arg_name %+ %%i, m
+            CAT_UNDEF arg_name %+ %%i, mp
+            CAT_UNDEF arg_name, %%i
+            %assign %%i %%i+1
+        %endrep
+    %endif
+
+    %xdefine %%stack_offset stack_offset
+    %undef stack_offset ; so that the current value of stack_offset doesn't get baked in by xdefine
+    %assign %%i 0
+    %rep %0
+        %xdefine %1q r %+ %%i %+ q
+        %xdefine %1d r %+ %%i %+ d
+        %xdefine %1w r %+ %%i %+ w
+        %xdefine %1h r %+ %%i %+ h
+        %xdefine %1b r %+ %%i %+ b
+        %xdefine %1m r %+ %%i %+ m
+        %xdefine %1mp r %+ %%i %+ mp
+        CAT_XDEFINE arg_name, %%i, %1
+        %assign %%i %%i+1
+        %rotate 1
+    %endrep
+    %xdefine stack_offset %%stack_offset
+    %assign n_arg_names %0
+%endmacro
+
+%define required_stack_alignment ((mmsize + 15) & ~15)
+
+%macro ALLOC_STACK 1-2 0 ; stack_size, n_xmm_regs (for win64 only)
+    %ifnum %1
+        %if %1 != 0
+            %assign %%pad 0
+            %assign stack_size %1
+            %if stack_size < 0
+                %assign stack_size -stack_size
+            %endif
+            %if WIN64
+                %assign %%pad %%pad + 32 ; shadow space
+                %if mmsize != 8
+                    %assign xmm_regs_used %2
+                    %if xmm_regs_used > 8
+                        %assign %%pad %%pad + (xmm_regs_used-8)*16 ; callee-saved xmm registers
+                    %endif
+                %endif
+            %endif
+            %if required_stack_alignment <= STACK_ALIGNMENT
+                ; maintain the current stack alignment
+                %assign stack_size_padded stack_size + %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1))
+                SUB rsp, stack_size_padded
+            %else
+                %assign %%reg_num (regs_used - 1)
+                %xdefine rstk r %+ %%reg_num
+                ; align stack, and save original stack location directly above
+                ; it, i.e. in [rsp+stack_size_padded], so we can restore the
+                ; stack in a single instruction (i.e. mov rsp, rstk or mov
+                ; rsp, [rsp+stack_size_padded])
+                %if %1 < 0 ; need to store rsp on stack
+                    %xdefine rstkm [rsp + stack_size + %%pad]
+                    %assign %%pad %%pad + gprsize
+                %else ; can keep rsp in rstk during whole function
+                    %xdefine rstkm rstk
+                %endif
+                %assign stack_size_padded stack_size + ((%%pad + required_stack_alignment-1) & ~(required_stack_alignment-1))
+                mov rstk, rsp
+                and rsp, ~(required_stack_alignment-1)
+                sub rsp, stack_size_padded
+                movifnidn rstkm, rstk
+            %endif
+            WIN64_PUSH_XMM
+        %endif
+    %endif
+%endmacro
+
+%macro SETUP_STACK_POINTER 1
+    %ifnum %1
+        %if %1 != 0 && required_stack_alignment > STACK_ALIGNMENT
+            %if %1 > 0
+                %assign regs_used (regs_used + 1)
+            %endif
+            %if ARCH_X86_64 && regs_used < 5 + UNIX64 * 3
+                ; Ensure that we don't clobber any registers containing arguments
+                %assign regs_used 5 + UNIX64 * 3
+            %endif
+        %endif
+    %endif
+%endmacro
+
+%macro DEFINE_ARGS_INTERNAL 3+
+    %ifnum %2
+        DEFINE_ARGS %3
+    %elif %1 == 4
+        DEFINE_ARGS %2
+    %elif %1 > 4
+        DEFINE_ARGS %2, %3
+    %endif
+%endmacro
+
+%if WIN64 ; Windows x64 ;=================================================
+
+DECLARE_REG 0,  rcx
+DECLARE_REG 1,  rdx
+DECLARE_REG 2,  R8
+DECLARE_REG 3,  R9
+DECLARE_REG 4,  R10, 40
+DECLARE_REG 5,  R11, 48
+DECLARE_REG 6,  rax, 56
+DECLARE_REG 7,  rdi, 64
+DECLARE_REG 8,  rsi, 72
+DECLARE_REG 9,  rbx, 80
+DECLARE_REG 10, rbp, 88
+DECLARE_REG 11, R12, 96
+DECLARE_REG 12, R13, 104
+DECLARE_REG 13, R14, 112
+DECLARE_REG 14, R15, 120
+
+%macro PROLOGUE 2-5+ 0 ; #args, #regs, #xmm_regs, [stack_size,] arg_names...
+    %assign num_args %1
+    %assign regs_used %2
+    ASSERT regs_used >= num_args
+    SETUP_STACK_POINTER %4
+    ASSERT regs_used <= 15
+    PUSH_IF_USED 7, 8, 9, 10, 11, 12, 13, 14
+    ALLOC_STACK %4, %3
+    %if mmsize != 8 && stack_size == 0
+        WIN64_SPILL_XMM %3
+    %endif
+    LOAD_IF_USED 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14
+    DEFINE_ARGS_INTERNAL %0, %4, %5
+%endmacro
+
+%macro WIN64_PUSH_XMM 0
+    ; Use the shadow space to store XMM6 and XMM7, the rest needs stack space allocated.
+    %if xmm_regs_used > 6
+        movaps [rstk + stack_offset +  8], xmm6
+    %endif
+    %if xmm_regs_used > 7
+        movaps [rstk + stack_offset + 24], xmm7
+    %endif
+    %if xmm_regs_used > 8
+        %assign %%i 8
+        %rep xmm_regs_used-8
+            movaps [rsp + (%%i-8)*16 + stack_size + 32], xmm %+ %%i
+            %assign %%i %%i+1
+        %endrep
+    %endif
+%endmacro
+
+%macro WIN64_SPILL_XMM 1
+    %assign xmm_regs_used %1
+    ASSERT xmm_regs_used <= 16
+    %if xmm_regs_used > 8
+        ; Allocate stack space for callee-saved xmm registers plus shadow space and align the stack.
+        %assign %%pad (xmm_regs_used-8)*16 + 32
+        %assign stack_size_padded %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1))
+        SUB rsp, stack_size_padded
+    %endif
+    WIN64_PUSH_XMM
+%endmacro
+
+%macro WIN64_RESTORE_XMM_INTERNAL 1
+    %assign %%pad_size 0
+    %if xmm_regs_used > 8
+        %assign %%i xmm_regs_used
+        %rep xmm_regs_used-8
+            %assign %%i %%i-1
+            movaps xmm %+ %%i, [%1 + (%%i-8)*16 + stack_size + 32]
+        %endrep
+    %endif
+    %if stack_size_padded > 0
+        %if stack_size > 0 && required_stack_alignment > STACK_ALIGNMENT
+            mov rsp, rstkm
+        %else
+            add %1, stack_size_padded
+            %assign %%pad_size stack_size_padded
+        %endif
+    %endif
+    %if xmm_regs_used > 7
+        movaps xmm7, [%1 + stack_offset - %%pad_size + 24]
+    %endif
+    %if xmm_regs_used > 6
+        movaps xmm6, [%1 + stack_offset - %%pad_size +  8]
+    %endif
+%endmacro
+
+%macro WIN64_RESTORE_XMM 1
+    WIN64_RESTORE_XMM_INTERNAL %1
+    %assign stack_offset (stack_offset-stack_size_padded)
+    %assign xmm_regs_used 0
+%endmacro
+
+%define has_epilogue regs_used > 7 || xmm_regs_used > 6 || mmsize == 32 || stack_size > 0
+
+%macro RET 0
+    WIN64_RESTORE_XMM_INTERNAL rsp
+    POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7
+    %if mmsize == 32
+        vzeroupper
+    %endif
+    AUTO_REP_RET
+%endmacro
+
+%elif ARCH_X86_64 ; *nix x64 ;=============================================
+
+DECLARE_REG 0,  rdi
+DECLARE_REG 1,  rsi
+DECLARE_REG 2,  rdx
+DECLARE_REG 3,  rcx
+DECLARE_REG 4,  R8
+DECLARE_REG 5,  R9
+DECLARE_REG 6,  rax, 8
+DECLARE_REG 7,  R10, 16
+DECLARE_REG 8,  R11, 24
+DECLARE_REG 9,  rbx, 32
+DECLARE_REG 10, rbp, 40
+DECLARE_REG 11, R12, 48
+DECLARE_REG 12, R13, 56
+DECLARE_REG 13, R14, 64
+DECLARE_REG 14, R15, 72
+
+%macro PROLOGUE 2-5+ ; #args, #regs, #xmm_regs, [stack_size,] arg_names...
+    %assign num_args %1
+    %assign regs_used %2
+    ASSERT regs_used >= num_args
+    SETUP_STACK_POINTER %4
+    ASSERT regs_used <= 15
+    PUSH_IF_USED 9, 10, 11, 12, 13, 14
+    ALLOC_STACK %4
+    LOAD_IF_USED 6, 7, 8, 9, 10, 11, 12, 13, 14
+    DEFINE_ARGS_INTERNAL %0, %4, %5
+%endmacro
+
+%define has_epilogue regs_used > 9 || mmsize == 32 || stack_size > 0
+
+%macro RET 0
+    %if stack_size_padded > 0
+        %if required_stack_alignment > STACK_ALIGNMENT
+            mov rsp, rstkm
+        %else
+            add rsp, stack_size_padded
+        %endif
+    %endif
+    POP_IF_USED 14, 13, 12, 11, 10, 9
+    %if mmsize == 32
+        vzeroupper
+    %endif
+    AUTO_REP_RET
+%endmacro
+
+%else ; X86_32 ;==============================================================
+
+DECLARE_REG 0, eax, 4
+DECLARE_REG 1, ecx, 8
+DECLARE_REG 2, edx, 12
+DECLARE_REG 3, ebx, 16
+DECLARE_REG 4, esi, 20
+DECLARE_REG 5, edi, 24
+DECLARE_REG 6, ebp, 28
+%define rsp esp
+
+%macro DECLARE_ARG 1-*
+    %rep %0
+        %define r%1m [rstk + stack_offset + 4*%1 + 4]
+        %define r%1mp dword r%1m
+        %rotate 1
+    %endrep
+%endmacro
+
+DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
+
+%macro PROLOGUE 2-5+ ; #args, #regs, #xmm_regs, [stack_size,] arg_names...
+    %assign num_args %1
+    %assign regs_used %2
+    ASSERT regs_used >= num_args
+    %if num_args > 7
+        %assign num_args 7
+    %endif
+    %if regs_used > 7
+        %assign regs_used 7
+    %endif
+    SETUP_STACK_POINTER %4
+    ASSERT regs_used <= 7
+    PUSH_IF_USED 3, 4, 5, 6
+    ALLOC_STACK %4
+    LOAD_IF_USED 0, 1, 2, 3, 4, 5, 6
+    DEFINE_ARGS_INTERNAL %0, %4, %5
+%endmacro
+
+%define has_epilogue regs_used > 3 || mmsize == 32 || stack_size > 0
+
+%macro RET 0
+    %if stack_size_padded > 0
+        %if required_stack_alignment > STACK_ALIGNMENT
+            mov rsp, rstkm
+        %else
+            add rsp, stack_size_padded
+        %endif
+    %endif
+    POP_IF_USED 6, 5, 4, 3
+    %if mmsize == 32
+        vzeroupper
+    %endif
+    AUTO_REP_RET
+%endmacro
+
+%endif ;======================================================================
+
+%if WIN64 == 0
+    %macro WIN64_SPILL_XMM 1
+    %endmacro
+    %macro WIN64_RESTORE_XMM 1
+    %endmacro
+    %macro WIN64_PUSH_XMM 0
+    %endmacro
+%endif
+
+; On AMD cpus <=K10, an ordinary ret is slow if it immediately follows either
+; a branch or a branch target. So switch to a 2-byte form of ret in that case.
+; We can automatically detect "follows a branch", but not a branch target.
+; (SSSE3 is a sufficient condition to know that your cpu doesn't have this problem.)
+%macro REP_RET 0
+    %if has_epilogue
+        RET
+    %else
+        rep ret
+    %endif
+    annotate_function_size
+%endmacro
+
+%define last_branch_adr $$
+%macro AUTO_REP_RET 0
+    %if notcpuflag(ssse3)
+        times ((last_branch_adr-$)>>31)+1 rep ; times 1 iff $ == last_branch_adr.
+    %endif
+    ret
+    annotate_function_size
+%endmacro
+
+%macro BRANCH_INSTR 0-*
+    %rep %0
+        %macro %1 1-2 %1
+            %2 %1
+            %if notcpuflag(ssse3)
+                %%branch_instr equ $
+                %xdefine last_branch_adr %%branch_instr
+            %endif
+        %endmacro
+        %rotate 1
+    %endrep
+%endmacro
+
+BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae, jna, jnae, jb, jbe, jnb, jnbe, jc, jnc, js, jns, jo, jno, jp, jnp
+
+%macro TAIL_CALL 2 ; callee, is_nonadjacent
+    %if has_epilogue
+        call %1
+        RET
+    %elif %2
+        jmp %1
+    %endif
+    annotate_function_size
+%endmacro
+
+;=============================================================================
+; arch-independent part
+;=============================================================================
+
+%assign function_align 16
+
+; Begin a function.
+; Applies any symbol mangling needed for C linkage, and sets up a define such that
+; subsequent uses of the function name automatically refer to the mangled version.
+; Appends cpuflags to the function name if cpuflags has been specified.
+; The "" empty default parameter is a workaround for nasm, which fails if SUFFIX
+; is empty and we call cglobal_internal with just %1 %+ SUFFIX (without %2).
+%macro cglobal 1-2+ "" ; name, [PROLOGUE args]
+    cglobal_internal 1, %1 %+ SUFFIX, %2
+%endmacro
+%macro cvisible 1-2+ "" ; name, [PROLOGUE args]
+    cglobal_internal 0, %1 %+ SUFFIX, %2
+%endmacro
+%macro cglobal_internal 2-3+
+    annotate_function_size
+    %if %1
+        %xdefine %%FUNCTION_PREFIX private_prefix
+        ; libvpx explicitly sets visibility in shared object builds. Avoid
+        ; setting visibility to hidden as it may break builds that split
+        ; sources on, e.g., directory boundaries.
+        %ifdef CHROMIUM
+            %xdefine %%VISIBILITY hidden
+        %else
+            %xdefine %%VISIBILITY
+        %endif
+    %else
+        %xdefine %%FUNCTION_PREFIX public_prefix
+        %xdefine %%VISIBILITY
+    %endif
+    %ifndef cglobaled_%2
+        %xdefine %2 mangle(%%FUNCTION_PREFIX %+ _ %+ %2)
+        %xdefine %2.skip_prologue %2 %+ .skip_prologue
+        CAT_XDEFINE cglobaled_, %2, 1
+    %endif
+    %xdefine current_function %2
+    %xdefine current_function_section __SECT__
+    %if FORMAT_ELF
+        global %2:function %%VISIBILITY
+    %elif FORMAT_MACHO
+        %ifdef __NASM_VER__
+            global %2
+        %else
+            global %2:private_extern
+        %endif
+    %else
+        global %2
+    %endif
+    align function_align
+    %2:
+    RESET_MM_PERMUTATION        ; needed for x86-64, also makes disassembly somewhat nicer
+    %xdefine rstk rsp           ; copy of the original stack pointer, used when greater alignment than the known stack alignment is required
+    %assign stack_offset 0      ; stack pointer offset relative to the return address
+    %assign stack_size 0        ; amount of stack space that can be freely used inside a function
+    %assign stack_size_padded 0 ; total amount of allocated stack space, including space for callee-saved xmm registers on WIN64 and alignment padding
+    %assign xmm_regs_used 0     ; number of XMM registers requested, used for dealing with callee-saved registers on WIN64
+    %ifnidn %3, ""
+        PROLOGUE %3
+    %endif
+%endmacro
+
+%macro cextern 1
+    %xdefine %1 mangle(private_prefix %+ _ %+ %1)
+    CAT_XDEFINE cglobaled_, %1, 1
+    extern %1
+%endmacro
+
+; like cextern, but without the prefix
+%macro cextern_naked 1
+    %ifdef PREFIX
+        %xdefine %1 mangle(%1)
+    %endif
+    CAT_XDEFINE cglobaled_, %1, 1
+    extern %1
+%endmacro
+
+%macro const 1-2+
+    %xdefine %1 mangle(private_prefix %+ _ %+ %1)
+    %if FORMAT_ELF
+        global %1:data hidden
+    %else
+        global %1
+    %endif
+    %1: %2
+%endmacro
+
+; This is needed for ELF, otherwise the GNU linker assumes the stack is executable by default.
+%if FORMAT_ELF
+    [SECTION .note.GNU-stack noalloc noexec nowrite progbits]
+%endif
+
+; Tell debuggers how large the function was.
+; This may be invoked multiple times per function; we rely on later instances overriding earlier ones.
+; This is invoked by RET and similar macros, and also cglobal does it for the previous function,
+; but if the last function in a source file doesn't use any of the standard macros for its epilogue,
+; then its size might be unspecified.
+%macro annotate_function_size 0
+    %ifdef __YASM_VER__
+        %ifdef current_function
+            %if FORMAT_ELF
+                current_function_section
+                %%ecf equ $
+                size current_function %%ecf - current_function
+                __SECT__
+            %endif
+        %endif
+    %endif
+%endmacro
+
+; cpuflags
+
+%assign cpuflags_mmx      (1<<0)
+%assign cpuflags_mmx2     (1<<1) | cpuflags_mmx
+%assign cpuflags_3dnow    (1<<2) | cpuflags_mmx
+%assign cpuflags_3dnowext (1<<3) | cpuflags_3dnow
+%assign cpuflags_sse      (1<<4) | cpuflags_mmx2
+%assign cpuflags_sse2     (1<<5) | cpuflags_sse
+%assign cpuflags_sse2slow (1<<6) | cpuflags_sse2
+%assign cpuflags_sse3     (1<<7) | cpuflags_sse2
+%assign cpuflags_ssse3    (1<<8) | cpuflags_sse3
+%assign cpuflags_sse4     (1<<9) | cpuflags_ssse3
+%assign cpuflags_sse42    (1<<10)| cpuflags_sse4
+%assign cpuflags_avx      (1<<11)| cpuflags_sse42
+%assign cpuflags_xop      (1<<12)| cpuflags_avx
+%assign cpuflags_fma4     (1<<13)| cpuflags_avx
+%assign cpuflags_fma3     (1<<14)| cpuflags_avx
+%assign cpuflags_avx2     (1<<15)| cpuflags_fma3
+
+%assign cpuflags_cache32  (1<<16)
+%assign cpuflags_cache64  (1<<17)
+%assign cpuflags_slowctz  (1<<18)
+%assign cpuflags_lzcnt    (1<<19)
+%assign cpuflags_aligned  (1<<20) ; not a cpu feature, but a function variant
+%assign cpuflags_atom     (1<<21)
+%assign cpuflags_bmi1     (1<<22)|cpuflags_lzcnt
+%assign cpuflags_bmi2     (1<<23)|cpuflags_bmi1
+
+; Returns a boolean value expressing whether or not the specified cpuflag is enabled.
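+; How the expression below works: ((cpuflags & mask) ^ mask) is zero only when every
+; bit of the requested flag (including its implied prerequisites) is set; subtracting 1
+; then yields an all-ones value in exactly that case, so ((... - 1) >> 31) & 1
+; evaluates to 1 when the flag is fully enabled and 0 otherwise.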
+%define    cpuflag(x) (((((cpuflags & (cpuflags_ %+ x)) ^ (cpuflags_ %+ x)) - 1) >> 31) & 1)
+%define notcpuflag(x) (cpuflag(x) ^ 1)
+
+; Takes an arbitrary number of cpuflags from the above list.
+; All subsequent functions (up to the next INIT_CPUFLAGS) are built for the specified cpu.
+; You shouldn't need to invoke this macro directly, it's a subroutine for INIT_MMX &co.
+%macro INIT_CPUFLAGS 0-*
+    %xdefine SUFFIX
+    %undef cpuname
+    %assign cpuflags 0
+
+    %if %0 >= 1
+        %rep %0
+            %ifdef cpuname
+                %xdefine cpuname cpuname %+ _%1
+            %else
+                %xdefine cpuname %1
+            %endif
+            %assign cpuflags cpuflags | cpuflags_%1
+            %rotate 1
+        %endrep
+        %xdefine SUFFIX _ %+ cpuname
+
+        %if cpuflag(avx)
+            %assign avx_enabled 1
+        %endif
+        %if (mmsize == 16 && notcpuflag(sse2)) || (mmsize == 32 && notcpuflag(avx2))
+            %define mova movaps
+            %define movu movups
+            %define movnta movntps
+        %endif
+        %if cpuflag(aligned)
+            %define movu mova
+        %elif cpuflag(sse3) && notcpuflag(ssse3)
+            %define movu lddqu
+        %endif
+    %endif
+
+    %if ARCH_X86_64 || cpuflag(sse2)
+        %ifdef __NASM_VER__
+            ALIGNMODE k8
+        %else
+            CPU amdnop
+        %endif
+    %else
+        %ifdef __NASM_VER__
+            ALIGNMODE nop
+        %else
+            CPU basicnop
+        %endif
+    %endif
+%endmacro
+
+; Merge mmx and sse*
+; m# is a simd register of the currently selected size
+; xm# is the corresponding xmm register if mmsize >= 16, otherwise the same as m#
+; ym# is the corresponding ymm register if mmsize >= 32, otherwise the same as m#
+; (All 3 remain in sync through SWAP.)
+
+%macro CAT_XDEFINE 3
+    %xdefine %1%2 %3
+%endmacro
+
+%macro CAT_UNDEF 2
+    %undef %1%2
+%endmacro
+
+%macro INIT_MMX 0-1+
+    %assign avx_enabled 0
+    %define RESET_MM_PERMUTATION INIT_MMX %1
+    %define mmsize 8
+    %define num_mmregs 8
+    %define mova movq
+    %define movu movq
+    %define movh movd
+    %define movnta movntq
+    %assign %%i 0
+    %rep 8
+        CAT_XDEFINE m, %%i, mm %+ %%i
+        CAT_XDEFINE nnmm, %%i, %%i
+        %assign %%i %%i+1
+    %endrep
+    %rep 8
+        CAT_UNDEF m, %%i
+        CAT_UNDEF nnmm, %%i
+        %assign %%i %%i+1
+    %endrep
+    INIT_CPUFLAGS %1
+%endmacro
+
+%macro INIT_XMM 0-1+
+    %assign avx_enabled 0
+    %define RESET_MM_PERMUTATION INIT_XMM %1
+    %define mmsize 16
+    %define num_mmregs 8
+    %if ARCH_X86_64
+        %define num_mmregs 16
+    %endif
+    %define mova movdqa
+    %define movu movdqu
+    %define movh movq
+    %define movnta movntdq
+    %assign %%i 0
+    %rep num_mmregs
+        CAT_XDEFINE m, %%i, xmm %+ %%i
+        CAT_XDEFINE nnxmm, %%i, %%i
+        %assign %%i %%i+1
+    %endrep
+    INIT_CPUFLAGS %1
+%endmacro
+
+%macro INIT_YMM 0-1+
+    %assign avx_enabled 1
+    %define RESET_MM_PERMUTATION INIT_YMM %1
+    %define mmsize 32
+    %define num_mmregs 8
+    %if ARCH_X86_64
+        %define num_mmregs 16
+    %endif
+    %define mova movdqa
+    %define movu movdqu
+    %undef movh
+    %define movnta movntdq
+    %assign %%i 0
+    %rep num_mmregs
+        CAT_XDEFINE m, %%i, ymm %+ %%i
+        CAT_XDEFINE nnymm, %%i, %%i
+        %assign %%i %%i+1
+    %endrep
+    INIT_CPUFLAGS %1
+%endmacro
+
+INIT_XMM
+
+%macro DECLARE_MMCAST 1
+    %define  mmmm%1   mm%1
+    %define  mmxmm%1  mm%1
+    %define  mmymm%1  mm%1
+    %define xmmmm%1   mm%1
+    %define xmmxmm%1 xmm%1
+    %define xmmymm%1 xmm%1
+    %define ymmmm%1   mm%1
+    %define ymmxmm%1 xmm%1
+    %define ymmymm%1 ymm%1
+    %define xm%1 xmm %+ m%1
+    %define ym%1 ymm %+ m%1
+%endmacro
+
+%assign i 0
+%rep 16
+    DECLARE_MMCAST i
+    %assign i i+1
+%endrep
+
+; I often want to use macros that permute their arguments. e.g. there's no
+; efficient way to implement butterfly or transpose or dct without swapping some
+; arguments.
+;
+; I would like to not have to manually keep track of the permutations:
+; If I insert a permutation in the middle of a function, it should automatically
+; change everything that follows. For more complex macros I may also have multiple
+; implementations, e.g. the SSE2 and SSSE3 versions may have different permutations.
+;
+; Hence these macros. Insert a PERMUTE or some SWAPs at the end of a macro that
+; permutes its arguments. It's equivalent to exchanging the contents of the
+; registers, except that this way you exchange the register names instead, so it
+; doesn't cost any cycles.
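+; For example, "SWAP 0, 1" makes the name m0 refer to the register previously called
+; m1 and vice versa for the rest of the function; no instructions are emitted.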
+
+%macro PERMUTE 2-* ; takes a list of pairs to swap
+    %rep %0/2
+        %xdefine %%tmp%2 m%2
+        %rotate 2
+    %endrep
+    %rep %0/2
+        %xdefine m%1 %%tmp%2
+        CAT_XDEFINE nn, m%1, %1
+        %rotate 2
+    %endrep
+%endmacro
+
+%macro SWAP 2+ ; swaps a single chain (sometimes more concise than pairs)
+    %ifnum %1 ; SWAP 0, 1, ...
+        SWAP_INTERNAL_NUM %1, %2
+    %else ; SWAP m0, m1, ...
+        SWAP_INTERNAL_NAME %1, %2
+    %endif
+%endmacro
+
+%macro SWAP_INTERNAL_NUM 2-*
+    %rep %0-1
+        %xdefine %%tmp m%1
+        %xdefine m%1 m%2
+        %xdefine m%2 %%tmp
+        CAT_XDEFINE nn, m%1, %1
+        CAT_XDEFINE nn, m%2, %2
+        %rotate 1
+    %endrep
+%endmacro
+
+%macro SWAP_INTERNAL_NAME 2-*
+    %xdefine %%args nn %+ %1
+    %rep %0-1
+        %xdefine %%args %%args, nn %+ %2
+        %rotate 1
+    %endrep
+    SWAP_INTERNAL_NUM %%args
+%endmacro
+
+; If SAVE_MM_PERMUTATION is placed at the end of a function, then any later
+; calls to that function will automatically load the permutation, so values can
+; be returned in mmregs.
+%macro SAVE_MM_PERMUTATION 0-1
+    %if %0
+        %xdefine %%f %1_m
+    %else
+        %xdefine %%f current_function %+ _m
+    %endif
+    %assign %%i 0
+    %rep num_mmregs
+        CAT_XDEFINE %%f, %%i, m %+ %%i
+        %assign %%i %%i+1
+    %endrep
+%endmacro
+
+%macro LOAD_MM_PERMUTATION 1 ; name to load from
+    %ifdef %1_m0
+        %assign %%i 0
+        %rep num_mmregs
+            CAT_XDEFINE m, %%i, %1_m %+ %%i
+            CAT_XDEFINE nn, m %+ %%i, %%i
+            %assign %%i %%i+1
+        %endrep
+    %endif
+%endmacro
+
+; Append cpuflags to the callee's name iff the appended name is known and the plain name isn't
+%macro call 1
+    call_internal %1 %+ SUFFIX, %1
+%endmacro
+%macro call_internal 2
+    %xdefine %%i %2
+    %ifndef cglobaled_%2
+        %ifdef cglobaled_%1
+            %xdefine %%i %1
+        %endif
+    %endif
+    call %%i
+    LOAD_MM_PERMUTATION %%i
+%endmacro
+
+; Substitutions that reduce instruction size but are functionally equivalent
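+; (x86 sign-extends 8-bit immediates, so +128 needs a 32-bit immediate while -128
+; fits in 8 bits; rewriting "add r, 128" as "sub r, -128" saves 3 bytes of immediate.)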
+%macro add 2
+    %ifnum %2
+        %if %2==128
+            sub %1, -128
+        %else
+            add %1, %2
+        %endif
+    %else
+        add %1, %2
+    %endif
+%endmacro
+
+%macro sub 2
+    %ifnum %2
+        %if %2==128
+            add %1, -128
+        %else
+            sub %1, %2
+        %endif
+    %else
+        sub %1, %2
+    %endif
+%endmacro
+
+;=============================================================================
+; AVX abstraction layer
+;=============================================================================
+
+%assign i 0
+%rep 16
+    %if i < 8
+        CAT_XDEFINE sizeofmm, i, 8
+    %endif
+    CAT_XDEFINE sizeofxmm, i, 16
+    CAT_XDEFINE sizeofymm, i, 32
+    %assign i i+1
+%endrep
+%undef i
+
+%macro CHECK_AVX_INSTR_EMU 3-*
+    %xdefine %%opcode %1
+    %xdefine %%dst %2
+    %rep %0-2
+        %ifidn %%dst, %3
+            %error non-avx emulation of ``%%opcode'' is not supported
+        %endif
+        %rotate 1
+    %endrep
+%endmacro
+
+;%1 == instruction
+;%2 == minimal instruction set
+;%3 == 1 if float, 0 if int
+;%4 == 1 if non-destructive or 4-operand (xmm, xmm, xmm, imm), 0 otherwise
+;%5 == 1 if commutative (i.e. doesn't matter which src arg is which), 0 if not
+;%6+: operands
+%macro RUN_AVX_INSTR 6-9+
+    %ifnum sizeof%7
+        %assign __sizeofreg sizeof%7
+    %elifnum sizeof%6
+        %assign __sizeofreg sizeof%6
+    %else
+        %assign __sizeofreg mmsize
+    %endif
+    %assign __emulate_avx 0
+    %if avx_enabled && __sizeofreg >= 16
+        %xdefine __instr v%1
+    %else
+        %xdefine __instr %1
+        %if %0 >= 8+%4
+            %assign __emulate_avx 1
+        %endif
+    %endif
+    %ifnidn %2, fnord
+        %ifdef cpuname
+            %if notcpuflag(%2)
+                %error use of ``%1'' %2 instruction in cpuname function: current_function
+            %elif cpuflags_%2 < cpuflags_sse && notcpuflag(sse2) && __sizeofreg > 8
+                %error use of ``%1'' sse2 instruction in cpuname function: current_function
+            %endif
+        %endif
+    %endif
+
+    %if __emulate_avx
+        %xdefine __src1 %7
+        %xdefine __src2 %8
+        %ifnidn %6, %7
+            %if %0 >= 9
+                CHECK_AVX_INSTR_EMU {%1 %6, %7, %8, %9}, %6, %8, %9
+            %else
+                CHECK_AVX_INSTR_EMU {%1 %6, %7, %8}, %6, %8
+            %endif
+            %if %5 && %4 == 0
+                %ifnid %8
+                    ; 3-operand AVX instructions with a memory arg can only have it in src2,
+                    ; whereas SSE emulation prefers to have it in src1 (i.e. the mov).
+                    ; So, if the instruction is commutative with a memory arg, swap them.
+                    %xdefine __src1 %8
+                    %xdefine __src2 %7
+                %endif
+            %endif
+            %if __sizeofreg == 8
+                MOVQ %6, __src1
+            %elif %3
+                MOVAPS %6, __src1
+            %else
+                MOVDQA %6, __src1
+            %endif
+        %endif
+        %if %0 >= 9
+            %1 %6, __src2, %9
+        %else
+            %1 %6, __src2
+        %endif
+    %elif %0 >= 9
+        __instr %6, %7, %8, %9
+    %elif %0 == 8
+        __instr %6, %7, %8
+    %elif %0 == 7
+        __instr %6, %7
+    %else
+        __instr %6
+    %endif
+%endmacro
+
+;%1 == instruction
+;%2 == minimal instruction set
+;%3 == 1 if float, 0 if int
+;%4 == 1 if non-destructive or 4-operand (xmm, xmm, xmm, imm), 0 otherwise
+;%5 == 1 if commutative (i.e. doesn't matter which src arg is which), 0 if not
+%macro AVX_INSTR 1-5 fnord, 0, 1, 0
+    %macro %1 1-10 fnord, fnord, fnord, fnord, %1, %2, %3, %4, %5
+        %ifidn %2, fnord
+            RUN_AVX_INSTR %6, %7, %8, %9, %10, %1
+        %elifidn %3, fnord
+            RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2
+        %elifidn %4, fnord
+            RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2, %3
+        %elifidn %5, fnord
+            RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2, %3, %4
+        %else
+            RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2, %3, %4, %5
+        %endif
+    %endmacro
+%endmacro
+
+; Instructions with both VEX and non-VEX encodings
+; Non-destructive instructions are written without parameters
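+; For example, once "AVX_INSTR addps, sse, 1, 0, 1" has run, writing "addps m0, m1, m2"
+; assembles to "vaddps m0, m1, m2" when AVX is enabled, and is otherwise emulated as
+; "movaps m0, m1" followed by "addps m0, m2" (the movaps is skipped if m0 and m1 match).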
+AVX_INSTR addpd, sse2, 1, 0, 1
+AVX_INSTR addps, sse, 1, 0, 1
+AVX_INSTR addsd, sse2, 1, 0, 1
+AVX_INSTR addss, sse, 1, 0, 1
+AVX_INSTR addsubpd, sse3, 1, 0, 0
+AVX_INSTR addsubps, sse3, 1, 0, 0
+AVX_INSTR aesdec, fnord, 0, 0, 0
+AVX_INSTR aesdeclast, fnord, 0, 0, 0
+AVX_INSTR aesenc, fnord, 0, 0, 0
+AVX_INSTR aesenclast, fnord, 0, 0, 0
+AVX_INSTR aesimc
+AVX_INSTR aeskeygenassist
+AVX_INSTR andnpd, sse2, 1, 0, 0
+AVX_INSTR andnps, sse, 1, 0, 0
+AVX_INSTR andpd, sse2, 1, 0, 1
+AVX_INSTR andps, sse, 1, 0, 1
+AVX_INSTR blendpd, sse4, 1, 0, 0
+AVX_INSTR blendps, sse4, 1, 0, 0
+AVX_INSTR blendvpd, sse4, 1, 0, 0
+AVX_INSTR blendvps, sse4, 1, 0, 0
+AVX_INSTR cmppd, sse2, 1, 1, 0
+AVX_INSTR cmpps, sse, 1, 1, 0
+AVX_INSTR cmpsd, sse2, 1, 1, 0
+AVX_INSTR cmpss, sse, 1, 1, 0
+AVX_INSTR comisd, sse2
+AVX_INSTR comiss, sse
+AVX_INSTR cvtdq2pd, sse2
+AVX_INSTR cvtdq2ps, sse2
+AVX_INSTR cvtpd2dq, sse2
+AVX_INSTR cvtpd2ps, sse2
+AVX_INSTR cvtps2dq, sse2
+AVX_INSTR cvtps2pd, sse2
+AVX_INSTR cvtsd2si, sse2
+AVX_INSTR cvtsd2ss, sse2
+AVX_INSTR cvtsi2sd, sse2
+AVX_INSTR cvtsi2ss, sse
+AVX_INSTR cvtss2sd, sse2
+AVX_INSTR cvtss2si, sse
+AVX_INSTR cvttpd2dq, sse2
+AVX_INSTR cvttps2dq, sse2
+AVX_INSTR cvttsd2si, sse2
+AVX_INSTR cvttss2si, sse
+AVX_INSTR divpd, sse2, 1, 0, 0
+AVX_INSTR divps, sse, 1, 0, 0
+AVX_INSTR divsd, sse2, 1, 0, 0
+AVX_INSTR divss, sse, 1, 0, 0
+AVX_INSTR dppd, sse4, 1, 1, 0
+AVX_INSTR dpps, sse4, 1, 1, 0
+AVX_INSTR extractps, sse4
+AVX_INSTR haddpd, sse3, 1, 0, 0
+AVX_INSTR haddps, sse3, 1, 0, 0
+AVX_INSTR hsubpd, sse3, 1, 0, 0
+AVX_INSTR hsubps, sse3, 1, 0, 0
+AVX_INSTR insertps, sse4, 1, 1, 0
+AVX_INSTR lddqu, sse3
+AVX_INSTR ldmxcsr, sse
+AVX_INSTR maskmovdqu, sse2
+AVX_INSTR maxpd, sse2, 1, 0, 1
+AVX_INSTR maxps, sse, 1, 0, 1
+AVX_INSTR maxsd, sse2, 1, 0, 1
+AVX_INSTR maxss, sse, 1, 0, 1
+AVX_INSTR minpd, sse2, 1, 0, 1
+AVX_INSTR minps, sse, 1, 0, 1
+AVX_INSTR minsd, sse2, 1, 0, 1
+AVX_INSTR minss, sse, 1, 0, 1
+AVX_INSTR movapd, sse2
+AVX_INSTR movaps, sse
+AVX_INSTR movd, mmx
+AVX_INSTR movddup, sse3
+AVX_INSTR movdqa, sse2
+AVX_INSTR movdqu, sse2
+AVX_INSTR movhlps, sse, 1, 0, 0
+AVX_INSTR movhpd, sse2, 1, 0, 0
+AVX_INSTR movhps, sse, 1, 0, 0
+AVX_INSTR movlhps, sse, 1, 0, 0
+AVX_INSTR movlpd, sse2, 1, 0, 0
+AVX_INSTR movlps, sse, 1, 0, 0
+AVX_INSTR movmskpd, sse2
+AVX_INSTR movmskps, sse
+AVX_INSTR movntdq, sse2
+AVX_INSTR movntdqa, sse4
+AVX_INSTR movntpd, sse2
+AVX_INSTR movntps, sse
+AVX_INSTR movq, mmx
+AVX_INSTR movsd, sse2, 1, 0, 0
+AVX_INSTR movshdup, sse3
+AVX_INSTR movsldup, sse3
+AVX_INSTR movss, sse, 1, 0, 0
+AVX_INSTR movupd, sse2
+AVX_INSTR movups, sse
+AVX_INSTR mpsadbw, sse4
+AVX_INSTR mulpd, sse2, 1, 0, 1
+AVX_INSTR mulps, sse, 1, 0, 1
+AVX_INSTR mulsd, sse2, 1, 0, 1
+AVX_INSTR mulss, sse, 1, 0, 1
+AVX_INSTR orpd, sse2, 1, 0, 1
+AVX_INSTR orps, sse, 1, 0, 1
+AVX_INSTR pabsb, ssse3
+AVX_INSTR pabsd, ssse3
+AVX_INSTR pabsw, ssse3
+AVX_INSTR packsswb, mmx, 0, 0, 0
+AVX_INSTR packssdw, mmx, 0, 0, 0
+AVX_INSTR packuswb, mmx, 0, 0, 0
+AVX_INSTR packusdw, sse4, 0, 0, 0
+AVX_INSTR paddb, mmx, 0, 0, 1
+AVX_INSTR paddw, mmx, 0, 0, 1
+AVX_INSTR paddd, mmx, 0, 0, 1
+AVX_INSTR paddq, sse2, 0, 0, 1
+AVX_INSTR paddsb, mmx, 0, 0, 1
+AVX_INSTR paddsw, mmx, 0, 0, 1
+AVX_INSTR paddusb, mmx, 0, 0, 1
+AVX_INSTR paddusw, mmx, 0, 0, 1
+AVX_INSTR palignr, ssse3
+AVX_INSTR pand, mmx, 0, 0, 1
+AVX_INSTR pandn, mmx, 0, 0, 0
+AVX_INSTR pavgb, mmx2, 0, 0, 1
+AVX_INSTR pavgw, mmx2, 0, 0, 1
+AVX_INSTR pblendvb, sse4, 0, 0, 0
+AVX_INSTR pblendw, sse4
+AVX_INSTR pclmulqdq
+AVX_INSTR pcmpestri, sse42
+AVX_INSTR pcmpestrm, sse42
+AVX_INSTR pcmpistri, sse42
+AVX_INSTR pcmpistrm, sse42
+AVX_INSTR pcmpeqb, mmx, 0, 0, 1
+AVX_INSTR pcmpeqw, mmx, 0, 0, 1
+AVX_INSTR pcmpeqd, mmx, 0, 0, 1
+AVX_INSTR pcmpeqq, sse4, 0, 0, 1
+AVX_INSTR pcmpgtb, mmx, 0, 0, 0
+AVX_INSTR pcmpgtw, mmx, 0, 0, 0
+AVX_INSTR pcmpgtd, mmx, 0, 0, 0
+AVX_INSTR pcmpgtq, sse42, 0, 0, 0
+AVX_INSTR pextrb, sse4
+AVX_INSTR pextrd, sse4
+AVX_INSTR pextrq, sse4
+AVX_INSTR pextrw, mmx2
+AVX_INSTR phaddw, ssse3, 0, 0, 0
+AVX_INSTR phaddd, ssse3, 0, 0, 0
+AVX_INSTR phaddsw, ssse3, 0, 0, 0
+AVX_INSTR phminposuw, sse4
+AVX_INSTR phsubw, ssse3, 0, 0, 0
+AVX_INSTR phsubd, ssse3, 0, 0, 0
+AVX_INSTR phsubsw, ssse3, 0, 0, 0
+AVX_INSTR pinsrb, sse4
+AVX_INSTR pinsrd, sse4
+AVX_INSTR pinsrq, sse4
+AVX_INSTR pinsrw, mmx2
+AVX_INSTR pmaddwd, mmx, 0, 0, 1
+AVX_INSTR pmaddubsw, ssse3, 0, 0, 0
+AVX_INSTR pmaxsb, sse4, 0, 0, 1
+AVX_INSTR pmaxsw, mmx2, 0, 0, 1
+AVX_INSTR pmaxsd, sse4, 0, 0, 1
+AVX_INSTR pmaxub, mmx2, 0, 0, 1
+AVX_INSTR pmaxuw, sse4, 0, 0, 1
+AVX_INSTR pmaxud, sse4, 0, 0, 1
+AVX_INSTR pminsb, sse4, 0, 0, 1
+AVX_INSTR pminsw, mmx2, 0, 0, 1
+AVX_INSTR pminsd, sse4, 0, 0, 1
+AVX_INSTR pminub, mmx2, 0, 0, 1
+AVX_INSTR pminuw, sse4, 0, 0, 1
+AVX_INSTR pminud, sse4, 0, 0, 1
+AVX_INSTR pmovmskb, mmx2
+AVX_INSTR pmovsxbw, sse4
+AVX_INSTR pmovsxbd, sse4
+AVX_INSTR pmovsxbq, sse4
+AVX_INSTR pmovsxwd, sse4
+AVX_INSTR pmovsxwq, sse4
+AVX_INSTR pmovsxdq, sse4
+AVX_INSTR pmovzxbw, sse4
+AVX_INSTR pmovzxbd, sse4
+AVX_INSTR pmovzxbq, sse4
+AVX_INSTR pmovzxwd, sse4
+AVX_INSTR pmovzxwq, sse4
+AVX_INSTR pmovzxdq, sse4
+AVX_INSTR pmuldq, sse4, 0, 0, 1
+AVX_INSTR pmulhrsw, ssse3, 0, 0, 1
+AVX_INSTR pmulhuw, mmx2, 0, 0, 1
+AVX_INSTR pmulhw, mmx, 0, 0, 1
+AVX_INSTR pmullw, mmx, 0, 0, 1
+AVX_INSTR pmulld, sse4, 0, 0, 1
+AVX_INSTR pmuludq, sse2, 0, 0, 1
+AVX_INSTR por, mmx, 0, 0, 1
+AVX_INSTR psadbw, mmx2, 0, 0, 1
+AVX_INSTR pshufb, ssse3, 0, 0, 0
+AVX_INSTR pshufd, sse2
+AVX_INSTR pshufhw, sse2
+AVX_INSTR pshuflw, sse2
+AVX_INSTR psignb, ssse3, 0, 0, 0
+AVX_INSTR psignw, ssse3, 0, 0, 0
+AVX_INSTR psignd, ssse3, 0, 0, 0
+AVX_INSTR psllw, mmx, 0, 0, 0
+AVX_INSTR pslld, mmx, 0, 0, 0
+AVX_INSTR psllq, mmx, 0, 0, 0
+AVX_INSTR pslldq, sse2, 0, 0, 0
+AVX_INSTR psraw, mmx, 0, 0, 0
+AVX_INSTR psrad, mmx, 0, 0, 0
+AVX_INSTR psrlw, mmx, 0, 0, 0
+AVX_INSTR psrld, mmx, 0, 0, 0
+AVX_INSTR psrlq, mmx, 0, 0, 0
+AVX_INSTR psrldq, sse2, 0, 0, 0
+AVX_INSTR psubb, mmx, 0, 0, 0
+AVX_INSTR psubw, mmx, 0, 0, 0
+AVX_INSTR psubd, mmx, 0, 0, 0
+AVX_INSTR psubq, sse2, 0, 0, 0
+AVX_INSTR psubsb, mmx, 0, 0, 0
+AVX_INSTR psubsw, mmx, 0, 0, 0
+AVX_INSTR psubusb, mmx, 0, 0, 0
+AVX_INSTR psubusw, mmx, 0, 0, 0
+AVX_INSTR ptest, sse4
+AVX_INSTR punpckhbw, mmx, 0, 0, 0
+AVX_INSTR punpckhwd, mmx, 0, 0, 0
+AVX_INSTR punpckhdq, mmx, 0, 0, 0
+AVX_INSTR punpckhqdq, sse2, 0, 0, 0
+AVX_INSTR punpcklbw, mmx, 0, 0, 0
+AVX_INSTR punpcklwd, mmx, 0, 0, 0
+AVX_INSTR punpckldq, mmx, 0, 0, 0
+AVX_INSTR punpcklqdq, sse2, 0, 0, 0
+AVX_INSTR pxor, mmx, 0, 0, 1
+AVX_INSTR rcpps, sse, 1, 0, 0
+AVX_INSTR rcpss, sse, 1, 0, 0
+AVX_INSTR roundpd, sse4
+AVX_INSTR roundps, sse4
+AVX_INSTR roundsd, sse4
+AVX_INSTR roundss, sse4
+AVX_INSTR rsqrtps, sse, 1, 0, 0
+AVX_INSTR rsqrtss, sse, 1, 0, 0
+AVX_INSTR shufpd, sse2, 1, 1, 0
+AVX_INSTR shufps, sse, 1, 1, 0
+AVX_INSTR sqrtpd, sse2, 1, 0, 0
+AVX_INSTR sqrtps, sse, 1, 0, 0
+AVX_INSTR sqrtsd, sse2, 1, 0, 0
+AVX_INSTR sqrtss, sse, 1, 0, 0
+AVX_INSTR stmxcsr, sse
+AVX_INSTR subpd, sse2, 1, 0, 0
+AVX_INSTR subps, sse, 1, 0, 0
+AVX_INSTR subsd, sse2, 1, 0, 0
+AVX_INSTR subss, sse, 1, 0, 0
+AVX_INSTR ucomisd, sse2
+AVX_INSTR ucomiss, sse
+AVX_INSTR unpckhpd, sse2, 1, 0, 0
+AVX_INSTR unpckhps, sse, 1, 0, 0
+AVX_INSTR unpcklpd, sse2, 1, 0, 0
+AVX_INSTR unpcklps, sse, 1, 0, 0
+AVX_INSTR xorpd, sse2, 1, 0, 1
+AVX_INSTR xorps, sse, 1, 0, 1
+
+; 3DNow instructions, for sharing code between AVX, SSE and 3DNow
+AVX_INSTR pfadd, 3dnow, 1, 0, 1
+AVX_INSTR pfsub, 3dnow, 1, 0, 0
+AVX_INSTR pfmul, 3dnow, 1, 0, 1
+
+; base-4 constants for shuffles
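+; e.g. q3210 expands to 0xE4 (the identity shuffle) and q0123 to 0x1B (reversed
+; element order), matching the 2-bit-per-element immediate used by pshufd/shufps.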
+%assign i 0
+%rep 256
+    %assign j ((i>>6)&3)*1000 + ((i>>4)&3)*100 + ((i>>2)&3)*10 + (i&3)
+    %if j < 10
+        CAT_XDEFINE q000, j, i
+    %elif j < 100
+        CAT_XDEFINE q00, j, i
+    %elif j < 1000
+        CAT_XDEFINE q0, j, i
+    %else
+        CAT_XDEFINE q, j, i
+    %endif
+    %assign i i+1
+%endrep
+%undef i
+%undef j
+
+%macro FMA_INSTR 3
+    %macro %1 4-7 %1, %2, %3
+        %if cpuflag(xop)
+            v%5 %1, %2, %3, %4
+        %elifnidn %1, %4
+            %6 %1, %2, %3
+            %7 %1, %4
+        %else
+            %error non-xop emulation of ``%5 %1, %2, %3, %4'' is not supported
+        %endif
+    %endmacro
+%endmacro
+
+FMA_INSTR  pmacsww,  pmullw, paddw
+FMA_INSTR  pmacsdd,  pmulld, paddd ; sse4 emulation
+FMA_INSTR pmacsdql,  pmuldq, paddq ; sse4 emulation
+FMA_INSTR pmadcswd, pmaddwd, paddd
+
+; Macros for consolidating FMA3 and FMA4 using 4-operand (dst, src1, src2, src3) syntax.
+; FMA3 is only possible if dst is the same as one of the src registers.
+; Either src2 or src3 can be a memory operand.
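+; For example, "fmaddps m0, m0, m1, m2" becomes "vfmaddps m0, m0, m1, m2" on FMA4
+; and "vfmadd213ps m0, m1, m2" on FMA3 (where dst must alias one of the sources).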
+%macro FMA4_INSTR 2-*
+    %push fma4_instr
+    %xdefine %$prefix %1
+    %rep %0 - 1
+        %macro %$prefix%2 4-6 %$prefix, %2
+            %if notcpuflag(fma3) && notcpuflag(fma4)
+                %error use of ``%5%6'' fma instruction in cpuname function: current_function
+            %elif cpuflag(fma4)
+                v%5%6 %1, %2, %3, %4
+            %elifidn %1, %2
+                ; If %3 or %4 is a memory operand it needs to be encoded as the last operand.
+                %ifid %3
+                    v%{5}213%6 %2, %3, %4
+                %else
+                    v%{5}132%6 %2, %4, %3
+                %endif
+            %elifidn %1, %3
+                v%{5}213%6 %3, %2, %4
+            %elifidn %1, %4
+                v%{5}231%6 %4, %2, %3
+            %else
+                %error fma3 emulation of ``%5%6 %1, %2, %3, %4'' is not supported
+            %endif
+        %endmacro
+        %rotate 1
+    %endrep
+    %pop
+%endmacro
+
+FMA4_INSTR fmadd,    pd, ps, sd, ss
+FMA4_INSTR fmaddsub, pd, ps
+FMA4_INSTR fmsub,    pd, ps, sd, ss
+FMA4_INSTR fmsubadd, pd, ps
+FMA4_INSTR fnmadd,   pd, ps, sd, ss
+FMA4_INSTR fnmsub,   pd, ps, sd, ss
+
+; workaround: vpbroadcastq is broken in x86_32 due to a yasm bug (fixed in 1.3.0)
+%ifdef __YASM_VER__
+    %if __YASM_VERSION_ID__ < 0x01030000 && ARCH_X86_64 == 0
+        %macro vpbroadcastq 2
+            %if sizeof%1 == 16
+                movddup %1, %2
+            %else
+                vbroadcastsd %1, %2
+            %endif
+        %endmacro
+    %endif
+%endif

+ 190 - 0
thirdparty/libvpx/vp8/common/alloccommon.c

@@ -0,0 +1,190 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_config.h"
+#include "alloccommon.h"
+#include "blockd.h"
+#include "vpx_mem/vpx_mem.h"
+#include "onyxc_int.h"
+#include "findnearmv.h"
+#include "entropymode.h"
+#include "systemdependent.h"
+
+void vp8_de_alloc_frame_buffers(VP8_COMMON *oci)
+{
+    int i;
+    for (i = 0; i < NUM_YV12_BUFFERS; i++)
+        vp8_yv12_de_alloc_frame_buffer(&oci->yv12_fb[i]);
+
+    vp8_yv12_de_alloc_frame_buffer(&oci->temp_scale_frame);
+#if CONFIG_POSTPROC
+    vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer);
+    if (oci->post_proc_buffer_int_used)
+        vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer_int);
+
+    vpx_free(oci->pp_limits_buffer);
+    oci->pp_limits_buffer = NULL;
+#endif
+
+    vpx_free(oci->above_context);
+    vpx_free(oci->mip);
+#if CONFIG_ERROR_CONCEALMENT
+    vpx_free(oci->prev_mip);
+    oci->prev_mip = NULL;
+#endif
+
+    oci->above_context = NULL;
+    oci->mip = NULL;
+}
+
+int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
+{
+    int i;
+
+    vp8_de_alloc_frame_buffers(oci);
+
+    /* our internal buffers are always multiples of 16 */
+    if ((width & 0xf) != 0)
+        width += 16 - (width & 0xf);
+
+    if ((height & 0xf) != 0)
+        height += 16 - (height & 0xf);
+
+
+    for (i = 0; i < NUM_YV12_BUFFERS; i++)
+    {
+        oci->fb_idx_ref_cnt[i] = 0;
+        oci->yv12_fb[i].flags = 0;
+        if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[i], width, height, VP8BORDERINPIXELS) < 0)
+            goto allocation_fail;
+    }
+
+    oci->new_fb_idx = 0;
+    oci->lst_fb_idx = 1;
+    oci->gld_fb_idx = 2;
+    oci->alt_fb_idx = 3;
+
+    oci->fb_idx_ref_cnt[0] = 1;
+    oci->fb_idx_ref_cnt[1] = 1;
+    oci->fb_idx_ref_cnt[2] = 1;
+    oci->fb_idx_ref_cnt[3] = 1;
+
+    if (vp8_yv12_alloc_frame_buffer(&oci->temp_scale_frame,   width, 16, VP8BORDERINPIXELS) < 0)
+        goto allocation_fail;
+
+    oci->mb_rows = height >> 4;
+    oci->mb_cols = width >> 4;
+    oci->MBs = oci->mb_rows * oci->mb_cols;
+    oci->mode_info_stride = oci->mb_cols + 1;
+    oci->mip = vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1), sizeof(MODE_INFO));
+
+    if (!oci->mip)
+        goto allocation_fail;
+
+    oci->mi = oci->mip + oci->mode_info_stride + 1;
+
+    /* Allocation of previous mode info will be done in vp8_decode_frame()
+     * as it is decoder-only data */
+
+    oci->above_context = vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * oci->mb_cols, 1);
+
+    if (!oci->above_context)
+        goto allocation_fail;
+
+#if CONFIG_POSTPROC
+    if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height, VP8BORDERINPIXELS) < 0)
+        goto allocation_fail;
+
+    oci->post_proc_buffer_int_used = 0;
+    memset(&oci->postproc_state, 0, sizeof(oci->postproc_state));
+    memset(oci->post_proc_buffer.buffer_alloc, 128,
+           oci->post_proc_buffer.frame_size);
+
+    /* Allocate buffer to store post-processing filter coefficients.
+     *
+     * Note: Round up mb_cols to support SIMD reads
+     */
+    oci->pp_limits_buffer = vpx_memalign(16, 24 * ((oci->mb_cols + 1) & ~1));
+    if (!oci->pp_limits_buffer)
+        goto allocation_fail;
+#endif
+
+    return 0;
+
+allocation_fail:
+    vp8_de_alloc_frame_buffers(oci);
+    return 1;
+}
+
+void vp8_setup_version(VP8_COMMON *cm)
+{
+    switch (cm->version)
+    {
+    case 0:
+        cm->no_lpf = 0;
+        cm->filter_type = NORMAL_LOOPFILTER;
+        cm->use_bilinear_mc_filter = 0;
+        cm->full_pixel = 0;
+        break;
+    case 1:
+        cm->no_lpf = 0;
+        cm->filter_type = SIMPLE_LOOPFILTER;
+        cm->use_bilinear_mc_filter = 1;
+        cm->full_pixel = 0;
+        break;
+    case 2:
+        cm->no_lpf = 1;
+        cm->filter_type = NORMAL_LOOPFILTER;
+        cm->use_bilinear_mc_filter = 1;
+        cm->full_pixel = 0;
+        break;
+    case 3:
+        cm->no_lpf = 1;
+        cm->filter_type = SIMPLE_LOOPFILTER;
+        cm->use_bilinear_mc_filter = 1;
+        cm->full_pixel = 1;
+        break;
+    default:
+        /*4,5,6,7 are reserved for future use*/
+        cm->no_lpf = 0;
+        cm->filter_type = NORMAL_LOOPFILTER;
+        cm->use_bilinear_mc_filter = 0;
+        cm->full_pixel = 0;
+        break;
+    }
+}
+void vp8_create_common(VP8_COMMON *oci)
+{
+    vp8_machine_specific_config(oci);
+
+    vp8_init_mbmode_probs(oci);
+    vp8_default_bmode_probs(oci->fc.bmode_prob);
+
+    oci->mb_no_coeff_skip = 1;
+    oci->no_lpf = 0;
+    oci->filter_type = NORMAL_LOOPFILTER;
+    oci->use_bilinear_mc_filter = 0;
+    oci->full_pixel = 0;
+    oci->multi_token_partition = ONE_PARTITION;
+    oci->clamp_type = RECON_CLAMP_REQUIRED;
+
+    /* Initialize reference frame sign bias structure to defaults */
+    memset(oci->ref_frame_sign_bias, 0, sizeof(oci->ref_frame_sign_bias));
+
+    /* Disable buffer-to-buffer copying by default */
+    oci->copy_buffer_to_gf = 0;
+    oci->copy_buffer_to_arf = 0;
+}
+
+void vp8_remove_common(VP8_COMMON *oci)
+{
+    vp8_de_alloc_frame_buffers(oci);
+}

+ 31 - 0
thirdparty/libvpx/vp8/common/alloccommon.h

@@ -0,0 +1,31 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP8_COMMON_ALLOCCOMMON_H_
+#define VP8_COMMON_ALLOCCOMMON_H_
+
+#include "onyxc_int.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp8_create_common(VP8_COMMON *oci);
+void vp8_remove_common(VP8_COMMON *oci);
+void vp8_de_alloc_frame_buffers(VP8_COMMON *oci);
+int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height);
+void vp8_setup_version(VP8_COMMON *oci);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP8_COMMON_ALLOCCOMMON_H_

+ 181 - 0
thirdparty/libvpx/vp8/common/arm/loopfilter_arm.c

@@ -0,0 +1,181 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_config.h"
+#include "vp8_rtcd.h"
+#include "vp8/common/loopfilter.h"
+#include "vp8/common/onyxc_int.h"
+
+#define prototype_loopfilter(sym) \
+    void sym(unsigned char *src, int pitch, const unsigned char *blimit,\
+             const unsigned char *limit, const unsigned char *thresh, int count)
+
+#if HAVE_MEDIA
+extern prototype_loopfilter(vp8_loop_filter_horizontal_edge_armv6);
+extern prototype_loopfilter(vp8_loop_filter_vertical_edge_armv6);
+extern prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_armv6);
+extern prototype_loopfilter(vp8_mbloop_filter_vertical_edge_armv6);
+#endif
+
+#if HAVE_NEON
+typedef void loopfilter_y_neon(unsigned char *src, int pitch,
+        unsigned char blimit, unsigned char limit, unsigned char thresh);
+typedef void loopfilter_uv_neon(unsigned char *u, int pitch,
+        unsigned char blimit, unsigned char limit, unsigned char thresh,
+        unsigned char *v);
+
+extern loopfilter_y_neon vp8_loop_filter_horizontal_edge_y_neon;
+extern loopfilter_y_neon vp8_loop_filter_vertical_edge_y_neon;
+extern loopfilter_uv_neon vp8_loop_filter_horizontal_edge_uv_neon;
+extern loopfilter_uv_neon vp8_loop_filter_vertical_edge_uv_neon;
+
+extern loopfilter_y_neon vp8_mbloop_filter_horizontal_edge_y_neon;
+extern loopfilter_y_neon vp8_mbloop_filter_vertical_edge_y_neon;
+extern loopfilter_uv_neon vp8_mbloop_filter_horizontal_edge_uv_neon;
+extern loopfilter_uv_neon vp8_mbloop_filter_vertical_edge_uv_neon;
+#endif
+
+#if HAVE_MEDIA
+/* ARMV6/MEDIA loopfilter functions */
+/* Horizontal MB filtering */
+void vp8_loop_filter_mbh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
+                               int y_stride, int uv_stride, loop_filter_info *lfi)
+{
+    vp8_mbloop_filter_horizontal_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
+
+    if (u_ptr)
+        vp8_mbloop_filter_horizontal_edge_armv6(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+
+    if (v_ptr)
+        vp8_mbloop_filter_horizontal_edge_armv6(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+}
+
+/* Vertical MB Filtering */
+void vp8_loop_filter_mbv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
+                               int y_stride, int uv_stride, loop_filter_info *lfi)
+{
+    vp8_mbloop_filter_vertical_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
+
+    if (u_ptr)
+        vp8_mbloop_filter_vertical_edge_armv6(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+
+    if (v_ptr)
+        vp8_mbloop_filter_vertical_edge_armv6(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+}
+
+/* Horizontal B Filtering */
+void vp8_loop_filter_bh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
+                              int y_stride, int uv_stride, loop_filter_info *lfi)
+{
+    vp8_loop_filter_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+    vp8_loop_filter_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+    vp8_loop_filter_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+
+    if (u_ptr)
+        vp8_loop_filter_horizontal_edge_armv6(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
+
+    if (v_ptr)
+        vp8_loop_filter_horizontal_edge_armv6(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
+}
+
+void vp8_loop_filter_bhs_armv6(unsigned char *y_ptr, int y_stride,
+                               const unsigned char *blimit)
+{
+    vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, blimit);
+    vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, blimit);
+    vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, blimit);
+}
+
+/* Vertical B Filtering */
+void vp8_loop_filter_bv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
+                              int y_stride, int uv_stride, loop_filter_info *lfi)
+{
+    vp8_loop_filter_vertical_edge_armv6(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+    vp8_loop_filter_vertical_edge_armv6(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+    vp8_loop_filter_vertical_edge_armv6(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+
+    if (u_ptr)
+        vp8_loop_filter_vertical_edge_armv6(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
+
+    if (v_ptr)
+        vp8_loop_filter_vertical_edge_armv6(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
+}
+
+void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, int y_stride,
+                               const unsigned char *blimit)
+{
+    vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 4, y_stride, blimit);
+    vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 8, y_stride, blimit);
+    vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 12, y_stride, blimit);
+}
+#endif
+
+#if HAVE_NEON
+/* NEON loopfilter functions */
+/* Horizontal MB filtering */
+void vp8_loop_filter_mbh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
+                              int y_stride, int uv_stride, loop_filter_info *lfi)
+{
+    unsigned char mblim = *lfi->mblim;
+    unsigned char lim = *lfi->lim;
+    unsigned char hev_thr = *lfi->hev_thr;
+    vp8_mbloop_filter_horizontal_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr);
+
+    if (u_ptr)
+        vp8_mbloop_filter_horizontal_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr);
+}
+
+/* Vertical MB Filtering */
+void vp8_loop_filter_mbv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
+                              int y_stride, int uv_stride, loop_filter_info *lfi)
+{
+    unsigned char mblim = *lfi->mblim;
+    unsigned char lim = *lfi->lim;
+    unsigned char hev_thr = *lfi->hev_thr;
+
+    vp8_mbloop_filter_vertical_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr);
+
+    if (u_ptr)
+        vp8_mbloop_filter_vertical_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr);
+}
+
+/* Horizontal B Filtering */
+void vp8_loop_filter_bh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
+                             int y_stride, int uv_stride, loop_filter_info *lfi)
+{
+    unsigned char blim = *lfi->blim;
+    unsigned char lim = *lfi->lim;
+    unsigned char hev_thr = *lfi->hev_thr;
+
+    vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 4 * y_stride, y_stride, blim, lim, hev_thr);
+    vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 8 * y_stride, y_stride, blim, lim, hev_thr);
+    vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 12 * y_stride, y_stride, blim, lim, hev_thr);
+
+    if (u_ptr)
+        vp8_loop_filter_horizontal_edge_uv_neon(u_ptr + 4 * uv_stride, uv_stride, blim, lim, hev_thr, v_ptr + 4 * uv_stride);
+}
+
+/* Vertical B Filtering */
+void vp8_loop_filter_bv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
+                             int y_stride, int uv_stride, loop_filter_info *lfi)
+{
+    unsigned char blim = *lfi->blim;
+    unsigned char lim = *lfi->lim;
+    unsigned char hev_thr = *lfi->hev_thr;
+
+    vp8_loop_filter_vertical_edge_y_neon(y_ptr + 4, y_stride, blim, lim, hev_thr);
+    vp8_loop_filter_vertical_edge_y_neon(y_ptr + 8, y_stride, blim, lim, hev_thr);
+    vp8_loop_filter_vertical_edge_y_neon(y_ptr + 12, y_stride, blim, lim, hev_thr);
+
+    if (u_ptr)
+        vp8_loop_filter_vertical_edge_uv_neon(u_ptr + 4, uv_stride, blim, lim, hev_thr, v_ptr + 4);
+}
+#endif

+ 591 - 0
thirdparty/libvpx/vp8/common/arm/neon/bilinearpredict_neon.c

@@ -0,0 +1,591 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+static const uint8_t bifilter4_coeff[8][2] = {
+    {128,   0},
+    {112,  16},
+    { 96,  32},
+    { 80,  48},
+    { 64,  64},
+    { 48,  80},
+    { 32,  96},
+    { 16, 112}
+};
+
+void vp8_bilinear_predict8x4_neon(
+        unsigned char *src_ptr,
+        int src_pixels_per_line,
+        int xoffset,
+        int yoffset,
+        unsigned char *dst_ptr,
+        int dst_pitch) {
+    uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8;
+    uint8x8_t d7u8, d9u8, d11u8, d22u8, d23u8, d24u8, d25u8, d26u8;
+    uint8x16_t q1u8, q2u8, q3u8, q4u8, q5u8;
+    uint16x8_t q1u16, q2u16, q3u16, q4u16;
+    uint16x8_t q6u16, q7u16, q8u16, q9u16, q10u16;
+
+    if (xoffset == 0) {  // skip_1stpass_filter
+        d22u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+        d23u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+        d24u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+        d25u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+        d26u8 = vld1_u8(src_ptr);
+    } else {
+        q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+        q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+        q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+        q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+        q5u8 = vld1q_u8(src_ptr);
+
+        d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
+        d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);
+
+        q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8);
+        q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8);
+        q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8);
+        q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8);
+        q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8);
+
+        d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1);
+        d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1);
+        d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1);
+        d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1);
+        d11u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1);
+
+        q6u16 = vmlal_u8(q6u16, d3u8, d1u8);
+        q7u16 = vmlal_u8(q7u16, d5u8, d1u8);
+        q8u16 = vmlal_u8(q8u16, d7u8, d1u8);
+        q9u16 = vmlal_u8(q9u16, d9u8, d1u8);
+        q10u16 = vmlal_u8(q10u16, d11u8, d1u8);
+
+        d22u8 = vqrshrn_n_u16(q6u16, 7);
+        d23u8 = vqrshrn_n_u16(q7u16, 7);
+        d24u8 = vqrshrn_n_u16(q8u16, 7);
+        d25u8 = vqrshrn_n_u16(q9u16, 7);
+        d26u8 = vqrshrn_n_u16(q10u16, 7);
+    }
+
+    // secondpass_filter
+    if (yoffset == 0) {  // skip_2ndpass_filter
+        vst1_u8((uint8_t *)dst_ptr, d22u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d23u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d24u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d25u8);
+    } else {
+        d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]);
+        d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]);
+
+        q1u16 = vmull_u8(d22u8, d0u8);
+        q2u16 = vmull_u8(d23u8, d0u8);
+        q3u16 = vmull_u8(d24u8, d0u8);
+        q4u16 = vmull_u8(d25u8, d0u8);
+
+        q1u16 = vmlal_u8(q1u16, d23u8, d1u8);
+        q2u16 = vmlal_u8(q2u16, d24u8, d1u8);
+        q3u16 = vmlal_u8(q3u16, d25u8, d1u8);
+        q4u16 = vmlal_u8(q4u16, d26u8, d1u8);
+
+        d2u8 = vqrshrn_n_u16(q1u16, 7);
+        d3u8 = vqrshrn_n_u16(q2u16, 7);
+        d4u8 = vqrshrn_n_u16(q3u16, 7);
+        d5u8 = vqrshrn_n_u16(q4u16, 7);
+
+        vst1_u8((uint8_t *)dst_ptr, d2u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d3u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d4u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d5u8);
+    }
+    return;
+}
+
+void vp8_bilinear_predict8x8_neon(
+        unsigned char *src_ptr,
+        int src_pixels_per_line,
+        int xoffset,
+        int yoffset,
+        unsigned char *dst_ptr,
+        int dst_pitch) {
+    uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8, d11u8;
+    uint8x8_t d22u8, d23u8, d24u8, d25u8, d26u8, d27u8, d28u8, d29u8, d30u8;
+    uint8x16_t q1u8, q2u8, q3u8, q4u8, q5u8;
+    uint16x8_t q1u16, q2u16, q3u16, q4u16, q5u16;
+    uint16x8_t q6u16, q7u16, q8u16, q9u16, q10u16;
+
+    if (xoffset == 0) {  // skip_1stpass_filter
+        d22u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+        d23u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+        d24u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+        d25u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+        d26u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+        d27u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+        d28u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+        d29u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
+        d30u8 = vld1_u8(src_ptr);
+    } else {
+        q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+        q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+        q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+        q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+
+        d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
+        d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);
+
+        q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8);
+        q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8);
+        q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8);
+        q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8);
+
+        d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1);
+        d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1);
+        d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1);
+        d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1);
+
+        q6u16 = vmlal_u8(q6u16, d3u8, d1u8);
+        q7u16 = vmlal_u8(q7u16, d5u8, d1u8);
+        q8u16 = vmlal_u8(q8u16, d7u8, d1u8);
+        q9u16 = vmlal_u8(q9u16, d9u8, d1u8);
+
+        d22u8 = vqrshrn_n_u16(q6u16, 7);
+        d23u8 = vqrshrn_n_u16(q7u16, 7);
+        d24u8 = vqrshrn_n_u16(q8u16, 7);
+        d25u8 = vqrshrn_n_u16(q9u16, 7);
+
+        // first_pass filtering on the remaining 5 lines of data
+        q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+        q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+        q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+        q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+        q5u8 = vld1q_u8(src_ptr);
+
+        q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8);
+        q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8);
+        q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8);
+        q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8);
+        q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8);
+
+        d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1);
+        d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1);
+        d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1);
+        d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1);
+        d11u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1);
+
+        q6u16 = vmlal_u8(q6u16, d3u8, d1u8);
+        q7u16 = vmlal_u8(q7u16, d5u8, d1u8);
+        q8u16 = vmlal_u8(q8u16, d7u8, d1u8);
+        q9u16 = vmlal_u8(q9u16, d9u8, d1u8);
+        q10u16 = vmlal_u8(q10u16, d11u8, d1u8);
+
+        d26u8 = vqrshrn_n_u16(q6u16, 7);
+        d27u8 = vqrshrn_n_u16(q7u16, 7);
+        d28u8 = vqrshrn_n_u16(q8u16, 7);
+        d29u8 = vqrshrn_n_u16(q9u16, 7);
+        d30u8 = vqrshrn_n_u16(q10u16, 7);
+    }
+
+    // secondpass_filter
+    if (yoffset == 0) {  // skip_2ndpass_filter
+        vst1_u8((uint8_t *)dst_ptr, d22u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d23u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d24u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d25u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d26u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d27u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d28u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d29u8);
+    } else {
+        d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]);
+        d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]);
+
+        q1u16 = vmull_u8(d22u8, d0u8);
+        q2u16 = vmull_u8(d23u8, d0u8);
+        q3u16 = vmull_u8(d24u8, d0u8);
+        q4u16 = vmull_u8(d25u8, d0u8);
+        q5u16 = vmull_u8(d26u8, d0u8);
+        q6u16 = vmull_u8(d27u8, d0u8);
+        q7u16 = vmull_u8(d28u8, d0u8);
+        q8u16 = vmull_u8(d29u8, d0u8);
+
+        q1u16 = vmlal_u8(q1u16, d23u8, d1u8);
+        q2u16 = vmlal_u8(q2u16, d24u8, d1u8);
+        q3u16 = vmlal_u8(q3u16, d25u8, d1u8);
+        q4u16 = vmlal_u8(q4u16, d26u8, d1u8);
+        q5u16 = vmlal_u8(q5u16, d27u8, d1u8);
+        q6u16 = vmlal_u8(q6u16, d28u8, d1u8);
+        q7u16 = vmlal_u8(q7u16, d29u8, d1u8);
+        q8u16 = vmlal_u8(q8u16, d30u8, d1u8);
+
+        d2u8 = vqrshrn_n_u16(q1u16, 7);
+        d3u8 = vqrshrn_n_u16(q2u16, 7);
+        d4u8 = vqrshrn_n_u16(q3u16, 7);
+        d5u8 = vqrshrn_n_u16(q4u16, 7);
+        d6u8 = vqrshrn_n_u16(q5u16, 7);
+        d7u8 = vqrshrn_n_u16(q6u16, 7);
+        d8u8 = vqrshrn_n_u16(q7u16, 7);
+        d9u8 = vqrshrn_n_u16(q8u16, 7);
+
+        vst1_u8((uint8_t *)dst_ptr, d2u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d3u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d4u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d5u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d6u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d7u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d8u8); dst_ptr += dst_pitch;
+        vst1_u8((uint8_t *)dst_ptr, d9u8);
+    }
+    return;
+}
+
+void vp8_bilinear_predict16x16_neon(
+        unsigned char *src_ptr,
+        int src_pixels_per_line,
+        int xoffset,
+        int yoffset,
+        unsigned char *dst_ptr,
+        int dst_pitch) {
+    int i;
+    unsigned char tmp[272];
+    unsigned char *tmpp;
+    uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8;
+    uint8x8_t d10u8, d11u8, d12u8, d13u8, d14u8, d15u8, d16u8, d17u8, d18u8;
+    uint8x8_t d19u8, d20u8, d21u8;
+    uint8x16_t q1u8, q2u8, q3u8, q4u8, q5u8, q6u8, q7u8, q8u8, q9u8, q10u8;
+    uint8x16_t q11u8, q12u8, q13u8, q14u8, q15u8;
+    uint16x8_t q1u16, q2u16, q3u16, q4u16, q5u16, q6u16, q7u16, q8u16;
+    uint16x8_t q9u16, q10u16, q11u16, q12u16, q13u16, q14u16;
+
+    if (xoffset == 0) {  // secondpass_bfilter16x16_only
+        d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]);
+        d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]);
+
+        q11u8 = vld1q_u8(src_ptr);
+        src_ptr += src_pixels_per_line;
+        for (i = 4; i > 0; i--) {
+            q12u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+            q13u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+            q14u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+            q15u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
+
+            q1u16 = vmull_u8(vget_low_u8(q11u8), d0u8);
+            q2u16 = vmull_u8(vget_high_u8(q11u8), d0u8);
+            q3u16 = vmull_u8(vget_low_u8(q12u8), d0u8);
+            q4u16 = vmull_u8(vget_high_u8(q12u8), d0u8);
+            q5u16 = vmull_u8(vget_low_u8(q13u8), d0u8);
+            q6u16 = vmull_u8(vget_high_u8(q13u8), d0u8);
+            q7u16 = vmull_u8(vget_low_u8(q14u8), d0u8);
+            q8u16 = vmull_u8(vget_high_u8(q14u8), d0u8);
+
+            q1u16 = vmlal_u8(q1u16, vget_low_u8(q12u8), d1u8);
+            q2u16 = vmlal_u8(q2u16, vget_high_u8(q12u8), d1u8);
+            q3u16 = vmlal_u8(q3u16, vget_low_u8(q13u8), d1u8);
+            q4u16 = vmlal_u8(q4u16, vget_high_u8(q13u8), d1u8);
+            q5u16 = vmlal_u8(q5u16, vget_low_u8(q14u8), d1u8);
+            q6u16 = vmlal_u8(q6u16, vget_high_u8(q14u8), d1u8);
+            q7u16 = vmlal_u8(q7u16, vget_low_u8(q15u8), d1u8);
+            q8u16 = vmlal_u8(q8u16, vget_high_u8(q15u8), d1u8);
+
+            d2u8 = vqrshrn_n_u16(q1u16, 7);
+            d3u8 = vqrshrn_n_u16(q2u16, 7);
+            d4u8 = vqrshrn_n_u16(q3u16, 7);
+            d5u8 = vqrshrn_n_u16(q4u16, 7);
+            d6u8 = vqrshrn_n_u16(q5u16, 7);
+            d7u8 = vqrshrn_n_u16(q6u16, 7);
+            d8u8 = vqrshrn_n_u16(q7u16, 7);
+            d9u8 = vqrshrn_n_u16(q8u16, 7);
+
+            q1u8 = vcombine_u8(d2u8, d3u8);
+            q2u8 = vcombine_u8(d4u8, d5u8);
+            q3u8 = vcombine_u8(d6u8, d7u8);
+            q4u8 = vcombine_u8(d8u8, d9u8);
+
+            q11u8 = q15u8;
+
+            vst1q_u8((uint8_t *)dst_ptr, q1u8); dst_ptr += dst_pitch;
+            vst1q_u8((uint8_t *)dst_ptr, q2u8); dst_ptr += dst_pitch;
+            vst1q_u8((uint8_t *)dst_ptr, q3u8); dst_ptr += dst_pitch;
+            vst1q_u8((uint8_t *)dst_ptr, q4u8); dst_ptr += dst_pitch;
+        }
+        return;
+    }
+
+    if (yoffset == 0) {  // firstpass_bfilter16x16_only
+        d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
+        d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);
+
+        for (i = 4; i > 0; i--) {
+            d2u8 = vld1_u8(src_ptr);
+            d3u8 = vld1_u8(src_ptr + 8);
+            d4u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+            d5u8 = vld1_u8(src_ptr);
+            d6u8 = vld1_u8(src_ptr + 8);
+            d7u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+            d8u8 = vld1_u8(src_ptr);
+            d9u8 = vld1_u8(src_ptr + 8);
+            d10u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+            d11u8 = vld1_u8(src_ptr);
+            d12u8 = vld1_u8(src_ptr + 8);
+            d13u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+
+            q7u16  = vmull_u8(d2u8, d0u8);
+            q8u16  = vmull_u8(d3u8, d0u8);
+            q9u16  = vmull_u8(d5u8, d0u8);
+            q10u16 = vmull_u8(d6u8, d0u8);
+            q11u16 = vmull_u8(d8u8, d0u8);
+            q12u16 = vmull_u8(d9u8, d0u8);
+            q13u16 = vmull_u8(d11u8, d0u8);
+            q14u16 = vmull_u8(d12u8, d0u8);
+
+            d2u8  = vext_u8(d2u8, d3u8, 1);
+            d5u8  = vext_u8(d5u8, d6u8, 1);
+            d8u8  = vext_u8(d8u8, d9u8, 1);
+            d11u8 = vext_u8(d11u8, d12u8, 1);
+
+            q7u16  = vmlal_u8(q7u16, d2u8, d1u8);
+            q9u16  = vmlal_u8(q9u16, d5u8, d1u8);
+            q11u16 = vmlal_u8(q11u16, d8u8, d1u8);
+            q13u16 = vmlal_u8(q13u16, d11u8, d1u8);
+
+            d3u8  = vext_u8(d3u8, d4u8, 1);
+            d6u8  = vext_u8(d6u8, d7u8, 1);
+            d9u8  = vext_u8(d9u8, d10u8, 1);
+            d12u8 = vext_u8(d12u8, d13u8, 1);
+
+            q8u16  = vmlal_u8(q8u16,  d3u8, d1u8);
+            q10u16 = vmlal_u8(q10u16, d6u8, d1u8);
+            q12u16 = vmlal_u8(q12u16, d9u8, d1u8);
+            q14u16 = vmlal_u8(q14u16, d12u8, d1u8);
+
+            d14u8 = vqrshrn_n_u16(q7u16, 7);
+            d15u8 = vqrshrn_n_u16(q8u16, 7);
+            d16u8 = vqrshrn_n_u16(q9u16, 7);
+            d17u8 = vqrshrn_n_u16(q10u16, 7);
+            d18u8 = vqrshrn_n_u16(q11u16, 7);
+            d19u8 = vqrshrn_n_u16(q12u16, 7);
+            d20u8 = vqrshrn_n_u16(q13u16, 7);
+            d21u8 = vqrshrn_n_u16(q14u16, 7);
+
+            q7u8 = vcombine_u8(d14u8, d15u8);
+            q8u8 = vcombine_u8(d16u8, d17u8);
+            q9u8 = vcombine_u8(d18u8, d19u8);
+            q10u8 = vcombine_u8(d20u8, d21u8);
+
+            vst1q_u8((uint8_t *)dst_ptr, q7u8); dst_ptr += dst_pitch;
+            vst1q_u8((uint8_t *)dst_ptr, q8u8); dst_ptr += dst_pitch;
+            vst1q_u8((uint8_t *)dst_ptr, q9u8); dst_ptr += dst_pitch;
+            vst1q_u8((uint8_t *)dst_ptr, q10u8); dst_ptr += dst_pitch;
+        }
+        return;
+    }
+
+    d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
+    d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);
+
+    d2u8 = vld1_u8(src_ptr);
+    d3u8 = vld1_u8(src_ptr + 8);
+    d4u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+    d5u8 = vld1_u8(src_ptr);
+    d6u8 = vld1_u8(src_ptr + 8);
+    d7u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+    d8u8 = vld1_u8(src_ptr);
+    d9u8 = vld1_u8(src_ptr + 8);
+    d10u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+    d11u8 = vld1_u8(src_ptr);
+    d12u8 = vld1_u8(src_ptr + 8);
+    d13u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+
+    // First Pass: output_height lines x output_width columns (17x16)
+    tmpp = tmp;
+    for (i = 3; i > 0; i--) {
+        q7u16  = vmull_u8(d2u8, d0u8);
+        q8u16  = vmull_u8(d3u8, d0u8);
+        q9u16  = vmull_u8(d5u8, d0u8);
+        q10u16 = vmull_u8(d6u8, d0u8);
+        q11u16 = vmull_u8(d8u8, d0u8);
+        q12u16 = vmull_u8(d9u8, d0u8);
+        q13u16 = vmull_u8(d11u8, d0u8);
+        q14u16 = vmull_u8(d12u8, d0u8);
+
+        d2u8  = vext_u8(d2u8, d3u8, 1);
+        d5u8  = vext_u8(d5u8, d6u8, 1);
+        d8u8  = vext_u8(d8u8, d9u8, 1);
+        d11u8 = vext_u8(d11u8, d12u8, 1);
+
+        q7u16  = vmlal_u8(q7u16, d2u8, d1u8);
+        q9u16  = vmlal_u8(q9u16, d5u8, d1u8);
+        q11u16 = vmlal_u8(q11u16, d8u8, d1u8);
+        q13u16 = vmlal_u8(q13u16, d11u8, d1u8);
+
+        d3u8  = vext_u8(d3u8, d4u8, 1);
+        d6u8  = vext_u8(d6u8, d7u8, 1);
+        d9u8  = vext_u8(d9u8, d10u8, 1);
+        d12u8 = vext_u8(d12u8, d13u8, 1);
+
+        q8u16  = vmlal_u8(q8u16,  d3u8, d1u8);
+        q10u16 = vmlal_u8(q10u16, d6u8, d1u8);
+        q12u16 = vmlal_u8(q12u16, d9u8, d1u8);
+        q14u16 = vmlal_u8(q14u16, d12u8, d1u8);
+
+        d14u8 = vqrshrn_n_u16(q7u16, 7);
+        d15u8 = vqrshrn_n_u16(q8u16, 7);
+        d16u8 = vqrshrn_n_u16(q9u16, 7);
+        d17u8 = vqrshrn_n_u16(q10u16, 7);
+        d18u8 = vqrshrn_n_u16(q11u16, 7);
+        d19u8 = vqrshrn_n_u16(q12u16, 7);
+        d20u8 = vqrshrn_n_u16(q13u16, 7);
+        d21u8 = vqrshrn_n_u16(q14u16, 7);
+
+        d2u8 = vld1_u8(src_ptr);
+        d3u8 = vld1_u8(src_ptr + 8);
+        d4u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+        d5u8 = vld1_u8(src_ptr);
+        d6u8 = vld1_u8(src_ptr + 8);
+        d7u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+        d8u8 = vld1_u8(src_ptr);
+        d9u8 = vld1_u8(src_ptr + 8);
+        d10u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+        d11u8 = vld1_u8(src_ptr);
+        d12u8 = vld1_u8(src_ptr + 8);
+        d13u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+
+        q7u8 = vcombine_u8(d14u8, d15u8);
+        q8u8 = vcombine_u8(d16u8, d17u8);
+        q9u8 = vcombine_u8(d18u8, d19u8);
+        q10u8 = vcombine_u8(d20u8, d21u8);
+
+        vst1q_u8((uint8_t *)tmpp, q7u8); tmpp += 16;
+        vst1q_u8((uint8_t *)tmpp, q8u8); tmpp += 16;
+        vst1q_u8((uint8_t *)tmpp, q9u8); tmpp += 16;
+        vst1q_u8((uint8_t *)tmpp, q10u8); tmpp += 16;
+    }
+
+    // First-pass filtering for the remaining 5 lines
+    d14u8 = vld1_u8(src_ptr);
+    d15u8 = vld1_u8(src_ptr + 8);
+    d16u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
+
+    q9u16  = vmull_u8(d2u8, d0u8);
+    q10u16 = vmull_u8(d3u8, d0u8);
+    q11u16 = vmull_u8(d5u8, d0u8);
+    q12u16 = vmull_u8(d6u8, d0u8);
+    q13u16 = vmull_u8(d8u8, d0u8);
+    q14u16 = vmull_u8(d9u8, d0u8);
+
+    d2u8  = vext_u8(d2u8, d3u8, 1);
+    d5u8  = vext_u8(d5u8, d6u8, 1);
+    d8u8  = vext_u8(d8u8, d9u8, 1);
+
+    q9u16  = vmlal_u8(q9u16, d2u8, d1u8);
+    q11u16 = vmlal_u8(q11u16, d5u8, d1u8);
+    q13u16 = vmlal_u8(q13u16, d8u8, d1u8);
+
+    d3u8  = vext_u8(d3u8, d4u8, 1);
+    d6u8  = vext_u8(d6u8, d7u8, 1);
+    d9u8  = vext_u8(d9u8, d10u8, 1);
+
+    q10u16 = vmlal_u8(q10u16, d3u8, d1u8);
+    q12u16 = vmlal_u8(q12u16, d6u8, d1u8);
+    q14u16 = vmlal_u8(q14u16, d9u8, d1u8);
+
+    q1u16 = vmull_u8(d11u8, d0u8);
+    q2u16 = vmull_u8(d12u8, d0u8);
+    q3u16 = vmull_u8(d14u8, d0u8);
+    q4u16 = vmull_u8(d15u8, d0u8);
+
+    d11u8 = vext_u8(d11u8, d12u8, 1);
+    d14u8 = vext_u8(d14u8, d15u8, 1);
+
+    q1u16 = vmlal_u8(q1u16, d11u8, d1u8);
+    q3u16 = vmlal_u8(q3u16, d14u8, d1u8);
+
+    d12u8 = vext_u8(d12u8, d13u8, 1);
+    d15u8 = vext_u8(d15u8, d16u8, 1);
+
+    q2u16 = vmlal_u8(q2u16, d12u8, d1u8);
+    q4u16 = vmlal_u8(q4u16, d15u8, d1u8);
+
+    d10u8 = vqrshrn_n_u16(q9u16, 7);
+    d11u8 = vqrshrn_n_u16(q10u16, 7);
+    d12u8 = vqrshrn_n_u16(q11u16, 7);
+    d13u8 = vqrshrn_n_u16(q12u16, 7);
+    d14u8 = vqrshrn_n_u16(q13u16, 7);
+    d15u8 = vqrshrn_n_u16(q14u16, 7);
+    d16u8 = vqrshrn_n_u16(q1u16, 7);
+    d17u8 = vqrshrn_n_u16(q2u16, 7);
+    d18u8 = vqrshrn_n_u16(q3u16, 7);
+    d19u8 = vqrshrn_n_u16(q4u16, 7);
+
+    q5u8 = vcombine_u8(d10u8, d11u8);
+    q6u8 = vcombine_u8(d12u8, d13u8);
+    q7u8 = vcombine_u8(d14u8, d15u8);
+    q8u8 = vcombine_u8(d16u8, d17u8);
+    q9u8 = vcombine_u8(d18u8, d19u8);
+
+    vst1q_u8((uint8_t *)tmpp, q5u8); tmpp += 16;
+    vst1q_u8((uint8_t *)tmpp, q6u8); tmpp += 16;
+    vst1q_u8((uint8_t *)tmpp, q7u8); tmpp += 16;
+    vst1q_u8((uint8_t *)tmpp, q8u8); tmpp += 16;
+    vst1q_u8((uint8_t *)tmpp, q9u8);
+
+    // secondpass_filter
+    d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]);
+    d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]);
+
+    tmpp = tmp;
+    q11u8 = vld1q_u8(tmpp);
+    tmpp += 16;
+    for (i = 4; i > 0; i--) {
+        q12u8 = vld1q_u8(tmpp); tmpp += 16;
+        q13u8 = vld1q_u8(tmpp); tmpp += 16;
+        q14u8 = vld1q_u8(tmpp); tmpp += 16;
+        q15u8 = vld1q_u8(tmpp); tmpp += 16;
+
+        q1u16 = vmull_u8(vget_low_u8(q11u8), d0u8);
+        q2u16 = vmull_u8(vget_high_u8(q11u8), d0u8);
+        q3u16 = vmull_u8(vget_low_u8(q12u8), d0u8);
+        q4u16 = vmull_u8(vget_high_u8(q12u8), d0u8);
+        q5u16 = vmull_u8(vget_low_u8(q13u8), d0u8);
+        q6u16 = vmull_u8(vget_high_u8(q13u8), d0u8);
+        q7u16 = vmull_u8(vget_low_u8(q14u8), d0u8);
+        q8u16 = vmull_u8(vget_high_u8(q14u8), d0u8);
+
+        q1u16 = vmlal_u8(q1u16, vget_low_u8(q12u8), d1u8);
+        q2u16 = vmlal_u8(q2u16, vget_high_u8(q12u8), d1u8);
+        q3u16 = vmlal_u8(q3u16, vget_low_u8(q13u8), d1u8);
+        q4u16 = vmlal_u8(q4u16, vget_high_u8(q13u8), d1u8);
+        q5u16 = vmlal_u8(q5u16, vget_low_u8(q14u8), d1u8);
+        q6u16 = vmlal_u8(q6u16, vget_high_u8(q14u8), d1u8);
+        q7u16 = vmlal_u8(q7u16, vget_low_u8(q15u8), d1u8);
+        q8u16 = vmlal_u8(q8u16, vget_high_u8(q15u8), d1u8);
+
+        d2u8 = vqrshrn_n_u16(q1u16, 7);
+        d3u8 = vqrshrn_n_u16(q2u16, 7);
+        d4u8 = vqrshrn_n_u16(q3u16, 7);
+        d5u8 = vqrshrn_n_u16(q4u16, 7);
+        d6u8 = vqrshrn_n_u16(q5u16, 7);
+        d7u8 = vqrshrn_n_u16(q6u16, 7);
+        d8u8 = vqrshrn_n_u16(q7u16, 7);
+        d9u8 = vqrshrn_n_u16(q8u16, 7);
+
+        q1u8 = vcombine_u8(d2u8, d3u8);
+        q2u8 = vcombine_u8(d4u8, d5u8);
+        q3u8 = vcombine_u8(d6u8, d7u8);
+        q4u8 = vcombine_u8(d8u8, d9u8);
+
+        q11u8 = q15u8;
+
+        vst1q_u8((uint8_t *)dst_ptr, q1u8); dst_ptr += dst_pitch;
+        vst1q_u8((uint8_t *)dst_ptr, q2u8); dst_ptr += dst_pitch;
+        vst1q_u8((uint8_t *)dst_ptr, q3u8); dst_ptr += dst_pitch;
+        vst1q_u8((uint8_t *)dst_ptr, q4u8); dst_ptr += dst_pitch;
+    }
+    return;
+}
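
Note: all three code paths above (vertical-only when xoffset == 0, horizontal-only when yoffset == 0, and the general two-pass case through the 272-byte temporary) reduce to the same two-tap filter. A scalar sketch of that tap, illustrative only (the helper name is hypothetical; the rounding mirrors vqrshrn_n_u16(acc, 7)):

    /* Weighted average of two neighbouring pixels. For any sub-pel offset the
       two coefficients sum to 128, so adding 64 and shifting right by 7 keeps
       the result in pixel range. */
    static unsigned char bilinear_tap(unsigned char a, unsigned char b,
                                      int c0, int c1) {  /* c0 + c1 == 128 */
        return (unsigned char)((a * c0 + b * c1 + 64) >> 7);
    }

The first pass applies this to horizontal neighbours selected by xoffset, the second pass to vertical neighbours selected by yoffset.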

+ 59 - 0
thirdparty/libvpx/vp8/common/arm/neon/copymem_neon.c

@@ -0,0 +1,59 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+void vp8_copy_mem8x4_neon(
+        unsigned char *src,
+        int src_stride,
+        unsigned char *dst,
+        int dst_stride) {
+    uint8x8_t vtmp;
+    int r;
+
+    for (r = 0; r < 4; r++) {
+        vtmp = vld1_u8(src);
+        vst1_u8(dst, vtmp);
+        src += src_stride;
+        dst += dst_stride;
+    }
+}
+
+void vp8_copy_mem8x8_neon(
+        unsigned char *src,
+        int src_stride,
+        unsigned char *dst,
+        int dst_stride) {
+    uint8x8_t vtmp;
+    int r;
+
+    for (r = 0; r < 8; r++) {
+        vtmp = vld1_u8(src);
+        vst1_u8(dst, vtmp);
+        src += src_stride;
+        dst += dst_stride;
+    }
+}
+
+void vp8_copy_mem16x16_neon(
+        unsigned char *src,
+        int src_stride,
+        unsigned char *dst,
+        int dst_stride) {
+    int r;
+    uint8x16_t qtmp;
+
+    for (r = 0; r < 16; r++) {
+        qtmp = vld1q_u8(src);
+        vst1q_u8(dst, qtmp);
+        src += src_stride;
+        dst += dst_stride;
+    }
+}

+ 42 - 0
thirdparty/libvpx/vp8/common/arm/neon/dc_only_idct_add_neon.c

@@ -0,0 +1,42 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+void vp8_dc_only_idct_add_neon(
+        int16_t input_dc,
+        unsigned char *pred_ptr,
+        int pred_stride,
+        unsigned char *dst_ptr,
+        int dst_stride) {
+    int i;
+    uint16_t a1 = ((input_dc + 4) >> 3);
+    uint32x2_t d2u32 = vdup_n_u32(0);
+    uint8x8_t d2u8;
+    uint16x8_t q1u16;
+    uint16x8_t qAdd;
+
+    qAdd = vdupq_n_u16(a1);
+
+    for (i = 0; i < 2; i++) {
+        d2u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d2u32, 0);
+        pred_ptr += pred_stride;
+        d2u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d2u32, 1);
+        pred_ptr += pred_stride;
+
+        q1u16 = vaddw_u8(qAdd, vreinterpret_u8_u32(d2u32));
+        d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16));
+
+        vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 0);
+        dst_ptr += dst_stride;
+        vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 1);
+        dst_ptr += dst_stride;
+    }
+}
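
For reference, a scalar model of the DC-only path above, illustrative only (the helper name is hypothetical): the single DC coefficient is rounded once, added to every pixel of the 4x4 prediction block, and the sums are clamped to 8 bits; the vaddw_u8/vqmovun_s16 pair above does the same four pixels at a time.

    static void dc_only_idct_add_sketch(short input_dc,
                                        const unsigned char *pred, int pred_stride,
                                        unsigned char *dst, int dst_stride) {
        int a = (input_dc + 4) >> 3;  /* same rounding as a1 above */
        int r, c;
        for (r = 0; r < 4; r++) {
            for (c = 0; c < 4; c++) {
                int v = pred[c] + a;
                dst[c] = (unsigned char)(v < 0 ? 0 : (v > 255 ? 255 : v));
            }
            pred += pred_stride;
            dst += dst_stride;
        }
    }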

+ 142 - 0
thirdparty/libvpx/vp8/common/arm/neon/dequant_idct_neon.c

@@ -0,0 +1,142 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+static const int16_t cospi8sqrt2minus1 = 20091;
+static const int16_t sinpi8sqrt2       = 35468;
+
+void vp8_dequant_idct_add_neon(
+        int16_t *input,
+        int16_t *dq,
+        unsigned char *dst,
+        int stride) {
+    unsigned char *dst0;
+    int32x2_t d14, d15;
+    int16x4_t d2, d3, d4, d5, d10, d11, d12, d13;
+    int16x8_t q1, q2, q3, q4, q5, q6;
+    int16x8_t qEmpty = vdupq_n_s16(0);
+    int32x2x2_t d2tmp0, d2tmp1;
+    int16x4x2_t d2tmp2, d2tmp3;
+
+    d14 = d15 = vdup_n_s32(0);
+
+    // load input
+    q3 = vld1q_s16(input);
+    vst1q_s16(input, qEmpty);
+    input += 8;
+    q4 = vld1q_s16(input);
+    vst1q_s16(input, qEmpty);
+
+    // load dq
+    q5 = vld1q_s16(dq);
+    dq += 8;
+    q6 = vld1q_s16(dq);
+
+    // load src from dst
+    dst0 = dst;
+    d14 = vld1_lane_s32((const int32_t *)dst0, d14, 0);
+    dst0 += stride;
+    d14 = vld1_lane_s32((const int32_t *)dst0, d14, 1);
+    dst0 += stride;
+    d15 = vld1_lane_s32((const int32_t *)dst0, d15, 0);
+    dst0 += stride;
+    d15 = vld1_lane_s32((const int32_t *)dst0, d15, 1);
+
+    q1 = vreinterpretq_s16_u16(vmulq_u16(vreinterpretq_u16_s16(q3),
+                                         vreinterpretq_u16_s16(q5)));
+    q2 = vreinterpretq_s16_u16(vmulq_u16(vreinterpretq_u16_s16(q4),
+                                         vreinterpretq_u16_s16(q6)));
+
+    d12 = vqadd_s16(vget_low_s16(q1), vget_low_s16(q2));
+    d13 = vqsub_s16(vget_low_s16(q1), vget_low_s16(q2));
+
+    q2 = vcombine_s16(vget_high_s16(q1), vget_high_s16(q2));
+
+    q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2);
+    q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1);
+
+    q3 = vshrq_n_s16(q3, 1);
+    q4 = vshrq_n_s16(q4, 1);
+
+    q3 = vqaddq_s16(q3, q2);
+    q4 = vqaddq_s16(q4, q2);
+
+    d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4));
+    d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4));
+
+    d2 = vqadd_s16(d12, d11);
+    d3 = vqadd_s16(d13, d10);
+    d4 = vqsub_s16(d13, d10);
+    d5 = vqsub_s16(d12, d11);
+
+    d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
+    d2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
+    d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]),
+                      vreinterpret_s16_s32(d2tmp1.val[0]));
+    d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]),
+                      vreinterpret_s16_s32(d2tmp1.val[1]));
+
+    // loop 2
+    q2 = vcombine_s16(d2tmp2.val[1], d2tmp3.val[1]);
+
+    q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2);
+    q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1);
+
+    d12 = vqadd_s16(d2tmp2.val[0], d2tmp3.val[0]);
+    d13 = vqsub_s16(d2tmp2.val[0], d2tmp3.val[0]);
+
+    q3 = vshrq_n_s16(q3, 1);
+    q4 = vshrq_n_s16(q4, 1);
+
+    q3 = vqaddq_s16(q3, q2);
+    q4 = vqaddq_s16(q4, q2);
+
+    d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4));
+    d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4));
+
+    d2 = vqadd_s16(d12, d11);
+    d3 = vqadd_s16(d13, d10);
+    d4 = vqsub_s16(d13, d10);
+    d5 = vqsub_s16(d12, d11);
+
+    d2 = vrshr_n_s16(d2, 3);
+    d3 = vrshr_n_s16(d3, 3);
+    d4 = vrshr_n_s16(d4, 3);
+    d5 = vrshr_n_s16(d5, 3);
+
+    d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
+    d2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
+    d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]),
+                      vreinterpret_s16_s32(d2tmp1.val[0]));
+    d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]),
+                      vreinterpret_s16_s32(d2tmp1.val[1]));
+
+    q1 = vcombine_s16(d2tmp2.val[0], d2tmp2.val[1]);
+    q2 = vcombine_s16(d2tmp3.val[0], d2tmp3.val[1]);
+
+    q1 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q1),
+                                        vreinterpret_u8_s32(d14)));
+    q2 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2),
+                                        vreinterpret_u8_s32(d15)));
+
+    d14 = vreinterpret_s32_u8(vqmovun_s16(q1));
+    d15 = vreinterpret_s32_u8(vqmovun_s16(q2));
+
+    dst0 = dst;
+    vst1_lane_s32((int32_t *)dst0, d14, 0);
+    dst0 += stride;
+    vst1_lane_s32((int32_t *)dst0, d14, 1);
+    dst0 += stride;
+    vst1_lane_s32((int32_t *)dst0, d15, 0);
+    dst0 += stride;
+    vst1_lane_s32((int32_t *)dst0, d15, 1);
+    return;
+}
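
A note on the fixed-point constants above: both are Q16 approximations of the VP8 inverse-DCT multipliers, with cospi8sqrt2minus1 = round((sqrt(2)*cos(pi/8) - 1) * 65536) = 20091 and sinpi8sqrt2 = round(sqrt(2)*sin(pi/8) * 65536) = 35468 (0x8a8c). Because 35468 has its lowest bit clear, idct_dequant_full_2x_neon.c below stores it pre-shifted right by one (17734) and relies on the implicit doubling in vqdmulhq_n_s16 to restore the intended product.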

+ 25 - 0
thirdparty/libvpx/vp8/common/arm/neon/dequantizeb_neon.c

@@ -0,0 +1,25 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+#include "vp8/common/blockd.h"
+
+void vp8_dequantize_b_neon(BLOCKD *d, short *DQC) {
+    int16x8x2_t qQ, qDQC, qDQ;
+
+    qQ   = vld2q_s16(d->qcoeff);
+    qDQC = vld2q_s16(DQC);
+
+    qDQ.val[0] = vmulq_s16(qQ.val[0], qDQC.val[0]);
+    qDQ.val[1] = vmulq_s16(qQ.val[1], qDQC.val[1]);
+
+    vst2q_s16(d->dqcoeff, qDQ);
+}
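
The deinterleave/multiply/reinterleave above is equivalent to the following scalar loop (illustrative only): each of the block's 16 quantized coefficients is scaled by its dequantization factor.

    int i;
    for (i = 0; i < 16; i++)
        d->dqcoeff[i] = d->qcoeff[i] * DQC[i];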

+ 96 - 0
thirdparty/libvpx/vp8/common/arm/neon/idct_blk_neon.c

@@ -0,0 +1,96 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_config.h"
+#include "vp8_rtcd.h"
+
+/* place these declarations here because we don't want to maintain them
+ * outside of this scope
+ */
+void idct_dequant_full_2x_neon(short *q, short *dq,
+                               unsigned char *dst, int stride);
+void idct_dequant_0_2x_neon(short *q, short dq,
+                            unsigned char *dst, int stride);
+
+
+void vp8_dequant_idct_add_y_block_neon(short *q, short *dq,
+                                       unsigned char *dst,
+                                       int stride, char *eobs)
+{
+    int i;
+
+    for (i = 0; i < 4; i++)
+    {
+        if (((short *)(eobs))[0])
+        {
+            if (((short *)eobs)[0] & 0xfefe)
+                idct_dequant_full_2x_neon (q, dq, dst, stride);
+            else
+                idct_dequant_0_2x_neon (q, dq[0], dst, stride);
+        }
+
+        if (((short *)(eobs))[1])
+        {
+            if (((short *)eobs)[1] & 0xfefe)
+                idct_dequant_full_2x_neon (q+32, dq, dst+8, stride);
+            else
+                idct_dequant_0_2x_neon (q+32, dq[0], dst+8, stride);
+        }
+        q    += 64;
+        dst  += 4*stride;
+        eobs += 4;
+    }
+}
+
+void vp8_dequant_idct_add_uv_block_neon(short *q, short *dq,
+                                        unsigned char *dstu,
+                                        unsigned char *dstv,
+                                        int stride, char *eobs)
+{
+    if (((short *)(eobs))[0])
+    {
+        if (((short *)eobs)[0] & 0xfefe)
+            idct_dequant_full_2x_neon (q, dq, dstu, stride);
+        else
+            idct_dequant_0_2x_neon (q, dq[0], dstu, stride);
+    }
+
+    q    += 32;
+    dstu += 4*stride;
+
+    if (((short *)(eobs))[1])
+    {
+        if (((short *)eobs)[1] & 0xfefe)
+            idct_dequant_full_2x_neon (q, dq, dstu, stride);
+        else
+            idct_dequant_0_2x_neon (q, dq[0], dstu, stride);
+    }
+
+    q += 32;
+
+    if (((short *)(eobs))[2])
+    {
+        if (((short *)eobs)[2] & 0xfefe)
+            idct_dequant_full_2x_neon (q, dq, dstv, stride);
+        else
+            idct_dequant_0_2x_neon (q, dq[0], dstv, stride);
+    }
+
+    q    += 32;
+    dstv += 4*stride;
+
+    if (((short *)(eobs))[3])
+    {
+        if (((short *)eobs)[3] & 0xfefe)
+            idct_dequant_full_2x_neon (q, dq, dstv, stride);
+        else
+            idct_dequant_0_2x_neon (q, dq[0], dstv, stride);
+    }
+}
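
The (short *)eobs casts above examine two per-block end-of-block counts at once (eobs holds one byte per 4x4 block). Masking the pair with 0xfefe is a cheap "does either block have AC coefficients?" test, since a byte of 0 or 1 contributes nothing under that mask. A scalar restatement of the per-pair decision, illustrative only:

    int pair_nonzero = (eobs[0] != 0) || (eobs[1] != 0);  /* ((short *)eobs)[0]          */
    int pair_has_ac  = (eobs[0] > 1)  || (eobs[1] > 1);   /* ((short *)eobs)[0] & 0xfefe */
    /* pair_has_ac selects idct_dequant_full_2x_neon; otherwise only the two DC
       terms are added via idct_dequant_0_2x_neon. */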

+ 63 - 0
thirdparty/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.c

@@ -0,0 +1,63 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+void idct_dequant_0_2x_neon(
+        int16_t *q,
+        int16_t dq,
+        unsigned char *dst,
+        int stride) {
+    unsigned char *dst0;
+    int i, a0, a1;
+    int16x8x2_t q2Add;
+    int32x2_t d2s32 = vdup_n_s32(0),
+              d4s32 = vdup_n_s32(0);
+    uint8x8_t d2u8, d4u8;
+    uint16x8_t q1u16, q2u16;
+
+    a0 = ((q[0] * dq) + 4) >> 3;
+    a1 = ((q[16] * dq) + 4) >> 3;
+    q[0] = q[16] = 0;
+    q2Add.val[0] = vdupq_n_s16((int16_t)a0);
+    q2Add.val[1] = vdupq_n_s16((int16_t)a1);
+
+    for (i = 0; i < 2; i++, dst += 4) {
+        dst0 = dst;
+        d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 0);
+        dst0 += stride;
+        d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 1);
+        dst0 += stride;
+        d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 0);
+        dst0 += stride;
+        d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 1);
+
+        q1u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]),
+                         vreinterpret_u8_s32(d2s32));
+        q2u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]),
+                         vreinterpret_u8_s32(d4s32));
+
+        d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16));
+        d4u8 = vqmovun_s16(vreinterpretq_s16_u16(q2u16));
+
+        d2s32 = vreinterpret_s32_u8(d2u8);
+        d4s32 = vreinterpret_s32_u8(d4u8);
+
+        dst0 = dst;
+        vst1_lane_s32((int32_t *)dst0, d2s32, 0);
+        dst0 += stride;
+        vst1_lane_s32((int32_t *)dst0, d2s32, 1);
+        dst0 += stride;
+        vst1_lane_s32((int32_t *)dst0, d4s32, 0);
+        dst0 += stride;
+        vst1_lane_s32((int32_t *)dst0, d4s32, 1);
+    }
+    return;
+}

+ 185 - 0
thirdparty/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.c

@@ -0,0 +1,185 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+static const int16_t cospi8sqrt2minus1 = 20091;
+static const int16_t sinpi8sqrt2       = 17734;
+// sinpi8sqrt2 is 35468 (0x8a8c) pre-shifted right by one; its lowest bit is 0, so nothing is lost
+
+void idct_dequant_full_2x_neon(
+        int16_t *q,
+        int16_t *dq,
+        unsigned char *dst,
+        int stride) {
+    unsigned char *dst0, *dst1;
+    int32x2_t d28, d29, d30, d31;
+    int16x8_t q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11;
+    int16x8_t qEmpty = vdupq_n_s16(0);
+    int32x4x2_t q2tmp0, q2tmp1;
+    int16x8x2_t q2tmp2, q2tmp3;
+    int16x4_t dLow0, dLow1, dHigh0, dHigh1;
+
+    d28 = d29 = d30 = d31 = vdup_n_s32(0);
+
+    // load dq
+    q0 = vld1q_s16(dq);
+    dq += 8;
+    q1 = vld1q_s16(dq);
+
+    // load q
+    q2 = vld1q_s16(q);
+    vst1q_s16(q, qEmpty);
+    q += 8;
+    q3 = vld1q_s16(q);
+    vst1q_s16(q, qEmpty);
+    q += 8;
+    q4 = vld1q_s16(q);
+    vst1q_s16(q, qEmpty);
+    q += 8;
+    q5 = vld1q_s16(q);
+    vst1q_s16(q, qEmpty);
+
+    // load src from dst
+    dst0 = dst;
+    dst1 = dst + 4;
+    d28 = vld1_lane_s32((const int32_t *)dst0, d28, 0);
+    dst0 += stride;
+    d28 = vld1_lane_s32((const int32_t *)dst1, d28, 1);
+    dst1 += stride;
+    d29 = vld1_lane_s32((const int32_t *)dst0, d29, 0);
+    dst0 += stride;
+    d29 = vld1_lane_s32((const int32_t *)dst1, d29, 1);
+    dst1 += stride;
+
+    d30 = vld1_lane_s32((const int32_t *)dst0, d30, 0);
+    dst0 += stride;
+    d30 = vld1_lane_s32((const int32_t *)dst1, d30, 1);
+    dst1 += stride;
+    d31 = vld1_lane_s32((const int32_t *)dst0, d31, 0);
+    d31 = vld1_lane_s32((const int32_t *)dst1, d31, 1);
+
+    q2 = vmulq_s16(q2, q0);
+    q3 = vmulq_s16(q3, q1);
+    q4 = vmulq_s16(q4, q0);
+    q5 = vmulq_s16(q5, q1);
+
+    // vswp
+    dLow0 = vget_low_s16(q2);
+    dHigh0 = vget_high_s16(q2);
+    dLow1 = vget_low_s16(q4);
+    dHigh1 = vget_high_s16(q4);
+    q2 = vcombine_s16(dLow0, dLow1);
+    q4 = vcombine_s16(dHigh0, dHigh1);
+
+    dLow0 = vget_low_s16(q3);
+    dHigh0 = vget_high_s16(q3);
+    dLow1 = vget_low_s16(q5);
+    dHigh1 = vget_high_s16(q5);
+    q3 = vcombine_s16(dLow0, dLow1);
+    q5 = vcombine_s16(dHigh0, dHigh1);
+
+    q6 = vqdmulhq_n_s16(q4, sinpi8sqrt2);
+    q7 = vqdmulhq_n_s16(q5, sinpi8sqrt2);
+    q8 = vqdmulhq_n_s16(q4, cospi8sqrt2minus1);
+    q9 = vqdmulhq_n_s16(q5, cospi8sqrt2minus1);
+
+    q10 = vqaddq_s16(q2, q3);
+    q11 = vqsubq_s16(q2, q3);
+
+    q8 = vshrq_n_s16(q8, 1);
+    q9 = vshrq_n_s16(q9, 1);
+
+    q4 = vqaddq_s16(q4, q8);
+    q5 = vqaddq_s16(q5, q9);
+
+    q2 = vqsubq_s16(q6, q5);
+    q3 = vqaddq_s16(q7, q4);
+
+    q4 = vqaddq_s16(q10, q3);
+    q5 = vqaddq_s16(q11, q2);
+    q6 = vqsubq_s16(q11, q2);
+    q7 = vqsubq_s16(q10, q3);
+
+    q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6));
+    q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7));
+    q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]),
+                       vreinterpretq_s16_s32(q2tmp1.val[0]));
+    q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]),
+                       vreinterpretq_s16_s32(q2tmp1.val[1]));
+
+    // loop 2
+    q8  = vqdmulhq_n_s16(q2tmp2.val[1], sinpi8sqrt2);
+    q9  = vqdmulhq_n_s16(q2tmp3.val[1], sinpi8sqrt2);
+    q10 = vqdmulhq_n_s16(q2tmp2.val[1], cospi8sqrt2minus1);
+    q11 = vqdmulhq_n_s16(q2tmp3.val[1], cospi8sqrt2minus1);
+
+    q2 = vqaddq_s16(q2tmp2.val[0], q2tmp3.val[0]);
+    q3 = vqsubq_s16(q2tmp2.val[0], q2tmp3.val[0]);
+
+    q10 = vshrq_n_s16(q10, 1);
+    q11 = vshrq_n_s16(q11, 1);
+
+    q10 = vqaddq_s16(q2tmp2.val[1], q10);
+    q11 = vqaddq_s16(q2tmp3.val[1], q11);
+
+    q8 = vqsubq_s16(q8, q11);
+    q9 = vqaddq_s16(q9, q10);
+
+    q4 = vqaddq_s16(q2, q9);
+    q5 = vqaddq_s16(q3, q8);
+    q6 = vqsubq_s16(q3, q8);
+    q7 = vqsubq_s16(q2, q9);
+
+    q4 = vrshrq_n_s16(q4, 3);
+    q5 = vrshrq_n_s16(q5, 3);
+    q6 = vrshrq_n_s16(q6, 3);
+    q7 = vrshrq_n_s16(q7, 3);
+
+    q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6));
+    q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7));
+    q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]),
+                       vreinterpretq_s16_s32(q2tmp1.val[0]));
+    q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]),
+                       vreinterpretq_s16_s32(q2tmp1.val[1]));
+
+    q4 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[0]),
+                                          vreinterpret_u8_s32(d28)));
+    q5 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[1]),
+                                          vreinterpret_u8_s32(d29)));
+    q6 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[0]),
+                                          vreinterpret_u8_s32(d30)));
+    q7 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[1]),
+                                          vreinterpret_u8_s32(d31)));
+
+    d28 = vreinterpret_s32_u8(vqmovun_s16(q4));
+    d29 = vreinterpret_s32_u8(vqmovun_s16(q5));
+    d30 = vreinterpret_s32_u8(vqmovun_s16(q6));
+    d31 = vreinterpret_s32_u8(vqmovun_s16(q7));
+
+    dst0 = dst;
+    dst1 = dst + 4;
+    vst1_lane_s32((int32_t *)dst0, d28, 0);
+    dst0 += stride;
+    vst1_lane_s32((int32_t *)dst1, d28, 1);
+    dst1 += stride;
+    vst1_lane_s32((int32_t *)dst0, d29, 0);
+    dst0 += stride;
+    vst1_lane_s32((int32_t *)dst1, d29, 1);
+    dst1 += stride;
+
+    vst1_lane_s32((int32_t *)dst0, d30, 0);
+    dst0 += stride;
+    vst1_lane_s32((int32_t *)dst1, d30, 1);
+    dst1 += stride;
+    vst1_lane_s32((int32_t *)dst0, d31, 0);
+    vst1_lane_s32((int32_t *)dst1, d31, 1);
+    return;
+}

+ 102 - 0
thirdparty/libvpx/vp8/common/arm/neon/iwalsh_neon.c

@@ -0,0 +1,102 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+void vp8_short_inv_walsh4x4_neon(
+        int16_t *input,
+        int16_t *mb_dqcoeff) {
+    int16x8_t q0s16, q1s16, q2s16, q3s16;
+    int16x4_t d4s16, d5s16, d6s16, d7s16;
+    int16x4x2_t v2tmp0, v2tmp1;
+    int32x2x2_t v2tmp2, v2tmp3;
+    int16x8_t qAdd3;
+
+    q0s16 = vld1q_s16(input);
+    q1s16 = vld1q_s16(input + 8);
+
+    // 1st for loop
+    d4s16 = vadd_s16(vget_low_s16(q0s16), vget_high_s16(q1s16));
+    d6s16 = vadd_s16(vget_high_s16(q0s16), vget_low_s16(q1s16));
+    d5s16 = vsub_s16(vget_low_s16(q0s16), vget_high_s16(q1s16));
+    d7s16 = vsub_s16(vget_high_s16(q0s16), vget_low_s16(q1s16));
+
+    q2s16 = vcombine_s16(d4s16, d5s16);
+    q3s16 = vcombine_s16(d6s16, d7s16);
+
+    q0s16 = vaddq_s16(q2s16, q3s16);
+    q1s16 = vsubq_s16(q2s16, q3s16);
+
+    v2tmp2 = vtrn_s32(vreinterpret_s32_s16(vget_low_s16(q0s16)),
+                      vreinterpret_s32_s16(vget_low_s16(q1s16)));
+    v2tmp3 = vtrn_s32(vreinterpret_s32_s16(vget_high_s16(q0s16)),
+                      vreinterpret_s32_s16(vget_high_s16(q1s16)));
+    v2tmp0 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[0]),
+                      vreinterpret_s16_s32(v2tmp3.val[0]));
+    v2tmp1 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[1]),
+                      vreinterpret_s16_s32(v2tmp3.val[1]));
+
+    // 2nd for loop
+    d4s16 = vadd_s16(v2tmp0.val[0], v2tmp1.val[1]);
+    d6s16 = vadd_s16(v2tmp0.val[1], v2tmp1.val[0]);
+    d5s16 = vsub_s16(v2tmp0.val[0], v2tmp1.val[1]);
+    d7s16 = vsub_s16(v2tmp0.val[1], v2tmp1.val[0]);
+    q2s16 = vcombine_s16(d4s16, d5s16);
+    q3s16 = vcombine_s16(d6s16, d7s16);
+
+    qAdd3 = vdupq_n_s16(3);
+
+    q0s16 = vaddq_s16(q2s16, q3s16);
+    q1s16 = vsubq_s16(q2s16, q3s16);
+
+    q0s16 = vaddq_s16(q0s16, qAdd3);
+    q1s16 = vaddq_s16(q1s16, qAdd3);
+
+    q0s16 = vshrq_n_s16(q0s16, 3);
+    q1s16 = vshrq_n_s16(q1s16, 3);
+
+    // store
+    vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16),  0);
+    mb_dqcoeff += 16;
+    vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 0);
+    mb_dqcoeff += 16;
+    vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16),  0);
+    mb_dqcoeff += 16;
+    vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 0);
+    mb_dqcoeff += 16;
+
+    vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16),  1);
+    mb_dqcoeff += 16;
+    vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 1);
+    mb_dqcoeff += 16;
+    vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16),  1);
+    mb_dqcoeff += 16;
+    vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 1);
+    mb_dqcoeff += 16;
+
+    vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16),  2);
+    mb_dqcoeff += 16;
+    vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 2);
+    mb_dqcoeff += 16;
+    vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16),  2);
+    mb_dqcoeff += 16;
+    vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 2);
+    mb_dqcoeff += 16;
+
+    vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16),  3);
+    mb_dqcoeff += 16;
+    vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 3);
+    mb_dqcoeff += 16;
+    vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16),  3);
+    mb_dqcoeff += 16;
+    vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 3);
+    mb_dqcoeff += 16;
+    return;
+}
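
The unusual store pattern above (a stride of 16 int16_t between lane stores) reflects how the result is consumed: each of the 16 outputs of the 4x4 inverse Walsh-Hadamard transform becomes the DC coefficient of one luma 4x4 block. In scalar form, assuming the transform results are already in output[0..15] (illustrative only):

    int i;
    for (i = 0; i < 16; i++)
        mb_dqcoeff[i * 16] = output[i];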

+ 111 - 0
thirdparty/libvpx/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c

@@ -0,0 +1,111 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+#include "./vpx_config.h"
+
+static INLINE void vp8_loop_filter_simple_horizontal_edge_neon(
+        unsigned char *s,
+        int p,
+        const unsigned char *blimit) {
+    uint8_t *sp;
+    uint8x16_t qblimit, q0u8;
+    uint8x16_t q5u8, q6u8, q7u8, q8u8, q9u8, q10u8, q14u8, q15u8;
+    int16x8_t q2s16, q3s16, q13s16;
+    int8x8_t d8s8, d9s8;
+    int8x16_t q2s8, q3s8, q4s8, q10s8, q11s8, q14s8;
+
+    qblimit = vdupq_n_u8(*blimit);
+
+    sp = s - (p << 1);
+    q5u8 = vld1q_u8(sp);
+    sp += p;
+    q6u8 = vld1q_u8(sp);
+    sp += p;
+    q7u8 = vld1q_u8(sp);
+    sp += p;
+    q8u8 = vld1q_u8(sp);
+
+    q15u8 = vabdq_u8(q6u8, q7u8);
+    q14u8 = vabdq_u8(q5u8, q8u8);
+
+    q15u8 = vqaddq_u8(q15u8, q15u8);
+    q14u8 = vshrq_n_u8(q14u8, 1);
+    q0u8 = vdupq_n_u8(0x80);
+    q13s16 = vdupq_n_s16(3);
+    q15u8 = vqaddq_u8(q15u8, q14u8);
+
+    q5u8 = veorq_u8(q5u8, q0u8);
+    q6u8 = veorq_u8(q6u8, q0u8);
+    q7u8 = veorq_u8(q7u8, q0u8);
+    q8u8 = veorq_u8(q8u8, q0u8);
+
+    q15u8 = vcgeq_u8(qblimit, q15u8);
+
+    q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7u8)),
+                     vget_low_s8(vreinterpretq_s8_u8(q6u8)));
+    q3s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7u8)),
+                     vget_high_s8(vreinterpretq_s8_u8(q6u8)));
+
+    q4s8 = vqsubq_s8(vreinterpretq_s8_u8(q5u8),
+                     vreinterpretq_s8_u8(q8u8));
+
+    q2s16 = vmulq_s16(q2s16, q13s16);
+    q3s16 = vmulq_s16(q3s16, q13s16);
+
+    q10u8 = vdupq_n_u8(3);
+    q9u8 = vdupq_n_u8(4);
+
+    q2s16 = vaddw_s8(q2s16, vget_low_s8(q4s8));
+    q3s16 = vaddw_s8(q3s16, vget_high_s8(q4s8));
+
+    d8s8 = vqmovn_s16(q2s16);
+    d9s8 = vqmovn_s16(q3s16);
+    q4s8 = vcombine_s8(d8s8, d9s8);
+
+    q14s8 = vandq_s8(q4s8, vreinterpretq_s8_u8(q15u8));
+
+    q2s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q10u8));
+    q3s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q9u8));
+    q2s8 = vshrq_n_s8(q2s8, 3);
+    q3s8 = vshrq_n_s8(q3s8, 3);
+
+    q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q6u8), q2s8);
+    q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q7u8), q3s8);
+
+    q6u8 = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8);
+    q7u8 = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8);
+
+    vst1q_u8(s, q7u8);
+    s -= p;
+    vst1q_u8(s, q6u8);
+    return;
+}
+
+void vp8_loop_filter_bhs_neon(
+        unsigned char *y_ptr,
+        int y_stride,
+        const unsigned char *blimit) {
+    y_ptr += y_stride * 4;
+    vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
+    y_ptr += y_stride * 4;
+    vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
+    y_ptr += y_stride * 4;
+    vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
+    return;
+}
+
+void vp8_loop_filter_mbhs_neon(
+        unsigned char *y_ptr,
+        int y_stride,
+        const unsigned char *blimit) {
+    vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
+    return;
+}
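
A scalar sketch of the simple filter that the function above applies to 16 pixel columns at once; it is illustrative only, the helper names are hypothetical, and clamp_s8() saturates to the signed 8-bit range. Pixels are biased by 0x80 so the arithmetic can run on signed bytes, exactly as the veorq_u8(..., 0x80) steps do.

    #include <stdlib.h>  /* abs */

    static signed char clamp_s8(int v) {
        return (signed char)(v < -128 ? -128 : (v > 127 ? 127 : v));
    }

    /* One pixel column p1 p0 | q0 q1 straddling the horizontal edge. */
    static void simple_filter_sketch(unsigned char *p1, unsigned char *p0,
                                     unsigned char *q0, unsigned char *q1,
                                     unsigned char blimit) {
        /* Edge-activity mask, as computed with vabdq/vqaddq/vshrq/vcgeq above. */
        if (abs(*p0 - *q0) * 2 + abs(*p1 - *q1) / 2 > blimit)
            return;
        {
            signed char ps1 = (signed char)(*p1 ^ 0x80), ps0 = (signed char)(*p0 ^ 0x80);
            signed char qs0 = (signed char)(*q0 ^ 0x80), qs1 = (signed char)(*q1 ^ 0x80);
            signed char f  = clamp_s8(clamp_s8(ps1 - qs1) + 3 * (qs0 - ps0));
            signed char f1 = (signed char)(clamp_s8(f + 4) >> 3);
            signed char f2 = (signed char)(clamp_s8(f + 3) >> 3);
            *q0 = (unsigned char)(clamp_s8(qs0 - f1) ^ 0x80);
            *p0 = (unsigned char)(clamp_s8(ps0 + f2) ^ 0x80);
        }
    }

vp8_loop_filter_bhs_neon runs this over the three inner 4-row edges of a macroblock; vp8_loop_filter_mbhs_neon runs it over the macroblock edge itself.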

+ 283 - 0
thirdparty/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c

@@ -0,0 +1,283 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+#include "./vpx_config.h"
+#include "vpx_ports/arm.h"
+
+#ifdef VPX_INCOMPATIBLE_GCC
+static INLINE void write_2x4(unsigned char *dst, int pitch,
+                             const uint8x8x2_t result) {
+    /*
+     * uint8x8x2_t result
+    00 01 02 03 | 04 05 06 07
+    10 11 12 13 | 14 15 16 17
+    ---
+    * after vtrn_u8
+    00 10 02 12 | 04 14 06 16
+    01 11 03 13 | 05 15 07 17
+    */
+    const uint8x8x2_t r01_u8 = vtrn_u8(result.val[0],
+                                       result.val[1]);
+    const uint16x4_t x_0_4 = vreinterpret_u16_u8(r01_u8.val[0]);
+    const uint16x4_t x_1_5 = vreinterpret_u16_u8(r01_u8.val[1]);
+    vst1_lane_u16((uint16_t *)dst, x_0_4, 0);
+    dst += pitch;
+    vst1_lane_u16((uint16_t *)dst, x_1_5, 0);
+    dst += pitch;
+    vst1_lane_u16((uint16_t *)dst, x_0_4, 1);
+    dst += pitch;
+    vst1_lane_u16((uint16_t *)dst, x_1_5, 1);
+    dst += pitch;
+    vst1_lane_u16((uint16_t *)dst, x_0_4, 2);
+    dst += pitch;
+    vst1_lane_u16((uint16_t *)dst, x_1_5, 2);
+    dst += pitch;
+    vst1_lane_u16((uint16_t *)dst, x_0_4, 3);
+    dst += pitch;
+    vst1_lane_u16((uint16_t *)dst, x_1_5, 3);
+}
+
+static INLINE void write_2x8(unsigned char *dst, int pitch,
+                             const uint8x8x2_t result,
+                             const uint8x8x2_t result2) {
+  write_2x4(dst, pitch, result);
+  dst += pitch * 8;
+  write_2x4(dst, pitch, result2);
+}
+#else
+static INLINE void write_2x8(unsigned char *dst, int pitch,
+                             const uint8x8x2_t result,
+                             const uint8x8x2_t result2) {
+  vst2_lane_u8(dst, result, 0);
+  dst += pitch;
+  vst2_lane_u8(dst, result, 1);
+  dst += pitch;
+  vst2_lane_u8(dst, result, 2);
+  dst += pitch;
+  vst2_lane_u8(dst, result, 3);
+  dst += pitch;
+  vst2_lane_u8(dst, result, 4);
+  dst += pitch;
+  vst2_lane_u8(dst, result, 5);
+  dst += pitch;
+  vst2_lane_u8(dst, result, 6);
+  dst += pitch;
+  vst2_lane_u8(dst, result, 7);
+  dst += pitch;
+
+  vst2_lane_u8(dst, result2, 0);
+  dst += pitch;
+  vst2_lane_u8(dst, result2, 1);
+  dst += pitch;
+  vst2_lane_u8(dst, result2, 2);
+  dst += pitch;
+  vst2_lane_u8(dst, result2, 3);
+  dst += pitch;
+  vst2_lane_u8(dst, result2, 4);
+  dst += pitch;
+  vst2_lane_u8(dst, result2, 5);
+  dst += pitch;
+  vst2_lane_u8(dst, result2, 6);
+  dst += pitch;
+  vst2_lane_u8(dst, result2, 7);
+}
+#endif  // VPX_INCOMPATIBLE_GCC
+
+
+#ifdef VPX_INCOMPATIBLE_GCC
+static INLINE
+uint8x8x4_t read_4x8(unsigned char *src, int pitch) {
+    uint8x8x4_t x;
+    const uint8x8_t a = vld1_u8(src);
+    const uint8x8_t b = vld1_u8(src + pitch * 1);
+    const uint8x8_t c = vld1_u8(src + pitch * 2);
+    const uint8x8_t d = vld1_u8(src + pitch * 3);
+    const uint8x8_t e = vld1_u8(src + pitch * 4);
+    const uint8x8_t f = vld1_u8(src + pitch * 5);
+    const uint8x8_t g = vld1_u8(src + pitch * 6);
+    const uint8x8_t h = vld1_u8(src + pitch * 7);
+    const uint32x2x2_t r04_u32 = vtrn_u32(vreinterpret_u32_u8(a),
+                                          vreinterpret_u32_u8(e));
+    const uint32x2x2_t r15_u32 = vtrn_u32(vreinterpret_u32_u8(b),
+                                          vreinterpret_u32_u8(f));
+    const uint32x2x2_t r26_u32 = vtrn_u32(vreinterpret_u32_u8(c),
+                                          vreinterpret_u32_u8(g));
+    const uint32x2x2_t r37_u32 = vtrn_u32(vreinterpret_u32_u8(d),
+                                          vreinterpret_u32_u8(h));
+    const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u32(r04_u32.val[0]),
+                                          vreinterpret_u16_u32(r26_u32.val[0]));
+    const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u32(r15_u32.val[0]),
+                                          vreinterpret_u16_u32(r37_u32.val[0]));
+    const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]),
+                                       vreinterpret_u8_u16(r13_u16.val[0]));
+    const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]),
+                                       vreinterpret_u8_u16(r13_u16.val[1]));
+    /*
+     * after vtrn_u32
+    00 01 02 03 | 40 41 42 43
+    10 11 12 13 | 50 51 52 53
+    20 21 22 23 | 60 61 62 63
+    30 31 32 33 | 70 71 72 73
+    ---
+    * after vtrn_u16
+    00 01 20 21 | 40 41 60 61
+    02 03 22 23 | 42 43 62 63
+    10 11 30 31 | 50 51 70 71
+    12 13 32 33 | 52 53 72 73
+
+    00 01 20 21 | 40 41 60 61
+    10 11 30 31 | 50 51 70 71
+    02 03 22 23 | 42 43 62 63
+    12 13 32 33 | 52 53 72 73
+    ---
+    * after vtrn_u8
+    00 10 20 30 | 40 50 60 70
+    01 11 21 31 | 41 51 61 71
+    02 12 22 32 | 42 52 62 72
+    03 13 23 33 | 43 53 63 73
+    */
+    x.val[0] = r01_u8.val[0];
+    x.val[1] = r01_u8.val[1];
+    x.val[2] = r23_u8.val[0];
+    x.val[3] = r23_u8.val[1];
+
+    return x;
+}
+#else
+static INLINE
+uint8x8x4_t read_4x8(unsigned char *src, int pitch) {
+    uint8x8x4_t x;
+    x.val[0] = x.val[1] = x.val[2] = x.val[3] = vdup_n_u8(0);
+    x = vld4_lane_u8(src, x, 0);
+    src += pitch;
+    x = vld4_lane_u8(src, x, 1);
+    src += pitch;
+    x = vld4_lane_u8(src, x, 2);
+    src += pitch;
+    x = vld4_lane_u8(src, x, 3);
+    src += pitch;
+    x = vld4_lane_u8(src, x, 4);
+    src += pitch;
+    x = vld4_lane_u8(src, x, 5);
+    src += pitch;
+    x = vld4_lane_u8(src, x, 6);
+    src += pitch;
+    x = vld4_lane_u8(src, x, 7);
+    return x;
+}
+#endif  // VPX_INCOMPATIBLE_GCC
+
+static INLINE void vp8_loop_filter_simple_vertical_edge_neon(
+        unsigned char *s,
+        int p,
+        const unsigned char *blimit) {
+    unsigned char *src1;
+    uint8x16_t qblimit, q0u8;
+    uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8, q11u8, q12u8, q14u8, q15u8;
+    int16x8_t q2s16, q13s16, q11s16;
+    int8x8_t d28s8, d29s8;
+    int8x16_t q2s8, q3s8, q10s8, q11s8, q14s8;
+    uint8x8x4_t d0u8x4;  // d6, d7, d8, d9
+    uint8x8x4_t d1u8x4;  // d10, d11, d12, d13
+    uint8x8x2_t d2u8x2;  // d12, d14
+    uint8x8x2_t d3u8x2;  // d13, d15
+
+    qblimit = vdupq_n_u8(*blimit);
+
+    src1 = s - 2;
+    d0u8x4 = read_4x8(src1, p);
+    src1 += p * 8;
+    d1u8x4 = read_4x8(src1, p);
+
+    q3u8 = vcombine_u8(d0u8x4.val[0], d1u8x4.val[0]);  // d6 d10
+    q4u8 = vcombine_u8(d0u8x4.val[2], d1u8x4.val[2]);  // d8 d12
+    q5u8 = vcombine_u8(d0u8x4.val[1], d1u8x4.val[1]);  // d7 d11
+    q6u8 = vcombine_u8(d0u8x4.val[3], d1u8x4.val[3]);  // d9 d13
+
+    q15u8 = vabdq_u8(q5u8, q4u8);
+    q14u8 = vabdq_u8(q3u8, q6u8);
+
+    q15u8 = vqaddq_u8(q15u8, q15u8);
+    q14u8 = vshrq_n_u8(q14u8, 1);
+    q0u8 = vdupq_n_u8(0x80);
+    q11s16 = vdupq_n_s16(3);
+    q15u8 = vqaddq_u8(q15u8, q14u8);
+
+    q3u8 = veorq_u8(q3u8, q0u8);
+    q4u8 = veorq_u8(q4u8, q0u8);
+    q5u8 = veorq_u8(q5u8, q0u8);
+    q6u8 = veorq_u8(q6u8, q0u8);
+
+    q15u8 = vcgeq_u8(qblimit, q15u8);
+
+    q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q4u8)),
+                     vget_low_s8(vreinterpretq_s8_u8(q5u8)));
+    q13s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q4u8)),
+                      vget_high_s8(vreinterpretq_s8_u8(q5u8)));
+
+    q14s8 = vqsubq_s8(vreinterpretq_s8_u8(q3u8),
+                      vreinterpretq_s8_u8(q6u8));
+
+    q2s16 = vmulq_s16(q2s16, q11s16);
+    q13s16 = vmulq_s16(q13s16, q11s16);
+
+    q11u8 = vdupq_n_u8(3);
+    q12u8 = vdupq_n_u8(4);
+
+    q2s16 = vaddw_s8(q2s16, vget_low_s8(q14s8));
+    q13s16 = vaddw_s8(q13s16, vget_high_s8(q14s8));
+
+    d28s8 = vqmovn_s16(q2s16);
+    d29s8 = vqmovn_s16(q13s16);
+    q14s8 = vcombine_s8(d28s8, d29s8);
+
+    q14s8 = vandq_s8(q14s8, vreinterpretq_s8_u8(q15u8));
+
+    q2s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q11u8));
+    q3s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q12u8));
+    q2s8 = vshrq_n_s8(q2s8, 3);
+    q14s8 = vshrq_n_s8(q3s8, 3);
+
+    q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q5u8), q2s8);
+    q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q4u8), q14s8);
+
+    q6u8 = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8);
+    q7u8 = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8);
+
+    d2u8x2.val[0] = vget_low_u8(q6u8);   // d12
+    d2u8x2.val[1] = vget_low_u8(q7u8);   // d14
+    d3u8x2.val[0] = vget_high_u8(q6u8);  // d13
+    d3u8x2.val[1] = vget_high_u8(q7u8);  // d15
+
+    src1 = s - 1;
+    write_2x8(src1, p, d2u8x2, d3u8x2);
+}
+
+void vp8_loop_filter_bvs_neon(
+        unsigned char *y_ptr,
+        int y_stride,
+        const unsigned char *blimit) {
+    y_ptr += 4;
+    vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
+    y_ptr += 4;
+    vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
+    y_ptr += 4;
+    vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
+    return;
+}
+
+void vp8_loop_filter_mbvs_neon(
+        unsigned char *y_ptr,
+        int y_stride,
+        const unsigned char *blimit) {
+    vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
+    return;
+}

+ 625 - 0
thirdparty/libvpx/vp8/common/arm/neon/mbloopfilter_neon.c

@@ -0,0 +1,625 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+#include "./vpx_config.h"
+
+static INLINE void vp8_mbloop_filter_neon(
+        uint8x16_t qblimit,  // mblimit
+        uint8x16_t qlimit,   // limit
+        uint8x16_t qthresh,  // thresh
+        uint8x16_t q3,       // p3
+        uint8x16_t q4,       // p2
+        uint8x16_t q5,       // p1
+        uint8x16_t q6,       // p0
+        uint8x16_t q7,       // q0
+        uint8x16_t q8,       // q1
+        uint8x16_t q9,       // q2
+        uint8x16_t q10,      // q3
+        uint8x16_t *q4r,     // p2
+        uint8x16_t *q5r,     // p1
+        uint8x16_t *q6r,     // p0
+        uint8x16_t *q7r,     // q0
+        uint8x16_t *q8r,     // q1
+        uint8x16_t *q9r) {   // q2
+    uint8x16_t q0u8, q1u8, q11u8, q12u8, q13u8, q14u8, q15u8;
+    int16x8_t q0s16, q2s16, q11s16, q12s16, q13s16, q14s16, q15s16;
+    int8x16_t q1s8, q6s8, q7s8, q2s8, q11s8, q13s8;
+    uint16x8_t q0u16, q11u16, q12u16, q13u16, q14u16, q15u16;
+    int8x16_t q0s8, q12s8, q14s8, q15s8;
+    int8x8_t d0, d1, d2, d3, d4, d5, d24, d25, d28, d29;
+
+    q11u8 = vabdq_u8(q3, q4);
+    q12u8 = vabdq_u8(q4, q5);
+    q13u8 = vabdq_u8(q5, q6);
+    q14u8 = vabdq_u8(q8, q7);
+    q1u8  = vabdq_u8(q9, q8);
+    q0u8  = vabdq_u8(q10, q9);
+
+    q11u8 = vmaxq_u8(q11u8, q12u8);
+    q12u8 = vmaxq_u8(q13u8, q14u8);
+    q1u8  = vmaxq_u8(q1u8, q0u8);
+    q15u8 = vmaxq_u8(q11u8, q12u8);
+
+    q12u8 = vabdq_u8(q6, q7);
+
+    // vp8_hevmask
+    q13u8 = vcgtq_u8(q13u8, qthresh);
+    q14u8 = vcgtq_u8(q14u8, qthresh);
+    q15u8 = vmaxq_u8(q15u8, q1u8);
+
+    q15u8 = vcgeq_u8(qlimit, q15u8);
+
+    q1u8 = vabdq_u8(q5, q8);
+    q12u8 = vqaddq_u8(q12u8, q12u8);
+
+    // vp8_filter() function
+    // convert to signed
+    q0u8 = vdupq_n_u8(0x80);
+    q9 = veorq_u8(q9, q0u8);
+    q8 = veorq_u8(q8, q0u8);
+    q7 = veorq_u8(q7, q0u8);
+    q6 = veorq_u8(q6, q0u8);
+    q5 = veorq_u8(q5, q0u8);
+    q4 = veorq_u8(q4, q0u8);
+
+    q1u8 = vshrq_n_u8(q1u8, 1);
+    q12u8 = vqaddq_u8(q12u8, q1u8);
+
+    q14u8 = vorrq_u8(q13u8, q14u8);
+    q12u8 = vcgeq_u8(qblimit, q12u8);
+
+    q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7)),
+                     vget_low_s8(vreinterpretq_s8_u8(q6)));
+    q13s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7)),
+                      vget_high_s8(vreinterpretq_s8_u8(q6)));
+
+    q1s8 = vqsubq_s8(vreinterpretq_s8_u8(q5),
+                     vreinterpretq_s8_u8(q8));
+
+    q11s16 = vdupq_n_s16(3);
+    q2s16  = vmulq_s16(q2s16, q11s16);
+    q13s16 = vmulq_s16(q13s16, q11s16);
+
+    q15u8 = vandq_u8(q15u8, q12u8);
+
+    q2s16  = vaddw_s8(q2s16, vget_low_s8(q1s8));
+    q13s16 = vaddw_s8(q13s16, vget_high_s8(q1s8));
+
+    q12u8 = vdupq_n_u8(3);
+    q11u8 = vdupq_n_u8(4);
+    // vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
+    d2 = vqmovn_s16(q2s16);
+    d3 = vqmovn_s16(q13s16);
+    q1s8 = vcombine_s8(d2, d3);
+    q1s8 = vandq_s8(q1s8, vreinterpretq_s8_u8(q15u8));
+    q13s8 = vandq_s8(q1s8, vreinterpretq_s8_u8(q14u8));
+
+    q2s8 = vqaddq_s8(q13s8, vreinterpretq_s8_u8(q11u8));
+    q13s8 = vqaddq_s8(q13s8, vreinterpretq_s8_u8(q12u8));
+    q2s8 = vshrq_n_s8(q2s8, 3);
+    q13s8 = vshrq_n_s8(q13s8, 3);
+
+    q7s8 = vqsubq_s8(vreinterpretq_s8_u8(q7), q2s8);
+    q6s8 = vqaddq_s8(vreinterpretq_s8_u8(q6), q13s8);
+
+    q1s8 = vbicq_s8(q1s8, vreinterpretq_s8_u8(q14u8));
+
+    q0u16 = q11u16 = q12u16 = q13u16 = q14u16 = q15u16 = vdupq_n_u16(63);
+    d5 = vdup_n_s8(9);
+    d4 = vdup_n_s8(18);
+
+    q0s16  = vmlal_s8(vreinterpretq_s16_u16(q0u16),  vget_low_s8(q1s8),  d5);
+    q11s16 = vmlal_s8(vreinterpretq_s16_u16(q11u16), vget_high_s8(q1s8), d5);
+    d5 = vdup_n_s8(27);
+    q12s16 = vmlal_s8(vreinterpretq_s16_u16(q12u16), vget_low_s8(q1s8),  d4);
+    q13s16 = vmlal_s8(vreinterpretq_s16_u16(q13u16), vget_high_s8(q1s8), d4);
+    q14s16 = vmlal_s8(vreinterpretq_s16_u16(q14u16), vget_low_s8(q1s8),  d5);
+    q15s16 = vmlal_s8(vreinterpretq_s16_u16(q15u16), vget_high_s8(q1s8), d5);
+
+    d0  = vqshrn_n_s16(q0s16 , 7);
+    d1  = vqshrn_n_s16(q11s16, 7);
+    d24 = vqshrn_n_s16(q12s16, 7);
+    d25 = vqshrn_n_s16(q13s16, 7);
+    d28 = vqshrn_n_s16(q14s16, 7);
+    d29 = vqshrn_n_s16(q15s16, 7);
+
+    q0s8  = vcombine_s8(d0, d1);
+    q12s8 = vcombine_s8(d24, d25);
+    q14s8 = vcombine_s8(d28, d29);
+
+    q11s8 = vqsubq_s8(vreinterpretq_s8_u8(q9), q0s8);
+    q0s8  = vqaddq_s8(vreinterpretq_s8_u8(q4), q0s8);
+    q13s8 = vqsubq_s8(vreinterpretq_s8_u8(q8), q12s8);
+    q12s8 = vqaddq_s8(vreinterpretq_s8_u8(q5), q12s8);
+    q15s8 = vqsubq_s8((q7s8), q14s8);
+    q14s8 = vqaddq_s8((q6s8), q14s8);
+
+    q1u8 = vdupq_n_u8(0x80);
+    *q9r = veorq_u8(vreinterpretq_u8_s8(q11s8), q1u8);
+    *q8r = veorq_u8(vreinterpretq_u8_s8(q13s8), q1u8);
+    *q7r = veorq_u8(vreinterpretq_u8_s8(q15s8), q1u8);
+    *q6r = veorq_u8(vreinterpretq_u8_s8(q14s8), q1u8);
+    *q5r = veorq_u8(vreinterpretq_u8_s8(q12s8), q1u8);
+    *q4r = veorq_u8(vreinterpretq_u8_s8(q0s8), q1u8);
+    return;
+}
+
+void vp8_mbloop_filter_horizontal_edge_y_neon(
+        unsigned char *src,
+        int pitch,
+        unsigned char blimit,
+        unsigned char limit,
+        unsigned char thresh) {
+    uint8x16_t qblimit, qlimit, qthresh, q3, q4;
+    uint8x16_t q5, q6, q7, q8, q9, q10;
+
+    qblimit = vdupq_n_u8(blimit);
+    qlimit = vdupq_n_u8(limit);
+    qthresh = vdupq_n_u8(thresh);
+
+    src -= (pitch << 2);
+
+    q3 = vld1q_u8(src);
+    src += pitch;
+    q4 = vld1q_u8(src);
+    src += pitch;
+    q5 = vld1q_u8(src);
+    src += pitch;
+    q6 = vld1q_u8(src);
+    src += pitch;
+    q7 = vld1q_u8(src);
+    src += pitch;
+    q8 = vld1q_u8(src);
+    src += pitch;
+    q9 = vld1q_u8(src);
+    src += pitch;
+    q10 = vld1q_u8(src);
+
+    vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
+                         q5, q6, q7, q8, q9, q10,
+                         &q4, &q5, &q6, &q7, &q8, &q9);
+
+    src -= (pitch * 6);
+    vst1q_u8(src, q4);
+    src += pitch;
+    vst1q_u8(src, q5);
+    src += pitch;
+    vst1q_u8(src, q6);
+    src += pitch;
+    vst1q_u8(src, q7);
+    src += pitch;
+    vst1q_u8(src, q8);
+    src += pitch;
+    vst1q_u8(src, q9);
+    return;
+}
+
+void vp8_mbloop_filter_horizontal_edge_uv_neon(
+        unsigned char *u,
+        int pitch,
+        unsigned char blimit,
+        unsigned char limit,
+        unsigned char thresh,
+        unsigned char *v) {
+    uint8x16_t qblimit, qlimit, qthresh, q3, q4;
+    uint8x16_t q5, q6, q7, q8, q9, q10;
+    uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
+    uint8x8_t d15, d16, d17, d18, d19, d20, d21;
+
+    qblimit = vdupq_n_u8(blimit);
+    qlimit = vdupq_n_u8(limit);
+    qthresh = vdupq_n_u8(thresh);
+
+    u -= (pitch << 2);
+    v -= (pitch << 2);
+
+    d6 = vld1_u8(u);
+    u += pitch;
+    d7 = vld1_u8(v);
+    v += pitch;
+    d8 = vld1_u8(u);
+    u += pitch;
+    d9 = vld1_u8(v);
+    v += pitch;
+    d10 = vld1_u8(u);
+    u += pitch;
+    d11 = vld1_u8(v);
+    v += pitch;
+    d12 = vld1_u8(u);
+    u += pitch;
+    d13 = vld1_u8(v);
+    v += pitch;
+    d14 = vld1_u8(u);
+    u += pitch;
+    d15 = vld1_u8(v);
+    v += pitch;
+    d16 = vld1_u8(u);
+    u += pitch;
+    d17 = vld1_u8(v);
+    v += pitch;
+    d18 = vld1_u8(u);
+    u += pitch;
+    d19 = vld1_u8(v);
+    v += pitch;
+    d20 = vld1_u8(u);
+    d21 = vld1_u8(v);
+
+    q3 = vcombine_u8(d6, d7);
+    q4 = vcombine_u8(d8, d9);
+    q5 = vcombine_u8(d10, d11);
+    q6 = vcombine_u8(d12, d13);
+    q7 = vcombine_u8(d14, d15);
+    q8 = vcombine_u8(d16, d17);
+    q9 = vcombine_u8(d18, d19);
+    q10 = vcombine_u8(d20, d21);
+
+    vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
+                         q5, q6, q7, q8, q9, q10,
+                         &q4, &q5, &q6, &q7, &q8, &q9);
+
+    u -= (pitch * 6);
+    v -= (pitch * 6);
+    vst1_u8(u, vget_low_u8(q4));
+    u += pitch;
+    vst1_u8(v, vget_high_u8(q4));
+    v += pitch;
+    vst1_u8(u, vget_low_u8(q5));
+    u += pitch;
+    vst1_u8(v, vget_high_u8(q5));
+    v += pitch;
+    vst1_u8(u, vget_low_u8(q6));
+    u += pitch;
+    vst1_u8(v, vget_high_u8(q6));
+    v += pitch;
+    vst1_u8(u, vget_low_u8(q7));
+    u += pitch;
+    vst1_u8(v, vget_high_u8(q7));
+    v += pitch;
+    vst1_u8(u, vget_low_u8(q8));
+    u += pitch;
+    vst1_u8(v, vget_high_u8(q8));
+    v += pitch;
+    vst1_u8(u, vget_low_u8(q9));
+    vst1_u8(v, vget_high_u8(q9));
+    return;
+}
+
+void vp8_mbloop_filter_vertical_edge_y_neon(
+        unsigned char *src,
+        int pitch,
+        unsigned char blimit,
+        unsigned char limit,
+        unsigned char thresh) {
+    unsigned char *s1, *s2;
+    uint8x16_t qblimit, qlimit, qthresh, q3, q4;
+    uint8x16_t q5, q6, q7, q8, q9, q10;
+    uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
+    uint8x8_t d15, d16, d17, d18, d19, d20, d21;
+    uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3;
+    uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7;
+    uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11;
+
+    qblimit = vdupq_n_u8(blimit);
+    qlimit = vdupq_n_u8(limit);
+    qthresh = vdupq_n_u8(thresh);
+
+    s1 = src - 4;
+    s2 = s1 + 8 * pitch;
+    d6  = vld1_u8(s1);
+    s1 += pitch;
+    d7  = vld1_u8(s2);
+    s2 += pitch;
+    d8  = vld1_u8(s1);
+    s1 += pitch;
+    d9  = vld1_u8(s2);
+    s2 += pitch;
+    d10 = vld1_u8(s1);
+    s1 += pitch;
+    d11 = vld1_u8(s2);
+    s2 += pitch;
+    d12 = vld1_u8(s1);
+    s1 += pitch;
+    d13 = vld1_u8(s2);
+    s2 += pitch;
+    d14 = vld1_u8(s1);
+    s1 += pitch;
+    d15 = vld1_u8(s2);
+    s2 += pitch;
+    d16 = vld1_u8(s1);
+    s1 += pitch;
+    d17 = vld1_u8(s2);
+    s2 += pitch;
+    d18 = vld1_u8(s1);
+    s1 += pitch;
+    d19 = vld1_u8(s2);
+    s2 += pitch;
+    d20 = vld1_u8(s1);
+    d21 = vld1_u8(s2);
+
+    q3 = vcombine_u8(d6, d7);
+    q4 = vcombine_u8(d8, d9);
+    q5 = vcombine_u8(d10, d11);
+    q6 = vcombine_u8(d12, d13);
+    q7 = vcombine_u8(d14, d15);
+    q8 = vcombine_u8(d16, d17);
+    q9 = vcombine_u8(d18, d19);
+    q10 = vcombine_u8(d20, d21);
+
+    q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
+    q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
+    q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
+    q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
+
+    q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
+                       vreinterpretq_u16_u32(q2tmp2.val[0]));
+    q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
+                       vreinterpretq_u16_u32(q2tmp3.val[0]));
+    q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
+                       vreinterpretq_u16_u32(q2tmp2.val[1]));
+    q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
+                       vreinterpretq_u16_u32(q2tmp3.val[1]));
+
+    q2tmp8  = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
+                       vreinterpretq_u8_u16(q2tmp5.val[0]));
+    q2tmp9  = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
+                       vreinterpretq_u8_u16(q2tmp5.val[1]));
+    q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
+                       vreinterpretq_u8_u16(q2tmp7.val[0]));
+    q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
+                       vreinterpretq_u8_u16(q2tmp7.val[1]));
+
+    q3 = q2tmp8.val[0];
+    q4 = q2tmp8.val[1];
+    q5 = q2tmp9.val[0];
+    q6 = q2tmp9.val[1];
+    q7 = q2tmp10.val[0];
+    q8 = q2tmp10.val[1];
+    q9 = q2tmp11.val[0];
+    q10 = q2tmp11.val[1];
+
+    vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
+                         q5, q6, q7, q8, q9, q10,
+                         &q4, &q5, &q6, &q7, &q8, &q9);
+
+    q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
+    q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
+    q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
+    q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
+
+    q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
+                       vreinterpretq_u16_u32(q2tmp2.val[0]));
+    q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
+                       vreinterpretq_u16_u32(q2tmp3.val[0]));
+    q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
+                       vreinterpretq_u16_u32(q2tmp2.val[1]));
+    q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
+                       vreinterpretq_u16_u32(q2tmp3.val[1]));
+
+    q2tmp8  = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
+                       vreinterpretq_u8_u16(q2tmp5.val[0]));
+    q2tmp9  = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
+                       vreinterpretq_u8_u16(q2tmp5.val[1]));
+    q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
+                       vreinterpretq_u8_u16(q2tmp7.val[0]));
+    q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
+                       vreinterpretq_u8_u16(q2tmp7.val[1]));
+
+    q3 = q2tmp8.val[0];
+    q4 = q2tmp8.val[1];
+    q5 = q2tmp9.val[0];
+    q6 = q2tmp9.val[1];
+    q7 = q2tmp10.val[0];
+    q8 = q2tmp10.val[1];
+    q9 = q2tmp11.val[0];
+    q10 = q2tmp11.val[1];
+
+    s1 -= 7 * pitch;
+    s2 -= 7 * pitch;
+
+    vst1_u8(s1, vget_low_u8(q3));
+    s1 += pitch;
+    vst1_u8(s2, vget_high_u8(q3));
+    s2 += pitch;
+    vst1_u8(s1, vget_low_u8(q4));
+    s1 += pitch;
+    vst1_u8(s2, vget_high_u8(q4));
+    s2 += pitch;
+    vst1_u8(s1, vget_low_u8(q5));
+    s1 += pitch;
+    vst1_u8(s2, vget_high_u8(q5));
+    s2 += pitch;
+    vst1_u8(s1, vget_low_u8(q6));
+    s1 += pitch;
+    vst1_u8(s2, vget_high_u8(q6));
+    s2 += pitch;
+    vst1_u8(s1, vget_low_u8(q7));
+    s1 += pitch;
+    vst1_u8(s2, vget_high_u8(q7));
+    s2 += pitch;
+    vst1_u8(s1, vget_low_u8(q8));
+    s1 += pitch;
+    vst1_u8(s2, vget_high_u8(q8));
+    s2 += pitch;
+    vst1_u8(s1, vget_low_u8(q9));
+    s1 += pitch;
+    vst1_u8(s2, vget_high_u8(q9));
+    s2 += pitch;
+    vst1_u8(s1, vget_low_u8(q10));
+    vst1_u8(s2, vget_high_u8(q10));
+    return;
+}
+
+void vp8_mbloop_filter_vertical_edge_uv_neon(
+        unsigned char *u,
+        int pitch,
+        unsigned char blimit,
+        unsigned char limit,
+        unsigned char thresh,
+        unsigned char *v) {
+    unsigned char *us, *ud;
+    unsigned char *vs, *vd;
+    uint8x16_t qblimit, qlimit, qthresh, q3, q4;
+    uint8x16_t q5, q6, q7, q8, q9, q10;
+    uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
+    uint8x8_t d15, d16, d17, d18, d19, d20, d21;
+    uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3;
+    uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7;
+    uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11;
+
+    qblimit = vdupq_n_u8(blimit);
+    qlimit = vdupq_n_u8(limit);
+    qthresh = vdupq_n_u8(thresh);
+
+    us = u - 4;
+    vs = v - 4;
+    d6 = vld1_u8(us);
+    us += pitch;
+    d7 = vld1_u8(vs);
+    vs += pitch;
+    d8 = vld1_u8(us);
+    us += pitch;
+    d9 = vld1_u8(vs);
+    vs += pitch;
+    d10 = vld1_u8(us);
+    us += pitch;
+    d11 = vld1_u8(vs);
+    vs += pitch;
+    d12 = vld1_u8(us);
+    us += pitch;
+    d13 = vld1_u8(vs);
+    vs += pitch;
+    d14 = vld1_u8(us);
+    us += pitch;
+    d15 = vld1_u8(vs);
+    vs += pitch;
+    d16 = vld1_u8(us);
+    us += pitch;
+    d17 = vld1_u8(vs);
+    vs += pitch;
+    d18 = vld1_u8(us);
+    us += pitch;
+    d19 = vld1_u8(vs);
+    vs += pitch;
+    d20 = vld1_u8(us);
+    d21 = vld1_u8(vs);
+
+    q3 = vcombine_u8(d6, d7);
+    q4 = vcombine_u8(d8, d9);
+    q5 = vcombine_u8(d10, d11);
+    q6 = vcombine_u8(d12, d13);
+    q7 = vcombine_u8(d14, d15);
+    q8 = vcombine_u8(d16, d17);
+    q9 = vcombine_u8(d18, d19);
+    q10 = vcombine_u8(d20, d21);
+
+    q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
+    q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
+    q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
+    q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
+
+    q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
+                       vreinterpretq_u16_u32(q2tmp2.val[0]));
+    q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
+                       vreinterpretq_u16_u32(q2tmp3.val[0]));
+    q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
+                       vreinterpretq_u16_u32(q2tmp2.val[1]));
+    q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
+                       vreinterpretq_u16_u32(q2tmp3.val[1]));
+
+    q2tmp8  = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
+                       vreinterpretq_u8_u16(q2tmp5.val[0]));
+    q2tmp9  = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
+                       vreinterpretq_u8_u16(q2tmp5.val[1]));
+    q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
+                       vreinterpretq_u8_u16(q2tmp7.val[0]));
+    q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
+                       vreinterpretq_u8_u16(q2tmp7.val[1]));
+
+    q3 = q2tmp8.val[0];
+    q4 = q2tmp8.val[1];
+    q5 = q2tmp9.val[0];
+    q6 = q2tmp9.val[1];
+    q7 = q2tmp10.val[0];
+    q8 = q2tmp10.val[1];
+    q9 = q2tmp11.val[0];
+    q10 = q2tmp11.val[1];
+
+    vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
+                         q5, q6, q7, q8, q9, q10,
+                         &q4, &q5, &q6, &q7, &q8, &q9);
+
+    q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
+    q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
+    q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
+    q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
+
+    q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
+                       vreinterpretq_u16_u32(q2tmp2.val[0]));
+    q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
+                       vreinterpretq_u16_u32(q2tmp3.val[0]));
+    q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
+                       vreinterpretq_u16_u32(q2tmp2.val[1]));
+    q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
+                       vreinterpretq_u16_u32(q2tmp3.val[1]));
+
+    q2tmp8  = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
+                       vreinterpretq_u8_u16(q2tmp5.val[0]));
+    q2tmp9  = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
+                       vreinterpretq_u8_u16(q2tmp5.val[1]));
+    q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
+                       vreinterpretq_u8_u16(q2tmp7.val[0]));
+    q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
+                       vreinterpretq_u8_u16(q2tmp7.val[1]));
+
+    q3 = q2tmp8.val[0];
+    q4 = q2tmp8.val[1];
+    q5 = q2tmp9.val[0];
+    q6 = q2tmp9.val[1];
+    q7 = q2tmp10.val[0];
+    q8 = q2tmp10.val[1];
+    q9 = q2tmp11.val[0];
+    q10 = q2tmp11.val[1];
+
+    ud = u - 4;
+    vst1_u8(ud, vget_low_u8(q3));
+    ud += pitch;
+    vst1_u8(ud, vget_low_u8(q4));
+    ud += pitch;
+    vst1_u8(ud, vget_low_u8(q5));
+    ud += pitch;
+    vst1_u8(ud, vget_low_u8(q6));
+    ud += pitch;
+    vst1_u8(ud, vget_low_u8(q7));
+    ud += pitch;
+    vst1_u8(ud, vget_low_u8(q8));
+    ud += pitch;
+    vst1_u8(ud, vget_low_u8(q9));
+    ud += pitch;
+    vst1_u8(ud, vget_low_u8(q10));
+
+    vd = v - 4;
+    vst1_u8(vd, vget_high_u8(q3));
+    vd += pitch;
+    vst1_u8(vd, vget_high_u8(q4));
+    vd += pitch;
+    vst1_u8(vd, vget_high_u8(q5));
+    vd += pitch;
+    vst1_u8(vd, vget_high_u8(q6));
+    vd += pitch;
+    vst1_u8(vd, vget_high_u8(q7));
+    vd += pitch;
+    vst1_u8(vd, vget_high_u8(q8));
+    vd += pitch;
+    vst1_u8(vd, vget_high_u8(q9));
+    vd += pitch;
+    vst1_u8(vd, vget_high_u8(q10));
+    return;
+}
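+/*
+ * For reference, a scalar sketch of the per-pixel logic the intrinsics above
+ * vectorize (the names, including clamp(), are illustrative; clamp() denotes
+ * signed 8-bit saturation):
+ *
+ *   mask = |p3-p2| <= limit && |p2-p1| <= limit && |p1-p0| <= limit &&
+ *          |q1-q0| <= limit && |q2-q1| <= limit && |q3-q2| <= limit &&
+ *          |p0-q0| * 2 + |p1-q1| / 2 <= blimit;
+ *   hev  = |p1-p0| > thresh || |q1-q0| > thresh;
+ *
+ *   f  = clamp(clamp(p1 - q1) + 3 * (q0 - p0)) & mask;
+ *   f2 = f & hev;                 // high edge variance: strong step on p0/q0 only
+ *   q0 -= clamp(f2 + 4) >> 3;
+ *   p0 += clamp(f2 + 3) >> 3;
+ *   f &= ~hev;                    // otherwise taper by 27/128, 18/128, 9/128
+ *   q0 -= (f * 27 + 63) >> 7;   p0 += (f * 27 + 63) >> 7;
+ *   q1 -= (f * 18 + 63) >> 7;   p1 += (f * 18 + 63) >> 7;
+ *   q2 -= (f *  9 + 63) >> 7;   p2 += (f *  9 + 63) >> 7;
+ *
+ * The veorq_u8(..., 0x80) calls convert the pixels to and from the signed
+ * domain in which this arithmetic is carried out.
+ */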

+ 123 - 0
thirdparty/libvpx/vp8/common/arm/neon/shortidct4x4llm_neon.c

@@ -0,0 +1,123 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+static const int16_t cospi8sqrt2minus1 = 20091;
+static const int16_t sinpi8sqrt2       = 35468;
+
+void vp8_short_idct4x4llm_neon(
+        int16_t *input,
+        unsigned char *pred_ptr,
+        int pred_stride,
+        unsigned char *dst_ptr,
+        int dst_stride) {
+    int i;
+    uint32x2_t d6u32 = vdup_n_u32(0);
+    uint8x8_t d1u8;
+    int16x4_t d2, d3, d4, d5, d10, d11, d12, d13;
+    uint16x8_t q1u16;
+    int16x8_t q1s16, q2s16, q3s16, q4s16;
+    int32x2x2_t v2tmp0, v2tmp1;
+    int16x4x2_t v2tmp2, v2tmp3;
+
+    d2 = vld1_s16(input);
+    d3 = vld1_s16(input + 4);
+    d4 = vld1_s16(input + 8);
+    d5 = vld1_s16(input + 12);
+
+    // 1st for loop
+    q1s16 = vcombine_s16(d2, d4);  // Swap d3 d4 here
+    q2s16 = vcombine_s16(d3, d5);
+
+    q3s16 = vqdmulhq_n_s16(q2s16, sinpi8sqrt2);
+    q4s16 = vqdmulhq_n_s16(q2s16, cospi8sqrt2minus1);
+
+    d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16));  // a1
+    d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16));  // b1
+
+    q3s16 = vshrq_n_s16(q3s16, 1);
+    q4s16 = vshrq_n_s16(q4s16, 1);
+
+    q3s16 = vqaddq_s16(q3s16, q2s16);
+    q4s16 = vqaddq_s16(q4s16, q2s16);
+
+    d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16));  // c1
+    d11 = vqadd_s16(vget_high_s16(q3s16), vget_low_s16(q4s16));  // d1
+
+    d2 = vqadd_s16(d12, d11);
+    d3 = vqadd_s16(d13, d10);
+    d4 = vqsub_s16(d13, d10);
+    d5 = vqsub_s16(d12, d11);
+
+    v2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
+    v2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
+    v2tmp2 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[0]),
+                      vreinterpret_s16_s32(v2tmp1.val[0]));
+    v2tmp3 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[1]),
+                      vreinterpret_s16_s32(v2tmp1.val[1]));
+
+    // 2nd for loop
+    q1s16 = vcombine_s16(v2tmp2.val[0], v2tmp3.val[0]);
+    q2s16 = vcombine_s16(v2tmp2.val[1], v2tmp3.val[1]);
+
+    q3s16 = vqdmulhq_n_s16(q2s16, sinpi8sqrt2);
+    q4s16 = vqdmulhq_n_s16(q2s16, cospi8sqrt2minus1);
+
+    d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16));  // a1
+    d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16));  // b1
+
+    q3s16 = vshrq_n_s16(q3s16, 1);
+    q4s16 = vshrq_n_s16(q4s16, 1);
+
+    q3s16 = vqaddq_s16(q3s16, q2s16);
+    q4s16 = vqaddq_s16(q4s16, q2s16);
+
+    d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16));  // c1
+    d11 = vqadd_s16(vget_high_s16(q3s16), vget_low_s16(q4s16));  // d1
+
+    d2 = vqadd_s16(d12, d11);
+    d3 = vqadd_s16(d13, d10);
+    d4 = vqsub_s16(d13, d10);
+    d5 = vqsub_s16(d12, d11);
+
+    d2 = vrshr_n_s16(d2, 3);
+    d3 = vrshr_n_s16(d3, 3);
+    d4 = vrshr_n_s16(d4, 3);
+    d5 = vrshr_n_s16(d5, 3);
+
+    v2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
+    v2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
+    v2tmp2 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[0]),
+                      vreinterpret_s16_s32(v2tmp1.val[0]));
+    v2tmp3 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[1]),
+                      vreinterpret_s16_s32(v2tmp1.val[1]));
+
+    q1s16 = vcombine_s16(v2tmp2.val[0], v2tmp2.val[1]);
+    q2s16 = vcombine_s16(v2tmp3.val[0], v2tmp3.val[1]);
+
+    // add the idct result to the prediction and store, two rows per iteration
+    // (cf. vp8_dc_only_idct_add)
+    for (i = 0; i < 2; i++, q1s16 = q2s16) {
+        d6u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d6u32, 0);
+        pred_ptr += pred_stride;
+        d6u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d6u32, 1);
+        pred_ptr += pred_stride;
+
+        q1u16 = vaddw_u8(vreinterpretq_u16_s16(q1s16),
+                         vreinterpret_u8_u32(d6u32));
+        d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16));
+
+        vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d1u8), 0);
+        dst_ptr += dst_stride;
+        vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d1u8), 1);
+        dst_ptr += dst_stride;
+    }
+    return;
+}
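+/*
+ * A note on the constants above: sinpi8sqrt2 = round(sqrt(2)*sin(pi/8)*65536)
+ * = 35468 and cospi8sqrt2minus1 = round((sqrt(2)*cos(pi/8) - 1)*65536) = 20091.
+ * 35468 does not fit in int16_t and wraps to -30068; the vqaddq_s16(q3s16,
+ * q2s16) step adds the input back, so vqdmulh + vshr(1) + vqadd nets out to
+ * the plain Q16 product (x * 35468) >> 16, while on the cosine path the same
+ * add supplies the "+x" of x*sqrt(2)*cos(pi/8). In scalar form the per-column
+ * butterfly is roughly (mulhi(x, c) = ((int32_t)x * c) >> 16, an illustrative
+ * helper; saturation omitted):
+ *
+ *   a1 = in[0] + in[8];              b1 = in[0] - in[8];
+ *   c1 = mulhi(in[4], 35468) - (in[12] + mulhi(in[12], 20091));
+ *   d1 = (in[4] + mulhi(in[4], 20091)) + mulhi(in[12], 35468);
+ *   row0 = a1 + d1;  row1 = b1 + c1;  row2 = b1 - c1;  row3 = a1 - d1;
+ *
+ * The second pass repeats this across rows, applies the (x + 4) >> 3 rounding
+ * seen in the vrshr_n_s16(..., 3) calls, and the final loop adds the result
+ * to the prediction block.
+ */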

+ 1377 - 0
thirdparty/libvpx/vp8/common/arm/neon/sixtappredict_neon.c

@@ -0,0 +1,1377 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+#include "vpx_ports/mem.h"
+
+static const int8_t vp8_sub_pel_filters[8][8] = {
+    {0,  0,  128,   0,   0, 0, 0, 0},  /* note that 1/8 pel positions are */
+    {0, -6,  123,  12,  -1, 0, 0, 0},  /*    just as per alpha -0.5 bicubic */
+    {2, -11, 108,  36,  -8, 1, 0, 0},  /* New 1/4 pel 6 tap filter */
+    {0, -9,   93,  50,  -6, 0, 0, 0},
+    {3, -16,  77,  77, -16, 3, 0, 0},  /* New 1/2 pel 6 tap filter */
+    {0, -6,   50,  93,  -9, 0, 0, 0},
+    {1, -8,   36, 108, -11, 2, 0, 0},  /* New 1/4 pel 6 tap filter */
+    {0, -1,   12, 123,  -6, 0, 0, 0},
+};
+
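+/*
+ * Each row of vp8_sub_pel_filters holds the six taps (plus two zeros of
+ * padding) of the VP8 sub-pixel interpolation filter for one eighth-pel
+ * phase; every row sums to 128. A scalar sketch of one horizontal first-pass
+ * output pixel in the functions below (clip() is an illustrative clamp to
+ * 0..255):
+ *
+ *   const int8_t *f = vp8_sub_pel_filters[xoffset];
+ *   out[x] = clip((f[0] * s[x - 2] + f[1] * s[x - 1] + f[2] * s[x] +
+ *                  f[3] * s[x + 1] + f[4] * s[x + 2] + f[5] * s[x + 3] +
+ *                  64) >> 7);
+ *
+ * Taps f[1] and f[4] are negative, which is why the code takes vabs_s8() of
+ * the broadcast taps and accumulates those two with vmlsl_u8 while the
+ * positive taps use vmull_u8/vmlal_u8; vqrshrun_n_s16(sum, 7) performs the
+ * rounding shift and the clamp in one step. The second pass applies the
+ * yoffset row of the same table vertically to the intermediate rows.
+ */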
+void vp8_sixtap_predict8x4_neon(
+        unsigned char *src_ptr,
+        int src_pixels_per_line,
+        int xoffset,
+        int yoffset,
+        unsigned char *dst_ptr,
+        int dst_pitch) {
+    unsigned char *src;
+    uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8;
+    uint8x8_t d22u8, d23u8, d24u8, d25u8, d26u8;
+    uint8x8_t d27u8, d28u8, d29u8, d30u8, d31u8;
+    int8x8_t dtmps8, d0s8, d1s8, d2s8, d3s8, d4s8, d5s8;
+    uint16x8_t q3u16, q4u16, q5u16, q6u16, q7u16;
+    uint16x8_t q8u16, q9u16, q10u16, q11u16, q12u16;
+    int16x8_t q3s16, q4s16, q5s16, q6s16, q7s16;
+    int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16;
+    uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8;
+
+    if (xoffset == 0) {  // secondpass_filter8x4_only
+        // load second_pass filter
+        dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]);
+        d0s8 = vdup_lane_s8(dtmps8, 0);
+        d1s8 = vdup_lane_s8(dtmps8, 1);
+        d2s8 = vdup_lane_s8(dtmps8, 2);
+        d3s8 = vdup_lane_s8(dtmps8, 3);
+        d4s8 = vdup_lane_s8(dtmps8, 4);
+        d5s8 = vdup_lane_s8(dtmps8, 5);
+        d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
+        d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
+        d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
+        d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
+        d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
+        d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
+
+        // load src data
+        src = src_ptr - src_pixels_per_line * 2;
+        d22u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d23u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d24u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d25u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d26u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d27u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d28u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d29u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d30u8 = vld1_u8(src);
+
+        q3u16 = vmull_u8(d22u8, d0u8);
+        q4u16 = vmull_u8(d23u8, d0u8);
+        q5u16 = vmull_u8(d24u8, d0u8);
+        q6u16 = vmull_u8(d25u8, d0u8);
+
+        q3u16 = vmlsl_u8(q3u16, d23u8, d1u8);
+        q4u16 = vmlsl_u8(q4u16, d24u8, d1u8);
+        q5u16 = vmlsl_u8(q5u16, d25u8, d1u8);
+        q6u16 = vmlsl_u8(q6u16, d26u8, d1u8);
+
+        q3u16 = vmlsl_u8(q3u16, d26u8, d4u8);
+        q4u16 = vmlsl_u8(q4u16, d27u8, d4u8);
+        q5u16 = vmlsl_u8(q5u16, d28u8, d4u8);
+        q6u16 = vmlsl_u8(q6u16, d29u8, d4u8);
+
+        q3u16 = vmlal_u8(q3u16, d24u8, d2u8);
+        q4u16 = vmlal_u8(q4u16, d25u8, d2u8);
+        q5u16 = vmlal_u8(q5u16, d26u8, d2u8);
+        q6u16 = vmlal_u8(q6u16, d27u8, d2u8);
+
+        q3u16 = vmlal_u8(q3u16, d27u8, d5u8);
+        q4u16 = vmlal_u8(q4u16, d28u8, d5u8);
+        q5u16 = vmlal_u8(q5u16, d29u8, d5u8);
+        q6u16 = vmlal_u8(q6u16, d30u8, d5u8);
+
+        q7u16 = vmull_u8(d25u8, d3u8);
+        q8u16 = vmull_u8(d26u8, d3u8);
+        q9u16 = vmull_u8(d27u8, d3u8);
+        q10u16 = vmull_u8(d28u8, d3u8);
+
+        q3s16 = vreinterpretq_s16_u16(q3u16);
+        q4s16 = vreinterpretq_s16_u16(q4u16);
+        q5s16 = vreinterpretq_s16_u16(q5u16);
+        q6s16 = vreinterpretq_s16_u16(q6u16);
+        q7s16 = vreinterpretq_s16_u16(q7u16);
+        q8s16 = vreinterpretq_s16_u16(q8u16);
+        q9s16 = vreinterpretq_s16_u16(q9u16);
+        q10s16 = vreinterpretq_s16_u16(q10u16);
+
+        q7s16 = vqaddq_s16(q7s16, q3s16);
+        q8s16 = vqaddq_s16(q8s16, q4s16);
+        q9s16 = vqaddq_s16(q9s16, q5s16);
+        q10s16 = vqaddq_s16(q10s16, q6s16);
+
+        d6u8 = vqrshrun_n_s16(q7s16, 7);
+        d7u8 = vqrshrun_n_s16(q8s16, 7);
+        d8u8 = vqrshrun_n_s16(q9s16, 7);
+        d9u8 = vqrshrun_n_s16(q10s16, 7);
+
+        vst1_u8(dst_ptr, d6u8);
+        dst_ptr += dst_pitch;
+        vst1_u8(dst_ptr, d7u8);
+        dst_ptr += dst_pitch;
+        vst1_u8(dst_ptr, d8u8);
+        dst_ptr += dst_pitch;
+        vst1_u8(dst_ptr, d9u8);
+        return;
+    }
+
+    // load first_pass filter
+    dtmps8 = vld1_s8(vp8_sub_pel_filters[xoffset]);
+    d0s8 = vdup_lane_s8(dtmps8, 0);
+    d1s8 = vdup_lane_s8(dtmps8, 1);
+    d2s8 = vdup_lane_s8(dtmps8, 2);
+    d3s8 = vdup_lane_s8(dtmps8, 3);
+    d4s8 = vdup_lane_s8(dtmps8, 4);
+    d5s8 = vdup_lane_s8(dtmps8, 5);
+    d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
+    d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
+    d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
+    d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
+    d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
+    d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
+
+    // First pass: (output_height + 5) rows x output_width columns (9x8 here)
+    if (yoffset == 0)  // firstpass_filter8x4_only
+        src = src_ptr - 2;
+    else
+        src = src_ptr - 2 - (src_pixels_per_line * 2);
+    q3u8 = vld1q_u8(src);
+    src += src_pixels_per_line;
+    q4u8 = vld1q_u8(src);
+    src += src_pixels_per_line;
+    q5u8 = vld1q_u8(src);
+    src += src_pixels_per_line;
+    q6u8 = vld1q_u8(src);
+
+    q7u16  = vmull_u8(vget_low_u8(q3u8), d0u8);
+    q8u16  = vmull_u8(vget_low_u8(q4u8), d0u8);
+    q9u16  = vmull_u8(vget_low_u8(q5u8), d0u8);
+    q10u16 = vmull_u8(vget_low_u8(q6u8), d0u8);
+
+    d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1);
+    d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1);
+    d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1);
+    d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 1);
+
+    q7u16  = vmlsl_u8(q7u16, d28u8, d1u8);
+    q8u16  = vmlsl_u8(q8u16, d29u8, d1u8);
+    q9u16  = vmlsl_u8(q9u16, d30u8, d1u8);
+    q10u16 = vmlsl_u8(q10u16, d31u8, d1u8);
+
+    d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 4);
+    d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 4);
+    d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 4);
+    d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 4);
+
+    q7u16  = vmlsl_u8(q7u16, d28u8, d4u8);
+    q8u16  = vmlsl_u8(q8u16, d29u8, d4u8);
+    q9u16  = vmlsl_u8(q9u16, d30u8, d4u8);
+    q10u16 = vmlsl_u8(q10u16, d31u8, d4u8);
+
+    d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 2);
+    d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 2);
+    d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 2);
+    d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 2);
+
+    q7u16  = vmlal_u8(q7u16, d28u8, d2u8);
+    q8u16  = vmlal_u8(q8u16, d29u8, d2u8);
+    q9u16  = vmlal_u8(q9u16, d30u8, d2u8);
+    q10u16 = vmlal_u8(q10u16, d31u8, d2u8);
+
+    d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5);
+    d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5);
+    d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5);
+    d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5);
+
+    q7u16 = vmlal_u8(q7u16, d28u8, d5u8);
+    q8u16 = vmlal_u8(q8u16, d29u8, d5u8);
+    q9u16 = vmlal_u8(q9u16, d30u8, d5u8);
+    q10u16 = vmlal_u8(q10u16, d31u8, d5u8);
+
+    d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 3);
+    d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 3);
+    d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 3);
+    d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 3);
+
+    q3u16 = vmull_u8(d28u8, d3u8);
+    q4u16 = vmull_u8(d29u8, d3u8);
+    q5u16 = vmull_u8(d30u8, d3u8);
+    q6u16 = vmull_u8(d31u8, d3u8);
+
+    q3s16 = vreinterpretq_s16_u16(q3u16);
+    q4s16 = vreinterpretq_s16_u16(q4u16);
+    q5s16 = vreinterpretq_s16_u16(q5u16);
+    q6s16 = vreinterpretq_s16_u16(q6u16);
+    q7s16 = vreinterpretq_s16_u16(q7u16);
+    q8s16 = vreinterpretq_s16_u16(q8u16);
+    q9s16 = vreinterpretq_s16_u16(q9u16);
+    q10s16 = vreinterpretq_s16_u16(q10u16);
+
+    q7s16 = vqaddq_s16(q7s16, q3s16);
+    q8s16 = vqaddq_s16(q8s16, q4s16);
+    q9s16 = vqaddq_s16(q9s16, q5s16);
+    q10s16 = vqaddq_s16(q10s16, q6s16);
+
+    d22u8 = vqrshrun_n_s16(q7s16, 7);
+    d23u8 = vqrshrun_n_s16(q8s16, 7);
+    d24u8 = vqrshrun_n_s16(q9s16, 7);
+    d25u8 = vqrshrun_n_s16(q10s16, 7);
+
+    if (yoffset == 0) {  // firstpass_filter8x4_only
+        vst1_u8(dst_ptr, d22u8);
+        dst_ptr += dst_pitch;
+        vst1_u8(dst_ptr, d23u8);
+        dst_ptr += dst_pitch;
+        vst1_u8(dst_ptr, d24u8);
+        dst_ptr += dst_pitch;
+        vst1_u8(dst_ptr, d25u8);
+        return;
+    }
+
+    // First pass on the remaining 5 rows of data
+    src += src_pixels_per_line;
+    q3u8 = vld1q_u8(src);
+    src += src_pixels_per_line;
+    q4u8 = vld1q_u8(src);
+    src += src_pixels_per_line;
+    q5u8 = vld1q_u8(src);
+    src += src_pixels_per_line;
+    q6u8 = vld1q_u8(src);
+    src += src_pixels_per_line;
+    q7u8 = vld1q_u8(src);
+
+    q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8);
+    q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8);
+    q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8);
+    q11u16 = vmull_u8(vget_low_u8(q6u8), d0u8);
+    q12u16 = vmull_u8(vget_low_u8(q7u8), d0u8);
+
+    d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1);
+    d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1);
+    d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1);
+    d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 1);
+    d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 1);
+
+    q8u16  = vmlsl_u8(q8u16, d27u8, d1u8);
+    q9u16  = vmlsl_u8(q9u16, d28u8, d1u8);
+    q10u16 = vmlsl_u8(q10u16, d29u8, d1u8);
+    q11u16 = vmlsl_u8(q11u16, d30u8, d1u8);
+    q12u16 = vmlsl_u8(q12u16, d31u8, d1u8);
+
+    d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 4);
+    d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 4);
+    d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 4);
+    d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 4);
+    d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 4);
+
+    q8u16  = vmlsl_u8(q8u16, d27u8, d4u8);
+    q9u16  = vmlsl_u8(q9u16, d28u8, d4u8);
+    q10u16 = vmlsl_u8(q10u16, d29u8, d4u8);
+    q11u16 = vmlsl_u8(q11u16, d30u8, d4u8);
+    q12u16 = vmlsl_u8(q12u16, d31u8, d4u8);
+
+    d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 2);
+    d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 2);
+    d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 2);
+    d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 2);
+    d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 2);
+
+    q8u16  = vmlal_u8(q8u16, d27u8, d2u8);
+    q9u16  = vmlal_u8(q9u16, d28u8, d2u8);
+    q10u16 = vmlal_u8(q10u16, d29u8, d2u8);
+    q11u16 = vmlal_u8(q11u16, d30u8, d2u8);
+    q12u16 = vmlal_u8(q12u16, d31u8, d2u8);
+
+    d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5);
+    d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5);
+    d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5);
+    d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5);
+    d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 5);
+
+    q8u16  = vmlal_u8(q8u16, d27u8, d5u8);
+    q9u16  = vmlal_u8(q9u16, d28u8, d5u8);
+    q10u16 = vmlal_u8(q10u16, d29u8, d5u8);
+    q11u16 = vmlal_u8(q11u16, d30u8, d5u8);
+    q12u16 = vmlal_u8(q12u16, d31u8, d5u8);
+
+    d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 3);
+    d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 3);
+    d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 3);
+    d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 3);
+    d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 3);
+
+    q3u16 = vmull_u8(d27u8, d3u8);
+    q4u16 = vmull_u8(d28u8, d3u8);
+    q5u16 = vmull_u8(d29u8, d3u8);
+    q6u16 = vmull_u8(d30u8, d3u8);
+    q7u16 = vmull_u8(d31u8, d3u8);
+
+    q3s16 = vreinterpretq_s16_u16(q3u16);
+    q4s16 = vreinterpretq_s16_u16(q4u16);
+    q5s16 = vreinterpretq_s16_u16(q5u16);
+    q6s16 = vreinterpretq_s16_u16(q6u16);
+    q7s16 = vreinterpretq_s16_u16(q7u16);
+    q8s16 = vreinterpretq_s16_u16(q8u16);
+    q9s16 = vreinterpretq_s16_u16(q9u16);
+    q10s16 = vreinterpretq_s16_u16(q10u16);
+    q11s16 = vreinterpretq_s16_u16(q11u16);
+    q12s16 = vreinterpretq_s16_u16(q12u16);
+
+    q8s16 = vqaddq_s16(q8s16, q3s16);
+    q9s16 = vqaddq_s16(q9s16, q4s16);
+    q10s16 = vqaddq_s16(q10s16, q5s16);
+    q11s16 = vqaddq_s16(q11s16, q6s16);
+    q12s16 = vqaddq_s16(q12s16, q7s16);
+
+    d26u8 = vqrshrun_n_s16(q8s16, 7);
+    d27u8 = vqrshrun_n_s16(q9s16, 7);
+    d28u8 = vqrshrun_n_s16(q10s16, 7);
+    d29u8 = vqrshrun_n_s16(q11s16, 7);
+    d30u8 = vqrshrun_n_s16(q12s16, 7);
+
+    // Second pass: 8x4
+    dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]);
+    d0s8 = vdup_lane_s8(dtmps8, 0);
+    d1s8 = vdup_lane_s8(dtmps8, 1);
+    d2s8 = vdup_lane_s8(dtmps8, 2);
+    d3s8 = vdup_lane_s8(dtmps8, 3);
+    d4s8 = vdup_lane_s8(dtmps8, 4);
+    d5s8 = vdup_lane_s8(dtmps8, 5);
+    d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
+    d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
+    d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
+    d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
+    d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
+    d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
+
+    q3u16 = vmull_u8(d22u8, d0u8);
+    q4u16 = vmull_u8(d23u8, d0u8);
+    q5u16 = vmull_u8(d24u8, d0u8);
+    q6u16 = vmull_u8(d25u8, d0u8);
+
+    q3u16 = vmlsl_u8(q3u16, d23u8, d1u8);
+    q4u16 = vmlsl_u8(q4u16, d24u8, d1u8);
+    q5u16 = vmlsl_u8(q5u16, d25u8, d1u8);
+    q6u16 = vmlsl_u8(q6u16, d26u8, d1u8);
+
+    q3u16 = vmlsl_u8(q3u16, d26u8, d4u8);
+    q4u16 = vmlsl_u8(q4u16, d27u8, d4u8);
+    q5u16 = vmlsl_u8(q5u16, d28u8, d4u8);
+    q6u16 = vmlsl_u8(q6u16, d29u8, d4u8);
+
+    q3u16 = vmlal_u8(q3u16, d24u8, d2u8);
+    q4u16 = vmlal_u8(q4u16, d25u8, d2u8);
+    q5u16 = vmlal_u8(q5u16, d26u8, d2u8);
+    q6u16 = vmlal_u8(q6u16, d27u8, d2u8);
+
+    q3u16 = vmlal_u8(q3u16, d27u8, d5u8);
+    q4u16 = vmlal_u8(q4u16, d28u8, d5u8);
+    q5u16 = vmlal_u8(q5u16, d29u8, d5u8);
+    q6u16 = vmlal_u8(q6u16, d30u8, d5u8);
+
+    q7u16 = vmull_u8(d25u8, d3u8);
+    q8u16 = vmull_u8(d26u8, d3u8);
+    q9u16 = vmull_u8(d27u8, d3u8);
+    q10u16 = vmull_u8(d28u8, d3u8);
+
+    q3s16 = vreinterpretq_s16_u16(q3u16);
+    q4s16 = vreinterpretq_s16_u16(q4u16);
+    q5s16 = vreinterpretq_s16_u16(q5u16);
+    q6s16 = vreinterpretq_s16_u16(q6u16);
+    q7s16 = vreinterpretq_s16_u16(q7u16);
+    q8s16 = vreinterpretq_s16_u16(q8u16);
+    q9s16 = vreinterpretq_s16_u16(q9u16);
+    q10s16 = vreinterpretq_s16_u16(q10u16);
+
+    q7s16 = vqaddq_s16(q7s16, q3s16);
+    q8s16 = vqaddq_s16(q8s16, q4s16);
+    q9s16 = vqaddq_s16(q9s16, q5s16);
+    q10s16 = vqaddq_s16(q10s16, q6s16);
+
+    d6u8 = vqrshrun_n_s16(q7s16, 7);
+    d7u8 = vqrshrun_n_s16(q8s16, 7);
+    d8u8 = vqrshrun_n_s16(q9s16, 7);
+    d9u8 = vqrshrun_n_s16(q10s16, 7);
+
+    vst1_u8(dst_ptr, d6u8);
+    dst_ptr += dst_pitch;
+    vst1_u8(dst_ptr, d7u8);
+    dst_ptr += dst_pitch;
+    vst1_u8(dst_ptr, d8u8);
+    dst_ptr += dst_pitch;
+    vst1_u8(dst_ptr, d9u8);
+    return;
+}
+
+void vp8_sixtap_predict8x8_neon(
+        unsigned char *src_ptr,
+        int src_pixels_per_line,
+        int xoffset,
+        int yoffset,
+        unsigned char *dst_ptr,
+        int dst_pitch) {
+    unsigned char *src, *tmpp;
+    unsigned char tmp[64];
+    int i;
+    uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8;
+    uint8x8_t d18u8, d19u8, d20u8, d21u8, d22u8, d23u8, d24u8, d25u8;
+    uint8x8_t d26u8, d27u8, d28u8, d29u8, d30u8, d31u8;
+    int8x8_t dtmps8, d0s8, d1s8, d2s8, d3s8, d4s8, d5s8;
+    uint16x8_t q3u16, q4u16, q5u16, q6u16, q7u16;
+    uint16x8_t q8u16, q9u16, q10u16, q11u16, q12u16;
+    int16x8_t q3s16, q4s16, q5s16, q6s16, q7s16;
+    int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16;
+    uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8, q9u8, q10u8, q11u8, q12u8;
+
+    if (xoffset == 0) {  // secondpass_filter8x8_only
+        // load second_pass filter
+        dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]);
+        d0s8 = vdup_lane_s8(dtmps8, 0);
+        d1s8 = vdup_lane_s8(dtmps8, 1);
+        d2s8 = vdup_lane_s8(dtmps8, 2);
+        d3s8 = vdup_lane_s8(dtmps8, 3);
+        d4s8 = vdup_lane_s8(dtmps8, 4);
+        d5s8 = vdup_lane_s8(dtmps8, 5);
+        d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
+        d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
+        d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
+        d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
+        d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
+        d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
+
+        // load src data
+        src = src_ptr - src_pixels_per_line * 2;
+        d18u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d19u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d20u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d21u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d22u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d23u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d24u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d25u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d26u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d27u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d28u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d29u8 = vld1_u8(src);
+        src += src_pixels_per_line;
+        d30u8 = vld1_u8(src);
+
+        for (i = 2; i > 0; i--) {
+            q3u16 = vmull_u8(d18u8, d0u8);
+            q4u16 = vmull_u8(d19u8, d0u8);
+            q5u16 = vmull_u8(d20u8, d0u8);
+            q6u16 = vmull_u8(d21u8, d0u8);
+
+            q3u16 = vmlsl_u8(q3u16, d19u8, d1u8);
+            q4u16 = vmlsl_u8(q4u16, d20u8, d1u8);
+            q5u16 = vmlsl_u8(q5u16, d21u8, d1u8);
+            q6u16 = vmlsl_u8(q6u16, d22u8, d1u8);
+
+            q3u16 = vmlsl_u8(q3u16, d22u8, d4u8);
+            q4u16 = vmlsl_u8(q4u16, d23u8, d4u8);
+            q5u16 = vmlsl_u8(q5u16, d24u8, d4u8);
+            q6u16 = vmlsl_u8(q6u16, d25u8, d4u8);
+
+            q3u16 = vmlal_u8(q3u16, d20u8, d2u8);
+            q4u16 = vmlal_u8(q4u16, d21u8, d2u8);
+            q5u16 = vmlal_u8(q5u16, d22u8, d2u8);
+            q6u16 = vmlal_u8(q6u16, d23u8, d2u8);
+
+            q3u16 = vmlal_u8(q3u16, d23u8, d5u8);
+            q4u16 = vmlal_u8(q4u16, d24u8, d5u8);
+            q5u16 = vmlal_u8(q5u16, d25u8, d5u8);
+            q6u16 = vmlal_u8(q6u16, d26u8, d5u8);
+
+            q7u16 = vmull_u8(d21u8, d3u8);
+            q8u16 = vmull_u8(d22u8, d3u8);
+            q9u16 = vmull_u8(d23u8, d3u8);
+            q10u16 = vmull_u8(d24u8, d3u8);
+
+            q3s16 = vreinterpretq_s16_u16(q3u16);
+            q4s16 = vreinterpretq_s16_u16(q4u16);
+            q5s16 = vreinterpretq_s16_u16(q5u16);
+            q6s16 = vreinterpretq_s16_u16(q6u16);
+            q7s16 = vreinterpretq_s16_u16(q7u16);
+            q8s16 = vreinterpretq_s16_u16(q8u16);
+            q9s16 = vreinterpretq_s16_u16(q9u16);
+            q10s16 = vreinterpretq_s16_u16(q10u16);
+
+            q7s16 = vqaddq_s16(q7s16, q3s16);
+            q8s16 = vqaddq_s16(q8s16, q4s16);
+            q9s16 = vqaddq_s16(q9s16, q5s16);
+            q10s16 = vqaddq_s16(q10s16, q6s16);
+
+            d6u8 = vqrshrun_n_s16(q7s16, 7);
+            d7u8 = vqrshrun_n_s16(q8s16, 7);
+            d8u8 = vqrshrun_n_s16(q9s16, 7);
+            d9u8 = vqrshrun_n_s16(q10s16, 7);
+
+            d18u8 = d22u8;
+            d19u8 = d23u8;
+            d20u8 = d24u8;
+            d21u8 = d25u8;
+            d22u8 = d26u8;
+            d23u8 = d27u8;
+            d24u8 = d28u8;
+            d25u8 = d29u8;
+            d26u8 = d30u8;
+
+            vst1_u8(dst_ptr, d6u8);
+            dst_ptr += dst_pitch;
+            vst1_u8(dst_ptr, d7u8);
+            dst_ptr += dst_pitch;
+            vst1_u8(dst_ptr, d8u8);
+            dst_ptr += dst_pitch;
+            vst1_u8(dst_ptr, d9u8);
+            dst_ptr += dst_pitch;
+        }
+        return;
+    }
+
+    // load first_pass filter
+    dtmps8 = vld1_s8(vp8_sub_pel_filters[xoffset]);
+    d0s8 = vdup_lane_s8(dtmps8, 0);
+    d1s8 = vdup_lane_s8(dtmps8, 1);
+    d2s8 = vdup_lane_s8(dtmps8, 2);
+    d3s8 = vdup_lane_s8(dtmps8, 3);
+    d4s8 = vdup_lane_s8(dtmps8, 4);
+    d5s8 = vdup_lane_s8(dtmps8, 5);
+    d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
+    d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
+    d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
+    d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
+    d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
+    d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
+
+    // First pass: (output_height + 5) rows x output_width columns (13x8 here)
+    if (yoffset == 0)  // firstpass_filter8x8_only
+        src = src_ptr - 2;
+    else
+        src = src_ptr - 2 - (src_pixels_per_line * 2);
+
+    tmpp = tmp;
+    for (i = 2; i > 0; i--) {
+        q3u8 = vld1q_u8(src);
+        src += src_pixels_per_line;
+        q4u8 = vld1q_u8(src);
+        src += src_pixels_per_line;
+        q5u8 = vld1q_u8(src);
+        src += src_pixels_per_line;
+        q6u8 = vld1q_u8(src);
+        src += src_pixels_per_line;
+
+        __builtin_prefetch(src);
+        __builtin_prefetch(src + src_pixels_per_line);
+        __builtin_prefetch(src + src_pixels_per_line * 2);
+
+        q7u16 = vmull_u8(vget_low_u8(q3u8), d0u8);
+        q8u16 = vmull_u8(vget_low_u8(q4u8), d0u8);
+        q9u16 = vmull_u8(vget_low_u8(q5u8), d0u8);
+        q10u16 = vmull_u8(vget_low_u8(q6u8), d0u8);
+
+        d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1);
+        d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1);
+        d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1);
+        d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 1);
+
+        q7u16 = vmlsl_u8(q7u16, d28u8, d1u8);
+        q8u16 = vmlsl_u8(q8u16, d29u8, d1u8);
+        q9u16 = vmlsl_u8(q9u16, d30u8, d1u8);
+        q10u16 = vmlsl_u8(q10u16, d31u8, d1u8);
+
+        d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 4);
+        d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 4);
+        d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 4);
+        d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 4);
+
+        q7u16 = vmlsl_u8(q7u16, d28u8, d4u8);
+        q8u16 = vmlsl_u8(q8u16, d29u8, d4u8);
+        q9u16 = vmlsl_u8(q9u16, d30u8, d4u8);
+        q10u16 = vmlsl_u8(q10u16, d31u8, d4u8);
+
+        d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 2);
+        d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 2);
+        d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 2);
+        d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 2);
+
+        q7u16 = vmlal_u8(q7u16, d28u8, d2u8);
+        q8u16 = vmlal_u8(q8u16, d29u8, d2u8);
+        q9u16 = vmlal_u8(q9u16, d30u8, d2u8);
+        q10u16 = vmlal_u8(q10u16, d31u8, d2u8);
+
+        d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5);
+        d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5);
+        d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5);
+        d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5);
+
+        q7u16 = vmlal_u8(q7u16, d28u8, d5u8);
+        q8u16 = vmlal_u8(q8u16, d29u8, d5u8);
+        q9u16 = vmlal_u8(q9u16, d30u8, d5u8);
+        q10u16 = vmlal_u8(q10u16, d31u8, d5u8);
+
+        d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 3);
+        d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 3);
+        d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 3);
+        d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 3);
+
+        q3u16 = vmull_u8(d28u8, d3u8);
+        q4u16 = vmull_u8(d29u8, d3u8);
+        q5u16 = vmull_u8(d30u8, d3u8);
+        q6u16 = vmull_u8(d31u8, d3u8);
+
+        q3s16 = vreinterpretq_s16_u16(q3u16);
+        q4s16 = vreinterpretq_s16_u16(q4u16);
+        q5s16 = vreinterpretq_s16_u16(q5u16);
+        q6s16 = vreinterpretq_s16_u16(q6u16);
+        q7s16 = vreinterpretq_s16_u16(q7u16);
+        q8s16 = vreinterpretq_s16_u16(q8u16);
+        q9s16 = vreinterpretq_s16_u16(q9u16);
+        q10s16 = vreinterpretq_s16_u16(q10u16);
+
+        q7s16 = vqaddq_s16(q7s16, q3s16);
+        q8s16 = vqaddq_s16(q8s16, q4s16);
+        q9s16 = vqaddq_s16(q9s16, q5s16);
+        q10s16 = vqaddq_s16(q10s16, q6s16);
+
+        d22u8 = vqrshrun_n_s16(q7s16, 7);
+        d23u8 = vqrshrun_n_s16(q8s16, 7);
+        d24u8 = vqrshrun_n_s16(q9s16, 7);
+        d25u8 = vqrshrun_n_s16(q10s16, 7);
+
+        if (yoffset == 0) {  // firstpass_filter8x8_only
+            vst1_u8(dst_ptr, d22u8);
+            dst_ptr += dst_pitch;
+            vst1_u8(dst_ptr, d23u8);
+            dst_ptr += dst_pitch;
+            vst1_u8(dst_ptr, d24u8);
+            dst_ptr += dst_pitch;
+            vst1_u8(dst_ptr, d25u8);
+            dst_ptr += dst_pitch;
+        } else {
+            vst1_u8(tmpp, d22u8);
+            tmpp += 8;
+            vst1_u8(tmpp, d23u8);
+            tmpp += 8;
+            vst1_u8(tmpp, d24u8);
+            tmpp += 8;
+            vst1_u8(tmpp, d25u8);
+            tmpp += 8;
+        }
+    }
+    if (yoffset == 0)
+        return;
+
+    // First pass on the remaining 5 rows of data
+    q3u8 = vld1q_u8(src);
+    src += src_pixels_per_line;
+    q4u8 = vld1q_u8(src);
+    src += src_pixels_per_line;
+    q5u8 = vld1q_u8(src);
+    src += src_pixels_per_line;
+    q6u8 = vld1q_u8(src);
+    src += src_pixels_per_line;
+    q7u8 = vld1q_u8(src);
+
+    q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8);
+    q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8);
+    q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8);
+    q11u16 = vmull_u8(vget_low_u8(q6u8), d0u8);
+    q12u16 = vmull_u8(vget_low_u8(q7u8), d0u8);
+
+    d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1);
+    d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1);
+    d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1);
+    d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 1);
+    d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 1);
+
+    q8u16 = vmlsl_u8(q8u16, d27u8, d1u8);
+    q9u16 = vmlsl_u8(q9u16, d28u8, d1u8);
+    q10u16 = vmlsl_u8(q10u16, d29u8, d1u8);
+    q11u16 = vmlsl_u8(q11u16, d30u8, d1u8);
+    q12u16 = vmlsl_u8(q12u16, d31u8, d1u8);
+
+    d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 4);
+    d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 4);
+    d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 4);
+    d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 4);
+    d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 4);
+
+    q8u16 = vmlsl_u8(q8u16, d27u8, d4u8);
+    q9u16 = vmlsl_u8(q9u16, d28u8, d4u8);
+    q10u16 = vmlsl_u8(q10u16, d29u8, d4u8);
+    q11u16 = vmlsl_u8(q11u16, d30u8, d4u8);
+    q12u16 = vmlsl_u8(q12u16, d31u8, d4u8);
+
+    d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 2);
+    d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 2);
+    d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 2);
+    d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 2);
+    d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 2);
+
+    q8u16 = vmlal_u8(q8u16, d27u8, d2u8);
+    q9u16 = vmlal_u8(q9u16, d28u8, d2u8);
+    q10u16 = vmlal_u8(q10u16, d29u8, d2u8);
+    q11u16 = vmlal_u8(q11u16, d30u8, d2u8);
+    q12u16 = vmlal_u8(q12u16, d31u8, d2u8);
+
+    d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5);
+    d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5);
+    d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5);
+    d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5);
+    d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 5);
+
+    q8u16 = vmlal_u8(q8u16, d27u8, d5u8);
+    q9u16 = vmlal_u8(q9u16, d28u8, d5u8);
+    q10u16 = vmlal_u8(q10u16, d29u8, d5u8);
+    q11u16 = vmlal_u8(q11u16, d30u8, d5u8);
+    q12u16 = vmlal_u8(q12u16, d31u8, d5u8);
+
+    d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 3);
+    d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 3);
+    d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 3);
+    d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 3);
+    d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 3);
+
+    q3u16 = vmull_u8(d27u8, d3u8);
+    q4u16 = vmull_u8(d28u8, d3u8);
+    q5u16 = vmull_u8(d29u8, d3u8);
+    q6u16 = vmull_u8(d30u8, d3u8);
+    q7u16 = vmull_u8(d31u8, d3u8);
+
+    q3s16 = vreinterpretq_s16_u16(q3u16);
+    q4s16 = vreinterpretq_s16_u16(q4u16);
+    q5s16 = vreinterpretq_s16_u16(q5u16);
+    q6s16 = vreinterpretq_s16_u16(q6u16);
+    q7s16 = vreinterpretq_s16_u16(q7u16);
+    q8s16 = vreinterpretq_s16_u16(q8u16);
+    q9s16 = vreinterpretq_s16_u16(q9u16);
+    q10s16 = vreinterpretq_s16_u16(q10u16);
+    q11s16 = vreinterpretq_s16_u16(q11u16);
+    q12s16 = vreinterpretq_s16_u16(q12u16);
+
+    q8s16 = vqaddq_s16(q8s16, q3s16);
+    q9s16 = vqaddq_s16(q9s16, q4s16);
+    q10s16 = vqaddq_s16(q10s16, q5s16);
+    q11s16 = vqaddq_s16(q11s16, q6s16);
+    q12s16 = vqaddq_s16(q12s16, q7s16);
+
+    d26u8 = vqrshrun_n_s16(q8s16, 7);
+    d27u8 = vqrshrun_n_s16(q9s16, 7);
+    d28u8 = vqrshrun_n_s16(q10s16, 7);
+    d29u8 = vqrshrun_n_s16(q11s16, 7);
+    d30u8 = vqrshrun_n_s16(q12s16, 7);
+
+    // Second pass: 8x8
+    dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]);
+    d0s8 = vdup_lane_s8(dtmps8, 0);
+    d1s8 = vdup_lane_s8(dtmps8, 1);
+    d2s8 = vdup_lane_s8(dtmps8, 2);
+    d3s8 = vdup_lane_s8(dtmps8, 3);
+    d4s8 = vdup_lane_s8(dtmps8, 4);
+    d5s8 = vdup_lane_s8(dtmps8, 5);
+    d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
+    d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
+    d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
+    d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
+    d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
+    d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
+
+    tmpp = tmp;
+    q9u8 = vld1q_u8(tmpp);
+    tmpp += 16;
+    q10u8 = vld1q_u8(tmpp);
+    tmpp += 16;
+    q11u8 = vld1q_u8(tmpp);
+    tmpp += 16;
+    q12u8 = vld1q_u8(tmpp);
+
+    d18u8 = vget_low_u8(q9u8);
+    d19u8 = vget_high_u8(q9u8);
+    d20u8 = vget_low_u8(q10u8);
+    d21u8 = vget_high_u8(q10u8);
+    d22u8 = vget_low_u8(q11u8);
+    d23u8 = vget_high_u8(q11u8);
+    d24u8 = vget_low_u8(q12u8);
+    d25u8 = vget_high_u8(q12u8);
+
+    for (i = 2; i > 0; i--) {
+        q3u16 = vmull_u8(d18u8, d0u8);
+        q4u16 = vmull_u8(d19u8, d0u8);
+        q5u16 = vmull_u8(d20u8, d0u8);
+        q6u16 = vmull_u8(d21u8, d0u8);
+
+        q3u16 = vmlsl_u8(q3u16, d19u8, d1u8);
+        q4u16 = vmlsl_u8(q4u16, d20u8, d1u8);
+        q5u16 = vmlsl_u8(q5u16, d21u8, d1u8);
+        q6u16 = vmlsl_u8(q6u16, d22u8, d1u8);
+
+        q3u16 = vmlsl_u8(q3u16, d22u8, d4u8);
+        q4u16 = vmlsl_u8(q4u16, d23u8, d4u8);
+        q5u16 = vmlsl_u8(q5u16, d24u8, d4u8);
+        q6u16 = vmlsl_u8(q6u16, d25u8, d4u8);
+
+        q3u16 = vmlal_u8(q3u16, d20u8, d2u8);
+        q4u16 = vmlal_u8(q4u16, d21u8, d2u8);
+        q5u16 = vmlal_u8(q5u16, d22u8, d2u8);
+        q6u16 = vmlal_u8(q6u16, d23u8, d2u8);
+
+        q3u16 = vmlal_u8(q3u16, d23u8, d5u8);
+        q4u16 = vmlal_u8(q4u16, d24u8, d5u8);
+        q5u16 = vmlal_u8(q5u16, d25u8, d5u8);
+        q6u16 = vmlal_u8(q6u16, d26u8, d5u8);
+
+        q7u16 = vmull_u8(d21u8, d3u8);
+        q8u16 = vmull_u8(d22u8, d3u8);
+        q9u16 = vmull_u8(d23u8, d3u8);
+        q10u16 = vmull_u8(d24u8, d3u8);
+
+        q3s16 = vreinterpretq_s16_u16(q3u16);
+        q4s16 = vreinterpretq_s16_u16(q4u16);
+        q5s16 = vreinterpretq_s16_u16(q5u16);
+        q6s16 = vreinterpretq_s16_u16(q6u16);
+        q7s16 = vreinterpretq_s16_u16(q7u16);
+        q8s16 = vreinterpretq_s16_u16(q8u16);
+        q9s16 = vreinterpretq_s16_u16(q9u16);
+        q10s16 = vreinterpretq_s16_u16(q10u16);
+
+        q7s16 = vqaddq_s16(q7s16, q3s16);
+        q8s16 = vqaddq_s16(q8s16, q4s16);
+        q9s16 = vqaddq_s16(q9s16, q5s16);
+        q10s16 = vqaddq_s16(q10s16, q6s16);
+
+        d6u8 = vqrshrun_n_s16(q7s16, 7);
+        d7u8 = vqrshrun_n_s16(q8s16, 7);
+        d8u8 = vqrshrun_n_s16(q9s16, 7);
+        d9u8 = vqrshrun_n_s16(q10s16, 7);
+
+        d18u8 = d22u8;
+        d19u8 = d23u8;
+        d20u8 = d24u8;
+        d21u8 = d25u8;
+        d22u8 = d26u8;
+        d23u8 = d27u8;
+        d24u8 = d28u8;
+        d25u8 = d29u8;
+        d26u8 = d30u8;
+
+        vst1_u8(dst_ptr, d6u8);
+        dst_ptr += dst_pitch;
+        vst1_u8(dst_ptr, d7u8);
+        dst_ptr += dst_pitch;
+        vst1_u8(dst_ptr, d8u8);
+        dst_ptr += dst_pitch;
+        vst1_u8(dst_ptr, d9u8);
+        dst_ptr += dst_pitch;
+    }
+    return;
+}
+
+void vp8_sixtap_predict16x16_neon(
+        unsigned char *src_ptr,
+        int src_pixels_per_line,
+        int xoffset,
+        int yoffset,
+        unsigned char *dst_ptr,
+        int dst_pitch) {
+    unsigned char *src, *src_tmp, *dst, *tmpp;
+    unsigned char tmp[336];
+    int i, j;
+    uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8;
+    uint8x8_t d10u8, d11u8, d12u8, d13u8, d14u8, d15u8, d18u8, d19u8;
+    uint8x8_t d20u8, d21u8, d22u8, d23u8, d24u8, d25u8, d26u8, d27u8;
+    uint8x8_t d28u8, d29u8, d30u8, d31u8;
+    int8x8_t dtmps8, d0s8, d1s8, d2s8, d3s8, d4s8, d5s8;
+    uint8x16_t q3u8, q4u8;
+    uint16x8_t q3u16, q4u16, q5u16, q6u16, q7u16, q8u16, q9u16, q10u16;
+    uint16x8_t q11u16, q12u16, q13u16, q15u16;
+    int16x8_t q3s16, q4s16, q5s16, q6s16, q7s16, q8s16, q9s16, q10s16;
+    int16x8_t q11s16, q12s16, q13s16, q15s16;
+
+    if (xoffset == 0) {  // secondpass_filter16x16_only
+        // load second_pass filter
+        dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]);
+        d0s8 = vdup_lane_s8(dtmps8, 0);
+        d1s8 = vdup_lane_s8(dtmps8, 1);
+        d2s8 = vdup_lane_s8(dtmps8, 2);
+        d3s8 = vdup_lane_s8(dtmps8, 3);
+        d4s8 = vdup_lane_s8(dtmps8, 4);
+        d5s8 = vdup_lane_s8(dtmps8, 5);
+        d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
+        d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
+        d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
+        d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
+        d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
+        d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
+
+        // load src data
+        src_tmp = src_ptr - src_pixels_per_line * 2;
+        for (i = 0; i < 2; i++) {
+            src = src_tmp + i * 8;
+            dst = dst_ptr + i * 8;
+            d18u8 = vld1_u8(src);
+            src += src_pixels_per_line;
+            d19u8 = vld1_u8(src);
+            src += src_pixels_per_line;
+            d20u8 = vld1_u8(src);
+            src += src_pixels_per_line;
+            d21u8 = vld1_u8(src);
+            src += src_pixels_per_line;
+            d22u8 = vld1_u8(src);
+            src += src_pixels_per_line;
+            for (j = 0; j < 4; j++) {
+                d23u8 = vld1_u8(src);
+                src += src_pixels_per_line;
+                d24u8 = vld1_u8(src);
+                src += src_pixels_per_line;
+                d25u8 = vld1_u8(src);
+                src += src_pixels_per_line;
+                d26u8 = vld1_u8(src);
+                src += src_pixels_per_line;
+
+                q3u16 = vmull_u8(d18u8, d0u8);
+                q4u16 = vmull_u8(d19u8, d0u8);
+                q5u16 = vmull_u8(d20u8, d0u8);
+                q6u16 = vmull_u8(d21u8, d0u8);
+
+                q3u16 = vmlsl_u8(q3u16, d19u8, d1u8);
+                q4u16 = vmlsl_u8(q4u16, d20u8, d1u8);
+                q5u16 = vmlsl_u8(q5u16, d21u8, d1u8);
+                q6u16 = vmlsl_u8(q6u16, d22u8, d1u8);
+
+                q3u16 = vmlsl_u8(q3u16, d22u8, d4u8);
+                q4u16 = vmlsl_u8(q4u16, d23u8, d4u8);
+                q5u16 = vmlsl_u8(q5u16, d24u8, d4u8);
+                q6u16 = vmlsl_u8(q6u16, d25u8, d4u8);
+
+                q3u16 = vmlal_u8(q3u16, d20u8, d2u8);
+                q4u16 = vmlal_u8(q4u16, d21u8, d2u8);
+                q5u16 = vmlal_u8(q5u16, d22u8, d2u8);
+                q6u16 = vmlal_u8(q6u16, d23u8, d2u8);
+
+                q3u16 = vmlal_u8(q3u16, d23u8, d5u8);
+                q4u16 = vmlal_u8(q4u16, d24u8, d5u8);
+                q5u16 = vmlal_u8(q5u16, d25u8, d5u8);
+                q6u16 = vmlal_u8(q6u16, d26u8, d5u8);
+
+                q7u16 = vmull_u8(d21u8, d3u8);
+                q8u16 = vmull_u8(d22u8, d3u8);
+                q9u16 = vmull_u8(d23u8, d3u8);
+                q10u16 = vmull_u8(d24u8, d3u8);
+
+                q3s16 = vreinterpretq_s16_u16(q3u16);
+                q4s16 = vreinterpretq_s16_u16(q4u16);
+                q5s16 = vreinterpretq_s16_u16(q5u16);
+                q6s16 = vreinterpretq_s16_u16(q6u16);
+                q7s16 = vreinterpretq_s16_u16(q7u16);
+                q8s16 = vreinterpretq_s16_u16(q8u16);
+                q9s16 = vreinterpretq_s16_u16(q9u16);
+                q10s16 = vreinterpretq_s16_u16(q10u16);
+
+                q7s16 = vqaddq_s16(q7s16, q3s16);
+                q8s16 = vqaddq_s16(q8s16, q4s16);
+                q9s16 = vqaddq_s16(q9s16, q5s16);
+                q10s16 = vqaddq_s16(q10s16, q6s16);
+
+                d6u8 = vqrshrun_n_s16(q7s16, 7);
+                d7u8 = vqrshrun_n_s16(q8s16, 7);
+                d8u8 = vqrshrun_n_s16(q9s16, 7);
+                d9u8 = vqrshrun_n_s16(q10s16, 7);
+
+                d18u8 = d22u8;
+                d19u8 = d23u8;
+                d20u8 = d24u8;
+                d21u8 = d25u8;
+                d22u8 = d26u8;
+
+                vst1_u8(dst, d6u8);
+                dst += dst_pitch;
+                vst1_u8(dst, d7u8);
+                dst += dst_pitch;
+                vst1_u8(dst, d8u8);
+                dst += dst_pitch;
+                vst1_u8(dst, d9u8);
+                dst += dst_pitch;
+            }
+        }
+        return;
+    }
+
+    // load first_pass filter
+    dtmps8 = vld1_s8(vp8_sub_pel_filters[xoffset]);
+    d0s8 = vdup_lane_s8(dtmps8, 0);
+    d1s8 = vdup_lane_s8(dtmps8, 1);
+    d2s8 = vdup_lane_s8(dtmps8, 2);
+    d3s8 = vdup_lane_s8(dtmps8, 3);
+    d4s8 = vdup_lane_s8(dtmps8, 4);
+    d5s8 = vdup_lane_s8(dtmps8, 5);
+    d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
+    d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
+    d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
+    d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
+    d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
+    d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
+
+    // First pass: output_height lines x output_width columns (21x16)
+    if (yoffset == 0) {  // firstpass_filter16x16_only: horizontal-only path
+        src = src_ptr - 2;
+        dst = dst_ptr;
+        for (i = 0; i < 8; i++) {
+            d6u8 = vld1_u8(src);
+            d7u8 = vld1_u8(src + 8);
+            d8u8 = vld1_u8(src + 16);
+            src += src_pixels_per_line;
+            d9u8 = vld1_u8(src);
+            d10u8 = vld1_u8(src + 8);
+            d11u8 = vld1_u8(src + 16);
+            src += src_pixels_per_line;
+
+            __builtin_prefetch(src);
+            __builtin_prefetch(src + src_pixels_per_line);
+
+            q6u16 = vmull_u8(d6u8, d0u8);
+            q7u16 = vmull_u8(d7u8, d0u8);
+            q8u16 = vmull_u8(d9u8, d0u8);
+            q9u16 = vmull_u8(d10u8, d0u8);
+
+            d20u8 = vext_u8(d6u8, d7u8, 1);
+            d21u8 = vext_u8(d9u8, d10u8, 1);
+            d22u8 = vext_u8(d7u8, d8u8, 1);
+            d23u8 = vext_u8(d10u8, d11u8, 1);
+            d24u8 = vext_u8(d6u8, d7u8, 4);
+            d25u8 = vext_u8(d9u8, d10u8, 4);
+            d26u8 = vext_u8(d7u8, d8u8, 4);
+            d27u8 = vext_u8(d10u8, d11u8, 4);
+            d28u8 = vext_u8(d6u8, d7u8, 5);
+            d29u8 = vext_u8(d9u8, d10u8, 5);
+
+            q6u16 = vmlsl_u8(q6u16, d20u8, d1u8);
+            q8u16 = vmlsl_u8(q8u16, d21u8, d1u8);
+            q7u16 = vmlsl_u8(q7u16, d22u8, d1u8);
+            q9u16 = vmlsl_u8(q9u16, d23u8, d1u8);
+            q6u16 = vmlsl_u8(q6u16, d24u8, d4u8);
+            q8u16 = vmlsl_u8(q8u16, d25u8, d4u8);
+            q7u16 = vmlsl_u8(q7u16, d26u8, d4u8);
+            q9u16 = vmlsl_u8(q9u16, d27u8, d4u8);
+            q6u16 = vmlal_u8(q6u16, d28u8, d5u8);
+            q8u16 = vmlal_u8(q8u16, d29u8, d5u8);
+
+            d20u8 = vext_u8(d7u8, d8u8, 5);
+            d21u8 = vext_u8(d10u8, d11u8, 5);
+            d22u8 = vext_u8(d6u8, d7u8, 2);
+            d23u8 = vext_u8(d9u8, d10u8, 2);
+            d24u8 = vext_u8(d7u8, d8u8, 2);
+            d25u8 = vext_u8(d10u8, d11u8, 2);
+            d26u8 = vext_u8(d6u8, d7u8, 3);
+            d27u8 = vext_u8(d9u8, d10u8, 3);
+            d28u8 = vext_u8(d7u8, d8u8, 3);
+            d29u8 = vext_u8(d10u8, d11u8, 3);
+
+            q7u16 = vmlal_u8(q7u16, d20u8, d5u8);
+            q9u16 = vmlal_u8(q9u16, d21u8, d5u8);
+            q6u16 = vmlal_u8(q6u16, d22u8, d2u8);
+            q8u16 = vmlal_u8(q8u16, d23u8, d2u8);
+            q7u16 = vmlal_u8(q7u16, d24u8, d2u8);
+            q9u16 = vmlal_u8(q9u16, d25u8, d2u8);
+
+            q10u16 = vmull_u8(d26u8, d3u8);
+            q11u16 = vmull_u8(d27u8, d3u8);
+            q12u16 = vmull_u8(d28u8, d3u8);
+            q15u16 = vmull_u8(d29u8, d3u8);
+
+            q6s16 = vreinterpretq_s16_u16(q6u16);
+            q7s16 = vreinterpretq_s16_u16(q7u16);
+            q8s16 = vreinterpretq_s16_u16(q8u16);
+            q9s16 = vreinterpretq_s16_u16(q9u16);
+            q10s16 = vreinterpretq_s16_u16(q10u16);
+            q11s16 = vreinterpretq_s16_u16(q11u16);
+            q12s16 = vreinterpretq_s16_u16(q12u16);
+            q15s16 = vreinterpretq_s16_u16(q15u16);
+
+            q6s16 = vqaddq_s16(q6s16, q10s16);
+            q8s16 = vqaddq_s16(q8s16, q11s16);
+            q7s16 = vqaddq_s16(q7s16, q12s16);
+            q9s16 = vqaddq_s16(q9s16, q15s16);
+
+            d6u8 = vqrshrun_n_s16(q6s16, 7);
+            d7u8 = vqrshrun_n_s16(q7s16, 7);
+            d8u8 = vqrshrun_n_s16(q8s16, 7);
+            d9u8 = vqrshrun_n_s16(q9s16, 7);
+
+            q3u8 = vcombine_u8(d6u8, d7u8);
+            q4u8 = vcombine_u8(d8u8, d9u8);
+            vst1q_u8(dst, q3u8);
+            dst += dst_pitch;
+            vst1q_u8(dst, q4u8);
+            dst += dst_pitch;
+        }
+        return;
+    }
+
+    src = src_ptr - 2 - src_pixels_per_line * 2;
+    tmpp = tmp;
+    for (i = 0; i < 7; i++) {
+        d6u8 = vld1_u8(src);
+        d7u8 = vld1_u8(src + 8);
+        d8u8 = vld1_u8(src + 16);
+        src += src_pixels_per_line;
+        d9u8 = vld1_u8(src);
+        d10u8 = vld1_u8(src + 8);
+        d11u8 = vld1_u8(src + 16);
+        src += src_pixels_per_line;
+        d12u8 = vld1_u8(src);
+        d13u8 = vld1_u8(src + 8);
+        d14u8 = vld1_u8(src + 16);
+        src += src_pixels_per_line;
+
+        __builtin_prefetch(src);
+        __builtin_prefetch(src + src_pixels_per_line);
+        __builtin_prefetch(src + src_pixels_per_line * 2);
+
+        q8u16 = vmull_u8(d6u8, d0u8);
+        q9u16 = vmull_u8(d7u8, d0u8);
+        q10u16 = vmull_u8(d9u8, d0u8);
+        q11u16 = vmull_u8(d10u8, d0u8);
+        q12u16 = vmull_u8(d12u8, d0u8);
+        q13u16 = vmull_u8(d13u8, d0u8);
+
+        d28u8 = vext_u8(d6u8, d7u8, 1);
+        d29u8 = vext_u8(d9u8, d10u8, 1);
+        d30u8 = vext_u8(d12u8, d13u8, 1);
+        q8u16 = vmlsl_u8(q8u16, d28u8, d1u8);
+        q10u16 = vmlsl_u8(q10u16, d29u8, d1u8);
+        q12u16 = vmlsl_u8(q12u16, d30u8, d1u8);
+        d28u8 = vext_u8(d7u8, d8u8, 1);
+        d29u8 = vext_u8(d10u8, d11u8, 1);
+        d30u8 = vext_u8(d13u8, d14u8, 1);
+        q9u16  = vmlsl_u8(q9u16, d28u8, d1u8);
+        q11u16 = vmlsl_u8(q11u16, d29u8, d1u8);
+        q13u16 = vmlsl_u8(q13u16, d30u8, d1u8);
+
+        d28u8 = vext_u8(d6u8, d7u8, 4);
+        d29u8 = vext_u8(d9u8, d10u8, 4);
+        d30u8 = vext_u8(d12u8, d13u8, 4);
+        q8u16 = vmlsl_u8(q8u16, d28u8, d4u8);
+        q10u16 = vmlsl_u8(q10u16, d29u8, d4u8);
+        q12u16 = vmlsl_u8(q12u16, d30u8, d4u8);
+        d28u8 = vext_u8(d7u8, d8u8, 4);
+        d29u8 = vext_u8(d10u8, d11u8, 4);
+        d30u8 = vext_u8(d13u8, d14u8, 4);
+        q9u16 = vmlsl_u8(q9u16, d28u8, d4u8);
+        q11u16 = vmlsl_u8(q11u16, d29u8, d4u8);
+        q13u16 = vmlsl_u8(q13u16, d30u8, d4u8);
+
+        d28u8 = vext_u8(d6u8, d7u8, 5);
+        d29u8 = vext_u8(d9u8, d10u8, 5);
+        d30u8 = vext_u8(d12u8, d13u8, 5);
+        q8u16 = vmlal_u8(q8u16, d28u8, d5u8);
+        q10u16 = vmlal_u8(q10u16, d29u8, d5u8);
+        q12u16 = vmlal_u8(q12u16, d30u8, d5u8);
+        d28u8 = vext_u8(d7u8, d8u8, 5);
+        d29u8 = vext_u8(d10u8, d11u8, 5);
+        d30u8 = vext_u8(d13u8, d14u8, 5);
+        q9u16 = vmlal_u8(q9u16, d28u8, d5u8);
+        q11u16 = vmlal_u8(q11u16, d29u8, d5u8);
+        q13u16 = vmlal_u8(q13u16, d30u8, d5u8);
+
+        d28u8 = vext_u8(d6u8, d7u8, 2);
+        d29u8 = vext_u8(d9u8, d10u8, 2);
+        d30u8 = vext_u8(d12u8, d13u8, 2);
+        q8u16 = vmlal_u8(q8u16, d28u8, d2u8);
+        q10u16 = vmlal_u8(q10u16, d29u8, d2u8);
+        q12u16 = vmlal_u8(q12u16, d30u8, d2u8);
+        d28u8 = vext_u8(d7u8, d8u8, 2);
+        d29u8 = vext_u8(d10u8, d11u8, 2);
+        d30u8 = vext_u8(d13u8, d14u8, 2);
+        q9u16 = vmlal_u8(q9u16, d28u8, d2u8);
+        q11u16 = vmlal_u8(q11u16, d29u8, d2u8);
+        q13u16 = vmlal_u8(q13u16, d30u8, d2u8);
+
+        d28u8 = vext_u8(d6u8, d7u8, 3);
+        d29u8 = vext_u8(d9u8, d10u8, 3);
+        d30u8 = vext_u8(d12u8, d13u8, 3);
+        d15u8 = vext_u8(d7u8, d8u8, 3);
+        d31u8 = vext_u8(d10u8, d11u8, 3);
+        d6u8  = vext_u8(d13u8, d14u8, 3);
+        q4u16 = vmull_u8(d28u8, d3u8);
+        q5u16 = vmull_u8(d29u8, d3u8);
+        q6u16 = vmull_u8(d30u8, d3u8);
+        q4s16 = vreinterpretq_s16_u16(q4u16);
+        q5s16 = vreinterpretq_s16_u16(q5u16);
+        q6s16 = vreinterpretq_s16_u16(q6u16);
+        q8s16 = vreinterpretq_s16_u16(q8u16);
+        q10s16 = vreinterpretq_s16_u16(q10u16);
+        q12s16 = vreinterpretq_s16_u16(q12u16);
+        q8s16 = vqaddq_s16(q8s16, q4s16);
+        q10s16 = vqaddq_s16(q10s16, q5s16);
+        q12s16 = vqaddq_s16(q12s16, q6s16);
+
+        q6u16 = vmull_u8(d15u8, d3u8);
+        q7u16 = vmull_u8(d31u8, d3u8);
+        q3u16 = vmull_u8(d6u8, d3u8);
+        q3s16 = vreinterpretq_s16_u16(q3u16);
+        q6s16 = vreinterpretq_s16_u16(q6u16);
+        q7s16 = vreinterpretq_s16_u16(q7u16);
+        q9s16 = vreinterpretq_s16_u16(q9u16);
+        q11s16 = vreinterpretq_s16_u16(q11u16);
+        q13s16 = vreinterpretq_s16_u16(q13u16);
+        q9s16 = vqaddq_s16(q9s16, q6s16);
+        q11s16 = vqaddq_s16(q11s16, q7s16);
+        q13s16 = vqaddq_s16(q13s16, q3s16);
+
+        d6u8 = vqrshrun_n_s16(q8s16, 7);
+        d7u8 = vqrshrun_n_s16(q9s16, 7);
+        d8u8 = vqrshrun_n_s16(q10s16, 7);
+        d9u8 = vqrshrun_n_s16(q11s16, 7);
+        d10u8 = vqrshrun_n_s16(q12s16, 7);
+        d11u8 = vqrshrun_n_s16(q13s16, 7);
+
+        vst1_u8(tmpp, d6u8);
+        tmpp += 8;
+        vst1_u8(tmpp, d7u8);
+        tmpp += 8;
+        vst1_u8(tmpp, d8u8);
+        tmpp += 8;
+        vst1_u8(tmpp, d9u8);
+        tmpp += 8;
+        vst1_u8(tmpp, d10u8);
+        tmpp += 8;
+        vst1_u8(tmpp, d11u8);
+        tmpp += 8;
+    }
+
+    // Second pass: 16x16
+    dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]);
+    d0s8 = vdup_lane_s8(dtmps8, 0);
+    d1s8 = vdup_lane_s8(dtmps8, 1);
+    d2s8 = vdup_lane_s8(dtmps8, 2);
+    d3s8 = vdup_lane_s8(dtmps8, 3);
+    d4s8 = vdup_lane_s8(dtmps8, 4);
+    d5s8 = vdup_lane_s8(dtmps8, 5);
+    d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
+    d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
+    d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
+    d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
+    d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
+    d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
+
+    for (i = 0; i < 2; i++) {
+        dst = dst_ptr + 8 * i;
+        tmpp = tmp + 8 * i;
+        d18u8 = vld1_u8(tmpp);
+        tmpp += 16;
+        d19u8 = vld1_u8(tmpp);
+        tmpp += 16;
+        d20u8 = vld1_u8(tmpp);
+        tmpp += 16;
+        d21u8 = vld1_u8(tmpp);
+        tmpp += 16;
+        d22u8 = vld1_u8(tmpp);
+        tmpp += 16;
+        for (j = 0; j < 4; j++) {
+            d23u8 = vld1_u8(tmpp);
+            tmpp += 16;
+            d24u8 = vld1_u8(tmpp);
+            tmpp += 16;
+            d25u8 = vld1_u8(tmpp);
+            tmpp += 16;
+            d26u8 = vld1_u8(tmpp);
+            tmpp += 16;
+
+            q3u16 = vmull_u8(d18u8, d0u8);
+            q4u16 = vmull_u8(d19u8, d0u8);
+            q5u16 = vmull_u8(d20u8, d0u8);
+            q6u16 = vmull_u8(d21u8, d0u8);
+
+            q3u16 = vmlsl_u8(q3u16, d19u8, d1u8);
+            q4u16 = vmlsl_u8(q4u16, d20u8, d1u8);
+            q5u16 = vmlsl_u8(q5u16, d21u8, d1u8);
+            q6u16 = vmlsl_u8(q6u16, d22u8, d1u8);
+
+            q3u16 = vmlsl_u8(q3u16, d22u8, d4u8);
+            q4u16 = vmlsl_u8(q4u16, d23u8, d4u8);
+            q5u16 = vmlsl_u8(q5u16, d24u8, d4u8);
+            q6u16 = vmlsl_u8(q6u16, d25u8, d4u8);
+
+            q3u16 = vmlal_u8(q3u16, d20u8, d2u8);
+            q4u16 = vmlal_u8(q4u16, d21u8, d2u8);
+            q5u16 = vmlal_u8(q5u16, d22u8, d2u8);
+            q6u16 = vmlal_u8(q6u16, d23u8, d2u8);
+
+            q3u16 = vmlal_u8(q3u16, d23u8, d5u8);
+            q4u16 = vmlal_u8(q4u16, d24u8, d5u8);
+            q5u16 = vmlal_u8(q5u16, d25u8, d5u8);
+            q6u16 = vmlal_u8(q6u16, d26u8, d5u8);
+
+            q7u16 = vmull_u8(d21u8, d3u8);
+            q8u16 = vmull_u8(d22u8, d3u8);
+            q9u16 = vmull_u8(d23u8, d3u8);
+            q10u16 = vmull_u8(d24u8, d3u8);
+
+            q3s16 = vreinterpretq_s16_u16(q3u16);
+            q4s16 = vreinterpretq_s16_u16(q4u16);
+            q5s16 = vreinterpretq_s16_u16(q5u16);
+            q6s16 = vreinterpretq_s16_u16(q6u16);
+            q7s16 = vreinterpretq_s16_u16(q7u16);
+            q8s16 = vreinterpretq_s16_u16(q8u16);
+            q9s16 = vreinterpretq_s16_u16(q9u16);
+            q10s16 = vreinterpretq_s16_u16(q10u16);
+
+            q7s16 = vqaddq_s16(q7s16, q3s16);
+            q8s16 = vqaddq_s16(q8s16, q4s16);
+            q9s16 = vqaddq_s16(q9s16, q5s16);
+            q10s16 = vqaddq_s16(q10s16, q6s16);
+
+            d6u8 = vqrshrun_n_s16(q7s16, 7);
+            d7u8 = vqrshrun_n_s16(q8s16, 7);
+            d8u8 = vqrshrun_n_s16(q9s16, 7);
+            d9u8 = vqrshrun_n_s16(q10s16, 7);
+
+            d18u8 = d22u8;
+            d19u8 = d23u8;
+            d20u8 = d24u8;
+            d21u8 = d25u8;
+            d22u8 = d26u8;
+
+            vst1_u8(dst, d6u8);
+            dst += dst_pitch;
+            vst1_u8(dst, d7u8);
+            dst += dst_pitch;
+            vst1_u8(dst, d8u8);
+            dst += dst_pitch;
+            vst1_u8(dst, d9u8);
+            dst += dst_pitch;
+        }
+    }
+    return;
+}
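
For readers less familiar with NEON intrinsics: the code above is VP8's six-tap sub-pixel filter, evaluated for 8 or 16 pixels per instruction, with a 21x16 intermediate buffer (tmp) between the horizontal and vertical passes. Below is a minimal scalar sketch of what one output pixel computes; sixtap_pixel is an illustrative helper that is not in the tree, f[0..5] stands for the signed taps from vp8_sub_pel_filters[offset], the negative taps correspond to the vmlsl_u8 steps, and the final round/clamp is what vqrshrun_n_s16(sum, 7) performs in the intrinsics.

/* Illustrative scalar sketch only -- the committed code is the NEON version above. */
static unsigned char sixtap_pixel(const unsigned char *p, const signed char f[6])
{
    /* Taps apply to pixels at offsets -2..+3; coefficients are Q7 (sum to 128). */
    int sum = f[0] * p[-2] + f[1] * p[-1] + f[2] * p[0]
            + f[3] * p[1]  + f[4] * p[2]  + f[5] * p[3];

    sum = (sum + 64) >> 7;        /* round to nearest, drop the Q7 scaling */
    if (sum < 0)   sum = 0;       /* saturate to an 8-bit pixel */
    if (sum > 255) sum = 255;
    return (unsigned char)sum;
}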

+ 550 - 0
thirdparty/libvpx/vp8/common/arm/neon/vp8_loopfilter_neon.c

@@ -0,0 +1,550 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+#include "./vpx_config.h"
+#include "vpx_ports/arm.h"
+
+static INLINE void vp8_loop_filter_neon(
+        uint8x16_t qblimit,  // flimit
+        uint8x16_t qlimit,   // limit
+        uint8x16_t qthresh,  // thresh
+        uint8x16_t q3,       // p3
+        uint8x16_t q4,       // p2
+        uint8x16_t q5,       // p1
+        uint8x16_t q6,       // p0
+        uint8x16_t q7,       // q0
+        uint8x16_t q8,       // q1
+        uint8x16_t q9,       // q2
+        uint8x16_t q10,      // q3
+        uint8x16_t *q5r,     // p1
+        uint8x16_t *q6r,     // p0
+        uint8x16_t *q7r,     // q0
+        uint8x16_t *q8r) {   // q1
+    uint8x16_t q0u8, q1u8, q2u8, q11u8, q12u8, q13u8, q14u8, q15u8;
+    int16x8_t q2s16, q11s16;
+    uint16x8_t q4u16;
+    int8x16_t q1s8, q2s8, q10s8, q11s8, q12s8, q13s8;
+    int8x8_t d2s8, d3s8;
+
+    q11u8 = vabdq_u8(q3, q4);
+    q12u8 = vabdq_u8(q4, q5);
+    q13u8 = vabdq_u8(q5, q6);
+    q14u8 = vabdq_u8(q8, q7);
+    q3    = vabdq_u8(q9, q8);
+    q4    = vabdq_u8(q10, q9);
+
+    q11u8 = vmaxq_u8(q11u8, q12u8);
+    q12u8 = vmaxq_u8(q13u8, q14u8);
+    q3    = vmaxq_u8(q3, q4);
+    q15u8 = vmaxq_u8(q11u8, q12u8);
+
+    q9 = vabdq_u8(q6, q7);
+
+    // vp8_hevmask
+    q13u8 = vcgtq_u8(q13u8, qthresh);
+    q14u8 = vcgtq_u8(q14u8, qthresh);
+    q15u8 = vmaxq_u8(q15u8, q3);
+
+    q2u8 = vabdq_u8(q5, q8);
+    q9 = vqaddq_u8(q9, q9);
+
+    q15u8 = vcgeq_u8(qlimit, q15u8);
+
+    // vp8_filter() function
+    // convert to signed
+    q10 = vdupq_n_u8(0x80);
+    q8 = veorq_u8(q8, q10);
+    q7 = veorq_u8(q7, q10);
+    q6 = veorq_u8(q6, q10);
+    q5 = veorq_u8(q5, q10);
+
+    q2u8 = vshrq_n_u8(q2u8, 1);
+    q9 = vqaddq_u8(q9, q2u8);
+
+    q10 = vdupq_n_u8(3);
+
+    q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7)),
+                     vget_low_s8(vreinterpretq_s8_u8(q6)));
+    q11s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7)),
+                      vget_high_s8(vreinterpretq_s8_u8(q6)));
+
+    q9 = vcgeq_u8(qblimit, q9);
+
+    q1s8 = vqsubq_s8(vreinterpretq_s8_u8(q5),
+                    vreinterpretq_s8_u8(q8));
+
+    q14u8 = vorrq_u8(q13u8, q14u8);
+
+    q4u16 = vmovl_u8(vget_low_u8(q10));
+    q2s16 = vmulq_s16(q2s16, vreinterpretq_s16_u16(q4u16));
+    q11s16 = vmulq_s16(q11s16, vreinterpretq_s16_u16(q4u16));
+
+    q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q14u8);
+    q15u8 = vandq_u8(q15u8, q9);
+
+    q1s8 = vreinterpretq_s8_u8(q1u8);
+    q2s16 = vaddw_s8(q2s16, vget_low_s8(q1s8));
+    q11s16 = vaddw_s8(q11s16, vget_high_s8(q1s8));
+
+    q9 = vdupq_n_u8(4);
+    // vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
+    d2s8 = vqmovn_s16(q2s16);
+    d3s8 = vqmovn_s16(q11s16);
+    q1s8 = vcombine_s8(d2s8, d3s8);
+    q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q15u8);
+    q1s8 = vreinterpretq_s8_u8(q1u8);
+
+    q2s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q10));
+    q1s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q9));
+    q2s8 = vshrq_n_s8(q2s8, 3);
+    q1s8 = vshrq_n_s8(q1s8, 3);
+
+    q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q6), q2s8);
+    q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q7), q1s8);
+
+    q1s8 = vrshrq_n_s8(q1s8, 1);
+    q1s8 = vbicq_s8(q1s8, vreinterpretq_s8_u8(q14u8));
+
+    q13s8 = vqaddq_s8(vreinterpretq_s8_u8(q5), q1s8);
+    q12s8 = vqsubq_s8(vreinterpretq_s8_u8(q8), q1s8);
+
+    q0u8 = vdupq_n_u8(0x80);
+    *q8r = veorq_u8(vreinterpretq_u8_s8(q12s8), q0u8);
+    *q7r = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8);
+    *q6r = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8);
+    *q5r = veorq_u8(vreinterpretq_u8_s8(q13s8), q0u8);
+    return;
+}
+
+void vp8_loop_filter_horizontal_edge_y_neon(
+        unsigned char *src,
+        int pitch,
+        unsigned char blimit,
+        unsigned char limit,
+        unsigned char thresh) {
+    uint8x16_t qblimit, qlimit, qthresh, q3, q4;
+    uint8x16_t q5, q6, q7, q8, q9, q10;
+
+    qblimit = vdupq_n_u8(blimit);
+    qlimit  = vdupq_n_u8(limit);
+    qthresh = vdupq_n_u8(thresh);
+    src -= (pitch << 2);
+
+    q3 = vld1q_u8(src);
+    src += pitch;
+    q4 = vld1q_u8(src);
+    src += pitch;
+    q5 = vld1q_u8(src);
+    src += pitch;
+    q6 = vld1q_u8(src);
+    src += pitch;
+    q7 = vld1q_u8(src);
+    src += pitch;
+    q8 = vld1q_u8(src);
+    src += pitch;
+    q9 = vld1q_u8(src);
+    src += pitch;
+    q10 = vld1q_u8(src);
+
+    vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
+                         q5, q6, q7, q8, q9, q10,
+                         &q5, &q6, &q7, &q8);
+
+    src -= (pitch * 5);
+    vst1q_u8(src, q5);
+    src += pitch;
+    vst1q_u8(src, q6);
+    src += pitch;
+    vst1q_u8(src, q7);
+    src += pitch;
+    vst1q_u8(src, q8);
+    return;
+}
+
+void vp8_loop_filter_horizontal_edge_uv_neon(
+        unsigned char *u,
+        int pitch,
+        unsigned char blimit,
+        unsigned char limit,
+        unsigned char thresh,
+        unsigned char *v) {
+    uint8x16_t qblimit, qlimit, qthresh, q3, q4;
+    uint8x16_t q5, q6, q7, q8, q9, q10;
+    uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
+    uint8x8_t d15, d16, d17, d18, d19, d20, d21;
+
+    qblimit = vdupq_n_u8(blimit);
+    qlimit  = vdupq_n_u8(limit);
+    qthresh = vdupq_n_u8(thresh);
+
+    u -= (pitch << 2);
+    v -= (pitch << 2);
+
+    d6  = vld1_u8(u);
+    u += pitch;
+    d7  = vld1_u8(v);
+    v += pitch;
+    d8  = vld1_u8(u);
+    u += pitch;
+    d9  = vld1_u8(v);
+    v += pitch;
+    d10 = vld1_u8(u);
+    u += pitch;
+    d11 = vld1_u8(v);
+    v += pitch;
+    d12 = vld1_u8(u);
+    u += pitch;
+    d13 = vld1_u8(v);
+    v += pitch;
+    d14 = vld1_u8(u);
+    u += pitch;
+    d15 = vld1_u8(v);
+    v += pitch;
+    d16 = vld1_u8(u);
+    u += pitch;
+    d17 = vld1_u8(v);
+    v += pitch;
+    d18 = vld1_u8(u);
+    u += pitch;
+    d19 = vld1_u8(v);
+    v += pitch;
+    d20 = vld1_u8(u);
+    d21 = vld1_u8(v);
+
+    q3 = vcombine_u8(d6, d7);
+    q4 = vcombine_u8(d8, d9);
+    q5 = vcombine_u8(d10, d11);
+    q6 = vcombine_u8(d12, d13);
+    q7 = vcombine_u8(d14, d15);
+    q8 = vcombine_u8(d16, d17);
+    q9 = vcombine_u8(d18, d19);
+    q10 = vcombine_u8(d20, d21);
+
+    vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
+                         q5, q6, q7, q8, q9, q10,
+                         &q5, &q6, &q7, &q8);
+
+    u -= (pitch * 5);
+    vst1_u8(u, vget_low_u8(q5));
+    u += pitch;
+    vst1_u8(u, vget_low_u8(q6));
+    u += pitch;
+    vst1_u8(u, vget_low_u8(q7));
+    u += pitch;
+    vst1_u8(u, vget_low_u8(q8));
+
+    v -= (pitch * 5);
+    vst1_u8(v, vget_high_u8(q5));
+    v += pitch;
+    vst1_u8(v, vget_high_u8(q6));
+    v += pitch;
+    vst1_u8(v, vget_high_u8(q7));
+    v += pitch;
+    vst1_u8(v, vget_high_u8(q8));
+    return;
+}
+
+static INLINE void write_4x8(unsigned char *dst, int pitch,
+                             const uint8x8x4_t result) {
+#ifdef VPX_INCOMPATIBLE_GCC
+    /*
+     * uint8x8x4_t result
+    00 01 02 03 | 04 05 06 07
+    10 11 12 13 | 14 15 16 17
+    20 21 22 23 | 24 25 26 27
+    30 31 32 33 | 34 35 36 37
+    ---
+    * after vtrn_u16
+    00 01 20 21 | 04 05 24 25
+    02 03 22 23 | 06 07 26 27
+    10 11 30 31 | 14 15 34 35
+    12 13 32 33 | 16 17 36 37
+    ---
+    * after vtrn_u8
+    00 10 20 30 | 04 14 24 34
+    01 11 21 31 | 05 15 25 35
+    02 12 22 32 | 06 16 26 36
+    03 13 23 33 | 07 17 27 37
+    */
+    const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[0]),
+                                          vreinterpret_u16_u8(result.val[2]));
+    const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[1]),
+                                          vreinterpret_u16_u8(result.val[3]));
+    const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]),
+                                       vreinterpret_u8_u16(r13_u16.val[0]));
+    const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]),
+                                       vreinterpret_u8_u16(r13_u16.val[1]));
+    const uint32x2_t x_0_4 = vreinterpret_u32_u8(r01_u8.val[0]);
+    const uint32x2_t x_1_5 = vreinterpret_u32_u8(r01_u8.val[1]);
+    const uint32x2_t x_2_6 = vreinterpret_u32_u8(r23_u8.val[0]);
+    const uint32x2_t x_3_7 = vreinterpret_u32_u8(r23_u8.val[1]);
+    vst1_lane_u32((uint32_t *)dst, x_0_4, 0);
+    dst += pitch;
+    vst1_lane_u32((uint32_t *)dst, x_1_5, 0);
+    dst += pitch;
+    vst1_lane_u32((uint32_t *)dst, x_2_6, 0);
+    dst += pitch;
+    vst1_lane_u32((uint32_t *)dst, x_3_7, 0);
+    dst += pitch;
+    vst1_lane_u32((uint32_t *)dst, x_0_4, 1);
+    dst += pitch;
+    vst1_lane_u32((uint32_t *)dst, x_1_5, 1);
+    dst += pitch;
+    vst1_lane_u32((uint32_t *)dst, x_2_6, 1);
+    dst += pitch;
+    vst1_lane_u32((uint32_t *)dst, x_3_7, 1);
+#else
+    vst4_lane_u8(dst, result, 0);
+    dst += pitch;
+    vst4_lane_u8(dst, result, 1);
+    dst += pitch;
+    vst4_lane_u8(dst, result, 2);
+    dst += pitch;
+    vst4_lane_u8(dst, result, 3);
+    dst += pitch;
+    vst4_lane_u8(dst, result, 4);
+    dst += pitch;
+    vst4_lane_u8(dst, result, 5);
+    dst += pitch;
+    vst4_lane_u8(dst, result, 6);
+    dst += pitch;
+    vst4_lane_u8(dst, result, 7);
+#endif  // VPX_INCOMPATIBLE_GCC
+}
+
+void vp8_loop_filter_vertical_edge_y_neon(
+        unsigned char *src,
+        int pitch,
+        unsigned char blimit,
+        unsigned char limit,
+        unsigned char thresh) {
+    unsigned char *s, *d;
+    uint8x16_t qblimit, qlimit, qthresh, q3, q4;
+    uint8x16_t q5, q6, q7, q8, q9, q10;
+    uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
+    uint8x8_t d15, d16, d17, d18, d19, d20, d21;
+    uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3;
+    uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7;
+    uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11;
+    uint8x8x4_t q4ResultH, q4ResultL;
+
+    qblimit = vdupq_n_u8(blimit);
+    qlimit  = vdupq_n_u8(limit);
+    qthresh = vdupq_n_u8(thresh);
+
+    s = src - 4;
+    d6  = vld1_u8(s);
+    s += pitch;
+    d8  = vld1_u8(s);
+    s += pitch;
+    d10 = vld1_u8(s);
+    s += pitch;
+    d12 = vld1_u8(s);
+    s += pitch;
+    d14 = vld1_u8(s);
+    s += pitch;
+    d16 = vld1_u8(s);
+    s += pitch;
+    d18 = vld1_u8(s);
+    s += pitch;
+    d20 = vld1_u8(s);
+    s += pitch;
+    d7  = vld1_u8(s);
+    s += pitch;
+    d9  = vld1_u8(s);
+    s += pitch;
+    d11 = vld1_u8(s);
+    s += pitch;
+    d13 = vld1_u8(s);
+    s += pitch;
+    d15 = vld1_u8(s);
+    s += pitch;
+    d17 = vld1_u8(s);
+    s += pitch;
+    d19 = vld1_u8(s);
+    s += pitch;
+    d21 = vld1_u8(s);
+
+    q3 = vcombine_u8(d6, d7);
+    q4 = vcombine_u8(d8, d9);
+    q5 = vcombine_u8(d10, d11);
+    q6 = vcombine_u8(d12, d13);
+    q7 = vcombine_u8(d14, d15);
+    q8 = vcombine_u8(d16, d17);
+    q9 = vcombine_u8(d18, d19);
+    q10 = vcombine_u8(d20, d21);
+
+    q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
+    q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
+    q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
+    q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
+
+    q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
+                       vreinterpretq_u16_u32(q2tmp2.val[0]));
+    q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
+                       vreinterpretq_u16_u32(q2tmp3.val[0]));
+    q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
+                       vreinterpretq_u16_u32(q2tmp2.val[1]));
+    q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
+                       vreinterpretq_u16_u32(q2tmp3.val[1]));
+
+    q2tmp8  = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
+                       vreinterpretq_u8_u16(q2tmp5.val[0]));
+    q2tmp9  = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
+                       vreinterpretq_u8_u16(q2tmp5.val[1]));
+    q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
+                       vreinterpretq_u8_u16(q2tmp7.val[0]));
+    q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
+                       vreinterpretq_u8_u16(q2tmp7.val[1]));
+
+    q3 = q2tmp8.val[0];
+    q4 = q2tmp8.val[1];
+    q5 = q2tmp9.val[0];
+    q6 = q2tmp9.val[1];
+    q7 = q2tmp10.val[0];
+    q8 = q2tmp10.val[1];
+    q9 = q2tmp11.val[0];
+    q10 = q2tmp11.val[1];
+
+    vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
+                         q5, q6, q7, q8, q9, q10,
+                         &q5, &q6, &q7, &q8);
+
+    q4ResultL.val[0] = vget_low_u8(q5);   // d10
+    q4ResultL.val[1] = vget_low_u8(q6);   // d12
+    q4ResultL.val[2] = vget_low_u8(q7);   // d14
+    q4ResultL.val[3] = vget_low_u8(q8);   // d16
+    q4ResultH.val[0] = vget_high_u8(q5);  // d11
+    q4ResultH.val[1] = vget_high_u8(q6);  // d13
+    q4ResultH.val[2] = vget_high_u8(q7);  // d15
+    q4ResultH.val[3] = vget_high_u8(q8);  // d17
+
+    d = src - 2;
+    write_4x8(d, pitch, q4ResultL);
+    d += pitch * 8;
+    write_4x8(d, pitch, q4ResultH);
+}
+
+void vp8_loop_filter_vertical_edge_uv_neon(
+        unsigned char *u,
+        int pitch,
+        unsigned char blimit,
+        unsigned char limit,
+        unsigned char thresh,
+        unsigned char *v) {
+    unsigned char *us, *ud;
+    unsigned char *vs, *vd;
+    uint8x16_t qblimit, qlimit, qthresh, q3, q4;
+    uint8x16_t q5, q6, q7, q8, q9, q10;
+    uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
+    uint8x8_t d15, d16, d17, d18, d19, d20, d21;
+    uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3;
+    uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7;
+    uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11;
+    uint8x8x4_t q4ResultH, q4ResultL;
+
+    qblimit = vdupq_n_u8(blimit);
+    qlimit  = vdupq_n_u8(limit);
+    qthresh = vdupq_n_u8(thresh);
+
+    us = u - 4;
+    d6 = vld1_u8(us);
+    us += pitch;
+    d8 = vld1_u8(us);
+    us += pitch;
+    d10 = vld1_u8(us);
+    us += pitch;
+    d12 = vld1_u8(us);
+    us += pitch;
+    d14 = vld1_u8(us);
+    us += pitch;
+    d16 = vld1_u8(us);
+    us += pitch;
+    d18 = vld1_u8(us);
+    us += pitch;
+    d20 = vld1_u8(us);
+
+    vs = v - 4;
+    d7 = vld1_u8(vs);
+    vs += pitch;
+    d9 = vld1_u8(vs);
+    vs += pitch;
+    d11 = vld1_u8(vs);
+    vs += pitch;
+    d13 = vld1_u8(vs);
+    vs += pitch;
+    d15 = vld1_u8(vs);
+    vs += pitch;
+    d17 = vld1_u8(vs);
+    vs += pitch;
+    d19 = vld1_u8(vs);
+    vs += pitch;
+    d21 = vld1_u8(vs);
+
+    q3 = vcombine_u8(d6, d7);
+    q4 = vcombine_u8(d8, d9);
+    q5 = vcombine_u8(d10, d11);
+    q6 = vcombine_u8(d12, d13);
+    q7 = vcombine_u8(d14, d15);
+    q8 = vcombine_u8(d16, d17);
+    q9 = vcombine_u8(d18, d19);
+    q10 = vcombine_u8(d20, d21);
+
+    q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
+    q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
+    q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
+    q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
+
+    q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
+                       vreinterpretq_u16_u32(q2tmp2.val[0]));
+    q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
+                       vreinterpretq_u16_u32(q2tmp3.val[0]));
+    q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
+                       vreinterpretq_u16_u32(q2tmp2.val[1]));
+    q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
+                       vreinterpretq_u16_u32(q2tmp3.val[1]));
+
+    q2tmp8  = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
+                       vreinterpretq_u8_u16(q2tmp5.val[0]));
+    q2tmp9  = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
+                       vreinterpretq_u8_u16(q2tmp5.val[1]));
+    q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
+                       vreinterpretq_u8_u16(q2tmp7.val[0]));
+    q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
+                       vreinterpretq_u8_u16(q2tmp7.val[1]));
+
+    q3 = q2tmp8.val[0];
+    q4 = q2tmp8.val[1];
+    q5 = q2tmp9.val[0];
+    q6 = q2tmp9.val[1];
+    q7 = q2tmp10.val[0];
+    q8 = q2tmp10.val[1];
+    q9 = q2tmp11.val[0];
+    q10 = q2tmp11.val[1];
+
+    vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
+                         q5, q6, q7, q8, q9, q10,
+                         &q5, &q6, &q7, &q8);
+
+    q4ResultL.val[0] = vget_low_u8(q5);   // d10
+    q4ResultL.val[1] = vget_low_u8(q6);   // d12
+    q4ResultL.val[2] = vget_low_u8(q7);   // d14
+    q4ResultL.val[3] = vget_low_u8(q8);   // d16
+    ud = u - 2;
+    write_4x8(ud, pitch, q4ResultL);
+
+    q4ResultH.val[0] = vget_high_u8(q5);  // d11
+    q4ResultH.val[1] = vget_high_u8(q6);  // d13
+    q4ResultH.val[2] = vget_high_u8(q7);  // d15
+    q4ResultH.val[3] = vget_high_u8(q8);  // d17
+    vd = v - 2;
+    write_4x8(vd, pitch, q4ResultH);
+}
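
As a rough scalar transcription of what vp8_loop_filter_neon() does to each column of the edge (an illustrative sketch following the usual VP8 "normal filter" description, not the committed implementation): mask and hev below are 0 or -1, mirroring the all-zeros/all-ones NEON compare results, and the four pixels are assumed to have already been XORed with 0x80 so they can be treated as signed, exactly like the veorq_u8 steps above.

/* Illustrative scalar sketch only. */
static signed char clamp_s8(int v)
{
    return (signed char)(v < -128 ? -128 : (v > 127 ? 127 : v));
}

static void filter4_scalar(int mask, int hev,
                           signed char *p1, signed char *p0,
                           signed char *q0, signed char *q1)
{
    signed char f, f1, f2;

    f  = clamp_s8(*p1 - *q1);
    f  = (signed char)(f & hev);               /* outer-tap term only on high-variance edges */
    f  = clamp_s8(f + 3 * (*q0 - *p0));
    f  = (signed char)(f & mask);              /* leave the pixels alone outside the mask */

    f1 = (signed char)(clamp_s8(f + 4) >> 3);  /* the two vshrq_n_s8(..., 3) steps */
    f2 = (signed char)(clamp_s8(f + 3) >> 3);

    *q0 = clamp_s8(*q0 - f1);
    *p0 = clamp_s8(*p0 + f2);

    f = (signed char)(((f1 + 1) >> 1) & ~hev); /* vrshrq_n_s8(..., 1) then vbicq_s8 */
    *q1 = clamp_s8(*q1 - f);
    *p1 = clamp_s8(*p1 + f);
}

The intrinsics version evaluates this for 16 columns at once and XORs 0x80 back in before the stores.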

+ 22 - 0
thirdparty/libvpx/vp8/common/blockd.c

@@ -0,0 +1,22 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "blockd.h"
+#include "vpx_mem/vpx_mem.h"
+
+const unsigned char vp8_block2left[25] =
+{
+    0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8
+};
+const unsigned char vp8_block2above[25] =
+{
+    0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8
+};

+ 312 - 0
thirdparty/libvpx/vp8/common/blockd.h

@@ -0,0 +1,312 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP8_COMMON_BLOCKD_H_
+#define VP8_COMMON_BLOCKD_H_
+
+void vpx_log(const char *format, ...);
+
+#include "vpx_config.h"
+#include "vpx_scale/yv12config.h"
+#include "mv.h"
+#include "treecoder.h"
+#include "vpx_ports/mem.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*#define DCPRED 1*/
+#define DCPREDSIMTHRESH 0
+#define DCPREDCNTTHRESH 3
+
+#define MB_FEATURE_TREE_PROBS   3
+#define MAX_MB_SEGMENTS         4
+
+#define MAX_REF_LF_DELTAS       4
+#define MAX_MODE_LF_DELTAS      4
+
+/* Segment Feature Masks */
+#define SEGMENT_DELTADATA   0
+#define SEGMENT_ABSDATA     1
+
+typedef struct
+{
+    int r, c;
+} POS;
+
+#define PLANE_TYPE_Y_NO_DC    0
+#define PLANE_TYPE_Y2         1
+#define PLANE_TYPE_UV         2
+#define PLANE_TYPE_Y_WITH_DC  3
+
+
+typedef char ENTROPY_CONTEXT;
+typedef struct
+{
+    ENTROPY_CONTEXT y1[4];
+    ENTROPY_CONTEXT u[2];
+    ENTROPY_CONTEXT v[2];
+    ENTROPY_CONTEXT y2;
+} ENTROPY_CONTEXT_PLANES;
+
+extern const unsigned char vp8_block2left[25];
+extern const unsigned char vp8_block2above[25];
+
+#define VP8_COMBINEENTROPYCONTEXTS( Dest, A, B) \
+    Dest = (A)+(B);
+
+
+typedef enum
+{
+    KEY_FRAME = 0,
+    INTER_FRAME = 1
+} FRAME_TYPE;
+
+typedef enum
+{
+    DC_PRED,            /* average of above and left pixels */
+    V_PRED,             /* vertical prediction */
+    H_PRED,             /* horizontal prediction */
+    TM_PRED,            /* Truemotion prediction */
+    B_PRED,             /* block based prediction, each block has its own prediction mode */
+
+    NEARESTMV,
+    NEARMV,
+    ZEROMV,
+    NEWMV,
+    SPLITMV,
+
+    MB_MODE_COUNT
+} MB_PREDICTION_MODE;
+
+/* Macroblock level features */
+typedef enum
+{
+    MB_LVL_ALT_Q = 0,               /* Use alternate Quantizer .... */
+    MB_LVL_ALT_LF = 1,              /* Use alternate loop filter value... */
+    MB_LVL_MAX = 2                  /* Number of MB level features supported */
+
+} MB_LVL_FEATURES;
+
+/* Segment Feature Masks */
+#define SEGMENT_ALTQ    0x01
+#define SEGMENT_ALT_LF  0x02
+
+#define VP8_YMODES  (B_PRED + 1)
+#define VP8_UV_MODES (TM_PRED + 1)
+
+#define VP8_MVREFS (1 + SPLITMV - NEARESTMV)
+
+typedef enum
+{
+    B_DC_PRED,          /* average of above and left pixels */
+    B_TM_PRED,
+
+    B_VE_PRED,           /* vertical prediction */
+    B_HE_PRED,           /* horizontal prediction */
+
+    B_LD_PRED,
+    B_RD_PRED,
+
+    B_VR_PRED,
+    B_VL_PRED,
+    B_HD_PRED,
+    B_HU_PRED,
+
+    LEFT4X4,
+    ABOVE4X4,
+    ZERO4X4,
+    NEW4X4,
+
+    B_MODE_COUNT
+} B_PREDICTION_MODE;
+
+#define VP8_BINTRAMODES (B_HU_PRED + 1)  /* 10 */
+#define VP8_SUBMVREFS (1 + NEW4X4 - LEFT4X4)
+
+/* For keyframes, intra block modes are predicted by the (already decoded)
+   modes for the Y blocks to the left and above us; for interframes, there
+   is a single probability table. */
+
+union b_mode_info
+{
+    B_PREDICTION_MODE as_mode;
+    int_mv mv;
+};
+
+typedef enum
+{
+    INTRA_FRAME = 0,
+    LAST_FRAME = 1,
+    GOLDEN_FRAME = 2,
+    ALTREF_FRAME = 3,
+    MAX_REF_FRAMES = 4
+} MV_REFERENCE_FRAME;
+
+typedef struct
+{
+    uint8_t mode, uv_mode;
+    uint8_t ref_frame;
+    uint8_t is_4x4;
+    int_mv mv;
+
+    uint8_t partitioning;
+    uint8_t mb_skip_coeff;                                /* does this mb have coefficients at all, 1=no coefficients, 0=need decode tokens */
+    uint8_t need_to_clamp_mvs;
+    uint8_t segment_id;                  /* Which set of segmentation parameters should be used for this MB */
+} MB_MODE_INFO;
+
+typedef struct modeinfo
+{
+    MB_MODE_INFO mbmi;
+    union b_mode_info bmi[16];
+} MODE_INFO;
+
+#if CONFIG_MULTI_RES_ENCODING
+/* The mb-level information needed to be stored for higher-resolution encoder */
+typedef struct
+{
+    MB_PREDICTION_MODE mode;
+    MV_REFERENCE_FRAME ref_frame;
+    int_mv mv;
+    int dissim;    /* dissimilarity level of the macroblock */
+} LOWER_RES_MB_INFO;
+
+/* The frame-level information needed to be stored for higher-resolution
+ *  encoder */
+typedef struct
+{
+    FRAME_TYPE frame_type;
+    int is_frame_dropped;
+    // The frame rate for the lowest resolution.
+    double low_res_framerate;
+    /* The frame number of each reference frames */
+    unsigned int low_res_ref_frames[MAX_REF_FRAMES];
+    // The video frame counter value for the key frame, for lowest resolution.
+    unsigned int key_frame_counter_value;
+    LOWER_RES_MB_INFO *mb_info;
+} LOWER_RES_FRAME_INFO;
+#endif
+
+typedef struct blockd
+{
+    short *qcoeff;
+    short *dqcoeff;
+    unsigned char  *predictor;
+    short *dequant;
+
+    int offset;
+    char *eob;
+
+    union b_mode_info bmi;
+} BLOCKD;
+
+typedef void (*vp8_subpix_fn_t)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+
+typedef struct macroblockd
+{
+    DECLARE_ALIGNED(16, unsigned char,  predictor[384]);
+    DECLARE_ALIGNED(16, short, qcoeff[400]);
+    DECLARE_ALIGNED(16, short, dqcoeff[400]);
+    DECLARE_ALIGNED(16, char,  eobs[25]);
+
+    DECLARE_ALIGNED(16, short,  dequant_y1[16]);
+    DECLARE_ALIGNED(16, short,  dequant_y1_dc[16]);
+    DECLARE_ALIGNED(16, short,  dequant_y2[16]);
+    DECLARE_ALIGNED(16, short,  dequant_uv[16]);
+
+    /* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */
+    BLOCKD block[25];
+    int fullpixel_mask;
+
+    YV12_BUFFER_CONFIG pre; /* Filtered copy of previous frame reconstruction */
+    YV12_BUFFER_CONFIG dst;
+
+    MODE_INFO *mode_info_context;
+    int mode_info_stride;
+
+    FRAME_TYPE frame_type;
+
+    int up_available;
+    int left_available;
+
+    unsigned char *recon_above[3];
+    unsigned char *recon_left[3];
+    int recon_left_stride[2];
+
+    /* Y,U,V,Y2 */
+    ENTROPY_CONTEXT_PLANES *above_context;
+    ENTROPY_CONTEXT_PLANES *left_context;
+
+    /* 0 indicates segmentation at MB level is not enabled. Otherwise the individual bits indicate which features are active. */
+    unsigned char segmentation_enabled;
+
+    /* 0 (do not update) 1 (update) the macroblock segmentation map. */
+    unsigned char update_mb_segmentation_map;
+
+    /* 0 (do not update) 1 (update) the macroblock segmentation feature data. */
+    unsigned char update_mb_segmentation_data;
+
+    /* 0 (SEGMENT_DELTADATA) 1 (SEGMENT_ABSDATA): how the segment feature data is coded. */
+    unsigned char mb_segement_abs_delta;
+
+    /* Per frame flags that define which MB level features (such as quantizer or loop filter level) */
+    /* are enabled and, when enabled, the probabilities used to decode the per MB flags in MB_MODE_INFO */
+    vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS];         /* Probability Tree used to code Segment number */
+
+    signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS];            /* Segment parameters */
+
+    /* mode_based Loop filter adjustment */
+    unsigned char mode_ref_lf_delta_enabled;
+    unsigned char mode_ref_lf_delta_update;
+
+    /* Delta values have the range +/- MAX_LOOP_FILTER */
+    signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS];                /* 0 = Intra, Last, GF, ARF */
+    signed char ref_lf_deltas[MAX_REF_LF_DELTAS];                     /* 0 = Intra, Last, GF, ARF */
+    signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS];                      /* 0 = BPRED, ZERO_MV, MV, SPLIT */
+    signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];                           /* 0 = BPRED, ZERO_MV, MV, SPLIT */
+
+    /* Distance of MB away from frame edges */
+    int mb_to_left_edge;
+    int mb_to_right_edge;
+    int mb_to_top_edge;
+    int mb_to_bottom_edge;
+
+
+
+    vp8_subpix_fn_t  subpixel_predict;
+    vp8_subpix_fn_t  subpixel_predict8x4;
+    vp8_subpix_fn_t  subpixel_predict8x8;
+    vp8_subpix_fn_t  subpixel_predict16x16;
+
+    void *current_bc;
+
+    int corrupted;
+
+#if ARCH_X86 || ARCH_X86_64
+    /* This is an intermediate buffer currently used in sub-pixel motion search
+     * to keep a copy of the reference area. This buffer can be used for other
+     * purpose.
+     */
+    DECLARE_ALIGNED(32, unsigned char, y_buf[22*32]);
+#endif
+} MACROBLOCKD;
+
+
+extern void vp8_build_block_doffsets(MACROBLOCKD *x);
+extern void vp8_setup_block_dptrs(MACROBLOCKD *x);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP8_COMMON_BLOCKD_H_

+ 197 - 0
thirdparty/libvpx/vp8/common/coefupdateprobs.h

@@ -0,0 +1,197 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP8_COMMON_COEFUPDATEPROBS_H_
+#define VP8_COMMON_COEFUPDATEPROBS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Update probabilities for the nodes in the token entropy tree.
+   Generated file included by entropy.c */
+
+const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] =
+{
+    {
+        {
+            {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+        },
+        {
+            {176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
+        },
+        {
+            {255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+        },
+        {
+            {255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+        },
+        {
+            {255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+        },
+        {
+            {255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+        },
+        {
+            {255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255, },
+            {250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255, },
+            {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+        },
+        {
+            {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+        },
+    },
+    {
+        {
+            {217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255, },
+            {234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255, },
+        },
+        {
+            {255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255, },
+        },
+        {
+            {255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+        },
+        {
+            {255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+        },
+        {
+            {255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+        },
+        {
+            {255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+        },
+        {
+            {255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+        },
+        {
+            {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+        },
+    },
+    {
+        {
+            {186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255, },
+            {251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255, },
+        },
+        {
+            {255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255, },
+        },
+        {
+            {255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+        },
+        {
+            {255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+        },
+        {
+            {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+        },
+        {
+            {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+        },
+        {
+            {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+        },
+        {
+            {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+        },
+    },
+    {
+        {
+            {248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255, },
+            {248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255, },
+        },
+        {
+            {255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255, },
+        },
+        {
+            {255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, },
+        },
+        {
+            {255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+        },
+        {
+            {255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+        },
+        {
+            {255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
+        },
+        {
+            {255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+        },
+        {
+            {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+            {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
+        },
+    },
+};
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP8_COMMON_COEFUPDATEPROBS_H_

+ 48 - 0
thirdparty/libvpx/vp8/common/common.h

@@ -0,0 +1,48 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP8_COMMON_COMMON_H_
+#define VP8_COMMON_COMMON_H_
+
+#include <assert.h>
+
+/* Interface header for common constant data structures and lookup tables */
+
+#include "vpx_mem/vpx_mem.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Only need this for fixed-size arrays, for structs just assign. */
+
+#define vp8_copy( Dest, Src) { \
+        assert( sizeof( Dest) == sizeof( Src)); \
+        memcpy( Dest, Src, sizeof( Src)); \
+    }
+
+/* Use this for variably-sized arrays. */
+
+#define vp8_copy_array( Dest, Src, N) { \
+        assert( sizeof( *Dest) == sizeof( *Src)); \
+        memcpy( Dest, Src, N * sizeof( *Src)); \
+    }
+
+#define vp8_zero( Dest)  memset( &Dest, 0, sizeof( Dest));
+
+#define vp8_zero_array( Dest, N)  memset( Dest, 0, N * sizeof( *Dest));
+
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP8_COMMON_COMMON_H_
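
A short, hypothetical illustration of how these helpers are meant to be used (not part of the patch); vpx_malloc and vpx_free come from the vpx_mem header included above.

/* Illustrative usage only. */
static void copy_helpers_example(void)
{
    short fixed_a[16], fixed_b[16] = { 0 };
    short *heap = vpx_malloc(16 * sizeof(*heap));

    vp8_copy(fixed_a, fixed_b);        /* sizes must match (assert-checked) */
    vp8_zero(fixed_a);                 /* memset the whole object to zero */
    vp8_copy_array(heap, fixed_b, 16); /* element count supplied explicitly */
    vp8_zero_array(heap, 16);
    vpx_free(heap);
}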

+ 32 - 0
thirdparty/libvpx/vp8/common/copy_c.c

@@ -0,0 +1,32 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include <string.h>
+
+#include "./vp8_rtcd.h"
+#include "vpx/vpx_integer.h"
+
+/* Copy 2 macroblocks to a buffer */
+void vp8_copy32xn_c(const unsigned char *src_ptr, int src_stride,
+                    unsigned char *dst_ptr, int dst_stride,
+                    int height)
+{
+    int r;
+
+    for (r = 0; r < height; r++)
+    {
+        memcpy(dst_ptr, src_ptr, 32);
+
+        src_ptr += src_stride;
+        dst_ptr += dst_stride;
+
+    }
+}

+ 155 - 0
thirdparty/libvpx/vp8/common/debugmodes.c

@@ -0,0 +1,155 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include <stdio.h>
+#include "blockd.h"
+
+
+void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int frame)
+{
+
+    int mb_row;
+    int mb_col;
+    int mb_index = 0;
+    FILE *mvs = fopen("mvs.stt", "a");
+
+    /* print out the macroblock Y modes */
+    mb_index = 0;
+    fprintf(mvs, "Mb Modes for Frame %d\n", frame);
+
+    for (mb_row = 0; mb_row < rows; mb_row++)
+    {
+        for (mb_col = 0; mb_col < cols; mb_col++)
+        {
+
+            fprintf(mvs, "%2d ", mi[mb_index].mbmi.mode);
+
+            mb_index++;
+        }
+
+        fprintf(mvs, "\n");
+        mb_index++;
+    }
+
+    fprintf(mvs, "\n");
+
+    mb_index = 0;
+    fprintf(mvs, "Mb mv ref for Frame %d\n", frame);
+
+    for (mb_row = 0; mb_row < rows; mb_row++)
+    {
+        for (mb_col = 0; mb_col < cols; mb_col++)
+        {
+
+            fprintf(mvs, "%2d ", mi[mb_index].mbmi.ref_frame);
+
+            mb_index++;
+        }
+
+        fprintf(mvs, "\n");
+        mb_index++;
+    }
+
+    fprintf(mvs, "\n");
+
+    /* print out the macroblock UV modes */
+    mb_index = 0;
+    fprintf(mvs, "UV Modes for Frame %d\n", frame);
+
+    for (mb_row = 0; mb_row < rows; mb_row++)
+    {
+        for (mb_col = 0; mb_col < cols; mb_col++)
+        {
+
+            fprintf(mvs, "%2d ", mi[mb_index].mbmi.uv_mode);
+
+            mb_index++;
+        }
+
+        mb_index++;
+        fprintf(mvs, "\n");
+    }
+
+    fprintf(mvs, "\n");
+
+    /* print out the block modes */
+    fprintf(mvs, "Mbs for Frame %d\n", frame);
+    {
+        int b_row;
+
+        for (b_row = 0; b_row < 4 * rows; b_row++)
+        {
+            int b_col;
+            int bindex;
+
+            for (b_col = 0; b_col < 4 * cols; b_col++)
+            {
+                mb_index = (b_row >> 2) * (cols + 1) + (b_col >> 2);
+                bindex = (b_row & 3) * 4 + (b_col & 3);
+
+                if (mi[mb_index].mbmi.mode == B_PRED)
+                    fprintf(mvs, "%2d ", mi[mb_index].bmi[bindex].as_mode);
+                else
+                    fprintf(mvs, "xx ");
+
+            }
+
+            fprintf(mvs, "\n");
+        }
+    }
+    fprintf(mvs, "\n");
+
+    /* print out the macroblock mvs */
+    mb_index = 0;
+    fprintf(mvs, "MVs for Frame %d\n", frame);
+
+    for (mb_row = 0; mb_row < rows; mb_row++)
+    {
+        for (mb_col = 0; mb_col < cols; mb_col++)
+        {
+            fprintf(mvs, "%5d:%-5d", mi[mb_index].mbmi.mv.as_mv.row / 2, mi[mb_index].mbmi.mv.as_mv.col / 2);
+
+            mb_index++;
+        }
+
+        mb_index++;
+        fprintf(mvs, "\n");
+    }
+
+    fprintf(mvs, "\n");
+
+
+    /* print out the block motion vectors */
+    fprintf(mvs, "MVs for Frame %d\n", frame);
+    {
+        int b_row;
+
+        for (b_row = 0; b_row < 4 * rows; b_row++)
+        {
+            int b_col;
+            int bindex;
+
+            for (b_col = 0; b_col < 4 * cols; b_col++)
+            {
+                mb_index = (b_row >> 2) * (cols + 1) + (b_col >> 2);
+                bindex = (b_row & 3) * 4 + (b_col & 3);
+                fprintf(mvs, "%3d:%-3d ", mi[mb_index].bmi[bindex].mv.as_mv.row, mi[mb_index].bmi[bindex].mv.as_mv.col);
+
+            }
+
+            fprintf(mvs, "\n");
+        }
+    }
+    fprintf(mvs, "\n");
+
+
+    fclose(mvs);
+}

+ 200 - 0
thirdparty/libvpx/vp8/common/default_coef_probs.h

@@ -0,0 +1,200 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+*/
+
+#ifndef VP8_COMMON_DEFAULT_COEF_PROBS_H_
+#define VP8_COMMON_DEFAULT_COEF_PROBS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*Generated file, included by entropy.c*/
+
+
+static const vp8_prob default_coef_probs [BLOCK_TYPES]
+                                         [COEF_BANDS]
+                                         [PREV_COEF_CONTEXTS]
+                                         [ENTROPY_NODES] =
+{
+    { /* Block Type ( 0 ) */
+        { /* Coeff Band ( 0 )*/
+            { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 1 )*/
+            { 253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128 },
+            { 189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128 },
+            { 106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 2 )*/
+            {   1,  98, 248, 255, 236, 226, 255, 255, 128, 128, 128 },
+            { 181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128 },
+            {  78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 3 )*/
+            {   1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128 },
+            { 184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128 },
+            {  77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 4 )*/
+            {   1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128 },
+            { 170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128 },
+            {  37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 5 )*/
+            {   1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128 },
+            { 207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128 },
+            { 102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 6 )*/
+            {   1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128 },
+            { 177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128 },
+            {  80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 7 )*/
+            {   1,   1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 246,   1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
+        }
+    },
+    { /* Block Type ( 1 ) */
+        { /* Coeff Band ( 0 )*/
+            { 198,  35, 237, 223, 193, 187, 162, 160, 145, 155,  62 },
+            { 131,  45, 198, 221, 172, 176, 220, 157, 252, 221,   1 },
+            {  68,  47, 146, 208, 149, 167, 221, 162, 255, 223, 128 }
+        },
+        { /* Coeff Band ( 1 )*/
+            {   1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128 },
+            { 184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128 },
+            {  81,  99, 181, 242, 176, 190, 249, 202, 255, 255, 128 }
+        },
+        { /* Coeff Band ( 2 )*/
+            {   1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128 },
+            {  99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128 },
+            {  23,  91, 163, 242, 170, 187, 247, 210, 255, 255, 128 }
+        },
+        { /* Coeff Band ( 3 )*/
+            {   1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128 },
+            { 109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128 },
+            {  44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 4 )*/
+            {   1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128 },
+            {  94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128 },
+            {  22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 5 )*/
+            {   1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128 },
+            { 124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128 },
+            {  35,  77, 181, 251, 193, 211, 255, 205, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 6 )*/
+            {   1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128 },
+            { 121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128 },
+            {  45,  99, 188, 251, 195, 217, 255, 224, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 7 )*/
+            {   1,   1, 251, 255, 213, 255, 128, 128, 128, 128, 128 },
+            { 203,   1, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
+            { 137,   1, 177, 255, 224, 255, 128, 128, 128, 128, 128 }
+        }
+    },
+    { /* Block Type ( 2 ) */
+        { /* Coeff Band ( 0 )*/
+            { 253,   9, 248, 251, 207, 208, 255, 192, 128, 128, 128 },
+            { 175,  13, 224, 243, 193, 185, 249, 198, 255, 255, 128 },
+            {  73,  17, 171, 221, 161, 179, 236, 167, 255, 234, 128 }
+        },
+        { /* Coeff Band ( 1 )*/
+            {   1,  95, 247, 253, 212, 183, 255, 255, 128, 128, 128 },
+            { 239,  90, 244, 250, 211, 209, 255, 255, 128, 128, 128 },
+            { 155,  77, 195, 248, 188, 195, 255, 255, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 2 )*/
+            {   1,  24, 239, 251, 218, 219, 255, 205, 128, 128, 128 },
+            { 201,  51, 219, 255, 196, 186, 128, 128, 128, 128, 128 },
+            {  69,  46, 190, 239, 201, 218, 255, 228, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 3 )*/
+            {   1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128 },
+            { 223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128 },
+            { 141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 4 )*/
+            {   1,  16, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
+            { 190,  36, 230, 255, 236, 255, 128, 128, 128, 128, 128 },
+            { 149,   1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 5 )*/
+            {   1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 6 )*/
+            {   1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128 },
+            { 213,  62, 250, 255, 255, 128, 128, 128, 128, 128, 128 },
+            {  55,  93, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 7 )*/
+            { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
+        }
+    },
+    { /* Block Type ( 3 ) */
+        { /* Coeff Band ( 0 )*/
+            { 202,  24, 213, 235, 186, 191, 220, 160, 240, 175, 255 },
+            { 126,  38, 182, 232, 169, 184, 228, 174, 255, 187, 128 },
+            {  61,  46, 138, 219, 151, 178, 240, 170, 255, 216, 128 }
+        },
+        { /* Coeff Band ( 1 )*/
+            {   1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128 },
+            { 166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128 },
+            {  39,  77, 162, 232, 172, 180, 245, 178, 255, 255, 128 }
+        },
+        { /* Coeff Band ( 2 )*/
+            {   1,  52, 220, 246, 198, 199, 249, 220, 255, 255, 128 },
+            { 124,  74, 191, 243, 183, 193, 250, 221, 255, 255, 128 },
+            {  24,  71, 130, 219, 154, 170, 243, 182, 255, 255, 128 }
+        },
+        { /* Coeff Band ( 3 )*/
+            {   1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128 },
+            { 149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128 },
+            {  28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128 }
+        },
+        { /* Coeff Band ( 4 )*/
+            {   1,  81, 230, 252, 204, 203, 255, 192, 128, 128, 128 },
+            { 123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128 },
+            {  20,  95, 153, 243, 164, 173, 255, 203, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 5 )*/
+            {   1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128 },
+            { 168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128 },
+            {  47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 6 )*/
+            {   1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128 },
+            { 141,  84, 213, 252, 201, 202, 255, 219, 128, 128, 128 },
+            {  42,  80, 160, 240, 162, 185, 255, 205, 128, 128, 128 }
+        },
+        { /* Coeff Band ( 7 )*/
+            {   1,   1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 244,   1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+            { 238,   1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
+        }
+    }
+};
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP8_COMMON_DEFAULT_COEF_PROBS_H_
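Editor's note (illustrative sketch, not part of the patch): the table above is indexed as [block type][coefficient band][previous-coefficient context][tree node], and a vp8_prob value p at a node means the boolean coder takes that node's 0-branch with probability of roughly p/256. Picking one row out of the table:

#include <stdio.h>

typedef unsigned char vp8_prob;   /* mirrors the treecoder typedef used above */

int main(void)
{
    /* block type 0, coeff band 1, context 0, copied from the table above */
    const vp8_prob node_probs[11] =
        { 253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128 };

    /* node 0 of vp8_coef_tree separates EOB from "more coefficients";
       p = 253 means the EOB branch is taken with probability ~253/256 */
    printf("P(EOB branch) ~= %.3f\n", node_probs[0] / 256.0);
    return 0;
}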

+ 43 - 0
thirdparty/libvpx/vp8/common/dequantize.c

@@ -0,0 +1,43 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_config.h"
+#include "vp8_rtcd.h"
+#include "vp8/common/blockd.h"
+#include "vpx_mem/vpx_mem.h"
+
+void vp8_dequantize_b_c(BLOCKD *d, short *DQC)
+{
+    int i;
+    short *DQ  = d->dqcoeff;
+    short *Q   = d->qcoeff;
+
+    for (i = 0; i < 16; i++)
+    {
+        DQ[i] = Q[i] * DQC[i];
+    }
+}
+
+void vp8_dequant_idct_add_c(short *input, short *dq,
+                            unsigned char *dest, int stride)
+{
+    int i;
+
+    for (i = 0; i < 16; i++)
+    {
+        input[i] = dq[i] * input[i];
+    }
+
+    vp8_short_idct4x4llm_c(input, dest, stride, dest, stride);
+
+    memset(input, 0, 32);
+
+}
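Editor's note: a small numeric sketch (not libvpx code) of the element-wise dequantization done above, where each quantized level is multiplied by its per-position dequant factor before the inverse DCT; the final memset(input, 0, 32) then clears all 16 16-bit coefficients for the next block.

#include <stdio.h>

int main(void)
{
    short q[16]  = { 3, -1, 2 };     /* quantized levels, the rest are zero */
    short dqc[16];                   /* per-position dequant factors */
    short dq[16];
    int i;

    for (i = 0; i < 16; i++) dqc[i] = 17;          /* illustrative value only */
    for (i = 0; i < 16; i++) dq[i] = q[i] * dqc[i];

    printf("%d %d %d\n", dq[0], dq[1], dq[2]);     /* 51 -17 34 */
    return 0;
}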

+ 188 - 0
thirdparty/libvpx/vp8/common/entropy.c

@@ -0,0 +1,188 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "entropy.h"
+#include "blockd.h"
+#include "onyxc_int.h"
+#include "vpx_mem/vpx_mem.h"
+
+#include "coefupdateprobs.h"
+
+DECLARE_ALIGNED(16, const unsigned char, vp8_norm[256]) =
+{
+    0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]) =
+{ 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7};
+
+DECLARE_ALIGNED(16, const unsigned char,
+                vp8_prev_token_class[MAX_ENTROPY_TOKENS]) =
+{ 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0};
+
+DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]) =
+{
+    0,  1,  4,  8,
+    5,  2,  3,  6,
+    9, 12, 13, 10,
+    7, 11, 14, 15,
+};
+
+DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]) =
+{
+    1,  2,  6,  7,
+    3,  5,  8, 13,
+    4,  9, 12, 14,
+   10, 11, 15, 16
+};
+
+/* vp8_default_zig_zag_mask generated with:
+
+    void vp8_init_scan_order_mask()
+    {
+        int i;
+
+        for (i = 0; i < 16; i++)
+        {
+            vp8_default_zig_zag_mask[vp8_default_zig_zag1d[i]] = 1 << i;
+        }
+
+    }
+*/
+DECLARE_ALIGNED(16, const short, vp8_default_zig_zag_mask[16]) =
+{
+     1,    2,    32,     64,
+     4,   16,   128,   4096,
+     8,  256,  2048,   8192,
+   512, 1024, 16384, -32768
+};
+
+const int vp8_mb_feature_data_bits[MB_LVL_MAX] = {7, 6};
+
+/* Array indices are identical to previously-existing CONTEXT_NODE indices */
+
+const vp8_tree_index vp8_coef_tree[ 22] =     /* corresponding _CONTEXT_NODEs */
+{
+    -DCT_EOB_TOKEN, 2,                             /* 0 = EOB */
+    -ZERO_TOKEN, 4,                               /* 1 = ZERO */
+    -ONE_TOKEN, 6,                               /* 2 = ONE */
+    8, 12,                                      /* 3 = LOW_VAL */
+    -TWO_TOKEN, 10,                            /* 4 = TWO */
+    -THREE_TOKEN, -FOUR_TOKEN,                /* 5 = THREE */
+    14, 16,                                    /* 6 = HIGH_LOW */
+    -DCT_VAL_CATEGORY1, -DCT_VAL_CATEGORY2,   /* 7 = CAT_ONE */
+    18, 20,                                   /* 8 = CAT_THREEFOUR */
+    -DCT_VAL_CATEGORY3, -DCT_VAL_CATEGORY4,  /* 9 = CAT_THREE */
+    -DCT_VAL_CATEGORY5, -DCT_VAL_CATEGORY6   /* 10 = CAT_FIVE */
+};
+
+/* vp8_coef_encodings generated with:
+    vp8_tokens_from_tree(vp8_coef_encodings, vp8_coef_tree);
+*/
+vp8_token vp8_coef_encodings[MAX_ENTROPY_TOKENS] =
+{
+    {2, 2},
+    {6, 3},
+    {28, 5},
+    {58, 6},
+    {59, 6},
+    {60, 6},
+    {61, 6},
+    {124, 7},
+    {125, 7},
+    {126, 7},
+    {127, 7},
+    {0, 1}
+};
+
+/* Trees for extra bits.  Probabilities are constant and
+   do not depend on previously encoded bits */
+
+static const vp8_prob Pcat1[] = { 159};
+static const vp8_prob Pcat2[] = { 165, 145};
+static const vp8_prob Pcat3[] = { 173, 148, 140};
+static const vp8_prob Pcat4[] = { 176, 155, 140, 135};
+static const vp8_prob Pcat5[] = { 180, 157, 141, 134, 130};
+static const vp8_prob Pcat6[] =
+{ 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129};
+
+
+/* tree index tables generated with:
+
+    void init_bit_tree(vp8_tree_index *p, int n)
+    {
+        int i = 0;
+
+        while (++i < n)
+        {
+            p[0] = p[1] = i << 1;
+            p += 2;
+        }
+
+        p[0] = p[1] = 0;
+    }
+
+    void init_bit_trees()
+    {
+        init_bit_tree(cat1, 1);
+        init_bit_tree(cat2, 2);
+        init_bit_tree(cat3, 3);
+        init_bit_tree(cat4, 4);
+        init_bit_tree(cat5, 5);
+        init_bit_tree(cat6, 11);
+    }
+*/
+
+static const vp8_tree_index cat1[2] = { 0, 0 };
+static const vp8_tree_index cat2[4] = { 2, 2, 0, 0 };
+static const vp8_tree_index cat3[6] = { 2, 2, 4, 4, 0, 0 };
+static const vp8_tree_index cat4[8] = { 2, 2, 4, 4, 6, 6, 0, 0 };
+static const vp8_tree_index cat5[10] = { 2, 2, 4, 4, 6, 6, 8, 8, 0, 0 };
+static const vp8_tree_index cat6[22] = { 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12,
+                                        14, 14, 16, 16, 18, 18, 20, 20, 0, 0 };
+
+const vp8_extra_bit_struct vp8_extra_bits[12] =
+{
+    { 0, 0, 0, 0},
+    { 0, 0, 0, 1},
+    { 0, 0, 0, 2},
+    { 0, 0, 0, 3},
+    { 0, 0, 0, 4},
+    { cat1, Pcat1, 1, 5},
+    { cat2, Pcat2, 2, 7},
+    { cat3, Pcat3, 3, 11},
+    { cat4, Pcat4, 4, 19},
+    { cat5, Pcat5, 5, 35},
+    { cat6, Pcat6, 11, 67},
+    { 0, 0, 0, 0}
+};
+
+#include "default_coef_probs.h"
+
+void vp8_default_coef_probs(VP8_COMMON *pc)
+{
+    memcpy(pc->fc.coef_probs, default_coef_probs, sizeof(default_coef_probs));
+}
+
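Editor's note: vp8_extra_bits above pairs each token with a base value and the number of literal bits that follow it, so a coefficient magnitude is coded as the token whose range contains it plus a residue. A standalone sketch of that mapping (editorial, with the base values copied from the table's last column):

#include <stdio.h>

int main(void)
{
    /* base value and extra-bit count for tokens ZERO_TOKEN .. DCT_VAL_CATEGORY6 */
    const int base_val[11]  = { 0, 1, 2, 3, 4, 5, 7, 11, 19, 35, 67 };
    const int extra_len[11] = { 0, 0, 0, 0, 0, 1, 2,  3,  4,  5, 11 };
    int magnitude = 25, token = 0, t;

    for (t = 0; t < 11; t++)
        if (magnitude >= base_val[t])
            token = t;

    printf("token %d, extra bits %d, residue %d\n",
           token, extra_len[token], magnitude - base_val[token]);
    /* magnitude 25 -> token 8 (DCT_VAL_CATEGORY4), 4 extra bits, residue 6 */
    return 0;
}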

+ 109 - 0
thirdparty/libvpx/vp8/common/entropy.h

@@ -0,0 +1,109 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP8_COMMON_ENTROPY_H_
+#define VP8_COMMON_ENTROPY_H_
+
+#include "treecoder.h"
+#include "blockd.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Coefficient token alphabet */
+
+#define ZERO_TOKEN              0       /* 0         Extra Bits 0+0 */
+#define ONE_TOKEN               1       /* 1         Extra Bits 0+1 */
+#define TWO_TOKEN               2       /* 2         Extra Bits 0+1 */
+#define THREE_TOKEN             3       /* 3         Extra Bits 0+1 */
+#define FOUR_TOKEN              4       /* 4         Extra Bits 0+1 */
+#define DCT_VAL_CATEGORY1       5       /* 5-6       Extra Bits 1+1 */
+#define DCT_VAL_CATEGORY2       6       /* 7-10      Extra Bits 2+1 */
+#define DCT_VAL_CATEGORY3       7       /* 11-18     Extra Bits 3+1 */
+#define DCT_VAL_CATEGORY4       8       /* 19-34     Extra Bits 4+1 */
+#define DCT_VAL_CATEGORY5       9       /* 35-66     Extra Bits 5+1 */
+#define DCT_VAL_CATEGORY6       10      /* 67+       Extra Bits 11+1 */
+#define DCT_EOB_TOKEN           11      /* EOB       Extra Bits 0+0 */
+
+#define MAX_ENTROPY_TOKENS 12
+#define ENTROPY_NODES 11
+
+extern const vp8_tree_index vp8_coef_tree[];
+
+extern const struct vp8_token_struct vp8_coef_encodings[MAX_ENTROPY_TOKENS];
+
+typedef struct
+{
+    vp8_tree_p tree;
+    const vp8_prob *prob;
+    int Len;
+    int base_val;
+} vp8_extra_bit_struct;
+
+extern const vp8_extra_bit_struct vp8_extra_bits[12];    /* indexed by token value */
+
+#define PROB_UPDATE_BASELINE_COST   7
+
+#define MAX_PROB                255
+#define DCT_MAX_VALUE           2048
+
+
+/* Coefficients are predicted via a 3-dimensional probability table. */
+
+/* Outside dimension.  0 = Y no DC, 1 = Y2, 2 = UV, 3 = Y with DC */
+
+#define BLOCK_TYPES 4
+
+/* Middle dimension is a coarsening of the coefficient's
+   position within the 4x4 DCT. */
+
+#define COEF_BANDS 8
+extern DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]);
+
+/* Inside dimension is 3-valued measure of nearby complexity, that is,
+   the extent to which nearby coefficients are nonzero.  For the first
+   coefficient (DC, unless block type is 0), we look at the (already encoded)
+   blocks above and to the left of the current block.  The context index is
+   then the number (0,1,or 2) of these blocks having nonzero coefficients.
+   After decoding a coefficient, the measure is roughly the size of the
+   most recently decoded coefficient (0 for 0, 1 for 1, 2 for >1).
+   Note that the intuitive meaning of this measure changes as coefficients
+   are decoded, e.g., prior to the first token, a zero means that my neighbors
+   are empty while, after the first token, because of the use of end-of-block,
+   a zero means we just decoded a zero and hence guarantees that a non-zero
+   coefficient will appear later in this block.  However, this shift
+   in meaning is perfectly OK because our context depends also on the
+   coefficient band (and since zigzag positions 0, 1, and 2 are in
+   distinct bands). */
+
+/*# define DC_TOKEN_CONTEXTS        3*/ /* 00, 0!0, !0!0 */
+#   define PREV_COEF_CONTEXTS       3
+
+extern DECLARE_ALIGNED(16, const unsigned char, vp8_prev_token_class[MAX_ENTROPY_TOKENS]);
+
+extern const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
+
+
+struct VP8Common;
+void vp8_default_coef_probs(struct VP8Common *);
+
+extern DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]);
+extern DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]);
+extern DECLARE_ALIGNED(16, const short, vp8_default_zig_zag_mask[16]);
+extern const int vp8_mb_feature_data_bits[MB_LVL_MAX];
+
+void vp8_coef_tree_initialize(void);
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP8_COMMON_ENTROPY_H_

+ 171 - 0
thirdparty/libvpx/vp8/common/entropymode.c

@@ -0,0 +1,171 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#define USE_PREBUILT_TABLES
+
+#include "entropymode.h"
+#include "entropy.h"
+#include "vpx_mem/vpx_mem.h"
+
+#include "vp8_entropymodedata.h"
+
+int vp8_mv_cont(const int_mv *l, const int_mv *a)
+{
+    int lez = (l->as_int == 0);
+    int aez = (a->as_int == 0);
+    int lea = (l->as_int == a->as_int);
+
+    if (lea && lez)
+        return SUBMVREF_LEFT_ABOVE_ZED;
+
+    if (lea)
+        return SUBMVREF_LEFT_ABOVE_SAME;
+
+    if (aez)
+        return SUBMVREF_ABOVE_ZED;
+
+    if (lez)
+        return SUBMVREF_LEFT_ZED;
+
+    return SUBMVREF_NORMAL;
+}
+
+static const vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1] = { 180, 162, 25};
+
+const vp8_prob vp8_sub_mv_ref_prob2 [SUBMVREF_COUNT][VP8_SUBMVREFS-1] =
+{
+    { 147, 136, 18 },
+    { 106, 145, 1  },
+    { 179, 121, 1  },
+    { 223, 1  , 34 },
+    { 208, 1  , 1  }
+};
+
+
+
+const vp8_mbsplit vp8_mbsplits [VP8_NUMMBSPLITS] =
+{
+    {
+        0,  0,  0,  0,
+        0,  0,  0,  0,
+        1,  1,  1,  1,
+        1,  1,  1,  1,
+    },
+    {
+        0,  0,  1,  1,
+        0,  0,  1,  1,
+        0,  0,  1,  1,
+        0,  0,  1,  1,
+    },
+    {
+        0,  0,  1,  1,
+        0,  0,  1,  1,
+        2,  2,  3,  3,
+        2,  2,  3,  3,
+    },
+    {
+        0,  1,  2,  3,
+        4,  5,  6,  7,
+        8,  9,  10, 11,
+        12, 13, 14, 15,
+    }
+};
+
+const int vp8_mbsplit_count [VP8_NUMMBSPLITS] = { 2, 2, 4, 16};
+
+const vp8_prob vp8_mbsplit_probs [VP8_NUMMBSPLITS-1] = { 110, 111, 150};
+
+
+/* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. */
+
+const vp8_tree_index vp8_bmode_tree[18] =     /* INTRAMODECONTEXTNODE value */
+{
+    -B_DC_PRED, 2,                             /* 0 = DC_NODE */
+    -B_TM_PRED, 4,                            /* 1 = TM_NODE */
+    -B_VE_PRED, 6,                           /* 2 = VE_NODE */
+    8, 12,                                  /* 3 = COM_NODE */
+    -B_HE_PRED, 10,                        /* 4 = HE_NODE */
+    -B_RD_PRED, -B_VR_PRED,               /* 5 = RD_NODE */
+    -B_LD_PRED, 14,                        /* 6 = LD_NODE */
+    -B_VL_PRED, 16,                      /* 7 = VL_NODE */
+    -B_HD_PRED, -B_HU_PRED             /* 8 = HD_NODE */
+};
+
+/* Again, these trees use the same probability indices as their
+   explicitly-programmed predecessors. */
+
+const vp8_tree_index vp8_ymode_tree[8] =
+{
+    -DC_PRED, 2,
+    4, 6,
+    -V_PRED, -H_PRED,
+    -TM_PRED, -B_PRED
+};
+
+const vp8_tree_index vp8_kf_ymode_tree[8] =
+{
+    -B_PRED, 2,
+    4, 6,
+    -DC_PRED, -V_PRED,
+    -H_PRED, -TM_PRED
+};
+
+const vp8_tree_index vp8_uv_mode_tree[6] =
+{
+    -DC_PRED, 2,
+    -V_PRED, 4,
+    -H_PRED, -TM_PRED
+};
+
+const vp8_tree_index vp8_mbsplit_tree[6] =
+{
+    -3, 2,
+    -2, 4,
+    -0, -1
+};
+
+const vp8_tree_index vp8_mv_ref_tree[8] =
+{
+    -ZEROMV, 2,
+    -NEARESTMV, 4,
+    -NEARMV, 6,
+    -NEWMV, -SPLITMV
+};
+
+const vp8_tree_index vp8_sub_mv_ref_tree[6] =
+{
+    -LEFT4X4, 2,
+    -ABOVE4X4, 4,
+    -ZERO4X4, -NEW4X4
+};
+
+const vp8_tree_index vp8_small_mvtree [14] =
+{
+    2, 8,
+    4, 6,
+    -0, -1,
+    -2, -3,
+    10, 12,
+    -4, -5,
+    -6, -7
+};
+
+void vp8_init_mbmode_probs(VP8_COMMON *x)
+{
+    memcpy(x->fc.ymode_prob, vp8_ymode_prob, sizeof(vp8_ymode_prob));
+    memcpy(x->fc.uv_mode_prob, vp8_uv_mode_prob, sizeof(vp8_uv_mode_prob));
+    memcpy(x->fc.sub_mv_ref_prob, sub_mv_ref_prob, sizeof(sub_mv_ref_prob));
+}
+
+void vp8_default_bmode_probs(vp8_prob p [VP8_BINTRAMODES-1])
+{
+    memcpy(p, vp8_bmode_prob, sizeof(vp8_bmode_prob));
+}
+
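Editor's note: the sub-MV reference context chosen by vp8_mv_cont above depends only on whether the left and above sub-MVs are zero and/or equal. A condensed standalone sketch of that decision (editorial, treating packed MVs as plain ints):

#include <stdio.h>

enum { NORMAL, LEFT_ZED, ABOVE_ZED, LEFT_ABOVE_SAME, LEFT_ABOVE_ZED };

static int mv_cont(int left, int above)   /* 0 stands for the zero vector */
{
    if (left == above) return (left == 0) ? LEFT_ABOVE_ZED : LEFT_ABOVE_SAME;
    if (above == 0)    return ABOVE_ZED;
    if (left == 0)     return LEFT_ZED;
    return NORMAL;
}

int main(void)
{
    printf("%d %d %d\n", mv_cont(0, 0), mv_cont(5, 5), mv_cont(5, 0));
    /* LEFT_ABOVE_ZED (4), LEFT_ABOVE_SAME (3), ABOVE_ZED (2) */
    return 0;
}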

+ 88 - 0
thirdparty/libvpx/vp8/common/entropymode.h

@@ -0,0 +1,88 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP8_COMMON_ENTROPYMODE_H_
+#define VP8_COMMON_ENTROPYMODE_H_
+
+#include "onyxc_int.h"
+#include "treecoder.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum
+{
+    SUBMVREF_NORMAL,
+    SUBMVREF_LEFT_ZED,
+    SUBMVREF_ABOVE_ZED,
+    SUBMVREF_LEFT_ABOVE_SAME,
+    SUBMVREF_LEFT_ABOVE_ZED
+} sumvfref_t;
+
+typedef int vp8_mbsplit[16];
+
+#define VP8_NUMMBSPLITS 4
+
+extern const vp8_mbsplit vp8_mbsplits [VP8_NUMMBSPLITS];
+
+extern const int vp8_mbsplit_count [VP8_NUMMBSPLITS];    /* # of subsets */
+
+extern const vp8_prob vp8_mbsplit_probs [VP8_NUMMBSPLITS-1];
+
+extern int vp8_mv_cont(const int_mv *l, const int_mv *a);
+#define SUBMVREF_COUNT 5
+extern const vp8_prob vp8_sub_mv_ref_prob2 [SUBMVREF_COUNT][VP8_SUBMVREFS-1];
+
+
+extern const unsigned int vp8_kf_default_bmode_counts [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES];
+
+
+extern const vp8_tree_index vp8_bmode_tree[];
+
+extern const vp8_tree_index  vp8_ymode_tree[];
+extern const vp8_tree_index  vp8_kf_ymode_tree[];
+extern const vp8_tree_index  vp8_uv_mode_tree[];
+
+extern const vp8_tree_index  vp8_mbsplit_tree[];
+extern const vp8_tree_index  vp8_mv_ref_tree[];
+extern const vp8_tree_index  vp8_sub_mv_ref_tree[];
+
+extern const struct vp8_token_struct vp8_bmode_encodings[VP8_BINTRAMODES];
+extern const struct vp8_token_struct vp8_ymode_encodings[VP8_YMODES];
+extern const struct vp8_token_struct vp8_kf_ymode_encodings[VP8_YMODES];
+extern const struct vp8_token_struct vp8_uv_mode_encodings[VP8_UV_MODES];
+extern const struct vp8_token_struct vp8_mbsplit_encodings[VP8_NUMMBSPLITS];
+
+/* Inter mode values do not start at zero */
+
+extern const struct vp8_token_struct vp8_mv_ref_encoding_array[VP8_MVREFS];
+extern const struct vp8_token_struct vp8_sub_mv_ref_encoding_array[VP8_SUBMVREFS];
+
+extern const vp8_tree_index vp8_small_mvtree[];
+
+extern const struct vp8_token_struct vp8_small_mvencodings[8];
+
+/* Key frame default mode probs */
+extern const vp8_prob vp8_kf_bmode_prob[VP8_BINTRAMODES][VP8_BINTRAMODES]
+[VP8_BINTRAMODES-1];
+extern const vp8_prob vp8_kf_uv_mode_prob[VP8_UV_MODES-1];
+extern const vp8_prob vp8_kf_ymode_prob[VP8_YMODES-1];
+
+void vp8_init_mbmode_probs(VP8_COMMON *x);
+void vp8_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES-1]);
+void vp8_kf_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1]);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP8_COMMON_ENTROPYMODE_H_
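Editor's note: the vp8_tree_index arrays declared here all follow one convention, where a non-negative entry is the offset of the next left/right pair and a negative entry is a leaf whose token is the negated value. A minimal standalone decoder sketch (editorial; vp8_tree_index_t, the enum values and the fixed bit sequence are illustrative stand-ins):

#include <stdio.h>

typedef signed char vp8_tree_index_t;        /* stand-in for vp8_tree_index */

enum { DC_PRED, V_PRED, H_PRED, TM_PRED };   /* ordering as used by the UV tree */

static const vp8_tree_index_t uv_mode_tree[6] =
    { -DC_PRED, 2, -V_PRED, 4, -H_PRED, -TM_PRED };

static int tree_read(const vp8_tree_index_t *tree, const int *bits)
{
    int i = 0;
    do
        i = tree[i + *bits++];   /* bit 0 -> left entry, bit 1 -> right entry */
    while (i > 0);
    return -i;                   /* leaf reached: token is the negated entry */
}

int main(void)
{
    const int bits[] = { 1, 1, 0 };
    printf("decoded mode %d\n", tree_read(uv_mode_tree, bits));   /* 2 = H_PRED */
    return 0;
}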

+ 49 - 0
thirdparty/libvpx/vp8/common/entropymv.c

@@ -0,0 +1,49 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "entropymv.h"
+
+const MV_CONTEXT vp8_mv_update_probs[2] =
+{
+    {{
+        237,
+        246,
+        253, 253, 254, 254, 254, 254, 254,
+        254, 254, 254, 254, 254, 250, 250, 252, 254, 254
+    }},
+    {{
+        231,
+        243,
+        245, 253, 254, 254, 254, 254, 254,
+        254, 254, 254, 254, 254, 251, 251, 254, 254, 254
+    }}
+};
+const MV_CONTEXT vp8_default_mv_context[2] =
+{
+    {{
+        /* row */
+        162,                                        /* is short */
+        128,                                        /* sign */
+        225, 146, 172, 147, 214,  39, 156,          /* short tree */
+        128, 129, 132,  75, 145, 178, 206, 239, 254, 254 /* long bits */
+    }},
+
+
+
+    {{
+        /* same for column */
+        164,                                        /* is short */
+        128,
+        204, 170, 119, 235, 140, 230, 228,
+        128, 130, 130,  74, 148, 180, 203, 236, 254, 254 /* long bits */
+
+    }}
+};

+ 52 - 0
thirdparty/libvpx/vp8/common/entropymv.h

@@ -0,0 +1,52 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP8_COMMON_ENTROPYMV_H_
+#define VP8_COMMON_ENTROPYMV_H_
+
+#include "treecoder.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum
+{
+    mv_max  = 1023,              /* max absolute value of a MV component */
+    MVvals = (2 * mv_max) + 1,   /* # possible values "" */
+    mvfp_max  = 255,              /* max absolute value of a full pixel MV component */
+    MVfpvals = (2 * mvfp_max) +1, /* # possible full pixel MV values */
+
+    mvlong_width = 10,       /* Large MVs have 9 bit magnitudes */
+    mvnum_short = 8,         /* magnitudes 0 through 7 */
+
+    /* probability offsets for coding each MV component */
+
+    mvpis_short = 0,         /* short (<= 7) vs long (>= 8) */
+    MVPsign,                /* sign for non-zero */
+    MVPshort,               /* 8 short values = 7-position tree */
+
+    MVPbits = MVPshort + mvnum_short - 1, /* mvlong_width long value bits */
+    MVPcount = MVPbits + mvlong_width    /* (with independent probabilities) */
+};
+
+typedef struct mv_context
+{
+    vp8_prob prob[MVPcount];  /* often come in row, col pairs */
+} MV_CONTEXT;
+
+extern const MV_CONTEXT vp8_mv_update_probs[2], vp8_default_mv_context[2];
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP8_COMMON_ENTROPYMV_H_
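Editor's note: the enum above packs every per-component MV probability into a single array; a quick standalone check (editorial sketch) that the offsets add up to the 19 entries seen in each MV_CONTEXT of entropymv.c:

#include <stdio.h>

int main(void)
{
    const int mvnum_short = 8, mvlong_width = 10;
    const int mvpis_short = 0;                         /* "is short" prob       */
    const int MVPsign  = mvpis_short + 1;              /* sign prob             */
    const int MVPshort = MVPsign + 1;                  /* 7-position short tree */
    const int MVPbits  = MVPshort + mvnum_short - 1;   /* first long-bit prob   */
    const int MVPcount = MVPbits + mvlong_width;
    printf("sign at %d, short tree at %d..%d, long bits at %d..%d, total %d\n",
           MVPsign, MVPshort, MVPbits - 1, MVPbits, MVPcount - 1, MVPcount);
    return 0;
}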

+ 188 - 0
thirdparty/libvpx/vp8/common/extend.c

@@ -0,0 +1,188 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "extend.h"
+#include "vpx_mem/vpx_mem.h"
+
+
+static void copy_and_extend_plane
+(
+    unsigned char *s, /* source */
+    int sp,           /* source pitch */
+    unsigned char *d, /* destination */
+    int dp,           /* destination pitch */
+    int h,            /* height */
+    int w,            /* width */
+    int et,           /* extend top border */
+    int el,           /* extend left border */
+    int eb,           /* extend bottom border */
+    int er            /* extend right border */
+)
+{
+    int i;
+    unsigned char *src_ptr1, *src_ptr2;
+    unsigned char *dest_ptr1, *dest_ptr2;
+    int linesize;
+
+    /* copy the left and right most columns out */
+    src_ptr1 = s;
+    src_ptr2 = s + w - 1;
+    dest_ptr1 = d - el;
+    dest_ptr2 = d + w;
+
+    for (i = 0; i < h; i++)
+    {
+        memset(dest_ptr1, src_ptr1[0], el);
+        memcpy(dest_ptr1 + el, src_ptr1, w);
+        memset(dest_ptr2, src_ptr2[0], er);
+        src_ptr1  += sp;
+        src_ptr2  += sp;
+        dest_ptr1 += dp;
+        dest_ptr2 += dp;
+    }
+
+    /* Now copy the top and bottom lines into each line of the respective
+     * borders
+     */
+    src_ptr1 = d - el;
+    src_ptr2 = d + dp * (h - 1) - el;
+    dest_ptr1 = d + dp * (-et) - el;
+    dest_ptr2 = d + dp * (h) - el;
+    linesize = el + er + w;
+
+    for (i = 0; i < et; i++)
+    {
+        memcpy(dest_ptr1, src_ptr1, linesize);
+        dest_ptr1 += dp;
+    }
+
+    for (i = 0; i < eb; i++)
+    {
+        memcpy(dest_ptr2, src_ptr2, linesize);
+        dest_ptr2 += dp;
+    }
+}
+
+
+void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src,
+                               YV12_BUFFER_CONFIG *dst)
+{
+    int et = dst->border;
+    int el = dst->border;
+    int eb = dst->border + dst->y_height - src->y_height;
+    int er = dst->border + dst->y_width - src->y_width;
+
+    copy_and_extend_plane(src->y_buffer, src->y_stride,
+                          dst->y_buffer, dst->y_stride,
+                          src->y_height, src->y_width,
+                          et, el, eb, er);
+
+    et = dst->border >> 1;
+    el = dst->border >> 1;
+    eb = (dst->border >> 1) + dst->uv_height - src->uv_height;
+    er = (dst->border >> 1) + dst->uv_width - src->uv_width;
+
+    copy_and_extend_plane(src->u_buffer, src->uv_stride,
+                          dst->u_buffer, dst->uv_stride,
+                          src->uv_height, src->uv_width,
+                          et, el, eb, er);
+
+    copy_and_extend_plane(src->v_buffer, src->uv_stride,
+                          dst->v_buffer, dst->uv_stride,
+                          src->uv_height, src->uv_width,
+                          et, el, eb, er);
+}
+
+
+void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src,
+                                         YV12_BUFFER_CONFIG *dst,
+                                         int srcy, int srcx,
+                                         int srch, int srcw)
+{
+    int et = dst->border;
+    int el = dst->border;
+    int eb = dst->border + dst->y_height - src->y_height;
+    int er = dst->border + dst->y_width - src->y_width;
+    int src_y_offset = srcy * src->y_stride + srcx;
+    int dst_y_offset = srcy * dst->y_stride + srcx;
+    int src_uv_offset = ((srcy * src->uv_stride) >> 1) + (srcx >> 1);
+    int dst_uv_offset = ((srcy * dst->uv_stride) >> 1) + (srcx >> 1);
+
+    /* If the side is not touching the boundary then don't extend. */
+    if (srcy)
+      et = 0;
+    if (srcx)
+      el = 0;
+    if (srcy + srch != src->y_height)
+      eb = 0;
+    if (srcx + srcw != src->y_width)
+      er = 0;
+
+    copy_and_extend_plane(src->y_buffer + src_y_offset,
+                          src->y_stride,
+                          dst->y_buffer + dst_y_offset,
+                          dst->y_stride,
+                          srch, srcw,
+                          et, el, eb, er);
+
+    et = (et + 1) >> 1;
+    el = (el + 1) >> 1;
+    eb = (eb + 1) >> 1;
+    er = (er + 1) >> 1;
+    srch = (srch + 1) >> 1;
+    srcw = (srcw + 1) >> 1;
+
+    copy_and_extend_plane(src->u_buffer + src_uv_offset,
+                          src->uv_stride,
+                          dst->u_buffer + dst_uv_offset,
+                          dst->uv_stride,
+                          srch, srcw,
+                          et, el, eb, er);
+
+    copy_and_extend_plane(src->v_buffer + src_uv_offset,
+                          src->uv_stride,
+                          dst->v_buffer + dst_uv_offset,
+                          dst->uv_stride,
+                          srch, srcw,
+                          et, el, eb, er);
+}
+
+
+/* note the extension is only for the last row, for intra prediction purposes */
+void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf,
+                       unsigned char *YPtr,
+                       unsigned char *UPtr,
+                       unsigned char *VPtr)
+{
+    int i;
+
+    YPtr += ybf->y_stride * 14;
+    UPtr += ybf->uv_stride * 6;
+    VPtr += ybf->uv_stride * 6;
+
+    for (i = 0; i < 4; i++)
+    {
+        YPtr[i] = YPtr[-1];
+        UPtr[i] = UPtr[-1];
+        VPtr[i] = VPtr[-1];
+    }
+
+    YPtr += ybf->y_stride;
+    UPtr += ybf->uv_stride;
+    VPtr += ybf->uv_stride;
+
+    for (i = 0; i < 4; i++)
+    {
+        YPtr[i] = YPtr[-1];
+        UPtr[i] = UPtr[-1];
+        VPtr[i] = VPtr[-1];
+    }
+}
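Editor's note: copy_and_extend_plane above smears the first and last pixel of each row into the left/right border and then replicates whole top/bottom rows into the vertical border. A one-row standalone sketch of the horizontal part (editorial):

#include <stdio.h>
#include <string.h>

int main(void)
{
    /* a 4-pixel row stored inside a buffer with a 3-pixel border on each side */
    unsigned char buf[3 + 4 + 3] = { 0, 0, 0, 10, 20, 30, 40, 0, 0, 0 };
    unsigned char *row = buf + 3;
    int w = 4, el = 3, er = 3, i;

    memset(row - el, row[0], el);        /* left border  <- first pixel */
    memset(row + w, row[w - 1], er);     /* right border <- last pixel  */

    for (i = 0; i < (int)sizeof(buf); i++)
        printf("%d ", buf[i]);           /* 10 10 10 10 20 30 40 40 40 40 */
    printf("\n");
    return 0;
}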

+ 33 - 0
thirdparty/libvpx/vp8/common/extend.h

@@ -0,0 +1,33 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP8_COMMON_EXTEND_H_
+#define VP8_COMMON_EXTEND_H_
+
+#include "vpx_scale/yv12config.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr);
+void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src,
+                               YV12_BUFFER_CONFIG *dst);
+void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src,
+                                         YV12_BUFFER_CONFIG *dst,
+                                         int srcy, int srcx,
+                                         int srch, int srcw);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP8_COMMON_EXTEND_H_

+ 493 - 0
thirdparty/libvpx/vp8/common/filter.c

@@ -0,0 +1,493 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "filter.h"
+#include "./vp8_rtcd.h"
+
+DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) =
+{
+    { 128,   0 },
+    { 112,  16 },
+    {  96,  32 },
+    {  80,  48 },
+    {  64,  64 },
+    {  48,  80 },
+    {  32,  96 },
+    {  16, 112 }
+};
+
+DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) =
+{
+
+    { 0,  0,  128,    0,   0,  0 },         /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
+    { 0, -6,  123,   12,  -1,  0 },
+    { 2, -11, 108,   36,  -8,  1 },         /* New 1/4 pel 6 tap filter */
+    { 0, -9,   93,   50,  -6,  0 },
+    { 3, -16,  77,   77, -16,  3 },         /* New 1/2 pel 6 tap filter */
+    { 0, -6,   50,   93,  -9,  0 },
+    { 1, -8,   36,  108, -11,  2 },         /* New 1/4 pel 6 tap filter */
+    { 0, -1,   12,  123,  -6,  0 },
+};
+
+static void filter_block2d_first_pass
+(
+    unsigned char *src_ptr,
+    int *output_ptr,
+    unsigned int src_pixels_per_line,
+    unsigned int pixel_step,
+    unsigned int output_height,
+    unsigned int output_width,
+    const short *vp8_filter
+)
+{
+    unsigned int i, j;
+    int  Temp;
+
+    for (i = 0; i < output_height; i++)
+    {
+        for (j = 0; j < output_width; j++)
+        {
+            Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
+                   ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
+                   ((int)src_ptr[0]                 * vp8_filter[2]) +
+                   ((int)src_ptr[pixel_step]         * vp8_filter[3]) +
+                   ((int)src_ptr[2*pixel_step]       * vp8_filter[4]) +
+                   ((int)src_ptr[3*pixel_step]       * vp8_filter[5]) +
+                   (VP8_FILTER_WEIGHT >> 1);      /* Rounding */
+
+            /* Normalize back to 0-255 */
+            Temp = Temp >> VP8_FILTER_SHIFT;
+
+            if (Temp < 0)
+                Temp = 0;
+            else if (Temp > 255)
+                Temp = 255;
+
+            output_ptr[j] = Temp;
+            src_ptr++;
+        }
+
+        /* Next row... */
+        src_ptr    += src_pixels_per_line - output_width;
+        output_ptr += output_width;
+    }
+}
+
+static void filter_block2d_second_pass
+(
+    int *src_ptr,
+    unsigned char *output_ptr,
+    int output_pitch,
+    unsigned int src_pixels_per_line,
+    unsigned int pixel_step,
+    unsigned int output_height,
+    unsigned int output_width,
+    const short *vp8_filter
+)
+{
+    unsigned int i, j;
+    int  Temp;
+
+    for (i = 0; i < output_height; i++)
+    {
+        for (j = 0; j < output_width; j++)
+        {
+            /* Apply filter */
+            Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
+                   ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
+                   ((int)src_ptr[0]                 * vp8_filter[2]) +
+                   ((int)src_ptr[pixel_step]         * vp8_filter[3]) +
+                   ((int)src_ptr[2*pixel_step]       * vp8_filter[4]) +
+                   ((int)src_ptr[3*pixel_step]       * vp8_filter[5]) +
+                   (VP8_FILTER_WEIGHT >> 1);   /* Rounding */
+
+            /* Normalize back to 0-255 */
+            Temp = Temp >> VP8_FILTER_SHIFT;
+
+            if (Temp < 0)
+                Temp = 0;
+            else if (Temp > 255)
+                Temp = 255;
+
+            output_ptr[j] = (unsigned char)Temp;
+            src_ptr++;
+        }
+
+        /* Start next row */
+        src_ptr    += src_pixels_per_line - output_width;
+        output_ptr += output_pitch;
+    }
+}
+
+
+static void filter_block2d
+(
+    unsigned char  *src_ptr,
+    unsigned char  *output_ptr,
+    unsigned int src_pixels_per_line,
+    int output_pitch,
+    const short  *HFilter,
+    const short  *VFilter
+)
+{
+    int FData[9*4]; /* Temp data buffer used in filtering */
+
+    /* First filter 1-D horizontally... */
+    filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter);
+
+    /* then filter vertically... */
+    filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
+}
+
+
+void vp8_sixtap_predict4x4_c
+(
+    unsigned char  *src_ptr,
+    int   src_pixels_per_line,
+    int  xoffset,
+    int  yoffset,
+    unsigned char *dst_ptr,
+    int dst_pitch
+)
+{
+    const short  *HFilter;
+    const short  *VFilter;
+
+    HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
+    VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */
+
+    filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);
+}
+void vp8_sixtap_predict8x8_c
+(
+    unsigned char  *src_ptr,
+    int  src_pixels_per_line,
+    int  xoffset,
+    int  yoffset,
+    unsigned char *dst_ptr,
+    int  dst_pitch
+)
+{
+    const short  *HFilter;
+    const short  *VFilter;
+    int FData[13*16];   /* Temp data buffer used in filtering */
+
+    HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
+    VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */
+
+    /* First filter 1-D horizontally... */
+    filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
+
+
+    /* then filter vertically... */
+    filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
+
+}
+
+void vp8_sixtap_predict8x4_c
+(
+    unsigned char  *src_ptr,
+    int  src_pixels_per_line,
+    int  xoffset,
+    int  yoffset,
+    unsigned char *dst_ptr,
+    int  dst_pitch
+)
+{
+    const short  *HFilter;
+    const short  *VFilter;
+    int FData[13*16];   /* Temp data buffer used in filtering */
+
+    HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
+    VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */
+
+    /* First filter 1-D horizontally... */
+    filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);
+
+
+    /* then filter vertically... */
+    filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
+
+}
+
+void vp8_sixtap_predict16x16_c
+(
+    unsigned char  *src_ptr,
+    int  src_pixels_per_line,
+    int  xoffset,
+    int  yoffset,
+    unsigned char *dst_ptr,
+    int  dst_pitch
+)
+{
+    const short  *HFilter;
+    const short  *VFilter;
+    int FData[21*24];   /* Temp data buffer used in filtering */
+
+
+    HFilter = vp8_sub_pel_filters[xoffset];   /* 6 tap */
+    VFilter = vp8_sub_pel_filters[yoffset];   /* 6 tap */
+
+    /* First filter 1-D horizontally... */
+    filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);
+
+    /* then filter vertically... */
+    filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
+
+}
+
+
+/****************************************************************************
+ *
+ *  ROUTINE       : filter_block2d_bil_first_pass
+ *
+ *  INPUTS        : UINT8  *src_ptr    : Pointer to source block.
+ *                  UINT32  src_stride : Stride of source block.
+ *                  UINT32  height     : Block height.
+ *                  UINT32  width      : Block width.
+ *                  INT32  *vp8_filter : Array of 2 bi-linear filter taps.
+ *
+ *  OUTPUTS       : UINT16 *dst_ptr    : Pointer to filtered block.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block
+ *                  in the horizontal direction to produce the filtered output
+ *                  block. Used to implement first-pass of 2-D separable filter.
+ *
+ *  SPECIAL NOTES : Produces UINT16 output to retain precision for next pass.
+ *                  Two filter taps should sum to VP8_FILTER_WEIGHT.
+ *
+ ****************************************************************************/
+static void filter_block2d_bil_first_pass
+(
+    unsigned char  *src_ptr,
+    unsigned short *dst_ptr,
+    unsigned int    src_stride,
+    unsigned int    height,
+    unsigned int    width,
+    const short    *vp8_filter
+)
+{
+    unsigned int i, j;
+
+    for (i = 0; i < height; i++)
+    {
+        for (j = 0; j < width; j++)
+        {
+            /* Apply bilinear filter */
+            dst_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) +
+                          ((int)src_ptr[1] * vp8_filter[1]) +
+                          (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
+            src_ptr++;
+        }
+
+        /* Next row... */
+        src_ptr += src_stride - width;
+        dst_ptr += width;
+    }
+}
+
+/****************************************************************************
+ *
+ *  ROUTINE       : filter_block2d_bil_second_pass
+ *
+ *  INPUTS        : UINT16 *src_ptr    : Pointer to source block.
+ *                  UINT32  dst_pitch  : Destination block pitch.
+ *                  UINT32  height     : Block height.
+ *                  UINT32  width      : Block width.
+ *                  INT32  *vp8_filter : Array of 2 bi-linear filter taps.
+ *
+ *  OUTPUTS       : UINT8  *dst_ptr    : Pointer to filtered block.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block
+ *                  in the vertical direction to produce the filtered output
+ *                  block. Used to implement second-pass of 2-D separable filter.
+ *
+ *  SPECIAL NOTES : Requires 16-bit input as produced by filter_block2d_bil_first_pass.
+ *                  Two filter taps should sum to VP8_FILTER_WEIGHT.
+ *
+ ****************************************************************************/
+static void filter_block2d_bil_second_pass
+(
+    unsigned short *src_ptr,
+    unsigned char  *dst_ptr,
+    int             dst_pitch,
+    unsigned int    height,
+    unsigned int    width,
+    const short    *vp8_filter
+)
+{
+    unsigned int  i, j;
+    int  Temp;
+
+    for (i = 0; i < height; i++)
+    {
+        for (j = 0; j < width; j++)
+        {
+            /* Apply filter */
+            Temp = ((int)src_ptr[0]     * vp8_filter[0]) +
+                   ((int)src_ptr[width] * vp8_filter[1]) +
+                   (VP8_FILTER_WEIGHT / 2);
+            dst_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
+            src_ptr++;
+        }
+
+        /* Next row... */
+        dst_ptr += dst_pitch;
+    }
+}
+
+
+/****************************************************************************
+ *
+ *  ROUTINE       : filter_block2d_bil
+ *
+ *  INPUTS        : UINT8  *src_ptr          : Pointer to source block.
+ *                  UINT32  src_pitch        : Stride of source block.
+ *                  UINT32  dst_pitch        : Stride of destination block.
+ *                  INT32  *HFilter          : Array of 2 horizontal filter taps.
+ *                  INT32  *VFilter          : Array of 2 vertical filter taps.
+ *                  INT32  Width             : Block width
+ *                  INT32  Height            : Block height
+ *
+ *  OUTPUTS       : UINT8  *dst_ptr       : Pointer to filtered block.
+ *
+ *  RETURNS       : void
+ *
+ *  FUNCTION      : 2-D filters an input block by applying a 2-tap
+ *                  bi-linear filter horizontally followed by a 2-tap
+ *                  bi-linear filter vertically on the result.
+ *
+ *  SPECIAL NOTES : The largest block size that can be handled here is 16x16.
+ *
+ ****************************************************************************/
+static void filter_block2d_bil
+(
+    unsigned char *src_ptr,
+    unsigned char *dst_ptr,
+    unsigned int   src_pitch,
+    unsigned int   dst_pitch,
+    const short   *HFilter,
+    const short   *VFilter,
+    int            Width,
+    int            Height
+)
+{
+
+    unsigned short FData[17*16];    /* Temp data buffer used in filtering */
+
+    /* First filter 1-D horizontally... */
+    filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);
+
+    /* then 1-D vertically... */
+    filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
+}
+
+
+void vp8_bilinear_predict4x4_c
+(
+    unsigned char  *src_ptr,
+    int   src_pixels_per_line,
+    int  xoffset,
+    int  yoffset,
+    unsigned char *dst_ptr,
+    int dst_pitch
+)
+{
+    const short *HFilter;
+    const short *VFilter;
+
+    HFilter = vp8_bilinear_filters[xoffset];
+    VFilter = vp8_bilinear_filters[yoffset];
+#if 0
+    {
+        int i;
+        unsigned char temp1[16];
+        unsigned char temp2[16];
+
+        bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
+        filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
+
+        for (i = 0; i < 16; i++)
+        {
+            if (temp1[i] != temp2[i])
+            {
+                bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
+                filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
+            }
+        }
+    }
+#endif
+    filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
+
+}
+
+void vp8_bilinear_predict8x8_c
+(
+    unsigned char  *src_ptr,
+    int  src_pixels_per_line,
+    int  xoffset,
+    int  yoffset,
+    unsigned char *dst_ptr,
+    int  dst_pitch
+)
+{
+    const short *HFilter;
+    const short *VFilter;
+
+    HFilter = vp8_bilinear_filters[xoffset];
+    VFilter = vp8_bilinear_filters[yoffset];
+
+    filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
+
+}
+
+void vp8_bilinear_predict8x4_c
+(
+    unsigned char  *src_ptr,
+    int  src_pixels_per_line,
+    int  xoffset,
+    int  yoffset,
+    unsigned char *dst_ptr,
+    int  dst_pitch
+)
+{
+    const short *HFilter;
+    const short *VFilter;
+
+    HFilter = vp8_bilinear_filters[xoffset];
+    VFilter = vp8_bilinear_filters[yoffset];
+
+    filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
+
+}
+
+void vp8_bilinear_predict16x16_c
+(
+    unsigned char  *src_ptr,
+    int  src_pixels_per_line,
+    int  xoffset,
+    int  yoffset,
+    unsigned char *dst_ptr,
+    int  dst_pitch
+)
+{
+    const short *HFilter;
+    const short *VFilter;
+
+    HFilter = vp8_bilinear_filters[xoffset];
+    VFilter = vp8_bilinear_filters[yoffset];
+
+    filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
+}
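Editor's note: both filter families above are normalized so the taps sum to VP8_FILTER_WEIGHT (128), with half the weight added for rounding before the shift by VP8_FILTER_SHIFT (7). A numeric sketch of the 2-tap bilinear case (editorial; the tap pair is the 2/8-pel entry of vp8_bilinear_filters):

#include <stdio.h>

#define VP8_FILTER_WEIGHT 128   /* local copies of the values in filter.h */
#define VP8_FILTER_SHIFT  7

int main(void)
{
    const short taps[2] = { 96, 32 };    /* xoffset == 2 row of the table above */
    int a = 100, b = 180;                /* two neighbouring pixels */
    int out = (a * taps[0] + b * taps[1] + (VP8_FILTER_WEIGHT / 2))
              >> VP8_FILTER_SHIFT;
    printf("%d\n", out);                 /* (9600 + 5760 + 64) >> 7 = 120 */
    return 0;
}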

+ 32 - 0
thirdparty/libvpx/vp8/common/filter.h

@@ -0,0 +1,32 @@
+/*
+ *  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP8_COMMON_FILTER_H_
+#define VP8_COMMON_FILTER_H_
+
+#include "vpx_ports/mem.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define BLOCK_HEIGHT_WIDTH 4
+#define VP8_FILTER_WEIGHT 128
+#define VP8_FILTER_SHIFT  7
+
+extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]);
+extern DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP8_COMMON_FILTER_H_

+ 193 - 0
thirdparty/libvpx/vp8/common/findnearmv.c

@@ -0,0 +1,193 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "findnearmv.h"
+
+const unsigned char vp8_mbsplit_offset[4][16] = {
+    { 0,  8,  0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0},
+    { 0,  2,  0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0},
+    { 0,  2,  8, 10,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0},
+    { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15}
+};
+
+/* Predict motion vectors using those from already-decoded nearby blocks.
+   Note that we only consider one 4x4 subblock from each candidate 16x16
+   macroblock.   */
+void vp8_find_near_mvs
+(
+    MACROBLOCKD *xd,
+    const MODE_INFO *here,
+    int_mv *nearest,
+    int_mv *nearby,
+    int_mv *best_mv,
+    int cnt[4],
+    int refframe,
+    int *ref_frame_sign_bias
+)
+{
+    const MODE_INFO *above = here - xd->mode_info_stride;
+    const MODE_INFO *left = here - 1;
+    const MODE_INFO *aboveleft = above - 1;
+    int_mv            near_mvs[4];
+    int_mv           *mv = near_mvs;
+    int             *cntx = cnt;
+    enum {CNT_INTRA, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV};
+
+    /* Zero accumulators */
+    mv[0].as_int = mv[1].as_int = mv[2].as_int = 0;
+    cnt[0] = cnt[1] = cnt[2] = cnt[3] = 0;
+
+    /* Process above */
+    if (above->mbmi.ref_frame != INTRA_FRAME)
+    {
+        if (above->mbmi.mv.as_int)
+        {
+            (++mv)->as_int = above->mbmi.mv.as_int;
+            mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe, mv, ref_frame_sign_bias);
+            ++cntx;
+        }
+
+        *cntx += 2;
+    }
+
+    /* Process left */
+    if (left->mbmi.ref_frame != INTRA_FRAME)
+    {
+        if (left->mbmi.mv.as_int)
+        {
+            int_mv this_mv;
+
+            this_mv.as_int = left->mbmi.mv.as_int;
+            mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias);
+
+            if (this_mv.as_int != mv->as_int)
+            {
+                (++mv)->as_int = this_mv.as_int;
+                ++cntx;
+            }
+
+            *cntx += 2;
+        }
+        else
+            cnt[CNT_INTRA] += 2;
+    }
+
+    /* Process above left */
+    if (aboveleft->mbmi.ref_frame != INTRA_FRAME)
+    {
+        if (aboveleft->mbmi.mv.as_int)
+        {
+            int_mv this_mv;
+
+            this_mv.as_int = aboveleft->mbmi.mv.as_int;
+            mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias);
+
+            if (this_mv.as_int != mv->as_int)
+            {
+                (++mv)->as_int = this_mv.as_int;
+                ++cntx;
+            }
+
+            *cntx += 1;
+        }
+        else
+            cnt[CNT_INTRA] += 1;
+    }
+
+    /* If we have three distinct MV's ... */
+    if (cnt[CNT_SPLITMV])
+    {
+        /* See if above-left MV can be merged with NEAREST */
+        if (mv->as_int == near_mvs[CNT_NEAREST].as_int)
+            cnt[CNT_NEAREST] += 1;
+    }
+
+    cnt[CNT_SPLITMV] = ((above->mbmi.mode == SPLITMV)
+                        + (left->mbmi.mode == SPLITMV)) * 2
+                       + (aboveleft->mbmi.mode == SPLITMV);
+
+    /* Swap near and nearest if necessary */
+    if (cnt[CNT_NEAR] > cnt[CNT_NEAREST])
+    {
+        int tmp;
+        tmp = cnt[CNT_NEAREST];
+        cnt[CNT_NEAREST] = cnt[CNT_NEAR];
+        cnt[CNT_NEAR] = tmp;
+        tmp = near_mvs[CNT_NEAREST].as_int;
+        near_mvs[CNT_NEAREST].as_int = near_mvs[CNT_NEAR].as_int;
+        near_mvs[CNT_NEAR].as_int = tmp;
+    }
+
+    /* Use near_mvs[0] to store the "best" MV */
+    if (cnt[CNT_NEAREST] >= cnt[CNT_INTRA])
+        near_mvs[CNT_INTRA] = near_mvs[CNT_NEAREST];
+
+    /* Set up return values */
+    best_mv->as_int = near_mvs[0].as_int;
+    nearest->as_int = near_mvs[CNT_NEAREST].as_int;
+    nearby->as_int = near_mvs[CNT_NEAR].as_int;
+}
+
+
+static void invert_and_clamp_mvs(int_mv *inv, int_mv *src, MACROBLOCKD *xd)
+{
+    inv->as_mv.row = src->as_mv.row * -1;
+    inv->as_mv.col = src->as_mv.col * -1;
+    vp8_clamp_mv2(inv, xd);
+    vp8_clamp_mv2(src, xd);
+}
+
+
+int vp8_find_near_mvs_bias
+(
+    MACROBLOCKD *xd,
+    const MODE_INFO *here,
+    int_mv mode_mv_sb[2][MB_MODE_COUNT],
+    int_mv best_mv_sb[2],
+    int cnt[4],
+    int refframe,
+    int *ref_frame_sign_bias
+)
+{
+    int sign_bias = ref_frame_sign_bias[refframe];
+
+    vp8_find_near_mvs(xd,
+                      here,
+                      &mode_mv_sb[sign_bias][NEARESTMV],
+                      &mode_mv_sb[sign_bias][NEARMV],
+                      &best_mv_sb[sign_bias],
+                      cnt,
+                      refframe,
+                      ref_frame_sign_bias);
+
+    invert_and_clamp_mvs(&mode_mv_sb[!sign_bias][NEARESTMV],
+                         &mode_mv_sb[sign_bias][NEARESTMV], xd);
+    invert_and_clamp_mvs(&mode_mv_sb[!sign_bias][NEARMV],
+                         &mode_mv_sb[sign_bias][NEARMV], xd);
+    invert_and_clamp_mvs(&best_mv_sb[!sign_bias],
+                         &best_mv_sb[sign_bias], xd);
+
+    return sign_bias;
+}
+
+
+vp8_prob *vp8_mv_ref_probs(
+    vp8_prob p[VP8_MVREFS-1], const int near_mv_ref_ct[4]
+)
+{
+    p[0] = vp8_mode_contexts [near_mv_ref_ct[0]] [0];
+    p[1] = vp8_mode_contexts [near_mv_ref_ct[1]] [1];
+    p[2] = vp8_mode_contexts [near_mv_ref_ct[2]] [2];
+    p[3] = vp8_mode_contexts [near_mv_ref_ct[3]] [3];
+    /*p[3] = vp8_mode_contexts [near_mv_ref_ct[1] + near_mv_ref_ct[2] + near_mv_ref_ct[3]] [3];*/
+    return p;
+}
+
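vp8_find_near_mvs above tallies the motion vectors of the above, left and above-left neighbours (the first two weighted twice as heavily as the corner), merges duplicates, and hands back the best-supported candidates. The sketch below is a deliberately simplified, hypothetical restatement of that tallying step using plain types; it omits the sign-bias correction, the near/nearest swap and the clamping that the real function also performs.

    #include <stdint.h>

    typedef struct { int16_t row, col; } mv_t;

    static int same_mv(mv_t a, mv_t b) { return a.row == b.row && a.col == b.col; }

    /* Return the neighbour MV with the highest accumulated weight;
     * earlier candidates (including the zero vector) win ties. */
    static mv_t pick_nearest(const mv_t *neighbour, const int *weight, int n)
    {
        mv_t cand[4]  = { { 0, 0 } };   /* slot 0 collects zero/"no motion" votes */
        int  score[4] = { 0 };
        int  used = 1, best = 0, i, j;

        for (i = 0; i < n; i++) {
            if (neighbour[i].row == 0 && neighbour[i].col == 0) {
                score[0] += weight[i];
                continue;
            }
            for (j = 1; j < used; j++)
                if (same_mv(cand[j], neighbour[i]))
                    break;
            if (j == used) {
                if (used == 4) continue;          /* table full; ignore extras */
                cand[used++] = neighbour[i];
            }
            score[j] += weight[i];
        }
        for (i = 1; i < used; i++)
            if (score[i] > score[best])
                best = i;
        return cand[best];
    }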

+ 195 - 0
thirdparty/libvpx/vp8/common/findnearmv.h

@@ -0,0 +1,195 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP8_COMMON_FINDNEARMV_H_
+#define VP8_COMMON_FINDNEARMV_H_
+
+#include "./vpx_config.h"
+#include "mv.h"
+#include "blockd.h"
+#include "modecont.h"
+#include "treecoder.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+static INLINE void mv_bias(int refmb_ref_frame_sign_bias, int refframe,
+                           int_mv *mvp, const int *ref_frame_sign_bias)
+{
+    if (refmb_ref_frame_sign_bias != ref_frame_sign_bias[refframe])
+    {
+        mvp->as_mv.row *= -1;
+        mvp->as_mv.col *= -1;
+    }
+}
+
+#define LEFT_TOP_MARGIN (16 << 3)
+#define RIGHT_BOTTOM_MARGIN (16 << 3)
+static INLINE void vp8_clamp_mv2(int_mv *mv, const MACROBLOCKD *xd)
+{
+    if (mv->as_mv.col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
+        mv->as_mv.col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
+    else if (mv->as_mv.col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)
+        mv->as_mv.col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
+
+    if (mv->as_mv.row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
+        mv->as_mv.row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
+    else if (mv->as_mv.row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)
+        mv->as_mv.row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
+}
+
+static INLINE void vp8_clamp_mv(int_mv *mv, int mb_to_left_edge,
+                                int mb_to_right_edge, int mb_to_top_edge,
+                                int mb_to_bottom_edge)
+{
+    mv->as_mv.col = (mv->as_mv.col < mb_to_left_edge) ?
+        mb_to_left_edge : mv->as_mv.col;
+    mv->as_mv.col = (mv->as_mv.col > mb_to_right_edge) ?
+        mb_to_right_edge : mv->as_mv.col;
+    mv->as_mv.row = (mv->as_mv.row < mb_to_top_edge) ?
+        mb_to_top_edge : mv->as_mv.row;
+    mv->as_mv.row = (mv->as_mv.row > mb_to_bottom_edge) ?
+        mb_to_bottom_edge : mv->as_mv.row;
+}
+static INLINE unsigned int vp8_check_mv_bounds(int_mv *mv, int mb_to_left_edge,
+                                               int mb_to_right_edge,
+                                               int mb_to_top_edge,
+                                               int mb_to_bottom_edge)
+{
+    unsigned int need_to_clamp;
+    need_to_clamp = (mv->as_mv.col < mb_to_left_edge);
+    need_to_clamp |= (mv->as_mv.col > mb_to_right_edge);
+    need_to_clamp |= (mv->as_mv.row < mb_to_top_edge);
+    need_to_clamp |= (mv->as_mv.row > mb_to_bottom_edge);
+    return need_to_clamp;
+}
+
+void vp8_find_near_mvs
+(
+    MACROBLOCKD *xd,
+    const MODE_INFO *here,
+    int_mv *nearest, int_mv *nearby, int_mv *best,
+    int near_mv_ref_cts[4],
+    int refframe,
+    int *ref_frame_sign_bias
+);
+
+
+int vp8_find_near_mvs_bias
+(
+    MACROBLOCKD *xd,
+    const MODE_INFO *here,
+    int_mv mode_mv_sb[2][MB_MODE_COUNT],
+    int_mv best_mv_sb[2],
+    int cnt[4],
+    int refframe,
+    int *ref_frame_sign_bias
+);
+
+
+vp8_prob *vp8_mv_ref_probs(
+    vp8_prob p[VP8_MVREFS-1], const int near_mv_ref_ct[4]
+);
+
+extern const unsigned char vp8_mbsplit_offset[4][16];
+
+
+static INLINE uint32_t left_block_mv(const MODE_INFO *cur_mb, int b)
+{
+    if (!(b & 3))
+    {
+        /* On L edge, get from MB to left of us */
+        --cur_mb;
+
+        if(cur_mb->mbmi.mode != SPLITMV)
+            return cur_mb->mbmi.mv.as_int;
+        b += 4;
+    }
+
+    return (cur_mb->bmi + b - 1)->mv.as_int;
+}
+
+static INLINE uint32_t above_block_mv(const MODE_INFO *cur_mb, int b,
+                                      int mi_stride)
+{
+    if (!(b >> 2))
+    {
+        /* On top edge, get from MB above us */
+        cur_mb -= mi_stride;
+
+        if(cur_mb->mbmi.mode != SPLITMV)
+            return cur_mb->mbmi.mv.as_int;
+        b += 16;
+    }
+
+    return (cur_mb->bmi + (b - 4))->mv.as_int;
+}
+static INLINE B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b)
+{
+    if (!(b & 3))
+    {
+        /* On L edge, get from MB to left of us */
+        --cur_mb;
+        switch (cur_mb->mbmi.mode)
+        {
+            case B_PRED:
+              return (cur_mb->bmi + b + 3)->as_mode;
+            case DC_PRED:
+                return B_DC_PRED;
+            case V_PRED:
+                return B_VE_PRED;
+            case H_PRED:
+                return B_HE_PRED;
+            case TM_PRED:
+                return B_TM_PRED;
+            default:
+                return B_DC_PRED;
+        }
+    }
+
+    return (cur_mb->bmi + b - 1)->as_mode;
+}
+
+static INLINE B_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb, int b,
+                                                 int mi_stride)
+{
+    if (!(b >> 2))
+    {
+        /* On top edge, get from MB above us */
+        cur_mb -= mi_stride;
+
+        switch (cur_mb->mbmi.mode)
+        {
+            case B_PRED:
+              return (cur_mb->bmi + b + 12)->as_mode;
+            case DC_PRED:
+                return B_DC_PRED;
+            case V_PRED:
+                return B_VE_PRED;
+            case H_PRED:
+                return B_HE_PRED;
+            case TM_PRED:
+                return B_TM_PRED;
+            default:
+                return B_DC_PRED;
+        }
+    }
+
+    return (cur_mb->bmi + b - 4)->as_mode;
+}
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP8_COMMON_FINDNEARMV_H_
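
LEFT_TOP_MARGIN and RIGHT_BOTTOM_MARGIN are 16 << 3 = 128, i.e. 16 whole pixels expressed in the codec's 1/8-pel units, so vp8_clamp_mv2 keeps a vector from pointing more than 16 pixels past the frame edge. A stand-alone restatement of just the column half of that clamp, with made-up edge distances:

    /* 1/8-pel units throughout; mirrors the column half of vp8_clamp_mv2. */
    static short clamp_col(short col, int mb_to_left_edge, int mb_to_right_edge)
    {
        const int margin = 16 << 3;   /* 16 pixels = 128 eighth-pels */

        if (col < mb_to_left_edge - margin)
            col = (short)(mb_to_left_edge - margin);
        else if (col > mb_to_right_edge + margin)
            col = (short)(mb_to_right_edge + margin);
        return col;
    }

    /* Example: a macroblock 32 pixels into the row has mb_to_left_edge == -256,
     * so clamp_col(-400, -256, 1024) returns -384, i.e. 16 pixels past the edge. */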

+ 106 - 0
thirdparty/libvpx/vp8/common/generic/systemdependent.c

@@ -0,0 +1,106 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_config.h"
+#include "vp8_rtcd.h"
+#if ARCH_ARM
+#include "vpx_ports/arm.h"
+#elif ARCH_X86 || ARCH_X86_64
+#include "vpx_ports/x86.h"
+#endif
+#include "vp8/common/onyxc_int.h"
+#include "vp8/common/systemdependent.h"
+
+#if CONFIG_MULTITHREAD
+#if HAVE_UNISTD_H && !defined(__OS2__)
+#include <unistd.h>
+#elif defined(_WIN32)
+#include <windows.h>
+typedef void (WINAPI *PGNSI)(LPSYSTEM_INFO);
+#elif defined(__OS2__)
+#define INCL_DOS
+#define INCL_DOSSPINLOCK
+#include <os2.h>
+#endif
+#endif
+
+#if CONFIG_MULTITHREAD
+static int get_cpu_count()
+{
+    int core_count = 16;
+
+#if HAVE_UNISTD_H && !defined(__OS2__)
+#if defined(_SC_NPROCESSORS_ONLN)
+    core_count = sysconf(_SC_NPROCESSORS_ONLN);
+#elif defined(_SC_NPROC_ONLN)
+    core_count = sysconf(_SC_NPROC_ONLN);
+#endif
+#elif defined(_WIN32)
+    {
+#if _WIN32_WINNT >= 0x0501
+        SYSTEM_INFO sysinfo;
+        GetNativeSystemInfo(&sysinfo);
+#else
+        PGNSI pGNSI;
+        SYSTEM_INFO sysinfo;
+
+        /* Call GetNativeSystemInfo if supported or
+         * GetSystemInfo otherwise. */
+
+        pGNSI = (PGNSI) GetProcAddress(
+                GetModuleHandle(TEXT("kernel32.dll")), "GetNativeSystemInfo");
+        if (pGNSI != NULL)
+            pGNSI(&sysinfo);
+        else
+            GetSystemInfo(&sysinfo);
+#endif
+
+        core_count = sysinfo.dwNumberOfProcessors;
+    }
+#elif defined(__OS2__)
+    {
+        ULONG proc_id;
+        ULONG status;
+
+        core_count = 0;
+        for (proc_id = 1; ; proc_id++)
+        {
+            if (DosGetProcessorStatus(proc_id, &status))
+                break;
+
+            if (status == PROC_ONLINE)
+                core_count++;
+        }
+    }
+#else
+    /* other platforms */
+#endif
+
+    return core_count > 0 ? core_count : 1;
+}
+#endif
+
+void vp8_clear_system_state_c() {};
+
+void vp8_machine_specific_config(VP8_COMMON *ctx)
+{
+#if CONFIG_MULTITHREAD
+    ctx->processor_core_count = get_cpu_count();
+#else
+    (void)ctx;
+#endif /* CONFIG_MULTITHREAD */
+
+#if ARCH_ARM
+    ctx->cpu_caps = arm_cpu_caps();
+#elif ARCH_X86 || ARCH_X86_64
+    ctx->cpu_caps = x86_simd_caps();
+#endif
+}

+ 51 - 0
thirdparty/libvpx/vp8/common/header.h

@@ -0,0 +1,51 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP8_COMMON_HEADER_H_
+#define VP8_COMMON_HEADER_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* 24 bits total */
+typedef struct
+{
+    unsigned int type: 1;
+    unsigned int version: 3;
+    unsigned int show_frame: 1;
+
+    /* Allow 2^20 bytes = 8 megabits for first partition */
+
+    unsigned int first_partition_length_in_bytes: 19;
+
+#ifdef PACKET_TESTING
+    unsigned int frame_number;
+    unsigned int update_gold: 1;
+    unsigned int uses_gold: 1;
+    unsigned int update_last: 1;
+    unsigned int uses_last: 1;
+#endif
+
+} VP8_HEADER;
+
+#ifdef PACKET_TESTING
+#define VP8_HEADER_SIZE 8
+#else
+#define VP8_HEADER_SIZE 3
+#endif
+
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP8_COMMON_HEADER_H_
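
VP8_HEADER mirrors the 3-byte frame tag that begins every VP8 frame: one type bit, a 3-bit version, a show_frame flag and a 19-bit first-partition size, packed least-significant bit first (as described in RFC 6386). A hedged sketch of unpacking that tag from a byte stream; the struct and helper below are illustrative, not part of libvpx:

    #include <stdint.h>

    typedef struct {
        unsigned type;                              /* 0 = key frame */
        unsigned version;
        unsigned show_frame;
        unsigned first_partition_length_in_bytes;   /* at most 2^19 - 1 */
    } frame_tag_t;

    static frame_tag_t read_frame_tag(const uint8_t *data)
    {
        /* The 24 bits are stored little-endian in the bitstream. */
        uint32_t raw = (uint32_t)data[0]
                     | ((uint32_t)data[1] << 8)
                     | ((uint32_t)data[2] << 16);
        frame_tag_t tag;

        tag.type = raw & 1;
        tag.version = (raw >> 1) & 7;
        tag.show_frame = (raw >> 4) & 1;
        tag.first_partition_length_in_bytes = raw >> 5;
        return tag;
    }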

+ 90 - 0
thirdparty/libvpx/vp8/common/idct_blk.c

@@ -0,0 +1,90 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_config.h"
+#include "vp8_rtcd.h"
+#include "vpx_mem/vpx_mem.h"
+
+void vp8_dequant_idct_add_c(short *input, short *dq,
+                            unsigned char *dest, int stride);
+void vp8_dc_only_idct_add_c(short input_dc, unsigned char * pred,
+                            int pred_stride, unsigned char *dst_ptr,
+                            int dst_stride);
+
+void vp8_dequant_idct_add_y_block_c
+            (short *q, short *dq,
+             unsigned char *dst, int stride, char *eobs)
+{
+    int i, j;
+
+    for (i = 0; i < 4; i++)
+    {
+        for (j = 0; j < 4; j++)
+        {
+            if (*eobs++ > 1)
+                vp8_dequant_idct_add_c (q, dq, dst, stride);
+            else
+            {
+                vp8_dc_only_idct_add_c (q[0]*dq[0], dst, stride, dst, stride);
+                memset(q, 0, 2 * sizeof(q[0]));
+            }
+
+            q   += 16;
+            dst += 4;
+        }
+
+        dst += 4*stride - 16;
+    }
+}
+
+void vp8_dequant_idct_add_uv_block_c
+            (short *q, short *dq,
+             unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
+{
+    int i, j;
+
+    for (i = 0; i < 2; i++)
+    {
+        for (j = 0; j < 2; j++)
+        {
+            if (*eobs++ > 1)
+                vp8_dequant_idct_add_c (q, dq, dstu, stride);
+            else
+            {
+                vp8_dc_only_idct_add_c (q[0]*dq[0], dstu, stride, dstu, stride);
+                memset(q, 0, 2 * sizeof(q[0]));
+            }
+
+            q    += 16;
+            dstu += 4;
+        }
+
+        dstu += 4*stride - 8;
+    }
+
+    for (i = 0; i < 2; i++)
+    {
+        for (j = 0; j < 2; j++)
+        {
+            if (*eobs++ > 1)
+                vp8_dequant_idct_add_c (q, dq, dstv, stride);
+            else
+            {
+                vp8_dc_only_idct_add_c (q[0]*dq[0], dstv, stride, dstv, stride);
+                memset(q, 0, 2 * sizeof(q[0]));
+            }
+
+            q    += 16;
+            dstv += 4;
+        }
+
+        dstv += 4*stride - 8;
+    }
+}

+ 205 - 0
thirdparty/libvpx/vp8/common/idctllm.c

@@ -0,0 +1,205 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp8_rtcd.h"
+
+/****************************************************************************
+ * Notes:
+ *
+ * This implementation makes use of a 16 bit fixed point version of two multiply
+ * constants:
+ *         1.   sqrt(2) * cos (pi/8)
+ *         2.   sqrt(2) * sin (pi/8)
+ * Because the first constant is bigger than 1, to maintain the same 16 bit
+ * fixed point precision as the second one, we use a trick of
+ *         x * a = x + x*(a-1)
+ * so
+ *         x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1).
+ **************************************************************************/
+static const int cospi8sqrt2minus1 = 20091;
+static const int sinpi8sqrt2      = 35468;
+
+void vp8_short_idct4x4llm_c(short *input, unsigned char *pred_ptr,
+                            int pred_stride, unsigned char *dst_ptr,
+                            int dst_stride)
+{
+    int i;
+    int r, c;
+    int a1, b1, c1, d1;
+    short output[16];
+    short *ip = input;
+    short *op = output;
+    int temp1, temp2;
+    int shortpitch = 4;
+
+    for (i = 0; i < 4; i++)
+    {
+        a1 = ip[0] + ip[8];
+        b1 = ip[0] - ip[8];
+
+        temp1 = (ip[4] * sinpi8sqrt2) >> 16;
+        temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16);
+        c1 = temp1 - temp2;
+
+        temp1 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16);
+        temp2 = (ip[12] * sinpi8sqrt2) >> 16;
+        d1 = temp1 + temp2;
+
+        op[shortpitch*0] = a1 + d1;
+        op[shortpitch*3] = a1 - d1;
+
+        op[shortpitch*1] = b1 + c1;
+        op[shortpitch*2] = b1 - c1;
+
+        ip++;
+        op++;
+    }
+
+    ip = output;
+    op = output;
+
+    for (i = 0; i < 4; i++)
+    {
+        a1 = ip[0] + ip[2];
+        b1 = ip[0] - ip[2];
+
+        temp1 = (ip[1] * sinpi8sqrt2) >> 16;
+        temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16);
+        c1 = temp1 - temp2;
+
+        temp1 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16);
+        temp2 = (ip[3] * sinpi8sqrt2) >> 16;
+        d1 = temp1 + temp2;
+
+
+        op[0] = (a1 + d1 + 4) >> 3;
+        op[3] = (a1 - d1 + 4) >> 3;
+
+        op[1] = (b1 + c1 + 4) >> 3;
+        op[2] = (b1 - c1 + 4) >> 3;
+
+        ip += shortpitch;
+        op += shortpitch;
+    }
+
+    ip = output;
+    for (r = 0; r < 4; r++)
+    {
+        for (c = 0; c < 4; c++)
+        {
+            int a = ip[c] + pred_ptr[c] ;
+
+            if (a < 0)
+                a = 0;
+
+            if (a > 255)
+                a = 255;
+
+            dst_ptr[c] = (unsigned char) a ;
+        }
+        ip += 4;
+        dst_ptr += dst_stride;
+        pred_ptr += pred_stride;
+    }
+}
+
+void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr,
+                            int pred_stride, unsigned char *dst_ptr,
+                            int dst_stride)
+{
+    int a1 = ((input_dc + 4) >> 3);
+    int r, c;
+
+    for (r = 0; r < 4; r++)
+    {
+        for (c = 0; c < 4; c++)
+        {
+            int a = a1 + pred_ptr[c] ;
+
+            if (a < 0)
+                a = 0;
+
+            if (a > 255)
+                a = 255;
+
+            dst_ptr[c] = (unsigned char) a ;
+        }
+
+        dst_ptr += dst_stride;
+        pred_ptr += pred_stride;
+    }
+
+}
+
+void vp8_short_inv_walsh4x4_c(short *input, short *mb_dqcoeff)
+{
+    short output[16];
+    int i;
+    int a1, b1, c1, d1;
+    int a2, b2, c2, d2;
+    short *ip = input;
+    short *op = output;
+
+    for (i = 0; i < 4; i++)
+    {
+        a1 = ip[0] + ip[12];
+        b1 = ip[4] + ip[8];
+        c1 = ip[4] - ip[8];
+        d1 = ip[0] - ip[12];
+
+        op[0] = a1 + b1;
+        op[4] = c1 + d1;
+        op[8] = a1 - b1;
+        op[12] = d1 - c1;
+        ip++;
+        op++;
+    }
+
+    ip = output;
+    op = output;
+
+    for (i = 0; i < 4; i++)
+    {
+        a1 = ip[0] + ip[3];
+        b1 = ip[1] + ip[2];
+        c1 = ip[1] - ip[2];
+        d1 = ip[0] - ip[3];
+
+        a2 = a1 + b1;
+        b2 = c1 + d1;
+        c2 = a1 - b1;
+        d2 = d1 - c1;
+
+        op[0] = (a2 + 3) >> 3;
+        op[1] = (b2 + 3) >> 3;
+        op[2] = (c2 + 3) >> 3;
+        op[3] = (d2 + 3) >> 3;
+
+        ip += 4;
+        op += 4;
+    }
+
+    for(i = 0; i < 16; i++)
+    {
+        mb_dqcoeff[i * 16] = output[i];
+    }
+}
+
+void vp8_short_inv_walsh4x4_1_c(short *input, short *mb_dqcoeff)
+{
+    int i;
+    int a1;
+
+    a1 = ((input[0] + 3) >> 3);
+    for(i = 0; i < 16; i++)
+    {
+        mb_dqcoeff[i * 16] = a1;
+    }
+}
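
The two constants above are exactly the Q16 fixed-point encodings promised by the comment: sqrt(2)·cos(π/8) ≈ 1.306563 and sqrt(2)·sin(π/8) ≈ 0.541196, so (sqrt(2)·cos(π/8) − 1)·65536 rounds to 20091 and sqrt(2)·sin(π/8)·65536 rounds to 35468. A quick stand-alone check:

    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
        const double pi = 3.14159265358979323846;
        const double c = sqrt(2.0) * cos(pi / 8.0);   /* ~1.306563 */
        const double s = sqrt(2.0) * sin(pi / 8.0);   /* ~0.541196 */

        /* Q16: scale by 2^16 and round to the nearest integer. */
        printf("%d %d\n",
               (int)((c - 1.0) * 65536.0 + 0.5),      /* 20091 */
               (int)(s * 65536.0 + 0.5));             /* 35468 */
        return 0;
    }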

+ 70 - 0
thirdparty/libvpx/vp8/common/invtrans.h

@@ -0,0 +1,70 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP8_COMMON_INVTRANS_H_
+#define VP8_COMMON_INVTRANS_H_
+
+#include "./vpx_config.h"
+#include "vp8_rtcd.h"
+#include "blockd.h"
+#include "onyxc_int.h"
+
+#if CONFIG_MULTITHREAD
+#include "vpx_mem/vpx_mem.h"
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static void eob_adjust(char *eobs, short *diff)
+{
+    /* eob adjust.... the idct can only skip if both the dc and eob are zero */
+    int js;
+    for(js = 0; js < 16; js++)
+    {
+        if((eobs[js] == 0) && (diff[0] != 0))
+            eobs[js]++;
+        diff+=16;
+    }
+}
+
+static INLINE void vp8_inverse_transform_mby(MACROBLOCKD *xd)
+{
+    short *DQC = xd->dequant_y1;
+
+    if (xd->mode_info_context->mbmi.mode != SPLITMV)
+    {
+        /* do 2nd order transform on the dc block */
+        if (xd->eobs[24] > 1)
+        {
+            vp8_short_inv_walsh4x4
+                (&xd->block[24].dqcoeff[0], xd->qcoeff);
+        }
+        else
+        {
+            vp8_short_inv_walsh4x4_1
+                (&xd->block[24].dqcoeff[0], xd->qcoeff);
+        }
+        eob_adjust(xd->eobs, xd->qcoeff);
+
+        DQC = xd->dequant_y1_dc;
+    }
+    vp8_dequant_idct_add_y_block
+                    (xd->qcoeff, DQC,
+                     xd->dst.y_buffer,
+                     xd->dst.y_stride, xd->eobs);
+}
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP8_COMMON_INVTRANS_H_

+ 113 - 0
thirdparty/libvpx/vp8/common/loopfilter.h

@@ -0,0 +1,113 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP8_COMMON_LOOPFILTER_H_
+#define VP8_COMMON_LOOPFILTER_H_
+
+#include "vpx_ports/mem.h"
+#include "vpx_config.h"
+#include "vp8_rtcd.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MAX_LOOP_FILTER             63
+/* fraction of total macroblock rows to be used in fast filter level picking */
+/* has to be > 2 */
+#define PARTIAL_FRAME_FRACTION      8
+
+typedef enum
+{
+    NORMAL_LOOPFILTER = 0,
+    SIMPLE_LOOPFILTER = 1
+} LOOPFILTERTYPE;
+
+#if ARCH_ARM
+#define SIMD_WIDTH 1
+#else
+#define SIMD_WIDTH 16
+#endif
+
+/* Need to align this structure so when it is declared and
+ * passed it can be loaded into vector registers.
+ */
+typedef struct
+{
+    DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, mblim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
+    DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, blim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
+    DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, lim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
+    DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, hev_thr[4][SIMD_WIDTH]);
+    unsigned char lvl[4][4][4];
+    unsigned char hev_thr_lut[2][MAX_LOOP_FILTER + 1];
+    unsigned char mode_lf_lut[10];
+} loop_filter_info_n;
+
+typedef struct loop_filter_info
+{
+    const unsigned char * mblim;
+    const unsigned char * blim;
+    const unsigned char * lim;
+    const unsigned char * hev_thr;
+} loop_filter_info;
+
+
+typedef void loop_filter_uvfunction
+(
+    unsigned char *u,   /* source pointer */
+    int p,              /* pitch */
+    const unsigned char *blimit,
+    const unsigned char *limit,
+    const unsigned char *thresh,
+    unsigned char *v
+);
+
+/* assorted loopfilter functions which get used elsewhere */
+struct VP8Common;
+struct macroblockd;
+struct modeinfo;
+
+void vp8_loop_filter_init(struct VP8Common *cm);
+
+void vp8_loop_filter_frame_init(struct VP8Common *cm,
+                                struct macroblockd *mbd,
+                                int default_filt_lvl);
+
+void vp8_loop_filter_frame(struct VP8Common *cm, struct macroblockd *mbd,
+                           int frame_type);
+
+void vp8_loop_filter_partial_frame(struct VP8Common *cm,
+                                   struct macroblockd *mbd,
+                                   int default_filt_lvl);
+
+void vp8_loop_filter_frame_yonly(struct VP8Common *cm,
+                                 struct macroblockd *mbd,
+                                 int default_filt_lvl);
+
+void vp8_loop_filter_update_sharpness(loop_filter_info_n *lfi,
+                                      int sharpness_lvl);
+
+void vp8_loop_filter_row_normal(struct VP8Common *cm,
+                                struct modeinfo *mode_info_context,
+                                int mb_row, int post_ystride, int post_uvstride,
+                                unsigned char *y_ptr, unsigned char *u_ptr,
+                                unsigned char *v_ptr);
+
+void vp8_loop_filter_row_simple(struct VP8Common *cm,
+                                struct modeinfo *mode_info_context,
+                                int mb_row, int post_ystride, int post_uvstride,
+                                unsigned char *y_ptr, unsigned char *u_ptr,
+                                unsigned char *v_ptr);
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP8_COMMON_LOOPFILTER_H_
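
Each entry in loop_filter_info_n stores one 8-bit limit replicated across SIMD_WIDTH bytes so the filter kernels can load a whole aligned vector of identical thresholds in a single read; on ARM the tables collapse to one byte per level. A minimal sketch of filling one replicated row (the real table setup lives in loopfilter.c and does essentially the same thing; the helper name here is hypothetical):

    #include <string.h>

    #define SIMD_WIDTH 16

    /* Replicate a scalar limit so lfi->lim (and friends) point at
     * vector-ready data. */
    static void fill_limit_row(unsigned char row[SIMD_WIDTH], unsigned char limit)
    {
        memset(row, limit, SIMD_WIDTH);
    }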

+ 430 - 0
thirdparty/libvpx/vp8/common/loopfilter_filters.c

@@ -0,0 +1,430 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include <stdlib.h>
+#include "loopfilter.h"
+#include "onyxc_int.h"
+
+typedef unsigned char uc;
+
+static signed char vp8_signed_char_clamp(int t)
+{
+    t = (t < -128 ? -128 : t);
+    t = (t > 127 ? 127 : t);
+    return (signed char) t;
+}
+
+
+/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
+static signed char vp8_filter_mask(uc limit, uc blimit,
+                            uc p3, uc p2, uc p1, uc p0,
+                            uc q0, uc q1, uc q2, uc q3)
+{
+    signed char mask = 0;
+    mask |= (abs(p3 - p2) > limit);
+    mask |= (abs(p2 - p1) > limit);
+    mask |= (abs(p1 - p0) > limit);
+    mask |= (abs(q1 - q0) > limit);
+    mask |= (abs(q2 - q1) > limit);
+    mask |= (abs(q3 - q2) > limit);
+    mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2  > blimit);
+    return mask - 1;
+}
+
+/* is there high variance internal edge ( 11111111 yes, 00000000 no) */
+static signed char vp8_hevmask(uc thresh, uc p1, uc p0, uc q0, uc q1)
+{
+    signed char hev = 0;
+    hev  |= (abs(p1 - p0) > thresh) * -1;
+    hev  |= (abs(q1 - q0) > thresh) * -1;
+    return hev;
+}
+
+static void vp8_filter(signed char mask, uc hev, uc *op1,
+        uc *op0, uc *oq0, uc *oq1)
+
+{
+    signed char ps0, qs0;
+    signed char ps1, qs1;
+    signed char filter_value, Filter1, Filter2;
+    signed char u;
+
+    ps1 = (signed char) * op1 ^ 0x80;
+    ps0 = (signed char) * op0 ^ 0x80;
+    qs0 = (signed char) * oq0 ^ 0x80;
+    qs1 = (signed char) * oq1 ^ 0x80;
+
+    /* add outer taps if we have high edge variance */
+    filter_value = vp8_signed_char_clamp(ps1 - qs1);
+    filter_value &= hev;
+
+    /* inner taps */
+    filter_value = vp8_signed_char_clamp(filter_value + 3 * (qs0 - ps0));
+    filter_value &= mask;
+
+    /* save bottom 3 bits so that we round one side +4 and the other +3;
+     * if it equals 4 we'll set it to adjust by -1 to account for the fact
+     * we'd round 3 the other way
+     */
+    Filter1 = vp8_signed_char_clamp(filter_value + 4);
+    Filter2 = vp8_signed_char_clamp(filter_value + 3);
+    Filter1 >>= 3;
+    Filter2 >>= 3;
+    u = vp8_signed_char_clamp(qs0 - Filter1);
+    *oq0 = u ^ 0x80;
+    u = vp8_signed_char_clamp(ps0 + Filter2);
+    *op0 = u ^ 0x80;
+    filter_value = Filter1;
+
+    /* outer tap adjustments */
+    filter_value += 1;
+    filter_value >>= 1;
+    filter_value &= ~hev;
+
+    u = vp8_signed_char_clamp(qs1 - filter_value);
+    *oq1 = u ^ 0x80;
+    u = vp8_signed_char_clamp(ps1 + filter_value);
+    *op1 = u ^ 0x80;
+
+}
+void vp8_loop_filter_horizontal_edge_c
+(
+    unsigned char *s,
+    int p, /* pitch */
+    const unsigned char *blimit,
+    const unsigned char *limit,
+    const unsigned char *thresh,
+    int count
+)
+{
+    int  hev = 0; /* high edge variance */
+    signed char mask = 0;
+    int i = 0;
+
+    /* loop filter designed to work using chars so that we can make maximum use
+     * of 8 bit simd instructions.
+     */
+    do
+    {
+        mask = vp8_filter_mask(limit[0], blimit[0],
+                               s[-4*p], s[-3*p], s[-2*p], s[-1*p],
+                               s[0*p], s[1*p], s[2*p], s[3*p]);
+
+        hev = vp8_hevmask(thresh[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
+
+        vp8_filter(mask, hev, s - 2 * p, s - 1 * p, s, s + 1 * p);
+
+        ++s;
+    }
+    while (++i < count * 8);
+}
+
+void vp8_loop_filter_vertical_edge_c
+(
+    unsigned char *s,
+    int p,
+    const unsigned char *blimit,
+    const unsigned char *limit,
+    const unsigned char *thresh,
+    int count
+)
+{
+    int  hev = 0; /* high edge variance */
+    signed char mask = 0;
+    int i = 0;
+
+    /* loop filter designed to work using chars so that we can make maximum use
+     * of 8 bit simd instructions.
+     */
+    do
+    {
+        mask = vp8_filter_mask(limit[0], blimit[0],
+                               s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]);
+
+        hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
+
+        vp8_filter(mask, hev, s - 2, s - 1, s, s + 1);
+
+        s += p;
+    }
+    while (++i < count * 8);
+}
+
+static void vp8_mbfilter(signed char mask, uc hev,
+                           uc *op2, uc *op1, uc *op0, uc *oq0, uc *oq1, uc *oq2)
+{
+    signed char s, u;
+    signed char filter_value, Filter1, Filter2;
+    signed char ps2 = (signed char) * op2 ^ 0x80;
+    signed char ps1 = (signed char) * op1 ^ 0x80;
+    signed char ps0 = (signed char) * op0 ^ 0x80;
+    signed char qs0 = (signed char) * oq0 ^ 0x80;
+    signed char qs1 = (signed char) * oq1 ^ 0x80;
+    signed char qs2 = (signed char) * oq2 ^ 0x80;
+
+    /* add outer taps if we have high edge variance */
+    filter_value = vp8_signed_char_clamp(ps1 - qs1);
+    filter_value = vp8_signed_char_clamp(filter_value + 3 * (qs0 - ps0));
+    filter_value &= mask;
+
+    Filter2 = filter_value;
+    Filter2 &= hev;
+
+    /* save bottom 3 bits so that we round one side +4 and the other +3 */
+    Filter1 = vp8_signed_char_clamp(Filter2 + 4);
+    Filter2 = vp8_signed_char_clamp(Filter2 + 3);
+    Filter1 >>= 3;
+    Filter2 >>= 3;
+    qs0 = vp8_signed_char_clamp(qs0 - Filter1);
+    ps0 = vp8_signed_char_clamp(ps0 + Filter2);
+
+
+    /* only apply wider filter if not high edge variance */
+    filter_value &= ~hev;
+    Filter2 = filter_value;
+
+    /* roughly 3/7th difference across boundary */
+    u = vp8_signed_char_clamp((63 + Filter2 * 27) >> 7);
+    s = vp8_signed_char_clamp(qs0 - u);
+    *oq0 = s ^ 0x80;
+    s = vp8_signed_char_clamp(ps0 + u);
+    *op0 = s ^ 0x80;
+
+    /* roughly 2/7th difference across boundary */
+    u = vp8_signed_char_clamp((63 + Filter2 * 18) >> 7);
+    s = vp8_signed_char_clamp(qs1 - u);
+    *oq1 = s ^ 0x80;
+    s = vp8_signed_char_clamp(ps1 + u);
+    *op1 = s ^ 0x80;
+
+    /* roughly 1/7th difference across boundary */
+    u = vp8_signed_char_clamp((63 + Filter2 * 9) >> 7);
+    s = vp8_signed_char_clamp(qs2 - u);
+    *oq2 = s ^ 0x80;
+    s = vp8_signed_char_clamp(ps2 + u);
+    *op2 = s ^ 0x80;
+}
+
+void vp8_mbloop_filter_horizontal_edge_c
+(
+    unsigned char *s,
+    int p,
+    const unsigned char *blimit,
+    const unsigned char *limit,
+    const unsigned char *thresh,
+    int count
+)
+{
+    signed char hev = 0; /* high edge variance */
+    signed char mask = 0;
+    int i = 0;
+
+    /* loop filter designed to work using chars so that we can make maximum use
+     * of 8 bit simd instructions.
+     */
+    do
+    {
+
+        mask = vp8_filter_mask(limit[0], blimit[0],
+                               s[-4*p], s[-3*p], s[-2*p], s[-1*p],
+                               s[0*p], s[1*p], s[2*p], s[3*p]);
+
+        hev = vp8_hevmask(thresh[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
+
+        vp8_mbfilter(mask, hev, s - 3 * p, s - 2 * p, s - 1 * p, s, s + 1 * p, s + 2 * p);
+
+        ++s;
+    }
+    while (++i < count * 8);
+
+}
+
+
+void vp8_mbloop_filter_vertical_edge_c
+(
+    unsigned char *s,
+    int p,
+    const unsigned char *blimit,
+    const unsigned char *limit,
+    const unsigned char *thresh,
+    int count
+)
+{
+    signed char hev = 0; /* high edge variance */
+    signed char mask = 0;
+    int i = 0;
+
+    do
+    {
+
+        mask = vp8_filter_mask(limit[0], blimit[0],
+                               s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]);
+
+        hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
+
+        vp8_mbfilter(mask, hev, s - 3, s - 2, s - 1, s, s + 1, s + 2);
+
+        s += p;
+    }
+    while (++i < count * 8);
+
+}
+
+/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
+static signed char vp8_simple_filter_mask(uc blimit, uc p1, uc p0, uc q0, uc q1)
+{
+/* Why does this cause problems for win32?
+ * error C2143: syntax error : missing ';' before 'type'
+ *  (void) limit;
+ */
+    signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2  <= blimit) * -1;
+    return mask;
+}
+
+static void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0, uc *oq1)
+{
+    signed char filter_value, Filter1, Filter2;
+    signed char p1 = (signed char) * op1 ^ 0x80;
+    signed char p0 = (signed char) * op0 ^ 0x80;
+    signed char q0 = (signed char) * oq0 ^ 0x80;
+    signed char q1 = (signed char) * oq1 ^ 0x80;
+    signed char u;
+
+    filter_value = vp8_signed_char_clamp(p1 - q1);
+    filter_value = vp8_signed_char_clamp(filter_value + 3 * (q0 - p0));
+    filter_value &= mask;
+
+    /* save bottom 3 bits so that we round one side +4 and the other +3 */
+    Filter1 = vp8_signed_char_clamp(filter_value + 4);
+    Filter1 >>= 3;
+    u = vp8_signed_char_clamp(q0 - Filter1);
+    *oq0  = u ^ 0x80;
+
+    Filter2 = vp8_signed_char_clamp(filter_value + 3);
+    Filter2 >>= 3;
+    u = vp8_signed_char_clamp(p0 + Filter2);
+    *op0 = u ^ 0x80;
+}
+
+void vp8_loop_filter_simple_horizontal_edge_c
+(
+    unsigned char *s,
+    int p,
+    const unsigned char *blimit
+)
+{
+    signed char mask = 0;
+    int i = 0;
+
+    do
+    {
+        mask = vp8_simple_filter_mask(blimit[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
+        vp8_simple_filter(mask, s - 2 * p, s - 1 * p, s, s + 1 * p);
+        ++s;
+    }
+    while (++i < 16);
+}
+
+void vp8_loop_filter_simple_vertical_edge_c
+(
+    unsigned char *s,
+    int p,
+    const unsigned char *blimit
+)
+{
+    signed char mask = 0;
+    int i = 0;
+
+    do
+    {
+        mask = vp8_simple_filter_mask(blimit[0], s[-2], s[-1], s[0], s[1]);
+        vp8_simple_filter(mask, s - 2, s - 1, s, s + 1);
+        s += p;
+    }
+    while (++i < 16);
+
+}
+
+/* Horizontal MB filtering */
+void vp8_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr,
+                           unsigned char *v_ptr, int y_stride, int uv_stride,
+                           loop_filter_info *lfi)
+{
+    vp8_mbloop_filter_horizontal_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
+
+    if (u_ptr)
+        vp8_mbloop_filter_horizontal_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+
+    if (v_ptr)
+        vp8_mbloop_filter_horizontal_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+}
+
+/* Vertical MB Filtering */
+void vp8_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr,
+                           unsigned char *v_ptr, int y_stride, int uv_stride,
+                           loop_filter_info *lfi)
+{
+    vp8_mbloop_filter_vertical_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
+
+    if (u_ptr)
+        vp8_mbloop_filter_vertical_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+
+    if (v_ptr)
+        vp8_mbloop_filter_vertical_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+}
+
+/* Horizontal B Filtering */
+void vp8_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr,
+                          unsigned char *v_ptr, int y_stride, int uv_stride,
+                          loop_filter_info *lfi)
+{
+    vp8_loop_filter_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+    vp8_loop_filter_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+    vp8_loop_filter_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+
+    if (u_ptr)
+        vp8_loop_filter_horizontal_edge_c(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
+
+    if (v_ptr)
+        vp8_loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
+}
+
+void vp8_loop_filter_bhs_c(unsigned char *y_ptr, int y_stride,
+                           const unsigned char *blimit)
+{
+    vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, blimit);
+    vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, blimit);
+    vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, blimit);
+}
+
+/* Vertical B Filtering */
+void vp8_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr,
+                          unsigned char *v_ptr, int y_stride, int uv_stride,
+                          loop_filter_info *lfi)
+{
+    vp8_loop_filter_vertical_edge_c(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+    vp8_loop_filter_vertical_edge_c(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+    vp8_loop_filter_vertical_edge_c(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
+
+    if (u_ptr)
+        vp8_loop_filter_vertical_edge_c(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
+
+    if (v_ptr)
+        vp8_loop_filter_vertical_edge_c(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
+}
+
+void vp8_loop_filter_bvs_c(unsigned char *y_ptr, int y_stride,
+                           const unsigned char *blimit)
+{
+    vp8_loop_filter_simple_vertical_edge_c(y_ptr + 4, y_stride, blimit);
+    vp8_loop_filter_simple_vertical_edge_c(y_ptr + 8, y_stride, blimit);
+    vp8_loop_filter_simple_vertical_edge_c(y_ptr + 12, y_stride, blimit);
+}
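
The simple filter's on/off decision in vp8_simple_filter_mask compares |p0 − q0|·2 + |p1 − q1|/2 against the edge limit and turns the boolean into an all-ones/all-zeros byte mask. A quick numeric check with made-up pixel values and an illustrative limit:

    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
        const int p1 = 70, p0 = 68, q0 = 60, q1 = 62;
        const int blimit = 46;                                 /* illustrative */

        int activity = abs(p0 - q0) * 2 + abs(p1 - q1) / 2;    /* 16 + 4 = 20 */
        signed char mask = (activity <= blimit) * -1;          /* -1 = filter  */

        printf("activity=%d mask=%d\n", activity, mask);       /* 20  -1 */
        return 0;
    }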

+ 68 - 0
thirdparty/libvpx/vp8/common/mbpitch.c

@@ -0,0 +1,68 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "blockd.h"
+
+void vp8_setup_block_dptrs(MACROBLOCKD *x)
+{
+    int r, c;
+
+    for (r = 0; r < 4; r++)
+    {
+        for (c = 0; c < 4; c++)
+        {
+            x->block[r*4+c].predictor = x->predictor + r * 4 * 16 + c * 4;
+        }
+    }
+
+    for (r = 0; r < 2; r++)
+    {
+        for (c = 0; c < 2; c++)
+        {
+            x->block[16+r*2+c].predictor = x->predictor + 256 + r * 4 * 8 + c * 4;
+
+        }
+    }
+
+    for (r = 0; r < 2; r++)
+    {
+        for (c = 0; c < 2; c++)
+        {
+            x->block[20+r*2+c].predictor = x->predictor + 320 + r * 4 * 8 + c * 4;
+
+        }
+    }
+
+    for (r = 0; r < 25; r++)
+    {
+        x->block[r].qcoeff  = x->qcoeff  + r * 16;
+        x->block[r].dqcoeff = x->dqcoeff + r * 16;
+        x->block[r].eob     = x->eobs + r;
+    }
+}
+
+void vp8_build_block_doffsets(MACROBLOCKD *x)
+{
+    int block;
+
+    for (block = 0; block < 16; block++) /* y blocks */
+    {
+        x->block[block].offset =
+            (block >> 2) * 4 * x->dst.y_stride + (block & 3) * 4;
+    }
+
+    for (block = 16; block < 20; block++) /* U and V blocks */
+    {
+        x->block[block+4].offset =
+        x->block[block].offset =
+            ((block - 16) >> 1) * 4 * x->dst.uv_stride + (block & 1) * 4;
+    }
+}

+ 40 - 0
thirdparty/libvpx/vp8/common/modecont.c

@@ -0,0 +1,40 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "entropy.h"
+
+const int vp8_mode_contexts[6][4] =
+{
+    {
+        /* 0 */
+        7,     1,     1,   143,
+    },
+    {
+        /* 1 */
+        14,    18,    14,   107,
+    },
+    {
+        /* 2 */
+        135,    64,    57,    68,
+    },
+    {
+        /* 3 */
+        60,    56,   128,    65,
+    },
+    {
+        /* 4 */
+        159,   134,   128,    34,
+    },
+    {
+        /* 5 */
+        234,   188,   128,    28,
+    },
+};

+ 25 - 0
thirdparty/libvpx/vp8/common/modecont.h

@@ -0,0 +1,25 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP8_COMMON_MODECONT_H_
+#define VP8_COMMON_MODECONT_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern const int vp8_mode_contexts[6][4];
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP8_COMMON_MODECONT_H_

+ 36 - 0
thirdparty/libvpx/vp8/common/mv.h

@@ -0,0 +1,36 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP8_COMMON_MV_H_
+#define VP8_COMMON_MV_H_
+#include "vpx/vpx_integer.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct
+{
+    short row;
+    short col;
+} MV;
+
+typedef union int_mv
+{
+    uint32_t  as_int;
+    MV        as_mv;
+} int_mv;        /* facilitates faster equality tests and copies */
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP8_COMMON_MV_H_
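
The union is what the trailing comment means: because row and col are two 16-bit shorts, a whole motion vector fits in one 32-bit word, so equality tests and copies touch a single integer instead of two fields. A tiny illustration using the same layout (stand-alone types, not the header's):

    #include <stdint.h>

    typedef struct { short row, col; } MV;
    typedef union { uint32_t as_int; MV as_mv; } int_mv;

    static int mv_equal(int_mv a, int_mv b)
    {
        return a.as_int == b.as_int;    /* one compare covers row and col */
    }

    static void mv_copy(int_mv *dst, const int_mv *src)
    {
        dst->as_int = src->as_int;      /* one store copies both components */
    }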

+ 185 - 0
thirdparty/libvpx/vp8/common/onyxc_int.h

@@ -0,0 +1,185 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP8_COMMON_ONYXC_INT_H_
+#define VP8_COMMON_ONYXC_INT_H_
+
+#include "vpx_config.h"
+#include "vp8_rtcd.h"
+#include "vpx/internal/vpx_codec_internal.h"
+#include "loopfilter.h"
+#include "entropymv.h"
+#include "entropy.h"
+#if CONFIG_POSTPROC
+#include "postproc.h"
+#endif
+
+/*#ifdef PACKET_TESTING*/
+#include "header.h"
+/*#endif*/
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MINQ 0
+#define MAXQ 127
+#define QINDEX_RANGE (MAXQ + 1)
+
+#define NUM_YV12_BUFFERS 4
+
+#define MAX_PARTITIONS 9
+
+typedef struct frame_contexts
+{
+    vp8_prob bmode_prob [VP8_BINTRAMODES-1];
+    vp8_prob ymode_prob [VP8_YMODES-1];   /* interframe intra mode probs */
+    vp8_prob uv_mode_prob [VP8_UV_MODES-1];
+    vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1];
+    vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
+    MV_CONTEXT mvc[2];
+} FRAME_CONTEXT;
+
+typedef enum
+{
+    ONE_PARTITION  = 0,
+    TWO_PARTITION  = 1,
+    FOUR_PARTITION = 2,
+    EIGHT_PARTITION = 3
+} TOKEN_PARTITION;
+
+typedef enum
+{
+    RECON_CLAMP_REQUIRED        = 0,
+    RECON_CLAMP_NOTREQUIRED     = 1
+} CLAMP_TYPE;
+
+typedef struct VP8Common
+
+{
+    struct vpx_internal_error_info  error;
+
+    DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][2]);
+    DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][2]);
+    DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][2]);
+
+    int Width;
+    int Height;
+    int horiz_scale;
+    int vert_scale;
+
+    CLAMP_TYPE  clamp_type;
+
+    YV12_BUFFER_CONFIG *frame_to_show;
+
+    YV12_BUFFER_CONFIG yv12_fb[NUM_YV12_BUFFERS];
+    int fb_idx_ref_cnt[NUM_YV12_BUFFERS];
+    int new_fb_idx, lst_fb_idx, gld_fb_idx, alt_fb_idx;
+
+    YV12_BUFFER_CONFIG temp_scale_frame;
+
+#if CONFIG_POSTPROC
+    YV12_BUFFER_CONFIG post_proc_buffer;
+    YV12_BUFFER_CONFIG post_proc_buffer_int;
+    int post_proc_buffer_int_used;
+    unsigned char *pp_limits_buffer;   /* post-processing filter coefficients */
+#endif
+
+    FRAME_TYPE last_frame_type;  /* Save last frame's frame type for motion search. */
+    FRAME_TYPE frame_type;
+
+    int show_frame;
+
+    int frame_flags;
+    int MBs;
+    int mb_rows;
+    int mb_cols;
+    int mode_info_stride;
+
+    /* profile settings */
+    int mb_no_coeff_skip;
+    int no_lpf;
+    int use_bilinear_mc_filter;
+    int full_pixel;
+
+    int base_qindex;
+
+    int y1dc_delta_q;
+    int y2dc_delta_q;
+    int y2ac_delta_q;
+    int uvdc_delta_q;
+    int uvac_delta_q;
+
+    /* We allocate a MODE_INFO struct for each macroblock, together with
+       an extra row on top and column on the left to simplify prediction. */
+
+    MODE_INFO *mip; /* Base of allocated array */
+    MODE_INFO *mi;  /* Corresponds to upper left visible macroblock */
+#if CONFIG_ERROR_CONCEALMENT
+    MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */
+    MODE_INFO *prev_mi;  /* 'mi' from last frame (points into prev_mip) */
+#endif
+    MODE_INFO *show_frame_mi;  /* MODE_INFO for the last decoded frame
+                                  to show */
+    LOOPFILTERTYPE filter_type;
+
+    loop_filter_info_n lf_info;
+
+    int filter_level;
+    int last_sharpness_level;
+    int sharpness_level;
+
+    int refresh_last_frame;       /* Two state 0 = NO, 1 = YES */
+    int refresh_golden_frame;     /* Two state 0 = NO, 1 = YES */
+    int refresh_alt_ref_frame;     /* Two state 0 = NO, 1 = YES */
+
+    int copy_buffer_to_gf;         /* 0 none, 1 Last to GF, 2 ARF to GF */
+    int copy_buffer_to_arf;        /* 0 none, 1 Last to ARF, 2 GF to ARF */
+
+    int refresh_entropy_probs;    /* Two state 0 = NO, 1 = YES */
+
+    int ref_frame_sign_bias[MAX_REF_FRAMES];    /* Two state 0, 1 */
+
+    /* Y,U,V,Y2 */
+    ENTROPY_CONTEXT_PLANES *above_context;   /* row of context for each plane */
+    ENTROPY_CONTEXT_PLANES left_context;  /* (up to) 4 contexts "" */
+
+    FRAME_CONTEXT lfc; /* last frame entropy */
+    FRAME_CONTEXT fc;  /* this frame entropy */
+
+    unsigned int current_video_frame;
+
+    int version;
+
+    TOKEN_PARTITION multi_token_partition;
+
+#ifdef PACKET_TESTING
+    VP8_HEADER oh;
+#endif
+#if CONFIG_POSTPROC_VISUALIZER
+    double bitrate;
+    double framerate;
+#endif
+
+#if CONFIG_MULTITHREAD
+    int processor_core_count;
+#endif
+#if CONFIG_POSTPROC
+    struct postproc_state  postproc_state;
+#endif
+    int cpu_caps;
+} VP8_COMMON;
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP8_COMMON_ONYXC_INT_H_

+ 63 - 0
thirdparty/libvpx/vp8/common/onyxd.h

@@ -0,0 +1,63 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP8_COMMON_ONYXD_H_
+#define VP8_COMMON_ONYXD_H_
+
+
+/* Create/destroy static data structures. */
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+#include "vpx_scale/yv12config.h"
+#include "ppflags.h"
+#include "vpx_ports/mem.h"
+#include "vpx/vpx_codec.h"
+#include "vpx/vp8.h"
+
+    struct VP8D_COMP;
+
+    typedef struct
+    {
+        int     Width;
+        int     Height;
+        int     Version;
+        int     postprocess;
+        int     max_threads;
+        int     error_concealment;
+    } VP8D_CONFIG;
+
+    typedef enum
+    {
+        VP8D_OK = 0
+    } VP8D_SETTING;
+
+    void vp8dx_initialize(void);
+
+    void vp8dx_set_setting(struct VP8D_COMP* comp, VP8D_SETTING oxst, int x);
+
+    int vp8dx_get_setting(struct VP8D_COMP* comp, VP8D_SETTING oxst);
+
+    int vp8dx_receive_compressed_data(struct VP8D_COMP* comp,
+                                      size_t size, const uint8_t *dest,
+                                      int64_t time_stamp);
+    int vp8dx_get_raw_frame(struct VP8D_COMP* comp, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags);
+
+    vpx_codec_err_t vp8dx_get_reference(struct VP8D_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd);
+    vpx_codec_err_t vp8dx_set_reference(struct VP8D_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif  // VP8_COMMON_ONYXD_H_

+ 49 - 0
thirdparty/libvpx/vp8/common/ppflags.h

@@ -0,0 +1,49 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP8_COMMON_PPFLAGS_H_
+#define VP8_COMMON_PPFLAGS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+enum
+{
+    VP8D_NOFILTERING            = 0,
+    VP8D_DEBLOCK                = 1<<0,
+    VP8D_DEMACROBLOCK           = 1<<1,
+    VP8D_ADDNOISE               = 1<<2,
+    VP8D_DEBUG_TXT_FRAME_INFO   = 1<<3,
+    VP8D_DEBUG_TXT_MBLK_MODES   = 1<<4,
+    VP8D_DEBUG_TXT_DC_DIFF      = 1<<5,
+    VP8D_DEBUG_TXT_RATE_INFO    = 1<<6,
+    VP8D_DEBUG_DRAW_MV          = 1<<7,
+    VP8D_DEBUG_CLR_BLK_MODES    = 1<<8,
+    VP8D_DEBUG_CLR_FRM_REF_BLKS = 1<<9,
+    VP8D_MFQE                   = 1<<10
+};
+
+typedef struct
+{
+    int post_proc_flag;
+    int deblocking_level;
+    int noise_level;
+    int display_ref_frame_flag;
+    int display_mb_modes_flag;
+    int display_b_modes_flag;
+    int display_mv_flag;
+} vp8_ppflags_t;
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP8_COMMON_PPFLAGS_H_

+ 135 - 0
thirdparty/libvpx/vp8/common/quant_common.c

@@ -0,0 +1,135 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "quant_common.h"
+
+static const int dc_qlookup[QINDEX_RANGE] =
+{
+    4,    5,    6,    7,    8,    9,   10,   10,   11,   12,   13,   14,   15,   16,   17,   17,
+    18,   19,   20,   20,   21,   21,   22,   22,   23,   23,   24,   25,   25,   26,   27,   28,
+    29,   30,   31,   32,   33,   34,   35,   36,   37,   37,   38,   39,   40,   41,   42,   43,
+    44,   45,   46,   46,   47,   48,   49,   50,   51,   52,   53,   54,   55,   56,   57,   58,
+    59,   60,   61,   62,   63,   64,   65,   66,   67,   68,   69,   70,   71,   72,   73,   74,
+    75,   76,   76,   77,   78,   79,   80,   81,   82,   83,   84,   85,   86,   87,   88,   89,
+    91,   93,   95,   96,   98,  100,  101,  102,  104,  106,  108,  110,  112,  114,  116,  118,
+    122,  124,  126,  128,  130,  132,  134,  136,  138,  140,  143,  145,  148,  151,  154,  157,
+};
+
+static const int ac_qlookup[QINDEX_RANGE] =
+{
+    4,    5,    6,    7,    8,    9,   10,   11,   12,   13,   14,   15,   16,   17,   18,   19,
+    20,   21,   22,   23,   24,   25,   26,   27,   28,   29,   30,   31,   32,   33,   34,   35,
+    36,   37,   38,   39,   40,   41,   42,   43,   44,   45,   46,   47,   48,   49,   50,   51,
+    52,   53,   54,   55,   56,   57,   58,   60,   62,   64,   66,   68,   70,   72,   74,   76,
+    78,   80,   82,   84,   86,   88,   90,   92,   94,   96,   98,  100,  102,  104,  106,  108,
+    110,  112,  114,  116,  119,  122,  125,  128,  131,  134,  137,  140,  143,  146,  149,  152,
+    155,  158,  161,  164,  167,  170,  173,  177,  181,  185,  189,  193,  197,  201,  205,  209,
+    213,  217,  221,  225,  229,  234,  239,  245,  249,  254,  259,  264,  269,  274,  279,  284,
+};
+
+
+int vp8_dc_quant(int QIndex, int Delta)
+{
+    int retval;
+
+    QIndex = QIndex + Delta;
+
+    if (QIndex > 127)
+        QIndex = 127;
+    else if (QIndex < 0)
+        QIndex = 0;
+
+    retval = dc_qlookup[ QIndex ];
+    return retval;
+}
+
+int vp8_dc2quant(int QIndex, int Delta)
+{
+    int retval;
+
+    QIndex = QIndex + Delta;
+
+    if (QIndex > 127)
+        QIndex = 127;
+    else if (QIndex < 0)
+        QIndex = 0;
+
+    retval = dc_qlookup[ QIndex ] * 2;
+    return retval;
+
+}
+int vp8_dc_uv_quant(int QIndex, int Delta)
+{
+    int retval;
+
+    QIndex = QIndex + Delta;
+
+    if (QIndex > 127)
+        QIndex = 127;
+    else if (QIndex < 0)
+        QIndex = 0;
+
+    retval = dc_qlookup[ QIndex ];
+
+    if (retval > 132)
+        retval = 132;
+
+    return retval;
+}
+
+int vp8_ac_yquant(int QIndex)
+{
+    int retval;
+
+    if (QIndex > 127)
+        QIndex = 127;
+    else if (QIndex < 0)
+        QIndex = 0;
+
+    retval = ac_qlookup[ QIndex ];
+    return retval;
+}
+
+int vp8_ac2quant(int QIndex, int Delta)
+{
+    int retval;
+
+    QIndex = QIndex + Delta;
+
+    if (QIndex > 127)
+        QIndex = 127;
+    else if (QIndex < 0)
+        QIndex = 0;
+
+    /* For all x in [0..284], x*155/100 is bitwise equal to (x*101581) >> 16.
+     * The smallest precision for that is '(x*6349) >> 12' but 16 is a good
+     * word size. */
+    retval = (ac_qlookup[ QIndex ] * 101581) >> 16;
+
+    if (retval < 8)
+        retval = 8;
+
+    return retval;
+}
+int vp8_ac_uv_quant(int QIndex, int Delta)
+{
+    int retval;
+
+    QIndex = QIndex + Delta;
+
+    if (QIndex > 127)
+        QIndex = 127;
+    else if (QIndex < 0)
+        QIndex = 0;
+
+    retval = ac_qlookup[ QIndex ];
+    return retval;
+}
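
Note on vp8_ac2quant() above: its comment asserts that, for every x in [0..284] (284 is the largest ac_qlookup entry), the integer expression x*155/100 is bit-for-bit equal to (x*101581) >> 16, and that (x*6349) >> 12 is the smallest-precision equivalent. A minimal standalone check of both identities (not part of the imported sources, just a verification sketch):

    /* Verify the fixed-point identities quoted in the vp8_ac2quant() comment. */
    #include <assert.h>
    #include <stdio.h>

    int main(void)
    {
        int x;
        for (x = 0; x <= 284; x++)
        {
            assert(x * 155 / 100 == (x * 101581) >> 16); /* 155/100 ~ 101581/65536 */
            assert(x * 155 / 100 == (x * 6349) >> 12);   /* 155/100 ~ 6349/4096    */
        }
        printf("identities hold for all x in [0..284]\n");
        return 0;
    }

The surrounding functions all follow one pattern: clamp QIndex (plus Delta) into [0, 127], look the result up in dc_qlookup or ac_qlookup, then apply the per-quantizer adjustment (a factor of 2 for the second-order DC, a cap of 132 for chroma DC, the 155/100 scale with a floor of 8 for the second-order AC).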

+ 34 - 0
thirdparty/libvpx/vp8/common/quant_common.h

@@ -0,0 +1,34 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP8_COMMON_QUANT_COMMON_H_
+#define VP8_COMMON_QUANT_COMMON_H_
+
+
+#include "string.h"
+#include "blockd.h"
+#include "onyxc_int.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int vp8_ac_yquant(int QIndex);
+extern int vp8_dc_quant(int QIndex, int Delta);
+extern int vp8_dc2quant(int QIndex, int Delta);
+extern int vp8_ac2quant(int QIndex, int Delta);
+extern int vp8_dc_uv_quant(int QIndex, int Delta);
+extern int vp8_ac_uv_quant(int QIndex, int Delta);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP8_COMMON_QUANT_COMMON_H_

+ 544 - 0
thirdparty/libvpx/vp8/common/reconinter.c

@@ -0,0 +1,544 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include <limits.h>
+#include <string.h>
+
+#include "vpx_config.h"
+#include "vp8_rtcd.h"
+#include "vpx/vpx_integer.h"
+#include "blockd.h"
+#include "reconinter.h"
+#if CONFIG_RUNTIME_CPU_DETECT
+#include "onyxc_int.h"
+#endif
+
+void vp8_copy_mem16x16_c(
+    unsigned char *src,
+    int src_stride,
+    unsigned char *dst,
+    int dst_stride)
+{
+
+    int r;
+
+    for (r = 0; r < 16; r++)
+    {
+        memcpy(dst, src, 16);
+
+        src += src_stride;
+        dst += dst_stride;
+
+    }
+
+}
+
+void vp8_copy_mem8x8_c(
+    unsigned char *src,
+    int src_stride,
+    unsigned char *dst,
+    int dst_stride)
+{
+    int r;
+
+    for (r = 0; r < 8; r++)
+    {
+        memcpy(dst, src, 8);
+
+        src += src_stride;
+        dst += dst_stride;
+
+    }
+
+}
+
+void vp8_copy_mem8x4_c(
+    unsigned char *src,
+    int src_stride,
+    unsigned char *dst,
+    int dst_stride)
+{
+    int r;
+
+    for (r = 0; r < 4; r++)
+    {
+        memcpy(dst, src, 8);
+
+        src += src_stride;
+        dst += dst_stride;
+
+    }
+
+}
+
+
+void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, unsigned char *base_pre, int pre_stride, vp8_subpix_fn_t sppf)
+{
+    int r;
+    unsigned char *pred_ptr = d->predictor;
+    unsigned char *ptr;
+    ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3);
+
+    if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
+    {
+        sppf(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, pred_ptr, pitch);
+    }
+    else
+    {
+        for (r = 0; r < 4; r++)
+        {
+            pred_ptr[0]  = ptr[0];
+            pred_ptr[1]  = ptr[1];
+            pred_ptr[2]  = ptr[2];
+            pred_ptr[3]  = ptr[3];
+            pred_ptr     += pitch;
+            ptr         += pre_stride;
+        }
+    }
+}
+
+static void build_inter_predictors4b(MACROBLOCKD *x, BLOCKD *d, unsigned char *dst, int dst_stride, unsigned char *base_pre, int pre_stride)
+{
+    unsigned char *ptr;
+    ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3);
+
+    if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
+    {
+        x->subpixel_predict8x8(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst, dst_stride);
+    }
+    else
+    {
+        vp8_copy_mem8x8(ptr, pre_stride, dst, dst_stride);
+    }
+}
+
+static void build_inter_predictors2b(MACROBLOCKD *x, BLOCKD *d, unsigned char *dst, int dst_stride, unsigned char *base_pre, int pre_stride)
+{
+    unsigned char *ptr;
+    ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3);
+
+    if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
+    {
+        x->subpixel_predict8x4(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst, dst_stride);
+    }
+    else
+    {
+        vp8_copy_mem8x4(ptr, pre_stride, dst, dst_stride);
+    }
+}
+
+static void build_inter_predictors_b(BLOCKD *d, unsigned char *dst, int dst_stride, unsigned char *base_pre, int pre_stride, vp8_subpix_fn_t sppf)
+{
+    int r;
+    unsigned char *ptr;
+    ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3);
+
+    if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
+    {
+        sppf(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst, dst_stride);
+    }
+    else
+    {
+        for (r = 0; r < 4; r++)
+        {
+          dst[0]  = ptr[0];
+          dst[1]  = ptr[1];
+          dst[2]  = ptr[2];
+          dst[3]  = ptr[3];
+          dst     += dst_stride;
+          ptr     += pre_stride;
+        }
+    }
+}
+
+
+/*encoder only*/
+void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x)
+{
+    unsigned char *uptr, *vptr;
+    unsigned char *upred_ptr = &x->predictor[256];
+    unsigned char *vpred_ptr = &x->predictor[320];
+
+    int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
+    int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
+    int offset;
+    int pre_stride = x->pre.uv_stride;
+
+    /* calc uv motion vectors */
+    mv_row += 1 | (mv_row >> (sizeof(int) * CHAR_BIT - 1));
+    mv_col += 1 | (mv_col >> (sizeof(int) * CHAR_BIT - 1));
+    mv_row /= 2;
+    mv_col /= 2;
+    mv_row &= x->fullpixel_mask;
+    mv_col &= x->fullpixel_mask;
+
+    offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
+    uptr = x->pre.u_buffer + offset;
+    vptr = x->pre.v_buffer + offset;
+
+    if ((mv_row | mv_col) & 7)
+    {
+        x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, upred_ptr, 8);
+        x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vpred_ptr, 8);
+    }
+    else
+    {
+        vp8_copy_mem8x8(uptr, pre_stride, upred_ptr, 8);
+        vp8_copy_mem8x8(vptr, pre_stride, vpred_ptr, 8);
+    }
+}
+
+/*encoder only*/
+void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x)
+{
+    int i, j;
+    int pre_stride = x->pre.uv_stride;
+    unsigned char *base_pre;
+
+    /* build uv mvs */
+    for (i = 0; i < 2; i++)
+    {
+        for (j = 0; j < 2; j++)
+        {
+            int yoffset = i * 8 + j * 2;
+            int uoffset = 16 + i * 2 + j;
+            int voffset = 20 + i * 2 + j;
+
+            int temp;
+
+            temp = x->block[yoffset  ].bmi.mv.as_mv.row
+                   + x->block[yoffset+1].bmi.mv.as_mv.row
+                   + x->block[yoffset+4].bmi.mv.as_mv.row
+                   + x->block[yoffset+5].bmi.mv.as_mv.row;
+
+            temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
+
+            x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask;
+
+            temp = x->block[yoffset  ].bmi.mv.as_mv.col
+                   + x->block[yoffset+1].bmi.mv.as_mv.col
+                   + x->block[yoffset+4].bmi.mv.as_mv.col
+                   + x->block[yoffset+5].bmi.mv.as_mv.col;
+
+            temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
+
+            x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask;
+
+            x->block[voffset].bmi.mv.as_int = x->block[uoffset].bmi.mv.as_int;
+        }
+    }
+
+    base_pre = x->pre.u_buffer;
+    for (i = 16; i < 20; i += 2)
+    {
+        BLOCKD *d0 = &x->block[i];
+        BLOCKD *d1 = &x->block[i+1];
+
+        if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
+            build_inter_predictors2b(x, d0, d0->predictor, 8, base_pre, pre_stride);
+        else
+        {
+            vp8_build_inter_predictors_b(d0, 8, base_pre, pre_stride, x->subpixel_predict);
+            vp8_build_inter_predictors_b(d1, 8, base_pre, pre_stride, x->subpixel_predict);
+        }
+    }
+
+    base_pre = x->pre.v_buffer;
+    for (i = 20; i < 24; i += 2)
+    {
+        BLOCKD *d0 = &x->block[i];
+        BLOCKD *d1 = &x->block[i+1];
+
+        if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
+            build_inter_predictors2b(x, d0, d0->predictor, 8, base_pre, pre_stride);
+        else
+        {
+            vp8_build_inter_predictors_b(d0, 8, base_pre, pre_stride, x->subpixel_predict);
+            vp8_build_inter_predictors_b(d1, 8, base_pre, pre_stride, x->subpixel_predict);
+        }
+    }
+}
+
+
+/*encoder only*/
+void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x,
+                                         unsigned char *dst_y,
+                                         int dst_ystride)
+{
+    unsigned char *ptr_base;
+    unsigned char *ptr;
+    int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
+    int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
+    int pre_stride = x->pre.y_stride;
+
+    ptr_base = x->pre.y_buffer;
+    ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
+
+    if ((mv_row | mv_col) & 7)
+    {
+        x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7,
+                                 dst_y, dst_ystride);
+    }
+    else
+    {
+        vp8_copy_mem16x16(ptr, pre_stride, dst_y,
+            dst_ystride);
+    }
+}
+
+static void clamp_mv_to_umv_border(MV *mv, const MACROBLOCKD *xd)
+{
+    /* If the MV points so far into the UMV border that no visible pixels
+     * are used for reconstruction, the subpel part of the MV can be
+     * discarded and the MV limited to 16 pixels with equivalent results.
+     *
+     * This limit kicks in at 19 pixels for the top and left edges, for
+     * the 16 pixels plus 3 taps right of the central pixel when subpel
+     * filtering. The bottom and right edges use 16 pixels plus 2 pixels
+     * left of the central pixel when filtering.
+     */
+    if (mv->col < (xd->mb_to_left_edge - (19 << 3)))
+        mv->col = xd->mb_to_left_edge - (16 << 3);
+    else if (mv->col > xd->mb_to_right_edge + (18 << 3))
+        mv->col = xd->mb_to_right_edge + (16 << 3);
+
+    if (mv->row < (xd->mb_to_top_edge - (19 << 3)))
+        mv->row = xd->mb_to_top_edge - (16 << 3);
+    else if (mv->row > xd->mb_to_bottom_edge + (18 << 3))
+        mv->row = xd->mb_to_bottom_edge + (16 << 3);
+}
+
+/* A version of the above function for chroma block MVs.*/
+static void clamp_uvmv_to_umv_border(MV *mv, const MACROBLOCKD *xd)
+{
+    mv->col = (2*mv->col < (xd->mb_to_left_edge - (19 << 3))) ?
+        (xd->mb_to_left_edge - (16 << 3)) >> 1 : mv->col;
+    mv->col = (2*mv->col > xd->mb_to_right_edge + (18 << 3)) ?
+        (xd->mb_to_right_edge + (16 << 3)) >> 1 : mv->col;
+
+    mv->row = (2*mv->row < (xd->mb_to_top_edge - (19 << 3))) ?
+        (xd->mb_to_top_edge - (16 << 3)) >> 1 : mv->row;
+    mv->row = (2*mv->row > xd->mb_to_bottom_edge + (18 << 3)) ?
+        (xd->mb_to_bottom_edge + (16 << 3)) >> 1 : mv->row;
+}
+
+void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,
+                                        unsigned char *dst_y,
+                                        unsigned char *dst_u,
+                                        unsigned char *dst_v,
+                                        int dst_ystride,
+                                        int dst_uvstride)
+{
+    int offset;
+    unsigned char *ptr;
+    unsigned char *uptr, *vptr;
+
+    int_mv _16x16mv;
+
+    unsigned char *ptr_base = x->pre.y_buffer;
+    int pre_stride = x->pre.y_stride;
+
+    _16x16mv.as_int = x->mode_info_context->mbmi.mv.as_int;
+
+    if (x->mode_info_context->mbmi.need_to_clamp_mvs)
+    {
+        clamp_mv_to_umv_border(&_16x16mv.as_mv, x);
+    }
+
+    ptr = ptr_base + ( _16x16mv.as_mv.row >> 3) * pre_stride + (_16x16mv.as_mv.col >> 3);
+
+    if ( _16x16mv.as_int & 0x00070007)
+    {
+        x->subpixel_predict16x16(ptr, pre_stride, _16x16mv.as_mv.col & 7,  _16x16mv.as_mv.row & 7, dst_y, dst_ystride);
+    }
+    else
+    {
+        vp8_copy_mem16x16(ptr, pre_stride, dst_y, dst_ystride);
+    }
+
+    /* calc uv motion vectors */
+    _16x16mv.as_mv.row += 1 | (_16x16mv.as_mv.row >> (sizeof(int) * CHAR_BIT - 1));
+    _16x16mv.as_mv.col += 1 | (_16x16mv.as_mv.col >> (sizeof(int) * CHAR_BIT - 1));
+    _16x16mv.as_mv.row /= 2;
+    _16x16mv.as_mv.col /= 2;
+    _16x16mv.as_mv.row &= x->fullpixel_mask;
+    _16x16mv.as_mv.col &= x->fullpixel_mask;
+
+    pre_stride >>= 1;
+    offset = ( _16x16mv.as_mv.row >> 3) * pre_stride + (_16x16mv.as_mv.col >> 3);
+    uptr = x->pre.u_buffer + offset;
+    vptr = x->pre.v_buffer + offset;
+
+    if ( _16x16mv.as_int & 0x00070007)
+    {
+        x->subpixel_predict8x8(uptr, pre_stride, _16x16mv.as_mv.col & 7,  _16x16mv.as_mv.row & 7, dst_u, dst_uvstride);
+        x->subpixel_predict8x8(vptr, pre_stride, _16x16mv.as_mv.col & 7,  _16x16mv.as_mv.row & 7, dst_v, dst_uvstride);
+    }
+    else
+    {
+        vp8_copy_mem8x8(uptr, pre_stride, dst_u, dst_uvstride);
+        vp8_copy_mem8x8(vptr, pre_stride, dst_v, dst_uvstride);
+    }
+}
+
+static void build_inter4x4_predictors_mb(MACROBLOCKD *x)
+{
+    int i;
+    unsigned char *base_dst = x->dst.y_buffer;
+    unsigned char *base_pre = x->pre.y_buffer;
+
+    if (x->mode_info_context->mbmi.partitioning < 3)
+    {
+        BLOCKD *b;
+        int dst_stride = x->dst.y_stride;
+
+        x->block[ 0].bmi = x->mode_info_context->bmi[ 0];
+        x->block[ 2].bmi = x->mode_info_context->bmi[ 2];
+        x->block[ 8].bmi = x->mode_info_context->bmi[ 8];
+        x->block[10].bmi = x->mode_info_context->bmi[10];
+        if (x->mode_info_context->mbmi.need_to_clamp_mvs)
+        {
+            clamp_mv_to_umv_border(&x->block[ 0].bmi.mv.as_mv, x);
+            clamp_mv_to_umv_border(&x->block[ 2].bmi.mv.as_mv, x);
+            clamp_mv_to_umv_border(&x->block[ 8].bmi.mv.as_mv, x);
+            clamp_mv_to_umv_border(&x->block[10].bmi.mv.as_mv, x);
+        }
+
+        b = &x->block[ 0];
+        build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride);
+        b = &x->block[ 2];
+        build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride);
+        b = &x->block[ 8];
+        build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride);
+        b = &x->block[10];
+        build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride);
+    }
+    else
+    {
+        for (i = 0; i < 16; i += 2)
+        {
+            BLOCKD *d0 = &x->block[i];
+            BLOCKD *d1 = &x->block[i+1];
+            int dst_stride = x->dst.y_stride;
+
+            x->block[i+0].bmi = x->mode_info_context->bmi[i+0];
+            x->block[i+1].bmi = x->mode_info_context->bmi[i+1];
+            if (x->mode_info_context->mbmi.need_to_clamp_mvs)
+            {
+                clamp_mv_to_umv_border(&x->block[i+0].bmi.mv.as_mv, x);
+                clamp_mv_to_umv_border(&x->block[i+1].bmi.mv.as_mv, x);
+            }
+
+            if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
+                build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride);
+            else
+            {
+                build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
+                build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
+            }
+
+        }
+
+    }
+    base_dst = x->dst.u_buffer;
+    base_pre = x->pre.u_buffer;
+    for (i = 16; i < 20; i += 2)
+    {
+        BLOCKD *d0 = &x->block[i];
+        BLOCKD *d1 = &x->block[i+1];
+        int dst_stride = x->dst.uv_stride;
+
+        /* Note: uv mvs already clamped in build_4x4uvmvs() */
+
+        if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
+            build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride);
+        else
+        {
+            build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
+            build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
+        }
+    }
+
+    base_dst = x->dst.v_buffer;
+    base_pre = x->pre.v_buffer;
+    for (i = 20; i < 24; i += 2)
+    {
+        BLOCKD *d0 = &x->block[i];
+        BLOCKD *d1 = &x->block[i+1];
+        int dst_stride = x->dst.uv_stride;
+
+        /* Note: uv mvs already clamped in build_4x4uvmvs() */
+
+        if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
+            build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride);
+        else
+        {
+            build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
+            build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
+        }
+    }
+}
+
+static
+void build_4x4uvmvs(MACROBLOCKD *x)
+{
+    int i, j;
+
+    for (i = 0; i < 2; i++)
+    {
+        for (j = 0; j < 2; j++)
+        {
+            int yoffset = i * 8 + j * 2;
+            int uoffset = 16 + i * 2 + j;
+            int voffset = 20 + i * 2 + j;
+
+            int temp;
+
+            temp = x->mode_info_context->bmi[yoffset + 0].mv.as_mv.row
+                 + x->mode_info_context->bmi[yoffset + 1].mv.as_mv.row
+                 + x->mode_info_context->bmi[yoffset + 4].mv.as_mv.row
+                 + x->mode_info_context->bmi[yoffset + 5].mv.as_mv.row;
+
+            temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
+
+            x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask;
+
+            temp = x->mode_info_context->bmi[yoffset + 0].mv.as_mv.col
+                 + x->mode_info_context->bmi[yoffset + 1].mv.as_mv.col
+                 + x->mode_info_context->bmi[yoffset + 4].mv.as_mv.col
+                 + x->mode_info_context->bmi[yoffset + 5].mv.as_mv.col;
+
+            temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
+
+            x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask;
+
+            if (x->mode_info_context->mbmi.need_to_clamp_mvs)
+                clamp_uvmv_to_umv_border(&x->block[uoffset].bmi.mv.as_mv, x);
+
+            x->block[voffset].bmi.mv.as_int = x->block[uoffset].bmi.mv.as_int;
+        }
+    }
+}
+
+void vp8_build_inter_predictors_mb(MACROBLOCKD *xd)
+{
+    if (xd->mode_info_context->mbmi.mode != SPLITMV)
+    {
+        vp8_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer,
+                                           xd->dst.u_buffer, xd->dst.v_buffer,
+                                           xd->dst.y_stride, xd->dst.uv_stride);
+    }
+    else
+    {
+        build_4x4uvmvs(xd);
+        build_inter4x4_predictors_mb(xd);
+    }
+}
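
Note on the chroma motion-vector derivation in reconinter.c above: vp8_build_inter16x16_predictors_mbuv() and vp8_build_inter16x16_predictors_mb() halve the luma MV with rounding away from zero, and vp8_build_inter4x4_predictors_mbuv() and build_4x4uvmvs() do the same when they add plus or minus 4 before dividing the sum of four luma MVs by 8. In each case the rounding offset is produced branch-free from the sign bit, 1 | (v >> 31), relying (as the code above does) on an arithmetic right shift for negative ints. A small standalone sketch of the halving case:

    /* Branch-free "halve with round-half-away-from-zero", the idiom used for
     * the chroma MVs above (MV components are in 1/8-pel units). */
    #include <limits.h>
    #include <stdio.h>

    static int halve_round_away(int v)
    {
        v += 1 | (v >> (sizeof(int) * CHAR_BIT - 1)); /* adds +1 or -1 (the sign of v) */
        return v / 2;                                 /* C division truncates toward 0 */
    }

    int main(void)
    {
        /* expected: 5 -> 3, -5 -> -3, 4 -> 2, -4 -> -2 */
        printf("%d %d %d %d\n", halve_round_away(5), halve_round_away(-5),
                                halve_round_away(4), halve_round_away(-4));
        return 0;
    }

The (row | col) & 7 and as_int & 0x00070007 tests in the same file simply ask whether either 1/8-pel component has a non-zero sub-pel phase; when both phases are zero the block is copied instead of filtered.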

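The clamp in clamp_mv_to_umv_border() above is easier to read with the magic numbers named: 19 is 16 border pixels plus the 3 filter taps right of or below the centre sample, 18 is 16 plus the 2 taps left of or above it, and everything is shifted by 3 because edges and MVs are in 1/8-pel units. A restatement with editorial constant names (the names are not libvpx's; the values match the code above):

    /* Restatement of clamp_mv_to_umv_border() with named constants. */
    typedef struct { int row, col; } mv_t;

    enum {
        UMV_CLAMP_PX = 16,      /* where the MV gets pinned, in whole pixels     */
        LEFT_TOP_PX  = 16 + 3,  /* 16 px + 3 taps right of/below the centre      */
        RIGHT_BOT_PX = 16 + 2   /* 16 px + 2 taps left of/above the centre       */
    };

    void clamp_to_umv_border(mv_t *mv,
                             int to_left, int to_right,  /* mb_to_*_edge, 1/8 pel */
                             int to_top, int to_bottom)
    {
        if (mv->col < to_left - (LEFT_TOP_PX << 3))
            mv->col = to_left - (UMV_CLAMP_PX << 3);
        else if (mv->col > to_right + (RIGHT_BOT_PX << 3))
            mv->col = to_right + (UMV_CLAMP_PX << 3);

        if (mv->row < to_top - (LEFT_TOP_PX << 3))
            mv->row = to_top - (UMV_CLAMP_PX << 3);
        else if (mv->row > to_bottom + (RIGHT_BOT_PX << 3))
            mv->row = to_bottom + (UMV_CLAMP_PX << 3);
    }

clamp_uvmv_to_umv_border() applies the same bounds to chroma MVs, which live at half resolution, hence the 2* on the comparison side and the >> 1 on the assignment side.
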
+ 43 - 0
thirdparty/libvpx/vp8/common/reconinter.h

@@ -0,0 +1,43 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP8_COMMON_RECONINTER_H_
+#define VP8_COMMON_RECONINTER_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern void vp8_build_inter_predictors_mb(MACROBLOCKD *x);
+extern void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,
+                                               unsigned char *dst_y,
+                                               unsigned char *dst_u,
+                                               unsigned char *dst_v,
+                                               int dst_ystride,
+                                               int dst_uvstride);
+
+
+extern void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x,
+                                                unsigned char *dst_y,
+                                                int dst_ystride);
+extern void vp8_build_inter_predictors_b(BLOCKD *d, int pitch,
+                                         unsigned char *base_pre,
+                                         int pre_stride,
+                                         vp8_subpix_fn_t sppf);
+
+extern void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x);
+extern void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP8_COMMON_RECONINTER_H_

+ 117 - 0
thirdparty/libvpx/vp8/common/reconintra.c

@@ -0,0 +1,117 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+#include "./vp8_rtcd.h"
+#include "vpx_mem/vpx_mem.h"
+#include "vpx_ports/vpx_once.h"
+#include "blockd.h"
+#include "vp8/common/reconintra.h"
+#include "vp8/common/reconintra4x4.h"
+
+enum {
+    SIZE_16,
+    SIZE_8,
+    NUM_SIZES,
+};
+
+typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
+                              const uint8_t *above, const uint8_t *left);
+
+static intra_pred_fn pred[4][NUM_SIZES];
+static intra_pred_fn dc_pred[2][2][NUM_SIZES];
+
+static void vp8_init_intra_predictors_internal(void)
+{
+#define INIT_SIZE(sz) \
+    pred[V_PRED][SIZE_##sz] = vpx_v_predictor_##sz##x##sz; \
+    pred[H_PRED][SIZE_##sz] = vpx_h_predictor_##sz##x##sz; \
+    pred[TM_PRED][SIZE_##sz] = vpx_tm_predictor_##sz##x##sz; \
+ \
+    dc_pred[0][0][SIZE_##sz] = vpx_dc_128_predictor_##sz##x##sz; \
+    dc_pred[0][1][SIZE_##sz] = vpx_dc_top_predictor_##sz##x##sz; \
+    dc_pred[1][0][SIZE_##sz] = vpx_dc_left_predictor_##sz##x##sz; \
+    dc_pred[1][1][SIZE_##sz] = vpx_dc_predictor_##sz##x##sz
+
+    INIT_SIZE(16);
+    INIT_SIZE(8);
+    vp8_init_intra4x4_predictors_internal();
+}
+
+void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x,
+                                      unsigned char * yabove_row,
+                                      unsigned char * yleft,
+                                      int left_stride,
+                                      unsigned char * ypred_ptr,
+                                      int y_stride)
+{
+    MB_PREDICTION_MODE mode = x->mode_info_context->mbmi.mode;
+    DECLARE_ALIGNED(16, uint8_t, yleft_col[16]);
+    int i;
+    intra_pred_fn fn;
+
+    for (i = 0; i < 16; i++)
+    {
+        yleft_col[i] = yleft[i* left_stride];
+    }
+
+    if (mode == DC_PRED)
+    {
+        fn = dc_pred[x->left_available][x->up_available][SIZE_16];
+    }
+    else
+    {
+        fn = pred[mode][SIZE_16];
+    }
+
+    fn(ypred_ptr, y_stride, yabove_row, yleft_col);
+}
+
+void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x,
+                                       unsigned char * uabove_row,
+                                       unsigned char * vabove_row,
+                                       unsigned char * uleft,
+                                       unsigned char * vleft,
+                                       int left_stride,
+                                       unsigned char * upred_ptr,
+                                       unsigned char * vpred_ptr,
+                                       int pred_stride)
+{
+    MB_PREDICTION_MODE uvmode = x->mode_info_context->mbmi.uv_mode;
+    unsigned char uleft_col[8];
+    unsigned char vleft_col[8];
+    int i;
+    intra_pred_fn fn;
+
+    for (i = 0; i < 8; i++)
+    {
+        uleft_col[i] = uleft[i * left_stride];
+        vleft_col[i] = vleft[i * left_stride];
+    }
+
+    if (uvmode == DC_PRED)
+    {
+        fn = dc_pred[x->left_available][x->up_available][SIZE_8];
+    }
+    else
+    {
+        fn = pred[uvmode][SIZE_8];
+    }
+
+    fn(upred_ptr, pred_stride, uabove_row, uleft_col);
+    fn(vpred_ptr, pred_stride, vabove_row, vleft_col);
+}
+
+void vp8_init_intra_predictors(void)
+{
+    once(vp8_init_intra_predictors_internal);
+}
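
reconintra.c above fills two small function-pointer tables exactly once (via once()) and dispatches each macroblock's intra mode through them; DC prediction additionally selects a variant based on whether the left and above neighbours are available. A reduced, self-contained sketch of that dispatch shape follows: stub_fill is an editorial placeholder rather than a real vpx_dsp predictor, and the mode-enum ordering is assumed here.

    /* Reduced sketch of the once-initialised intra-prediction dispatch above. */
    #include <stdint.h>
    #include <stddef.h>
    #include <stdio.h>
    #include <string.h>

    typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
                                  const uint8_t *above, const uint8_t *left);

    /* Placeholder predictor: flat 128 block, just to keep the sketch runnable. */
    static void stub_fill(uint8_t *dst, ptrdiff_t stride,
                          const uint8_t *above, const uint8_t *left)
    {
        int r;
        (void)above; (void)left;
        for (r = 0; r < 16; r++)
            memset(dst + r * stride, 128, 16);
    }

    enum { DC_PRED, V_PRED, H_PRED, TM_PRED, NUM_MODES }; /* order assumed */
    enum { SIZE_16, SIZE_8, NUM_SIZES };

    static intra_pred_fn pred[NUM_MODES][NUM_SIZES];   /* V/H/TM, indexed by mode */
    static intra_pred_fn dc_pred[2][2][NUM_SIZES];     /* [left_avail][up_avail]  */

    static void init_tables(void)   /* the real file runs its version via once() */
    {
        int m, l, u, s;
        for (s = 0; s < NUM_SIZES; s++) {
            for (m = 0; m < NUM_MODES; m++) pred[m][s] = stub_fill;
            for (l = 0; l < 2; l++)
                for (u = 0; u < 2; u++) dc_pred[l][u][s] = stub_fill;
        }
    }

    int main(void)
    {
        uint8_t dst[16 * 16], above[16] = {0}, left[16] = {0};
        int mode = DC_PRED, left_avail = 1, up_avail = 0;
        intra_pred_fn fn;

        init_tables();
        /* Same selection shape as vp8_build_intra_predictors_mby_s(): DC picks
         * a variant by neighbour availability, other modes index pred[] directly. */
        fn = (mode == DC_PRED) ? dc_pred[left_avail][up_avail][SIZE_16]
                               : pred[mode][SIZE_16];
        fn(dst, 16, above, left);
        printf("top-left sample after prediction: %d\n", dst[0]);
        return 0;
    }

Folding neighbour availability into the table index (dc_pred[left][up]) keeps the per-block hot path down to one indexed call, with no branching beyond the DC check.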

Some files were not shown because too many files changed in this diff