Browse Source

added building of the theora library

Martin Felis 10 years ago
parent
commit
8f5ee4a05e
69 changed files with 26482 additions and 2 deletions
  1. 56 0
      jni/libtheora-1.2.0alpha1/AUTHORS
  2. 26 0
      jni/libtheora-1.2.0alpha1/Android.mk
  3. 255 0
      jni/libtheora-1.2.0alpha1/CHANGES
  4. 28 0
      jni/libtheora-1.2.0alpha1/COPYING
  5. 18 0
      jni/libtheora-1.2.0alpha1/LICENSE
  6. 156 0
      jni/libtheora-1.2.0alpha1/README
  7. 11 0
      jni/libtheora-1.2.0alpha1/README.Android.md
  8. 99 0
      jni/libtheora-1.2.0alpha1/config.h
  9. 24 0
      jni/libtheora-1.2.0alpha1/configure_android.sh
  10. 599 0
      jni/libtheora-1.2.0alpha1/include/theora/Makefile
  11. 7 0
      jni/libtheora-1.2.0alpha1/include/theora/Makefile.am
  12. 599 0
      jni/libtheora-1.2.0alpha1/include/theora/Makefile.in
  13. 606 0
      jni/libtheora-1.2.0alpha1/include/theora/codec.h
  14. 786 0
      jni/libtheora-1.2.0alpha1/include/theora/theora.h
  15. 329 0
      jni/libtheora-1.2.0alpha1/include/theora/theoradec.h
  16. 548 0
      jni/libtheora-1.2.0alpha1/include/theora/theoraenc.h
  17. 2712 0
      jni/libtheora-1.2.0alpha1/lib/analyze.c
  18. 166 0
      jni/libtheora-1.2.0alpha1/lib/apiwrapper.c
  19. 54 0
      jni/libtheora-1.2.0alpha1/lib/apiwrapper.h
  20. 32 0
      jni/libtheora-1.2.0alpha1/lib/arm/armbits.h
  21. 116 0
      jni/libtheora-1.2.0alpha1/lib/arm/armcpu.c
  22. 29 0
      jni/libtheora-1.2.0alpha1/lib/arm/armcpu.h
  23. 57 0
      jni/libtheora-1.2.0alpha1/lib/arm/armenc.c
  24. 51 0
      jni/libtheora-1.2.0alpha1/lib/arm/armenc.h
  25. 126 0
      jni/libtheora-1.2.0alpha1/lib/arm/armint.h
  26. 219 0
      jni/libtheora-1.2.0alpha1/lib/arm/armstate.c
  27. 114 0
      jni/libtheora-1.2.0alpha1/lib/bitpack.c
  28. 76 0
      jni/libtheora-1.2.0alpha1/lib/bitpack.h
  29. 974 0
      jni/libtheora-1.2.0alpha1/lib/collect.c
  30. 109 0
      jni/libtheora-1.2.0alpha1/lib/collect.h
  31. 31 0
      jni/libtheora-1.2.0alpha1/lib/dct.h
  32. 193 0
      jni/libtheora-1.2.0alpha1/lib/decapiwrapper.c
  33. 250 0
      jni/libtheora-1.2.0alpha1/lib/decinfo.c
  34. 186 0
      jni/libtheora-1.2.0alpha1/lib/decint.h
  35. 2993 0
      jni/libtheora-1.2.0alpha1/lib/decode.c
  36. 182 0
      jni/libtheora-1.2.0alpha1/lib/dequant.c
  37. 27 0
      jni/libtheora-1.2.0alpha1/lib/dequant.h
  38. 168 0
      jni/libtheora-1.2.0alpha1/lib/encapiwrapper.c
  39. 379 0
      jni/libtheora-1.2.0alpha1/lib/encfrag.c
  40. 121 0
      jni/libtheora-1.2.0alpha1/lib/encinfo.c
  41. 845 0
      jni/libtheora-1.2.0alpha1/lib/encint.h
  42. 1836 0
      jni/libtheora-1.2.0alpha1/lib/encode.c
  43. 67 0
      jni/libtheora-1.2.0alpha1/lib/encoder_disabled.c
  44. 370 0
      jni/libtheora-1.2.0alpha1/lib/enquant.c
  45. 26 0
      jni/libtheora-1.2.0alpha1/lib/enquant.h
  46. 417 0
      jni/libtheora-1.2.0alpha1/lib/fdct.c
  47. 82 0
      jni/libtheora-1.2.0alpha1/lib/fragment.c
  48. 515 0
      jni/libtheora-1.2.0alpha1/lib/huffdec.c
  49. 32 0
      jni/libtheora-1.2.0alpha1/lib/huffdec.h
  50. 966 0
      jni/libtheora-1.2.0alpha1/lib/huffenc.c
  51. 22 0
      jni/libtheora-1.2.0alpha1/lib/huffenc.h
  52. 70 0
      jni/libtheora-1.2.0alpha1/lib/huffman.h
  53. 330 0
      jni/libtheora-1.2.0alpha1/lib/idct.c
  54. 131 0
      jni/libtheora-1.2.0alpha1/lib/info.c
  55. 210 0
      jni/libtheora-1.2.0alpha1/lib/internal.c
  56. 116 0
      jni/libtheora-1.2.0alpha1/lib/internal.h
  57. 314 0
      jni/libtheora-1.2.0alpha1/lib/mathops.c
  58. 143 0
      jni/libtheora-1.2.0alpha1/lib/mathops.h
  59. 792 0
      jni/libtheora-1.2.0alpha1/lib/mcenc.c
  60. 1030 0
      jni/libtheora-1.2.0alpha1/lib/modedec.h
  61. 128 0
      jni/libtheora-1.2.0alpha1/lib/ocintrin.h
  62. 127 0
      jni/libtheora-1.2.0alpha1/lib/quant.c
  63. 33 0
      jni/libtheora-1.2.0alpha1/lib/quant.h
  64. 1147 0
      jni/libtheora-1.2.0alpha1/lib/rate.c
  65. 1267 0
      jni/libtheora-1.2.0alpha1/lib/state.c
  66. 552 0
      jni/libtheora-1.2.0alpha1/lib/state.h
  67. 1368 0
      jni/libtheora-1.2.0alpha1/lib/tokenize.c
  68. 29 0
      jni/libtheora-1.2.0alpha1/ndkenv.sh
  69. 5 2
      jni/love/Android.mk

+ 56 - 0
jni/libtheora-1.2.0alpha1/AUTHORS

@@ -0,0 +1,56 @@
+Monty <monty@xiph.org>
+	- Original VP3 port
+
+Timothy B. Terriberry
+Gregory Maxwell
+Ralph Giles
+Monty
+	- Ongoing development
+
+Dan B. Miller
+	- Pre alpha3 development
+
+Rudolf Marek
+Wim Tayman
+Dan Lenski
+Nils Pipenbrinck
+Monty
+	- MMX optimized functions
+
+David Schleef
+	- C64x port
+
+Aaron Colwell
+Thomas Vander Stichele
+Jan Gerber
+Conrad Parker
+Cristian Adam
+Sebastian Pippin
+Simon Hosie
+Brad Smith
+	- Bug fixes, enhancements, build systems.
+
+Mauricio Piacentini
+	- Original win32 projects and example ports
+	- VP3->Theora transcoder
+
+Silvia Pfeiffer
+	- Figures for the spec
+
+Michael Smith
+Andre Pang
+calc
+Chris Cheney
+Brendan Cully
+Edward Hervey
+Adam Moss
+Colin Ward
+Jeremy C. Reed
+Arc Riley
+Rodolphe Ortalo
+	- Bug fixes
+
+Robin Watts
+	- ARM code optimisations
+
+and other Xiph.org contributors

+ 26 - 0
jni/libtheora-1.2.0alpha1/Android.mk

@@ -0,0 +1,26 @@
+# NDK build description for the static libtheora module.
+LOCAL_PATH := $(call my-dir)
+
+# libtheora
+include $(CLEAR_VARS)
+
+LOCAL_MODULE    := libtheora
+LOCAL_CFLAGS    := -fexceptions -g -Dlinux -DHAVE_GCC_DESTRUCTOR=1 -DOPT_GENERIC -DREAL_IS_FLOAT
+LOCAL_CPPFLAGS  := $(LOCAL_CFLAGS)
+
+# Header search paths. Every entry must be spelled LOCAL_PATH exactly: make
+# variable names are case-sensitive, and a misspelled name expands to the
+# empty string, which would turn the entry into an absolute host path
+# such as /lib.
+LOCAL_C_INCLUDES := \
+	$(LOCAL_PATH)/include \
+	$(LOCAL_PATH)/lib/arm \
+	$(LOCAL_PATH)/lib \
+	$(LOCAL_PATH)/../libogg-1.3.2/include
+
+# Every C file directly inside lib/, expressed relative to LOCAL_PATH.
+# patsubst only strips the leading "$(LOCAL_PATH)/" prefix of each word.
+LOCAL_SRC_FILES := \
+	$(patsubst $(LOCAL_PATH)/%,%,$(wildcard $(LOCAL_PATH)/lib/*.c))
+
+# Uncomment to print the resolved include paths while debugging the build:
+# $(info local includes $(LOCAL_C_INCLUDES))
+
+include $(BUILD_STATIC_LIBRARY)
+

+ 255 - 0
jni/libtheora-1.2.0alpha1/CHANGES

@@ -0,0 +1,255 @@
+libtheora 1.2.0alpha1 (2010 September 23)
+
+- New 'ptalarbvorm' encoder with better rate/distortion optimization
+- New th_encode_ctl option for copying configuration from an existing
+  setup header, useful for splicing streams.
+- Returns TH_DUPFRAME in more cases.
+- Add ARM optimizations
+- Add TI C64x+ DSP optimizations
+- Other performance improvements
+- Rename speedlevel 2 to 3 and provide a new speedlevel 2
+- Various minor bug fixes
+
+libtheora 1.1.2 (unreleased snapshot)
+
+ - Fix Huffman table decoding when OC_HUFF_SLUSH is set to 0
+ - Fix a frame size bug in player_example
+ - Add support for passing a buffer the size of the picture
+   region, rather than a full padded frame to th_encode_ycbcr_in()
+   as was possible with the legacy pre-1.0 API.
+ - 4:4:4 support in player_example using software yuv->rgb
+ - Better rgb->yuv conversion in png2theora
+ - Clean up warnings and local variables
+ - Build and documentation fixes
+
+libtheora 1.1.1 (2009 October 1)
+
+ - Fix problems with MSVC inline assembly
+ - Add the missing encoder_disabled.c to the distribution
+ - build updates: autogen.sh should work better after switching systems
+   and the MSVC project now defaults to the dynamic runtime library
+ - Namespace some variables to avoid conflicts on wince.
+
+libtheora 1.1.0 (2009 September 24)
+
+ - Fix various small issues with the example and telemetry code
+ - Fix handing a zero-byte packet as the first frame
+ - Documentation cleanup
+ - Two minor build fixes
+
+libtheora 1.1beta3 (2009 August 22)
+
+ - Rate control fixes to smooth quality
+ - MSVC build now exports all of the 1.0 api
+ - Assorted small bug fixes
+
+libtheora 1.1beta2 (2009 August 12)
+
+ - Fix a rate control problem with difficult input
+ - Build fixes for OpenBSD and Apple Xcode
+ - Examples now all use the 1.0 api
+ - TH_ENCCTL_SET_SPLEVEL works again
+ - Various bug fixes and source tree rearrangement
+
+libtheora 1.1beta1 (2009 August 5)
+
+ - Support for two-pass encoding
+ - Performance optimization of both encoder and decoder
+ - Encoder supports dynamic adjustment of quality and 
+   bitrate targets
+ - Encoder is generally more configurable, and all
+   rate control modes perform better
+ - Encoder now accepts 4:2:2 and 4:4:4 chroma sampling
+ - Decoder telemetry output shows quantization choice
+   and a breakdown of bitrate usage in the frame
+ - MSVC assembly optimizations up to date and functional
+
+libtheora 1.1alpha2 (2009 May 26)
+
+ - Reduce lambda for small quantizers.
+ - New encoder fDCT does better on smooth gradients
+ - Use SATD for mode decisions (1-2% bitrate reduction)
+ - Assembly rewrite for new features and general speed up
+ - Share code between the encoder and decoder for performance
+ - Fix 4:2:2 decoding and telemetry
+ - MSVC project files updated, but assembly is disabled.
+ - New configure option --disable-spec to work around toolchain
+   detection failures.
+ - Limit symbol exports on MacOS X.
+ - Port remaining unit tests from the 1.0 release.
+
+libtheora 1.1alpha1 (2009 March 27)
+
+ - Encoder rewrite with much improved vbr quality/bitrate and
+   better tracking of the target rate in cbr mode.
+ - MSVC project files do not work in this release.
+
+libtheora 1.0 (2008 November 3)
+
+ - Merge x86 assembly for forward DCT from Thusnelda branch.
+ - Update 32 bit MMX with loop filter fix.
+ - Check for an uninitialized state before dereferencing in propagating
+   decode calls.
+ - Remove all TH_DEBUG statements.
+ - Rename the bitpacker source files copied from libogg to avoid
+   confusing simple build systems using both libraries.
+ - Declare bitfield entries to be explicitly signed for Solaris cc.
+ - Set quantization parameters to default values when an empty buffer is
+   passed with TH_ENCCTL_SET_QUANT_PARAMS.
+ - Split encoder and decoder tests depending on configure settings.
+ - Return lstylex.sty to the distribution.
+ - Disable inline assembly on gcc versions prior to 3.1.
+ - Remove extern references for OC_*_QUANT_MIN.
+ - Make various data tables static const so they can be read-only.
+ - Remove ENCCTL codes from the old encoder API.
+ - Implement TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE ctl.
+ - Fix segfault when exactly one of the width or height is not a multiple
+   of 16, but the other is.
+ - Compute the correct vertical offset for chroma.
+ - cpuid assembly fix for MSVC.
+ - Add VS2008 project files.
+ - Build updates for 64-bit platforms, Mingw32, VS and XCode.
+ - Do not clobber the cropping rectangle.
+ - Declare ourselves 1.0final to pkg-config to sort after beta releases.
+ - Fix the scons build to include asm in libtheoradec/enc.
+
+libtheora 1.0beta3 (2008 April 16)
+
+ - Build new libtheoradec and libtheoraenc libraries
+   supporting the new API from theora-exp. This API should
+   not be considered stable yet.
+ - Change granule_frame() to return an index as documented.
+   This is a change of behaviour from 1.0beta1.
+ - Document that granule_time() returns the end of the
+   presentation interval.
+ - Use a custom copy of the libogg bitpacker in the decoder
+   to avoid function call overhead.
+ - MMX code improved and ported to MSVC.
+ - Fix a problem with the MMX code on SELinux.
+ - Fix a problem with decoder quantizer initialization.
+ - Fix a page queue problem with png2theora.
+ - Improved robustness.
+ - Updated VS2005 project files.
+ - Dropped build support for Microsoft VS2003.
+ - Dropped build support for the unreleased libogg2.
+ - Added the specification to the autotools build.
+ - Specification corrections.
+
+libtheora 1.0beta2 (2007 October 12)
+
+ - Fix a crash bug on char-is-unsigned architectures (PowerPC)
+ - Fix a buffer sizing issue that caused rare encoder crashes
+ - Fix a buffer alignment issue
+ - Build fixes for MingW32, MSVC
+ - Improved format documentation.
+
+libtheora 1.0beta1 (2007 September 22)
+
+ - Granulepos scheme modified to match other codecs. This bumps
+   the bitstream revision to 3.2.1. Bitstreams marked 3.2.0 are
+   handled correctly by this decoder. Older decoders will show
+   a one frame sync error in the less noticeable direction.
+
+libtheora 1.0alpha8 (2007 September 18)
+
+ - Switch to new spec compliant decoder from theora-exp branch.
+   Written by Dr. Timothy Terriberry.
+ - Add support to the encoder for using quantization settings
+   provided by the application.
+ - more assembly optimizations
+
+libtheora 1.0alpha7 (2006 June 20)
+
+ - Enable mmx assembly by default
+ - Avoid some relocations that caused problems on SELinux
+ - Other build fixes
+ - time testing mode (-f) for the dump_video example
+
+libtheora 1.0alpha6 (2006 May 30)
+
+ * Merge theora-mmx simd acceleration (x86_32 and x86_64)
+ * Major RTP payload specification update
+ * Minor format specification updates
+ * Fix some spurious calls to free() instead of _ogg_free()
+ * Fix invalid array indexing in PixelLineSearch()
+ * Improve robustness against invalid input
+ * General warning cleanup
+ * The offset_y member now means what every application thought it meant
+   (offset from the top). This will mean some old files (those with a 
+   non-centered image created with a buggy encoder) will display differently.
+
+libtheora 1.0alpha5 (2005 August 20)
+
+ * Fixed bitrate management bugs that caused popping and encode
+   errors
+ * Fixed a crash problem with the theora_state internals not
+   being initialized properly.
+ * new utility function:
+   - theora_granule_shift()
+ * dump_video example now makes YUV4MPEG files by default, so
+   the results can be fed back to encoder_example and similar
+   tools. The old behavior is restored through the '-r' switch.
+ * ./configure now prints a summary
+ * simple unit test of the comment api under 'make check'
+ * misc code cleanup, warning and leak fixes
+
+libtheora 1.0alpha4 (2004 December 15)
+
+ * first draft of the Theora I Format Specification
+ * API documentation generated from theora.h with Doxygen
+ * fix a double-update bug in the motion analysis
+ * apply the loop filter before filling motion vector border 
+   in the reference frame
+ * new utility functions:
+   - theora_packet_isheader(),
+   - theora_packet_iskeyframe()
+   - theora_granule_frame()
+ * optional support for building without floating point
+ * optional support for building without encode support 
+ * various build and packaging fixes
+ * pkg-config support
+ * SymbianOS build support
+
+libtheora 1.0alpha3 (2004 March 20)
+
+ UPDATE: on 2004 July 1 the Theora I bitstream format was frozen. Files
+ produced by the libtheora 1.0alpha3 reference encoder will always be
+ decodable by the Theora I spec.
+
+ * Bitstream info header FORMAT CHANGES:
+   - move the granulepos shift field to maintain byte alignment longer.
+   - reserve 5 additional bits for subsampling and interlace flags.
+ * Bitstream setup header FORMAT CHANGES:
+   - support for a range of interpolated quant matrices.
+   - include the in-loop block filter coeff.
+ * Bitstream data packet FORMAT CHANGES:
+   - Reserve a bit for per-block Q index selection.
+   - Flip the coded image orientation for compatibility with VP3.
+     This allows lossless transcoding of VP3 content, but files
+     encoded with earlier theora releases would play upside down.
+ * example VP3 lossless transcoder
+ * optional support for libogg2
+ * timing improvements in the example player
+ * packaging and build system updates and fixes
+
+libtheora 1.0alpha2 (2003 June 9)
+
+ * bitstream FORMAT CHANGES:
+   - store the quant tables in a third setup header for
+     future encoder flexibility
+   - store the huffman tables in the third setup header
+   - add a field for marking the colorspace to the info header
+   - add crop parameters for non-multiple-of-16 frame sizes
+   - add a second vorbiscomment-style metadata header
+ * API changes to handle multiple headers with a single 
+   theora_decode_header() call, like libvorbis
+ * code cleanup and minor fixes
+ * new dump_video code example/utility
+ * experimental win32 code examples
+
+libtheora 1.0alpha1 (2002 September 25)
+
+ * First release of the theora reference implementation
+ * Port of the newly opened VP3 code to the Ogg container
+ * Rewrite of the code for portability and to use the libogg bitpacker
+

+ 28 - 0
jni/libtheora-1.2.0alpha1/COPYING

@@ -0,0 +1,28 @@
+Copyright (C) 2002-2009 Xiph.org Foundation
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+- Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+- Neither the name of the Xiph.org Foundation nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION
+OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+ 18 - 0
jni/libtheora-1.2.0alpha1/LICENSE

@@ -0,0 +1,18 @@
+Please see the file COPYING for the copyright license for this software.
+
+In addition to and irrespective of the copyright license associated
+with this software, On2 Technologies, Inc. makes the following statement
+regarding technology used in this software:
+
+  On2 represents and warrants that it shall not assert any rights
+  relating to infringement of On2's registered patents, nor initiate
+  any litigation asserting such rights, against any person who, or
+  entity which utilizes the On2 VP3 Codec Software, including any
+  use, distribution, and sale of said Software; which make changes,
+  modifications, and improvements in said Software; and to use,
+  distribute, and sell said changes as well as applications for other
+  fields of use.
+
+This reference implementation is originally derived from the On2 VP3
+Codec Software, and the Theora video format is essentially compatible
+with the VP3 video format, consisting of a backward-compatible superset.

+ 156 - 0
jni/libtheora-1.2.0alpha1/README

@@ -0,0 +1,156 @@
+-------------------------------------------------------------------------
+             The Xiph.org Foundation's libtheora 1.2
+-------------------------------------------------------------------------
+
+*** What is Theora?
+
+Theora is Xiph.Org's first publicly released video codec, intended
+for use within the Foundation's Ogg multimedia streaming system.
+Theora is derived directly from On2's VP3 codec, adding new features
+while allowing it a longer useful lifetime as a competitive codec.
+
+The 1.0 release decoder supported all the new features, but the
+encoder is nearly identical to the VP3 code.
+
+The 1.1 release featured a completely rewritten encoder, offering
+better performance and compression, and making more complete use
+of the format's feature set.
+
+The 1.2 release features significant additional improvements in
+compression and performance. Files produced by newer encoders can
+be decoded by earlier releases.
+
+*** Where is Theora?
+
+Theora's main site is www.theora.org.  Theora and related libraries
+can be gotten from www.theora.org or the main Xiph.Org site at
+www.xiph.org.  Development source is kept in an open subversion
+repository, see http://theora.org/svn/ for instructions.
+
+-------------------------------------------------------------------------
+Getting started with the code
+-------------------------------------------------------------------------
+
+*** What do I need to build the source?
+
+Requirements summary:
+
+  For libtheora:
+
+      libogg 1.1 or newer.
+
+  For example encoder:
+
+      as above,
+
+      libvorbis and libvorbisenc 1.0.1 or newer.
+      (libvorbis 1.3.1 or newer for 5.1 audio)
+
+  For creating a source distribution package:
+
+      as above,
+
+      Doxygen to build the API documentation,
+      pdflatex and fig2dev to build the format specification
+        (transfig package in Ubuntu).
+
+  For the player only:
+
+      as above,
+
+      SDL (Simple Direct media Layer) libraries and headers,
+      OSS audio driver and development headers.
+
+The provided build system is the GNU automake/autoconf system, and
+the main library, libtheora, should already build smoothly on any
+system.  Failure of libtheora to build on a GNU-enabled system is
+considered a bug; please report problems to theora-dev@xiph.org.
+
+Windows build support is included in the win32 directory.
+
+Project files for Apple XCode are included in the macosx directory.
+
+There is also a more limited scons build.
+
+*** How do I use the sample encoder?
+
+The sample encoder takes raw video in YUV4MPEG2 format, as used by
+lavtools, mjpeg-tools and other packages. The encoder expects audio,
+if any, in a separate wave WAV file. Try 'encoder_example -h' for a
+complete list of options.
+
+An easy way to get raw video and audio files is to use MPlayer as an
+export utility.  The options " -ao pcm -vo yuv4mpeg " will export a
+wav file named audiodump.wav and a YUV video file in the correct
+format for encoder_example as stream.yuv.  Be careful when exporting
+video alone; MPlayer may drop frames to 'keep up' with the audio
+timer.  The example encoder can't properly synchronize input audio and
+video files that aren't in sync to begin with.
+
+The encoder will also take video or audio on stdin if '-' is specified
+as the input file name.
+
+There is also a 'png2theora' example which accepts a set of image
+files in that format.
+
+*** How do I use the sample player?
+
+The sample player takes an Ogg file on standard in; the file may be
+audio alone, video alone or video with audio.
+
+*** What other tools are available?
+
+The programs in the examples directory are intended as tutorial source
+for developers using the library. As such they sacrifice features and
+robustness in the interests of comprehension and should not be
+considered serious applications.
+
+If you're wanting to just use theora, consider the programs linked
+from http://www.theora.org/. There is playback support in a number
+of common free players, and plugins for major media frameworks.
+Jan Gerber's ffmpeg2theora is an excellent encoding front end.
+
+-------------------------------------------------------------------------
+Troubleshooting the build process
+-------------------------------------------------------------------------
+
+*** Compile error, such as:
+
+encoder_internal.h:664: parse error before `ogg_uint16_t'
+
+This means you have a version of libogg prior to 1.1. A *complete* new Ogg
+install, libs and headers is needed.
+
+Also be sure that there aren't multiple copies of Ogg installed in
+/usr and /usr/local; an older one might be first on the search path
+for libs and headers.
+
+*** Link error, such as:
+
+undefined reference to `oggpackB_stream'
+
+See above; you need libogg 1.1 or later.
+
+*** Link error, such as:
+
+undefined reference to `vorbis_granule_time'
+
+You need libvorbis and libvorbisenc from the 1.0.1 release or later.
+
+*** Link error, such as:
+
+/usr/lib/libSDL.a(SDL_esdaudio.lo): In function `ESD_OpenAudio':
+SDL_esdaudio.lo(.text+0x25d): undefined reference to `esd_play_stream'
+
+Be sure to use an SDL that's built to work with OSS.  If you use an
+SDL that is also built with ESD and/or ALSA support, it will try to
+suck in all those extra libraries at link time too.  That will only
+work if the extra libraries are also installed.
+
+*** Link warning, such as:
+
+libtool: link: warning: library `/usr/lib/libogg.la' was moved.
+libtool: link: warning: library `/usr/lib/libogg.la' was moved.
+
+Re-run theora/autogen.sh after an Ogg or Vorbis rebuild/reinstall
+

+ 11 - 0
jni/libtheora-1.2.0alpha1/README.Android.md

@@ -0,0 +1,11 @@
+Building for Android should work using the ```config.h``` found in this
+directory. Should you need to recreate the ```config.h``` file due to an
+update of libtheora you have to do the following:
+
+1. Create the folders ```theora```, and ```theora/build```
+2. Extract the source of the theora library to ```theora/src```
+3. Copy the files ```ndkenv.sh``` and ```configure_android.sh``` to ```theora/build```
+4. Go to the ```theora/build``` directory and run ```configure_android.sh```
+
+You might have to adjust some paths and build settings in ```ndkenv.sh```
+and ```configure_android.sh```.

+ 99 - 0
jni/libtheora-1.2.0alpha1/config.h

@@ -0,0 +1,99 @@
+/* config.h.  Generated from config.h.in by configure.  */
+/* config.h.in.  Generated from configure.ac by autoheader.  */
+
+/* libcairo is available for visual debugging output */
+/* #undef HAVE_CAIRO */
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#define HAVE_DLFCN_H 1
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#define HAVE_INTTYPES_H 1
+
+/* Define to 1 if you have the <machine/soundcard.h> header file. */
+/* #undef HAVE_MACHINE_SOUNDCARD_H */
+
+/* Define to 1 if you have the <memory.h> header file. */
+#define HAVE_MEMORY_H 1
+
+/* Define to 1 if you have the <soundcard.h> header file. */
+/* #undef HAVE_SOUNDCARD_H */
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#define HAVE_STDINT_H 1
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#define HAVE_STDLIB_H 1
+
+/* Define to 1 if you have the <strings.h> header file. */
+#define HAVE_STRINGS_H 1
+
+/* Define to 1 if you have the <string.h> header file. */
+#define HAVE_STRING_H 1
+
+/* Define to 1 if you have the <sys/soundcard.h> header file. */
+/* #undef HAVE_SYS_SOUNDCARD_H */
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#define HAVE_SYS_STAT_H 1
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#define HAVE_SYS_TYPES_H 1
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#define HAVE_UNISTD_H 1
+
+/* Define to the sub-directory in which libtool stores uninstalled libraries.
+   */
+#define LT_OBJDIR ".libs/"
+
+/* NOTE(review): the Android.mk added next to this file compiles only
+   lib/*.c (nothing from lib/arm/) and does not pass -DHAVE_CONFIG_H, so it
+   is unclear whether the ARM asm settings below take effect in the NDK
+   build -- confirm before relying on them. */
+
+/* make use of arm asm optimization */
+#define OC_ARM_ASM /**/
+
+/* Define if assembler supports EDSP instructions */
+#define OC_ARM_ASM_EDSP 1
+
+/* Define if assembler supports ARMv6 media instructions */
+#define OC_ARM_ASM_MEDIA 1
+
+/* Define if compiler supports NEON instructions */
+/* #undef OC_ARM_ASM_NEON */
+
+/* make use of c64x+ asm optimization */
+/* #undef OC_C64X_ASM */
+
+/* make use of x86_64 asm optimization */
+/* #undef OC_X86_64_ASM */
+
+/* make use of x86 asm optimization */
+/* #undef OC_X86_ASM */
+
+/* Name of package */
+#define PACKAGE "libtheora"
+
+/* Define to the address where bug reports for this package should be sent. */
+#define PACKAGE_BUGREPORT "[email protected]"
+
+/* Define to the full name of this package. */
+#define PACKAGE_NAME "libtheora"
+
+/* Define to the full name and version of this package. */
+#define PACKAGE_STRING "libtheora 1.2.0alpha1+svn"
+
+/* Define to the one symbol short name of this package. */
+#define PACKAGE_TARNAME "libtheora"
+
+/* Define to the home page for this package. */
+#define PACKAGE_URL ""
+
+/* Define to the version of this package. */
+#define PACKAGE_VERSION "1.2.0alpha1+svn"
+
+/* Define to 1 if you have the ANSI C header files. */
+#define STDC_HEADERS 1
+
+/* Define to exclude encode support from the build */
+#define THEORA_DISABLE_ENCODE /**/
+
+/* Version number of package */
+#define VERSION "1.2.0alpha1+svn"

+ 24 - 0
jni/libtheora-1.2.0alpha1/configure_android.sh

@@ -0,0 +1,24 @@
+#!/bin/bash
+#
+# Copies the theora sources from ../src into a fresh ./build directory and
+# runs autogen.sh there with the Android NDK cross-compilation settings.
+# README.Android.md describes this as the way to recreate config.h.
+#
+# This file is a modified version of the android build system of the
+# Adventure Game Studio (AGS) https://github.com/adventuregamestudio/ags
+#
+
+# Abort on the first failing command.
+set -e
+
+# ndkenv.sh is expected to define the NDK_* variables used below.
+source ./ndkenv.sh
+
+# Always start from a clean copy of the sources.
+BUILD_DIR=build
+rm -rf "$BUILD_DIR"
+mkdir "$BUILD_DIR"
+# Options go before the operands so that non-GNU cp implementations accept it.
+cp -a ../src/* "$BUILD_DIR"
+
+export CFLAGS="$NDK_CFLAGS -fsigned-char --sysroot=$NDK_PLATFORM_ROOT"
+export LDFLAGS="$NDK_LDFLAGS"
+
+pushd "$BUILD_DIR"
+
+# Disable the asflag probe because it guesses the wrong ARM architecture.
+./autogen.sh --host="$NDK_HOST_NAME" --disable-doc --disable-examples --disable-asflag-probe --disable-encode --disable-shared --disable-spec --disable-oggtest --disable-vorbistest
+
+popd

+ 599 - 0
jni/libtheora-1.2.0alpha1/include/theora/Makefile

@@ -0,0 +1,599 @@
+# Makefile.in generated by automake 1.14.1 from Makefile.am.
+# include/theora/Makefile.  Generated from Makefile.in by configure.
+
+# Copyright (C) 1994-2013 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+
+
+
+am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/libtheora
+pkgincludedir = $(includedir)/libtheora
+pkglibdir = $(libdir)/libtheora
+pkglibexecdir = $(libexecdir)/libtheora
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = x86_64-unknown-linux-gnu
+host_triplet = arm-unknown-linux-androideabi
+subdir = include/theora
+DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \
+	$(noinst_HEADERS) $(theorainclude_HEADERS)
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/as-ac-expand.m4 \
+	$(top_srcdir)/m4/as-gcc-inline-assembly.m4 \
+	$(top_srcdir)/m4/ogg.m4 $(top_srcdir)/m4/pkg.m4 \
+	$(top_srcdir)/m4/vorbis.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+AM_V_P = $(am__v_P_$(V))
+am__v_P_ = $(am__v_P_$(AM_DEFAULT_VERBOSITY))
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_$(V))
+am__v_GEN_ = $(am__v_GEN_$(AM_DEFAULT_VERBOSITY))
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 = 
+AM_V_at = $(am__v_at_$(V))
+am__v_at_ = $(am__v_at_$(AM_DEFAULT_VERBOSITY))
+am__v_at_0 = @
+am__v_at_1 = 
+SOURCES =
+DIST_SOURCES =
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+    $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+    *) f=$$p;; \
+  esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+  srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+  for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+  for p in $$list; do echo "$$p $$p"; done | \
+  sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+  $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+    if (++n[$$2] == $(am__install_max)) \
+      { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+    END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+  sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+  sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+  test -z "$$files" \
+    || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+    || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+         $(am__cd) "$$dir" && rm -f $$files; }; \
+  }
+am__installdirs = "$(DESTDIR)$(theoraincludedir)"
+HEADERS = $(noinst_HEADERS) $(theorainclude_HEADERS)
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates.  Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+  BEGIN { nonempty = 0; } \
+  { items[$$0] = 1; nonempty = 1; } \
+  END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique.  This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+  list='$(am__tagged_files)'; \
+  unique=`for i in $$list; do \
+    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+  done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = ${SHELL} /home/mfelis/temp/theora/armeabi-v7a/src/missing aclocal-1.14
+AMTAR = $${TAR-tar}
+AM_DEFAULT_VERBOSITY = 0
+AR = /opt/mfelis/android-ndk-linux/toolchains/arm-linux-androideabi-4.8/prebuilt/linux-x86_64/bin/arm-linux-androideabi-ar
+AS = /opt/mfelis/android-ndk-linux/toolchains/arm-linux-androideabi-4.8/prebuilt/linux-x86_64/bin/arm-linux-androideabi-as
+AUTOCONF = ${SHELL} /home/mfelis/temp/theora/armeabi-v7a/src/missing autoconf
+AUTOHEADER = ${SHELL} /home/mfelis/temp/theora/armeabi-v7a/src/missing autoheader
+AUTOMAKE = ${SHELL} /home/mfelis/temp/theora/armeabi-v7a/src/missing automake-1.14
+AWK = gawk
+BINDIR = 
+BUILDABLE_EXAMPLES =  png2theora$(EXEEXT)
+CAIRO_CFLAGS = 
+CAIRO_LIBS = 
+CC = /opt/mfelis/android-ndk-linux/toolchains/arm-linux-androideabi-4.8/prebuilt/linux-x86_64/bin/arm-linux-androideabi-gcc
+CCAS = /opt/mfelis/android-ndk-linux/toolchains/arm-linux-androideabi-4.8/prebuilt/linux-x86_64/bin/arm-linux-androideabi-gcc
+CCASDEPMODE = depmode=gcc3
+CCASFLAGS =  -mthumb -march=armv7-a -mfloat-abi=softfp -mfpu=vfpv3-d16 -I/opt/mfelis/android-ndk-linux/platforms/android-9/arch-arm/usr/include/sys -fsigned-char --sysroot=/opt/mfelis/android-ndk-linux/platforms/android-9/arch-arm 
+CCDEPMODE = depmode=gcc3
+CFLAGS = -Wall -Wno-parentheses -O3 -fomit-frame-pointer -finline-functions -funroll-loops  -mthumb -march=armv7-a -mfloat-abi=softfp -mfpu=vfpv3-d16 -I/opt/mfelis/android-ndk-linux/platforms/android-9/arch-arm/usr/include/sys -fsigned-char --sysroot=/opt/mfelis/android-ndk-linux/platforms/android-9/arch-arm
+COMPAT_LIBS = 
+CPP = /opt/mfelis/android-ndk-linux/toolchains/arm-linux-androideabi-4.8/prebuilt/linux-x86_64/bin/arm-linux-androideabi-cpp -E --sysroot=/opt/mfelis/android-ndk-linux/platforms/android-9/arch-arm
+CPPFLAGS = 
+CYGPATH_W = echo
+DEBUG = -g -Wall -Wno-parentheses -DDEBUG -D__NO_MATH_INLINES
+DEFS = -DHAVE_CONFIG_H
+DEPDIR = .deps
+DLLTOOL = dlltool
+DOCDIR = 
+DSYMUTIL = 
+DUMPBIN = 
+ECHO_C = 
+ECHO_N = -n
+ECHO_T = 
+EGREP = /bin/grep -E
+EXEEXT = 
+FGREP = /bin/grep -F
+GETOPT_OBJS = 
+GREP = /bin/grep
+HAVE_ARM_ASM_EDSP = 1
+HAVE_ARM_ASM_MEDIA = 1
+HAVE_ARM_ASM_NEON = 0
+HAVE_BIBTEX = yes
+HAVE_DOXYGEN = true
+HAVE_PDFLATEX = yes
+HAVE_PERL = yes
+HAVE_PKG_CONFIG = yes
+HAVE_TIFF = no
+HAVE_TRANSFIG = yes
+HAVE_VALGRIND = 
+INCLUDEDIR = 
+INSTALL = /usr/bin/install -c
+INSTALL_DATA = ${INSTALL} -m 644
+INSTALL_PROGRAM = ${INSTALL}
+INSTALL_SCRIPT = ${INSTALL}
+INSTALL_STRIP_PROGRAM = $(install_sh) -c -s
+LD = /opt/mfelis/android-ndk-linux/toolchains/arm-linux-androideabi-4.8/prebuilt/linux-x86_64/bin/arm-linux-androideabi-ld
+LDFLAGS =  -march=armv7-a -Wl,--fix-cortex-a8
+LIBDIR = 
+LIBOBJS = 
+LIBS = 
+LIBTOOL = $(SHELL) $(top_builddir)/libtool
+LIPO = 
+LN_S = ln -s
+LTLIBOBJS = 
+MAINT = 
+MAKEINFO = ${SHELL} /home/mfelis/temp/theora/armeabi-v7a/src/missing makeinfo
+MANIFEST_TOOL = :
+MKDIR_P = /bin/mkdir -p
+NM = /opt/mfelis/android-ndk-linux/toolchains/arm-linux-androideabi-4.8/prebuilt/linux-x86_64/bin/arm-linux-androideabi-nm
+NMEDIT = 
+OBJDUMP = objdump
+OBJEXT = o
+OGG_CFLAGS =  
+OGG_LIBS = -logg  
+OSS_LIBS = 
+OTOOL = 
+OTOOL64 = 
+PACKAGE = libtheora
+PACKAGE_BUGREPORT = [email protected]
+PACKAGE_NAME = libtheora
+PACKAGE_STRING = libtheora 1.2.0alpha1+svn
+PACKAGE_TARNAME = libtheora
+PACKAGE_URL = 
+PACKAGE_VERSION = 1.2.0alpha1+svn
+PATH_SEPARATOR = :
+PKG_CONFIG = /usr/bin/pkg-config
+PNG_CFLAGS = -I/usr/include/libpng12  
+PNG_LIBS = -lpng12  
+PROFILE = -Wall -Wno-parentheses -pg -g -O3 -fno-inline-functions -DDEBUG
+RANLIB = /opt/mfelis/android-ndk-linux/toolchains/arm-linux-androideabi-4.8/prebuilt/linux-x86_64/bin/arm-linux-androideabi-ranlib
+SDL_CFLAGS = -D_GNU_SOURCE=1 -D_REENTRANT -I/usr/include/SDL  
+SDL_LIBS = -lSDL  
+SED = /bin/sed
+SET_MAKE = 
+SHELL = /bin/bash
+STRIP = /opt/mfelis/android-ndk-linux/toolchains/arm-linux-androideabi-4.8/prebuilt/linux-x86_64/bin/arm-linux-androideabi-strip
+THDEC_LIB_AGE = 1
+THDEC_LIB_CURRENT = 2
+THDEC_LIB_REVISION = 5
+THENC_LIB_AGE = 2
+THENC_LIB_CURRENT = 3
+THENC_LIB_REVISION = 0
+THEORADEC_LDFLAGS =  -Wl,--version-script=$(srcdir)/Version_script-dec
+THEORAENC_LDFLAGS =  -Wl,--version-script=$(srcdir)/Version_script-enc
+THEORA_LDFLAGS =  -Wl,--version-script=$(srcdir)/Version_script
+TH_LIB_AGE = 4
+TH_LIB_CURRENT = 4
+TH_LIB_REVISION = 0
+TIFF_CFLAGS = 
+TIFF_LIBS = 
+VALGRIND_ENVIRONMENT = 
+VERSION = 1.2.0alpha1+svn
+VORBISENC_LIBS = -lvorbisenc
+VORBISFILE_LIBS = 
+VORBIS_CFLAGS =  
+VORBIS_LIBS = -lvorbis  
+abs_builddir = /home/mfelis/temp/theora/armeabi-v7a/src/include/theora
+abs_srcdir = /home/mfelis/temp/theora/armeabi-v7a/src/include/theora
+abs_top_builddir = /home/mfelis/temp/theora/armeabi-v7a/src
+abs_top_srcdir = /home/mfelis/temp/theora/armeabi-v7a/src
+ac_ct_AR = 
+ac_ct_CC = 
+ac_ct_DUMPBIN = 
+am__include = include
+am__leading_dot = .
+am__quote = 
+am__tar = $${TAR-tar} chof - "$$tardir"
+am__untar = $${TAR-tar} xf -
+bindir = ${exec_prefix}/bin
+build = x86_64-unknown-linux-gnu
+build_alias = 
+build_cpu = x86_64
+build_os = linux-gnu
+build_vendor = unknown
+builddir = .
+datadir = ${datarootdir}
+datarootdir = ${prefix}/share
+docdir = ${datarootdir}/doc/${PACKAGE_TARNAME}
+dvidir = ${docdir}
+exec_prefix = ${prefix}
+host = arm-unknown-linux-androideabi
+host_alias = arm-linux-androideabi
+host_cpu = arm
+host_os = linux-androideabi
+host_vendor = unknown
+htmldir = ${docdir}
+includedir = ${prefix}/include
+infodir = ${datarootdir}/info
+install_sh = ${SHELL} /home/mfelis/temp/theora/armeabi-v7a/src/install-sh
+libdir = ${exec_prefix}/lib
+libexecdir = ${exec_prefix}/libexec
+localedir = ${datarootdir}/locale
+localstatedir = ${prefix}/var
+mandir = ${datarootdir}/man
+mkdir_p = $(MKDIR_P)
+oldincludedir = /usr/include
+pdfdir = ${docdir}
+prefix = /usr/local
+program_transform_name = s,x,x,
+psdir = ${docdir}
+sbindir = ${exec_prefix}/sbin
+sharedstatedir = ${prefix}/com
+srcdir = .
+sysconfdir = ${prefix}/etc
+target_alias = 
+top_build_prefix = ../../
+top_builddir = ../..
+top_srcdir = ../..
+theoraincludedir = $(includedir)/theora
+theorainclude_HEADERS = theora.h theoradec.h theoraenc.h codec.h
+noinst_HEADERS = codec.h theoradec.h
+all: all-am
+
+.SUFFIXES:
+$(srcdir)/Makefile.in:  $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu include/theora/Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --gnu include/theora/Makefile
+.PRECIOUS: Makefile
+# Regenerate this Makefile when Makefile.in or config.status is newer.
+# If config.status is among the out-of-date prerequisites ($?), delegate to
+# the am--refresh target in $(top_builddir); otherwise re-run config.status
+# for just $(subdir)/Makefile.
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure:  $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4):  $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+# Remove *.lo files from this directory; the leading `-' makes make ignore
+# a failing rm.
+mostlyclean-libtool:
+	-rm -f *.lo
+
+# Remove the .libs and _libs directories, again ignoring rm failures.
+clean-libtool:
+	-rm -rf .libs _libs
+# Install the headers listed in theorainclude_HEADERS into
+# $(DESTDIR)$(theoraincludedir).  The list is emptied when theoraincludedir
+# is empty, so nothing is created or copied in that case.  Each header is
+# taken from the current directory if it exists there, otherwise from
+# $(srcdir); the resulting paths are batched by $(am__base_list) so that a
+# single $(INSTALL_HEADER) invocation handles several files.
+install-theoraincludeHEADERS: $(theorainclude_HEADERS)
+	@$(NORMAL_INSTALL)
+	@list='$(theorainclude_HEADERS)'; test -n "$(theoraincludedir)" || list=; \
+	if test -n "$$list"; then \
+	  echo " $(MKDIR_P) '$(DESTDIR)$(theoraincludedir)'"; \
+	  $(MKDIR_P) "$(DESTDIR)$(theoraincludedir)" || exit 1; \
+	fi; \
+	for p in $$list; do \
+	  if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+	  echo "$$d$$p"; \
+	done | $(am__base_list) | \
+	while read files; do \
+	  echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(theoraincludedir)'"; \
+	  $(INSTALL_HEADER) $$files "$(DESTDIR)$(theoraincludedir)" || exit $$?; \
+	done
+
+# Remove the installed copies of theorainclude_HEADERS from
+# $(DESTDIR)$(theoraincludedir).  Directory components are stripped from each
+# name first; the removal itself is delegated to
+# $(am__uninstall_files_from_dir), which reads the shell variables `files'
+# and `dir' set here.
+uninstall-theoraincludeHEADERS:
+	@$(NORMAL_UNINSTALL)
+	@list='$(theorainclude_HEADERS)'; test -n "$(theoraincludedir)" || list=; \
+	files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
+	dir='$(DESTDIR)$(theoraincludedir)'; $(am__uninstall_files_from_dir)
+
+ID: $(am__tagged_files)
+	$(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	set x; \
+	here=`pwd`; \
+	$(am__define_uniq_tagged_files); \
+	shift; \
+	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  if test $$# -gt 0; then \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      "$$@" $$unique; \
+	  else \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      $$unique; \
+	  fi; \
+	fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	$(am__define_uniq_tagged_files); \
+	test -z "$(CTAGS_ARGS)$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && $(am__cd) $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+	list='$(am__tagged_files)'; \
+	case "$(srcdir)" in \
+	  [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+	  *) sdir=$(subdir)/$(srcdir) ;; \
+	esac; \
+	for i in $$list; do \
+	  if test -f "$$i"; then \
+	    echo "$(subdir)/$$i"; \
+	  else \
+	    echo "$$sdir/$$i"; \
+	  fi; \
+	done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile $(HEADERS)
+# Create the installation directories used by this Makefile (only
+# $(DESTDIR)$(theoraincludedir) here); an empty name is skipped.
+installdirs:
+	for dir in "$(DESTDIR)$(theoraincludedir)"; do \
+	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+	done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+# Re-run the `install' target with INSTALL_PROGRAM and install_sh_PROGRAM
+# overridden to $(INSTALL_STRIP_PROGRAM) and with INSTALL_STRIP_FLAG=-s.
+# When STRIP is non-empty it is additionally handed on as STRIPPROG through
+# INSTALL_PROGRAM_ENV.
+install-strip:
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
+mostlyclean-generic:
+
+clean-generic:
+
+# Delete the files named in CONFIG_CLEAN_FILES and, unless srcdir is `.',
+# those named in CONFIG_CLEAN_VPATH_FILES.  Both variables are defined empty
+# in this Makefile, so neither rm is reached as it stands.
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+	-rm -f Makefile
+distclean-am: clean-am distclean-generic distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-theoraincludeHEADERS
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-theoraincludeHEADERS
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
+	clean-libtool cscopelist-am ctags ctags-am distclean \
+	distclean-generic distclean-libtool distclean-tags distdir dvi \
+	dvi-am html html-am info info-am install install-am \
+	install-data install-data-am install-dvi install-dvi-am \
+	install-exec install-exec-am install-html install-html-am \
+	install-info install-info-am install-man install-pdf \
+	install-pdf-am install-ps install-ps-am install-strip \
+	install-theoraincludeHEADERS installcheck installcheck-am \
+	installdirs maintainer-clean maintainer-clean-generic \
+	mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \
+	ps ps-am tags tags-am uninstall uninstall-am \
+	uninstall-theoraincludeHEADERS
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:

+ 7 - 0
jni/libtheora-1.2.0alpha1/include/theora/Makefile.am

@@ -0,0 +1,7 @@
+## Process this file with automake to produce Makefile.in
+
+## Directory the public theora headers are installed into.
+theoraincludedir = $(includedir)/theora
+
+## Headers installed into $(theoraincludedir).
+theorainclude_HEADERS = theora.h theoradec.h theoraenc.h codec.h
+
+## NOTE(review): codec.h and theoradec.h are also listed in
+## theorainclude_HEADERS above, so they are still installed despite the
+## noinst_ prefix; confirm that the duplication is intentional.
+noinst_HEADERS = codec.h theoradec.h

+ 599 - 0
jni/libtheora-1.2.0alpha1/include/theora/Makefile.in

@@ -0,0 +1,599 @@
+# Makefile.in generated by automake 1.14.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2013 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+VPATH = @srcdir@
+am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+subdir = include/theora
+DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \
+	$(noinst_HEADERS) $(theorainclude_HEADERS)
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/as-ac-expand.m4 \
+	$(top_srcdir)/m4/as-gcc-inline-assembly.m4 \
+	$(top_srcdir)/m4/ogg.m4 $(top_srcdir)/m4/pkg.m4 \
+	$(top_srcdir)/m4/vorbis.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 = 
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 = 
+SOURCES =
+DIST_SOURCES =
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+    $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+    *) f=$$p;; \
+  esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+  srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+  for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+  for p in $$list; do echo "$$p $$p"; done | \
+  sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+  $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+    if (++n[$$2] == $(am__install_max)) \
+      { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+    END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+  sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+  sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+  test -z "$$files" \
+    || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+    || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+         $(am__cd) "$$dir" && rm -f $$files; }; \
+  }
+am__installdirs = "$(DESTDIR)$(theoraincludedir)"
+HEADERS = $(noinst_HEADERS) $(theorainclude_HEADERS)
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates.  Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+  BEGIN { nonempty = 0; } \
+  { items[$$0] = 1; nonempty = 1; } \
+  END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique.  This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+  list='$(am__tagged_files)'; \
+  unique=`for i in $$list; do \
+    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+  done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BINDIR = @BINDIR@
+BUILDABLE_EXAMPLES = @BUILDABLE_EXAMPLES@
+CAIRO_CFLAGS = @CAIRO_CFLAGS@
+CAIRO_LIBS = @CAIRO_LIBS@
+CC = @CC@
+CCAS = @CCAS@
+CCASDEPMODE = @CCASDEPMODE@
+CCASFLAGS = @CCASFLAGS@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+COMPAT_LIBS = @COMPAT_LIBS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEBUG = @DEBUG@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DOCDIR = @DOCDIR@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GETOPT_OBJS = @GETOPT_OBJS@
+GREP = @GREP@
+HAVE_ARM_ASM_EDSP = @HAVE_ARM_ASM_EDSP@
+HAVE_ARM_ASM_MEDIA = @HAVE_ARM_ASM_MEDIA@
+HAVE_ARM_ASM_NEON = @HAVE_ARM_ASM_NEON@
+HAVE_BIBTEX = @HAVE_BIBTEX@
+HAVE_DOXYGEN = @HAVE_DOXYGEN@
+HAVE_PDFLATEX = @HAVE_PDFLATEX@
+HAVE_PERL = @HAVE_PERL@
+HAVE_PKG_CONFIG = @HAVE_PKG_CONFIG@
+HAVE_TIFF = @HAVE_TIFF@
+HAVE_TRANSFIG = @HAVE_TRANSFIG@
+HAVE_VALGRIND = @HAVE_VALGRIND@
+INCLUDEDIR = @INCLUDEDIR@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBDIR = @LIBDIR@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OGG_CFLAGS = @OGG_CFLAGS@
+OGG_LIBS = @OGG_LIBS@
+OSS_LIBS = @OSS_LIBS@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PKG_CONFIG = @PKG_CONFIG@
+PNG_CFLAGS = @PNG_CFLAGS@
+PNG_LIBS = @PNG_LIBS@
+PROFILE = @PROFILE@
+RANLIB = @RANLIB@
+SDL_CFLAGS = @SDL_CFLAGS@
+SDL_LIBS = @SDL_LIBS@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+THDEC_LIB_AGE = @THDEC_LIB_AGE@
+THDEC_LIB_CURRENT = @THDEC_LIB_CURRENT@
+THDEC_LIB_REVISION = @THDEC_LIB_REVISION@
+THENC_LIB_AGE = @THENC_LIB_AGE@
+THENC_LIB_CURRENT = @THENC_LIB_CURRENT@
+THENC_LIB_REVISION = @THENC_LIB_REVISION@
+THEORADEC_LDFLAGS = @THEORADEC_LDFLAGS@
+THEORAENC_LDFLAGS = @THEORAENC_LDFLAGS@
+THEORA_LDFLAGS = @THEORA_LDFLAGS@
+TH_LIB_AGE = @TH_LIB_AGE@
+TH_LIB_CURRENT = @TH_LIB_CURRENT@
+TH_LIB_REVISION = @TH_LIB_REVISION@
+TIFF_CFLAGS = @TIFF_CFLAGS@
+TIFF_LIBS = @TIFF_LIBS@
+VALGRIND_ENVIRONMENT = @VALGRIND_ENVIRONMENT@
+VERSION = @VERSION@
+VORBISENC_LIBS = @VORBISENC_LIBS@
+VORBISFILE_LIBS = @VORBISFILE_LIBS@
+VORBIS_CFLAGS = @VORBIS_CFLAGS@
+VORBIS_LIBS = @VORBIS_LIBS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+theoraincludedir = $(includedir)/theora
+theorainclude_HEADERS = theora.h theoradec.h theoraenc.h codec.h
+noinst_HEADERS = codec.h theoradec.h
+all: all-am
+
+.SUFFIXES:
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu include/theora/Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --gnu include/theora/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+mostlyclean-libtool:
+	-rm -f *.lo
+
+clean-libtool:
+	-rm -rf .libs _libs
+install-theoraincludeHEADERS: $(theorainclude_HEADERS)
+	@$(NORMAL_INSTALL)
+	@list='$(theorainclude_HEADERS)'; test -n "$(theoraincludedir)" || list=; \
+	if test -n "$$list"; then \
+	  echo " $(MKDIR_P) '$(DESTDIR)$(theoraincludedir)'"; \
+	  $(MKDIR_P) "$(DESTDIR)$(theoraincludedir)" || exit 1; \
+	fi; \
+	for p in $$list; do \
+	  if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+	  echo "$$d$$p"; \
+	done | $(am__base_list) | \
+	while read files; do \
+	  echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(theoraincludedir)'"; \
+	  $(INSTALL_HEADER) $$files "$(DESTDIR)$(theoraincludedir)" || exit $$?; \
+	done
+
+uninstall-theoraincludeHEADERS:
+	@$(NORMAL_UNINSTALL)
+	@list='$(theorainclude_HEADERS)'; test -n "$(theoraincludedir)" || list=; \
+	files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
+	dir='$(DESTDIR)$(theoraincludedir)'; $(am__uninstall_files_from_dir)
+
+ID: $(am__tagged_files)
+	$(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	set x; \
+	here=`pwd`; \
+	$(am__define_uniq_tagged_files); \
+	shift; \
+	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  if test $$# -gt 0; then \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      "$$@" $$unique; \
+	  else \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      $$unique; \
+	  fi; \
+	fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	$(am__define_uniq_tagged_files); \
+	test -z "$(CTAGS_ARGS)$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && $(am__cd) $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+	list='$(am__tagged_files)'; \
+	case "$(srcdir)" in \
+	  [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+	  *) sdir=$(subdir)/$(srcdir) ;; \
+	esac; \
+	for i in $$list; do \
+	  if test -f "$$i"; then \
+	    echo "$(subdir)/$$i"; \
+	  else \
+	    echo "$$sdir/$$i"; \
+	  fi; \
+	done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile $(HEADERS)
+installdirs:
+	for dir in "$(DESTDIR)$(theoraincludedir)"; do \
+	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+	done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+	-rm -f Makefile
+distclean-am: clean-am distclean-generic distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-theoraincludeHEADERS
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-theoraincludeHEADERS
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
+	clean-libtool cscopelist-am ctags ctags-am distclean \
+	distclean-generic distclean-libtool distclean-tags distdir dvi \
+	dvi-am html html-am info info-am install install-am \
+	install-data install-data-am install-dvi install-dvi-am \
+	install-exec install-exec-am install-html install-html-am \
+	install-info install-info-am install-man install-pdf \
+	install-pdf-am install-ps install-ps-am install-strip \
+	install-theoraincludeHEADERS installcheck installcheck-am \
+	installdirs maintainer-clean maintainer-clean-generic \
+	mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \
+	ps ps-am tags tags-am uninstall uninstall-am \
+	uninstall-theoraincludeHEADERS
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:

+ 606 - 0
jni/libtheora-1.2.0alpha1/include/theora/codec.h

@@ -0,0 +1,606 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id: theora.h,v 1.8 2004/03/15 22:17:32 derf Exp $
+
+ ********************************************************************/
+
+/**\mainpage
+ *
+ * \section intro Introduction
+ *
+ * This is the documentation for the <tt>libtheora</tt> C API.
+ *
+ * The \c libtheora package is the current reference
+ * implementation for <a href="http://www.theora.org/">Theora</a>, a free,
+ * patent-unencumbered video codec.
+ * Theora is derived from On2's VP3 codec with additional features and
+ *  integration with Ogg multimedia formats by
+ *  <a href="http://www.xiph.org/">the Xiph.Org Foundation</a>.
+ * Complete documentation of the format itself is available in
+ * <a href="http://www.theora.org/doc/Theora.pdf">the Theora
+ *  specification</a>.
+ *
+ * \section organization Organization
+ *
+ * The functions documented here are divided between two
+ * separate libraries:
+ * - \c libtheoraenc contains the encoder interface,
+ *   described in \ref encfuncs.
+ * - \c libtheoradec contains the decoder interface,
+ *   described in \ref decfuncs, \n
+ *   and additional \ref basefuncs.
+ *
+ * New code should link to \c libtheoradec. If using encoder
+ * features, it must also link to \c libtheoraenc.
+ *
+ * During initial development, prior to the 1.0 release,
+ * \c libtheora exported a different \ref oldfuncs which
+ * combined both encode and decode functions.
+ * In general, legacy API symbols can be identified
+ * by their \c theora_ or \c OC_ namespace prefixes.
+ * The current API uses \c th_ or \c TH_ instead.
+ *
+ * While deprecated, \c libtheoraenc and \c libtheoradec
+ * together export the legacy API as well as the one documented above.
+ * Likewise, the legacy \c libtheora included with this package
+ * exports the new 1.x API. Older code and build scripts can therefore
+ * be updated independently to the current scheme.
+ */
+
+/**\file
+ * The shared <tt>libtheoradec</tt> and <tt>libtheoraenc</tt> C API.
+ * You don't need to include this directly.*/
+
+#if !defined(_O_THEORA_CODEC_H_)
+# define _O_THEORA_CODEC_H_ (1)
+# include <ogg/ogg.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+
+
+/**\name Return codes*/
+/*@{*/
+/**An invalid pointer was provided.*/
+#define TH_EFAULT     (-1)
+/**An invalid argument was provided.*/
+#define TH_EINVAL     (-10)
+/**The contents of the header were incomplete, invalid, or unexpected.*/
+#define TH_EBADHEADER (-20)
+/**The header does not belong to a Theora stream.*/
+#define TH_ENOTFORMAT (-21)
+/**The bitstream version is too high.*/
+#define TH_EVERSION   (-22)
+/**The specified function is not implemented.*/
+#define TH_EIMPL      (-23)
+/**There were errors in the video data packet.*/
+#define TH_EBADPACKET (-24)
+/**The decoded packet represented a dropped frame.
+   The player can continue to display the current frame, as the contents of the
+    decoded frame buffer have not changed.*/
+#define TH_DUPFRAME   (1)
+/*@}*/
+
+/**The currently defined color space tags.
+ * See <a href="http://www.theora.org/doc/Theora.pdf">the Theora
+ *  specification</a>, Chapter 4, for exact details on the meaning
+ *  of each of these color spaces.*/
+typedef enum{
+  /**The color space was not specified at the encoder.
+      It may be conveyed by an external means.*/
+  TH_CS_UNSPECIFIED,
+  /**A color space designed for NTSC content.*/
+  TH_CS_ITU_REC_470M,
+  /**A color space designed for PAL/SECAM content.*/
+  TH_CS_ITU_REC_470BG,
+  /**The total number of currently defined color spaces.*/
+  TH_CS_NSPACES
+}th_colorspace;
+
+/**The currently defined pixel format tags.
+ * See <a href="http://www.theora.org/doc/Theora.pdf">the Theora
+ *  specification</a>, Section 4.4, for details on the precise sample
+ *  locations.*/
+typedef enum{
+  /**Chroma decimation by 2 in both the X and Y directions (4:2:0).
+     The Cb and Cr chroma planes are half the width and half the
+      height of the luma plane.*/
+  TH_PF_420,
+  /**Currently reserved.*/
+  TH_PF_RSVD,
+  /**Chroma decimation by 2 in the X direction (4:2:2).
+     The Cb and Cr chroma planes are half the width of the luma plane, but full
+      height.*/
+  TH_PF_422,
+  /**No chroma decimation (4:4:4).
+     The Cb and Cr chroma planes are full width and full height.*/
+  TH_PF_444,
+  /**The total number of currently defined pixel formats.*/
+  TH_PF_NFORMATS
+}th_pixel_fmt;
+
+
+
+/**A buffer for a single color plane in an uncompressed image.
+ * This contains the image data in a left-to-right, top-down format.
+ * Each row of pixels is stored contiguously in memory, but successive
+ *  rows need not be.
+ * Use \a stride to compute the offset of the next row.
+ * The encoder accepts both positive \a stride values (top-down in memory)
+ *  and negative (bottom-up in memory).
+ * The decoder currently always generates images with positive strides.*/
+typedef struct{
+  /**The width of this plane.*/
+  int            width;
+  /**The height of this plane.*/
+  int            height;
+  /**The offset in bytes between successive rows.*/
+  int            stride;
+  /**A pointer to the beginning of the first row.*/
+  unsigned char *data;
+}th_img_plane;
+
+/**A complete image buffer for an uncompressed frame.
+ * The chroma planes may be decimated by a factor of two in either
+ *  direction, as indicated by th_info#pixel_fmt.
+ * The width and height of the Y' plane must be multiples of 16.
+ * They may need to be cropped for display, using the rectangle
+ *  specified by th_info#pic_x, th_info#pic_y, th_info#pic_width,
+ *  and th_info#pic_height.
+ * All samples are 8 bits.
+ * \note The term YUV often used to describe a colorspace is ambiguous.
+ * The exact parameters of the RGB to YUV conversion process aside, in
+ *  many contexts the U and V channels actually have opposite meanings.
+ * To avoid this confusion, we are explicit: the name of the color
+ *  channels are Y'CbCr, and they appear in that order, always.
+ * The prime symbol denotes that the Y channel is non-linear.
+ * Cb and Cr stand for "Chroma blue" and "Chroma red", respectively.*/
+typedef th_img_plane th_ycbcr_buffer[3];
+
+/**Theora bitstream information.
+ * This contains the basic playback parameters for a stream, and corresponds to
+ *  the initial 'info' header packet.
+ * To initialize an encoder, the application fills in this structure and
+ *  passes it to th_encode_alloc().
+ * A default encoding mode is chosen based on the values of the #quality and
+ *  #target_bitrate fields.
+ * On decode, it is filled in by th_decode_headerin(), and then passed to
+ *  th_decode_alloc().
+ *
+ * Encoded Theora frames must be a multiple of 16 in size;
+ *  this is what the #frame_width and #frame_height members represent.
+ * To handle arbitrary picture sizes, a crop rectangle is specified in the
+ *  #pic_x, #pic_y, #pic_width and #pic_height members.
+ *
+ * All frame buffers contain pointers to the full, padded frame.
+ * However, the current encoder <em>will not</em> reference pixels outside of
+ *  the cropped picture region, and the application does not need to fill them
+ *  in.
+ * The decoder <em>will</em> allocate storage for a full frame, but the
+ *  application <em>should not</em> rely on the padding containing sensible
+ *  data.
+ *
+ * It is also generally recommended that the offsets and sizes should still be
+ *  multiples of 2 to avoid chroma sampling shifts when chroma is sub-sampled.
+ * See <a href="http://www.theora.org/doc/Theora.pdf">the Theora
+ *  specification</a>, Section 4.4, for more details.
+ *
+ * Frame rate, in frames per second, is stored as a rational fraction, as is
+ *  the pixel aspect ratio.
+ * Note that this refers to the aspect ratio of the individual pixels, not of
+ *  the overall frame itself.
+ * The frame aspect ratio can be computed from pixel aspect ratio using the
+ *  image dimensions.*/
+typedef struct{
+  /**\name Theora version
+   * Bitstream version information.*/
+  /*@{*/
+  unsigned char version_major;
+  unsigned char version_minor;
+  unsigned char version_subminor;
+  /*@}*/
+  /**The encoded frame width.
+   * This must be a multiple of 16, and less than 1048576.*/
+  ogg_uint32_t  frame_width;
+  /**The encoded frame height.
+   * This must be a multiple of 16, and less than 1048576.*/
+  ogg_uint32_t  frame_height;
+  /**The displayed picture width.
+   * This must be no larger than #frame_width.*/
+  ogg_uint32_t  pic_width;
+  /**The displayed picture height.
+   * This must be no larger than #frame_height.*/
+  ogg_uint32_t  pic_height;
+  /**The X offset of the displayed picture.
+   * This must be no larger than #frame_width-#pic_width or 255, whichever is
+   *  smaller.*/
+  ogg_uint32_t  pic_x;
+  /**The Y offset of the displayed picture.
+   * This must be no larger than #frame_height-#pic_height, and
+   *  #frame_height-#pic_height-#pic_y must be no larger than 255.
+   * This slightly funny restriction is due to the fact that the offset is
+   *  specified from the top of the image for consistency with the standard
+   *  graphics left-handed coordinate system used throughout this API, while
+   *  it is stored in the encoded stream as an offset from the bottom.*/
+  ogg_uint32_t  pic_y;
+  /**\name Frame rate
+   * The frame rate, as a fraction.
+   * If either is 0, the frame rate is undefined.*/
+  /*@{*/
+  ogg_uint32_t  fps_numerator;
+  ogg_uint32_t  fps_denominator;
+  /*@}*/
+  /**\name Aspect ratio
+   * The aspect ratio of the pixels.
+   * If either value is zero, the aspect ratio is undefined.
+   * If not specified by any external means, 1:1 should be assumed.
+   * The aspect ratio of the full picture can be computed as
+   * \code
+   *  aspect_numerator*pic_width/(aspect_denominator*pic_height).
+   * \endcode */
+  /*@{*/
+  ogg_uint32_t  aspect_numerator;
+  ogg_uint32_t  aspect_denominator;
+  /*@}*/
+  /**The color space.*/
+  th_colorspace colorspace;
+  /**The pixel format.*/
+  th_pixel_fmt  pixel_fmt;
+  /**The target bit-rate in bits per second.
+     If initializing an encoder with this struct, set this field to a non-zero
+      value to activate CBR encoding by default.*/
+  int           target_bitrate;
+  /**The target quality level.
+     Valid values range from 0 to 63, inclusive, with higher values giving
+      higher quality.
+     If initializing an encoder with this struct, and #target_bitrate is set
+      to zero, VBR encoding at this quality will be activated by default.*/
+  /*Currently this is set so that a qi of 0 corresponds to distortions of 24
+     times the JND, and each increase by 16 halves that value.
+    This gives us fine discrimination at low qualities, yet effective rate
+     control at high qualities.
+    The qi value 63 is special, however.
+    For this, the highest quality, we use one half of a JND for our threshold.
+    Due to the lower bounds placed on allowable quantizers in Theora, we will
+     not actually be able to achieve quality this good, but this should
+     provide as close to visually lossless quality as Theora is capable of.
+    We could lift the quantizer restrictions without breaking VP3.1
+     compatibility, but this would result in quantized coefficients that are
+     too large for the current bitstream to be able to store.
+    We'd have to redesign the token syntax to store these large coefficients,
+     which would make transcoding complex.*/
+  int           quality;
+  /**The amount to shift to extract the last keyframe number from the granule
+   *  position.
+   * This can be at most 31.
+   * th_info_init() will set this to a default value (currently <tt>6</tt>,
+   *  which is good for streaming applications), but you can set it to 0 to
+   *  make every frame a keyframe.
+   * The maximum distance between key frames is
+   *  <tt>1<<#keyframe_granule_shift</tt>.
+   * The keyframe frequency can be more finely controlled with
+   *  #TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE, which can also be adjusted
+   *  during encoding (for example, to force the next frame to be a keyframe),
+   *  but it cannot be set larger than the amount permitted by this field after
+   *  the headers have been output.*/
+  int           keyframe_granule_shift;
+}th_info;
+
+/**The comment information.
+ *
+ * This structure holds the in-stream metadata corresponding to
+ *  the 'comment' header packet.
+ * The comment header is meant to be used much like someone jotting a quick
+ *  note on the label of a video.
+ * It should be a short, to the point text note that can be more than a couple
+ *  words, but not more than a short paragraph.
+ *
+ * The metadata is stored as a series of (tag, value) pairs, in
+ *  length-encoded string vectors.
+ * The first occurrence of the '=' character delimits the tag and value.
+ * A particular tag may occur more than once, and order is significant.
+ * The character set encoding for the strings is always UTF-8, but the tag
+ *  names are limited to ASCII, and treated as case-insensitive.
+ * See <a href="http://www.theora.org/doc/Theora.pdf">the Theora
+ *  specification</a>, Section 6.3.3 for details.
+ *
+ * In filling in this structure, th_decode_headerin() will null-terminate
+ *  the user_comment strings for safety.
+ * However, the bitstream format itself treats them as 8-bit clean vectors,
+ *  possibly containing null characters, so the length array should be
+ *  treated as their authoritative length.
+ */
+typedef struct th_comment{
+  /**The array of comment string vectors.*/
+  char **user_comments;
+  /**An array of the corresponding length of each vector, in bytes.*/
+  int   *comment_lengths;
+  /**The total number of comment strings.*/
+  int    comments;
+  /**The null-terminated vendor string.
+     This identifies the software used to encode the stream.*/
+  char  *vendor;
+}th_comment;
+
+
+
+/**A single base matrix.*/
+typedef unsigned char th_quant_base[64];
+
+/**A set of \a qi ranges.*/
+typedef struct{
+  /**The number of ranges in the set.*/
+  int                  nranges;
+  /**The size of each of the #nranges ranges.
+     These must sum to 63.*/
+  const int           *sizes;
+  /**#nranges <tt>+1</tt> base matrices.
+     Matrices \a i and <tt>i+1</tt> form the endpoints of range \a i.*/
+  const th_quant_base *base_matrices;
+}th_quant_ranges;
+
+/**A complete set of quantization parameters.
+   The quantizer for each coefficient is calculated as:
+   \code
+    Q=MIN(MAX(qmin[qti][ci!=0],scale[ci!=0][qi]*base[qti][pli][qi][ci]/100),
+     1024).
+   \endcode
+
+   \a qti is the quantization type index: 0 for intra, 1 for inter.
+   <tt>ci!=0</tt> is 0 for the DC coefficient and 1 for AC coefficients.
+   \a qi is the quality index, ranging between 0 (low quality) and 63 (high
+    quality).
+   \a pli is the color plane index: 0 for Y', 1 for Cb, 2 for Cr.
+   \a ci is the DCT coefficient index.
+   Coefficient indices correspond to the normal 2D DCT block
+    ordering--row-major with low frequencies first--\em not zig-zag order.
+
+   Minimum quantizers are constant, and are given by:
+   \code
+   qmin[2][2]={{4,2},{8,4}}.
+   \endcode
+
+   Parameters that can be stored in the bitstream are as follows:
+    - The two scale matrices ac_scale and dc_scale.
+      \code
+      scale[2][64]={dc_scale,ac_scale}.
+      \endcode
+    - The base matrices for each \a qi, \a qti and \a pli (up to 384 in all).
+      In order to avoid storing a full 384 base matrices, only a sparse set of
+       matrices are stored, and the rest are linearly interpolated.
+      This is done as follows.
+      For each \a qti and \a pli, a series of \a n \a qi ranges is defined.
+      The size of each \a qi range can vary arbitrarily, but they must sum to
+       63.
+      Then, <tt>n+1</tt> matrices are specified, one for each endpoint of the
+       ranges.
+      For interpolation purposes, each range's endpoints are the first \a qi
+       value it contains and one past the last \a qi value it contains.
+      Fractional values are rounded to the nearest integer, with ties rounded
+       away from zero.
+
+      Base matrices are stored by reference, so if the same matrices are used
+       multiple times, they will only appear once in the bitstream.
+      The bitstream is also capable of omitting an entire set of ranges and
+       its associated matrices if they are the same as either the previous
+       set (indexed in row-major order) or if the inter set is the same as the
+       intra set.
+
+    - Loop filter limit values.
+      The same limits are used for the loop filter in all color planes, despite
+       potentially differing levels of quantization in each.
+
+   For the current encoder, <tt>scale[ci!=0][qi]</tt> must be no greater
+    than <tt>scale[ci!=0][qi-1]</tt> and <tt>base[qti][pli][qi][ci]</tt> must
+    be no greater than <tt>base[qti][pli][qi-1][ci]</tt>.
+   These two conditions ensure that the actual quantizer for a given \a qti,
+    \a pli, and \a ci does not increase as \a qi increases.
+   This is not required by the decoder.*/
+typedef struct{
+  /**The DC scaling factors, one per \a qi.*/
+  ogg_uint16_t    dc_scale[64];
+  /**The AC scaling factors, one per \a qi.*/
+  ogg_uint16_t    ac_scale[64];
+  /**The loop filter limit values.*/
+  unsigned char   loop_filter_limits[64];
+  /**The \a qi ranges for each \a qti and \a pli.*/
+  th_quant_ranges qi_ranges[2][3];
+}th_quant_info;
+
+
+
+/**The number of Huffman tables used by Theora.*/
+#define TH_NHUFFMAN_TABLES (80)
+/**The number of DCT token values in each table.*/
+#define TH_NDCT_TOKENS     (32)
+
+/**A Huffman code for a Theora DCT token.
+ * Each set of Huffman codes in a given table must form a complete, prefix-free
+ *  code.
+ * There is no requirement that all the tokens in a table have a valid code,
+ *  but the current encoder is not optimized to take advantage of this.
+ * If each of the five groups of 16 tables does not contain at least one table
+ *  with a code for every token, then the encoder may fail to encode certain
+ *  frames.
+ * The complete table in the first group of 16 does not have to be in the same
+ *  place as the complete table in the other groups, but the complete tables in
+ *  the remaining four groups must all be in the same place.*/
+typedef struct{
+  /**The bit pattern for the code, with the LSbit of the pattern aligned in
+   *   the LSbit of the word.*/
+  ogg_uint32_t pattern;
+  /**The number of bits in the code.
+   * This must be between 0 and 32, inclusive.*/
+  int          nbits;
+}th_huff_code;
+
+
+
+/**\defgroup basefuncs Functions Shared by Encode and Decode*/
+/*@{*/
+/**\name Basic shared functions
+ * These functions return information about the library itself,
+ * or provide high-level information about codec state
+ * and packet type.
+ *
+ * You must link to \c libtheoradec if you use any of the
+ * functions in this section.*/
+/*@{*/
+/**Retrieves a human-readable string to identify the library vendor and
+ *  version.
+ * \return the version string.*/
+extern const char *th_version_string(void);
+/**Retrieves the library version number.
+ * This is the highest bitstream version that the encoder library will produce,
+ *  or that the decoder library can decode.
+ * This number is composed of a 16-bit major version, 8-bit minor version
+ * and 8-bit sub-version, composed as follows:
+ * \code
+ * (VERSION_MAJOR<<16)+(VERSION_MINOR<<8)+(VERSION_SUBMINOR)
+ * \endcode
+ * \return the version number.*/
+extern ogg_uint32_t th_version_number(void);
+/**Converts a granule position to an absolute frame index, starting at
+ *  <tt>0</tt>.
+ * The granule position is interpreted in the context of a given
+ *  #th_enc_ctx or #th_dec_ctx handle (either will suffice).
+ * \param _encdec  A previously allocated #th_enc_ctx or #th_dec_ctx
+ *                  handle.
+ * \param _granpos The granule position to convert.
+ * \returns The absolute frame index corresponding to \a _granpos.
+ * \retval -1 The given granule position was invalid (i.e. negative).*/
+extern ogg_int64_t th_granule_frame(void *_encdec,ogg_int64_t _granpos);
+/**Converts a granule position to an absolute time in seconds.
+ * The granule position is interpreted in the context of a given
+ *  #th_enc_ctx or #th_dec_ctx handle (either will suffice).
+ * \param _encdec  A previously allocated #th_enc_ctx or #th_dec_ctx
+ *                  handle.
+ * \param _granpos The granule position to convert.
+ * \return The absolute time in seconds corresponding to \a _granpos.
+ *         This is the "end time" for the frame, or the latest time it should
+ *          be displayed.
+ *         It is not the presentation time.
+ * \retval -1 The given granule position was invalid (i.e. negative).*/
+extern double th_granule_time(void *_encdec,ogg_int64_t _granpos);
+/**Determines whether a Theora packet is a header or not.
+ * This function does no verification beyond checking the packet type bit, so
+ *  it should not be used for bitstream identification; use
+ *  th_decode_headerin() for that.
+ * As per the Theora specification, an empty (0-byte) packet is treated as a
+ *  data packet (a delta frame with no coded blocks).
+ * \param _op An <tt>ogg_packet</tt> containing encoded Theora data.
+ * \retval 1 The packet is a header packet
+ * \retval 0 The packet is a video data packet.*/
+extern int th_packet_isheader(ogg_packet *_op);
+/**Determines whether a Theora packet is a key frame or not.
+ * This function does no verification beyond checking the packet type and
+ *  key frame bits, so it should not be used for bitstream identification; use
+ *  th_decode_headerin() for that.
+ * As per the Theora specification, an empty (0-byte) packet is treated as a
+ *  delta frame (with no coded blocks).
+ * \param _op An <tt>ogg_packet</tt> containing encoded Theora data.
+ * \retval 1  The packet contains a key frame.
+ * \retval 0  The packet contains a delta frame.
+ * \retval -1 The packet is not a video data packet.*/
+extern int th_packet_iskeyframe(ogg_packet *_op);
+/*@}*/
+
+
+/**\name Functions for manipulating header data
+ * These functions manipulate the #th_info and #th_comment structures
+ * which describe video parameters and key-value metadata, respectively.
+ *
+ * You must link to \c libtheoradec if you use any of the
+ * functions in this section.*/
+/*@{*/
+/**Initializes a th_info structure.
+ * This should be called on a freshly allocated #th_info structure before
+ *  attempting to use it.
+ * \param _info The #th_info struct to initialize.*/
+extern void th_info_init(th_info *_info);
+/**Clears a #th_info structure.
+ * This should be called on a #th_info structure after it is no longer
+ *  needed.
+ * \param _info The #th_info struct to clear.*/
+extern void th_info_clear(th_info *_info);
+
+/**Initialize a #th_comment structure.
+ * This should be called on a freshly allocated #th_comment structure
+ *  before attempting to use it.
+ * \param _tc The #th_comment struct to initialize.*/
+extern void th_comment_init(th_comment *_tc);
+/**Add a comment to an initialized #th_comment structure.
+ * \note Neither th_comment_add() nor th_comment_add_tag() support
+ *  comments containing null values, although the bitstream format does
+ *  support them.
+ * To add such comments you will need to manipulate the #th_comment
+ *  structure directly.
+ * \param _tc      The #th_comment struct to add the comment to.
+ * \param _comment Must be a null-terminated UTF-8 string containing the
+ *                  comment in "TAG=the value" form.*/
+extern void th_comment_add(th_comment *_tc,const char *_comment);
+/**Add a comment to an initialized #th_comment structure.
+ * \note Neither th_comment_add() nor th_comment_add_tag() support
+ *  comments containing null values, although the bitstream format does
+ *  support them.
+ * To add such comments you will need to manipulate the #th_comment
+ *  structure directly.
+ * \param _tc  The #th_comment struct to add the comment to.
+ * \param _tag A null-terminated string containing the tag associated with
+ *              the comment.
+ * \param _val The corresponding value as a null-terminated string.*/
+extern void th_comment_add_tag(th_comment *_tc,const char *_tag,
+ const char *_val);
+/**Look up a comment value by its tag.
+ * \param _tc    An initialized #th_comment structure.
+ * \param _tag   The tag to look up.
+ * \param _count The instance of the tag.
+ *               The same tag can appear multiple times, each with a distinct
+ *                value, so an index is required to retrieve them all.
+ *               The order in which these values appear is significant and
+ *                should be preserved.
+ *               Use th_comment_query_count() to get the legal range for
+ *                the \a _count parameter.
+ * \return A pointer to the queried tag's value.
+ *         This points directly to data in the #th_comment structure.
+ *         It should not be modified or freed by the application, and
+ *          modifications to the structure may invalidate the pointer.
+ * \retval NULL If no matching tag is found.*/
+extern char *th_comment_query(th_comment *_tc,const char *_tag,int _count);
+/**Look up the number of instances of a tag.
+ * Call this first when querying for a specific tag and then iterate over the
+ *  number of instances with separate calls to th_comment_query() to
+ *  retrieve all the values for that tag in order.
+ * \param _tc    An initialized #th_comment structure.
+ * \param _tag   The tag to look up.
+ * \return The number of instances of this particular tag.*/
+extern int th_comment_query_count(th_comment *_tc,const char *_tag);
+/**Clears a #th_comment structure.
+ * This should be called on a #th_comment structure after it is no longer
+ *  needed.
+ * It will free all memory used by the structure members.
+ * \param _tc The #th_comment struct to clear.*/
+extern void th_comment_clear(th_comment *_tc);
+/*@}*/
+/*@}*/
+
+
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif

+ 786 - 0
jni/libtheora-1.2.0alpha1/include/theora/theora.h

@@ -0,0 +1,786 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id: theora.h,v 1.17 2003/12/06 18:06:19 arc Exp $
+
+ ********************************************************************/
+
+#ifndef _O_THEORA_H_
+#define _O_THEORA_H_
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif /* __cplusplus */
+
+#include <stddef.h>	/* for size_t */
+
+#include <ogg/ogg.h>
+
+/** \file
+ * The libtheora pre-1.0 legacy C API.
+ *
+ * \ingroup oldfuncs
+ *
+ * \section intro Introduction
+ *
+ * This is the documentation for the libtheora legacy C API, declared in
+ * the theora.h header, which describes the old interface used before
+ * the 1.0 release. This API was widely deployed for several years and
+ * remains supported, but for new code we recommend the cleaner API
+ * declared in theoradec.h and theoraenc.h.
+ *
+ * libtheora is the reference implementation for
+ * <a href="http://www.theora.org/">Theora</a>, a free video codec.
+ * Theora is derived from On2's VP3 codec with improved integration with
+ * Ogg multimedia formats by <a href="http://www.xiph.org/">Xiph.Org</a>.
+ *
+ * \section overview Overview
+ *
+ * This library will both decode and encode theora packets to/from raw YUV
+ * frames.  In either case, the packets will most likely either come from or
+ * need to be embedded in an Ogg stream.  Use
+ * <a href="http://xiph.org/ogg/">libogg</a> or
+ * <a href="http://www.annodex.net/software/liboggz/index.html">liboggz</a>
+ * to extract/package these packets.
+ *
+ * \section decoding Decoding Process
+ *
+ * Decoding can be separated into the following steps:
+ * -# initialise theora_info and theora_comment structures using
+ *    theora_info_init() and theora_comment_init():
+ \verbatim
+ theora_info     info;
+ theora_comment  comment;
+
+ theora_info_init(&info);
+ theora_comment_init(&comment);
+ \endverbatim
+ * -# retrieve header packets from Ogg stream (there should be 3) and decode
+ *    into theora_info and theora_comment structures using
+ *    theora_decode_header().  See \ref identification for more information on
+ *    identifying which packets are theora packets.
+ \verbatim
+ int i;
+ for (i = 0; i < 3; i++)
+ {
+   (get a theora packet "op" from the Ogg stream)
+   theora_decode_header(&info, &comment, op);
+ }
+ \endverbatim
+ * -# initialise the decoder based on the information retrieved into the
+ *    theora_info struct by theora_decode_header().  You will need a
+ *    theora_state struct.
+ \verbatim
+ theora_state state;
+
+ theora_decode_init(&state, &info);
+ \endverbatim
+ * -# pass in packets and retrieve decoded frames!  See the yuv_buffer
+ *    documentation for information on how to retrieve raw YUV data.
+ \verbatim
+ yuv_buffer buffer;
+ while (last packet was not e_o_s) {
+   (get a theora packet "op" from the Ogg stream)
+   theora_decode_packetin(&state, op);
+   theora_decode_YUVout(&state, &buffer);
+ }
+ \endverbatim
+ *
+ *
+ * \subsection identification Identifying Theora Packets
+ *
+ * All streams inside an Ogg file have a unique serial_no attached to the
+ * stream.  Typically, you will want to
+ *  - retrieve the serial_no for each b_o_s (beginning of stream) page
+ *    encountered within the Ogg file;
+ *  - test the first (only) packet on that page to determine if it is a theora
+ *    packet;
+ *  - once you have found a theora b_o_s page then use the retrieved serial_no
+ *    to identify future packets belonging to the same theora stream.
+ *
+ * Note that you \e cannot use theora_packet_isheader() to determine if a
+ * packet is a theora packet or not, as this function does not perform any
+ * checking beyond whether a header bit is present.  Instead, use the
+ * theora_decode_header() function and check the return value; or examine the
+ * header bytes at the beginning of the Ogg page.
+ */
+
+
+/** \defgroup oldfuncs Legacy pre-1.0 C API */
+/*  @{ */
+
+/**
+ * A YUV buffer for passing uncompressed frames to and from the codec.
+ * This holds a Y'CbCr frame in planar format. The CbCr planes can be
+ * subsampled and have their own separate dimensions and row stride
+ * offsets. Note that the strides may be negative in some
+ * configurations. For theora the width and height of the largest plane
+ * must be a multiple of 16. The actual meaningful picture size and
+ * offset are stored in the theora_info structure; frames returned by
+ * the decoder may need to be cropped for display.
+ *
+ * All samples are 8 bits. Within each plane samples are ordered by
+ * row from the top of the frame to the bottom. Within each row samples
+ * are ordered from left to right.
+ *
+ * During decode, the yuv_buffer struct is allocated by the user, but all
+ * fields (including luma and chroma pointers) are filled by the library.
+ * These pointers address library-internal memory and their contents should
+ * not be modified.
+ *
+ * Conversely, during encode the user allocates the struct and fills out all
+ * fields.  The user also manages the data addressed by the luma and chroma
+ * pointers.  See the encoder_example.c and dump_video.c example files in
+ * theora/examples/ for more information.
+ */
+typedef struct {
+    int   y_width;      /**< Width of the Y' luminance plane */
+    int   y_height;     /**< Height of the luminance plane */
+    int   y_stride;     /**< Offset in bytes between successive rows */
+
+    int   uv_width;     /**< Width of the Cb and Cr chroma planes */
+    int   uv_height;    /**< Height of the chroma planes */
+    int   uv_stride;    /**< Offset between successive chroma rows */
+    unsigned char *y;   /**< Pointer to start of luminance data */
+    unsigned char *u;   /**< Pointer to start of Cb data */
+    unsigned char *v;   /**< Pointer to start of Cr data */
+
+} yuv_buffer;
+
+/**
+ * A Colorspace.
+ */
+typedef enum {
+  OC_CS_UNSPECIFIED,    /**< The colorspace is unknown or unspecified */
+  OC_CS_ITU_REC_470M,   /**< This is the best option for 'NTSC' content */
+  OC_CS_ITU_REC_470BG,  /**< This is the best option for 'PAL' content */
+  OC_CS_NSPACES         /**< This marks the end of the defined colorspaces */
+} theora_colorspace;
+
+/**
+ * A Chroma subsampling
+ *
+ * These enumerate the available chroma subsampling options supported
+ * by the theora format. See Section 4.4 of the specification for
+ * exact definitions.
+ */
+typedef enum {
+  OC_PF_420,    /**< Chroma subsampling by 2 in each direction (4:2:0) */
+  OC_PF_RSVD,   /**< Reserved value */
+  OC_PF_422,    /**< Horizontal chroma subsampling by 2 (4:2:2) */
+  OC_PF_444     /**< No chroma subsampling at all (4:4:4) */
+} theora_pixelformat;
+
+/**
+ * Theora bitstream info.
+ * Contains the basic playback parameters for a stream,
+ * corresponding to the initial 'info' header packet.
+ *
+ * Encoded theora frames must be a multiple of 16 in width and height.
+ * To handle other frame sizes, a crop rectangle is specified in
+ * frame_height and frame_width, offset_x and offset_y. The offset
+ * and size should still be a multiple of 2 to avoid chroma sampling
+ * shifts. Offset values in this structure are measured from the
+ * upper left of the image.
+ *
+ * Frame rate, in frames per second, is stored as a rational
+ * fraction. Aspect ratio is also stored as a rational fraction, and
+ * refers to the aspect ratio of the frame pixels, not of the
+ * overall frame itself.
+ *
+ * See <a href="http://svn.xiph.org/trunk/theora/examples/encoder_example.c">
+ * examples/encoder_example.c</a> for usage examples of the
+ * other parameters and good default settings for the encoder parameters.
+ */
+typedef struct {
+  ogg_uint32_t  width;		/**< encoded frame width (multiple of 16) */
+  ogg_uint32_t  height;		/**< encoded frame height (multiple of 16) */
+  ogg_uint32_t  frame_width;	/**< display frame width  */
+  ogg_uint32_t  frame_height;	/**< display frame height */
+  ogg_uint32_t  offset_x;	/**< horizontal offset of the displayed frame */
+  ogg_uint32_t  offset_y;	/**< vertical offset of the displayed frame */
+  ogg_uint32_t  fps_numerator;	    /**< frame rate numerator */
+  ogg_uint32_t  fps_denominator;    /**< frame rate denominator */
+  ogg_uint32_t  aspect_numerator;   /**< pixel aspect ratio numerator */
+  ogg_uint32_t  aspect_denominator; /**< pixel aspect ratio denominator */
+  theora_colorspace colorspace;	    /**< colorspace */
+  int           target_bitrate;	    /**< nominal bitrate in bits per second */
+  int           quality;  /**< Nominal quality setting, 0-63 */
+  int           quick_p;  /**< Quick encode/decode */
+
+  /* decode only */
+  unsigned char version_major;
+  unsigned char version_minor;
+  unsigned char version_subminor;
+
+  void *codec_setup;  /**< internal setup data, attached by theora_decode_header() */
+
+  /* encode only */
+  int           dropframes_p;
+  int           keyframe_auto_p;
+  ogg_uint32_t  keyframe_frequency;
+  ogg_uint32_t  keyframe_frequency_force;  /* also used for decode init to
+                                              get granpos shift correct */
+  ogg_uint32_t  keyframe_data_target_bitrate;
+  ogg_int32_t   keyframe_auto_threshold;
+  ogg_uint32_t  keyframe_mindistance;
+  ogg_int32_t   noise_sensitivity;
+  ogg_int32_t   sharpness;
+
+  theora_pixelformat pixelformat;	/**< chroma subsampling mode to expect */
+
+} theora_info;
+
+/** Codec internal state and context.
+ */
+typedef struct{
+  theora_info *i;
+  ogg_int64_t granulepos;
+
+  void *internal_encode;
+  void *internal_decode;
+
+} theora_state;
+
+/**
+ * Comment header metadata.
+ *
+ * This structure holds the in-stream metadata corresponding to
+ * the 'comment' header packet.
+ *
+ * Meta data is stored as a series of (tag, value) pairs, in
+ * length-encoded string vectors. The first occurrence of the
+ * '=' character delimits the tag and value. A particular tag
+ * may occur more than once. The character set encoding for
+ * the strings is always UTF-8, but the tag names are limited
+ * to case-insensitive ASCII. See the spec for details.
+ *
+ * In filling in this structure, theora_decode_header() will
+ * null-terminate the user_comment strings for safety. However,
+ * the bitstream format itself treats them as 8-bit clean,
+ * and so the length array should be treated as authoritative
+ * for their length.
+ */
+typedef struct theora_comment{
+  char **user_comments;         /**< An array of comment string vectors */
+  int   *comment_lengths;       /**< An array of corresponding string vector lengths in bytes */
+  int    comments;              /**< The total number of comment string vectors */
+  char  *vendor;                /**< The vendor string identifying the encoder, null terminated */
+
+} theora_comment;
+
+
+/**\name theora_control() codes */
+/* \anchor decctlcodes_old
+ * These are the available request codes for theora_control()
+ * when called with a decoder instance.
+ * By convention decoder control codes are odd, to distinguish
+ * them from \ref encctlcodes_old "encoder control codes" which
+ * are even.
+ *
+ * Note that since the 1.0 release, both the legacy and the final
+ * implementation accept all the same control codes, but only the
+ * final API declares the newer codes.
+ *
+ * Keep any experimental or vendor-specific values above \c 0x8000.*/
+
+/*@{*/
+
+/**Get the maximum post-processing level.
+ * The decoder supports a post-processing filter that can improve
+ * the appearance of the decoded images. This returns the highest
+ * level setting for this post-processor, corresponding to maximum
+ * improvement and computational expense.
+ */
+#define TH_DECCTL_GET_PPLEVEL_MAX (1)
+
+/**Set the post-processing level.
+ * Sets the level of post-processing to use when decoding the
+ * compressed stream. This must be a value between zero (off)
+ * and the maximum returned by TH_DECCTL_GET_PPLEVEL_MAX.
+ */
+#define TH_DECCTL_SET_PPLEVEL (3)
+
+/**Sets the maximum distance between key frames.
+ * This can be changed during an encode, but will be bounded by
+ *  <tt>1<<th_info#keyframe_granule_shift</tt>.
+ * If it is set before encoding begins, th_info#keyframe_granule_shift will
+ *  be enlarged appropriately.
+ *
+ * \param[in]  buf <tt>ogg_uint32_t</tt>: The maximum distance between key
+ *                   frames.
+ * \param[out] buf <tt>ogg_uint32_t</tt>: The actual maximum distance set.
+ * \retval OC_FAULT  \a theora_state or \a buf is <tt>NULL</tt>.
+ * \retval OC_EINVAL \a buf_sz is not <tt>sizeof(ogg_uint32_t)</tt>.
+ * \retval OC_IMPL   Not supported by this implementation.*/
+#define TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE (4)
+
+/**Set the granule position.
+ * Call this after a seek, to update the internal granulepos
+ * in the decoder, to ensure that subsequent frames are marked
+ * properly. If you track timestamps yourself and do not use
+ * the granule position returned by the decoder, then you do
+ * not need to use this control.
+ */
+#define TH_DECCTL_SET_GRANPOS (5)
+
+/**\anchor encctlcodes_old */
+
+/**Sets the quantization parameters to use.
+ * The parameters are copied, not stored by reference, so they can be freed
+ *  after this call.
+ * <tt>NULL</tt> may be specified to revert to the default parameters.
+ *
+ * \param[in] buf #th_quant_info
+ * \retval OC_FAULT  \a theora_state is <tt>NULL</tt>.
+ * \retval OC_EINVAL Encoding has already begun, the quantization parameters
+ *                    are not acceptable to this version of the encoder,
+ *                    \a buf is <tt>NULL</tt> and \a buf_sz is not zero,
+ *                    or \a buf is non-<tt>NULL</tt> and \a buf_sz is
+ *                    not <tt>sizeof(#th_quant_info)</tt>.
+ * \retval OC_IMPL   Not supported by this implementation.*/
+#define TH_ENCCTL_SET_QUANT_PARAMS (2)
+
+/**Disables any encoder features that would prevent lossless transcoding back
+ *  to VP3.
+ * This primarily means disabling block-level QI values and not using 4MV mode
+ *  when any of the luma blocks in a macro block are not coded.
+ * It also includes using the VP3 quantization tables and Huffman codes; if you
+ *  set them explicitly after calling this function, the resulting stream will
+ *  not be VP3-compatible.
+ * If you enable VP3-compatibility when encoding 4:2:2 or 4:4:4 source
+ *  material, or when using a picture region smaller than the full frame (e.g.
+ *  a non-multiple-of-16 width or height), then non-VP3 bitstream features will
+ *  still be disabled, but the stream will still not be VP3-compatible, as VP3
+ *  was not capable of encoding such formats.
+ * If you call this after encoding has already begun, then the quantization
+ *  tables and codebooks cannot be changed, but the frame-level features will
+ *  be enabled or disabled as requested.
+ *
+ * \param[in]  buf <tt>int</tt>: a non-zero value to enable VP3 compatibility,
+ *                   or 0 to disable it (the default).
+ * \param[out] buf <tt>int</tt>: 1 if all bitstream features required for
+ *                   VP3-compatibility could be set, and 0 otherwise.
+ *                  The latter will be returned if the pixel format is not
+ *                   4:2:0, the picture region is smaller than the full frame,
+ *                   or if encoding has begun, preventing the quantization
+ *                   tables and codebooks from being set.
+ * \retval OC_FAULT  \a theora_state or \a buf is <tt>NULL</tt>.
+ * \retval OC_EINVAL \a buf_sz is not <tt>sizeof(int)</tt>.
+ * \retval OC_IMPL   Not supported by this implementation.*/
+#define TH_ENCCTL_SET_VP3_COMPATIBLE (10)
+
+/**Gets the maximum speed level.
+ * Higher speed levels favor quicker encoding over better quality per bit.
+ * Depending on the encoding mode, and the internal algorithms used, quality
+ *  may actually improve, but in this case bitrate will also likely increase.
+ * In any case, overall rate/distortion performance will probably decrease.
+ * The maximum value, and the meaning of each value, may change depending on
+ *  the current encoding mode (VBR vs. CQI, etc.).
+ *
+ * \param[out] buf int: The maximum encoding speed level.
+ * \retval OC_FAULT  \a theora_state or \a buf is <tt>NULL</tt>.
+ * \retval OC_EINVAL \a buf_sz is not <tt>sizeof(int)</tt>.
+ * \retval OC_IMPL   Not supported by this implementation in the current
+ *                    encoding mode.*/
+#define TH_ENCCTL_GET_SPLEVEL_MAX (12)
+
+/**Sets the speed level.
+ * By default a speed value of 1 is used.
+ *
+ * \param[in] buf int: The new encoding speed level.
+ *                      0 is slowest, larger values use less CPU.
+ * \retval OC_FAULT  \a theora_state or \a buf is <tt>NULL</tt>.
+ * \retval OC_EINVAL \a buf_sz is not <tt>sizeof(int)</tt>, or the
+ *                    encoding speed level is out of bounds.
+ *                   The maximum encoding speed level may be
+ *                    implementation- and encoding mode-specific, and can be
+ *                    obtained via #TH_ENCCTL_GET_SPLEVEL_MAX.
+ * \retval OC_IMPL   Not supported by this implementation in the current
+ *                    encoding mode.*/
+#define TH_ENCCTL_SET_SPLEVEL (14)
+
+/*@}*/
+
+#define OC_FAULT       -1       /**< General failure */
+#define OC_EINVAL      -10      /**< Library encountered invalid internal data */
+#define OC_DISABLED    -11      /**< Requested action is disabled */
+#define OC_BADHEADER   -20      /**< Header packet was corrupt/invalid */
+#define OC_NOTFORMAT   -21      /**< Packet is not a theora packet */
+#define OC_VERSION     -22      /**< Bitstream version is not handled */
+#define OC_IMPL        -23      /**< Feature or action not implemented */
+#define OC_BADPACKET   -24      /**< Packet is corrupt */
+#define OC_NEWPACKET   -25      /**< Packet is an (ignorable) unhandled extension */
+#define OC_DUPFRAME    1        /**< Packet is a dropped frame */
+
+/**
+ * Retrieve a human-readable string to identify the encoder vendor and version.
+ * \returns A version string.
+ */
+extern const char *theora_version_string(void);
+
+/**
+ * Retrieve a 32-bit version number.
+ * This number is composed of a 16-bit major version, 8-bit minor version
+ * and 8 bit sub-version, composed as follows:
+<pre>
+   (VERSION_MAJOR<<16) + (VERSION_MINOR<<8) + (VERSION_SUB)
+</pre>
+* \returns The version number.
+*/
+extern ogg_uint32_t theora_version_number(void);
+
+/**
+ * Initialize the theora encoder.
+ * \param th The theora_state handle to initialize for encoding.
+ * \param ti A theora_info struct filled with the desired encoding parameters.
+ * \retval 0 Success
+ */
+extern int theora_encode_init(theora_state *th, theora_info *ti);
+
+/**
+ * Submit a YUV buffer to the theora encoder.
+ * \param t A theora_state handle previously initialized for encoding.
+ * \param yuv A buffer of YUV data to encode.  Note that both the yuv_buffer
+ *            struct and the luma/chroma buffers within should be allocated by
+ *            the user.
+ * \retval OC_EINVAL Encoder is not ready, or is finished.
+ * \retval -1 The size of the given frame differs from those previously input
+ * \retval 0 Success
+ */
+extern int theora_encode_YUVin(theora_state *t, yuv_buffer *yuv);
+
+/**
+ * Request the next packet of encoded video.
+ * The encoded data is placed in a user-provided ogg_packet structure.
+ * \param t A theora_state handle previously initialized for encoding.
+ * \param last_p whether this is the last packet the encoder should produce.
+ * \param op An ogg_packet structure to fill. libtheora will set all
+ *           elements of this structure, including a pointer to encoded
+ *           data. The memory for the encoded data is owned by libtheora.
+ * \retval 0 No internal storage exists OR no packet is ready
+ * \retval -1 The encoding process has completed
+ * \retval 1 Success
+ */
+extern int theora_encode_packetout( theora_state *t, int last_p,
+                                    ogg_packet *op);
+
+/**
+ * Request a packet containing the initial header.
+ * A pointer to the header data is placed in a user-provided ogg_packet
+ * structure.
+ * \param t A theora_state handle previously initialized for encoding.
+ * \param op An ogg_packet structure to fill. libtheora will set all
+ *           elements of this structure, including a pointer to the header
+ *           data. The memory for the header data is owned by libtheora.
+ * \retval 0 Success
+ */
+extern int theora_encode_header(theora_state *t, ogg_packet *op);
+
+/**
+ * Request a comment header packet from provided metadata.
+ * A pointer to the comment data is placed in a user-provided ogg_packet
+ * structure.
+ * \param tc A theora_comment structure filled with the desired metadata
+ * \param op An ogg_packet structure to fill. libtheora will set all
+ *           elements of this structure, including a pointer to the encoded
+ *           comment data. The memory for the comment data is owned by
+ *           the application, and must be freed by it using _ogg_free().
+ *           On some systems (such as Windows when using dynamic linking), this
+ *           may mean the free is executed in a different module from the
+ *           malloc, which will crash; there is no way to free this memory on
+ *           such systems.
+ * \retval 0 Success
+ */
+extern int theora_encode_comment(theora_comment *tc, ogg_packet *op);
+
+/**
+ * Request a packet containing the codebook tables for the stream.
+ * A pointer to the codebook data is placed in a user-provided ogg_packet
+ * structure.
+ * \param t A theora_state handle previously initialized for encoding.
+ * \param op An ogg_packet structure to fill. libtheora will set all
+ *           elements of this structure, including a pointer to the codebook
+ *           data. The memory for the header data is owned by libtheora.
+ * \retval 0 Success
+ */
+extern int theora_encode_tables(theora_state *t, ogg_packet *op);
+
+/**
+ * Decode an Ogg packet, with the expectation that the packet contains
+ * an initial header, comment data or codebook tables.
+ *
+ * \param ci A theora_info structure to fill. This must have been previously
+ *           initialized with theora_info_init(). If \a op contains an initial
+ *           header, theora_decode_header() will fill \a ci with the
+ *           parsed header values. If \a op contains codebook tables,
+ *           theora_decode_header() will parse these and attach an internal
+ *           representation to \a ci->codec_setup.
+ * \param cc A theora_comment structure to fill. If \a op contains comment
+ *           data, theora_decode_header() will fill \a cc with the parsed
+ *           comments.
+ * \param op An ogg_packet structure which you expect contains an initial
+ *           header, comment data or codebook tables.
+ *
+ * \retval OC_BADHEADER \a op is NULL; OR the first byte of \a op->packet
+ *                      has the signature of an initial packet, but op is
+ *                      not a b_o_s packet; OR this packet has the signature
+ *                      of an initial header packet, but an initial header
+ *                      packet has already been seen; OR this packet has the
+ *                      signature of a comment packet, but the initial header
+ *                      has not yet been seen; OR this packet has the signature
+ *                      of a comment packet, but contains invalid data; OR
+ *                      this packet has the signature of codebook tables,
+ *                      but the initial header or comments have not yet
+ *                      been seen; OR this packet has the signature of codebook
+ *                      tables, but contains invalid data;
+ *                      OR the stream being decoded has a compatible version
+ *                      but this packet does not have the signature of a
+ *                      theora initial header, comments, or codebook packet
+ * \retval OC_VERSION   The packet data of \a op is an initial header with
+ *                      a version which is incompatible with this version of
+ *                      libtheora.
+ * \retval OC_NEWPACKET the stream being decoded has an incompatible (future)
+ *                      version and contains an unknown signature.
+ * \retval 0            Success
+ *
+ * \note The normal usage is that theora_decode_header() be called on the
+ *       first three packets of a theora logical bitstream in succession.
+ */
+extern int theora_decode_header(theora_info *ci, theora_comment *cc,
+                                ogg_packet *op);
+
+/**
+ * Initialize a theora_state handle for decoding.
+ * \param th The theora_state handle to initialize.
+ * \param c  A theora_info struct filled with the desired decoding parameters.
+ *           This is of course usually obtained from a previous call to
+ *           theora_decode_header().
+ * \retval 0 Success
+ */
+extern int theora_decode_init(theora_state *th, theora_info *c);
+
+/**
+ * Input a packet containing encoded data into the theora decoder.
+ * \param th A theora_state handle previously initialized for decoding.
+ * \param op An ogg_packet containing encoded theora data.
+ * \retval 0 Success
+ * \retval OC_BADPACKET \a op does not contain encoded video data
+ */
+extern int theora_decode_packetin(theora_state *th,ogg_packet *op);
+
+/**
+ * Output the next available frame of decoded YUV data.
+ * \param th A theora_state handle previously initialized for decoding.
+ * \param yuv A yuv_buffer in which libtheora should place the decoded data.
+ *            Note that the buffer struct itself is allocated by the user, but
+ *            that the luma and chroma pointers will be filled in by the
+ *            library.  Also note that these luma and chroma regions should be
+ *            considered read-only by the user.
+ * \retval 0 Success
+ */
+extern int theora_decode_YUVout(theora_state *th,yuv_buffer *yuv);
+
+/**
+ * Report whether a theora packet is a header or not
+ * This function does no verification beyond checking the header
+ * flag bit so it should not be used for bitstream identification;
+ * use theora_decode_header() for that.
+ *
+ * \param op An ogg_packet containing encoded theora data.
+ * \retval 1 The packet is a header packet
+ * \retval 0 The packet is not a header packet (and so contains frame data)
+ *
+ * This function was added in the 1.0alpha4 release.
+ */
+extern int theora_packet_isheader(ogg_packet *op);
+
+/**
+ * Report whether a theora packet is a keyframe or not
+ *
+ * \param op An ogg_packet containing encoded theora data.
+ * \retval 1 The packet contains a keyframe image
+ * \retval 0 The packet contains an interframe delta
+ * \retval -1 The packet is not an image data packet at all
+ *
+ * This function was added in the 1.0alpha4 release.
+ */
+extern int theora_packet_iskeyframe(ogg_packet *op);
+
+/**
+ * Report the granulepos shift radix
+ *
+ * When embedded in Ogg, Theora uses a two-part granulepos,
+ * splitting the 64-bit field into two pieces. The more-significant
+ * section represents the frame count at the last keyframe,
+ * and the less-significant section represents the count of
+ * frames since the last keyframe. In this way the overall
+ * field is still non-decreasing with time, but usefully encodes
+ * a pointer to the last keyframe, which is necessary for
+ * correctly restarting decode after a seek.
+ *
+ * This function reports the number of bits used to represent
+ * the distance to the last keyframe, and thus how the granulepos
+ * field must be shifted or masked to obtain the two parts.
+ *
+ * Since libtheora returns compressed data in an ogg_packet
+ * structure, this may be generally useful even if the Theora
+ * packets are not being used in an Ogg container.
+ *
+ * \param ti A previously initialized theora_info struct
+ * \returns The bit shift dividing the two granulepos fields
+ *
+ * This function was added in the 1.0alpha5 release.
+ */
+int theora_granule_shift(theora_info *ti);
+
+/**
+ * Convert a granulepos to an absolute frame index, starting at 0.
+ * The granulepos is interpreted in the context of a given theora_state handle.
+ *
+ * Note that while the granulepos encodes the frame count (i.e. starting
+ * from 1) this call returns the frame index, starting from zero. Thus
+ * one can calculate the presentation time by dividing the index by
+ * the frame rate.
+ *
+ * \param th A previously initialized theora_state handle (encode or decode)
+ * \param granulepos The granulepos to convert.
+ * \returns The frame index corresponding to \a granulepos.
+ * \retval -1 The given granulepos is undefined (i.e. negative)
+ *
+ * This function was added in the 1.0alpha4 release.
+ */
+extern ogg_int64_t theora_granule_frame(theora_state *th,ogg_int64_t granulepos);
+
+/**
+ * Convert a granulepos to absolute time in seconds. The granulepos is
+ * interpreted in the context of a given theora_state handle, and gives
+ * the end time of a frame's presentation as used in Ogg mux ordering.
+ *
+ * \param th A previously initialized theora_state handle (encode or decode)
+ * \param granulepos The granulepos to convert.
+ * \returns The absolute time in seconds corresponding to \a granulepos.
+ *          This is the "end time" for the frame, or the latest time it should
+ *           be displayed.
+ *          It is not the presentation time.
+ * \retval -1. The given granulepos is undefined (i.e. negative).
+ */
+extern double theora_granule_time(theora_state *th,ogg_int64_t granulepos);
+
+/**
+ * Initialize a theora_info structure. All values within the given theora_info
+ * structure are initialized, and space is allocated within libtheora for
+ * internal codec setup data.
+ * \param c A theora_info struct to initialize.
+ */
+extern void theora_info_init(theora_info *c);
+
+/**
+ * Clear a theora_info structure. All values within the given theora_info
+ * structure are cleared, and associated internal codec setup data is freed.
+ * \param c A theora_info struct to clear.
+ */
+extern void theora_info_clear(theora_info *c);
+
+/**
+ * Free all internal data associated with a theora_state handle.
+ * \param t A theora_state handle.
+ */
+extern void theora_clear(theora_state *t);
+
+/**
+ * Initialize an allocated theora_comment structure
+ * \param tc An allocated theora_comment structure
+ **/
+extern void theora_comment_init(theora_comment *tc);
+
+/**
+ * Add a comment to an initialized theora_comment structure
+ * \param tc A previously initialized theora comment structure
+ * \param comment A null-terminated string encoding the comment in the form
+ *                "TAG=the value"
+ *
+ * Neither theora_comment_add() nor theora_comment_add_tag() support
+ * comments containing null values, although the bitstream format
+ * supports this. To add such comments you will need to manipulate
+ * the theora_comment structure directly.
+ **/
+
+extern void theora_comment_add(theora_comment *tc, char *comment);
+
+/**
+ * Add a comment to an initialized theora_comment structure.
+ * \param tc A previously initialized theora comment structure
+ * \param tag A null-terminated string containing the tag
+ *            associated with the comment.
+ * \param value The corresponding value as a null-terminated string
+ *
+ * Neither theora_comment_add() nor theora_comment_add_tag() support
+ * comments containing null values, although the bitstream format
+ * supports this. To add such comments you will need to manipulate
+ * the theora_comment structure directly.
+ **/
+extern void theora_comment_add_tag(theora_comment *tc,
+                                       char *tag, char *value);
+
+/**
+ * Look up a comment value by tag.
+ * \param tc An initialized theora_comment structure
+ * \param tag The tag to look up
+ * \param count The instance of the tag. The same tag can appear multiple
+ *              times, each with a distinct and ordered value, so an index
+ *              is required to retrieve them all.
+ * \returns A pointer to the queried tag's value
+ * \retval NULL No matching tag is found
+ *
+ * \note Use theora_comment_query_count() to get the legal range for the
+ * count parameter.
+ **/
+
+extern char *theora_comment_query(theora_comment *tc, char *tag, int count);
+
+/** Look up the number of instances of a tag.
+ *  \param tc An initialized theora_comment structure
+ *  \param tag The tag to look up
+ *  \returns The number of instances of a particular tag.
+ *
+ *  Call this first when querying for a specific tag and then iterate
+ *  over the number of instances with separate calls to
+ *  theora_comment_query() to retrieve all instances in order.
+ **/
+extern int   theora_comment_query_count(theora_comment *tc, char *tag);
+
+/**
+ * Clear an allocated theora_comment struct so that it can be freed.
+ * \param tc An allocated theora_comment structure.
+ **/
+extern void  theora_comment_clear(theora_comment *tc);
+
+/**Encoder control function.
+ * This is used to provide advanced control of the encoding process.
+ * \param th     A #theora_state handle.
+ * \param req    The control code to process.
+ *                See \ref encctlcodes_old "the list of available
+ *			control codes" for details.
+ * \param buf    The parameters for this control code.
+ * \param buf_sz The size of the parameter buffer.*/
+extern int theora_control(theora_state *th,int req,void *buf,size_t buf_sz);
+
+/* @} */ /* end oldfuncs doxygen group */
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* _O_THEORA_H_ */

+ 329 - 0
jni/libtheora-1.2.0alpha1/include/theora/theoradec.h

@@ -0,0 +1,329 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id: theora.h,v 1.8 2004/03/15 22:17:32 derf Exp $
+
+ ********************************************************************/
+
+/**\file
+ * The <tt>libtheoradec</tt> C decoding API.*/
+
+#if !defined(_O_THEORA_THEORADEC_H_)
+# define _O_THEORA_THEORADEC_H_ (1)
+# include <stddef.h>
+# include <ogg/ogg.h>
+# include "codec.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+
+
+/**\name th_decode_ctl() codes
+ * \anchor decctlcodes
+ * These are the available request codes for th_decode_ctl().
+ * By convention, these are odd, to distinguish them from the
+ *  \ref encctlcodes "encoder control codes".
+ * Keep any experimental or vendor-specific values above \c 0x8000.*/
+/*@{*/
+/**Gets the maximum post-processing level.
+ * The decoder supports a post-processing filter that can improve
+ * the appearance of the decoded images. This returns the highest
+ * level setting for this post-processor, corresponding to maximum
+ * improvement and computational expense.
+ *
+ * \param[out] _buf int: The maximum post-processing level.
+ * \retval TH_EFAULT  \a _dec or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL  \a _buf_sz is not <tt>sizeof(int)</tt>.
+ * \retval TH_EIMPL   Not supported by this implementation.*/
+#define TH_DECCTL_GET_PPLEVEL_MAX (1)
+/**Sets the post-processing level.
+ * By default, post-processing is disabled.
+ *
+ * Sets the level of post-processing to use when decoding the
+ * compressed stream. This must be a value between zero (off)
+ * and the maximum returned by TH_DECCTL_GET_PPLEVEL_MAX.
+ *
+ * \param[in] _buf int: The new post-processing level.
+ *                      0 to disable; larger values use more CPU.
+ * \retval TH_EFAULT  \a _dec or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL  \a _buf_sz is not <tt>sizeof(int)</tt>, or the
+ *                     post-processing level is out of bounds.
+ *                    The maximum post-processing level may be
+ *                     implementation-specific, and can be obtained via
+ *                     #TH_DECCTL_GET_PPLEVEL_MAX.
+ * \retval TH_EIMPL   Not supported by this implementation.*/
+#define TH_DECCTL_SET_PPLEVEL (3)
+/**Sets the granule position.
+ * Call this after a seek, before decoding the first frame, to ensure that the
+ *  proper granule position is returned for all subsequent frames.
+ * If you track timestamps yourself and do not use the granule position
+ *  returned by the decoder, then you need not call this function.
+ *
+ * \param[in] _buf <tt>ogg_int64_t</tt>: The granule position of the next
+ *                  frame.
+ * \retval TH_EFAULT  \a _dec or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL  \a _buf_sz is not <tt>sizeof(ogg_int64_t)</tt>, or the
+ *                     granule position is negative.*/
+#define TH_DECCTL_SET_GRANPOS (5)
+/**Sets the striped decode callback function.
+ * If set, this function will be called as each piece of a frame is fully
+ *  decoded in th_decode_packetin().
+ * You can pass in a #th_stripe_callback with
+ *  th_stripe_callback#stripe_decoded set to <tt>NULL</tt> to disable the
+ *  callbacks at any point.
+ * Enabling striped decode does not prevent you from calling
+ *  th_decode_ycbcr_out() after the frame is fully decoded.
+ *
+ * \param[in]  _buf #th_stripe_callback: The callback parameters.
+ * \retval TH_EFAULT  \a _dec or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL  \a _buf_sz is not
+ *                     <tt>sizeof(th_stripe_callback)</tt>.*/
+#define TH_DECCTL_SET_STRIPE_CB (7)
+
+/**Enables telemetry and sets the macroblock display mode */
+#define TH_DECCTL_SET_TELEMETRY_MBMODE (9)
+/**Enables telemetry and sets the motion vector display mode */
+#define TH_DECCTL_SET_TELEMETRY_MV (11)
+/**Enables telemetry and sets the adaptive quantization display mode */
+#define TH_DECCTL_SET_TELEMETRY_QI (13)
+/**Enables telemetry and sets the bitstream breakdown visualization mode */
+#define TH_DECCTL_SET_TELEMETRY_BITS (15)
+/*@}*/
+
+
+
+/**A callback function for striped decode.
+ * This is a function pointer to an application-provided function that will be
+ *  called each time a section of the image is fully decoded in
+ *  th_decode_packetin().
+ * This allows the application to process the section immediately, while it is
+ *  still in cache.
+ * Note that the frame is decoded bottom to top, so \a _yfrag0 will steadily
+ *  decrease with each call until it reaches 0, at which point the full frame
+ *  is decoded.
+ * The number of fragment rows made available in each call depends on the pixel
+ *  format and the number of post-processing filters enabled, and may not even
+ *  be constant for the entire frame.
+ * If a non-<tt>NULL</tt> \a _granpos pointer is passed to
+ *  th_decode_packetin(), the granule position for the frame will be stored
+ *  in it before the first callback is made.
+ * If an entire frame is dropped (a 0-byte packet), then no callbacks will be
+ *  made at all for that frame.
+ * \param _ctx       An application-provided context pointer.
+ * \param _buf       The image buffer for the decoded frame.
+ * \param _yfrag0    The Y coordinate of the first row of 8x8 fragments
+ *                    decoded.
+ *                   Multiply this by 8 to obtain the pixel row number in the
+ *                    luma plane.
+ *                   If the chroma planes are subsampled in the Y direction,
+ *                    this will always be divisible by two.
+ * \param _yfrag_end The Y coordinate of the first row of 8x8 fragments past
+ *                    the newly decoded section.
+ *                   If the chroma planes are subsampled in the Y direction,
+ *                    this will always be divisible by two.
+ *                   I.e., this section contains fragment rows
+ *                    <tt>\a _yfrag0 ...\a _yfrag_end -1</tt>.*/
+typedef void (*th_stripe_decoded_func)(void *_ctx,th_ycbcr_buffer _buf,
+ int _yfrag0,int _yfrag_end);
+
+/**The striped decode callback data to pass to #TH_DECCTL_SET_STRIPE_CB.*/
+typedef struct{
+  /**An application-provided context pointer.
+   * This will be passed back verbatim to the application.*/
+  void                   *ctx;
+  /**The callback function pointer.*/
+  th_stripe_decoded_func  stripe_decoded;
+}th_stripe_callback;
+
+
+
+/**\name Decoder state
+   The following data structures are opaque, and their contents are not
+    publicly defined by this API.
+   Referring to their internals directly is unsupported, and may break without
+    warning.*/
+/*@{*/
+/**The decoder context.*/
+typedef struct th_dec_ctx    th_dec_ctx;
+/**Setup information.
+   This contains auxiliary information (Huffman tables and quantization
+    parameters) decoded from the setup header by th_decode_headerin() to be
+    passed to th_decode_alloc().
+   It can be re-used to initialize any number of decoders, and can be freed
+    via th_setup_free() at any time.*/
+typedef struct th_setup_info th_setup_info;
+/*@}*/
+
+
+
+/**\defgroup decfuncs Functions for Decoding*/
+/*@{*/
+/**\name Functions for decoding
+ * You must link to <tt>libtheoradec</tt> if you use any of the
+ * functions in this section.
+ *
+ * The functions are listed in the order they are used in a typical decode.
+ * The basic steps are:
+ * - Parse the header packets by repeatedly calling th_decode_headerin().
+ * - Allocate a #th_dec_ctx handle with th_decode_alloc().
+ * - Call th_setup_free() to free any memory used for codec setup
+ *    information.
+ * - Perform any additional decoder configuration with th_decode_ctl().
+ * - For each video data packet:
+ *   - Submit the packet to the decoder via th_decode_packetin().
+ *   - Retrieve the uncompressed video data via th_decode_ycbcr_out().
+ * - Call th_decode_free() to release all decoder memory.*/
+/*@{*/
+/**Decodes the header packets of a Theora stream.
+ * This should be called on the initial packets of the stream, in succession,
+ *  until it returns <tt>0</tt>, indicating that all headers have been
+ *  processed, or an error is encountered.
+ * At least three header packets are required, and additional optional header
+ *  packets may follow.
+ * This can be used on the first packet of any logical stream to determine if
+ *  that stream is a Theora stream.
+ * \param _info  A #th_info structure to fill in.
+ *               This must have been previously initialized with
+ *                th_info_init().
+ *               The application may immediately begin using the contents of
+ *                this structure after the first header is decoded, though it
+ *                must continue to be passed in on all subsequent calls.
+ * \param _tc    A #th_comment structure to fill in.
+ *               The application may immediately begin using the contents of
+ *                this structure after the second header is decoded, though it
+ *                must continue to be passed in on all subsequent calls.
+ * \param _setup Returns a pointer to additional, private setup information
+ *                needed by the decoder.
+ *               The contents of this pointer must be initialized to
+ *                <tt>NULL</tt> on the first call, and the returned value must
+ *                continue to be passed in on all subsequent calls.
+ * \param _op    An <tt>ogg_packet</tt> structure which contains one of the
+ *                initial packets of an Ogg logical stream.
+ * \return A positive value indicates that a Theora header was successfully
+ *          processed.
+ * \retval 0             The first video data packet was encountered after all
+ *                        required header packets were parsed.
+ *                       The packet just passed in on this call should be saved
+ *                        and fed to th_decode_packetin() to begin decoding
+ *                        video data.
+ * \retval TH_EFAULT     One of \a _info, \a _tc, or \a _setup was
+ *                        <tt>NULL</tt>.
+ * \retval TH_EBADHEADER \a _op was <tt>NULL</tt>, the packet was not the next
+ *                        header packet in the expected sequence, or the format
+ *                        of the header data was invalid.
+ * \retval TH_EVERSION   The packet data was a Theora info header, but for a
+ *                        bitstream version not decodable with this version of
+ *                        <tt>libtheoradec</tt>.
+ * \retval TH_ENOTFORMAT The packet was not a Theora header.
+ */
+extern int th_decode_headerin(th_info *_info,th_comment *_tc,
+ th_setup_info **_setup,ogg_packet *_op);
+/**Allocates a decoder instance.
+ *
+ * <b>Security Warning:</b> The Theora format supports very large frame sizes,
+ *  potentially even larger than the address space of a 32-bit machine, and
+ *  creating a decoder context allocates the space for several frames of data.
+ * If the allocation fails here, your program will crash, possibly at some
+ *  future point because the OS kernel returned a valid memory range and will
+ *  only fail when it tries to map the pages in it the first time they are
+ *  used.
+ * Even if it succeeds, you may experience a denial of service if the frame
+ *  size is large enough to cause excessive paging.
+ * If you are integrating libtheora in a larger application where such things
+ *  are undesirable, it is highly recommended that you check the frame size in
+ *  \a _info before calling this function and refuse to decode streams where it
+ *  is larger than some reasonable maximum.
+ * libtheora will not check this for you, because there may be machines that
+ *  can handle such streams and applications that wish to.
+ * \param _info  A #th_info struct filled via th_decode_headerin().
+ * \param _setup A #th_setup_info handle returned via
+ *                th_decode_headerin().
+ * \return The initialized #th_dec_ctx handle.
+ * \retval NULL If the decoding parameters were invalid.*/
+extern th_dec_ctx *th_decode_alloc(const th_info *_info,
+ const th_setup_info *_setup);
+/**Releases all storage used for the decoder setup information.
+ * This should be called after you no longer want to create any decoders for
+ *  a stream whose headers you have parsed with th_decode_headerin().
+ * \param _setup The setup information to free.
+ *               This can safely be <tt>NULL</tt>.*/
+extern void th_setup_free(th_setup_info *_setup);
+/**Decoder control function.
+ * This is used to provide advanced control of the decoding process.
+ * \param _dec    A #th_dec_ctx handle.
+ * \param _req    The control code to process.
+ *                See \ref decctlcodes "the list of available control codes"
+ *                 for details.
+ * \param _buf    The parameters for this control code.
+ * \param _buf_sz The size of the parameter buffer.
+ * \return Possible return values depend on the control code used.
+ *          See \ref decctlcodes "the list of control codes" for
+ *          specific values. Generally 0 indicates success.*/
+extern int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf,
+ size_t _buf_sz);
+/**Submits a packet containing encoded video data to the decoder.
+ * \param _dec     A #th_dec_ctx handle.
+ * \param _op      An <tt>ogg_packet</tt> containing encoded video data.
+ * \param _granpos Returns the granule position of the decoded packet.
+ *                 If non-<tt>NULL</tt>, the granule position for this specific
+ *                  packet is stored in this location.
+ *                 This is computed incrementally from previously decoded
+ *                  packets.
+ *                 After a seek, the correct granule position must be set via
+ *                  #TH_DECCTL_SET_GRANPOS for this to work properly.
+ * \retval 0             Success.
+ *                       A new decoded frame can be retrieved by calling
+ *                        th_decode_ycbcr_out().
+ * \retval TH_DUPFRAME   The packet represented a dropped frame (either a
+ *                        0-byte frame or an INTER frame with no coded blocks).
+ *                       The player can skip the call to th_decode_ycbcr_out(),
+ *                        as the contents of the decoded frame buffer have not
+ *                        changed.
+ * \retval TH_EFAULT     \a _dec or \a _op was <tt>NULL</tt>.
+ * \retval TH_EBADPACKET \a _op does not contain encoded video data.
+ * \retval TH_EIMPL      The video data uses bitstream features which this
+ *                        library does not support.*/
+extern int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op,
+ ogg_int64_t *_granpos);
+/**Outputs the next available frame of decoded Y'CbCr data.
+ * If a striped decode callback has been set with #TH_DECCTL_SET_STRIPE_CB,
+ *  then the application does not need to call this function.
+ * \param _dec   A #th_dec_ctx handle.
+ * \param _ycbcr A video buffer structure to fill in.
+ *               <tt>libtheoradec</tt> will fill in all the members of this
+ *                structure, including the pointers to the uncompressed video
+ *                data.
+ *               The memory for this video data is owned by
+ *                <tt>libtheoradec</tt>.
+ *               It may be freed or overwritten without notification when
+ *                subsequent frames are decoded.
+ * \retval 0 Success
+ * \retval TH_EFAULT     \a _dec or \a _ycbcr was <tt>NULL</tt>.
+ */
+extern int th_decode_ycbcr_out(th_dec_ctx *_dec,
+ th_ycbcr_buffer _ycbcr);
+/**Frees an allocated decoder instance.
+ * \param _dec A #th_dec_ctx handle.*/
+extern void th_decode_free(th_dec_ctx *_dec);
+/*@}*/
+/*@}*/
+
+
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif

+ 548 - 0
jni/libtheora-1.2.0alpha1/include/theora/theoraenc.h

@@ -0,0 +1,548 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id: theora.h,v 1.8 2004/03/15 22:17:32 derf Exp $
+
+ ********************************************************************/
+
+/**\file
+ * The <tt>libtheoraenc</tt> C encoding API.*/
+
+#if !defined(_O_THEORA_THEORAENC_H_)
+# define _O_THEORA_THEORAENC_H_ (1)
+# include <stddef.h>
+# include <ogg/ogg.h>
+# include "codec.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+
+
+/**\name th_encode_ctl() codes
+ * \anchor encctlcodes
+ * These are the available request codes for th_encode_ctl().
+ * By convention, these are even, to distinguish them from the
+ *  \ref decctlcodes "decoder control codes".
+ * Keep any experimental or vendor-specific values above \c 0x8000.*/
+/*@{*/
+/**Sets the Huffman tables to use.
+ * The tables are copied, not stored by reference, so they can be freed after
+ *  this call.
+ * <tt>NULL</tt> may be specified to revert to the default tables.
+ *
+ * \param[in] _buf <tt>#th_huff_code[#TH_NHUFFMAN_TABLES][#TH_NDCT_TOKENS]</tt>
+ * \retval TH_EFAULT \a _enc is <tt>NULL</tt>.
+ * \retval TH_EINVAL Encoding has already begun or one or more of the given
+ *                     tables is not full or prefix-free, \a _buf is
+ *                     <tt>NULL</tt> and \a _buf_sz is not zero, or \a _buf is
+ *                     non-<tt>NULL</tt> and \a _buf_sz is not
+ *                     <tt>sizeof(#th_huff_code)*#TH_NHUFFMAN_TABLES*#TH_NDCT_TOKENS</tt>.
+ * \retval TH_EIMPL   Not supported by this implementation.*/
+#define TH_ENCCTL_SET_HUFFMAN_CODES (0)
+/**Sets the quantization parameters to use.
+ * The parameters are copied, not stored by reference, so they can be freed
+ *  after this call.
+ * <tt>NULL</tt> may be specified to revert to the default parameters.
+ *
+ * \param[in] _buf #th_quant_info
+ * \retval TH_EFAULT \a _enc is <tt>NULL</tt>.
+ * \retval TH_EINVAL Encoding has already begun, \a _buf is
+ *                    <tt>NULL</tt> and \a _buf_sz is not zero,
+ *                    or \a _buf is non-<tt>NULL</tt> and
+ *                    \a _buf_sz is not <tt>sizeof(#th_quant_info)</tt>.
+ * \retval TH_EIMPL   Not supported by this implementation.*/
+#define TH_ENCCTL_SET_QUANT_PARAMS (2)
+/**Sets the maximum distance between key frames.
+ * This can be changed during an encode, but will be bounded by
+ *  <tt>1<<th_info#keyframe_granule_shift</tt>.
+ * If it is set before encoding begins, th_info#keyframe_granule_shift will
+ *  be enlarged appropriately.
+ *
+ * \param[in]  _buf <tt>ogg_uint32_t</tt>: The maximum distance between key
+ *                   frames.
+ * \param[out] _buf <tt>ogg_uint32_t</tt>: The actual maximum distance set.
+ * \retval TH_EFAULT \a _enc or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(ogg_uint32_t)</tt>.
+ * \retval TH_EIMPL   Not supported by this implementation.*/
+#define TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE (4)
+/**Disables any encoder features that would prevent lossless transcoding back
+ *  to VP3.
+ * This primarily means disabling block-adaptive quantization and always coding
+ *  all four luma blocks in a macro block when 4MV is used.
+ * It also includes using the VP3 quantization tables and Huffman codes; if you
+ *  set them explicitly after calling this function, the resulting stream will
+ *  not be VP3-compatible.
+ * If you enable VP3-compatibility when encoding 4:2:2 or 4:4:4 source
+ *  material, or when using a picture region smaller than the full frame (e.g.
+ *  a non-multiple-of-16 width or height), then non-VP3 bitstream features will
+ *  still be disabled, but the stream will still not be VP3-compatible, as VP3
+ *  was not capable of encoding such formats.
+ * If you call this after encoding has already begun, then the quantization
+ *  tables and codebooks cannot be changed, but the frame-level features will
+ *  be enabled or disabled as requested.
+ *
+ * \param[in]  _buf <tt>int</tt>: a non-zero value to enable VP3 compatibility,
+ *                   or 0 to disable it (the default).
+ * \param[out] _buf <tt>int</tt>: 1 if all bitstream features required for
+ *                   VP3-compatibility could be set, and 0 otherwise.
+ *                  The latter will be returned if the pixel format is not
+ *                   4:2:0, the picture region is smaller than the full frame,
+ *                   or if encoding has begun, preventing the quantization
+ *                   tables and codebooks from being set.
+ * \retval TH_EFAULT \a _enc or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt>.
+ * \retval TH_EIMPL   Not supported by this implementation.*/
+#define TH_ENCCTL_SET_VP3_COMPATIBLE (10)
+/**Gets the maximum speed level.
+ * Higher speed levels favor quicker encoding over better quality per bit.
+ * Depending on the encoding mode, and the internal algorithms used, quality
+ *  may actually improve, but in this case bitrate will also likely increase.
+ * In any case, overall rate/distortion performance will probably decrease.
+ * The maximum value, and the meaning of each value, may change depending on
+ *  the current encoding mode (VBR vs. constant quality, etc.).
+ *
+ * \param[out] _buf <tt>int</tt>: The maximum encoding speed level.
+ * \retval TH_EFAULT \a _enc or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt>.
+ * \retval TH_EIMPL   Not supported by this implementation in the current
+ *                    encoding mode.*/
+#define TH_ENCCTL_GET_SPLEVEL_MAX (12)
+/**Sets the speed level.
+ * The current speed level may be retrieved using #TH_ENCCTL_GET_SPLEVEL.
+ *
+ * \param[in] _buf <tt>int</tt>: The new encoding speed level.
+ *                 0 is slowest, larger values use less CPU.
+ * \retval TH_EFAULT \a _enc or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt>, or the
+ *                    encoding speed level is out of bounds.
+ *                   The maximum encoding speed level may be
+ *                    implementation- and encoding mode-specific, and can be
+ *                    obtained via #TH_ENCCTL_GET_SPLEVEL_MAX.
+ * \retval TH_EIMPL   Not supported by this implementation in the current
+ *                    encoding mode.*/
+#define TH_ENCCTL_SET_SPLEVEL (14)
+/**Gets the current speed level.
+ * The default speed level may vary according to encoder implementation, but if
+ *  this control code is not supported (it returns #TH_EIMPL), the default may
+ *  be assumed to be the slowest available speed (0).
+ * The maximum encoding speed level may be implementation- and encoding
+ *  mode-specific, and can be obtained via #TH_ENCCTL_GET_SPLEVEL_MAX.
+ *
+ * \param[out] _buf <tt>int</tt>: The current encoding speed level.
+ *                  0 is slowest, larger values use less CPU.
+ * \retval TH_EFAULT \a _enc or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt>.
+ * \retval TH_EIMPL   Not supported by this implementation in the current
+ *                    encoding mode.*/
+#define TH_ENCCTL_GET_SPLEVEL (16)
+/**Sets the number of duplicates of the next frame to produce.
+ * Although libtheora can encode duplicate frames very cheaply, it costs some
+ *  amount of CPU to detect them, and a run of duplicates cannot span a
+ *  keyframe boundary.
+ * This control code tells the encoder to produce the specified number of extra
+ *  duplicates of the next frame.
+ * This allows the encoder to make smarter keyframe placement decisions and
+ *  rate control decisions, and reduces CPU usage as well, when compared to
+ *  just submitting the same frame for encoding multiple times.
+ * This setting only applies to the next frame submitted for encoding.
+ * You MUST call th_encode_packetout() repeatedly until it returns 0, or the
+ *  extra duplicate frames will be lost.
+ *
+ * \param[in] _buf <tt>int</tt>: The number of duplicates to produce.
+ *                 If this is negative or zero, no duplicates will be produced.
+ * \retval TH_EFAULT \a _enc or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt>, or the
+ *                    number of duplicates is greater than or equal to the
+ *                    maximum keyframe interval.
+ *                   In the latter case, NO duplicate frames will be produced.
+ *                   You must ensure that the maximum keyframe interval is set
+ *                    larger than the maximum number of duplicates you will
+ *                    ever wish to insert prior to encoding.
+ * \retval TH_EIMPL   Not supported by this implementation in the current
+ *                    encoding mode.*/
+#define TH_ENCCTL_SET_DUP_COUNT (18)
+/**Modifies the default bitrate management behavior.
+ * Use to allow or disallow frame dropping, and to enable or disable capping
+ *  bit reservoir overflows and underflows.
+ * See \ref ratectlflags "the list of available flags".
+ * The flags are set by default to
+ *  <tt>#TH_RATECTL_DROP_FRAMES|#TH_RATECTL_CAP_OVERFLOW</tt>.
+ *
+ * \param[in] _buf <tt>int</tt>: Any combination of
+ *                  \ref ratectlflags "the available flags":
+ *                 - #TH_RATECTL_DROP_FRAMES: Enable frame dropping.
+ *                 - #TH_RATECTL_CAP_OVERFLOW: Don't bank excess bits for later
+ *                    use.
+ *                 - #TH_RATECTL_CAP_UNDERFLOW: Don't try to make up shortfalls
+ *                    later.
+ * \retval TH_EFAULT \a _enc or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt> or rate control
+ *                    is not enabled.
+ * \retval TH_EIMPL   Not supported by this implementation in the current
+ *                    encoding mode.*/
+#define TH_ENCCTL_SET_RATE_FLAGS (20)
+/**Sets the size of the bitrate management bit reservoir as a function
+ *  of number of frames.
+ * The reservoir size affects how quickly bitrate management reacts to
+ *  instantaneous changes in the video complexity.
+ * Larger reservoirs react more slowly, and provide better overall quality, but
+ *  require more buffering by a client, adding more latency to live streams.
+ * By default, libtheora sets the reservoir to the maximum distance between
+ *  keyframes, subject to a minimum and maximum limit.
+ * This call may be used to increase or decrease the reservoir, increasing or
+ *  decreasing the allowed temporary variance in bitrate.
+ * An implementation may impose some limits on the size of a reservoir it can
+ *  handle, in which case the actual reservoir size may not be exactly what was
+ *  requested.
+ * The actual value set will be returned.
+ *
+ * \param[in]  _buf <tt>int</tt>: Requested size of the reservoir measured in
+ *                   frames.
+ * \param[out] _buf <tt>int</tt>: The actual size of the reservoir set.
+ * \retval TH_EFAULT \a _enc or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(int)</tt>, or rate control
+ *                    is not enabled.  The buffer has an implementation
+ *                    defined minimum and maximum size and the value in _buf
+ *                    will be adjusted to match the actual value set.
+ * \retval TH_EIMPL   Not supported by this implementation in the current
+ *                    encoding mode.*/
+#define TH_ENCCTL_SET_RATE_BUFFER (22)
+/**Enable pass 1 of two-pass encoding mode and retrieve the first pass metrics.
+ * Pass 1 mode must be enabled before the first frame is encoded, and a target
+ *  bitrate must have already been specified to the encoder.
+ * Although this does not have to be the exact rate that will be used in the
+ *  second pass, closer values may produce better results.
+ * The first call returns the size of the two-pass header data, along with some
+ *  placeholder content, and sets the encoder into pass 1 mode implicitly.
+ * Then, a subsequent call must be made after each call to
+ *  th_encode_ycbcr_in() to retrieve the metrics for that frame.
+ * An additional, final call must be made to retrieve the summary data,
+ *  containing such information as the total number of frames, etc.
+ * This must be stored in place of the placeholder data that was returned
+ *  in the first call, before the frame metrics data.
+ * All of this data must be presented back to the encoder during pass 2 using
+ *  #TH_ENCCTL_2PASS_IN.
+ *
+ * \param[out] _buf <tt>char *</tt>: Returns a pointer to internal storage
+ *                   containing the two pass metrics data.
+ *                  This storage is only valid until the next call, or until
+ *                   the encoder context is freed, and must be copied by the
+ *                   application.
+ * \retval >=0       The number of bytes of metric data available in the
+ *                    returned buffer.
+ * \retval TH_EFAULT \a _enc or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL \a _buf_sz is not <tt>sizeof(char *)</tt>, no target
+ *                    bitrate has been set, or the first call was made after
+ *                    the first frame was submitted for encoding.
+ * \retval TH_EIMPL   Not supported by this implementation.*/
+#define TH_ENCCTL_2PASS_OUT (24)
+/**Submits two-pass encoding metric data collected during the first encoding
+ *  pass to the second pass.
+ * The first call must be made before the first frame is encoded, and a target
+ *  bitrate must have already been specified to the encoder.
+ * It sets the encoder to pass 2 mode implicitly; this cannot be disabled.
+ * The encoder may require reading data from some or all of the frames in
+ *  advance, depending on, e.g., the reservoir size used in the second pass.
+ * You must call this function repeatedly before each frame to provide data
+ *  until either a) it fails to consume all of the data presented or b) all of
+ *  the pass 1 data has been consumed.
+ * In the first case, you must save the remaining data to be presented after
+ *  the next frame.
+ * You can call this function with a NULL argument to get an upper bound on
+ *  the number of bytes that will be required before the next frame.
+ *
+ * When pass 2 is first enabled, the default bit reservoir is set to the entire
+ *  file; this gives maximum flexibility but can lead to very high peak rates.
+ * You can subsequently set it to another value with #TH_ENCCTL_SET_RATE_BUFFER
+ *  (e.g., to set it to the keyframe interval for non-live streaming), however,
+ *  you may then need to provide more data before the next frame.
+ *
+ * \param[in] _buf <tt>char[]</tt>: A buffer containing the data returned by
+ *                  #TH_ENCCTL_2PASS_OUT in pass 1.
+ *                 You may pass <tt>NULL</tt> for \a _buf to return an upper
+ *                  bound on the number of additional bytes needed before the
+ *                  next frame.
+ *                 The summary data returned at the end of pass 1 must be at
+ *                  the head of the buffer on the first call with a
+ *                  non-<tt>NULL</tt> \a _buf, and the placeholder data
+ *                  returned at the start of pass 1 should be omitted.
+ *                 After each call you should advance this buffer by the number
+ *                  of bytes consumed.
+ * \retval >0            The number of bytes of metric data required/consumed.
+ * \retval 0             No more data is required before the next frame.
+ * \retval TH_EFAULT     \a _enc is <tt>NULL</tt>.
+ * \retval TH_EINVAL     No target bitrate has been set, or the first call was
+ *                        made after the first frame was submitted for
+ *                        encoding.
+ * \retval TH_ENOTFORMAT The data did not appear to be pass 1 from a compatible
+ *                        implementation of this library.
+ * \retval TH_EBADHEADER The data was invalid; this may be returned when
+ *                        attempting to read an aborted pass 1 file that still
+ *                        has the placeholder data in place of the summary
+ *                        data.
+ * \retval TH_EIMPL       Not supported by this implementation.*/
+#define TH_ENCCTL_2PASS_IN (26)
+/**Sets the current encoding quality.
+ * This is only valid so long as no bitrate has been specified, either through
+ *  the #th_info struct used to initialize the encoder or through
+ *  #TH_ENCCTL_SET_BITRATE (this restriction may be relaxed in a future
+ *  version).
+ * If it is set before the headers are emitted, the target quality encoded in
+ *  them will be updated.
+ *
+ * \param[in] _buf <tt>int</tt>: The new target quality, in the range 0...63,
+ *                  inclusive.
+ * \retval 0             Success.
+ * \retval TH_EFAULT     \a _enc or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL     A target bitrate has already been specified, or the
+ *                        quality index was not in the range 0...63.
+ * \retval TH_EIMPL       Not supported by this implementation.*/
+#define TH_ENCCTL_SET_QUALITY (28)
+/**Sets the current encoding bitrate.
+ * Once a bitrate is set, the encoder must use a rate-controlled mode for all
+ *  future frames (this restriction may be relaxed in a future version).
+ * If it is set before the headers are emitted, the target bitrate encoded in
+ *  them will be updated.
+ * Due to the buffer delay, the exact bitrate of each section of the encode is
+ *  not guaranteed.
+ * The encoder may have already used more bits than allowed for the frames it
+ *  has encoded, expecting to make them up in future frames, or it may have
+ *  used fewer, holding the excess in reserve.
+ * The exact transition between the two bitrates is not well-defined by this
+ *  API, but may be affected by flags set with #TH_ENCCTL_SET_RATE_FLAGS.
+ * After a number of frames equal to the buffer delay, one may expect further
+ *  output to average at the target bitrate.
+ *
+ * \param[in] _buf <tt>long</tt>: The new target bitrate, in bits per second.
+ * \retval 0             Success.
+ * \retval TH_EFAULT     \a _enc or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL     The target bitrate was not positive.
+ *                       A future version of this library may allow passing 0
+ *                        to disable rate-controlled mode and return to a
+ *                        quality-based mode, in which case this function will
+ *                        not return an error for that value.
+ * \retval TH_EIMPL      Not supported by this implementation.*/
+#define TH_ENCCTL_SET_BITRATE (30)
+/**Sets the configuration to be compatible with that from the given setup
+ *  header.
+ * This sets the Huffman codebooks and quantization parameters to match those
+ *  found in the given setup header.
+ * This guarantees that packets encoded by this encoder will be decodable using
+ *  a decoder configured with the passed-in setup header.
+ * It does <em>not</em> guarantee that th_encode_flushheader() will produce a
+ *  bit-identical setup header, only that they will be compatible.
+ * If you need a bit-identical setup header, then use the one you passed into
+ *  this command, and not the one returned by th_encode_flushheader().
+ *
+ * This also does <em>not</em> enable or disable VP3 compatibility; that is not
+ *  signaled in the setup header (or anywhere else in the encoded stream), and
+ *  is controlled independently by the #TH_ENCCTL_SET_VP3_COMPATIBLE function.
+ * If you wish to enable VP3 compatibility mode <em>and</em> want the codebooks
+ *  and quantization parameters to match the given setup header, you should
+ *  enable VP3 compatibility before invoking this command, otherwise the
+ *  codebooks and quantization parameters will be reset to the VP3 defaults.
+ *
+ * The current encoder does not support Huffman codebooks which do not contain
+ *  codewords for all 32 tokens.
+ * Such codebooks are legal, according to the specification, but cannot be
+ *  configured with this function.
+ *
+ * \param[in] _buf <tt>unsigned char[]</tt>: The encoded setup header to copy
+ *                                            the configuration from.
+ *                                           This should be the original,
+ *                                            undecoded setup header packet,
+ *                                            and <em>not</em> a #th_setup_info
+ *                                            structure filled in by
+ *                                            th_decode_headerin().
+ * \retval TH_EFAULT     \a _enc or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL     Encoding has already begun, so the codebooks and
+ *                        quantization parameters cannot be changed, or the
+ *                        data in the setup header was not supported by this
+ *                        encoder.
+ * \retval TH_EBADHEADER \a _buf did not contain a valid setup header packet.
+ * \retval TH_ENOTFORMAT \a _buf did not contain a Theora header at all.
+ * \retval TH_EIMPL   Not supported by this implementation.*/
+#define TH_ENCCTL_SET_COMPAT_CONFIG (32)
+
+/*@}*/
+
+
+/**\name TH_ENCCTL_SET_RATE_FLAGS flags
+ * \anchor ratectlflags
+ * These are the flags available for use with #TH_ENCCTL_SET_RATE_FLAGS.*/
+/*@{*/
+/**Drop frames to keep within bitrate buffer constraints.
+ * This can have a severe impact on quality, but is the only way to ensure that
+ *  bitrate targets are met at low rates during sudden bursts of activity.
+ * It is enabled by default.*/
+#define TH_RATECTL_DROP_FRAMES   (0x1)
+/**Ignore bitrate buffer overflows.
+ * If the encoder uses so few bits that the reservoir of available bits
+ *  overflows, ignore the excess.
+ * The encoder will not try to use these extra bits in future frames.
+ * At high rates this may cause the result to be undersized, but allows a
+ *  client to play the stream using a finite buffer; it should normally be
+ *  enabled, which is the default.*/
+#define TH_RATECTL_CAP_OVERFLOW  (0x2)
+/**Ignore bitrate buffer underflows.
+ * If the encoder uses so many bits that the reservoir of available bits
+ *  underflows, ignore the deficit.
+ * The encoder will not try to make up these extra bits in future frames.
+ * At low rates this may cause the result to be oversized; it should normally
+ *  be disabled, which is the default.*/
+#define TH_RATECTL_CAP_UNDERFLOW (0x4)
+/*@}*/
+
+
+
+/**The quantization parameters used by VP3.*/
+extern const th_quant_info TH_VP31_QUANT_INFO;
+
+/**The Huffman tables used by VP3.*/
+extern const th_huff_code
+ TH_VP31_HUFF_CODES[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS];
+
+
+
+/**\name Encoder state
+   The following data structure is opaque, and its contents are not publicly
+    defined by this API.
+   Referring to its internals directly is unsupported, and may break without
+    warning.*/
+/*@{*/
+/**The encoder context.*/
+typedef struct th_enc_ctx    th_enc_ctx;
+/*@}*/
+
+
+
+/**\defgroup encfuncs Functions for Encoding*/
+/*@{*/
+/**\name Functions for encoding
+ * You must link to <tt>libtheoraenc</tt> and <tt>libtheoradec</tt>
+ *  if you use any of the functions in this section.
+ *
+ * The functions are listed in the order they are used in a typical encode.
+ * The basic steps are:
+ * - Fill in a #th_info structure with details on the format of the video you
+ *    wish to encode.
+ * - Allocate a #th_enc_ctx handle with th_encode_alloc().
+ * - Perform any additional encoder configuration required with
+ *    th_encode_ctl().
+ * - Repeatedly call th_encode_flushheader() to retrieve all the header
+ *    packets.
+ * - For each uncompressed frame:
+ *   - Submit the uncompressed frame via th_encode_ycbcr_in()
+ *   - Repeatedly call th_encode_packetout() to retrieve any video
+ *      data packets that are ready.
+ * - Call th_encode_free() to release all encoder memory.*/
+/*@{*/
+/**Allocates an encoder instance.
+ * \param _info A #th_info struct filled with the desired encoding parameters.
+ * \return The initialized #th_enc_ctx handle.
+ * \retval NULL If the encoding parameters were invalid.*/
+extern th_enc_ctx *th_encode_alloc(const th_info *_info);
+/**Encoder control function.
+ * This is used to provide advanced control of the encoding process.
+ * \param _enc    A #th_enc_ctx handle.
+ * \param _req    The control code to process.
+ *                See \ref encctlcodes "the list of available control codes"
+ *                 for details.
+ * \param _buf    The parameters for this control code.
+ * \param _buf_sz The size of the parameter buffer.
+ * \return Possible return values depend on the control code used.
+ *          See \ref encctlcodes "the list of control codes" for
+ *          specific values. Generally 0 indicates success.*/
+extern int th_encode_ctl(th_enc_ctx *_enc,int _req,void *_buf,size_t _buf_sz);
+/**Outputs the next header packet.
+ * This should be called repeatedly after encoder initialization until it
+ *  returns 0 in order to get all of the header packets, in order, before
+ *  encoding actual video data.
+ * \param _enc      A #th_enc_ctx handle.
+ * \param _comments The metadata to place in the comment header, when it is
+ *                   encoded.
+ * \param _op       An <tt>ogg_packet</tt> structure to fill.
+ *                  All of the elements of this structure will be set,
+ *                   including a pointer to the header data.
+ *                  The memory for the header data is owned by
+ *                   <tt>libtheoraenc</tt>, and may be invalidated when the
+ *                   next encoder function is called.
+ * \return A positive value indicates that a header packet was successfully
+ *          produced.
+ * \retval 0         No packet was produced, and no more header packets remain.
+ * \retval TH_EFAULT \a _enc, \a _comments, or \a _op was <tt>NULL</tt>.*/
+extern int th_encode_flushheader(th_enc_ctx *_enc,
+ th_comment *_comments,ogg_packet *_op);
+/**Submits an uncompressed frame to the encoder.
+ * \param _enc   A #th_enc_ctx handle.
+ * \param _ycbcr A buffer of Y'CbCr data to encode.
+ *               If the width and height of the buffer matches the frame size
+ *                the encoder was initialized with, the encoder will only
+ *                reference the portion inside the picture region.
+ *               Any data outside this region will be ignored, and need not map
+ *                to a valid address.
+ *               Alternatively, you can pass a buffer equal to the size of the
+ *                picture region, if this is less than the full frame size.
+ *               When using subsampled chroma planes, odd picture sizes or odd
+ *                picture offsets may require an unexpected chroma plane size,
+ *                and their use is generally discouraged, as they will not be
+ *                well-supported by players and other media frameworks.
+ *               See Section 4.4 of
+ *                <a href="http://www.theora.org/doc/Theora.pdf">the Theora
+ *                specification</a> for details if you wish to use them anyway.
+ * \retval 0         Success.
+ * \retval TH_EFAULT \a _enc or \a _ycbcr is <tt>NULL</tt>.
+ * \retval TH_EINVAL The buffer size matches neither the frame size nor the
+ *                    picture size the encoder was initialized with, or
+ *                    encoding has already completed.*/
+extern int th_encode_ycbcr_in(th_enc_ctx *_enc,th_ycbcr_buffer _ycbcr);
+/**Retrieves encoded video data packets.
+ * This should be called repeatedly after each frame is submitted to flush any
+ *  encoded packets, until it returns 0.
+ * The encoder will not buffer these packets as subsequent frames are
+ *  compressed, so a failure to do so will result in lost video data.
+ * \note Currently the encoder operates in a one-frame-in, one-packet-out
+ *        manner.
+ *       However, this may be changed in the future.
+ * \param _enc  A #th_enc_ctx handle.
+ * \param _last Set this flag to a non-zero value if no more uncompressed
+ *               frames will be submitted.
+ *              This ensures that a proper EOS flag is set on the last packet.
+ * \param _op   An <tt>ogg_packet</tt> structure to fill.
+ *              All of the elements of this structure will be set, including a
+ *               pointer to the video data.
+ *              The memory for the video data is owned by
+ *               <tt>libtheoraenc</tt>, and may be invalidated when the next
+ *               encoder function is called.
+ * \return A positive value indicates that a video data packet was successfully
+ *          produced.
+ * \retval 0         No packet was produced, and no more encoded video data
+ *                    remains.
+ * \retval TH_EFAULT \a _enc or \a _op was <tt>NULL</tt>.*/
+extern int th_encode_packetout(th_enc_ctx *_enc,int _last,ogg_packet *_op);
+/**Frees an allocated encoder instance.
+ * \param _enc A #th_enc_ctx handle.*/
+extern void th_encode_free(th_enc_ctx *_enc);
+/*@}*/
+/*@}*/
+
+
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif

+ 2712 - 0
jni/libtheora-1.2.0alpha1/lib/analyze.c

@@ -0,0 +1,2712 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function: mode selection code
+  last mod: $Id$
+
+ ********************************************************************/
+#include <limits.h>
+#include <string.h>
+#include "encint.h"
+#include "modedec.h"
+#if defined(OC_COLLECT_METRICS)
+# include "collect.c"
+#endif
+
+
+
+typedef struct oc_rd_metric          oc_rd_metric;
+typedef struct oc_mode_choice        oc_mode_choice;
+
+
+
+/*There are 8 possible schemes used to encode macro block modes.
+  Schemes 0-6 use a maximally-skewed Huffman code to code each of the modes.
+  The same set of Huffman codes is used for each of these 7 schemes, but the
+   mode assigned to each codeword varies.
+  Scheme 0 writes a custom mapping from codeword to MB mode to the bitstream,
+   while schemes 1-6 have a fixed mapping.
+  Scheme 7 just encodes each mode directly in 3 bits.*/
+
+/*The mode orderings for the various mode coding schemes.
+  Scheme 0 uses a custom alphabet, which is not stored in this table.
+  This is the inverse of the equivalent table OC_MODE_ALPHABETS in the
+   decoder.
+  Row si-1 holds the ranks for scheme si (1...7), and is indexed by macro block
+   mode; each entry is the rank of that mode, i.e., the index of the codeword
+   used to code it.
+  The letter string above each row lists the modes in rank order (the last row
+   is the identity, so its string gives the letter used for each mode
+   index).*/
+static const unsigned char OC_MODE_RANKS[7][OC_NMODES]={
+  /*Last MV dominates.*/
+  /*L P M N I G GM 4*/
+  {3,4,2,0,1,5,6,7},
+  /*L P N M I G GM 4*/
+  {2,4,3,0,1,5,6,7},
+  /*L M P N I G GM 4*/
+  {3,4,1,0,2,5,6,7},
+  /*L M N P I G GM 4*/
+  {2,4,1,0,3,5,6,7},
+  /*No MV dominates.*/
+  /*N L P M I G GM 4*/
+  {0,4,3,1,2,5,6,7},
+  /*N G L P M I GM 4*/
+  {0,5,4,2,3,1,6,7},
+  /*Default ordering.*/
+  /*N I M L P G GM 4*/
+  {0,1,2,3,4,5,6,7}
+};
+
+
+
+/*Set up the per-scheme rank table pointers of the mode scheme chooser.
+  Scheme 0 is pointed at the chooser's own scheme0_ranks array, while schemes
+   1...7 are pointed at the corresponding fixed rows of OC_MODE_RANKS.
+  This need only be called once per encoder.*/
+void oc_mode_scheme_chooser_init(oc_mode_scheme_chooser *_chooser){
+  int scheme;
+  _chooser->mode_ranks[0]=_chooser->scheme0_ranks;
+  scheme=1;
+  while(scheme<8){
+    /*The table has no row for scheme 0, hence the offset of one.*/
+    _chooser->mode_ranks[scheme]=OC_MODE_RANKS[scheme-1];
+    scheme++;
+  }
+}
+
+/*Restore the mode scheme chooser to its start-of-frame state.
+  All mode counts and per-scheme bit totals are cleared, except that scheme 0
+   is charged up front for its explicit mode list.
+  This needs to be called once for each frame, including the first.*/
+static void oc_mode_scheme_chooser_reset(oc_mode_scheme_chooser *_chooser){
+  int i;
+  memset(_chooser->mode_counts,0,OC_NMODES*sizeof(*_chooser->mode_counts));
+  memset(_chooser->scheme_bits+1,0,7*sizeof(*_chooser->scheme_bits));
+  /*Scheme 0 starts with 24 bits to store the mode list in.*/
+  _chooser->scheme_bits[0]=24;
+  for(i=0;i<8;i++){
+    /*The scheme 0 mode list and its inverse both start as the identity.*/
+    _chooser->scheme0_ranks[i]=i;
+    _chooser->scheme0_list[i]=i;
+    /*Scheme 7 should always start first, and scheme 0 should always start
+       last.*/
+    _chooser->scheme_list[i]=7-i;
+  }
+}
+
+/*Compute the number of bits needed to code _mb_mode under scheme _scheme.
+  _scheme:  The mode coding scheme to evaluate (0...7).
+  _mb_mode: The macro block mode to be coded.
+  Return: The bit cost of the codeword the mode would be assigned.*/
+static int oc_mode_scheme_chooser_scheme_mb_cost(
+ const oc_mode_scheme_chooser *_chooser,int _scheme,int _mb_mode){
+  int rank;
+  /*For every scheme but 0, the mode's current rank is all that is needed.*/
+  rank=_chooser->mode_ranks[_scheme][_mb_mode];
+  if(_scheme==0){
+    int count;
+    /*Scheme 0 orders modes by frequency, so counting this mode once more
+       could move it up the list.
+      Walk towards the front past every mode whose count does not exceed the
+       current (not yet incremented) count, and price the mode at the position
+       it would end up in.
+      The list itself is left untouched; this only estimates opportunity
+       cost.*/
+    count=_chooser->mode_counts[_mb_mode];
+    for(;rank>0;rank--){
+      if(count<_chooser->mode_counts[_chooser->scheme0_list[rank-1]])break;
+    }
+  }
+  /*Schemes 0...6 select codebook 0; only scheme 7 selects codebook 1.*/
+  return OC_MODE_BITS[(_scheme+1)>>3][rank];
+}
+
+/*Estimate the cost of coding a mode, given all the modes selected so far.
+  This, rather than actually picking a scheme, is what the chooser is really
+   for.
+  The estimate is an opportunity cost: the bits needed by the best scheme for
+   all modes chosen so far plus this one, minus the bits needed by the best
+   scheme for all modes chosen so far without it.
+  That is probably more computation than the result deserves, but it is still
+   a greedy approach rather than an attempt at the global mode-selection
+   problem (which is NP-hard).
+  _mb_mode: The mode to determine the cost of.
+  Return: The number of bits required to code this mode.*/
+static int oc_mode_scheme_chooser_cost(oc_mode_scheme_chooser *_chooser,
+ int _mb_mode){
+  int best_scheme;
+  int best_scheme_bits;
+  int alt_scheme;
+  int alt_scheme_bits;
+  int first_cost;
+  int min_total;
+  int li;
+  best_scheme=_chooser->scheme_list[0];
+  best_scheme_bits=_chooser->scheme_bits[best_scheme];
+  alt_scheme=_chooser->scheme_list[1];
+  alt_scheme_bits=_chooser->scheme_bits[alt_scheme];
+  first_cost=oc_mode_scheme_chooser_scheme_mb_cost(_chooser,
+   best_scheme,_mb_mode);
+  /*Common case: the runner-up trails by more than 6 bits, so a single extra
+     mode cannot change which scheme is the cheapest.*/
+  if(alt_scheme_bits-best_scheme_bits>6)return first_cost;
+  /*Otherwise try every scheme that is close enough to overtake the leader.*/
+  min_total=best_scheme_bits+first_cost;
+  for(li=1;;){
+    int total;
+    total=alt_scheme_bits+
+     oc_mode_scheme_chooser_scheme_mb_cost(_chooser,alt_scheme,_mb_mode);
+    if(total<min_total)min_total=total;
+    li++;
+    if(li>=8)break;
+    alt_scheme=_chooser->scheme_list[li];
+    alt_scheme_bits=_chooser->scheme_bits[alt_scheme];
+    /*The list is examined in order; stop at the first scheme that is too far
+       behind.*/
+    if(alt_scheme_bits-best_scheme_bits>6)break;
+  }
+  return min_total-best_scheme_bits;
+}
+
+/*Incrementally update the mode counts and per-scheme bit counts and re-order
+   the scheme lists once a mode has been selected.
+  Three pieces of state are updated, in this order: the scheme 0 mode list
+   (scheme0_list and its inverse, scheme0_ranks), the running bit total of
+   every scheme (scheme_bits), and the list of schemes sorted by bit total
+   (scheme_list).
+  _mb_mode: The mode that was chosen.*/
+static void oc_mode_scheme_chooser_update(oc_mode_scheme_chooser *_chooser,
+ int _mb_mode){
+  int ri;
+  int si;
+  _chooser->mode_counts[_mb_mode]++;
+  /*Re-order the scheme0 mode list if necessary.
+    Starting from the mode's current rank, every preceding mode with a
+     strictly smaller count is shifted back one slot; the walk stops at the
+     first preceding mode whose count is at least as large.*/
+  for(ri=_chooser->scheme0_ranks[_mb_mode];ri>0;ri--){
+    int pmode;
+    pmode=_chooser->scheme0_list[ri-1];
+    if(_chooser->mode_counts[pmode]>=_chooser->mode_counts[_mb_mode])break;
+    /*Reorder the mode ranking.
+      The displaced mode moves from slot ri-1 to slot ri.*/
+    _chooser->scheme0_ranks[pmode]++;
+    _chooser->scheme0_list[ri]=pmode;
+  }
+  _chooser->scheme0_ranks[_mb_mode]=ri;
+  _chooser->scheme0_list[ri]=_mb_mode;
+  /*Now add the bit cost for the mode to each scheme.
+    The codebook index si+1>>3 is 1 for scheme 7 and 0 for all others.*/
+  for(si=0;si<8;si++){
+    _chooser->scheme_bits[si]+=
+     OC_MODE_BITS[si+1>>3][_chooser->mode_ranks[si][_mb_mode]];
+  }
+  /*Finally, re-order the list of schemes.
+    This is an insertion sort by ascending bit total; a scheme only moves
+     ahead of schemes with strictly more bits, so ties keep their existing
+     order.*/
+  for(si=1;si<8;si++){
+    int sj;
+    int scheme0;
+    int bits0;
+    sj=si;
+    scheme0=_chooser->scheme_list[si];
+    bits0=_chooser->scheme_bits[scheme0];
+    do{
+      int scheme1;
+      scheme1=_chooser->scheme_list[sj-1];
+      if(bits0>=_chooser->scheme_bits[scheme1])break;
+      _chooser->scheme_list[sj]=scheme1;
+    }
+    while(--sj>0);
+    _chooser->scheme_list[sj]=scheme0;
+  }
+}
+
+
+
+/*Look up the number of bits needed to code a super block run.
+  The code index is the last one whose successor's minimum run value still
+   exceeds _run_count.
+  _run_count: The desired run count; must be positive and less than 4130.
+  Return: The length of the corresponding code in bits.*/
+static int oc_sb_run_bits(int _run_count){
+  int ci;
+  ci=0;
+  while(OC_SB_RUN_VAL_MIN[ci+1]<=_run_count)ci++;
+  return OC_SB_RUN_CODE_NBITS[ci];
+}
+
+/*Look up the number of bits needed to code a block run.
+  _run_count: The desired run count; must be positive and less than 30.
+  Return: The length of the corresponding code in bits.*/
+static int oc_block_run_bits(int _run_count){
+  int ci;
+  /*The table is indexed by run count minus one.*/
+  ci=_run_count-1;
+  return OC_BLOCK_RUN_CODE_NBITS[ci];
+}
+
+
+
+/*Reset a coded block flag tracker to its initial, empty state.
+  All bit totals and run lengths are zeroed, and every "previous flag" field is
+   set to -1, which matches neither flag value, so the first flag seen always
+   starts a new run.*/
+static void oc_fr_state_init(oc_fr_state *_fr){
+  /*Bit totals.*/
+  _fr->bits=0;
+  _fr->sb_bits=0;
+  /*Super block flag runs.*/
+  _fr->sb_partial=-1;
+  _fr->sb_partial_count=0;
+  _fr->sb_full=-1;
+  _fr->sb_full_count=0;
+  _fr->sb_prefer_partial=0;
+  /*Block flag runs.*/
+  _fr->b_coded=-1;
+  _fr->b_coded_count=0;
+  _fr->b_coded_prev=-1;
+  _fr->b_coded_count_prev=0;
+  _fr->b_count=0;
+}
+
+
+/*Estimate the change in super block flag bits from appending one super block
+   with the given flags to the current runs, without modifying _fr.
+  _sb_partial: The partially-coded flag of the new super block.
+  _sb_full:    The fully-coded flag; only counted when _sb_partial is zero.
+  Return: The cost of the extended (or newly started) runs minus the cost
+           already accounted for the runs being extended.*/
+static int oc_fr_state_sb_cost(const oc_fr_state *_fr,
+ int _sb_partial,int _sb_full){
+  int delta;
+  int run;
+  delta=0;
+  /*Extend the sb_partial run, or start a new one.*/
+  run=_fr->sb_partial_count;
+  if(_fr->sb_partial!=_sb_partial)run=0;
+  else if(run<4129)delta-=oc_sb_run_bits(run);
+  else{
+    /*A run of 4129 is not extended: one extra bit is charged and a fresh run
+       is started.*/
+    delta++;
+    run=0;
+  }
+  run++;
+  delta+=oc_sb_run_bits(run);
+  /*Partially coded super blocks do not contribute to the sb_full run.*/
+  if(_sb_partial)return delta;
+  /*Extend the sb_full run, or start a new one.*/
+  run=_fr->sb_full_count;
+  if(_fr->sb_full!=_sb_full)run=0;
+  else if(run<4129)delta-=oc_sb_run_bits(run);
+  else{
+    delta++;
+    run=0;
+  }
+  run++;
+  delta+=oc_sb_run_bits(run);
+  return delta;
+}
+
+/*Record the flags chosen for the current super block and prepare the tracker
+   for the next one.
+  This only updates run lengths and flags; it does not touch _fr->bits.
+  _sb_partial: The partially-coded flag of the super block being finished.
+  _sb_full:    The fully-coded flag; only used when _sb_partial is zero.*/
+static void oc_fr_state_advance_sb(oc_fr_state *_fr,
+ int _sb_partial,int _sb_full){
+  int run;
+  /*Extend the sb_partial run, restarting it on a flag change or once it has
+     reached 4129.*/
+  run=_fr->sb_partial_count;
+  if(_fr->sb_partial!=_sb_partial||run>=4129)run=0;
+  _fr->sb_partial_count=run+1;
+  _fr->sb_partial=_sb_partial;
+  if(_sb_partial){
+    /*The block flags of this super block are kept: save the block run state
+       as the new fallback point.*/
+    _fr->b_coded_prev=_fr->b_coded;
+    _fr->b_coded_count_prev=_fr->b_coded_count;
+  }
+  else{
+    /*Extend the sb_full run in the same manner.*/
+    run=_fr->sb_full_count;
+    if(_fr->sb_full!=_sb_full||run>=4129)run=0;
+    _fr->sb_full_count=run+1;
+    _fr->sb_full=_sb_full;
+    /*No block flags are kept for this super block: restore the block run
+       state saved at the last partial super block.*/
+    _fr->b_coded=_fr->b_coded_prev;
+    _fr->b_coded_count=_fr->b_coded_count_prev;
+  }
+  /*Clear the per-super-block working state.*/
+  _fr->sb_bits=0;
+  _fr->sb_prefer_partial=0;
+  _fr->b_count=0;
+}
+
+/*Commit the state of the current super block and advance to the next.
+  The super block is partial when the current block run is shorter than the
+   number of blocks added to it (b_coded_count<b_count).
+  A super block that is not partial may still be committed as partial if that
+   was estimated to be cheaper (sb_prefer_partial), unless one of the
+   conditions described below forces it to be coded as full.*/
+static void oc_fr_state_flush_sb(oc_fr_state *_fr){
+  int sb_partial;
+  int sb_full;
+  int b_coded_count;
+  int b_count;
+  b_count=_fr->b_count;
+  b_coded_count=_fr->b_coded_count;
+  sb_full=_fr->b_coded;
+  sb_partial=b_coded_count<b_count;
+  if(!sb_partial){
+    /*If the super block is fully coded/uncoded...*/
+    if(_fr->sb_prefer_partial){
+      /*So far coding this super block as partial was cheaper anyway.*/
+      if(b_coded_count>15||_fr->b_coded_prev<0){
+        int sb_bits;
+        /*If the block run is too long, this will limit how far it can be
+           extended into the next partial super block.
+          If we need to extend it farther, we don't want to have to roll all
+           the way back here (since there could be many full SBs between now
+           and then), so we disallow this.
+          Similarly, if this is the start of a stripe, we don't know the
+           length of the outstanding block run from the previous stripe.
+          In either case, replace the partial cost currently included in the
+           total with the cost of coding this super block as full.*/
+        sb_bits=oc_fr_state_sb_cost(_fr,sb_partial,sb_full);
+        _fr->bits+=sb_bits-_fr->sb_bits;
+        _fr->sb_bits=sb_bits;
+      }
+      else sb_partial=1;
+    }
+  }
+  oc_fr_state_advance_sb(_fr,sb_partial,sb_full);
+}
+
+/*Add one block to the current super block and update the running estimate of
+   the coded block flag bits.
+  _fr->bits always includes _fr->sb_bits, the cost attributed to the current
+   super block so far; that part is removed on entry and added back on exit
+   once it has been recomputed.
+  The constant 2 used when a new block run is started is presumably the cost
+   of a block run of length 1 -- TODO confirm against
+   OC_BLOCK_RUN_CODE_NBITS.
+  _b_coded: The coded flag of the block being added (the callers in this file
+             pass 0 for a skipped block and 1 for a coded one).*/
+static void oc_fr_state_advance_block(oc_fr_state *_fr,int _b_coded){
+  ptrdiff_t bits;
+  int       sb_bits;
+  int       b_coded_count;
+  int       b_count;
+  int       sb_prefer_partial;
+  sb_bits=_fr->sb_bits;
+  bits=_fr->bits-sb_bits;
+  b_count=_fr->b_count;
+  b_coded_count=_fr->b_coded_count;
+  sb_prefer_partial=_fr->sb_prefer_partial;
+  if(b_coded_count>=b_count){
+    int sb_partial_bits;
+    /*This super block is currently fully coded/uncoded.*/
+    if(b_count<=0){
+      /*This is the first block in this SB.*/
+      b_count=1;
+      /*Check to see whether it's cheaper to code it partially or fully.
+        The partial cost is the change in block run bits plus the cost of a
+         partial SB flag; the full cost is just the SB flags.
+        The final XOR/mask expression selects sb_partial_bits when
+         sb_prefer_partial is 1 and leaves sb_bits unchanged when it is 0,
+         without branching.*/
+      if(_fr->b_coded==_b_coded){
+        sb_partial_bits=-oc_block_run_bits(b_coded_count);
+        sb_partial_bits+=oc_block_run_bits(++b_coded_count);
+      }
+      else{
+        b_coded_count=1;
+        sb_partial_bits=2;
+      }
+      sb_partial_bits+=oc_fr_state_sb_cost(_fr,1,_b_coded);
+      sb_bits=oc_fr_state_sb_cost(_fr,0,_b_coded);
+      sb_prefer_partial=sb_partial_bits<sb_bits;
+      sb_bits^=(sb_partial_bits^sb_bits)&-sb_prefer_partial;
+    }
+    else if(_fr->b_coded==_b_coded){
+      b_coded_count++;
+      if(++b_count<16){
+        if(sb_prefer_partial){
+          /*Check to see if it's cheaper to code it fully.
+            sb_bits currently holds the partial cost, so extend the block run
+             by one to get the new partial cost, then select the cheaper of
+             the two with the same branchless XOR/mask as above.*/
+          sb_partial_bits=sb_bits;
+          sb_partial_bits+=oc_block_run_bits(b_coded_count);
+          if(b_coded_count>0){
+            sb_partial_bits-=oc_block_run_bits(b_coded_count-1);
+          }
+          sb_bits=oc_fr_state_sb_cost(_fr,0,_b_coded);
+          sb_prefer_partial=sb_partial_bits<sb_bits;
+          sb_bits^=(sb_partial_bits^sb_bits)&-sb_prefer_partial;
+        }
+        /*There's no need to check the converse (whether it's cheaper to code
+           this SB partially if we were coding it fully), since the cost to
+           code a SB partially can only increase as we add more blocks, whereas
+           the cost to code it fully stays constant.*/
+      }
+      else{
+        /*If we get to the end and this SB is still full, then force it to be
+           coded full.
+          Otherwise we might not be able to extend the block run far enough
+           into the next partial SB.*/
+        if(sb_prefer_partial){
+          sb_prefer_partial=0;
+          sb_bits=oc_fr_state_sb_cost(_fr,0,_b_coded);
+        }
+      }
+    }
+    else{
+      /*This SB was full, but now must be made partial.
+        If it was being costed as full, first switch sb_bits over to the
+         partial cost: the block run bits added by the b_count blocks already
+         in this SB, plus the cost of a partial SB flag.*/
+      if(!sb_prefer_partial){
+        sb_bits=oc_block_run_bits(b_coded_count);
+        if(b_coded_count>b_count){
+          sb_bits-=oc_block_run_bits(b_coded_count-b_count);
+        }
+        sb_bits+=oc_fr_state_sb_cost(_fr,1,_b_coded);
+      }
+      b_count++;
+      b_coded_count=1;
+      sb_prefer_partial=1;
+      sb_bits+=2;
+    }
+  }
+  else{
+    b_count++;
+    if(_fr->b_coded==_b_coded)sb_bits-=oc_block_run_bits(b_coded_count);
+    else b_coded_count=0;
+    sb_bits+=oc_block_run_bits(++b_coded_count);
+  }
+  _fr->bits=bits+sb_bits;
+  _fr->b_coded_count=b_coded_count;
+  _fr->b_coded=_b_coded;
+  _fr->b_count=b_count;
+  _fr->sb_prefer_partial=sb_prefer_partial;
+  _fr->sb_bits=sb_bits;
+}
+
+/*Advance the flag tracker by one block with a coded flag of 0 (skipped).*/
+static void oc_fr_skip_block(oc_fr_state *_fr){
+  oc_fr_state_advance_block(_fr,0);
+}
+
+/*Advance the flag tracker by one block with a coded flag of 1 (coded).*/
+static void oc_fr_code_block(oc_fr_state *_fr){
+  oc_fr_state_advance_block(_fr,1);
+}
+
+/*Estimate the extra flag bits needed to code the next block instead of
+   skipping it.
+  Both alternatives are evaluated on private copies; _fr is not modified.
+  Return: The bit total after coding the block minus the bit total after
+           skipping it.*/
+static int oc_fr_cost1(const oc_fr_state *_fr){
+  oc_fr_state skipped;
+  oc_fr_state coded;
+  skipped=*_fr;
+  oc_fr_skip_block(&skipped);
+  coded=*_fr;
+  oc_fr_code_block(&coded);
+  return (int)(coded.bits-skipped.bits);
+}
+
+/*Estimate the extra flag bits of a group of four blocks relative to skipping
+   all four of them.
+  _pre:  The tracker state before the four blocks were added.
+  _post: The tracker state after the four blocks were added.
+  Return: The bit total of _post minus the bit total obtained by skipping four
+           blocks starting from _pre.*/
+static int oc_fr_cost4(const oc_fr_state *_pre,const oc_fr_state *_post){
+  oc_fr_state skipped;
+  int         bi;
+  skipped=*_pre;
+  for(bi=0;bi<4;bi++)oc_fr_skip_block(&skipped);
+  return (int)(_post->bits-skipped.bits);
+}
+
+
+
+/*Reset a quantizer index flag tracker to its initial, empty state.
+  The bit total and both run lengths are zeroed, and both "previous flag"
+   fields are set to -1 so that the first flag seen always starts a new run.*/
+static void oc_qii_state_init(oc_qii_state *_qs){
+  _qs->bits=0;
+  _qs->qi01=-1;
+  _qs->qi01_count=0;
+  _qs->qi12=-1;
+  _qs->qi12_count=0;
+}
+
+
+/*Compute the tracker state that results from appending one quantizer index.
+  Two flag runs are tracked: the first flag is (_qii+1)>>1, and the second,
+   _qii>>1, is only appended when _qii is non-zero.
+  Everything is read from _qs before anything is written to _qd.
+  _qd:  The state to fill in.
+  _qs:  The state to start from.
+  _qii: The quantizer index being appended.*/
+static void oc_qii_state_advance(oc_qii_state *_qd,
+ const oc_qii_state *_qs,int _qii){
+  ptrdiff_t total;
+  int       flag01;
+  int       run01;
+  int       flag12;
+  int       run12;
+  total=_qs->bits;
+  /*Extend the first flag run, or start a new one.*/
+  flag01=(_qii+1)>>1;
+  run01=_qs->qi01_count;
+  if(flag01!=_qs->qi01)run01=0;
+  else if(run01<4129)total-=oc_sb_run_bits(run01);
+  else{
+    /*A run of 4129 is not extended: one extra bit is charged and a fresh run
+       is started.*/
+    total++;
+    run01=0;
+  }
+  run01++;
+  total+=oc_sb_run_bits(run01);
+  /*The second flag run is carried over unchanged unless _qii is non-zero.*/
+  flag12=_qs->qi12;
+  run12=_qs->qi12_count;
+  if(_qii!=0){
+    int next12;
+    next12=_qii>>1;
+    if(next12!=flag12)run12=0;
+    else if(run12<4129)total-=oc_sb_run_bits(run12);
+    else{
+      total++;
+      run12=0;
+    }
+    flag12=next12;
+    run12++;
+    total+=oc_sb_run_bits(run12);
+  }
+  _qd->bits=total;
+  _qd->qi01=flag01;
+  _qd->qi01_count=run01;
+  _qd->qi12=flag12;
+  _qd->qi12_count=run12;
+}
+
+
+
+/*Initializes the analysis pipeline state.
+  This resets the per-plane flag trackers, lays out the per-plane skip SSD and
+   fragment list pointers, builds the condensed (per-qii) quantizer tables,
+   clears the tokenization state, sets up the loop filter, and zeros the DCT
+   scratch space.
+  _enc:  The encoder context.
+  _pipe: The pipeline state to initialize.*/
+static void oc_enc_pipeline_init(oc_enc_ctx *_enc,oc_enc_pipeline_state *_pipe){
+  ptrdiff_t *coded_fragis;
+  unsigned   mcu_nvsbs;
+  ptrdiff_t  mcu_nfrags;
+  int        flimit;
+  int        hdec;
+  int        vdec;
+  int        pli;
+  int        nqis;
+  int        qii;
+  int        qi0;
+  int        qti;
+  /*Initialize the per-plane coded block flag trackers.
+    These are used for bit-estimation purposes only; the real flag bits span
+     all three planes, so we can't compute them in parallel.*/
+  for(pli=0;pli<3;pli++)oc_fr_state_init(_pipe->fr+pli);
+  for(pli=0;pli<3;pli++)oc_qii_state_init(_pipe->qs+pli);
+  /*Set up the per-plane skip SSD storage pointers.*/
+  mcu_nvsbs=_enc->mcu_nvsbs;
+  mcu_nfrags=mcu_nvsbs*_enc->state.fplanes[0].nhsbs*16;
+  /*hdec (resp. vdec) is 1 when bit 0 (resp. bit 1) of pixel_fmt is clear.*/
+  hdec=!(_enc->state.info.pixel_fmt&1);
+  vdec=!(_enc->state.info.pixel_fmt&2);
+  _pipe->skip_ssd[0]=_enc->mcu_skip_ssd;
+  _pipe->skip_ssd[1]=_pipe->skip_ssd[0]+mcu_nfrags;
+  /*Note: The shift below parses as mcu_nfrags>>(hdec+vdec).*/
+  _pipe->skip_ssd[2]=_pipe->skip_ssd[1]+(mcu_nfrags>>hdec+vdec);
+  /*Set up per-plane pointers to the coded and uncoded fragments lists.
+    Unlike the decoder, each planes' coded and uncoded fragment list is kept
+     separate during the analysis stage; we only make the coded list for all
+     three planes contiguous right before the final packet is output
+     (destroying the uncoded lists, which are no longer needed).
+    The uncoded pointer for a plane is set to the end of that plane's region;
+     the transform/quantize routines store uncoded entries below it.*/
+  coded_fragis=_enc->state.coded_fragis;
+  for(pli=0;pli<3;pli++){
+    _pipe->coded_fragis[pli]=coded_fragis;
+    coded_fragis+=_enc->state.fplanes[pli].nfrags;
+    _pipe->uncoded_fragis[pli]=coded_fragis;
+  }
+  memset(_pipe->ncoded_fragis,0,sizeof(_pipe->ncoded_fragis));
+  memset(_pipe->nuncoded_fragis,0,sizeof(_pipe->nuncoded_fragis));
+  /*Set up condensed quantizer tables.*/
+  qi0=_enc->state.qis[0];
+  nqis=_enc->state.nqis;
+  for(pli=0;pli<3;pli++){
+    for(qii=0;qii<nqis;qii++){
+      int qi;
+      qi=_enc->state.qis[qii];
+      for(qti=0;qti<2;qti++){
+        /*Set the DC coefficient in the dequantization table.
+          The DC value always comes from the first quantizer index, qi0,
+           regardless of which qii this table is for.*/
+        _enc->state.dequant_tables[qi][pli][qti][0]=
+         _enc->dequant_dc[qi0][pli][qti];
+        _enc->dequant[pli][qii][qti]=_enc->state.dequant_tables[qi][pli][qti];
+        /*Copy over the quantization table.*/
+        memcpy(_enc->enquant[pli][qii][qti],_enc->enquant_tables[qi][pli][qti],
+         _enc->opt_data.enquant_table_size);
+      }
+    }
+  }
+  /*Fix up the DC coefficients in the quantization tables.*/
+  oc_enc_enquant_table_fixup(_enc,_enc->enquant,nqis);
+  /*Initialize the tokenization state.*/
+  for(pli=0;pli<3;pli++){
+    _pipe->ndct_tokens1[pli]=0;
+    _pipe->eob_run1[pli]=0;
+  }
+  /*Initialize the bounding value array for the loop filter.
+    A limit of zero disables the loop filter entirely.*/
+  flimit=_enc->state.loop_filter_limits[_enc->state.qis[0]];
+  _pipe->loop_filter=flimit!=0;
+  if(flimit!=0)oc_loop_filter_init(&_enc->state,_pipe->bounding_values,flimit);
+  /*Clear the temporary DCT scratch space.*/
+  memset(_pipe->dct_data,0,sizeof(_pipe->dct_data));
+}
+
+/*Sets the current MCU stripe to start at super block row _sby, filling in the
+   per-plane super block and fragment row ranges in _pipe.
+  The vertical decimation shift is 0 for plane 0 and !(pixel_fmt&2) for planes
+   1 and 2.
+  _enc:  The encoder context.
+  _pipe: The pipeline state to update.
+  _sby:  The first super block row (in luma units) of the stripe.
+  Return: A non-zero value if more super block rows remain after this stripe
+           (i.e., this was NOT the last MCU), or zero if this was the last
+           MCU.*/
+static int oc_enc_pipeline_set_stripe(oc_enc_ctx *_enc,
+ oc_enc_pipeline_state *_pipe,int _sby){
+  const oc_fragment_plane *fplane;
+  unsigned                 mcu_nvsbs;
+  int                      sby_end;
+  int                      notdone;
+  int                      vdec;
+  int                      pli;
+  mcu_nvsbs=_enc->mcu_nvsbs;
+  sby_end=_enc->state.fplanes[0].nvsbs;
+  /*The stripe is complete only if a full MCU ends before the last luma super
+     block row; otherwise it is truncated at the end of the plane.*/
+  notdone=_sby+mcu_nvsbs<sby_end;
+  if(notdone)sby_end=_sby+mcu_nvsbs;
+  vdec=0;
+  for(pli=0;pli<3;pli++){
+    fplane=_enc->state.fplanes+pli;
+    _pipe->sbi0[pli]=fplane->sboffset+(_sby>>vdec)*fplane->nhsbs;
+    /*Note: The shift parses as _sby<<(2-vdec).*/
+    _pipe->fragy0[pli]=_sby<<2-vdec;
+    _pipe->froffset[pli]=fplane->froffset
+     +_pipe->fragy0[pli]*(ptrdiff_t)fplane->nhfrags;
+    if(notdone){
+      _pipe->sbi_end[pli]=fplane->sboffset+(sby_end>>vdec)*fplane->nhsbs;
+      _pipe->fragy_end[pli]=sby_end<<2-vdec;
+    }
+    else{
+      /*The last stripe runs to the end of the plane.*/
+      _pipe->sbi_end[pli]=fplane->sboffset+fplane->nsbs;
+      _pipe->fragy_end[pli]=fplane->nvfrags;
+    }
+    /*Planes 1 and 2 use the chroma vertical decimation.*/
+    vdec=!(_enc->state.info.pixel_fmt&2);
+  }
+  return notdone;
+}
+
+/*Finishes processing of the current MCU stripe for a single plane.
+  This copies the uncoded fragments from the previous frame, performs DC
+   prediction and DC tokenization, advances the coded fragment list, applies
+   the loop filter (if enabled), and fills the frame borders.
+  _enc:    The encoder context.
+  _pipe:   The pipeline state.
+  _pli:    The color plane to finish.
+  _sdelay: The number of fragment rows to subtract from the start of the
+            stripe for the loop filter and border fill.
+  _edelay: The number of fragment rows to subtract from the end of the stripe
+            for the loop filter and border fill.
+  Both delays are treated as zero when the loop filter is disabled.*/
+static void oc_enc_pipeline_finish_mcu_plane(oc_enc_ctx *_enc,
+ oc_enc_pipeline_state *_pipe,int _pli,int _sdelay,int _edelay){
+  /*Copy over all the uncoded fragments from this plane and advance the uncoded
+     fragment list.*/
+  if(_pipe->nuncoded_fragis[_pli]>0){
+    _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli];
+    oc_frag_copy_list(&_enc->state,
+     _enc->state.ref_frame_data[OC_FRAME_SELF],
+     _enc->state.ref_frame_data[OC_FRAME_PREV],
+     _enc->state.ref_ystride[_pli],_pipe->uncoded_fragis[_pli],
+     _pipe->nuncoded_fragis[_pli],_enc->state.frag_buf_offs);
+    _pipe->nuncoded_fragis[_pli]=0;
+  }
+  /*Perform DC prediction.*/
+  oc_enc_pred_dc_frag_rows(_enc,_pli,
+   _pipe->fragy0[_pli],_pipe->fragy_end[_pli]);
+  /*Finish DC tokenization.*/
+  oc_enc_tokenize_dc_frag_list(_enc,_pli,
+   _pipe->coded_fragis[_pli],_pipe->ncoded_fragis[_pli],
+   _pipe->ndct_tokens1[_pli],_pipe->eob_run1[_pli]);
+  /*Remember the token count and EOB run at index 1 for the next stripe.*/
+  _pipe->ndct_tokens1[_pli]=_enc->ndct_tokens[_pli][1];
+  _pipe->eob_run1[_pli]=_enc->eob_run[_pli][1];
+  /*And advance the coded fragment list.*/
+  _enc->state.ncoded_fragis[_pli]+=_pipe->ncoded_fragis[_pli];
+  _pipe->coded_fragis[_pli]+=_pipe->ncoded_fragis[_pli];
+  _pipe->ncoded_fragis[_pli]=0;
+  /*Apply the loop filter if necessary.*/
+  if(_pipe->loop_filter){
+    oc_state_loop_filter_frag_rows(&_enc->state,
+     _pipe->bounding_values,OC_FRAME_SELF,_pli,
+     _pipe->fragy0[_pli]-_sdelay,_pipe->fragy_end[_pli]-_edelay);
+  }
+  else _sdelay=_edelay=0;
+  /*To fill borders, we have an additional two pixel delay, since a fragment
+     in the next row could filter its top edge, using two pixels from a
+     fragment in this row.
+    But there's no reason to delay a full fragment between the two.
+    The row arguments are in pixels: fragment rows times 8, less two pixels
+     per fragment row of delay.*/
+  oc_state_borders_fill_rows(&_enc->state,
+   _enc->state.ref_frame_idx[OC_FRAME_SELF],_pli,
+   (_pipe->fragy0[_pli]-_sdelay<<3)-(_sdelay<<1),
+   (_pipe->fragy_end[_pli]-_edelay<<3)-(_edelay<<1));
+}
+
+
+
+/*Cost information about the coded blocks in a MB.
+  The totals are accumulated by oc_enc_block_transform_quantize() for each
+   block it decides to code.*/
+struct oc_rd_metric{
+  /*The sum of the skip (uncoded) SSD values of the coded blocks.*/
+  int uncoded_ac_ssd;
+  /*The sum of the scaled SSD values of the coded blocks after coding.*/
+  int coded_ac_ssd;
+  /*The sum of the bit estimates of the coded blocks.*/
+  int ac_bits;
+  /*Set to 1 if any block was coded without being eligible for skipping.*/
+  int dc_flag;
+};
+
+
+
+/*Transforms, quantizes, tokenizes, and reconstructs a single fragment, then
+   decides in retrospect whether coding it was worthwhile.
+  _enc:       The encoder context.
+  _pipe:      The pipeline state; supplies the quantizer flag tracker, the skip
+               SSD table, and the fragment offset for plane _pli.
+  _pli:       The color plane the fragment belongs to.
+  _fragi:     The index of the fragment.
+  _rd_scale:  The scale applied to the coded SSD via OC_RD_SCALE().
+  _rd_iscale: The scale applied to lambda via OC_RD_ISCALE() for tokenization.
+  _mo:        Accumulates the SSD and bit totals of blocks that are coded
+               (only updated when _fr is non-NULL).
+  _fr:        The coded block flag tracker, or NULL for an INTRA frame.
+  _stack:     The token checkpoint stack pointer; it is passed to the
+               tokenizer, and reset to its value on entry if the block is
+               rolled back.
+  Return: 1 if the fragment was coded, or 0 if it was skipped.*/
+static int oc_enc_block_transform_quantize(oc_enc_ctx *_enc,
+ oc_enc_pipeline_state *_pipe,int _pli,ptrdiff_t _fragi,
+ unsigned _rd_scale,unsigned _rd_iscale,oc_rd_metric *_mo,
+ oc_fr_state *_fr,oc_token_checkpoint **_stack){
+  ogg_int16_t            *data;
+  ogg_int16_t            *dct;
+  ogg_int16_t            *idct;
+  oc_qii_state            qs;
+  const ogg_uint16_t     *dequant;
+  ogg_uint16_t            dequant_dc;
+  ptrdiff_t               frag_offs;
+  int                     ystride;
+  const unsigned char    *src;
+  const unsigned char    *ref;
+  unsigned char          *dst;
+  int                     nonzero;
+  unsigned                uncoded_ssd;
+  unsigned                coded_ssd;
+  oc_token_checkpoint    *checkpoint;
+  oc_fragment            *frags;
+  int                     mb_mode;
+  int                     refi;
+  int                     mv_offs[2];
+  int                     nmv_offs;
+  int                     ac_bits;
+  int                     borderi;
+  int                     nqis;
+  int                     qti;
+  int                     qii;
+  int                     dc;
+  nqis=_enc->state.nqis;
+  frags=_enc->state.frags;
+  frag_offs=_enc->state.frag_buf_offs[_fragi];
+  ystride=_enc->state.ref_ystride[_pli];
+  src=_enc->state.ref_frame_data[OC_FRAME_IO]+frag_offs;
+  borderi=frags[_fragi].borderi;
+  qii=frags[_fragi].qii;
+  /*The scratch space is split into three 64-entry buffers.*/
+  data=_enc->pipe.dct_data;
+  dct=data+64;
+  idct=data+128;
+  /*Any bits set above the low two of qii select the early skip path below
+     (presumably set by the mode decision stage, which is not shown here).*/
+  if(qii&~3){
+#if !defined(OC_COLLECT_METRICS)
+    if(_enc->sp_level>=OC_SP_LEVEL_EARLY_SKIP){
+      /*Enable early skip detection.*/
+      frags[_fragi].coded=0;
+      frags[_fragi].refi=OC_FRAME_NONE;
+      oc_fr_skip_block(_fr);
+      return 0;
+    }
+#endif
+    /*Try and code this block anyway.*/
+    qii&=3;
+  }
+  refi=frags[_fragi].refi;
+  mb_mode=frags[_fragi].mb_mode;
+  ref=_enc->state.ref_frame_data[refi]+frag_offs;
+  dst=_enc->state.ref_frame_data[OC_FRAME_SELF]+frag_offs;
+  /*Motion compensation:*/
+  switch(mb_mode){
+    case OC_MODE_INTRA:{
+      nmv_offs=0;
+      oc_enc_frag_sub_128(_enc,data,src,ystride);
+    }break;
+    case OC_MODE_GOLDEN_NOMV:
+    case OC_MODE_INTER_NOMV:{
+      nmv_offs=1;
+      mv_offs[0]=0;
+      oc_enc_frag_sub(_enc,data,src,ref,ystride);
+    }break;
+    default:{
+      const oc_mv *frag_mvs;
+      frag_mvs=_enc->state.frag_mvs;
+      nmv_offs=oc_state_get_mv_offsets(&_enc->state,mv_offs,
+       _pli,frag_mvs[_fragi]);
+      /*With two offsets, the predictor is first built in dst from the two
+         reference blocks, and the residual is taken against that.*/
+      if(nmv_offs>1){
+        oc_enc_frag_copy2(_enc,dst,
+         ref+mv_offs[0],ref+mv_offs[1],ystride);
+        oc_enc_frag_sub(_enc,data,src,dst,ystride);
+      }
+      else oc_enc_frag_sub(_enc,data,src,ref+mv_offs[0],ystride);
+    }break;
+  }
+#if defined(OC_COLLECT_METRICS)
+  {
+    unsigned sad;
+    unsigned satd;
+    switch(nmv_offs){
+      case 0:{
+        sad=oc_enc_frag_intra_sad(_enc,src,ystride);
+        satd=oc_enc_frag_intra_satd(_enc,&dc,src,ystride);
+      }break;
+      case 1:{
+        sad=oc_enc_frag_sad_thresh(_enc,src,ref+mv_offs[0],ystride,UINT_MAX);
+        satd=oc_enc_frag_satd(_enc,&dc,src,ref+mv_offs[0],ystride);
+        satd+=abs(dc);
+      }break;
+      default:{
+        sad=oc_enc_frag_sad_thresh(_enc,src,dst,ystride,UINT_MAX);
+        satd=oc_enc_frag_satd(_enc,&dc,src,dst,ystride);
+        satd+=abs(dc);
+      }break;
+    }
+    _enc->frag_sad[_fragi]=sad;
+    _enc->frag_satd[_fragi]=satd;
+  }
+#endif
+  /*Transform:*/
+  oc_enc_fdct8x8(_enc,dct,data);
+  /*Quantize:*/
+  qti=mb_mode!=OC_MODE_INTRA;
+  dequant=_enc->dequant[_pli][qii][qti];
+  nonzero=oc_enc_quantize(_enc,data,dct,dequant,_enc->enquant[_pli][qii][qti]);
+  dc=data[0];
+  /*Tokenize.
+    Remember the current top of the checkpoint stack so the tokens can be
+     rolled back if the block ends up being skipped.*/
+  checkpoint=*_stack;
+  if(_enc->sp_level<OC_SP_LEVEL_FAST_ANALYSIS){
+    ac_bits=oc_enc_tokenize_ac(_enc,_pli,_fragi,idct,data,dequant,dct,
+     nonzero+1,_stack,OC_RD_ISCALE(_enc->lambda,_rd_iscale),qti?0:3);
+  }
+  else{
+    ac_bits=oc_enc_tokenize_ac_fast(_enc,_pli,_fragi,idct,data,dequant,dct,
+     nonzero+1,_stack,OC_RD_ISCALE(_enc->lambda,_rd_iscale),qti?0:3);
+  }
+  /*Reconstruct.
+    TODO: nonzero may need to be adjusted after tokenization.*/
+  dequant_dc=dequant[0];
+  if(nonzero==0){
+    ogg_int16_t p;
+    int         ci;
+    int         qi01;
+    int         qi12;
+    /*We round this dequant product (and not any of the others) because there's
+       no iDCT rounding.*/
+    p=(ogg_int16_t)(dc*(ogg_int32_t)dequant_dc+15>>5);
+    /*LOOP VECTORIZES.*/
+    for(ci=0;ci<64;ci++)data[ci]=p;
+    /*We didn't code any AC coefficients, so don't change the quantizer.
+      Recover the previous qii from the tracked flags; if no flag has been
+       recorded yet (qi01<0), qii is left as it is.*/
+    qi01=_pipe->qs[_pli].qi01;
+    qi12=_pipe->qs[_pli].qi12;
+    if(qi01>0)qii=1+qi12;
+    else if(qi01>=0)qii=0;
+  }
+  else{
+    idct[0]=dc*dequant_dc;
+    /*Note: This clears idct[] back to zero for the next block.*/
+    oc_idct8x8(&_enc->state,data,idct,nonzero+1);
+  }
+  frags[_fragi].qii=qii;
+  /*Quantizer index flags only cost bits when there is more than one qi.
+    The advanced state is kept in a local until we know the block is coded.*/
+  if(nqis>1){
+    oc_qii_state_advance(&qs,_pipe->qs+_pli,qii);
+    ac_bits+=qs.bits-_pipe->qs[_pli].bits;
+  }
+  if(!qti)oc_enc_frag_recon_intra(_enc,dst,ystride,data);
+  else{
+    oc_enc_frag_recon_inter(_enc,dst,
+     nmv_offs==1?ref+mv_offs[0]:dst,ystride,data);
+  }
+  /*If _fr is NULL, then this is an INTRA frame, and we can't skip blocks.*/
+#if !defined(OC_COLLECT_METRICS)
+  if(_fr!=NULL)
+#endif
+  {
+    /*In retrospect, should we have skipped this block?*/
+    if(borderi<0){
+      coded_ssd=oc_enc_frag_ssd(_enc,src,dst,ystride);
+    }
+    else{
+      coded_ssd=oc_enc_frag_border_ssd(_enc,src,dst,ystride,
+       _enc->state.borders[borderi].mask);
+    }
+    /*Scale to match DCT domain.*/
+    coded_ssd<<=4;
+#if defined(OC_COLLECT_METRICS)
+    _enc->frag_ssd[_fragi]=coded_ssd;
+  }
+  if(_fr!=NULL){
+#endif
+    coded_ssd=OC_RD_SCALE(coded_ssd,_rd_scale);
+    /*The skip SSD table is indexed relative to the start of the stripe.*/
+    uncoded_ssd=_pipe->skip_ssd[_pli][_fragi-_pipe->froffset[_pli]];
+    if(uncoded_ssd<UINT_MAX&&
+     /*Don't allow luma blocks to be skipped in 4MV mode when VP3 compatibility
+        is enabled.*/
+     (!_enc->vp3_compatible||mb_mode!=OC_MODE_INTER_MV_FOUR||_pli)){
+      int overhead_bits;
+      overhead_bits=oc_fr_cost1(_fr);
+      /*Although the fragment coding overhead determination is accurate, it is
+         greedy, using very coarse-grained local information.
+        Allowing it to mildly discourage coding turns out to be beneficial, but
+         it's not clear that allowing it to encourage coding through negative
+         coding overhead deltas is useful.
+        For that reason, we disallow negative coding overheads.*/
+      if(overhead_bits<0)overhead_bits=0;
+      if(uncoded_ssd<=coded_ssd+(overhead_bits+ac_bits)*_enc->lambda){
+        /*Hm, not worth it; roll back.*/
+        oc_enc_tokenlog_rollback(_enc,checkpoint,(*_stack)-checkpoint);
+        *_stack=checkpoint;
+        frags[_fragi].coded=0;
+        frags[_fragi].refi=OC_FRAME_NONE;
+        oc_fr_skip_block(_fr);
+        return 0;
+      }
+    }
+    /*This block could not be considered for skipping; record that fact.*/
+    else _mo->dc_flag=1;
+    _mo->uncoded_ac_ssd+=uncoded_ssd;
+    _mo->coded_ac_ssd+=coded_ssd;
+    _mo->ac_bits+=ac_bits;
+    oc_fr_code_block(_fr);
+  }
+  /*GCC 4.4.4 generates a warning here because it can't tell that
+     the init code in the nqis check above will run anytime this
+     line runs.*/
+  if(nqis>1)*(_pipe->qs+_pli)=*&qs;
+  frags[_fragi].dc=dc;
+  frags[_fragi].coded=1;
+  return 1;
+}
+
+/*Transforms and quantizes the four luma blocks of a macro block in an inter
+   frame, then reconsiders whether the macro block as a whole is worth coding
+   once the mode overhead is included.
+  If it is not, all four blocks are rolled back and marked uncoded.
+  The macro block mode is rewritten to OC_MODE_INTER_NOMV when no luma block
+   is coded, and from OC_MODE_INTER_MV_FOUR to OC_MODE_INTER_MV when only one
+   is.
+  _enc:           The encoder context.
+  _pipe:          The pipeline state.
+  _mbi:           The index of the macro block.
+  _mode_overhead: The overhead of the macro block mode, added to the bit
+                   count before it is multiplied by lambda.
+  _rd_scale:      The per-block distortion scales.
+  _rd_iscale:     The per-block lambda scales.
+  Return: The number of luma blocks that were coded.*/
+static int oc_enc_mb_transform_quantize_inter_luma(oc_enc_ctx *_enc,
+ oc_enc_pipeline_state *_pipe,unsigned _mbi,int _mode_overhead,
+ const unsigned _rd_scale[4],const unsigned _rd_iscale[4]){
+  /*Worst case token stack usage for 4 fragments.*/
+  oc_token_checkpoint  stack[64*4];
+  oc_token_checkpoint *stackptr;
+  const oc_sb_map     *sb_maps;
+  signed char         *mb_modes;
+  oc_fragment         *frags;
+  ptrdiff_t           *coded_fragis;
+  ptrdiff_t            ncoded_fragis;
+  ptrdiff_t           *uncoded_fragis;
+  ptrdiff_t            nuncoded_fragis;
+  oc_rd_metric         mo;
+  oc_fr_state          fr_checkpoint;
+  oc_qii_state         qs_checkpoint;
+  int                  mb_mode;
+  int                  refi;
+  int                  ncoded;
+  ptrdiff_t            fragi;
+  int                  bi;
+  /*Save the luma flag trackers so they can be restored on rollback.*/
+  *&fr_checkpoint=*(_pipe->fr+0);
+  *&qs_checkpoint=*(_pipe->qs+0);
+  sb_maps=(const oc_sb_map *)_enc->state.sb_maps;
+  mb_modes=_enc->state.mb_modes;
+  frags=_enc->state.frags;
+  coded_fragis=_pipe->coded_fragis[0];
+  ncoded_fragis=_pipe->ncoded_fragis[0];
+  uncoded_fragis=_pipe->uncoded_fragis[0];
+  nuncoded_fragis=_pipe->nuncoded_fragis[0];
+  mb_mode=mb_modes[_mbi];
+  refi=OC_FRAME_FOR_MODE(mb_mode);
+  ncoded=0;
+  stackptr=stack;
+  memset(&mo,0,sizeof(mo));
+  for(bi=0;bi<4;bi++){
+    fragi=sb_maps[_mbi>>2][_mbi&3][bi];
+    frags[fragi].refi=refi;
+    frags[fragi].mb_mode=mb_mode;
+    if(oc_enc_block_transform_quantize(_enc,_pipe,0,fragi,
+     _rd_scale[bi],_rd_iscale[bi],&mo,_pipe->fr+0,&stackptr)){
+      coded_fragis[ncoded_fragis++]=fragi;
+      ncoded++;
+    }
+    /*The uncoded list grows downwards from uncoded_fragis.*/
+    else *(uncoded_fragis-++nuncoded_fragis)=fragi;
+  }
+  /*The macro block level check is bypassed if any block set dc_flag.*/
+  if(ncoded>0&&!mo.dc_flag){
+    int cost;
+    /*Some individual blocks were worth coding.
+      See if that's still true when accounting for mode and MV overhead.*/
+    cost=mo.coded_ac_ssd+_enc->lambda*(mo.ac_bits
+     +oc_fr_cost4(&fr_checkpoint,_pipe->fr+0)+_mode_overhead);
+    if(mo.uncoded_ac_ssd<=cost){
+      /*Taking macroblock overhead into account, it is not worth coding this
+         MB.*/
+      oc_enc_tokenlog_rollback(_enc,stack,stackptr-stack);
+      *(_pipe->fr+0)=*&fr_checkpoint;
+      *(_pipe->qs+0)=*&qs_checkpoint;
+      /*Re-run the flag tracker with all four blocks skipped, and move the
+         blocks that had been coded over to the uncoded list.*/
+      for(bi=0;bi<4;bi++){
+        fragi=sb_maps[_mbi>>2][_mbi&3][bi];
+        if(frags[fragi].coded){
+          *(uncoded_fragis-++nuncoded_fragis)=fragi;
+          frags[fragi].coded=0;
+          frags[fragi].refi=OC_FRAME_NONE;
+        }
+        oc_fr_skip_block(_pipe->fr+0);
+      }
+      ncoded_fragis-=ncoded;
+      ncoded=0;
+    }
+  }
+  /*If no luma blocks coded, the mode is forced.*/
+  if(ncoded==0)mb_modes[_mbi]=OC_MODE_INTER_NOMV;
+  /*Assume that a 1MV with a single coded block is always cheaper than a 4MV
+     with a single coded block.
+    This may not be strictly true: a 4MV computes chroma MVs using (0,0) for
+     skipped blocks, while a 1MV does not.*/
+  else if(ncoded==1&&mb_mode==OC_MODE_INTER_MV_FOUR){
+    mb_modes[_mbi]=OC_MODE_INTER_MV;
+  }
+  _pipe->ncoded_fragis[0]=ncoded_fragis;
+  _pipe->nuncoded_fragis[0]=nuncoded_fragis;
+  return ncoded;
+}
+
+/*Transforms and quantizes every fragment in a range of super blocks of one
+   plane in an inter frame.
+  Each fragment that ends up coded is appended to the plane's coded list; each
+   one that is skipped is stored below the plane's uncoded list pointer.
+  After each super block, the coded block flag tracker is flushed and its
+   fully/partially coded flags are copied into the super block flags.
+  _enc:       The encoder context.
+  _pipe:      The pipeline state.
+  _pli:       The color plane to process.
+  _sbi_start: The index of the first super block to process.
+  _sbi_end:   The index one past the last super block to process.*/
+static void oc_enc_sb_transform_quantize_inter_chroma(oc_enc_ctx *_enc,
+ oc_enc_pipeline_state *_pipe,int _pli,int _sbi_start,int _sbi_end){
+  const ogg_uint16_t *rd_scales;
+  const ogg_uint16_t *rd_iscales;
+  const oc_sb_map    *maps;
+  oc_sb_flags        *flags;
+  oc_fr_state        *fr;
+  ptrdiff_t          *coded;
+  ptrdiff_t           ncoded;
+  ptrdiff_t          *uncoded;
+  ptrdiff_t           nuncoded;
+  ptrdiff_t           plane_offset;
+  int                 sbi;
+  maps=(const oc_sb_map *)_enc->state.sb_maps;
+  flags=_enc->state.sb_flags;
+  rd_scales=(const ogg_uint16_t *)_enc->mcu_rd_scale;
+  rd_iscales=(const ogg_uint16_t *)_enc->mcu_rd_iscale;
+  fr=_pipe->fr+_pli;
+  plane_offset=_pipe->froffset[_pli];
+  /*Work on local copies of the list state and write the counts back at the
+     end.*/
+  coded=_pipe->coded_fragis[_pli];
+  ncoded=_pipe->ncoded_fragis[_pli];
+  uncoded=_pipe->uncoded_fragis[_pli];
+  nuncoded=_pipe->nuncoded_fragis[_pli];
+  for(sbi=_sbi_start;sbi<_sbi_end;sbi++){
+    /*Worst case token stack usage for 1 fragment.*/
+    oc_token_checkpoint stack[64];
+    oc_rd_metric        mo;
+    int                 blocki;
+    memset(&mo,0,sizeof(mo));
+    /*Visit the 16 blocks of the super block, one quadrant of 4 at a time.*/
+    for(blocki=0;blocki<16;blocki++){
+      oc_token_checkpoint *stackptr;
+      ptrdiff_t            fragi;
+      ptrdiff_t            offset;
+      fragi=maps[sbi][blocki>>2][blocki&3];
+      /*Negative entries do not refer to a fragment.*/
+      if(fragi<0)continue;
+      offset=fragi-plane_offset;
+      /*Each fragment starts with an empty checkpoint stack.*/
+      stackptr=stack;
+      if(oc_enc_block_transform_quantize(_enc,_pipe,_pli,fragi,
+       rd_scales[offset],rd_iscales[offset],&mo,fr,&stackptr)){
+        coded[ncoded++]=fragi;
+      }
+      else{
+        nuncoded++;
+        uncoded[-nuncoded]=fragi;
+      }
+    }
+    oc_fr_state_flush_sb(fr);
+    flags[sbi].coded_fully=fr->sb_full;
+    flags[sbi].coded_partially=fr->sb_partial;
+  }
+  _pipe->nuncoded_fragis[_pli]=nuncoded;
+  _pipe->ncoded_fragis[_pli]=ncoded;
+}
+
+/*Mode decision is done by exhaustively examining all potential choices.
+  Obviously, doing the motion compensation, fDCT, tokenization, and then
+   counting the bits each token uses is computationally expensive.
+  Theora's EOB runs can also split the cost of these tokens across multiple
+   fragments, and naturally we don't know what the optimal choice of Huffman
+   codes will be until we know all the tokens we're going to encode in all the
+   fragments.
+  So we use a simple approach to estimating the bit cost and distortion of each
+   mode based upon the SATD value of the residual before coding.
+  The mathematics behind the technique are outlined by Kim \cite{Kim03}, but
+   the process (modified somewhat from that of the paper) is very simple.
+  We build a non-linear regression of the mappings from
+   (pre-transform+quantization) SATD to (post-transform+quantization) bits and
+   SSD for each qi.
+  A separate set of mappings is kept for each quantization type and color
+   plane.
+  The mappings are constructed by partitioning the SATD values into a small
+   number of bins (currently 24) and using a linear regression in each bin
+   (as opposed to the 0th-order regression used by Kim).
+  The bit counts and SSD measurements are obtained by examining actual encoded
+   frames, with appropriate lambda values and optimal Huffman codes selected.
+  EOB bits are assigned to the fragment that started the EOB run (as opposed to
+   dividing them among all the blocks in the run; the latter approach seems
+   more theoretically correct, but Monty's testing showed a small improvement
+   with the former, though that may have been merely statistical noise).
+
+  @ARTICLE{Kim03,
+    author="Hyun Mun Kim",
+    title="Adaptive Rate Control Using Nonlinear Regression",
+    journal="IEEE Transactions on Circuits and Systems for Video Technology",
+    volume=13,
+    number=5,
+    pages="432--439",
+    month=May,
+    year=2003
+  }*/
+
+/*Computes (_ssd+_lambda*_rate)/(1<<OC_BIT_SCALE) with rounding, avoiding
+   overflow for large lambda values.
+  The parts of _ssd and _rate above and below bit OC_BIT_SCALE are handled
+   separately, so that the full-precision product _rate*_lambda is never
+   formed.
+  The whole expansion is parenthesized so that the macro can be used safely as
+   an operand of any operator.
+  Note that _ssd, _rate, and _lambda are each evaluated twice, so they must
+   not have side effects.*/
+#define OC_MODE_RD_COST(_ssd,_rate,_lambda) \
+ (((_ssd)>>OC_BIT_SCALE)+((_rate)>>OC_BIT_SCALE)*(_lambda) \
+ +(((_ssd)&(1<<OC_BIT_SCALE)-1)+((_rate)&(1<<OC_BIT_SCALE)-1)*(_lambda) \
+ +((1<<OC_BIT_SCALE)>>1)>>OC_BIT_SCALE))
+
+/*Builds the per-quantizer rate and RMSE estimation tables in _enc->mode_rd.
+  For each active quantizer index, plane, and quantization type, a row is
+   linearly interpolated (or extrapolated) between two adjacent rows of the
+   static mode table, based on where log_plq falls relative to OC_MODE_LOGQ.
+  The SATD table is used when sp_level is below OC_SP_LEVEL_NOSATD, and the
+   SAD table otherwise.
+  _enc: The encoder context.*/
+static void oc_enc_mode_rd_init(oc_enc_ctx *_enc){
+#if !defined(OC_COLLECT_METRICS)
+  const
+#endif
+  oc_mode_rd (*oc_mode_rd_table)[3][2][OC_COMP_BINS]=
+   _enc->sp_level<OC_SP_LEVEL_NOSATD?OC_MODE_RD_SATD:OC_MODE_RD_SAD;
+  int qii;
+#if defined(OC_COLLECT_METRICS)
+  oc_enc_mode_metrics_load(_enc);
+#endif
+  for(qii=0;qii<_enc->state.nqis;qii++){
+    int qi;
+    int pli;
+    qi=_enc->state.qis[qii];
+    for(pli=0;pli<3;pli++){
+      int qti;
+      for(qti=0;qti<2;qti++){
+        int log_plq;
+        int modeline;
+        int bin;
+        int dx;
+        int dq;
+        log_plq=_enc->log_plq[qi][pli][qti];
+        /*Find the pair of rows in the mode table that bracket this quantizer.
+          If it falls outside the range the table covers, then we just use a
+           pair on the edge for linear extrapolation.
+          NOTE(review): If every tested entry exceeds log_plq, this loop exits
+           with modeline==OC_LOGQ_BINS-1, and row modeline+1==OC_LOGQ_BINS is
+           read below.
+          If the tables have exactly OC_LOGQ_BINS rows, that is one past the
+           end; confirm against the table declarations.*/
+        for(modeline=0;modeline<OC_LOGQ_BINS-1&&
+         OC_MODE_LOGQ[modeline+1][pli][qti]>log_plq;modeline++);
+        /*Interpolate a row for this quantizer.*/
+        dx=OC_MODE_LOGQ[modeline][pli][qti]-log_plq;
+        dq=OC_MODE_LOGQ[modeline][pli][qti]-OC_MODE_LOGQ[modeline+1][pli][qti];
+        /*Avoid dividing by zero when the two rows have the same log quantizer.*/
+        if(dq==0)dq=1;
+        for(bin=0;bin<OC_COMP_BINS;bin++){
+          int y0;
+          int z0;
+          int dy;
+          int dz;
+          y0=oc_mode_rd_table[modeline][pli][qti][bin].rate;
+          z0=oc_mode_rd_table[modeline][pli][qti][bin].rmse;
+          dy=oc_mode_rd_table[modeline+1][pli][qti][bin].rate-y0;
+          dz=oc_mode_rd_table[modeline+1][pli][qti][bin].rmse-z0;
+          /*Interpolate with rounding and clamp to the 16-bit storage range.*/
+          _enc->mode_rd[qii][pli][qti][bin].rate=
+           (ogg_int16_t)OC_CLAMPI(-32768,y0+(dy*dx+(dq>>1))/dq,32767);
+          _enc->mode_rd[qii][pli][qti][bin].rmse=
+           (ogg_int16_t)OC_CLAMPI(-32768,z0+(dz*dx+(dq>>1))/dq,32767);
+        }
+      }
+    }
+  }
+}
+
+/*Estimates the rate and distortion of coding a block's DCT coefficients from
+   the SATD (or SAD, at higher speed levels) of the block after prediction.
+  The estimate is a linear interpolation between two adjacent bins of the
+   table built by oc_enc_mode_rd_init().
+  _enc:  The encoder context.
+  _ssd:  Returns the estimated distortion.
+  _qii:  The index of the quantizer to use.
+  _pli:  The color plane of the block.
+  _qti:  The quantization type of the block.
+  _satd: The SATD (or SAD) of the block.
+  Return: The estimated rate, which is never negative.*/
+static unsigned oc_dct_cost2(oc_enc_ctx *_enc,unsigned *_ssd,
+ int _qii,int _pli,int _qti,int _satd){
+  unsigned err;
+  int      nshift;
+  int      lo;
+  int      frac;
+  int      rate0;
+  int      rmse0;
+  int      drate;
+  int      drmse;
+  /*SATD metrics for chroma planes vary much less than luma, so we scale them
+     by 4 to distribute them into the mode decision bins more evenly.*/
+  _satd<<=(_pli+1)&2;
+  if(_enc->sp_level<OC_SP_LEVEL_NOSATD)nshift=OC_SATD_SHIFT;
+  else nshift=OC_SAD_SHIFT;
+  /*Pick the lower bin of the pair, keeping the upper one inside the table.*/
+  lo=OC_MINI(_satd>>nshift,OC_COMP_BINS-2);
+  frac=_satd-(lo<<nshift);
+  rate0=_enc->mode_rd[_qii][_pli][_qti][lo].rate;
+  drate=_enc->mode_rd[_qii][_pli][_qti][lo+1].rate-rate0;
+  rmse0=_enc->mode_rd[_qii][_pli][_qti][lo].rmse;
+  drmse=_enc->mode_rd[_qii][_pli][_qti][lo+1].rmse-rmse0;
+  /*Interpolate the RMSE, clamp it at zero, and square it for the SSD.*/
+  err=OC_MAXI(rmse0+((drmse*frac)>>nshift),0);
+  *_ssd=(err*err)>>(2*OC_RMSE_SCALE-OC_BIT_SCALE);
+  return OC_MAXI(rate0+((drate*frac)>>nshift),0);
+}
+
+/*activity_avg must be positive, or flat regions could get a zero weight, which
+   confounds analysis.
+  We set the minimum to this value so that it also avoids the need for divide
+   by zero checks in oc_mb_masking(), which divides by
+   OC_RD_SCALE(4*activity+activity_avg,1) without testing it for zero.*/
+# define OC_ACTIVITY_AVG_MIN (1<<OC_RD_SCALE_BITS)
+
+/*Computes an activity measure for each of the four luma blocks of a macro
+   block from the input frame.
+  The base measure is 64*sum(c*c)-sum(c)**2 over the 64 pixels of the block
+   (i.e., 4096 times the pixel variance).
+  Values below 8<<12 are treated as flat and capped at 5<<12; larger values
+   are reduced if the block is classified as an edge.
+  _enc:      The encoder context.
+  _mbi:      The index of the macro block.
+  _activity: Returns the activity of each of the four luma blocks.
+  Return: The sum of all the luma pixel values in the macro block.*/
+static unsigned oc_mb_activity(oc_enc_ctx *_enc,unsigned _mbi,
+ unsigned _activity[4]){
+  const unsigned char *src;
+  const ptrdiff_t     *frag_buf_offs;
+  const ptrdiff_t     *sb_map;
+  unsigned             luma;
+  int                  ystride;
+  ptrdiff_t            frag_offs;
+  ptrdiff_t            fragi;
+  int                  bi;
+  frag_buf_offs=_enc->state.frag_buf_offs;
+  sb_map=_enc->state.sb_maps[_mbi>>2][_mbi&3];
+  src=_enc->state.ref_frame_data[OC_FRAME_IO];
+  ystride=_enc->state.ref_ystride[0];
+  luma=0;
+  for(bi=0;bi<4;bi++){
+    const unsigned char *s;
+    unsigned             x;
+    unsigned             x2;
+    unsigned             act;
+    int                  i;
+    int                  j;
+    fragi=sb_map[bi];
+    frag_offs=frag_buf_offs[fragi];
+    /*TODO: This could be replaced with SATD^2, since we already have to
+       compute SATD.*/
+    /*Accumulate the sum and the sum of squares of the block's pixels.*/
+    x=x2=0;
+    s=src+frag_offs;
+    for(i=0;i<8;i++){
+      for(j=0;j<8;j++){
+        unsigned c;
+        c=s[j];
+        x+=c;
+        x2+=c*c;
+      }
+      s+=ystride;
+    }
+    luma+=x;
+    act=(x2<<6)-x*x;
+    if(act<8<<12){
+      /*The region is flat.*/
+      act=OC_MINI(act,5<<12);
+    }
+    else{
+      unsigned e1;
+      unsigned e2;
+      unsigned e3;
+      unsigned e4;
+      /*Test for an edge.
+        TODO: There are probably much simpler ways to do this (e.g., it could
+         probably be combined with the SATD calculation).
+        Alternatively, we could split the block around the mean and compute the
+         reduction in variance in each half.
+        For a Gaussian source the reduction should be
+         (1-2/pi) ~= 0.36338022763241865692446494650994.
+        Significantly more reduction is a good indication of a bi-level image.
+        This has the advantage of identifying, in addition to straight edges,
+         small text regions, which would otherwise be classified as "texture".*/
+      /*Each pixel gets four 3x3 difference filters: e1 sums horizontal
+         differences, e2 vertical ones, and e3 and e4 the two diagonals.
+        s starts one pixel to the left of the block, so the filters read one
+         row and one column beyond the block on every side.
+        NOTE(review): This assumes the input frame has readable padding around
+         it; confirm against the buffer allocation.*/
+      e1=e2=e3=e4=0;
+      s=src+frag_offs-1;
+      for(i=0;i<8;i++){
+        for(j=0;j<8;j++){
+          e1+=abs((s[j+2]-s[j]<<1)+(s-ystride)[j+2]-(s-ystride)[j]
+           +(s+ystride)[j+2]-(s+ystride)[j]);
+          e2+=abs(((s+ystride)[j+1]-(s-ystride)[j+1]<<1)
+           +(s+ystride)[j]-(s-ystride)[j]+(s+ystride)[j+2]-(s-ystride)[j+2]);
+          e3+=abs(((s+ystride)[j+2]-(s-ystride)[j]<<1)
+           +(s+ystride)[j+1]-s[j]+s[j+2]-(s-ystride)[j+1]);
+          e4+=abs(((s+ystride)[j]-(s-ystride)[j+2]<<1)
+           +(s+ystride)[j+1]-s[j+2]+s[j]-(s-ystride)[j+1]);
+        }
+        s+=ystride;
+      }
+      /*If the largest component of the edge energy is at least 40% of the
+         total, then classify the block as an edge block.*/
+      if(5*OC_MAXI(OC_MAXI(e1,e2),OC_MAXI(e3,e4))>2*(e1+e2+e3+e4)){
+         /*act=act_th*(act/act_th)**0.7
+              =exp(log(act_th)+0.7*(log(act)-log(act_th))).
+           Here act_th=5.0 and 0x394A=oc_blog32_q10(5<<12).*/
+         act=oc_bexp32_q10(0x394A+(7*(oc_blog32_q10(act)-0x394A+5)/10));
+      }
+    }
+    _activity[bi]=act;
+  }
+  return luma;
+}
+
+/*Estimates the activity of each luma block of a macro block from its intra
+   SATD instead of from the pixel data.
+  _enc:        The encoder context (unused).
+  _mbi:        The index of the macro block (unused).
+  _activity:   Returns the activity of each of the four luma blocks.
+  _intra_satd: The intra SATD values; only the first four entries are read.*/
+static void oc_mb_activity_fast(oc_enc_ctx *_enc,unsigned _mbi,
+ unsigned _activity[4],const unsigned _intra_satd[12]){
+  int blocki;
+  for(blocki=0;blocki<4;blocki++){
+    unsigned satd;
+    unsigned act;
+    satd=_intra_satd[blocki];
+    /*Approximately (11/256)*satd**2.*/
+    act=((11*satd)>>8)*satd;
+    /*Values below 8<<12 mark a flat region, and are capped at 5<<12.*/
+    if(act<(8<<12))act=OC_MINI(act,5<<12);
+    _activity[blocki]=act;
+  }
+}
+
+/*Compute the masking scales for the blocks in a macro block.
+  All masking is computed from the luma blocks.
+  We derive scaling factors for the chroma blocks from these, and use the same
+   ones for all chroma blocks, regardless of the subsampling.
+  It's possible for luma to be perfectly flat and yet have high chroma energy,
+   but this is unlikely in non-artificial images, and not a case that has been
+   addressed by any research to my knowledge.
+  The output of the masking process is two scale factors, which are fed into
+   the various R-D optimizations.
+  The first, rd_scale, is applied to D in the equation
+    D*rd_scale+lambda*R.
+  This is the form that must be used to properly combine scores from multiple
+   blocks, and can be interpreted as scaling distortions by their visibility.
+  The inverse, rd_iscale, is applied to lambda in the equation
+    D+rd_iscale*lambda*R.
+  This is equivalent to the first form within a single block, but much faster
+   to use when evaluating many possible distortions (e.g., during actual
+   quantization, where separate distortions are evaluated for every
+   coefficient).
+  The two macros OC_RD_SCALE(d,rd_scale) and OC_RD_ISCALE(lambda,rd_iscale) are
+   used to perform the multiplications with the proper re-scaling for the range
+   of the scaling factors.
+  Many researchers apply masking values directly to the quantizers used, and
+   not to the R-D cost.
+  Since we generally use MSE for D, rd_scale must use the square of their
+   values to generate an equivalent effect.
+  _rd_scale:        Returns the distortion scales: entries 0...3 for the luma
+                     blocks, and entry 4 for chroma.
+  _rd_iscale:       Returns the lambda scales, laid out the same way.
+  _chroma_rd_scale: The chroma scale factors: entry 0 is applied to
+                     _rd_scale[4], and entry 1 to _rd_iscale[4].
+  _activity:        The activity of each of the four luma blocks.
+  _activity_avg:    The average activity.
+  _luma:            The luma of the macro block (unused while luminance
+                     masking is disabled).
+  _luma_avg:        The average luma (unused while luminance masking is
+                     disabled).
+  Return: The sum of the four block activities.*/
+static unsigned oc_mb_masking(unsigned _rd_scale[5],unsigned _rd_iscale[5],
+ const ogg_uint16_t _chroma_rd_scale[2],const unsigned _activity[4],
+ unsigned _activity_avg,unsigned _luma,unsigned _luma_avg){
+  unsigned activity_sum;
+  unsigned la;
+  unsigned lb;
+  unsigned d;
+  int      bi;
+  int      bi_min;
+  int      bi_min2;
+  /*The ratio lb/la is meant to approximate
+     ((((_luma-16)/219)*(255/128))**0.649**0.4**2), which is the
+     effective luminance masking from~\cite{LKW06} (including the self-masking
+     deflator).
+    The following actually turns out to be a pretty good approximation for
+     _luma>75 or so.
+    For smaller values luminance does not really follow Weber's Law anyway, and
+     this approximation gives a much less aggressive bitrate boost in this
+     region.
+    Though some researchers claim that contrast sensitivity actually decreases
+     for very low luminance values, in my experience excessive brightness on
+     LCDs or buggy color conversions (e.g., treating Y' as full-range instead
+     of the CCIR 601 range) make artifacts in such regions extremely visible.
+    We substitute _luma_avg for 128 to allow the strength of the masking to
+     vary with the actual average image luminance, within certain limits (the
+     caller has clamped _luma_avg to the range [90,160], inclusive).
+    @ARTICLE{LKW06,
+      author="Zhen Liu and Lina J. Karam and Andrew B. Watson",
+      title="{JPEG2000} Encoding With Perceptual Distortion Control",
+      journal="{IEEE} Transactions on Image Processing",
+      volume=15,
+      number=7,
+      pages="1763--1778",
+      month=Jul,
+      year=2006
+    }*/
+#if 0
+  la=_luma+4*_luma_avg;
+  lb=4*_luma+_luma_avg;
+#else
+  /*Disable luminance masking.*/
+  la=lb=1;
+#endif
+  activity_sum=0;
+  for(bi=0;bi<4;bi++){
+    unsigned a;
+    unsigned b;
+    activity_sum+=_activity[bi];
+    /*Apply activity masking.
+      The scale is based on the ratio a/b, and the inverse scale on b/a.*/
+    a=_activity[bi]+4*_activity_avg;
+    b=4*_activity[bi]+_activity_avg;
+    d=OC_RD_SCALE(b,1);
+    /*And luminance masking.*/
+    d=(a+(d>>1))/d;
+    _rd_scale[bi]=(d*la+(lb>>1))/lb;
+    /*And now the inverse.*/
+    d=OC_MAXI(OC_RD_ISCALE(a,1),1);
+    d=(b+(d>>1))/d;
+    _rd_iscale[bi]=(d*lb+(la>>1))/la;
+  }
+  /*Now compute scaling factors for chroma blocks.
+    We start by finding the two smallest iscales from the luma blocks.*/
+  bi_min=_rd_iscale[1]<_rd_iscale[0];
+  bi_min2=1-bi_min;
+  for(bi=2;bi<4;bi++){
+    if(_rd_iscale[bi]<_rd_iscale[bi_min]){
+      bi_min2=bi_min;
+      bi_min=bi;
+    }
+    else if(_rd_iscale[bi]<_rd_iscale[bi_min2])bi_min2=bi;
+  }
+  /*If the minimum iscale is less than 1.0, use the second smallest instead,
+     and force the value to at least 1.0 (inflating chroma is a waste).*/
+  if(_rd_iscale[bi_min]<(1<<OC_RD_ISCALE_BITS))bi_min=bi_min2;
+  d=OC_MINI(_rd_scale[bi_min],1<<OC_RD_SCALE_BITS);
+  _rd_scale[4]=OC_RD_SCALE(d,_chroma_rd_scale[0]);
+  d=OC_MAXI(_rd_iscale[bi_min],1<<OC_RD_ISCALE_BITS);
+  _rd_iscale[4]=OC_RD_ISCALE(d,_chroma_rd_scale[1]);
+  return activity_sum;
+}
+
+/*Computes the intra SATD of every block in a macro block from the input
+   frame.
+  Entries 0...3 of _frag_satd receive the values for the four luma blocks;
+   the remaining entries, up to the number of blocks the pixel format has per
+   macro block, receive the chroma values in map index order.
+  _enc:       The encoder context.
+  _mbi:       The index of the macro block.
+  _frag_satd: Returns the SATD of each block.
+  Return: The sum of the DC values reported for the four luma blocks.*/
+static int oc_mb_intra_satd(oc_enc_ctx *_enc,unsigned _mbi,
+ unsigned _frag_satd[12]){
+  const unsigned char   *io_data;
+  const ptrdiff_t       *offs;
+  const ptrdiff_t       *luma_fragis;
+  const oc_mb_map_plane *planes;
+  const unsigned char   *idxs;
+  unsigned               luma_sum;
+  int                    nidxs;
+  int                    stride;
+  int                    dc;
+  int                    i;
+  offs=_enc->state.frag_buf_offs;
+  io_data=_enc->state.ref_frame_data[OC_FRAME_IO];
+  /*The luma blocks come straight from the super block map.*/
+  luma_fragis=_enc->state.sb_maps[_mbi>>2][_mbi&3];
+  stride=_enc->state.ref_ystride[0];
+  luma_sum=0;
+  for(i=0;i<4;i++){
+    _frag_satd[i]=oc_enc_frag_intra_satd(_enc,&dc,
+     io_data+offs[luma_fragis[i]],stride);
+    luma_sum+=dc;
+  }
+  /*The chroma blocks are looked up through the macro block map; the DC value
+     they report is not used.*/
+  planes=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi];
+  idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
+  nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
+  /*Note: This assumes ref_ystride[1]==ref_ystride[2].*/
+  stride=_enc->state.ref_ystride[1];
+  for(i=4;i<nidxs;i++){
+    int mapi;
+    mapi=idxs[i];
+    /*The plane is in the bits above the low two, and the block in the low
+       two bits.*/
+    _frag_satd[i]=oc_enc_frag_intra_satd(_enc,&dc,
+     io_data+offs[planes[mapi>>2][mapi&3]],stride);
+  }
+  return luma_sum;
+}
+
+/*Select luma block-level quantizers for a MB in an INTRA frame.
+  This runs a small dynamic program over the four luma blocks, in super block
+   map order: for every block and every candidate quantizer index (qii) it
+   keeps the cheapest chain of choices for the preceding blocks, then walks
+   the winning chain backwards and stores a qii in each fragment.
+  _enc:      The encoder context.
+  _qs:       The quantizer index coding state before this MB.
+  _mbi:      The index of the MB to analyze.
+  _rd_scale: The scale applied to the SSD estimate of each luma block.
+  Return: The RD cost of the cheapest chain.*/
+static unsigned oc_analyze_intra_mb_luma(oc_enc_ctx *_enc,
+ const oc_qii_state *_qs,unsigned _mbi,const unsigned _rd_scale[4]){
+  const unsigned char *src;
+  const ptrdiff_t     *frag_buf_offs;
+  const oc_sb_map     *sb_maps;
+  oc_fragment         *frags;
+  ptrdiff_t            frag_offs;
+  ptrdiff_t            fragi;
+  oc_qii_state         qs[4][3];/*qs[bi][qii]: state after block bi uses qii.*/
+  unsigned             cost[4][3];/*Best cumulative RD cost per [bi][qii].*/
+  unsigned             ssd[4][3];/*Cumulative scaled SSD per [bi][qii].*/
+  unsigned             rate[4][3];/*Cumulative rate per [bi][qii].*/
+  int                  prev[3][3];/*prev[bi-1][qii]: best qii for block bi-1.*/
+  unsigned             satd;
+  int                  dc;
+  unsigned             best_cost;
+  unsigned             best_ssd;
+  unsigned             best_rate;
+  int                  best_qii;
+  int                  qii;
+  int                  lambda;
+  int                  ystride;
+  int                  nqis;
+  int                  bi;
+  frag_buf_offs=_enc->state.frag_buf_offs;
+  sb_maps=(const oc_sb_map *)_enc->state.sb_maps;
+  src=_enc->state.ref_frame_data[OC_FRAME_IO];
+  ystride=_enc->state.ref_ystride[0];
+  fragi=sb_maps[_mbi>>2][_mbi&3][0];
+  frag_offs=frag_buf_offs[fragi];
+  if(_enc->sp_level<OC_SP_LEVEL_NOSATD){
+    satd=oc_enc_frag_intra_satd(_enc,&dc,src+frag_offs,ystride);
+  }
+  else{
+    satd=oc_enc_frag_intra_sad(_enc,src+frag_offs,ystride);
+  }
+  nqis=_enc->state.nqis;
+  lambda=_enc->lambda;
+  for(qii=0;qii<nqis;qii++){/*Block 0: every qii starts its own chain.*/
+    oc_qii_state_advance(qs[0]+qii,_qs,qii);
+    rate[0][qii]=oc_dct_cost2(_enc,ssd[0]+qii,qii,0,0,satd)
+     +(qs[0][qii].bits-_qs->bits<<OC_BIT_SCALE);
+    ssd[0][qii]=OC_RD_SCALE(ssd[0][qii],_rd_scale[0]);
+    cost[0][qii]=OC_MODE_RD_COST(ssd[0][qii],rate[0][qii],lambda);
+  }
+  for(bi=1;bi<4;bi++){
+    fragi=sb_maps[_mbi>>2][_mbi&3][bi];
+    frag_offs=frag_buf_offs[fragi];
+    if(_enc->sp_level<OC_SP_LEVEL_NOSATD){
+      satd=oc_enc_frag_intra_satd(_enc,&dc,src+frag_offs,ystride);
+    }
+    else{
+      satd=oc_enc_frag_intra_sad(_enc,src+frag_offs,ystride);
+    }
+    for(qii=0;qii<nqis;qii++){
+      oc_qii_state qt[3];
+      unsigned     cur_ssd;
+      unsigned     cur_rate;
+      int          best_qij;
+      int          qij;
+      oc_qii_state_advance(qt+0,qs[bi-1]+0,qii);
+      cur_rate=oc_dct_cost2(_enc,&cur_ssd,qii,0,0,satd);
+      cur_ssd=OC_RD_SCALE(cur_ssd,_rd_scale[bi]);
+      best_ssd=ssd[bi-1][0]+cur_ssd;
+      best_rate=rate[bi-1][0]+cur_rate
+       +(qt[0].bits-qs[bi-1][0].bits<<OC_BIT_SCALE);
+      best_cost=OC_MODE_RD_COST(best_ssd,best_rate,lambda);
+      best_qij=0;
+      for(qij=1;qij<nqis;qij++){/*Try each predecessor qij for this qii.*/
+        unsigned chain_ssd;
+        unsigned chain_rate;
+        unsigned chain_cost;
+        oc_qii_state_advance(qt+qij,qs[bi-1]+qij,qii);
+        chain_ssd=ssd[bi-1][qij]+cur_ssd;
+        chain_rate=rate[bi-1][qij]+cur_rate
+         +(qt[qij].bits-qs[bi-1][qij].bits<<OC_BIT_SCALE);
+        chain_cost=OC_MODE_RD_COST(chain_ssd,chain_rate,lambda);
+        if(chain_cost<best_cost){
+          best_cost=chain_cost;
+          best_ssd=chain_ssd;
+          best_rate=chain_rate;
+          best_qij=qij;
+        }
+      }
+      *(qs[bi]+qii)=*(qt+best_qij);
+      cost[bi][qii]=best_cost;
+      ssd[bi][qii]=best_ssd;
+      rate[bi][qii]=best_rate;
+      prev[bi-1][qii]=best_qij;
+    }
+  }
+  best_qii=0;
+  best_cost=cost[3][0];
+  for(qii=1;qii<nqis;qii++){/*Pick the cheapest chain end at the last block.*/
+    if(cost[3][qii]<best_cost){
+      best_cost=cost[3][qii];
+      best_qii=qii;
+    }
+  }
+  frags=_enc->state.frags;
+  for(bi=3;;){/*Follow the back-pointers from block 3 down to block 0.*/
+    fragi=sb_maps[_mbi>>2][_mbi&3][bi];
+    frags[fragi].qii=best_qii;
+    if(bi--<=0)break;/*Block 0 has no predecessor entry in prev.*/
+    best_qii=prev[bi][best_qii];
+  }
+  return best_cost;
+}
+
+/*Select a block-level quantizer for a single chroma block in an INTRA frame.
+  The chosen index is stored in the qii field of fragment _fragi.
+  With nqis fixed at 1 below, only qii 0 is evaluated, so it is always chosen.
+  _enc:      The encoder context.
+  _qs:       The quantizer index coding state for this plane.
+  _pli:      The color plane of the block.
+  _fragi:    The index of the fragment to analyze.
+  _rd_scale: The scale applied to the SSD estimate.
+  Return: The RD cost of the chosen quantizer index.*/
+static unsigned oc_analyze_intra_chroma_block(oc_enc_ctx *_enc,
+ const oc_qii_state *_qs,int _pli,ptrdiff_t _fragi,unsigned _rd_scale){
+  const unsigned char *src;
+  oc_fragment         *frags;
+  ptrdiff_t            frag_offs;
+  oc_qii_state         qt[3];
+  unsigned             cost[3];
+  unsigned             satd;
+  int                  dc;
+  unsigned             best_cost;
+  int                  best_qii;
+  int                  qii;
+  int                  lambda;
+  int                  ystride;
+  int                  nqis;
+  src=_enc->state.ref_frame_data[OC_FRAME_IO];
+  ystride=_enc->state.ref_ystride[_pli];
+  frag_offs=_enc->state.frag_buf_offs[_fragi];
+  if(_enc->sp_level<OC_SP_LEVEL_NOSATD){
+    satd=oc_enc_frag_intra_satd(_enc,&dc,src+frag_offs,ystride);
+  }
+  else{
+    satd=oc_enc_frag_intra_sad(_enc,src+frag_offs,ystride);
+  }
+  /*Most chroma blocks have no AC coefficients to speak of anyway, so it's not
+     worth spending the bits to change the AC quantizer.
+    TODO: This may be worth revisiting when we separate out DC and AC
+     predictions from SATD.*/
+#if 0
+  nqis=_enc->state.nqis;
+#else
+  nqis=1;
+#endif
+  lambda=_enc->lambda;
+  best_qii=0;
+  for(qii=0;qii<nqis;qii++){
+    unsigned cur_rate;
+    unsigned cur_ssd;
+    oc_qii_state_advance(qt+qii,_qs,qii);
+    cur_rate=oc_dct_cost2(_enc,&cur_ssd,qii,_pli,0,satd)
+     +(qt[qii].bits-_qs->bits<<OC_BIT_SCALE);
+    cur_ssd=OC_RD_SCALE(cur_ssd,_rd_scale);
+    cost[qii]=OC_MODE_RD_COST(cur_ssd,cur_rate,lambda);
+  }
+  best_cost=cost[0];
+  for(qii=1;qii<nqis;qii++){/*Never entered while nqis is 1.*/
+    if(cost[qii]<best_cost){
+      best_cost=cost[qii];
+      best_qii=qii;
+    }
+  }
+  frags=_enc->state.frags;
+  frags[_fragi].qii=best_qii;
+  return best_cost;
+}
+
+/*Transforms and quantizes the four luma blocks of an INTRA macro block.
+  Each block is tagged as an INTRA block that references OC_FRAME_SELF, handed
+   to oc_enc_block_transform_quantize(), and appended to the plane 0 coded
+   fragment list of the pipeline.
+  _rd_scale and _rd_iscale supply one scale factor per luma block.*/
+static void oc_enc_mb_transform_quantize_intra_luma(oc_enc_ctx *_enc,
+ oc_enc_pipeline_state *_pipe,unsigned _mbi,
+ const unsigned _rd_scale[4],const unsigned _rd_iscale[4]){
+  /*Worst case token stack usage for 4 fragments.*/
+  oc_token_checkpoint  checkpoints[64*4];
+  oc_token_checkpoint *top;
+  const oc_sb_map     *sb_maps;
+  oc_fragment         *frags;
+  ptrdiff_t           *coded_list;
+  ptrdiff_t            ncoded;
+  int                  blk;
+  sb_maps=(const oc_sb_map *)_enc->state.sb_maps;
+  frags=_enc->state.frags;
+  coded_list=_pipe->coded_fragis[0];
+  ncoded=_pipe->ncoded_fragis[0];
+  top=checkpoints;
+  blk=0;
+  while(blk<4){
+    ptrdiff_t fragi;
+    fragi=sb_maps[_mbi>>2][_mbi&3][blk];
+    frags[fragi].refi=OC_FRAME_SELF;
+    frags[fragi].mb_mode=OC_MODE_INTRA;
+    oc_enc_block_transform_quantize(_enc,_pipe,0,fragi,
+     _rd_scale[blk],_rd_iscale[blk],NULL,NULL,&top);
+    coded_list[ncoded]=fragi;
+    ncoded++;
+    blk++;
+  }
+  /*Publish the updated list length back to the pipeline.*/
+  _pipe->ncoded_fragis[0]=ncoded;
+}
+
+/*Analyzes, transforms and quantizes the chroma blocks of one plane for the
+   super blocks in [_sbi_start,_sbi_end) of an INTRA frame.
+  The per-block scale factors are read from _enc->mcu_rd_scale and
+   _enc->mcu_rd_iscale, indexed relative to _pipe->froffset[_pli].
+  Every processed fragment is appended to the plane's coded fragment list.*/
+static void oc_enc_sb_transform_quantize_intra_chroma(oc_enc_ctx *_enc,
+ oc_enc_pipeline_state *_pipe,int _pli,int _sbi_start,int _sbi_end){
+  const ogg_uint16_t *scales;
+  const ogg_uint16_t *iscales;
+  const oc_sb_map    *sb_maps;
+  ptrdiff_t          *coded_list;
+  ptrdiff_t           ncoded;
+  ptrdiff_t           first_fragi;
+  int                 sbi;
+  scales=(const ogg_uint16_t *)_enc->mcu_rd_scale;
+  iscales=(const ogg_uint16_t *)_enc->mcu_rd_iscale;
+  sb_maps=(const oc_sb_map *)_enc->state.sb_maps;
+  coded_list=_pipe->coded_fragis[_pli];
+  ncoded=_pipe->ncoded_fragis[_pli];
+  first_fragi=_pipe->froffset[_pli];
+  for(sbi=_sbi_start;sbi<_sbi_end;sbi++){
+    /*Worst case token stack usage for 1 fragment.*/
+    oc_token_checkpoint checkpoints[64];
+    int                 quadi;
+    for(quadi=0;quadi<4;quadi++){
+      int bi;
+      for(bi=0;bi<4;bi++){
+        oc_token_checkpoint *top;
+        ptrdiff_t            fragi;
+        unsigned             scale;
+        unsigned             iscale;
+        fragi=sb_maps[sbi][quadi][bi];
+        /*Map entries below zero are skipped.*/
+        if(fragi<0)continue;
+        scale=scales[fragi-first_fragi];
+        iscale=iscales[fragi-first_fragi];
+        oc_analyze_intra_chroma_block(_enc,_pipe->qs+_pli,_pli,fragi,scale);
+        /*Each fragment starts with an empty token stack.*/
+        top=checkpoints;
+        oc_enc_block_transform_quantize(_enc,_pipe,_pli,fragi,
+         scale,iscale,NULL,NULL,&top);
+        coded_list[ncoded]=fragi;
+        ncoded++;
+      }
+    }
+  }
+  _pipe->ncoded_fragis[_pli]=ncoded;
+}
+
+/*Analysis stage for an INTRA frame.
+  The frame is processed one stripe of super block rows at a time: for each
+   stripe all luma MBs are masked, analyzed, transformed and quantized, and
+   then the chroma planes of the same stripe are coded.
+  Afterwards the encoder's running activity and luma averages are updated
+   from the sums collected over the frame.
+  _enc:    The encoder context.
+  _recode: When non-zero, the motion search for each MB is not run.*/
+void oc_enc_analyze_intra(oc_enc_ctx *_enc,int _recode){
+  ogg_int64_t             activity_sum;
+  ogg_int64_t             luma_sum;
+  unsigned                activity_avg;
+  unsigned                luma_avg;
+  const ogg_uint16_t     *chroma_rd_scale;
+  ogg_uint16_t           *mcu_rd_scale;
+  ogg_uint16_t           *mcu_rd_iscale;
+  const unsigned char    *map_idxs;
+  int                     nmap_idxs;
+  oc_sb_flags            *sb_flags;
+  signed char            *mb_modes;
+  const oc_mb_map        *mb_maps;
+  const oc_sb_map        *sb_maps;
+  oc_fragment            *frags;
+  unsigned                stripe_sby;
+  unsigned                mcu_nvsbs;
+  int                     notstart;
+  int                     notdone;
+  int                     refi;
+  int                     pli;
+  _enc->state.frame_type=OC_INTRA_FRAME;
+  oc_enc_tokenize_start(_enc);
+  oc_enc_pipeline_init(_enc,&_enc->pipe);
+  oc_enc_mode_rd_init(_enc);
+  activity_sum=luma_sum=0;
+  activity_avg=_enc->activity_avg;
+  luma_avg=OC_CLAMPI(90<<8,_enc->luma_avg,160<<8);
+  chroma_rd_scale=_enc->chroma_rd_scale[OC_INTRA_FRAME][_enc->state.qis[0]];
+  mcu_rd_scale=_enc->mcu_rd_scale;
+  mcu_rd_iscale=_enc->mcu_rd_iscale;
+  /*Choose MVs and MB modes and quantize and code luma.
+    Must be done in Hilbert order.*/
+  map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
+  nmap_idxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
+  _enc->state.ncoded_fragis[0]=0;
+  _enc->state.ncoded_fragis[1]=0;
+  _enc->state.ncoded_fragis[2]=0;
+  sb_flags=_enc->state.sb_flags;
+  mb_modes=_enc->state.mb_modes;
+  mb_maps=(const oc_mb_map *)_enc->state.mb_maps;
+  sb_maps=(const oc_sb_map *)_enc->state.sb_maps;
+  frags=_enc->state.frags;
+  notstart=0;
+  notdone=1;
+  mcu_nvsbs=_enc->mcu_nvsbs;
+  for(stripe_sby=0;notdone;stripe_sby+=mcu_nvsbs){
+    ptrdiff_t cfroffset;
+    unsigned  sbi;
+    unsigned  sbi_end;
+    notdone=oc_enc_pipeline_set_stripe(_enc,&_enc->pipe,stripe_sby);
+    sbi_end=_enc->pipe.sbi_end[0];
+    cfroffset=_enc->pipe.froffset[1];
+    for(sbi=_enc->pipe.sbi0[0];sbi<sbi_end;sbi++){
+      int quadi;
+      /*Mode addressing is through Y plane, always 4 MB per SB.*/
+      for(quadi=0;quadi<4;quadi++)if(sb_flags[sbi].quad_valid&1<<quadi){
+        unsigned  activity[4];
+        unsigned  rd_scale[5];/*Entries 0...3 are luma, entry 4 is chroma.*/
+        unsigned  rd_iscale[5];/*Same layout as rd_scale.*/
+        unsigned  luma;
+        unsigned  mbi;
+        int       mapii;
+        int       mapi;
+        int       bi;
+        ptrdiff_t fragi;
+        mbi=sbi<<2|quadi;
+        /*Activity masking.*/
+        if(_enc->sp_level<OC_SP_LEVEL_FAST_ANALYSIS){
+          luma=oc_mb_activity(_enc,mbi,activity);
+        }
+        else{
+          unsigned intra_satd[12];
+          luma=oc_mb_intra_satd(_enc,mbi,intra_satd);
+          oc_mb_activity_fast(_enc,mbi,activity,intra_satd);
+          for(bi=0;bi<4;bi++)frags[sb_maps[mbi>>2][mbi&3][bi]].qii=0;
+        }
+        activity_sum+=oc_mb_masking(rd_scale,rd_iscale,
+         chroma_rd_scale,activity,activity_avg,luma,luma_avg);
+        luma_sum+=luma;
+        /*Motion estimation:
+          We do a basic 1MV search for all macroblocks, coded or not,
+           keyframe or not, unless we aren't using motion estimation at all.*/
+        if(!_recode&&_enc->state.curframe_num>0&&
+         _enc->sp_level<OC_SP_LEVEL_NOMC&&_enc->keyframe_frequency_force>1){
+          oc_mcenc_search(_enc,mbi);
+        }
+        if(_enc->sp_level<OC_SP_LEVEL_FAST_ANALYSIS){
+          oc_analyze_intra_mb_luma(_enc,_enc->pipe.qs+0,mbi,rd_scale);
+        }
+        mb_modes[mbi]=OC_MODE_INTRA;
+        oc_enc_mb_transform_quantize_intra_luma(_enc,&_enc->pipe,
+         mbi,rd_scale,rd_iscale);
+        /*Propagate final MB mode and MVs to the chroma blocks.*/
+        for(mapii=4;mapii<nmap_idxs;mapii++){
+          mapi=map_idxs[mapii];
+          pli=mapi>>2;
+          bi=mapi&3;
+          fragi=mb_maps[mbi][pli][bi];
+          frags[fragi].refi=OC_FRAME_SELF;
+          frags[fragi].mb_mode=OC_MODE_INTRA;
+        }
+        /*Save masking scale factors for chroma blocks.
+          Only the first half of the chroma map entries is visited, and the
+           factors are stored at plane 1 fragment positions relative to
+           cfroffset.*/
+        for(mapii=4;mapii<(nmap_idxs-4>>1)+4;mapii++){
+          mapi=map_idxs[mapii];
+          bi=mapi&3;
+          fragi=mb_maps[mbi][1][bi];
+          mcu_rd_scale[fragi-cfroffset]=(ogg_uint16_t)rd_scale[4];
+          mcu_rd_iscale[fragi-cfroffset]=(ogg_uint16_t)rd_iscale[4];
+        }
+      }
+    }
+    oc_enc_pipeline_finish_mcu_plane(_enc,&_enc->pipe,0,notstart,notdone);
+    /*Code chroma planes.*/
+    for(pli=1;pli<3;pli++){
+      oc_enc_sb_transform_quantize_intra_chroma(_enc,&_enc->pipe,
+       pli,_enc->pipe.sbi0[pli],_enc->pipe.sbi_end[pli]);
+      oc_enc_pipeline_finish_mcu_plane(_enc,&_enc->pipe,pli,notstart,notdone);
+    }
+    notstart=1;
+  }
+  /*Compute the average block activity and MB luma score for the frame.
+    Both divisions round to nearest; the activity average is additionally
+     kept at or above OC_ACTIVITY_AVG_MIN.*/
+  _enc->activity_avg=OC_MAXI(OC_ACTIVITY_AVG_MIN,
+   (unsigned)((activity_sum+(_enc->state.fplanes[0].nfrags>>1))/
+   _enc->state.fplanes[0].nfrags));
+  _enc->luma_avg=(unsigned)((luma_sum+(_enc->state.nmbs>>1))/_enc->state.nmbs);
+  /*Finish filling in the reference frame borders.*/
+  refi=_enc->state.ref_frame_idx[OC_FRAME_SELF];
+  for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_enc->state,refi,pli);
+  _enc->state.ntotal_coded_fragis=_enc->state.nfrags;
+}
+
+
+
+/*Cost information about a MB mode.
+  The ssd and rate fields are filled in by the luma and chroma mode analysis,
+   overhead by the oc_cost_*() functions, and cost by oc_mode_set_cost().*/
+struct oc_mode_choice{
+  unsigned      cost;/*RD cost computed from ssd, rate+overhead and lambda.*/
+  unsigned      ssd;/*Accumulated distortion estimate.*/
+  unsigned      rate;/*Accumulated block rate estimate.*/
+  unsigned      overhead;/*Mode (and MV) signalling rate.*/
+  unsigned char qii[12];/*Per-block qii; 4 is added when a block is skipped.*/
+};
+
+
+
+/*Recomputes _modec->cost from its ssd, rate and overhead fields.
+  The rate passed to OC_MODE_RD_COST is the sum of rate and overhead.*/
+static void oc_mode_set_cost(oc_mode_choice *_modec,int _lambda){
+  unsigned rd_cost;
+  rd_cost=OC_MODE_RD_COST(_modec->ssd,
+   _modec->rate+_modec->overhead,_lambda);
+  _modec->cost=rd_cost;
+}
+
+/*A set of skip SSD's to use to disable early skipping.
+  UINT_MAX always fails the "_skip_ssd[bi]<(UINT_MAX>>OC_BIT_SCALE+2)" test
+   used by the mode analysis functions, so no block is considered for
+   skipping when this table is passed in place of real skip SSDs.*/
+static const unsigned OC_NOSKIP[12]={
+  UINT_MAX,UINT_MAX,UINT_MAX,UINT_MAX,
+  UINT_MAX,UINT_MAX,UINT_MAX,UINT_MAX,
+  UINT_MAX,UINT_MAX,UINT_MAX,UINT_MAX
+};
+
+/*The estimated number of bits used by a coded chroma block to specify the AC
+   quantizer.
+  TODO: Currently this is just 0.5*log2(3) (estimating about 50% compression);
+   measurements suggest this is in the right ballpark, but it varies somewhat
+   with lambda.
+  0xCAE00D1D is log2(3) with 31 fractional bits; the right shift by
+   (31-OC_BIT_SCALE) leaves OC_BIT_SCALE fractional bits, and the final
+   "+1>>1" halves that value with rounding.*/
+#define OC_CHROMA_QII_RATE ((0xCAE00D1DU>>31-OC_BIT_SCALE)+1>>1)
+/*Greedily estimates the luma rate and distortion of coding a MB in one mode.
+  For each of the four luma blocks this picks the cheapest quantizer index
+   (qii), then checks whether skipping the block would cost no more; at most
+   three of the four blocks can be marked as skipped.
+  A skipped block is recorded by adding 4 to its entry in _modec->qii.
+  _enc:       The encoder context.
+  _modec:     Receives the per-block qii choices and the total ssd and rate.
+  _fr:        The coded block flag state before this MB (copied, not changed).
+  _qs:        The quantizer index state before this MB (copied, not changed).
+  _frag_satd: The per-block SATD (or SAD) values; entries 0...3 are used.
+  _skip_ssd:  The distortion of leaving each block uncoded.
+  _rd_scale:  The scale applied to each luma block's SSD estimate.
+  _qti:       Forwarded to oc_dct_cost2(); the callers in this file pass 0
+               for INTRA mode and 1 for the inter modes.*/
+static void oc_analyze_mb_mode_luma(oc_enc_ctx *_enc,
+ oc_mode_choice *_modec,const oc_fr_state *_fr,const oc_qii_state *_qs,
+ const unsigned _frag_satd[12],const unsigned _skip_ssd[12],
+ const unsigned _rd_scale[4],int _qti){
+  oc_fr_state  fr;
+  oc_qii_state qs;
+  unsigned     ssd;
+  unsigned     rate;
+  unsigned     satd;
+  unsigned     best_ssd;
+  unsigned     best_rate;
+  int          best_fri;
+  int          best_qii;
+  int          lambda;
+  int          nqis;
+  int          nskipped;
+  int          bi;
+  lambda=_enc->lambda;
+  nqis=_enc->state.nqis;
+  /*We could do a trellis optimization here, but we don't make final skip
+     decisions until after transform+quantization, so the result wouldn't be
+     optimal anyway.
+    Instead we just use a greedy approach; for most SATD values, the
+     differences between the qiis are large enough to drown out the cost to
+     code the flags, anyway.*/
+  *&fr=*_fr;
+  *&qs=*_qs;
+  ssd=rate=nskipped=0;
+  for(bi=0;bi<4;bi++){
+    oc_fr_state  ft[2];/*ft[0]: state if coded; ft[1]: state if skipped.*/
+    oc_qii_state qt[3];/*qt[qii]: state if this block is coded with qii.*/
+    unsigned     best_cost;
+    unsigned     cur_cost;
+    unsigned     cur_ssd;
+    unsigned     cur_rate;
+    unsigned     cur_overhead;
+    int          qii;
+    satd=_frag_satd[bi];
+    *(ft+0)=*&fr;
+    oc_fr_code_block(ft+0);
+    cur_overhead=ft[0].bits-fr.bits;
+    best_rate=oc_dct_cost2(_enc,&best_ssd,0,0,_qti,satd)
+     +(cur_overhead<<OC_BIT_SCALE);
+    if(nqis>1){/*The qii flag cost is only charged when there is a choice.*/
+      oc_qii_state_advance(qt+0,&qs,0);
+      best_rate+=qt[0].bits-qs.bits<<OC_BIT_SCALE;
+    }
+    best_ssd=OC_RD_SCALE(best_ssd,_rd_scale[bi]);
+    best_cost=OC_MODE_RD_COST(ssd+best_ssd,rate+best_rate,lambda);
+    best_fri=0;
+    best_qii=0;
+    for(qii=1;qii<nqis;qii++){
+      oc_qii_state_advance(qt+qii,&qs,qii);
+      cur_rate=oc_dct_cost2(_enc,&cur_ssd,qii,0,_qti,satd)
+       +(cur_overhead+qt[qii].bits-qs.bits<<OC_BIT_SCALE);
+      cur_ssd=OC_RD_SCALE(cur_ssd,_rd_scale[bi]);
+      cur_cost=OC_MODE_RD_COST(ssd+cur_ssd,rate+cur_rate,lambda);
+      if(cur_cost<best_cost){
+        best_cost=cur_cost;
+        best_ssd=cur_ssd;
+        best_rate=cur_rate;
+        best_qii=qii;
+      }
+    }
+    if(_skip_ssd[bi]<(UINT_MAX>>OC_BIT_SCALE+2)&&nskipped<3){
+      *(ft+1)=*&fr;
+      oc_fr_skip_block(ft+1);
+      cur_overhead=ft[1].bits-fr.bits<<OC_BIT_SCALE;
+      cur_ssd=_skip_ssd[bi]<<OC_BIT_SCALE;
+      cur_cost=OC_MODE_RD_COST(ssd+cur_ssd,rate+cur_overhead,lambda);
+      if(cur_cost<=best_cost){/*Ties go to skipping.*/
+        best_ssd=cur_ssd;
+        best_rate=cur_overhead;
+        best_fri=1;
+        best_qii+=4;
+      }
+    }
+    rate+=best_rate;
+    ssd+=best_ssd;
+    *&fr=*(ft+best_fri);
+    if(best_fri==0)*&qs=*(qt+best_qii);
+    else nskipped++;
+    _modec->qii[bi]=best_qii;
+  }
+  _modec->ssd=ssd;
+  _modec->rate=rate;
+}
+/*Adds the estimated chroma rate and distortion of a MB to _modec.
+  The running totals start from the ssd and rate already stored in _modec;
+   the callers in this file run the luma analysis first to fill those in.
+  Only qii 0 is evaluated, since nqis is fixed at 1 below.
+  A skipped block is recorded by adding 4 to its entry in _modec->qii.
+  _enc:       The encoder context.
+  _modec:     Updated with the chroma qii choices and the new ssd and rate.
+  _fr:        Not used by this function.
+  _qs:        Not used by this function.
+  _frag_satd: The per-block SATD (or SAD) values; entries from 4 on are used.
+  _skip_ssd:  The distortion of leaving each block uncoded.
+  _rd_scale:  The scale applied to every chroma block's SSD estimate.
+  _qti:       Forwarded to oc_dct_cost2().*/
+static void oc_analyze_mb_mode_chroma(oc_enc_ctx *_enc,
+ oc_mode_choice *_modec,const oc_fr_state *_fr,const oc_qii_state *_qs,
+ const unsigned _frag_satd[12],const unsigned _skip_ssd[12],
+ unsigned _rd_scale,int _qti){
+  unsigned ssd;
+  unsigned rate;
+  unsigned satd;
+  unsigned best_ssd;
+  unsigned best_rate;
+  int      best_qii;
+  unsigned cur_cost;
+  unsigned cur_ssd;
+  unsigned cur_rate;
+  int      lambda;
+  int      nblocks;
+  int      nqis;
+  int      pli;
+  int      bi;
+  int      qii;
+  lambda=_enc->lambda;
+  /*Most chroma blocks have no AC coefficients to speak of anyway, so it's not
+     worth spending the bits to change the AC quantizer.
+    TODO: This may be worth revisiting when we separate out DC and AC
+     predictions from SATD.*/
+#if 0
+  nqis=_enc->state.nqis;
+#else
+  nqis=1;
+#endif
+  ssd=_modec->ssd;
+  rate=_modec->rate;
+  /*Because (except in 4:4:4 mode) we aren't considering chroma blocks in coded
+     order, we assume a constant overhead for coded block and qii flags.*/
+  nblocks=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
+  nblocks=(nblocks-4>>1)+4;/*End of the first half of the chroma entries.*/
+  bi=4;
+  for(pli=1;pli<3;pli++){
+    for(;bi<nblocks;bi++){
+      unsigned best_cost;
+      satd=_frag_satd[bi];
+      best_rate=oc_dct_cost2(_enc,&best_ssd,0,pli,_qti,satd)
+       +OC_CHROMA_QII_RATE;
+      best_ssd=OC_RD_SCALE(best_ssd,_rd_scale);
+      best_cost=OC_MODE_RD_COST(ssd+best_ssd,rate+best_rate,lambda);
+      best_qii=0;
+      for(qii=1;qii<nqis;qii++){/*Never entered while nqis is 1.*/
+        cur_rate=oc_dct_cost2(_enc,&cur_ssd,qii,pli,_qti,satd)
+         +OC_CHROMA_QII_RATE;
+        cur_ssd=OC_RD_SCALE(cur_ssd,_rd_scale);
+        cur_cost=OC_MODE_RD_COST(ssd+cur_ssd,rate+cur_rate,lambda);
+        if(cur_cost<best_cost){
+          best_cost=cur_cost;
+          best_ssd=cur_ssd;
+          best_rate=cur_rate;
+          best_qii=qii;
+        }
+      }
+      if(_skip_ssd[bi]<(UINT_MAX>>OC_BIT_SCALE+2)){
+        cur_ssd=_skip_ssd[bi]<<OC_BIT_SCALE;
+        cur_cost=OC_MODE_RD_COST(ssd+cur_ssd,rate,lambda);
+        if(cur_cost<=best_cost){/*Ties go to skipping, which adds no rate.*/
+          best_ssd=cur_ssd;
+          best_rate=0;
+          best_qii+=4;
+        }
+      }
+      rate+=best_rate;
+      ssd+=best_ssd;
+      _modec->qii[bi]=best_qii;
+    }
+    nblocks=(nblocks-4<<1)+4;/*Extend the bound to cover the second half.*/
+  }
+  _modec->ssd=ssd;
+  _modec->rate=rate;
+}
+/*Computes the distortion of leaving each block of a MB uncoded.
+  Every block of the input frame is compared against the block at the same
+   position in the OC_FRAME_PREV reference, using the border-masked SSD for
+   fragments that have a border entry.
+  The result is scaled with OC_RD_SKIP_SCALE, doubled when the relevant motion
+   vector is non-zero, and stored both in _ssd and in _pipe->skip_ssd.
+  _enc:      The encoder context.
+  _pipe:     The pipeline state that receives the per-fragment skip SSDs.
+  _mbi:      The index of the MB.
+  _rd_scale: Entries 0...3 scale the luma blocks; entry 4 is also read here
+              and scales all chroma blocks.
+  _ssd:      Receives the skip SSD of each block: luma in entries 0...3,
+              chroma from entry 4 on.*/
+static void oc_skip_cost(oc_enc_ctx *_enc,oc_enc_pipeline_state *_pipe,
+ unsigned _mbi,const unsigned _rd_scale[4],unsigned _ssd[12]){
+  const unsigned char   *src;
+  const unsigned char   *ref;
+  int                    ystride;
+  const oc_fragment     *frags;
+  const ptrdiff_t       *frag_buf_offs;
+  const ptrdiff_t       *sb_map;
+  const oc_mb_map_plane *mb_map;
+  const unsigned char   *map_idxs;
+  oc_mv                 *mvs;
+  int                    map_nidxs;
+  unsigned               uncoded_ssd;
+  int                    mapii;
+  int                    mapi;
+  int                    pli;
+  int                    bi;
+  ptrdiff_t              fragi;
+  ptrdiff_t              frag_offs;
+  int                    borderi;
+  src=_enc->state.ref_frame_data[OC_FRAME_IO];
+  ref=_enc->state.ref_frame_data[OC_FRAME_PREV];
+  ystride=_enc->state.ref_ystride[0];
+  frags=_enc->state.frags;
+  frag_buf_offs=_enc->state.frag_buf_offs;
+  sb_map=_enc->state.sb_maps[_mbi>>2][_mbi&3];
+  mvs=_enc->mb_info[_mbi].block_mv;
+  for(bi=0;bi<4;bi++){
+    fragi=sb_map[bi];
+    borderi=frags[fragi].borderi;
+    frag_offs=frag_buf_offs[fragi];
+    if(borderi<0){
+      uncoded_ssd=oc_enc_frag_ssd(_enc,src+frag_offs,ref+frag_offs,ystride);
+    }
+    else{
+      uncoded_ssd=oc_enc_frag_border_ssd(_enc,
+       src+frag_offs,ref+frag_offs,ystride,_enc->state.borders[borderi].mask);
+    }
+    /*Scale to match DCT domain and RD.*/
+    uncoded_ssd=OC_RD_SKIP_SCALE(uncoded_ssd,_rd_scale[bi]);
+    /*Motion is a special case; if there is more than a full-pixel motion
+       against the prior frame, penalize skipping.
+      TODO: The factor of two here is a kludge, but it tested out better than a
+       hard limit.*/
+    if(mvs[bi]!=0)uncoded_ssd*=2;
+    _pipe->skip_ssd[0][fragi-_pipe->froffset[0]]=_ssd[bi]=uncoded_ssd;
+  }
+  mb_map=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi];
+  map_nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
+  map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
+  map_nidxs=(map_nidxs-4>>1)+4;/*End of the first half of the chroma entries.*/
+  mapii=4;
+  mvs=_enc->mb_info[_mbi].unref_mv;
+  for(pli=1;pli<3;pli++){
+    ystride=_enc->state.ref_ystride[pli];
+    for(;mapii<map_nidxs;mapii++){
+      mapi=map_idxs[mapii];
+      bi=mapi&3;
+      fragi=mb_map[pli][bi];
+      borderi=frags[fragi].borderi;
+      frag_offs=frag_buf_offs[fragi];
+      if(borderi<0){
+        uncoded_ssd=oc_enc_frag_ssd(_enc,src+frag_offs,ref+frag_offs,ystride);
+      }
+      else{
+        uncoded_ssd=oc_enc_frag_border_ssd(_enc,
+         src+frag_offs,ref+frag_offs,ystride,_enc->state.borders[borderi].mask);
+      }
+      /*Scale to match DCT domain and RD.*/
+      uncoded_ssd=OC_RD_SKIP_SCALE(uncoded_ssd,_rd_scale[4]);
+      /*Motion is a special case; if there is more than a full-pixel motion
+         against the prior frame, penalize skipping.
+        TODO: The factor of two here is a kludge, but it tested out better than
+         a hard limit.*/
+      if(mvs[OC_FRAME_PREV]!=0)uncoded_ssd*=2;
+      _pipe->skip_ssd[pli][fragi-_pipe->froffset[pli]]=_ssd[mapii]=uncoded_ssd;
+    }
+    map_nidxs=(map_nidxs-4<<1)+4;/*Extend the bound to cover the second half.*/
+  }
+}
+
+
+/*Estimates the cost of coding a MB in INTRA mode.
+  The luma and chroma analysis fill in the ssd, rate and per-block qii fields
+   of _modec from the supplied intra SATD values; the mode signalling cost is
+   then stored as the overhead and the total RD cost is computed.
+  _rd_scale holds the four luma scales followed by the chroma scale.*/
+static void oc_cost_intra(oc_enc_ctx *_enc,oc_mode_choice *_modec,
+ unsigned _mbi,const oc_fr_state *_fr,const oc_qii_state *_qs,
+ const unsigned _frag_satd[12],const unsigned _skip_ssd[12],
+ const unsigned _rd_scale[5]){
+  unsigned chroma_rd_scale;
+  /*Luma first: this (re)initializes the ssd and rate totals.*/
+  oc_analyze_mb_mode_luma(_enc,_modec,_fr,_qs,_frag_satd,_skip_ssd,_rd_scale,0);
+  /*Chroma accumulates on top of the luma totals.*/
+  chroma_rd_scale=_rd_scale[4];
+  oc_analyze_mb_mode_chroma(_enc,_modec,_fr,_qs,
+   _frag_satd,_skip_ssd,chroma_rd_scale,0);
+  _modec->overhead=
+   oc_mode_scheme_chooser_cost(&_enc->chooser,OC_MODE_INTRA)<<OC_BIT_SCALE;
+  oc_mode_set_cost(_modec,_enc->lambda);
+}
+/*Estimates the cost of coding a MB in an inter mode with a single MV.
+  The prediction error of every block is measured against the reference frame
+   selected by OC_FRAME_FOR_MODE(_mb_mode), using SATD plus the absolute DC
+   value, or plain SAD once sp_level reaches OC_SP_LEVEL_NOSATD.
+  The two-reference variants are used when oc_state_get_mv_offsets() returns
+   more than one offset for the MV.
+  _enc:      The encoder context.
+  _modec:    Receives ssd, rate, overhead, cost and the per-block qii values.
+  _mbi:      The index of the MB.
+  _mb_mode:  The inter mode being evaluated.
+  _mv:       The motion vector applied to the whole MB.
+  _fr:       The coded block flag state before this MB.
+  _qs:       The quantizer index coding state before this MB.
+  _skip_ssd: The distortion of leaving each block uncoded.
+  _rd_scale: The four luma scales followed by the chroma scale.*/
+static void oc_cost_inter(oc_enc_ctx *_enc,oc_mode_choice *_modec,
+ unsigned _mbi,int _mb_mode,oc_mv _mv,
+ const oc_fr_state *_fr,const oc_qii_state *_qs,
+ const unsigned _skip_ssd[12],const unsigned _rd_scale[5]){
+  unsigned               frag_satd[12];
+  const unsigned char   *src;
+  const unsigned char   *ref;
+  int                    ystride;
+  const ptrdiff_t       *frag_buf_offs;
+  const ptrdiff_t       *sb_map;
+  const oc_mb_map_plane *mb_map;
+  const unsigned char   *map_idxs;
+  int                    map_nidxs;
+  int                    mapii;
+  int                    mapi;
+  int                    mv_offs[2];
+  int                    pli;
+  int                    bi;
+  ptrdiff_t              fragi;
+  ptrdiff_t              frag_offs;
+  int                    dc;
+  src=_enc->state.ref_frame_data[OC_FRAME_IO];
+  ref=_enc->state.ref_frame_data[OC_FRAME_FOR_MODE(_mb_mode)];
+  ystride=_enc->state.ref_ystride[0];
+  frag_buf_offs=_enc->state.frag_buf_offs;
+  sb_map=_enc->state.sb_maps[_mbi>>2][_mbi&3];
+  _modec->rate=_modec->ssd=0;
+  if(oc_state_get_mv_offsets(&_enc->state,mv_offs,0,_mv)>1){
+    for(bi=0;bi<4;bi++){
+      fragi=sb_map[bi];
+      frag_offs=frag_buf_offs[fragi];
+      if(_enc->sp_level<OC_SP_LEVEL_NOSATD){
+        frag_satd[bi]=oc_enc_frag_satd2(_enc,&dc,src+frag_offs,
+         ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride);
+        frag_satd[bi]+=abs(dc);
+      }
+      else{
+        frag_satd[bi]=oc_enc_frag_sad2_thresh(_enc,src+frag_offs,
+         ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride,UINT_MAX);
+      }
+    }
+  }
+  else{
+    for(bi=0;bi<4;bi++){
+      fragi=sb_map[bi];
+      frag_offs=frag_buf_offs[fragi];
+      if(_enc->sp_level<OC_SP_LEVEL_NOSATD){
+        frag_satd[bi]=oc_enc_frag_satd(_enc,&dc,src+frag_offs,
+         ref+frag_offs+mv_offs[0],ystride);
+        frag_satd[bi]+=abs(dc);
+      }
+      else{
+        frag_satd[bi]=oc_enc_frag_sad(_enc,src+frag_offs,
+         ref+frag_offs+mv_offs[0],ystride);
+      }
+    }
+  }
+  mb_map=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi];
+  map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
+  map_nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
+  /*Note: This assumes ref_ystride[1]==ref_ystride[2].*/
+  ystride=_enc->state.ref_ystride[1];
+  if(oc_state_get_mv_offsets(&_enc->state,mv_offs,1,_mv)>1){
+    for(mapii=4;mapii<map_nidxs;mapii++){
+      mapi=map_idxs[mapii];
+      pli=mapi>>2;
+      bi=mapi&3;
+      fragi=mb_map[pli][bi];
+      frag_offs=frag_buf_offs[fragi];
+      if(_enc->sp_level<OC_SP_LEVEL_NOSATD){
+        frag_satd[mapii]=oc_enc_frag_satd2(_enc,&dc,src+frag_offs,
+         ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride);
+        frag_satd[mapii]+=abs(dc);
+      }
+      else{
+        frag_satd[mapii]=oc_enc_frag_sad2_thresh(_enc,src+frag_offs,
+         ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride,UINT_MAX);
+      }
+    }
+  }
+  else{
+    for(mapii=4;mapii<map_nidxs;mapii++){
+      mapi=map_idxs[mapii];
+      pli=mapi>>2;
+      bi=mapi&3;
+      fragi=mb_map[pli][bi];
+      frag_offs=frag_buf_offs[fragi];
+      if(_enc->sp_level<OC_SP_LEVEL_NOSATD){
+        frag_satd[mapii]=oc_enc_frag_satd(_enc,&dc,src+frag_offs,
+         ref+frag_offs+mv_offs[0],ystride);
+        frag_satd[mapii]+=abs(dc);
+      }
+      else{
+        frag_satd[mapii]=oc_enc_frag_sad(_enc,src+frag_offs,
+         ref+frag_offs+mv_offs[0],ystride);
+      }
+    }
+  }
+  oc_analyze_mb_mode_luma(_enc,_modec,_fr,_qs,frag_satd,_skip_ssd,_rd_scale,1);
+  oc_analyze_mb_mode_chroma(_enc,_modec,_fr,_qs,
+   frag_satd,_skip_ssd,_rd_scale[4],1);
+  _modec->overhead=
+   oc_mode_scheme_chooser_cost(&_enc->chooser,_mb_mode)<<OC_BIT_SCALE;
+  oc_mode_set_cost(_modec,_enc->lambda);
+}
+
+/*Estimates the cost of coding a MB in an inter mode that uses a zero MV.
+  This is a thin wrapper around oc_cost_inter() with the motion vector fixed
+   at 0; all other arguments are forwarded unchanged.
+  _rd_scale must provide 5 entries (four luma scales followed by the chroma
+   scale), because oc_cost_inter() reads _rd_scale[4].*/
+static void oc_cost_inter_nomv(oc_enc_ctx *_enc,oc_mode_choice *_modec,
+ unsigned _mbi,int _mb_mode,const oc_fr_state *_fr,const oc_qii_state *_qs,
+ const unsigned _skip_ssd[12],const unsigned _rd_scale[5]){
+  oc_cost_inter(_enc,_modec,_mbi,_mb_mode,0,_fr,_qs,_skip_ssd,_rd_scale);
+}
+
+/*Estimates the cost of coding a MB in an inter mode that codes one MV.
+  The block and mode costs come from oc_cost_inter(); the overhead is then
+   increased by the extra bits needed to code _mv, and the total RD cost is
+   recomputed.
+  _rd_scale must provide 5 entries (four luma scales followed by the chroma
+   scale), because oc_cost_inter() reads _rd_scale[4].
+  Return: The number of bits _mv takes according to OC_MV_BITS[0].*/
+static int oc_cost_inter1mv(oc_enc_ctx *_enc,oc_mode_choice *_modec,
+ unsigned _mbi,int _mb_mode,oc_mv _mv,
+ const oc_fr_state *_fr,const oc_qii_state *_qs,const unsigned _skip_ssd[12],
+ const unsigned _rd_scale[5]){
+  int bits0;
+  oc_cost_inter(_enc,_modec,_mbi,_mb_mode,_mv,_fr,_qs,_skip_ssd,_rd_scale);
+  bits0=OC_MV_BITS[0][OC_MV_X(_mv)+31]+OC_MV_BITS[0][OC_MV_Y(_mv)+31];
+  /*Charge the growth of the cheaper of the two MV bit totals: the first is
+     increased by bits0, the second by a flat 12 bits per vector.
+    The difference is taken before converting to OC_BIT_SCALE units.*/
+  _modec->overhead+=(OC_MINI(_enc->mv_bits[0]+bits0,_enc->mv_bits[1]+12)
+   -OC_MINI(_enc->mv_bits[0],_enc->mv_bits[1]))<<OC_BIT_SCALE;
+  oc_mode_set_cost(_modec,_enc->lambda);
+  return bits0;
+}
+
+/*A mapping from oc_mb_map (raster) ordering to oc_sb_map (Hilbert) ordering.
+  The first index is the MB's position within its super block (_mbi&3), the
+   second is the raster block index within the MB.*/
+static const unsigned char OC_MB_PHASE[4][4]={
+  {0,1,3,2},{0,3,1,2},{0,3,1,2},{2,3,1,0}
+};
+/*Estimates the cost of coding a MB in OC_MODE_INTER_MV_FOUR mode.
+  Each luma block is measured with its own MV against the OC_FRAME_PREV
+   reference; blocks that the luma analysis marks as skipped get a zero MV
+   before the chroma MVs are derived and the chroma blocks are measured.
+  As a side effect, the four MVs are written to _enc->state.frag_mvs.
+  Unlike oc_cost_inter(), this function always uses SATD, whatever sp_level
+   is set to.
+  _enc:      The encoder context.
+  _modec:    Receives ssd, rate, overhead, cost and the per-block qii values.
+  _mbi:      The index of the MB.
+  _mv:       The four luma block MVs, in oc_mb_map (raster) order.
+  _fr:       The coded block flag state before this MB.
+  _qs:       The quantizer index coding state before this MB.
+  _skip_ssd: The distortion of leaving each block uncoded.
+  _rd_scale: The four luma scales followed by the chroma scale.*/
+static void oc_cost_inter4mv(oc_enc_ctx *_enc,oc_mode_choice *_modec,
+ unsigned _mbi,oc_mv _mv[4],const oc_fr_state *_fr,const oc_qii_state *_qs,
+ const unsigned _skip_ssd[12],const unsigned _rd_scale[5]){
+  unsigned               frag_satd[12];
+  oc_mv                  lbmvs[4];
+  oc_mv                  cbmvs[4];
+  const unsigned char   *src;
+  const unsigned char   *ref;
+  int                    ystride;
+  const ptrdiff_t       *frag_buf_offs;
+  oc_mv                 *frag_mvs;
+  const oc_mb_map_plane *mb_map;
+  const unsigned char   *map_idxs;
+  int                    map_nidxs;
+  int                    nqis;
+  int                    mapii;
+  int                    mapi;
+  int                    mv_offs[2];
+  int                    pli;
+  int                    bi;
+  ptrdiff_t              fragi;
+  ptrdiff_t              frag_offs;
+  int                    bits0;
+  int                    bits1;
+  unsigned               satd;
+  int                    dc;
+  src=_enc->state.ref_frame_data[OC_FRAME_IO];
+  ref=_enc->state.ref_frame_data[OC_FRAME_PREV];
+  ystride=_enc->state.ref_ystride[0];
+  frag_buf_offs=_enc->state.frag_buf_offs;
+  frag_mvs=_enc->state.frag_mvs;
+  mb_map=(const oc_mb_map_plane *)_enc->state.mb_maps[_mbi];
+  _modec->rate=_modec->ssd=0;
+  for(bi=0;bi<4;bi++){
+    fragi=mb_map[0][bi];
+    /*Save the block MVs as the current ones while we're here; we'll replace
+       them if we don't ultimately choose 4MV mode.*/
+    frag_mvs[fragi]=_mv[bi];
+    frag_offs=frag_buf_offs[fragi];
+    if(oc_state_get_mv_offsets(&_enc->state,mv_offs,0,_mv[bi])>1){
+      satd=oc_enc_frag_satd2(_enc,&dc,src+frag_offs,
+       ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride);
+    }
+    else{
+      satd=oc_enc_frag_satd(_enc,&dc,src+frag_offs,
+       ref+frag_offs+mv_offs[0],ystride);
+    }
+    frag_satd[OC_MB_PHASE[_mbi&3][bi]]=satd+abs(dc);
+  }
+  oc_analyze_mb_mode_luma(_enc,_modec,_fr,_qs,frag_satd,
+   _enc->vp3_compatible?OC_NOSKIP:_skip_ssd,_rd_scale,1);
+  /*Figure out which blocks are being skipped and give them (0,0) MVs.
+    The luma analysis adds 4 to the qii of a skipped block, which makes it
+     compare greater than or equal to nqis here.*/
+  bits0=0;
+  bits1=0;
+  nqis=_enc->state.nqis;
+  for(bi=0;bi<4;bi++){
+    if(_modec->qii[OC_MB_PHASE[_mbi&3][bi]]>=nqis)lbmvs[bi]=0;
+    else{
+      lbmvs[bi]=_mv[bi];
+      bits0+=OC_MV_BITS[0][OC_MV_X(_mv[bi])+31]
+       +OC_MV_BITS[0][OC_MV_Y(_mv[bi])+31];
+      bits1+=12;
+    }
+  }
+  (*OC_SET_CHROMA_MVS_TABLE[_enc->state.info.pixel_fmt])(cbmvs,lbmvs);
+  map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
+  map_nidxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
+  /*Note: This assumes ref_ystride[1]==ref_ystride[2].*/
+  ystride=_enc->state.ref_ystride[1];
+  for(mapii=4;mapii<map_nidxs;mapii++){
+    mapi=map_idxs[mapii];
+    pli=mapi>>2;
+    bi=mapi&3;
+    fragi=mb_map[pli][bi];
+    frag_offs=frag_buf_offs[fragi];
+    /*TODO: We could save half these calls by re-using the results for the Cb
+       and Cr planes; is it worth it?*/
+    if(oc_state_get_mv_offsets(&_enc->state,mv_offs,pli,cbmvs[bi])>1){
+      satd=oc_enc_frag_satd2(_enc,&dc,src+frag_offs,
+       ref+frag_offs+mv_offs[0],ref+frag_offs+mv_offs[1],ystride);
+    }
+    else{
+      satd=oc_enc_frag_satd(_enc,&dc,src+frag_offs,
+       ref+frag_offs+mv_offs[0],ystride);
+    }
+    frag_satd[mapii]=satd+abs(dc);
+  }
+  oc_analyze_mb_mode_chroma(_enc,_modec,_fr,_qs,
+   frag_satd,_skip_ssd,_rd_scale[4],1);
+  _modec->overhead=
+   oc_mode_scheme_chooser_cost(&_enc->chooser,OC_MODE_INTER_MV_FOUR)
+   +OC_MINI(_enc->mv_bits[0]+bits0,_enc->mv_bits[1]+bits1)
+   -OC_MINI(_enc->mv_bits[0],_enc->mv_bits[1])<<OC_BIT_SCALE;
+  oc_mode_set_cost(_modec,_enc->lambda);
+}
+/*Performs mode analysis for, and codes, one inter (delta) frame.
+  Every valid macro block has the R-D cost of its candidate coding modes
+   estimated, the cheapest one is selected, and its luma is quantized and
+   coded; the chroma planes are then coded one stripe at a time.
+  _enc:            The encoder context.
+  _allow_keyframe: If non-zero, the cost of coding each macro block as INTRA
+                    in a keyframe is accumulated as well, and compared to the
+                    accumulated inter cost once the frame is done.
+  _recode:         If non-zero, the basic motion search and the reset of each
+                    macro block's unrefined MVs and refinement flags are
+                    skipped.
+  Return: 1 if _allow_keyframe is non-zero and the estimated inter bits exceed
+           the estimated intra bits (in which case the coded MB count is not
+           stored and the coded fragment list is not compacted), or 0
+           otherwise.*/
+int oc_enc_analyze_inter(oc_enc_ctx *_enc,int _allow_keyframe,int _recode){
+  oc_set_chroma_mvs_func  set_chroma_mvs;
+  oc_qii_state            intra_luma_qs;
+  oc_mv                   last_mv;
+  oc_mv                   prior_mv;
+  ogg_int64_t             interbits;
+  ogg_int64_t             intrabits;
+  ogg_int64_t             activity_sum;
+  ogg_int64_t             luma_sum;
+  unsigned                activity_avg;
+  unsigned                luma_avg;
+  const ogg_uint16_t     *chroma_rd_scale;
+  ogg_uint16_t           *mcu_rd_scale;
+  ogg_uint16_t           *mcu_rd_iscale;
+  const unsigned char    *map_idxs;
+  int                     nmap_idxs;
+  unsigned               *coded_mbis;
+  unsigned               *uncoded_mbis;
+  size_t                  ncoded_mbis;
+  size_t                  nuncoded_mbis;
+  oc_sb_flags            *sb_flags;
+  signed char            *mb_modes;
+  const oc_sb_map        *sb_maps;
+  const oc_mb_map        *mb_maps;
+  oc_mb_enc_info         *embs;
+  oc_fragment            *frags;
+  oc_mv                  *frag_mvs;
+  unsigned                stripe_sby;
+  unsigned                mcu_nvsbs;
+  int                     notstart;
+  int                     notdone;
+  unsigned                sbi;
+  unsigned                sbi_end;
+  int                     refi;
+  int                     pli;
+  int                     sp_level;
+  sp_level=_enc->sp_level;
+  set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_enc->state.info.pixel_fmt];
+  _enc->state.frame_type=OC_INTER_FRAME;
+  oc_mode_scheme_chooser_reset(&_enc->chooser);
+  oc_enc_tokenize_start(_enc);
+  oc_enc_pipeline_init(_enc,&_enc->pipe);
+  oc_enc_mode_rd_init(_enc);
+  if(_allow_keyframe)oc_qii_state_init(&intra_luma_qs);
+  _enc->mv_bits[0]=_enc->mv_bits[1]=0;
+  interbits=intrabits=0;
+  activity_sum=luma_sum=0;
+  activity_avg=_enc->activity_avg;
+  luma_avg=OC_CLAMPI(90<<8,_enc->luma_avg,160<<8);
+  chroma_rd_scale=_enc->chroma_rd_scale[OC_INTER_FRAME][_enc->state.qis[0]];
+  mcu_rd_scale=_enc->mcu_rd_scale;
+  mcu_rd_iscale=_enc->mcu_rd_iscale;
+  last_mv=prior_mv=0;
+  /*Choose MVs and MB modes and quantize and code luma.
+    Must be done in Hilbert order.*/
+  map_idxs=OC_MB_MAP_IDXS[_enc->state.info.pixel_fmt];
+  nmap_idxs=OC_MB_MAP_NIDXS[_enc->state.info.pixel_fmt];
+  coded_mbis=_enc->coded_mbis;
+  uncoded_mbis=coded_mbis+_enc->state.nmbs;
+  ncoded_mbis=0;
+  nuncoded_mbis=0;
+  _enc->state.ncoded_fragis[0]=0;
+  _enc->state.ncoded_fragis[1]=0;
+  _enc->state.ncoded_fragis[2]=0;
+  sb_flags=_enc->state.sb_flags;
+  mb_modes=_enc->state.mb_modes;
+  sb_maps=(const oc_sb_map *)_enc->state.sb_maps;
+  mb_maps=(const oc_mb_map *)_enc->state.mb_maps;
+  embs=_enc->mb_info;
+  frags=_enc->state.frags;
+  frag_mvs=_enc->state.frag_mvs;
+  notstart=0;
+  notdone=1;
+  mcu_nvsbs=_enc->mcu_nvsbs;
+  for(stripe_sby=0;notdone;stripe_sby+=mcu_nvsbs){
+    ptrdiff_t cfroffset;
+    notdone=oc_enc_pipeline_set_stripe(_enc,&_enc->pipe,stripe_sby);
+    sbi_end=_enc->pipe.sbi_end[0];
+    cfroffset=_enc->pipe.froffset[1];
+    for(sbi=_enc->pipe.sbi0[0];sbi<sbi_end;sbi++){
+      int quadi;
+      /*Mode addressing is through Y plane, always 4 MB per SB.*/
+      for(quadi=0;quadi<4;quadi++)if(sb_flags[sbi].quad_valid&1<<quadi){
+        oc_mode_choice modes[8];
+        unsigned       activity[4];
+        unsigned       rd_scale[5];
+        unsigned       rd_iscale[5];
+        unsigned       skip_ssd[12];
+        unsigned       intra_satd[12];
+        unsigned       luma;
+        int            mb_mv_bits_0;
+        int            mb_gmv_bits_0;
+        int            inter_mv_pref;
+        int            mb_mode;
+        int            refi;
+        int            mv;
+        unsigned       mbi;
+        int            mapii;
+        int            mapi;
+        int            bi;
+        ptrdiff_t      fragi;
+        mbi=sbi<<2|quadi;
+        luma=oc_mb_intra_satd(_enc,mbi,intra_satd);
+        /*Activity masking.*/
+        if(sp_level<OC_SP_LEVEL_FAST_ANALYSIS){
+          oc_mb_activity(_enc,mbi,activity);
+        }
+        else oc_mb_activity_fast(_enc,mbi,activity,intra_satd);
+        luma_sum+=luma;
+        activity_sum+=oc_mb_masking(rd_scale,rd_iscale,
+         chroma_rd_scale,activity,activity_avg,luma,luma_avg);
+        /*Motion estimation:
+          We always do a basic 1MV search for all macroblocks, coded or not,
+           keyframe or not.*/
+        if(!_recode&&sp_level<OC_SP_LEVEL_NOMC)oc_mcenc_search(_enc,mbi);
+        mv=0;
+        /*Find the block choice with the lowest estimated coding cost.
+          If a Cb or Cr block is coded but no Y' block from a macro block then
+           the mode MUST be OC_MODE_INTER_NOMV.
+          This is the default state to which the mode data structure is
+           initialised in encoder and decoder at the start of each frame.*/
+        /*Block coding cost is estimated from correlated SATD metrics.*/
+        /*At this point, all blocks that are in frame are still marked coded.*/
+        if(!_recode){
+          embs[mbi].unref_mv[OC_FRAME_GOLD]=
+           embs[mbi].analysis_mv[0][OC_FRAME_GOLD];
+          embs[mbi].unref_mv[OC_FRAME_PREV]=
+           embs[mbi].analysis_mv[0][OC_FRAME_PREV];
+          embs[mbi].refined=0;
+        }
+        /*Estimate the cost of coding this MB in a keyframe.*/
+        if(_allow_keyframe){
+          oc_cost_intra(_enc,modes+OC_MODE_INTRA,mbi,
+           _enc->pipe.fr+0,&intra_luma_qs,intra_satd,OC_NOSKIP,rd_scale);
+          intrabits+=modes[OC_MODE_INTRA].rate;
+          for(bi=0;bi<4;bi++){
+            oc_qii_state_advance(&intra_luma_qs,&intra_luma_qs,
+             modes[OC_MODE_INTRA].qii[bi]);
+          }
+        }
+        /*Estimate the cost in a delta frame for various modes.*/
+        oc_skip_cost(_enc,&_enc->pipe,mbi,rd_scale,skip_ssd);
+        if(sp_level<OC_SP_LEVEL_NOMC){
+          oc_cost_inter_nomv(_enc,modes+OC_MODE_INTER_NOMV,mbi,
+           OC_MODE_INTER_NOMV,_enc->pipe.fr+0,_enc->pipe.qs+0,
+           skip_ssd,rd_scale);
+          oc_cost_intra(_enc,modes+OC_MODE_INTRA,mbi,
+           _enc->pipe.fr+0,_enc->pipe.qs+0,intra_satd,skip_ssd,rd_scale);
+          mb_mv_bits_0=oc_cost_inter1mv(_enc,modes+OC_MODE_INTER_MV,mbi,
+           OC_MODE_INTER_MV,embs[mbi].unref_mv[OC_FRAME_PREV],
+           _enc->pipe.fr+0,_enc->pipe.qs+0,skip_ssd,rd_scale);
+          oc_cost_inter(_enc,modes+OC_MODE_INTER_MV_LAST,mbi,
+           OC_MODE_INTER_MV_LAST,last_mv,_enc->pipe.fr+0,_enc->pipe.qs+0,
+           skip_ssd,rd_scale);
+          oc_cost_inter(_enc,modes+OC_MODE_INTER_MV_LAST2,mbi,
+           OC_MODE_INTER_MV_LAST2,prior_mv,_enc->pipe.fr+0,_enc->pipe.qs+0,
+           skip_ssd,rd_scale);
+          oc_cost_inter_nomv(_enc,modes+OC_MODE_GOLDEN_NOMV,mbi,
+           OC_MODE_GOLDEN_NOMV,_enc->pipe.fr+0,_enc->pipe.qs+0,
+           skip_ssd,rd_scale);
+          mb_gmv_bits_0=oc_cost_inter1mv(_enc,modes+OC_MODE_GOLDEN_MV,mbi,
+           OC_MODE_GOLDEN_MV,embs[mbi].unref_mv[OC_FRAME_GOLD],
+           _enc->pipe.fr+0,_enc->pipe.qs+0,skip_ssd,rd_scale);
+          /*The explicit MV modes (2,6,7) have not yet gone through halfpel
+             refinement.
+            We choose the explicit MV mode that's already furthest ahead on
+             R-D cost and refine only that one.
+            We have to be careful to remember which ones we've refined so that
+             we don't refine it again if we re-encode this frame.*/
+          inter_mv_pref=_enc->lambda*3;
+          if(sp_level<OC_SP_LEVEL_FAST_ANALYSIS){
+            oc_cost_inter4mv(_enc,modes+OC_MODE_INTER_MV_FOUR,mbi,
+             embs[mbi].block_mv,_enc->pipe.fr+0,_enc->pipe.qs+0,
+             skip_ssd,rd_scale);
+          }
+          else{
+            modes[OC_MODE_INTER_MV_FOUR].cost=UINT_MAX;
+          }
+          if(modes[OC_MODE_INTER_MV_FOUR].cost<modes[OC_MODE_INTER_MV].cost&&
+           modes[OC_MODE_INTER_MV_FOUR].cost<modes[OC_MODE_GOLDEN_MV].cost){
+            if(!(embs[mbi].refined&0x80)){
+              oc_mcenc_refine4mv(_enc,mbi);
+              embs[mbi].refined|=0x80;
+            }
+            oc_cost_inter4mv(_enc,modes+OC_MODE_INTER_MV_FOUR,mbi,
+             embs[mbi].ref_mv,_enc->pipe.fr+0,_enc->pipe.qs+0,
+             skip_ssd,rd_scale);
+          }
+          else if(modes[OC_MODE_GOLDEN_MV].cost+inter_mv_pref<
+           modes[OC_MODE_INTER_MV].cost){
+            if(!(embs[mbi].refined&0x40)){
+              oc_mcenc_refine1mv(_enc,mbi,OC_FRAME_GOLD);
+              embs[mbi].refined|=0x40;
+            }
+            mb_gmv_bits_0=oc_cost_inter1mv(_enc,modes+OC_MODE_GOLDEN_MV,mbi,
+             OC_MODE_GOLDEN_MV,embs[mbi].analysis_mv[0][OC_FRAME_GOLD],
+             _enc->pipe.fr+0,_enc->pipe.qs+0,skip_ssd,rd_scale);
+          }
+          if(!(embs[mbi].refined&0x04)){
+            oc_mcenc_refine1mv(_enc,mbi,OC_FRAME_PREV);
+            embs[mbi].refined|=0x04;
+          }
+          mb_mv_bits_0=oc_cost_inter1mv(_enc,modes+OC_MODE_INTER_MV,mbi,
+           OC_MODE_INTER_MV,embs[mbi].analysis_mv[0][OC_FRAME_PREV],
+           _enc->pipe.fr+0,_enc->pipe.qs+0,skip_ssd,rd_scale);
+          /*Finally, pick the mode with the cheapest estimated R-D cost.*/
+          mb_mode=OC_MODE_INTER_NOMV;
+          if(modes[OC_MODE_INTRA].cost<modes[OC_MODE_INTER_NOMV].cost){
+            mb_mode=OC_MODE_INTRA;
+          }
+          if(modes[OC_MODE_INTER_MV_LAST].cost<modes[mb_mode].cost){
+            mb_mode=OC_MODE_INTER_MV_LAST;
+          }
+          if(modes[OC_MODE_INTER_MV_LAST2].cost<modes[mb_mode].cost){
+            mb_mode=OC_MODE_INTER_MV_LAST2;
+          }
+          if(modes[OC_MODE_GOLDEN_NOMV].cost<modes[mb_mode].cost){
+            mb_mode=OC_MODE_GOLDEN_NOMV;
+          }
+          if(modes[OC_MODE_GOLDEN_MV].cost<modes[mb_mode].cost){
+            mb_mode=OC_MODE_GOLDEN_MV;
+          }
+          if(modes[OC_MODE_INTER_MV_FOUR].cost<modes[mb_mode].cost){
+            mb_mode=OC_MODE_INTER_MV_FOUR;
+          }
+          /*We prefer OC_MODE_INTER_MV, but not over LAST and LAST2.*/
+          if(mb_mode==OC_MODE_INTER_MV_LAST||mb_mode==OC_MODE_INTER_MV_LAST2){
+            inter_mv_pref=0;
+          }
+          if(modes[OC_MODE_INTER_MV].cost<modes[mb_mode].cost+inter_mv_pref){
+            mb_mode=OC_MODE_INTER_MV;
+          }
+        }
+        else{
+          oc_cost_inter_nomv(_enc,modes+OC_MODE_INTER_NOMV,mbi,
+           OC_MODE_INTER_NOMV,_enc->pipe.fr+0,_enc->pipe.qs+0,
+           skip_ssd,rd_scale);
+          oc_cost_intra(_enc,modes+OC_MODE_INTRA,mbi,
+           _enc->pipe.fr+0,_enc->pipe.qs+0,intra_satd,skip_ssd,rd_scale);
+          oc_cost_inter_nomv(_enc,modes+OC_MODE_GOLDEN_NOMV,mbi,
+           OC_MODE_GOLDEN_NOMV,_enc->pipe.fr+0,_enc->pipe.qs+0,
+           skip_ssd,rd_scale);
+          mb_mode=OC_MODE_INTER_NOMV;
+          if(modes[OC_MODE_INTRA].cost<modes[OC_MODE_INTER_NOMV].cost){
+            mb_mode=OC_MODE_INTRA;
+          }
+          if(modes[OC_MODE_GOLDEN_NOMV].cost<modes[mb_mode].cost){
+            mb_mode=OC_MODE_GOLDEN_NOMV;
+          }
+          mb_mv_bits_0=mb_gmv_bits_0=0;
+        }
+        mb_modes[mbi]=mb_mode;
+        /*Propagate the MVs to the luma blocks.*/
+        if(mb_mode!=OC_MODE_INTER_MV_FOUR){
+          switch(mb_mode){
+            case OC_MODE_INTER_MV:{
+              mv=embs[mbi].analysis_mv[0][OC_FRAME_PREV];
+            }break;
+            case OC_MODE_INTER_MV_LAST:mv=last_mv;break;
+            case OC_MODE_INTER_MV_LAST2:mv=prior_mv;break;
+            case OC_MODE_GOLDEN_MV:{
+              mv=embs[mbi].analysis_mv[0][OC_FRAME_GOLD];
+            }break;
+          }
+          for(bi=0;bi<4;bi++){
+            fragi=mb_maps[mbi][0][bi];
+            frag_mvs[fragi]=mv;
+          }
+        }
+        for(bi=0;bi<4;bi++){
+          fragi=sb_maps[mbi>>2][mbi&3][bi];
+          frags[fragi].qii=modes[mb_mode].qii[bi];
+        }
+        if(oc_enc_mb_transform_quantize_inter_luma(_enc,&_enc->pipe,mbi,
+         modes[mb_mode].overhead>>OC_BIT_SCALE,rd_scale,rd_iscale)>0){
+          int orig_mb_mode;
+          orig_mb_mode=mb_mode;
+          mb_mode=mb_modes[mbi];
+          refi=OC_FRAME_FOR_MODE(mb_mode);
+          switch(mb_mode){
+            case OC_MODE_INTER_MV:{
+              prior_mv=last_mv;
+              /*If we're backing out from 4MV, find the MV we're actually
+                 using.*/
+              if(orig_mb_mode==OC_MODE_INTER_MV_FOUR){
+                for(bi=0;;bi++){
+                  fragi=mb_maps[mbi][0][bi];
+                  if(frags[fragi].coded){
+                    mv=last_mv=frag_mvs[fragi];
+                    break;
+                  }
+                }
+                mb_mv_bits_0=OC_MV_BITS[0][OC_MV_X(mv)+31]
+                 +OC_MV_BITS[0][OC_MV_Y(mv)+31];
+              }
+              /*Otherwise we used the original analysis MV.*/
+              else last_mv=embs[mbi].analysis_mv[0][OC_FRAME_PREV];
+              _enc->mv_bits[0]+=mb_mv_bits_0;
+              _enc->mv_bits[1]+=12;
+            }break;
+            case OC_MODE_INTER_MV_LAST2:{
+              oc_mv tmp_mv;
+              tmp_mv=prior_mv;
+              prior_mv=last_mv;
+              last_mv=tmp_mv;
+            }break;
+            case OC_MODE_GOLDEN_MV:{
+              _enc->mv_bits[0]+=mb_gmv_bits_0;
+              _enc->mv_bits[1]+=12;
+            }break;
+            case OC_MODE_INTER_MV_FOUR:{
+              oc_mv lbmvs[4];
+              oc_mv cbmvs[4];
+              prior_mv=last_mv;
+              for(bi=0;bi<4;bi++){
+                fragi=mb_maps[mbi][0][bi];
+                if(frags[fragi].coded){
+                  lbmvs[bi]=last_mv=frag_mvs[fragi];
+                  _enc->mv_bits[0]+=OC_MV_BITS[0][OC_MV_X(last_mv)+31]
+                   +OC_MV_BITS[0][OC_MV_Y(last_mv)+31];
+                  _enc->mv_bits[1]+=12;
+                }
+                /*Replace the block MVs for not-coded blocks with (0,0).*/
+                else lbmvs[bi]=0;
+              }
+              (*set_chroma_mvs)(cbmvs,lbmvs);
+              for(mapii=4;mapii<nmap_idxs;mapii++){
+                mapi=map_idxs[mapii];
+                pli=mapi>>2;
+                bi=mapi&3;
+                fragi=mb_maps[mbi][pli][bi];
+                frags[fragi].qii=modes[OC_MODE_INTER_MV_FOUR].qii[mapii];
+                frags[fragi].refi=refi;
+                frags[fragi].mb_mode=mb_mode;
+                frag_mvs[fragi]=cbmvs[bi];
+              }
+            }break;
+          }
+          coded_mbis[ncoded_mbis++]=mbi;
+          oc_mode_scheme_chooser_update(&_enc->chooser,mb_mode);
+          interbits+=modes[mb_mode].rate+modes[mb_mode].overhead;
+        }
+        else{
+          *(uncoded_mbis-++nuncoded_mbis)=mbi;/*Filled downward from the end.*/
+          mb_mode=OC_MODE_INTER_NOMV;
+          refi=OC_FRAME_PREV;
+          mv=0;
+        }
+        /*Propagate final MB mode and MVs to the chroma blocks.
+          This has already been done for 4MV mode, since it requires individual
+           block motion vectors.*/
+        if(mb_mode!=OC_MODE_INTER_MV_FOUR){
+          for(mapii=4;mapii<nmap_idxs;mapii++){
+            mapi=map_idxs[mapii];
+            pli=mapi>>2;
+            bi=mapi&3;
+            fragi=mb_maps[mbi][pli][bi];
+            /*If we switched from 4MV mode to INTER_MV mode, then the qii
+               values won't have been chosen with the right MV, but it's
+               probably not worth re-estimating them.*/
+            frags[fragi].qii=modes[mb_mode].qii[mapii];
+            frags[fragi].refi=refi;
+            frags[fragi].mb_mode=mb_mode;
+            frag_mvs[fragi]=mv;
+          }
+        }
+        /*Save masking scale factors for chroma blocks.
+          Only the first (nmap_idxs-4)>>1 chroma map entries are visited, and
+           their fragments are looked up in plane 1; the values are stored at
+           offsets relative to cfroffset (the pipeline's plane-1 froffset).*/
+        for(mapii=4;mapii<(nmap_idxs-4>>1)+4;mapii++){
+          mapi=map_idxs[mapii];
+          bi=mapi&3;
+          fragi=mb_maps[mbi][1][bi];
+          mcu_rd_scale[fragi-cfroffset]=(ogg_uint16_t)rd_scale[4];
+          mcu_rd_iscale[fragi-cfroffset]=(ogg_uint16_t)rd_iscale[4];
+        }
+      }
+      oc_fr_state_flush_sb(_enc->pipe.fr+0);
+      sb_flags[sbi].coded_fully=_enc->pipe.fr[0].sb_full;
+      sb_flags[sbi].coded_partially=_enc->pipe.fr[0].sb_partial;
+    }
+    oc_enc_pipeline_finish_mcu_plane(_enc,&_enc->pipe,0,notstart,notdone);
+    /*Code chroma planes.*/
+    for(pli=1;pli<3;pli++){
+      oc_enc_sb_transform_quantize_inter_chroma(_enc,&_enc->pipe,
+       pli,_enc->pipe.sbi0[pli],_enc->pipe.sbi_end[pli]);
+      oc_enc_pipeline_finish_mcu_plane(_enc,&_enc->pipe,pli,notstart,notdone);
+    }
+    notstart=1;
+  }
+  /*Update the average block activity and MB luma score for the frame.
+    We could use a Bessel follower here, but fast reaction is probably almost
+     always best.*/
+  _enc->activity_avg=OC_MAXI(OC_ACTIVITY_AVG_MIN,
+   (unsigned)((activity_sum+(_enc->state.fplanes[0].nfrags>>1))/
+   _enc->state.fplanes[0].nfrags));
+  _enc->luma_avg=(unsigned)((luma_sum+(_enc->state.nmbs>>1))/_enc->state.nmbs);
+  /*Finish filling in the reference frame borders.*/
+  refi=_enc->state.ref_frame_idx[OC_FRAME_SELF];
+  for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_enc->state,refi,pli);
+  /*Finish adding flagging overhead costs to inter bit counts to determine if
+     we should have coded a key frame instead.*/
+  if(_allow_keyframe){
+    /*Technically the chroma plane counts are over-estimations, because they
+       don't account for continuing runs from the luma planes, but the
+       inaccuracy is small.
+      We don't need to add the luma plane coding flag costs, because they are
+       already included in the MB rate estimates.*/
+    for(pli=1;pli<3;pli++)interbits+=_enc->pipe.fr[pli].bits<<OC_BIT_SCALE;
+    if(interbits>intrabits)return 1;/*Inter estimate exceeds intra estimate.*/
+  }
+  _enc->ncoded_mbis=ncoded_mbis;
+  /*Compact the coded fragment list.*/
+  {
+    ptrdiff_t ncoded_fragis;
+    ncoded_fragis=_enc->state.ncoded_fragis[0];
+    for(pli=1;pli<3;pli++){
+      memmove(_enc->state.coded_fragis+ncoded_fragis,
+       _enc->state.coded_fragis+_enc->state.fplanes[pli].froffset,
+       _enc->state.ncoded_fragis[pli]*sizeof(*_enc->state.coded_fragis));
+      ncoded_fragis+=_enc->state.ncoded_fragis[pli];
+    }
+    _enc->state.ntotal_coded_fragis=ncoded_fragis;
+  }
+  return 0;
+}

+ 166 - 0
jni/libtheora-1.2.0alpha1/lib/apiwrapper.c

@@ -0,0 +1,166 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id$
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include "apiwrapper.h"
+
+
+
+/*Legacy-API entry point for the library version string.
+  Return: Whatever th_version_string() reports.*/
+const char *theora_version_string(void){
+  const char *version;
+  version=th_version_string();
+  return version;
+}
+
+/*Legacy-API entry point for the numeric library version.
+  Return: Whatever th_version_number() reports.*/
+ogg_uint32_t theora_version_number(void){
+  ogg_uint32_t version;
+  version=th_version_number();
+  return version;
+}
+
+/*Puts a legacy theora_info into its initial state by zeroing every byte.
+  _ci: The structure to initialize.*/
+void theora_info_init(theora_info *_ci){
+  memset(_ci,0,sizeof(theora_info));
+}
+
+/*Zeroes a legacy theora_info and releases the API wrapper attached to it.
+  _ci: The structure to clear; its codec_setup member, when non-NULL, is
+        treated as a th_api_wrapper and freed.*/
+void theora_info_clear(theora_info *_ci){
+  th_api_wrapper *wrapper;
+  /*Fetch the wrapper first: the memset below wipes the pointer to it.*/
+  wrapper=(th_api_wrapper *)_ci->codec_setup;
+  memset(_ci,0,sizeof(*_ci));
+  if(wrapper==NULL)return;
+  /*Let the wrapper tear down its own contents before its storage goes away.*/
+  if(wrapper->clear!=NULL)(*wrapper->clear)(wrapper);
+  _ogg_free(wrapper);
+}
+
+/*Releases everything attached to a legacy theora_state and zeroes it.
+  The decoder-side and encoder-side dispatch tables are each consulted in turn
+   (to stay compatible with mixed encoder and decoder shared lib versions),
+   then any remaining theora_info is cleared.
+  _th: The state to clear.*/
+void theora_clear(theora_state *_th){
+  oc_state_dispatch_vtable *dispatch;
+  dispatch=(oc_state_dispatch_vtable *)_th->internal_decode;
+  if(dispatch!=NULL)(*dispatch->clear)(_th);
+  /*internal_encode is only read once the decoder hook (if any) has run.*/
+  dispatch=(oc_state_dispatch_vtable *)_th->internal_encode;
+  if(dispatch!=NULL)(*dispatch->clear)(_th);
+  if(_th->i!=NULL)theora_info_clear(_th->i);
+  memset(_th,0,sizeof(*_th));
+}
+
+/*Forwards a control request to whichever codec backs this state.
+  The decoder dispatch table takes priority; the encoder's is used only when
+   no decoder is attached (compatibility with mixed encoder and decoder shared
+   lib versions).
+  _th:     The legacy state.
+  _req:    The request code.
+  _buf:    The request's parameter buffer.
+  _buf_sz: The size of that buffer.
+  Return: The backing codec's result, or TH_EINVAL when neither a decoder nor
+           an encoder is attached.*/
+int theora_control(theora_state *_th,int _req,void *_buf,size_t _buf_sz){
+  oc_state_dispatch_vtable *dispatch;
+  dispatch=(oc_state_dispatch_vtable *)_th->internal_decode;
+  if(dispatch==NULL){
+    dispatch=(oc_state_dispatch_vtable *)_th->internal_encode;
+  }
+  if(dispatch==NULL)return TH_EINVAL;
+  return (*dispatch->control)(_th,_req,_buf,_buf_sz);
+}
+
+/*Converts a granule position to a frame index via the backing codec.
+  The decoder dispatch table takes priority; the encoder's is used only when
+   no decoder is attached (compatibility with mixed encoder and decoder shared
+   lib versions).
+  _th: The legacy state.
+  _gp: The granule position to convert.
+  Return: The backing codec's result, or -1 when neither a decoder nor an
+           encoder is attached.*/
+ogg_int64_t theora_granule_frame(theora_state *_th,ogg_int64_t _gp){
+  oc_state_dispatch_vtable *dispatch;
+  dispatch=(oc_state_dispatch_vtable *)_th->internal_decode;
+  if(dispatch==NULL){
+    dispatch=(oc_state_dispatch_vtable *)_th->internal_encode;
+  }
+  if(dispatch==NULL)return -1;
+  return (*dispatch->granule_frame)(_th,_gp);
+}
+
+/*Converts a granule position to a time via the backing codec.
+  The decoder dispatch table takes priority; the encoder's is used only when
+   no decoder is attached (compatibility with mixed encoder and decoder shared
+   lib versions).
+  _th: The legacy state.
+  _gp: The granule position to convert.
+  Return: The backing codec's result, or -1 when neither a decoder nor an
+           encoder is attached.*/
+double theora_granule_time(theora_state *_th, ogg_int64_t _gp){
+  oc_state_dispatch_vtable *dispatch;
+  dispatch=(oc_state_dispatch_vtable *)_th->internal_decode;
+  if(dispatch==NULL){
+    dispatch=(oc_state_dispatch_vtable *)_th->internal_encode;
+  }
+  if(dispatch==NULL)return -1;
+  return (*dispatch->granule_time)(_th,_gp);
+}
+
+/*Fills a th_info from the fields of a legacy theora_info.
+  Note the naming shift between the two structs: the legacy width/height feed
+   frame_width/frame_height, while the legacy frame_width/frame_height and
+   offset_x/offset_y feed the pic_* fields.
+  _info: The structure to fill in.
+  _ci:   The legacy structure to read from.*/
+void oc_theora_info2th_info(th_info *_info,const theora_info *_ci){
+  /*Bitstream version.*/
+  _info->version_major=_ci->version_major;
+  _info->version_minor=_ci->version_minor;
+  _info->version_subminor=_ci->version_subminor;
+  /*Frame and picture geometry.*/
+  _info->frame_width=_ci->width;
+  _info->frame_height=_ci->height;
+  _info->pic_width=_ci->frame_width;
+  _info->pic_height=_ci->frame_height;
+  _info->pic_x=_ci->offset_x;
+  _info->pic_y=_ci->offset_y;
+  /*Frame rate and pixel aspect ratio.*/
+  _info->fps_numerator=_ci->fps_numerator;
+  _info->fps_denominator=_ci->fps_denominator;
+  _info->aspect_numerator=_ci->aspect_numerator;
+  _info->aspect_denominator=_ci->aspect_denominator;
+  /*Any colorspace other than the two recognized ones becomes unspecified.*/
+  if(_ci->colorspace==OC_CS_ITU_REC_470M){
+    _info->colorspace=TH_CS_ITU_REC_470M;
+  }
+  else if(_ci->colorspace==OC_CS_ITU_REC_470BG){
+    _info->colorspace=TH_CS_ITU_REC_470BG;
+  }
+  else _info->colorspace=TH_CS_UNSPECIFIED;
+  /*Any pixel format other than the three recognized ones becomes reserved.*/
+  if(_ci->pixelformat==OC_PF_420)_info->pixel_fmt=TH_PF_420;
+  else if(_ci->pixelformat==OC_PF_422)_info->pixel_fmt=TH_PF_422;
+  else if(_ci->pixelformat==OC_PF_444)_info->pixel_fmt=TH_PF_444;
+  else _info->pixel_fmt=TH_PF_RSVD;
+  /*Rate control targets.*/
+  _info->target_bitrate=_ci->target_bitrate;
+  _info->quality=_ci->quality;
+  /*The granule shift is derived from the forced keyframe frequency, capped at
+     31, and is 0 when that frequency is not positive.*/
+  if(_ci->keyframe_frequency_force>0){
+    _info->keyframe_granule_shift=
+     OC_MINI(31,oc_ilog(_ci->keyframe_frequency_force-1));
+  }
+  else _info->keyframe_granule_shift=0;
+}
+
+/*Legacy-API wrapper around th_packet_isheader().
+  _op: The packet to test.
+  Return: Whatever th_packet_isheader() reports for _op.*/
+int theora_packet_isheader(ogg_packet *_op){
+  int ret;
+  ret=th_packet_isheader(_op);
+  return ret;
+}
+
+/*Legacy-API wrapper around th_packet_iskeyframe().
+  _op: The packet to test.
+  Return: Whatever th_packet_iskeyframe() reports for _op.*/
+int theora_packet_iskeyframe(ogg_packet *_op){
+  int ret;
+  ret=th_packet_iskeyframe(_op);
+  return ret;
+}
+
+/*Computes the granule shift implied by the forced keyframe frequency.
+  No range checking is done, to match what the original function does: the
+   result breaks when keyframe_frequency_force is not positive or is larger
+   than 2**31 (if your int is more than 32 bits).
+  _ci: The legacy info structure to read keyframe_frequency_force from.
+  Return: oc_ilog() of one less than that frequency.*/
+int theora_granule_shift(theora_info *_ci){
+  int shift;
+  shift=oc_ilog(_ci->keyframe_frequency_force-1);
+  return shift;
+}
+
+/*Legacy-API wrapper around th_comment_init().
+  _tc: The comment structure, handed on as a th_comment.*/
+void theora_comment_init(theora_comment *_tc){
+  th_comment *tc;
+  tc=(th_comment *)_tc;
+  th_comment_init(tc);
+}
+
+/*Legacy-API wrapper around th_comment_query().
+  _tc:    The comment structure, handed on as a th_comment.
+  _tag:   The tag to look up.
+  _count: Passed through unchanged to th_comment_query().
+  Return: Whatever th_comment_query() returns.*/
+char *theora_comment_query(theora_comment *_tc,char *_tag,int _count){
+  th_comment *tc;
+  tc=(th_comment *)_tc;
+  return th_comment_query(tc,_tag,_count);
+}
+
+/*Legacy-API wrapper around th_comment_query_count().
+  _tc:  The comment structure, handed on as a th_comment.
+  _tag: The tag to count.
+  Return: Whatever th_comment_query_count() returns.*/
+int theora_comment_query_count(theora_comment *_tc,char *_tag){
+  th_comment *tc;
+  tc=(th_comment *)_tc;
+  return th_comment_query_count(tc,_tag);
+}
+
+/*Legacy-API wrapper around th_comment_clear().
+  _tc: The comment structure, handed on as a th_comment.*/
+void theora_comment_clear(theora_comment *_tc){
+  th_comment *tc;
+  tc=(th_comment *)_tc;
+  th_comment_clear(tc);
+}
+
+/*Legacy-API wrapper around th_comment_add().
+  _tc:      The comment structure, handed on as a th_comment.
+  _comment: The comment string to add.*/
+void theora_comment_add(theora_comment *_tc,char *_comment){
+  th_comment *tc;
+  tc=(th_comment *)_tc;
+  th_comment_add(tc,_comment);
+}
+
+/*Legacy-API wrapper around th_comment_add_tag().
+  _tc:    The comment structure, handed on as a th_comment.
+  _tag:   The tag name.
+  _value: The value to store under that tag.*/
+void theora_comment_add_tag(theora_comment *_tc, char *_tag, char *_value){
+  th_comment *tc;
+  tc=(th_comment *)_tc;
+  th_comment_add_tag(tc,_tag,_value);
+}

+ 54 - 0
jni/libtheora-1.2.0alpha1/lib/apiwrapper.h

@@ -0,0 +1,54 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: apiwrapper.h 13596 2007-08-23 20:05:38Z tterribe $
+
+ ********************************************************************/
+
+#if !defined(_apiwrapper_H)
+# define _apiwrapper_H (1)
+# include <ogg/ogg.h>
+# include <theora/theora.h>
+# include "theora/theoradec.h"
+# include "theora/theoraenc.h"
+# include "state.h"
+
+typedef struct th_api_wrapper th_api_wrapper;
+typedef struct th_api_info    th_api_info;
+
+/*Provide an entry point for the codec setup to clear itself in case we ever
+   want to break pieces off into a common base library shared by encoder and
+   decoder.
+  In addition, this makes several other pieces of the API wrapper cleaner.*/
+typedef void (*oc_setup_clear_func)(void *_ts);
+
+/*Generally only one of these pointers will be non-NULL in any given instance.
+  Technically we do not even really need this struct, since we should be able
+   to figure out which one from "context", but doing it this way makes sure we
+   don't flub it up.
+  theora_info_clear() retrieves this struct from theora_info.codec_setup,
+   invokes clear on it (when clear is non-NULL), and then frees it.*/
+struct th_api_wrapper{
+  oc_setup_clear_func  clear;
+  th_setup_info       *setup;
+  th_dec_ctx          *decode;
+  th_enc_ctx          *encode;
+};
+
+struct th_api_info{
+  th_api_wrapper api;  /*The wrapper (clear hook plus codec handles).*/
+  theora_info    info; /*A legacy info struct held alongside the wrapper.*/
+};
+
+
+void oc_theora_info2th_info(th_info *_info,const theora_info *_ci);
+
+#endif

+ 32 - 0
jni/libtheora-1.2.0alpha1/lib/arm/armbits.h

@@ -0,0 +1,32 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2010                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: x86int.h 17344 2010-07-21 01:42:18Z tterribe $
+
+ ********************************************************************/
+#if !defined(_arm_armbits_H)
+# define _arm_armbits_H (1)
+# include "../bitpack.h"
+# include "armcpu.h"
+
+# if defined(OC_ARM_ASM)
+#  define oc_pack_read oc_pack_read_arm
+#  define oc_pack_read1 oc_pack_read1_arm
+#  define oc_huff_token_decode oc_huff_token_decode_arm
+# endif
+
+long oc_pack_read_arm(oc_pack_buf *_b,int _bits);
+int oc_pack_read1_arm(oc_pack_buf *_b);
+int oc_huff_token_decode_arm(oc_pack_buf *_b,const ogg_int16_t *_tree);
+
+#endif

+ 116 - 0
jni/libtheora-1.2.0alpha1/lib/arm/armcpu.c

@@ -0,0 +1,116 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2010                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+ CPU capability detection for ARM processors.
+
+ function:
+  last mod: $Id: cpu.c 17344 2010-07-21 01:42:18Z tterribe $
+
+ ********************************************************************/
+
+#include "armcpu.h"
+
+#if !defined(OC_ARM_ASM)|| \
+ !defined(OC_ARM_ASM_EDSP)&&!defined(OC_ARM_ASM_MEDIA)&& \
+ !defined(OC_ARM_ASM_NEON)
+/*Fallback used when no ARM asm variant is compiled in: there is nothing to
+   detect, so an empty capability set is reported.
+  Return: Always 0.*/
+ogg_uint32_t oc_cpu_flags_get(void){
+  ogg_uint32_t flags;
+  flags=0;
+  return flags;
+}
+
+#elif defined(_MSC_VER)
+/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
+# define WIN32_LEAN_AND_MEAN
+# define WIN32_EXTRA_LEAN
+# include <windows.h>
+
+ogg_uint32_t oc_cpu_flags_get(void){
+  ogg_uint32_t flags;
+  flags=0;
+  /*MSVC has no inline __asm support for ARM, but it does let you __emit
+     instructions via their assembled hex code.
+    All of these instructions should be essentially nops.
+    Each probe emits one instruction inside a __try block and sets the
+     matching OC_CPU_ARM_* bit right after it; if the instruction raises
+     EXCEPTION_ILLEGAL_INSTRUCTION the handler swallows it and the bit stays
+     clear.
+    The conditionals are nested: the MEDIA probe is only compiled when
+     OC_ARM_ASM_EDSP is defined, and the NEON probe only when OC_ARM_ASM_MEDIA
+     is defined as well.
+    Return: The OR of the OC_CPU_ARM_* bits whose probes succeeded.*/
+# if defined(OC_ARM_ASM_EDSP)
+  __try{
+    /*PLD [r13]*/
+    __emit(0xF5DDF000);
+    flags|=OC_CPU_ARM_EDSP;
+  }
+  __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
+    /*Ignore exception.*/
+  }
+#  if defined(OC_ARM_ASM_MEDIA)
+  __try{
+    /*SHADD8 r3,r3,r3*/
+    __emit(0xE6333F93);
+    flags|=OC_CPU_ARM_MEDIA;
+  }
+  __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
+    /*Ignore exception.*/
+  }
+#   if defined(OC_ARM_ASM_NEON)
+  __try{
+    /*VORR q0,q0,q0*/
+    __emit(0xF2200150);
+    flags|=OC_CPU_ARM_NEON;
+  }
+  __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
+    /*Ignore exception.*/
+  }
+#   endif
+#  endif
+# endif
+  return flags;
+}
+
+#elif defined(__linux__)
+# include <stdio.h>
+# include <stdlib.h>
+# include <string.h>
+
+/*Detects ARM CPU capabilities on Linux by scanning /proc/cpuinfo.
+  /proc/self/auxv would be simpler to parse, but it does not work reliably on
+   Android; relying on /proc/cpuinfo also means detection fails in Scratchbox.
+  Return: The OR of the OC_CPU_ARM_* bits found, or 0 if /proc/cpuinfo cannot
+           be opened.*/
+ogg_uint32_t oc_cpu_flags_get(void){
+  ogg_uint32_t  cpu_flags;
+  FILE         *cpuinfo;
+  /*512 bytes per line is plenty (it would even hold every flag x86 has
+     accumulated... so far).*/
+  char          line[512];
+  cpu_flags=0;
+  cpuinfo=fopen("/proc/cpuinfo","r");
+  if(cpuinfo==NULL)return cpu_flags;
+  while(fgets(line,511,cpuinfo)!=NULL){
+    if(memcmp(line,"Features",8)==0){
+      const char *hit;
+      /*A name only counts when it is a whole word: preceded by a space and
+         followed by a space or the end of the line.*/
+      hit=strstr(line," edsp");
+      if(hit!=NULL&&(hit[5]==' '||hit[5]=='\n'))cpu_flags|=OC_CPU_ARM_EDSP;
+      hit=strstr(line," neon");
+      if(hit!=NULL&&(hit[5]==' '||hit[5]=='\n'))cpu_flags|=OC_CPU_ARM_NEON;
+    }
+    if(memcmp(line,"CPU architecture:",17)==0){
+      /*Architecture version 6 and up is flagged as having MEDIA.*/
+      if(atoi(line+17)>=6)cpu_flags|=OC_CPU_ARM_MEDIA;
+    }
+  }
+  fclose(cpuinfo);
+  return cpu_flags;
+}
+
+#else
+/*The feature registers which can tell us what the processor supports are
+   accessible in privileged modes only, so we can't have a general user-space
+   detection method like on x86.*/
+# error "Configured to use ARM asm but no CPU detection method available for " \
+ "your platform.  Reconfigure with --disable-asm (or send patches)."
+#endif

+ 29 - 0
jni/libtheora-1.2.0alpha1/lib/arm/armcpu.h

@@ -0,0 +1,29 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2010                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+ function:
+    last mod: $Id: cpu.h 17344 2010-07-21 01:42:18Z tterribe $
+
+ ********************************************************************/
+
+#if !defined(_arm_armcpu_H)
+# define _arm_armcpu_H (1)
+#include "../internal.h"
+
+/*CPU feature flags reported by oc_cpu_flags_get().
+  Each flag is a distinct bit, so they may be OR'd together and tested with a
+   bitwise AND.*/
+/*"Parallel instructions" from ARM v6 and above.*/
+#define OC_CPU_ARM_MEDIA    (1<<24)
+/*Flags chosen to match arch/arm/include/asm/hwcap.h in the Linux kernel.*/
+#define OC_CPU_ARM_EDSP     (1<<7)
+#define OC_CPU_ARM_NEON     (1<<12)
+
+/*Probes the host CPU at run time.
+  Return: A bitmask of the OC_CPU_ARM_* flags above for each feature that was
+           detected (0 if none were).*/
+ogg_uint32_t oc_cpu_flags_get(void);
+
+#endif

+ 57 - 0
jni/libtheora-1.2.0alpha1/lib/arm/armenc.c

@@ -0,0 +1,57 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2010                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: x86state.c 17344 2010-07-21 01:42:18Z tterribe $
+
+ ********************************************************************/
+#include "armenc.h"
+
+#if defined(OC_ARM_ASM)
+
+/*Initializes the encoder's accelerated function table for ARM.
+  The C defaults are installed first, and then replaced with the most capable
+   implementation that was both compiled in (OC_ARM_ASM_*) and detected at
+   run time (the OC_CPU_ARM_* bits in _enc->state.cpu_flags).
+  All encoder vtable assignments are guarded by OC_ENC_USE_VTABLE, the macro
+   armenc.h defines for the encoder, rather than OC_STATE_USE_VTABLE, which is
+   the decoder-state counterpart defined by armint.h.
+  _enc: The encoder context to initialize.
+        Its state.cpu_flags must already have been filled in.*/
+void oc_enc_accel_init_arm(oc_enc_ctx *_enc){
+  ogg_uint32_t cpu_flags;
+  cpu_flags=_enc->state.cpu_flags;
+  oc_enc_accel_init_c(_enc);
+# if defined(OC_ENC_USE_VTABLE)
+  /*TODO: Add ARMv4 functions here.*/
+# endif
+# if defined(OC_ARM_ASM_EDSP)
+  if(cpu_flags&OC_CPU_ARM_EDSP){
+#  if defined(OC_ENC_USE_VTABLE)
+    /*TODO: Add EDSP functions here.*/
+#  endif
+  }
+#  if defined(OC_ARM_ASM_MEDIA)
+  if(cpu_flags&OC_CPU_ARM_MEDIA){
+#   if defined(OC_ENC_USE_VTABLE)
+    /*TODO: Add Media functions here.*/
+#   endif
+  }
+#   if defined(OC_ARM_ASM_NEON)
+  if(cpu_flags&OC_CPU_ARM_NEON){
+#    if defined(OC_ENC_USE_VTABLE)
+    _enc->opt_vtable.frag_satd=oc_enc_frag_satd_neon;
+    _enc->opt_vtable.frag_satd2=oc_enc_frag_satd2_neon;
+    _enc->opt_vtable.frag_intra_satd=oc_enc_frag_intra_satd_neon;
+    _enc->opt_vtable.enquant_table_init=oc_enc_enquant_table_init_neon;
+    _enc->opt_vtable.enquant_table_fixup=oc_enc_enquant_table_fixup_neon;
+    _enc->opt_vtable.quantize=oc_enc_quantize_neon;
+#    endif
+    /*The NEON quantizer uses its own enquant table layout, so its size and
+       alignment requirements are set whenever NEON is selected.*/
+    _enc->opt_data.enquant_table_size=128*sizeof(ogg_uint16_t);
+    _enc->opt_data.enquant_table_alignment=16;
+  }
+#   endif
+#  endif
+# endif
+}
+#endif

+ 51 - 0
jni/libtheora-1.2.0alpha1/lib/arm/armenc.h

@@ -0,0 +1,51 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2010                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: x86int.h 17344 2010-07-21 01:42:18Z tterribe $
+
+ ********************************************************************/
+#if !defined(_arm_armenc_H)
+# define _arm_armenc_H (1)
+# include "armint.h"
+
+# if defined(OC_ARM_ASM)
+#  define oc_enc_accel_init oc_enc_accel_init_arm
+#  define OC_ENC_USE_VTABLE (1)
+# endif
+
+# include "../encint.h"
+
+# if defined(OC_ARM_ASM)
+/*Installs the ARM-accelerated encoder functions into _enc; selected as
+   oc_enc_accel_init by the macro above.*/
+void oc_enc_accel_init_arm(oc_enc_ctx *_enc);
+
+/*The NEON routines are only declared when EDSP, MEDIA, and NEON support are
+   all compiled in, mirroring the nesting used in oc_enc_accel_init_arm().*/
+#  if defined(OC_ARM_ASM_EDSP)
+#   if defined(OC_ARM_ASM_MEDIA)
+#    if defined(OC_ARM_ASM_NEON)
+/*NEON versions of the encoder's SATD routines (installed as frag_satd,
+   frag_satd2, and frag_intra_satd in the encoder vtable).*/
+unsigned oc_enc_frag_satd_neon(int *_dc,const unsigned char *_src,
+ const unsigned char *_ref,int _ystride);
+unsigned oc_enc_frag_satd2_neon(int *_dc,const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride);
+unsigned oc_enc_frag_intra_satd_neon(int *_dc,
+ const unsigned char *_src,int _ystride);
+
+/*NEON versions of the quantization routines (installed as enquant_table_init,
+   enquant_table_fixup, and quantize in the encoder vtable).
+  The enquant table is passed as an opaque void pointer.*/
+void oc_enc_enquant_table_init_neon(void *_enquant,
+ const ogg_uint16_t _dequant[64]);
+void oc_enc_enquant_table_fixup_neon(void *_enquant[3][3][2],int _nqis);
+int oc_enc_quantize_neon(ogg_int16_t _qdct[64],const ogg_int16_t _dct[64],
+ const ogg_uint16_t _dequant[64],const void *_enquant);
+#    endif
+#   endif
+#  endif
+# endif
+
+#endif

+ 126 - 0
jni/libtheora-1.2.0alpha1/lib/arm/armint.h

@@ -0,0 +1,126 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2010                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: x86int.h 17344 2010-07-21 01:42:18Z tterribe $
+
+ ********************************************************************/
+#if !defined(_arm_armint_H)
+# define _arm_armint_H (1)
+# include "../internal.h"
+
+# if defined(OC_ARM_ASM)
+
+#  if defined(__ARMEB__)
+#   error "Big-endian configurations are not supported by the ARM asm. " \
+ "Reconfigure with --disable-asm or undefine OC_ARM_ASM."
+#  endif
+
+/*Route the generic state initialization entry point to the ARM version.*/
+#  define oc_state_accel_init oc_state_accel_init_arm
+/*This function is implemented entirely in asm, so it's helpful to pull out all
+   of the things that depend on structure offsets.
+  We reuse the function pointer with the wrong prototype, though.
+  The macro casts the vtable entry to oc_loop_filter_frag_rows_arm_func and
+   passes, in order: the reference frame data for _refi, the stride for plane
+   _pli, _bv, the fragment array, the fragment indices of the first row to
+   filter and of the row just past the last one (the plane's froffset plus the
+   row number times nhfrags), the first and one-past-the-last fragment indices
+   of the plane, the fragment buffer offset table, and the number of fragments
+   per row.*/
+#  define oc_state_loop_filter_frag_rows(_state,_bv,_refi,_pli, \
+ _fragy0,_fragy_end) \
+  ((oc_loop_filter_frag_rows_arm_func) \
+   (_state)->opt_vtable.state_loop_filter_frag_rows)( \
+   (_state)->ref_frame_data[(_refi)],(_state)->ref_ystride[(_pli)], \
+   (_bv), \
+   (_state)->frags, \
+   (_state)->fplanes[(_pli)].froffset \
+   +(_fragy0)*(ptrdiff_t)(_state)->fplanes[(_pli)].nhfrags, \
+   (_state)->fplanes[(_pli)].froffset \
+   +(_fragy_end)*(ptrdiff_t)(_state)->fplanes[(_pli)].nhfrags, \
+   (_state)->fplanes[(_pli)].froffset, \
+   (_state)->fplanes[(_pli)].froffset+(_state)->fplanes[(_pli)].nfrags, \
+   (_state)->frag_buf_offs, \
+   (_state)->fplanes[(_pli)].nhfrags)
+/*For everything else the default vtable macros are fine.*/
+#  define OC_STATE_USE_VTABLE (1)
+# endif
+
+# include "../state.h"
+# include "armcpu.h"
+
+# if defined(OC_ARM_ASM)
+/*The actual signature of the ARM loop filter row functions.
+  The oc_state_loop_filter_frag_rows() macro casts the generic vtable entry to
+   this type before calling it, and the oc_loop_filter_frag_rows_*()
+   implementations declared below take the same parameters.*/
+typedef void (*oc_loop_filter_frag_rows_arm_func)(
+ unsigned char *_ref_frame_data,int _ystride,signed char _bv[256],
+ const oc_fragment *_frags,ptrdiff_t _fragi0,ptrdiff_t _fragi0_end,
+ ptrdiff_t _fragi_top,ptrdiff_t _fragi_bot,
+ const ptrdiff_t *_frag_buf_offs,int _nhfrags);
+
+void oc_state_accel_init_arm(oc_theora_state *_state);
+void oc_frag_copy_list_arm(unsigned char *_dst_frame,
+ const unsigned char *_src_frame,int _ystride,
+ const ptrdiff_t *_fragis,ptrdiff_t _nfragis,const ptrdiff_t *_frag_buf_offs);
+void oc_frag_recon_intra_arm(unsigned char *_dst,int _ystride,
+ const ogg_int16_t *_residue);
+void oc_frag_recon_inter_arm(unsigned char *_dst,const unsigned char *_src,
+ int _ystride,const ogg_int16_t *_residue);
+void oc_frag_recon_inter2_arm(unsigned char *_dst,const unsigned char *_src1,
+ const unsigned char *_src2,int _ystride,const ogg_int16_t *_residue);
+void oc_idct8x8_1_arm(ogg_int16_t _y[64],ogg_uint16_t _dc);
+void oc_idct8x8_arm(ogg_int16_t _y[64],ogg_int16_t _x[64],int _last_zzi);
+void oc_state_frag_recon_arm(const oc_theora_state *_state,ptrdiff_t _fragi,
+ int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant);
+void oc_loop_filter_frag_rows_arm(unsigned char *_ref_frame_data,
+ int _ystride,signed char *_bv,const oc_fragment *_frags,ptrdiff_t _fragi0,
+ ptrdiff_t _fragi0_end,ptrdiff_t _fragi_top,ptrdiff_t _fragi_bot,
+ const ptrdiff_t *_frag_buf_offs,int _nhfrags);
+
+#  if defined(OC_ARM_ASM_EDSP)
+void oc_frag_copy_list_edsp(unsigned char *_dst_frame,
+ const unsigned char *_src_frame,int _ystride,
+ const ptrdiff_t *_fragis,ptrdiff_t _nfragis,const ptrdiff_t *_frag_buf_offs);
+
+#   if defined(OC_ARM_ASM_MEDIA)
+void oc_frag_recon_intra_v6(unsigned char *_dst,int _ystride,
+ const ogg_int16_t *_residue);
+void oc_frag_recon_inter_v6(unsigned char *_dst,const unsigned char *_src,
+ int _ystride,const ogg_int16_t *_residue);
+void oc_frag_recon_inter2_v6(unsigned char *_dst,const unsigned char *_src1,
+ const unsigned char *_src2,int _ystride,const ogg_int16_t *_residue);
+void oc_idct8x8_1_v6(ogg_int16_t _y[64],ogg_uint16_t _dc);
+void oc_idct8x8_v6(ogg_int16_t _y[64],ogg_int16_t _x[64],int _last_zzi);
+void oc_state_frag_recon_v6(const oc_theora_state *_state,ptrdiff_t _fragi,
+ int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant);
+void oc_loop_filter_init_v6(signed char *_bv,int _flimit);
+void oc_loop_filter_frag_rows_v6(unsigned char *_ref_frame_data,
+ int _ystride,signed char *_bv,const oc_fragment *_frags,ptrdiff_t _fragi0,
+ ptrdiff_t _fragi0_end,ptrdiff_t _fragi_top,ptrdiff_t _fragi_bot,
+ const ptrdiff_t *_frag_buf_offs,int _nhfrags);
+
+#    if defined(OC_ARM_ASM_NEON)
+void oc_frag_copy_list_neon(unsigned char *_dst_frame,
+ const unsigned char *_src_frame,int _ystride,
+ const ptrdiff_t *_fragis,ptrdiff_t _nfragis,const ptrdiff_t *_frag_buf_offs);
+void oc_frag_recon_intra_neon(unsigned char *_dst,int _ystride,
+ const ogg_int16_t *_residue);
+void oc_frag_recon_inter_neon(unsigned char *_dst,const unsigned char *_src,
+ int _ystride,const ogg_int16_t *_residue);
+void oc_frag_recon_inter2_neon(unsigned char *_dst,const unsigned char *_src1,
+ const unsigned char *_src2,int _ystride,const ogg_int16_t *_residue);
+void oc_idct8x8_1_neon(ogg_int16_t _y[64],ogg_uint16_t _dc);
+void oc_idct8x8_neon(ogg_int16_t _y[64],ogg_int16_t _x[64],int _last_zzi);
+void oc_state_frag_recon_neon(const oc_theora_state *_state,ptrdiff_t _fragi,
+ int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant);
+void oc_loop_filter_init_neon(signed char *_bv,int _flimit);
+void oc_loop_filter_frag_rows_neon(unsigned char *_ref_frame_data,
+ int _ystride,signed char *_bv,const oc_fragment *_frags,ptrdiff_t _fragi0,
+ ptrdiff_t _fragi0_end,ptrdiff_t _fragi_top,ptrdiff_t _fragi_bot,
+ const ptrdiff_t *_frag_buf_offs,int _nhfrags);
+#    endif
+#   endif
+#  endif
+# endif
+
+#endif

+ 219 - 0
jni/libtheora-1.2.0alpha1/lib/arm/armstate.c

@@ -0,0 +1,219 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2010                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: x86state.c 17344 2010-07-21 01:42:18Z tterribe $
+
+ ********************************************************************/
+#include "armint.h"
+
+#if defined(OC_ARM_ASM)
+
+# if defined(OC_ARM_ASM_NEON)
+/*This table has been modified from OC_FZIG_ZAG by baking an 8x8 transpose into
+   the destination.
+  The first 64 entries are a permutation of 0...63; the remaining 64 entries
+   all hold the value 64.
+  It is installed as opt_data.dct_fzig_zag when NEON is selected at run
+   time.*/
+static const unsigned char OC_FZIG_ZAG_NEON[128]={
+   0, 8, 1, 2, 9,16,24,17,
+  10, 3, 4,11,18,25,32,40,
+  33,26,19,12, 5, 6,13,20,
+  27,34,41,48,56,49,42,35,
+  28,21,14, 7,15,22,29,36,
+  43,50,57,58,51,44,37,30,
+  23,31,38,45,52,59,60,53,
+  46,39,47,54,61,62,55,63,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64
+};
+# endif
+
+/*Initializes the shared codec state's accelerated function table for ARM.
+  The C defaults are installed first, then the CPU is probed and the table
+   entries are overwritten in order of increasing capability (baseline ARM,
+   EDSP, MEDIA/ARMv6, NEON), so the last matching tier wins for each entry.
+  A tier is only considered if it was compiled in (OC_ARM_ASM_*) and its
+   OC_CPU_ARM_* bit was detected at run time.
+  _state: The state to initialize; its cpu_flags field is set here.*/
+void oc_state_accel_init_arm(oc_theora_state *_state){
+  oc_state_accel_init_c(_state);
+  _state->cpu_flags=oc_cpu_flags_get();
+# if defined(OC_STATE_USE_VTABLE)
+  /*Baseline ARM versions: installed unconditionally.*/
+  _state->opt_vtable.frag_copy_list=oc_frag_copy_list_arm;
+  _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_arm;
+  _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_arm;
+  _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_arm;
+  _state->opt_vtable.idct8x8=oc_idct8x8_arm;
+  _state->opt_vtable.state_frag_recon=oc_state_frag_recon_arm;
+  /*Note: We _must_ set this function pointer, because the macro in armint.h
+     calls it with different arguments, so the C version will segfault.*/
+  _state->opt_vtable.state_loop_filter_frag_rows=
+   (oc_state_loop_filter_frag_rows_func)oc_loop_filter_frag_rows_arm;
+# endif
+# if defined(OC_ARM_ASM_EDSP)
+  /*EDSP only provides a faster fragment copy.*/
+  if(_state->cpu_flags&OC_CPU_ARM_EDSP){
+#  if defined(OC_STATE_USE_VTABLE)
+    _state->opt_vtable.frag_copy_list=oc_frag_copy_list_edsp;
+#  endif
+  }
+#  if defined(OC_ARM_ASM_MEDIA)
+  /*ARMv6 media versions: everything except frag_copy_list, plus the loop
+     filter initialization.*/
+  if(_state->cpu_flags&OC_CPU_ARM_MEDIA){
+#   if defined(OC_STATE_USE_VTABLE)
+    _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_v6;
+    _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_v6;
+    _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_v6;
+    _state->opt_vtable.idct8x8=oc_idct8x8_v6;
+    _state->opt_vtable.state_frag_recon=oc_state_frag_recon_v6;
+    _state->opt_vtable.loop_filter_init=oc_loop_filter_init_v6;
+    _state->opt_vtable.state_loop_filter_frag_rows=
+     (oc_state_loop_filter_frag_rows_func)oc_loop_filter_frag_rows_v6;
+#   endif
+  }
+#   if defined(OC_ARM_ASM_NEON)
+  /*NEON versions replace every entry set above.
+    This run-time test is independent of the EDSP and MEDIA tests; only the
+     compile-time guards are nested.*/
+  if(_state->cpu_flags&OC_CPU_ARM_NEON){
+#    if defined(OC_STATE_USE_VTABLE)
+    _state->opt_vtable.frag_copy_list=oc_frag_copy_list_neon;
+    _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_neon;
+    _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_neon;
+    _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_neon;
+    _state->opt_vtable.state_frag_recon=oc_state_frag_recon_neon;
+    _state->opt_vtable.loop_filter_init=oc_loop_filter_init_neon;
+    _state->opt_vtable.state_loop_filter_frag_rows=
+     (oc_state_loop_filter_frag_rows_func)oc_loop_filter_frag_rows_neon;
+    _state->opt_vtable.idct8x8=oc_idct8x8_neon;
+#    endif
+    /*The NEON iDCT is paired with the transposed zig-zag table defined at the
+       top of this file.*/
+    _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG_NEON;
+  }
+#   endif
+#  endif
+# endif
+}
+
+/*Reconstructs a single fragment using the baseline ARM routines.
+  The coefficients in _dct_coeffs[0...63] are inverse transformed into
+   _dct_coeffs[64...127], and the resulting residue is then written into the
+   current frame, either directly (intra) or on top of a prediction taken from
+   the fragment's reference frame.
+  _state:      The codec state.
+  _fragi:      The index of the fragment to reconstruct.
+  _pli:        The color plane the fragment belongs to.
+  _dct_coeffs: The coefficients (first half) and iDCT output (second half).
+  _last_zzi:   Passed on to the iDCT; values below 2 select the DC-only
+                path.
+  _dc_quant:   The quantizer to apply to the DC coefficient.*/
+void oc_state_frag_recon_arm(const oc_theora_state *_state,ptrdiff_t _fragi,
+ int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){
+  const unsigned char *pred;
+  unsigned char       *out;
+  ogg_int16_t         *residue;
+  ptrdiff_t            buf_off;
+  int                  mv_offs[2];
+  int                  stride;
+  int                  ref_idx;
+  residue=_dct_coeffs+64;
+  /*Inverse transform.*/
+  if(_last_zzi>=2){
+    /*General case: dequantize the DC coefficient, then run the full iDCT.*/
+    _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant);
+    oc_idct8x8_arm(residue,_dct_coeffs,_last_zzi);
+  }
+  else{
+    ogg_uint16_t dc;
+    /*DC-only case.
+      This dequant product is rounded here (unlike all the others) because the
+       DC-only iDCT does no rounding of its own.*/
+    dc=(ogg_uint16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5);
+    oc_idct8x8_1_arm(residue,dc);
+  }
+  /*Locate the destination block.*/
+  buf_off=_state->frag_buf_offs[_fragi];
+  ref_idx=_state->frags[_fragi].refi;
+  stride=_state->ref_ystride[_pli];
+  out=_state->ref_frame_data[OC_FRAME_SELF]+buf_off;
+  if(ref_idx==OC_FRAME_SELF){
+    oc_frag_recon_intra_arm(out,stride,residue);
+    return;
+  }
+  /*Inter fragment: predict from one or two offsets into the reference.*/
+  pred=_state->ref_frame_data[ref_idx]+buf_off;
+  if(oc_state_get_mv_offsets(_state,mv_offs,_pli,
+   _state->frag_mvs[_fragi])>1){
+    oc_frag_recon_inter2_arm(out,pred+mv_offs[0],pred+mv_offs[1],stride,
+     residue);
+  }
+  else oc_frag_recon_inter_arm(out,pred+mv_offs[0],stride,residue);
+}
+
+# if defined(OC_ARM_ASM_MEDIA)
+/*Reconstructs a single fragment using the ARMv6 media routines.
+  The coefficients in _dct_coeffs[0...63] are inverse transformed into
+   _dct_coeffs[64...127], and the resulting residue is then written into the
+   current frame, either directly (intra) or on top of a prediction taken from
+   the fragment's reference frame.
+  _state:      The codec state.
+  _fragi:      The index of the fragment to reconstruct.
+  _pli:        The color plane the fragment belongs to.
+  _dct_coeffs: The coefficients (first half) and iDCT output (second half).
+  _last_zzi:   Passed on to the iDCT; values below 2 select the DC-only
+                path.
+  _dc_quant:   The quantizer to apply to the DC coefficient.*/
+void oc_state_frag_recon_v6(const oc_theora_state *_state,ptrdiff_t _fragi,
+ int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){
+  const unsigned char *pred;
+  unsigned char       *out;
+  ogg_int16_t         *residue;
+  ptrdiff_t            buf_off;
+  int                  mv_offs[2];
+  int                  stride;
+  int                  ref_idx;
+  residue=_dct_coeffs+64;
+  /*Inverse transform.*/
+  if(_last_zzi>=2){
+    /*General case: dequantize the DC coefficient, then run the full iDCT.*/
+    _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant);
+    oc_idct8x8_v6(residue,_dct_coeffs,_last_zzi);
+  }
+  else{
+    ogg_uint16_t dc;
+    /*DC-only case.
+      This dequant product is rounded here (unlike all the others) because the
+       DC-only iDCT does no rounding of its own.*/
+    dc=(ogg_uint16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5);
+    oc_idct8x8_1_v6(residue,dc);
+  }
+  /*Locate the destination block.*/
+  buf_off=_state->frag_buf_offs[_fragi];
+  ref_idx=_state->frags[_fragi].refi;
+  stride=_state->ref_ystride[_pli];
+  out=_state->ref_frame_data[OC_FRAME_SELF]+buf_off;
+  if(ref_idx==OC_FRAME_SELF){
+    oc_frag_recon_intra_v6(out,stride,residue);
+    return;
+  }
+  /*Inter fragment: predict from one or two offsets into the reference.*/
+  pred=_state->ref_frame_data[ref_idx]+buf_off;
+  if(oc_state_get_mv_offsets(_state,mv_offs,_pli,
+   _state->frag_mvs[_fragi])>1){
+    oc_frag_recon_inter2_v6(out,pred+mv_offs[0],pred+mv_offs[1],stride,
+     residue);
+  }
+  else oc_frag_recon_inter_v6(out,pred+mv_offs[0],stride,residue);
+}
+
+# if defined(OC_ARM_ASM_NEON)
+/*Reconstructs a single fragment using the NEON routines.
+  The coefficients in _dct_coeffs[0...63] are inverse transformed into
+   _dct_coeffs[64...127], and the resulting residue is then written into the
+   current frame, either directly (intra) or on top of a prediction taken from
+   the fragment's reference frame.
+  _state:      The codec state.
+  _fragi:      The index of the fragment to reconstruct.
+  _pli:        The color plane the fragment belongs to.
+  _dct_coeffs: The coefficients (first half) and iDCT output (second half).
+  _last_zzi:   Passed on to the iDCT; values below 2 select the DC-only
+                path.
+  _dc_quant:   The quantizer to apply to the DC coefficient.*/
+void oc_state_frag_recon_neon(const oc_theora_state *_state,ptrdiff_t _fragi,
+ int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){
+  const unsigned char *pred;
+  unsigned char       *out;
+  ogg_int16_t         *residue;
+  ptrdiff_t            buf_off;
+  int                  mv_offs[2];
+  int                  stride;
+  int                  ref_idx;
+  residue=_dct_coeffs+64;
+  /*Inverse transform.*/
+  if(_last_zzi>=2){
+    /*General case: dequantize the DC coefficient, then run the full iDCT.*/
+    _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant);
+    oc_idct8x8_neon(residue,_dct_coeffs,_last_zzi);
+  }
+  else{
+    ogg_uint16_t dc;
+    /*DC-only case.
+      This dequant product is rounded here (unlike all the others) because the
+       DC-only iDCT does no rounding of its own.*/
+    dc=(ogg_uint16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5);
+    oc_idct8x8_1_neon(residue,dc);
+  }
+  /*Locate the destination block.*/
+  buf_off=_state->frag_buf_offs[_fragi];
+  ref_idx=_state->frags[_fragi].refi;
+  stride=_state->ref_ystride[_pli];
+  out=_state->ref_frame_data[OC_FRAME_SELF]+buf_off;
+  if(ref_idx==OC_FRAME_SELF){
+    oc_frag_recon_intra_neon(out,stride,residue);
+    return;
+  }
+  /*Inter fragment: predict from one or two offsets into the reference.*/
+  pred=_state->ref_frame_data[ref_idx]+buf_off;
+  if(oc_state_get_mv_offsets(_state,mv_offs,_pli,
+   _state->frag_mvs[_fragi])>1){
+    oc_frag_recon_inter2_neon(out,pred+mv_offs[0],pred+mv_offs[1],stride,
+     residue);
+  }
+  else oc_frag_recon_inter_neon(out,pred+mv_offs[0],stride,residue);
+}
+#  endif
+# endif
+
+#endif

+ 114 - 0
jni/libtheora-1.2.0alpha1/lib/bitpack.c

@@ -0,0 +1,114 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE OggTheora SOURCE CODE IS (C) COPYRIGHT 1994-2009             *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function: packing variable sized words into an octet stream
+  last mod: $Id$
+
+ ********************************************************************/
+#include <string.h>
+#include <stdlib.h>
+#include "bitpack.h"
+
+/*We're 'MSb' endian; if we write a word but read individual bits,
+   then we'll read the MSb first.*/
+
+/*Prepares a bit reader over a byte buffer.
+  _b:     The reader to initialize; all of its fields are reset.
+  _buf:   The first byte to read.
+  _bytes: The number of bytes available at _buf.*/
+void oc_pack_readinit(oc_pack_buf *_b,unsigned char *_buf,long _bytes){
+  const unsigned char *first;
+  first=_buf;
+  /*Start with an empty window, no buffered bits, and no EOF.*/
+  memset(_b,0,sizeof(*_b));
+  _b->stop=first+_bytes;
+  _b->ptr=first;
+}
+
+/*Tops up the bit window with whole bytes from the buffer.
+  _b:    The reader; its ptr, bits, and (possibly) eof fields are updated,
+          but NOT its window field: the caller must store the return value.
+  _bits: The number of bits the caller is about to consume.
+  Return: The refilled window.*/
+static oc_pb_window oc_pack_refill(oc_pack_buf *_b,int _bits){
+  const unsigned char *cur;
+  const unsigned char *end;
+  oc_pb_window         win;
+  unsigned             room;
+  int                  nbits;
+  cur=_b->ptr;
+  end=_b->stop;
+  win=_b->window;
+  /*The number of unused bit positions at the bottom of the window.*/
+  room=OC_PB_WINDOW_SIZE-_b->bits;
+  /*Insert whole bytes just below the bits already present.*/
+  for(;room>7&&cur<end;cur++){
+    room-=8;
+    win|=(oc_pb_window)*cur<<room;
+  }
+  _b->ptr=cur;
+  nbits=OC_PB_WINDOW_SIZE-room;
+  if(nbits<_bits){
+    /*Still short: either borrow the top bits of the next byte (without
+       consuming it), or report the end of the buffer.*/
+    if(cur<end)win|=*cur>>(nbits&7);
+    else{
+      _b->eof=1;
+      nbits=OC_LOTS_OF_BITS;
+    }
+  }
+  _b->bits=nbits;
+  return win;
+}
+
+/*Peeks at the next bit without consuming it.
+  The window is refilled (and stored back) if it is currently empty.
+  Return: The value of the next bit (the top bit of the window).*/
+int oc_pack_look1(oc_pack_buf *_b){
+  oc_pb_window win;
+  win=_b->window;
+  if(_b->bits<1){
+    win=oc_pack_refill(_b,1);
+    _b->window=win;
+  }
+  return win>>(OC_PB_WINDOW_SIZE-1);
+}
+
+/*Consumes one bit: drops the top bit of the window and decrements the count
+   of buffered bits.
+  No refill is performed here.*/
+void oc_pack_adv1(oc_pack_buf *_b){
+  _b->bits-=1;
+  _b->window=_b->window<<1;
+}
+
+/*Reads and consumes _bits bits, most significant bit first.
+  Here we assume that 0<=_bits&&_bits<=32.
+  Return: The bits read, as an unsigned value in the low _bits bits of the
+           result (0 if _bits is 0).*/
+long oc_pack_read_c(oc_pack_buf *_b,int _bits){
+  oc_pb_window win;
+  long         value;
+  int          nbits;
+  if(_bits==0)return 0;
+  win=_b->window;
+  nbits=_b->bits;
+  if(nbits<_bits){
+    win=oc_pack_refill(_b,_bits);
+    nbits=_b->bits;
+  }
+  value=win>>(OC_PB_WINDOW_SIZE-_bits);
+  /*Discard the consumed bits in two steps, so that no single shift is by the
+     full width of the window when _bits equals OC_PB_WINDOW_SIZE (C leaves
+     such a shift undefined).*/
+  win<<=1;
+  win<<=_bits-1;
+  _b->window=win;
+  _b->bits=nbits-_bits;
+  return value;
+}
+
+/*Reads and consumes a single bit.
+  The window is refilled first if it holds no bits.
+  Return: The value of the bit read.*/
+int oc_pack_read1_c(oc_pack_buf *_b){
+  oc_pb_window win;
+  int          nbits;
+  int          bit;
+  nbits=_b->bits;
+  if(nbits>=1)win=_b->window;
+  else{
+    win=oc_pack_refill(_b,1);
+    nbits=_b->bits;
+  }
+  /*The next bit is always the top bit of the window.*/
+  bit=win>>(OC_PB_WINDOW_SIZE-1);
+  _b->window=win<<1;
+  _b->bits=nbits-1;
+  return bit;
+}
+
+/*Reports how much input remains.
+  Return: -1 if a read has already run past the end of the buffer, otherwise
+           the number of unread bytes left in the buffer plus the number of
+           whole bytes still held in the window.*/
+long oc_pack_bytes_left(oc_pack_buf *_b){
+  return _b->eof?-1:_b->stop-_b->ptr+(_b->bits>>3);
+}

+ 76 - 0
jni/libtheora-1.2.0alpha1/lib/bitpack.h

@@ -0,0 +1,76 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE OggTheora SOURCE CODE IS (C) COPYRIGHT 1994-2009             *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function: packing variable sized words into an octet stream
+  last mod: $Id: bitwise.c 7675 2004-09-01 00:34:39Z xiphmont $
+
+ ********************************************************************/
+#if !defined(_bitpack_H)
+# define _bitpack_H (1)
+# include <stddef.h>
+# include <limits.h>
+# include "internal.h"
+
+
+
+typedef size_t             oc_pb_window;
+typedef struct oc_pack_buf oc_pack_buf;
+
+
+
+/*Custom bitpacker implementations.*/
+# if defined(OC_ARM_ASM)
+#  include "arm/armbits.h"
+# endif
+
+# if !defined(oc_pack_read)
+#  define oc_pack_read oc_pack_read_c
+# endif
+# if !defined(oc_pack_read1)
+#  define oc_pack_read1 oc_pack_read1_c
+# endif
+# if !defined(oc_huff_token_decode)
+#  define oc_huff_token_decode oc_huff_token_decode_c
+# endif
+
+# define OC_PB_WINDOW_SIZE ((int)sizeof(oc_pb_window)*CHAR_BIT)
+/*This is meant to be a large, positive constant that can still be efficiently
+   loaded as an immediate (on platforms like ARM, for example).
+  Even relatively modest values like 100 would work fine.*/
+# define OC_LOTS_OF_BITS (0x40000000)
+
+
+
+/*The state of a bit reader over a byte buffer (see bitpack.c).*/
+struct oc_pack_buf{
+  /*One past the last byte of the buffer.*/
+  const unsigned char *stop;
+  /*The next byte to be loaded into the window.*/
+  const unsigned char *ptr;
+  /*The buffered bits; the next bit to be read is the most significant one.*/
+  oc_pb_window         window;
+  /*The number of bits currently buffered in the window.
+    This is set to OC_LOTS_OF_BITS once the end of the buffer is hit.*/
+  int                  bits;
+  /*Set to 1 once a read has requested more bits than the buffer contained.*/
+  int                  eof;
+};
+
+void oc_pack_readinit(oc_pack_buf *_b,unsigned char *_buf,long _bytes);
+int oc_pack_look1(oc_pack_buf *_b);
+void oc_pack_adv1(oc_pack_buf *_b);
+/*Here we assume 0<=_bits&&_bits<=32.*/
+long oc_pack_read_c(oc_pack_buf *_b,int _bits);
+int oc_pack_read1_c(oc_pack_buf *_b);
+/* returns -1 for read beyond EOF, or the number of whole bytes available */
+long oc_pack_bytes_left(oc_pack_buf *_b);
+
+/*These two functions are implemented locally in huffdec.c*/
+/*Read in bits without advancing the bitptr.
+  Here we assume 0<=_bits&&_bits<=32.*/
+/*static int oc_pack_look(oc_pack_buf *_b,int _bits);*/
+/*static void oc_pack_adv(oc_pack_buf *_b,int _bits);*/
+
+#endif

+ 974 - 0
jni/libtheora-1.2.0alpha1/lib/collect.c

@@ -0,0 +1,974 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2011                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function: mode selection code
+  last mod: $Id$
+
+ ********************************************************************/
+#include <stdio.h>
+#include <limits.h>
+#include <math.h>
+#include <string.h>
+#include "collect.h"
+
+#if defined(OC_COLLECT_METRICS)
+
+int              OC_HAS_MODE_METRICS;
+double           OC_MODE_RD_WEIGHT_SATD[OC_LOGQ_BINS][3][2][OC_COMP_BINS];
+double           OC_MODE_RD_WEIGHT_SAD[OC_LOGQ_BINS][3][2][OC_COMP_BINS];
+oc_mode_metrics  OC_MODE_METRICS_SATD[OC_LOGQ_BINS-1][3][2][OC_COMP_BINS];
+oc_mode_metrics  OC_MODE_METRICS_SAD[OC_LOGQ_BINS-1][3][2][OC_COMP_BINS];
+const char      *OC_MODE_METRICS_FILENAME="modedec.stats";
+
+/*Adds a single weighted sample to a set of accumulated metrics.
+  The first-order fields (w, s, q, r, d) are plain weighted sums.
+  The higher-order fields (s2, sq, q2, ..., s2q2) are updated incrementally
+   from the deviations of the new sample from the current weighted means, and
+   only when some weight has already been accumulated.
+  NOTE(review): the higher-order fields look like centered (co-)moments about
+   the weighted mean; confirm against the code that consumes them.
+  _metrics: The accumulator to update.
+  _w:       The weight of the new sample.
+  _s:       The sample's s value (presumably a SATD/SAD score; TODO confirm).
+  _q:       The sample's q value (presumably a quantizer measure; TODO
+             confirm).
+  _r:       The sample's r value (presumably a rate; TODO confirm).
+  _d:       The sample's d value (presumably a distortion; TODO confirm).*/
+void oc_mode_metrics_add(oc_mode_metrics *_metrics,
+ double _w,int _s,int _q,int _r,double _d){
+  if(_metrics->w>0){
+    double ds;
+    double dq;
+    double dr;
+    double dd;
+    double ds2;
+    double dq2;
+    double s2;
+    double sq;
+    double q2;
+    double sr;
+    double qr;
+    double sd;
+    double qd;
+    double s2q;
+    double sq2;
+    double w;
+    double wa;
+    double rwa;
+    double rwa2;
+    double rwb;
+    double rwb2;
+    double rw2;
+    double rw3;
+    double rw4;
+    /*wa is the weight accumulated so far; the d* values are the deviations of
+       the new sample from the current weighted means.*/
+    wa=_metrics->w;
+    ds=_s-_metrics->s/wa;
+    dq=_q-_metrics->q/wa;
+    dr=_r-_metrics->r/wa;
+    dd=_d-_metrics->d/wa;
+    ds2=ds*ds;
+    dq2=dq*dq;
+    /*Snapshot the old lower-order values: the higher-order updates below need
+       them as they were before this sample was added.*/
+    s2=_metrics->s2;
+    sq=_metrics->sq;
+    q2=_metrics->q2;
+    sr=_metrics->sr;
+    qr=_metrics->qr;
+    sd=_metrics->sd;
+    qd=_metrics->qd;
+    s2q=_metrics->s2q;
+    sq2=_metrics->sq2;
+    /*w is the new total weight; rwa and rwb are the fractions of it held by
+       the old data and by the new sample, respectively.*/
+    w=wa+_w;
+    rwa=wa/w;
+    rwb=_w/w;
+    rwa2=rwa*rwa;
+    rwb2=rwb*rwb;
+    rw2=wa*rwb;
+    rw3=rw2*(rwa2-rwb2);
+    rw4=_w*rwa2*rwa2+wa*rwb2*rwb2;
+    /*Update from the highest order down.*/
+    _metrics->s2q2+=-2*(ds*sq2+dq*s2q)*rwb
+     +(ds2*q2+4*ds*dq*sq+dq2*s2)*rwb2+ds2*dq2*rw4;
+    _metrics->s2q+=(-2*ds*sq-dq*s2)*rwb+ds2*dq*rw3;
+    _metrics->sq2+=(-ds*q2-2*dq*sq)*rwb+ds*dq2*rw3;
+    _metrics->sqr+=(-ds*qr-dq*sr-dr*sq)*rwb+ds*dq*dr*rw3;
+    _metrics->sqd+=(-ds*qd-dq*sd-dd*sq)*rwb+ds*dq*dd*rw3;
+    _metrics->s2+=ds2*rw2;
+    _metrics->sq+=ds*dq*rw2;
+    _metrics->q2+=dq2*rw2;
+    _metrics->sr+=ds*dr*rw2;
+    _metrics->qr+=dq*dr*rw2;
+    _metrics->r2+=dr*dr*rw2;
+    _metrics->sd+=ds*dd*rw2;
+    _metrics->qd+=dq*dd*rw2;
+    _metrics->d2+=dd*dd*rw2;
+  }
+  /*The weighted sums are always updated, even for the very first sample.*/
+  _metrics->w+=_w;
+  _metrics->s+=_s*_w;
+  _metrics->q+=_q*_w;
+  _metrics->r+=_r*_w;
+  _metrics->d+=_d*_w;
+}
+
+/*Merges several sets of accumulated metrics into one.
+  Sets with zero weight are skipped.
+  If every set is empty (or _n is not positive), _dst is zeroed.
+  Otherwise, _dst starts as a copy of the first non-empty set, and each
+   remaining non-empty set is folded in, using the difference between the two
+   weighted means to update the higher-order fields.
+  _dst: The destination for the merged metrics; overwritten.
+  _src: The array of metrics to merge.
+  _n:   The number of entries in _src.*/
+void oc_mode_metrics_merge(oc_mode_metrics *_dst,
+ const oc_mode_metrics *_src,int _n){
+  int i;
+  /*Find a non-empty set of metrics.*/
+  for(i=0;i<_n&&_src[i].w==0;i++);
+  if(i>=_n){
+    memset(_dst,0,sizeof(*_dst));
+    return;
+  }
+  memcpy(_dst,_src+i,sizeof(*_dst));
+  /*And iterate over the remaining non-empty sets of metrics.*/
+  for(i++;i<_n;i++)if(_src[i].w!=0){
+    double ds;
+    double dq;
+    double dr;
+    double dd;
+    double ds2;
+    double dq2;
+    double s2a;
+    double s2b;
+    double sqa;
+    double sqb;
+    double q2a;
+    double q2b;
+    double sra;
+    double srb;
+    double qra;
+    double qrb;
+    double sda;
+    double sdb;
+    double qda;
+    double qdb;
+    double s2qa;
+    double s2qb;
+    double sq2a;
+    double sq2b;
+    double w;
+    double wa;
+    double wb;
+    double rwa;
+    double rwb;
+    double rwa2;
+    double rwb2;
+    double rw2;
+    double rw3;
+    double rw4;
+    /*The "a" values describe what has been merged into _dst so far; the "b"
+       values describe the set being added.
+      The d* values are the differences between the two weighted means.*/
+    wa=_dst->w;
+    wb=_src[i].w;
+    ds=_src[i].s/wb-_dst->s/wa;
+    dq=_src[i].q/wb-_dst->q/wa;
+    dr=_src[i].r/wb-_dst->r/wa;
+    dd=_src[i].d/wb-_dst->d/wa;
+    ds2=ds*ds;
+    dq2=dq*dq;
+    /*Snapshot the lower-order values of both sides before _dst is modified.*/
+    s2a=_dst->s2;
+    sqa=_dst->sq;
+    q2a=_dst->q2;
+    sra=_dst->sr;
+    qra=_dst->qr;
+    sda=_dst->sd;
+    qda=_dst->qd;
+    s2qa=_dst->s2q;
+    sq2a=_dst->sq2;
+    s2b=_src[i].s2;
+    sqb=_src[i].sq;
+    q2b=_src[i].q2;
+    srb=_src[i].sr;
+    qrb=_src[i].qr;
+    sdb=_src[i].sd;
+    qdb=_src[i].qd;
+    s2qb=_src[i].s2q;
+    sq2b=_src[i].sq2;
+    w=wa+wb;
+    /*Guard against a zero total weight (the weights could cancel).*/
+    if(w==0)rwa=rwb=0;
+    else{
+      rwa=wa/w;
+      rwb=wb/w;
+    }
+    rwa2=rwa*rwa;
+    rwb2=rwb*rwb;
+    rw2=wa*rwb;
+    rw3=rw2*(rwa2-rwb2);
+    rw4=wb*rwa2*rwa2+wa*rwb2*rwb2;
+    /*
+    (1,1,1) ->
+     (0,0,0)#
+     (1,0,0) C(1,1)*C(1,0)*C(1,0)->  d^{1,0,0}*(rwa*B_{0,1,1}-rwb*A_{0,1,1})
+     (0,1,0) C(1,0)*C(1,1)*C(1,0)->  d^{0,1,0}*(rwa*B_{1,0,1}-rwb*A_{1,0,1})
+     (0,0,1) C(1,0)*C(1,0)*C(1,1)->  d^{0,0,1}*(rwa*B_{1,1,0}-rwb*A_{1,1,0})
+     (1,1,0)*
+     (1,0,1)*
+     (0,1,1)*
+     (1,1,1) C(1,1)*C(1,1)*C(1,1)->  d^{1,1,1}*(rwa^3*wb-rwb^3*wa)
+    (2,1) ->
+     (0,0)#
+     (1,0) C(2,1)*C(1,1)->2*d^{1,0}*(rwa*B_{1,1}-rwb*A_{1,1})
+     (0,1) C(2,0)*C(1,1)->  d^{0,1}*(rwa*B_{2,0}-rwb*A_{2,0})
+     (2,0)*
+     (1,1)*
+     (2,1) C(2,2)*C(1,1)->  d^{2,1}*(rwa^3*wb-rwb^3*wa)
+    (2,2) ->
+     (0,0)#
+     (1,0) C(2,1)*C(2,0)->2*d^{1,0}*(rwa*B_{1,2}-rwb*A_{1,2})
+     (0,1) C(2,0)*C(2,1)->2*d^{0,1}*(rwa*B_{2,1}-rwb*A_{2,1})
+     (2,0) C(2,2)*C(2,0)->  d^{2,0}*(rwa^2*B_{0,2}+rwb^2*A_{0,2})
+     (1,1) C(2,1)*C(2,1)->4*d^{1,1}*(rwa^2*B_{1,1}+rwb^2*A_{1,1})
+     (0,2) C(2,0)*C(2,2)->  d^{0,2}*(rwa^2*B_{2,0}+rwb^2*A_{2,0})
+     (1,2)*
+     (2,1)*
+     (2,2) C(2,2)*C(2,2)*d^{2,2}*(rwa^4*wb+rwb^4*wa)
+    */
+    /*Update from the highest order down, using the snapshots taken above.*/
+    _dst->s2q2+=_src[i].s2q2+2*(ds*(rwa*sq2b-rwb*sq2a)+dq*(rwa*s2qb-rwb*s2qa))
+     +ds2*(rwa2*q2b+rwb2*q2a)+4*ds*dq*(rwa2*sqb+rwb2*sqa)
+     +dq2*(rwa2*s2b+rwb2*s2a)+ds2*dq2*rw4;
+    _dst->s2q+=_src[i].s2q+2*ds*(rwa*sqb-rwb*sqa)
+     +dq*(rwa*s2b-rwb*s2a)+ds2*dq*rw3;
+    _dst->sq2+=_src[i].sq2+ds*(rwa*q2b-rwb*q2a)
+     +2*dq*(rwa*sqb-rwb*sqa)+ds*dq2*rw3;
+    _dst->sqr+=_src[i].sqr+ds*(rwa*qrb-rwb*qra)+dq*(rwa*srb-rwb*sra)
+     +dr*(rwa*sqb-rwb*sqa)+ds*dq*dr*rw3;
+    _dst->sqd+=_src[i].sqd+ds*(rwa*qdb-rwb*qda)+dq*(rwa*sdb-rwb*sda)
+     +dd*(rwa*sqb-rwb*sqa)+ds*dq*dd*rw3;
+    _dst->s2+=_src[i].s2+ds2*rw2;
+    _dst->sq+=_src[i].sq+ds*dq*rw2;
+    _dst->q2+=_src[i].q2+dq2*rw2;
+    _dst->sr+=_src[i].sr+ds*dr*rw2;
+    _dst->qr+=_src[i].qr+dq*dr*rw2;
+    _dst->r2+=_src[i].r2+dr*dr*rw2;
+    _dst->sd+=_src[i].sd+ds*dd*rw2;
+    _dst->qd+=_src[i].qd+dq*dd*rw2;
+    _dst->d2+=_src[i].d2+dd*dd*rw2;
+    /*The weighted sums simply add.*/
+    _dst->w+=_src[i].w;
+    _dst->s+=_src[i].s;
+    _dst->q+=_src[i].q;
+    _dst->r+=_src[i].r;
+    _dst->d+=_src[i].d;
+  }
+}
+
+/*Adjust a single corner of a set of metric bins to minimize the squared
+   prediction error of R and D.
+  Each bin is assumed to cover a quad like so:
+    (s0,q0)    (s1,q0)
+       A----------B
+       |          |
+       |          |
+       |          |
+       |          |
+       C----------Z
+    (s0,q1)    (s1,q1)
+  The values A, B, and C are fixed, and Z is the free parameter.
+  Then, for example, R_i is predicted via bilinear interpolation as
+    x_i=(s_i-s0)/(s1-s0)
+    y_i=(q_i-q0)/(q1-q0)
+    dRds1_i=A+(B-A)*x_i
+    dRds2_i=C+(Z-C)*x_i
+    R_i=dRds1_i+(dRds2_i-dRds1_i)*y_i
+  To find the Z that minimizes the squared prediction error over i, this can
+   be rewritten as
+    R_i-(A+(B-A)*x_i+(C-A)*y_i+(A-B-C)*x_i*y_i)=x_i*y_i*Z
+  Letting X={...,x_i*y_i,...}^T and
+   Y={...,R_i-(A+(B-A)*x_i+(C-A)*y_i+(A-B-C)*x_i*y_i),...}^T,
+   the optimal Z is given by Z=(X^T.Y)/(X^T.X).
+  Now, we need to compute these dot products without actually storing data for
+   each sample.
+  Starting with X^T.X, we have
+   X^T.X = sum(x_i^2*y_i^2) = sum((s_i-s0)^2*(q_i-q0)^2)/((s1-s0)^2*(q1-q0)^2).
+  Expanding the interior of the sum in a monomial basis of s_i and q_i gives
+    s0^2*q0^2  *(1)
+     -2*s0*q0^2*(s_i)
+     -2*s0^2*q0*(q_i)
+     +q0^2     *(s_i^2)
+     +4*s0*q0  *(s_i*q_i)
+     +s0^2     *(q_i^2)
+     -2*q0     *(s_i^2*q_i)
+     -2*s0     *(s_i*q_i^2)
+     +1        *(s_i^2*q_i^2).
+  However, computing things directly in this basis leads to gross numerical
+   errors, as most of the terms will have similar size and destructive
+   cancellation results.
+  A much better basis is the central (co-)moment basis:
+    {1,s_i-sbar,q_i-qbar,(s_i-sbar)^2,(s_i-sbar)*(q_i-qbar),(q_i-qbar)^2,
+     (s_i-sbar)^2*(q_i-qbar),(s_i-sbar)*(q_i-qbar)^2,(s_i-sbar)^2*(q_i-qbar)^2},
+   where sbar and qbar are the average s and q values over the bin,
+   respectively.
+  In that basis, letting ds=sbar-s0 and dq=qbar-q0, (s_i-s0)^2*(q_i-q0)^2 is
+    ds^2*dq^2*(1)
+     +dq^2   *((s_i-sbar)^2)
+     +4*ds*dq*((s_i-sbar)*(q_i-qbar))
+     +ds^2   *((q_i-qbar)^2)
+     +2*dq   *((s_i-sbar)^2*(q_i-qbar))
+     +2*ds   *((s_i-sbar)*(q_i-qbar)^2)
+     +1      *((s_i-sbar)^2*(q_i-qbar)^2).
+  With these expressions in the central (co-)moment bases, all we need to do
+   is compute sums over the (co-)moment terms, which can be done
+   incrementally (see oc_mode_metrics_add() and oc_mode_metrics_merge()),
+   with no need to store the individual samples.
+  Now, for X^T.Y, we have
+    X^T.Y = sum((R_i-A-((B-A)/(s1-s0))*(s_i-s0)-((C-A)/(q1-q0))*(q_i-q0)
+     -((A-B-C)/((s1-s0)*(q1-q0)))*(s_i-s0)*(q_i-q0))*(s_i-s0)*(q_i-q0))/
+     ((s1-s0)*(q1-q0)),
+   or, rewriting the constants to simplify notation,
+    X^T.Y = sum((C0+C1*(s_i-s0)+C2*(q_i-q0)
+     +C3*(s_i-s0)*(q_i-q0)+R_i)*(s_i-s0)*(q_i-q0))/((s1-s0)*(q1-q0)).
+  Again, converting to the central (co-)moment basis, the interior of the
+   above sum is
+    ds*dq*(rbar+C0+C1*ds+C2*dq+C3*ds*dq)  *(1)
+     +(C1*dq+C3*dq^2)                     *((s_i-sbar)^2)
+     +(rbar+C0+2*C1*ds+2*C2*dq+4*C3*ds*dq)*((s_i-sbar)*(q_i-qbar))
+     +(C2*ds+C3*ds^2)                     *((q_i-qbar)^2)
+     +dq                                  *((s_i-sbar)*(r_i-rbar))
+     +ds                                  *((q_i-qbar)*(r_i-rbar))
+     +(C1+2*C3*dq)                        *((s_i-sbar)^2*(q_i-qbar))
+     +(C2+2*C3*ds)                        *((s_i-sbar)*(q_i-qbar)^2)
+     +1                                   *((s_i-sbar)*(q_i-qbar)*(r_i-rbar))
+     +C3                                  *((s_i-sbar)^2*(q_i-qbar)^2).
+  You might think it would be easier (if perhaps slightly less robust) to
+   accumulate terms directly around s0 and q0.
+  However, we update each corner of the bins in turn, so we would have to
+   change basis to move the sums from corner to corner anyway.*/
+/*Solves for the least-squares value of the free corner Z shared by up to _n
+   bins, for rate and distortion at once.
+  Only bins with positive weight contribute to the solution.
+  _r:       Returns the rate at the free corner, or 0 if the accumulated
+             X^T.X is not greater than 1E-3.
+  _d:       Returns the distortion at the free corner, or 0 under the same
+             condition.
+  _metrics: The accumulated metrics of each bin.
+  _s0,_s1:  For each bin, the s coordinate of the fixed corner A and of the
+             free corner Z.
+  _q0,_q1:  For each bin, the q coordinate of the fixed corner A and of the
+             free corner Z.
+  _ra,_rb,_rc: For each bin, the rate at the fixed corners A, B, and C.
+  _da,_db,_dc: For each bin, the distortion at the fixed corners A, B, and C.
+  _n:       The number of bins.
+  Return: The total weight of the bins that contributed.*/
+double oc_mode_metrics_solve(double *_r,double *_d,
+ const oc_mode_metrics *_metrics,const int *_s0,const int *_s1,
+ const int *_q0,const int *_q1,
+ const double *_ra,const double *_rb,const double *_rc,
+ const double *_da,const double *_db,const double *_dc,int _n){
+  /*xx accumulates X^T.X, and rxy and dxy accumulate X^T.Y for the rate and
+     the distortion, respectively.*/
+  double xx;
+  double rxy;
+  double dxy;
+  double wt;
+  int i;
+  xx=rxy=dxy=wt=0;
+  for(i=0;i<_n;i++)if(_metrics[i].w>0){
+    double s10;
+    double q10;
+    double sq10;
+    double ds;
+    double dq;
+    double ds2;
+    double dq2;
+    double r;
+    double d;
+    double s2;
+    double sq;
+    double q2;
+    double sr;
+    double qr;
+    double sd;
+    double qd;
+    double s2q;
+    double sq2;
+    double sqr;
+    double sqd;
+    double s2q2;
+    double c0;
+    double c1;
+    double c2;
+    double c3;
+    double w;
+    w=_metrics[i].w;
+    wt+=w;
+    /*The extent of the bin in s and q, and their product.*/
+    s10=_s1[i]-_s0[i];
+    q10=_q1[i]-_q0[i];
+    sq10=s10*q10;
+    /*The offset of the bin's mean s and q from the corner (s0,q0).*/
+    ds=_metrics[i].s/w-_s0[i];
+    dq=_metrics[i].q/w-_q0[i];
+    ds2=ds*ds;
+    dq2=dq*dq;
+    s2=_metrics[i].s2;
+    sq=_metrics[i].sq;
+    q2=_metrics[i].q2;
+    s2q=_metrics[i].s2q;
+    sq2=_metrics[i].sq2;
+    s2q2=_metrics[i].s2q2;
+    /*X^T.X expressed in the central (co-)moment basis.*/
+    xx+=(dq2*(ds2*w+s2)+4*ds*dq*sq+ds2*q2+2*(dq*s2q+ds*sq2)+s2q2)/(sq10*sq10);
+    /*X^T.Y for the rate.*/
+    r=_metrics[i].r/w;
+    sr=_metrics[i].sr;
+    qr=_metrics[i].qr;
+    sqr=_metrics[i].sqr;
+    c0=-_ra[i];
+    c1=-(_rb[i]-_ra[i])/s10;
+    c2=-(_rc[i]-_ra[i])/q10;
+    c3=-(_ra[i]-_rb[i]-_rc[i])/sq10;
+    rxy+=(ds*dq*(r+c0+c1*ds+c2*dq+c3*ds*dq)*w+(c1*dq+c3*dq2)*s2
+     +(r+c0+2*(c1*ds+(c2+2*c3*ds)*dq))*sq+(c2*ds+c3*ds2)*q2+dq*sr+ds*qr
+     +(c1+2*c3*dq)*s2q+(c2+2*c3*ds)*sq2+sqr+c3*s2q2)/sq10;
+    /*X^T.Y for the distortion, using the same expansion.*/
+    d=_metrics[i].d/w;
+    sd=_metrics[i].sd;
+    qd=_metrics[i].qd;
+    sqd=_metrics[i].sqd;
+    c0=-_da[i];
+    c1=-(_db[i]-_da[i])/s10;
+    c2=-(_dc[i]-_da[i])/q10;
+    c3=-(_da[i]-_db[i]-_dc[i])/sq10;
+    dxy+=(ds*dq*(d+c0+c1*ds+c2*dq+c3*ds*dq)*w+(c1*dq+c3*dq2)*s2
+     +(d+c0+2*(c1*ds+(c2+2*c3*ds)*dq))*sq+(c2*ds+c3*ds2)*q2+dq*sd+ds*qd
+     +(c1+2*c3*dq)*s2q+(c2+2*c3*ds)*sq2+sqd+c3*s2q2)/sq10;
+  }
+  /*Only divide when X^T.X is safely away from zero.*/
+  if(xx>1E-3){
+    *_r=rxy/xx;
+    *_d=dxy/xx;
+  }
+  else{
+    *_r=0;
+    *_d=0;
+  }
+  return wt;
+}
+
+/*Compile collected SATD/logq/rate/RMSE metrics into a form that's immediately
+   useful for mode decision.
+  _metrics:    The accumulated metrics, indexed as [qi][pli][qti][si].
+  _niters_min: The number of iterations to run before the "total change must
+                be decreasing" stopping rule is applied.
+  _reweight:   If non-zero, scale each table entry by wt/(OC_ZWEIGHT+wt),
+                where wt is the weight found for that grid point.
+  _table:      The rate/RMSE table to update in place, indexed as
+                [qi][pli][qti][si].
+  _shift:      The s coordinate of grid point si is si<<_shift.
+  _weight:     Receives the weight found for each grid point.*/
+void oc_mode_metrics_update(oc_mode_metrics (*_metrics)[3][2][OC_COMP_BINS],
+ int _niters_min,int _reweight,oc_mode_rd (*_table)[3][2][OC_COMP_BINS],
+ int _shift,double (*_weight)[3][2][OC_COMP_BINS]){
+  int niters;
+  int prevdr;
+  int prevdd;
+  int dr;
+  int dd;
+  int pli;
+  int qti;
+  int qi;
+  int si;
+  dd=dr=INT_MAX;
+  niters=0;
+  /*The encoder interpolates rate and RMSE terms bilinearly from an
+     OC_LOGQ_BINS by OC_COMP_BINS grid of sample points in _table.
+    To find the sample values at the grid points that minimize the total
+     squared prediction error actually requires solving a relatively sparse
+     linear system with a number of variables equal to the number of grid
+     points.
+    Instead of writing a general sparse linear system solver, we just use
+     Gauss-Seidel iteration, i.e., we update one grid point at a time until
+     they stop changing.*/
+  do{
+    /*dr and dd total the absolute change in rate and RMSE over this pass.*/
+    prevdr=dr;
+    prevdd=dd;
+    dd=dr=0;
+    for(pli=0;pli<3;pli++){
+      for(qti=0;qti<2;qti++){
+        for(qi=0;qi<OC_LOGQ_BINS;qi++){
+          for(si=0;si<OC_COMP_BINS;si++){
+            oc_mode_metrics m[4];
+            int             s0[4];
+            int             s1[4];
+            int             q0[4];
+            int             q1[4];
+            double          ra[4];
+            double          rb[4];
+            double          rc[4];
+            double          da[4];
+            double          db[4];
+            double          dc[4];
+            double          r;
+            double          d;
+            int             rate;
+            int             rmse;
+            int             ds;
+            int             n;
+            n=0;
+            /*Collect the statistics for the (up to) four bins grid point
+               (si,qi) touches.
+              In each case (s1,q1) is the grid point being updated, and the
+               a, b, and c values are the current table entries at
+               (s0,q0), (s1,q0), and (s0,q1), respectively.*/
+            /*The bin between rows qi-1 and qi and columns si-1 and si.*/
+            if(qi>0&&si>0){
+              q0[n]=OC_MODE_LOGQ[qi-1][pli][qti];
+              q1[n]=OC_MODE_LOGQ[qi][pli][qti];
+              s0[n]=si-1<<_shift;
+              s1[n]=si<<_shift;
+              ra[n]=ldexp(_table[qi-1][pli][qti][si-1].rate,-OC_BIT_SCALE);
+              da[n]=ldexp(_table[qi-1][pli][qti][si-1].rmse,-OC_RMSE_SCALE);
+              rb[n]=ldexp(_table[qi-1][pli][qti][si].rate,-OC_BIT_SCALE);
+              db[n]=ldexp(_table[qi-1][pli][qti][si].rmse,-OC_RMSE_SCALE);
+              rc[n]=ldexp(_table[qi][pli][qti][si-1].rate,-OC_BIT_SCALE);
+              dc[n]=ldexp(_table[qi][pli][qti][si-1].rmse,-OC_RMSE_SCALE);
+              *(m+n++)=*(_metrics[qi-1][pli][qti]+si-1);
+            }
+            /*The bin between rows qi-1 and qi starting at column si.
+              In the last column there is no grid point at si+1, so the one
+               at si-1 is used for the opposite edge instead (ds=-1).*/
+            if(qi>0){
+              ds=si+1<OC_COMP_BINS?1:-1;
+              q0[n]=OC_MODE_LOGQ[qi-1][pli][qti];
+              q1[n]=OC_MODE_LOGQ[qi][pli][qti];
+              s0[n]=si+ds<<_shift;
+              s1[n]=si<<_shift;
+              ra[n]=ldexp(_table[qi-1][pli][qti][si+ds].rate,-OC_BIT_SCALE);
+              da[n]=
+               ldexp(_table[qi-1][pli][qti][si+ds].rmse,-OC_RMSE_SCALE);
+              rb[n]=ldexp(_table[qi-1][pli][qti][si].rate,-OC_BIT_SCALE);
+              db[n]=ldexp(_table[qi-1][pli][qti][si].rmse,-OC_RMSE_SCALE);
+              rc[n]=ldexp(_table[qi][pli][qti][si+ds].rate,-OC_BIT_SCALE);
+              dc[n]=ldexp(_table[qi][pli][qti][si+ds].rmse,-OC_RMSE_SCALE);
+              *(m+n++)=*(_metrics[qi-1][pli][qti]+si);
+            }
+            /*The bin between rows qi and qi+1 and columns si-1 and si.*/
+            if(qi+1<OC_LOGQ_BINS&&si>0){
+              q0[n]=OC_MODE_LOGQ[qi+1][pli][qti];
+              q1[n]=OC_MODE_LOGQ[qi][pli][qti];
+              s0[n]=si-1<<_shift;
+              s1[n]=si<<_shift;
+              ra[n]=ldexp(_table[qi+1][pli][qti][si-1].rate,-OC_BIT_SCALE);
+              da[n]=ldexp(_table[qi+1][pli][qti][si-1].rmse,-OC_RMSE_SCALE);
+              rb[n]=ldexp(_table[qi+1][pli][qti][si].rate,-OC_BIT_SCALE);
+              db[n]=ldexp(_table[qi+1][pli][qti][si].rmse,-OC_RMSE_SCALE);
+              rc[n]=ldexp(_table[qi][pli][qti][si-1].rate,-OC_BIT_SCALE);
+              dc[n]=ldexp(_table[qi][pli][qti][si-1].rmse,-OC_RMSE_SCALE);
+              *(m+n++)=*(_metrics[qi][pli][qti]+si-1);
+            }
+            /*The bin between rows qi and qi+1 starting at column si, with
+               the same handling of the last column as above.*/
+            if(qi+1<OC_LOGQ_BINS){
+              ds=si+1<OC_COMP_BINS?1:-1;
+              q0[n]=OC_MODE_LOGQ[qi+1][pli][qti];
+              q1[n]=OC_MODE_LOGQ[qi][pli][qti];
+              s0[n]=si+ds<<_shift;
+              s1[n]=si<<_shift;
+              ra[n]=ldexp(_table[qi+1][pli][qti][si+ds].rate,-OC_BIT_SCALE);
+              da[n]=
+               ldexp(_table[qi+1][pli][qti][si+ds].rmse,-OC_RMSE_SCALE);
+              rb[n]=ldexp(_table[qi+1][pli][qti][si].rate,-OC_BIT_SCALE);
+              db[n]=ldexp(_table[qi+1][pli][qti][si].rmse,-OC_RMSE_SCALE);
+              rc[n]=ldexp(_table[qi][pli][qti][si+ds].rate,-OC_BIT_SCALE);
+              dc[n]=ldexp(_table[qi][pli][qti][si+ds].rmse,-OC_RMSE_SCALE);
+              *(m+n++)=*(_metrics[qi][pli][qti]+si);
+            }
+            /*On the first pass, initialize with a simple weighted average of
+               the neighboring bins.*/
+            if(!OC_HAS_MODE_METRICS&&niters==0){
+              double w;
+              w=r=d=0;
+              while(n-->0){
+                w+=m[n].w;
+                r+=m[n].r;
+                d+=m[n].d;
+              }
+              r=w>1E-3?r/w:0;
+              d=w>1E-3?d/w:0;
+              _weight[qi][pli][qti][si]=w;
+            }
+            else{
+              /*Update the grid point and save the weight for later.*/
+              _weight[qi][pli][qti][si]=
+               oc_mode_metrics_solve(&r,&d,m,s0,s1,q0,q1,ra,rb,rc,da,db,dc,n);
+            }
+            /*Convert back to the table's fixed-point scale, clamped to the
+               range of a 16-bit signed integer.*/
+            rate=OC_CLAMPI(-32768,(int)(ldexp(r,OC_BIT_SCALE)+0.5),32767);
+            rmse=OC_CLAMPI(-32768,(int)(ldexp(d,OC_RMSE_SCALE)+0.5),32767);
+            dr+=abs(rate-_table[qi][pli][qti][si].rate);
+            dd+=abs(rmse-_table[qi][pli][qti][si].rmse);
+            _table[qi][pli][qti][si].rate=(ogg_int16_t)rate;
+            _table[qi][pli][qti][si].rmse=(ogg_int16_t)rmse;
+          }
+        }
+      }
+    }
+  }
+  /*After a fixed number of initial iterations, only iterate so long as the
+     total change is decreasing.
+    This ensures we don't oscillate forever, which is a danger, as all of our
+     results are rounded fairly coarsely.*/
+  while((dr>0||dd>0)&&(niters++<_niters_min||(dr<prevdr&&dd<prevdd)));
+  if(_reweight){
+    /*Now, reduce the values of the optimal solution until we get enough
+       samples in each bin to overcome the constant OC_ZWEIGHT factor.
+      This encourages sampling under-populated bins and prevents a single large
+       sample early on from discouraging coding in that bin ever again.*/
+    for(pli=0;pli<3;pli++){
+      for(qti=0;qti<2;qti++){
+        for(qi=0;qi<OC_LOGQ_BINS;qi++){
+          for(si=0;si<OC_COMP_BINS;si++){
+            double wt;
+            wt=_weight[qi][pli][qti][si];
+            wt/=OC_ZWEIGHT+wt;
+            _table[qi][pli][qti][si].rate=(ogg_int16_t)
+             (_table[qi][pli][qti][si].rate*wt+0.5);
+            _table[qi][pli][qti][si].rmse=(ogg_int16_t)
+             (_table[qi][pli][qti][si].rmse*wt+0.5);
+          }
+        }
+      }
+    }
+  }
+}
+
+/*Writes the in-memory mode metrics (the log-quantizer table followed by the
+   SATD and SAD metric bins) to OC_MODE_METRICS_FILENAME.
+  Nothing is written if the file cannot be opened.
+  The raw structures are written as-is, so the resulting file is not portable
+   between different platforms.*/
+void oc_mode_metrics_dump(void){
+  FILE *fout;
+  fout=fopen(OC_MODE_METRICS_FILENAME,"wb");
+  if(fout==NULL)return;
+  (void)fwrite(OC_MODE_LOGQ,sizeof(OC_MODE_LOGQ),1,fout);
+  (void)fwrite(OC_MODE_METRICS_SATD,sizeof(OC_MODE_METRICS_SATD),1,fout);
+  (void)fwrite(OC_MODE_METRICS_SAD,sizeof(OC_MODE_METRICS_SAD),1,fout);
+  fclose(fout);
+}
+
+/*Prints a rate/RMSE table as C source for an initialized array.
+  _fout:          The stream to print to.
+  _table_name:    The name to give the array in the generated source.
+  _mode_rd_table: The table to print, indexed as [qii][pli][qti][bin].*/
+void oc_mode_metrics_print_rd(FILE *_fout,const char *_table_name,
+#if !defined(OC_COLLECT_METRICS)
+ const oc_mode_rd (*_mode_rd_table)[3][2][OC_COMP_BINS]){
+#else
+ oc_mode_rd (*_mode_rd_table)[3][2][OC_COMP_BINS]){
+#endif
+  static const char *plane_labels[3]={"Y'","Cb","Cr"};
+  static const char *type_labels[2]={"INTRA","INTER"};
+  int row;
+  fprintf(_fout,
+   "# if !defined(OC_COLLECT_METRICS)\n"
+   "static const\n"
+   "# endif\n"
+   "oc_mode_rd %s[OC_LOGQ_BINS][3][2][OC_COMP_BINS]={\n",_table_name);
+  for(row=0;row<OC_LOGQ_BINS;row++){
+    int plane;
+    int qi;
+    /*The qi value this row represents; it only depends on the row.*/
+    qi=(63*row+(OC_LOGQ_BINS-1>>1))/(OC_LOGQ_BINS-1);
+    fputs("  {\n",_fout);
+    for(plane=0;plane<3;plane++){
+      int type;
+      fputs("    {\n",_fout);
+      for(type=0;type<2;type++){
+        int col;
+        fprintf(_fout,"      /*%s  qi=%i  %s*/\n",
+         plane_labels[plane],qi,type_labels[type]);
+        fputs("      {\n",_fout);
+        fputs("        ",_fout);
+        for(col=0;col<OC_COMP_BINS;col++){
+          /*Start a new line after every fourth entry.*/
+          if(col>0&&(col&0x3)==0)fputs("\n        ",_fout);
+          fprintf(_fout,"{%5i,%5i}",
+           _mode_rd_table[row][plane][type][col].rate,
+           _mode_rd_table[row][plane][type][col].rmse);
+          if(col+1<OC_COMP_BINS)fputs(",",_fout);
+        }
+        fputs("\n      }",_fout);
+        if(type<1)fputs(",",_fout);
+        fputs("\n",_fout);
+      }
+      fputs("    }",_fout);
+      if(plane<2)fputs(",",_fout);
+      fputs("\n",_fout);
+    }
+    fputs("  }",_fout);
+    if(row+1<OC_LOGQ_BINS)fputs(",",_fout);
+    fputs("\n",_fout);
+  }
+  fputs(
+   "};\n"
+   "\n",_fout);
+}
+
+/*Prints the current mode decision tables as the C source for a header file:
+   the OC_MODE_LOGQ table followed by the SATD and SAD rate/RMSE tables.
+  _fout: The stream to print to.*/
+void oc_mode_metrics_print(FILE *_fout){
+  int row;
+  /*The include guard and the start of the OC_MODE_LOGQ definition.*/
+  fputs(
+   "/*File generated by libtheora with OC_COLLECT_METRICS"
+   " defined at compile time.*/\n"
+   "#if !defined(_modedec_H)\n"
+   "# define _modedec_H (1)\n"
+   "# include \"encint.h\"\n"
+   "\n"
+   "\n"
+   "\n"
+   "/*The log of the average quantizer for each of the OC_MODE_RD table rows\n"
+   "   (e.g., for the represented qi's, and each pli and qti), in Q10 format.\n"
+   "  The actual statistics used by the encoder will be interpolated from\n"
+   "   that table based on log_plq for the actual quantization matrix used.*/\n"
+   "# if !defined(OC_COLLECT_METRICS)\n"
+   "static const\n"
+   "# endif\n"
+   "ogg_int16_t OC_MODE_LOGQ[OC_LOGQ_BINS][3][2]={\n",_fout);
+  /*One line per row; every row but the last is followed by a comma.*/
+  for(row=0;row<OC_LOGQ_BINS;row++){
+    fprintf(_fout,"  { {0x%04X,0x%04X},{0x%04X,0x%04X},{0x%04X,0x%04X} }%s\n",
+     OC_MODE_LOGQ[row][0][0],OC_MODE_LOGQ[row][0][1],OC_MODE_LOGQ[row][1][0],
+     OC_MODE_LOGQ[row][1][1],OC_MODE_LOGQ[row][2][0],OC_MODE_LOGQ[row][2][1],
+     row+1<OC_LOGQ_BINS?",":"");
+  }
+  fputs(
+   "};\n"
+   "\n",_fout);
+  oc_mode_metrics_print_rd(_fout,"OC_MODE_RD_SATD",OC_MODE_RD_SATD);
+  oc_mode_metrics_print_rd(_fout,"OC_MODE_RD_SAD",OC_MODE_RD_SAD);
+  fputs(
+   "#endif\n",_fout);
+}
+
+
+# if !defined(OC_COLLECT_NO_ENC_FUNCS)
+/*Loads the global mode metrics, if that has not been done already, and
+   builds the rate/RMSE tables from them.
+  If OC_MODE_METRICS_FILENAME can be opened, the tables are read from it;
+   otherwise the metrics start out empty and OC_MODE_LOGQ is filled in from
+   this encoder's log_plq values.
+  _enc: The encoder context.*/
+void oc_enc_mode_metrics_load(oc_enc_ctx *_enc){
+  FILE *fin;
+  oc_restore_fpu(&_enc->state);
+  /*Nothing to do if the metrics have already been loaded.*/
+  if(OC_HAS_MODE_METRICS)return;
+  memset(OC_MODE_METRICS_SATD,0,sizeof(OC_MODE_METRICS_SATD));
+  memset(OC_MODE_METRICS_SAD,0,sizeof(OC_MODE_METRICS_SAD));
+  fin=fopen(OC_MODE_METRICS_FILENAME,"rb");
+  if(fin==NULL){
+    int row;
+    /*No saved metrics: sample the log quantizers at the qi value each row
+       represents.*/
+    for(row=0;row<OC_LOGQ_BINS;row++){
+      int qi;
+      int pli;
+      qi=(63*row+(OC_LOGQ_BINS-1>>1))/(OC_LOGQ_BINS-1);
+      for(pli=0;pli<3;pli++){
+        int qti;
+        for(qti=0;qti<2;qti++){
+          OC_MODE_LOGQ[row][pli][qti]=_enc->log_plq[qi][pli][qti];
+        }
+      }
+    }
+  }
+  else{
+    /*Read in the binary structures as written by oc_mode_metrics_dump().
+      Note this format isn't portable between different platforms.*/
+    (void)fread(OC_MODE_LOGQ,sizeof(OC_MODE_LOGQ),1,fin);
+    (void)fread(OC_MODE_METRICS_SATD,sizeof(OC_MODE_METRICS_SATD),1,fin);
+    (void)fread(OC_MODE_METRICS_SAD,sizeof(OC_MODE_METRICS_SAD),1,fin);
+    fclose(fin);
+  }
+  oc_mode_metrics_update(OC_MODE_METRICS_SATD,100,1,
+   OC_MODE_RD_SATD,OC_SATD_SHIFT,OC_MODE_RD_WEIGHT_SATD);
+  oc_mode_metrics_update(OC_MODE_METRICS_SAD,100,1,
+   OC_MODE_RD_SAD,OC_SAD_SHIFT,OC_MODE_RD_WEIGHT_SAD);
+  OC_HAS_MODE_METRICS=1;
+}
+
+/*The following token skipping code used to also be used in the decoder (and
+   even at one point other places in the encoder).
+  However, it was obsoleted by other optimizations, and is now only used here.
+  It has been moved here to avoid generating the code when it's not needed.*/
+
+/*Determines the number of blocks or coefficients to be skipped for a given
+   token value.
+  This is the common signature of the entries in OC_TOKEN_SKIP_TABLE.
+  _token:      The token value to skip.
+  _extra_bits: The extra bits attached to this token.
+  Return: A positive value indicates that number of coefficients are to be
+           skipped in the current block.
+          Otherwise, the negative of the return value indicates that number of
+           blocks are to be ended.*/
+typedef ptrdiff_t (*oc_token_skip_func)(int _token,int _extra_bits);
+
+/*Handles the simple end of block tokens.
+  _token:      The token value, used to look up the base run length.
+  _extra_bits: The extra bits attached to this token.
+  Return: The negative of the number of blocks to end.*/
+static ptrdiff_t oc_token_skip_eob(int _token,int _extra_bits){
+  int base_run;
+  /*The looked-up value is one less than the base run length.*/
+  base_run=1+OC_UNIBBLE_TABLE32(0,1,2,3,7,15,0,0,_token);
+  return -base_run-_extra_bits;
+}
+
+/*The last EOB token has a special case, where an EOB run of size zero ends all
+   the remaining blocks in the frame.
+  _token:      The token value (unused).
+  _extra_bits: The extra bits attached to this token, i.e., the run length.
+  Return: The negative of the number of blocks to end, or -PTRDIFF_MAX for a
+           run of size zero.*/
+static ptrdiff_t oc_token_skip_eob6(int _token,int _extra_bits){
+  /*Note: We want to return -PTRDIFF_MAX, but that requires C99, which is not
+     yet available everywhere; this should be equivalent.
+    The value is converted to ptrdiff_t before it is negated: negating the
+     size_t directly wraps around and then relies on an implementation-defined
+     conversion back to a signed type.*/
+  if(!_extra_bits)return -(ptrdiff_t)(~(size_t)0>>1);
+  return -_extra_bits;
+}
+
+/*Handles the pure zero run tokens.
+  _token:      The token value (unused).
+  _extra_bits: The extra bits attached to this token.
+  Return: The number of coefficients to skip, one more than the extra bits.*/
+static ptrdiff_t oc_token_skip_zrl(int _token,int _extra_bits){
+  return 1+_extra_bits;
+}
+
+/*Handles a normal coefficient value token.
+  The function takes the same parameters as oc_token_skip_func, since it is
+   only ever called through a pointer of that type; calling a function
+   through a pointer to an incompatible type is undefined behavior.
+  _token:      The token value (unused).
+  _extra_bits: The extra bits attached to this token (unused).
+  Return: Always 1, as these tokens code exactly one coefficient.*/
+static ptrdiff_t oc_token_skip_val(int _token,int _extra_bits){
+  (void)_token;
+  (void)_extra_bits;
+  return 1;
+}
+
+/*Handles a category 1A zero run/coefficient value combo token.
+  The function takes the same parameters as oc_token_skip_func, since it is
+   only ever called through a pointer of that type; calling a function
+   through a pointer to an incompatible type is undefined behavior.
+  _token:      The token value, which determines the length of the run.
+  _extra_bits: The extra bits attached to this token (unused).
+  Return: The number of coefficients to skip.*/
+static ptrdiff_t oc_token_skip_run_cat1a(int _token,int _extra_bits){
+  (void)_extra_bits;
+  return _token-OC_DCT_RUN_CAT1A+2;
+}
+
+/*Handles category 1b, 1c, 2a, and 2b zero run/coefficient value combo tokens.
+  _token:      The token value, which selects the category.
+  _extra_bits: The extra bits attached to this token.
+  Return: The number of coefficients to skip: the masked extra bits plus a
+           per-category base.*/
+static ptrdiff_t oc_token_skip_run(int _token,int _extra_bits){
+  int cat;
+  int run_mask;
+  int run_base;
+  cat=_token-OC_DCT_RUN_CAT1B;
+  /*Both the mask and the base are looked up by category.*/
+  run_base=OC_BYTE_TABLE32(7,11,2,3,cat);
+  run_mask=OC_BYTE_TABLE32(3,7,0,1,cat);
+  return (_extra_bits&run_mask)+run_base;
+}
+
+/*A jump table for computing the number of coefficients or blocks to skip for
+   a given token value.
+  This reduces all the conditional branches, etc., needed to parse these token
+   values down to one indirect jump.
+  The table is indexed by token value and has one entry per token.*/
+static const oc_token_skip_func OC_TOKEN_SKIP_TABLE[TH_NDCT_TOKENS]={
+  /*Tokens 0-5: the simple end of block tokens.*/
+  oc_token_skip_eob,
+  oc_token_skip_eob,
+  oc_token_skip_eob,
+  oc_token_skip_eob,
+  oc_token_skip_eob,
+  oc_token_skip_eob,
+  /*Token 6: the last end of block token, with its special case.*/
+  oc_token_skip_eob6,
+  /*Tokens 7-8: the pure zero run tokens.*/
+  oc_token_skip_zrl,
+  oc_token_skip_zrl,
+  /*Tokens 9-22: the normal coefficient value tokens.*/
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  /*Tokens 23-27: the category 1A zero run/coefficient value combo tokens.*/
+  (oc_token_skip_func)oc_token_skip_run_cat1a,
+  (oc_token_skip_func)oc_token_skip_run_cat1a,
+  (oc_token_skip_func)oc_token_skip_run_cat1a,
+  (oc_token_skip_func)oc_token_skip_run_cat1a,
+  (oc_token_skip_func)oc_token_skip_run_cat1a,
+  /*Tokens 28-31: the remaining zero run/coefficient value combo tokens.*/
+  oc_token_skip_run,
+  oc_token_skip_run,
+  oc_token_skip_run,
+  oc_token_skip_run
+};
+
+/*Looks up how far a token advances the decoding position by dispatching to
+   the handler for that token in OC_TOKEN_SKIP_TABLE.
+  _token:      The token value to skip.
+  _extra_bits: The extra bits attached to this token.
+  Return: A positive value is the number of coefficients to skip in the
+           current block.
+          Otherwise, the negative of the return value is the number of blocks
+           to end.
+          0 will never be returned, so that at least one coefficient in one
+           block will always be decoded for every token.*/
+static ptrdiff_t oc_dct_token_skip(int _token,int _extra_bits){
+  oc_token_skip_func skip_func;
+  skip_func=OC_TOKEN_SKIP_TABLE[_token];
+  return skip_func(_token,_extra_bits);
+}
+
+
+/*Adds the statistics of the coded fragments in the encoder's current token
+   lists to the global SATD and SAD metric bins, then updates the global
+   rate/RMSE tables from them.
+  For each coded fragment, the bits spent on its tokens are totaled and
+   recorded, along with its SA(T)D, log quantizer, and the square root of its
+   SSD.
+  _enc: The encoder context.*/
+void oc_enc_mode_metrics_collect(oc_enc_ctx *_enc){
+  /*The offset of the Huffman table group used for each zig-zag index:
+     index 0 uses group 0, 1-5 group 1, 6-14 group 2, 15-27 group 3, and
+     28-63 group 4, with 16 tables per group.
+    All 64 entries are listed explicitly; any entry left out would be
+     zero-initialized and select group 0 for that index.*/
+  static const unsigned char OC_ZZI_HUFF_OFFSET[64]={
+     0,16,16,16,16,16,32,32,
+    32,32,32,32,32,32,32,48,
+    48,48,48,48,48,48,48,48,
+    48,48,48,48,64,64,64,64,
+    64,64,64,64,64,64,64,64,
+    64,64,64,64,64,64,64,64,
+    64,64,64,64,64,64,64,64,
+    64,64,64,64,64,64,64,64
+  };
+  const oc_fragment *frags;
+  const unsigned    *frag_sad;
+  const unsigned    *frag_satd;
+  const unsigned    *frag_ssd;
+  const ptrdiff_t   *coded_fragis;
+  ptrdiff_t          ncoded_fragis;
+  ptrdiff_t          fragii;
+  double             fragw;
+  int                modelines[3][3][2];
+  int                qti;
+  int                qii;
+  int                qi;
+  int                pli;
+  int                zzi;
+  int                token;
+  int                eb;
+  oc_restore_fpu(&_enc->state);
+  /*Figure out which metric bins to use for this frame's quantizers.*/
+  for(qii=0;qii<_enc->state.nqis;qii++){
+    for(pli=0;pli<3;pli++){
+      for(qti=0;qti<2;qti++){
+        int log_plq;
+        int modeline;
+        log_plq=_enc->log_plq[_enc->state.qis[qii]][pli][qti];
+        for(modeline=0;modeline<OC_LOGQ_BINS-1&&
+         OC_MODE_LOGQ[modeline+1][pli][qti]>log_plq;modeline++);
+        modelines[qii][pli][qti]=modeline;
+      }
+    }
+  }
+  qti=_enc->state.frame_type;
+  frags=_enc->state.frags;
+  frag_sad=_enc->frag_sad;
+  frag_satd=_enc->frag_satd;
+  frag_ssd=_enc->frag_ssd;
+  coded_fragis=_enc->state.coded_fragis;
+  ncoded_fragis=fragii=0;
+  /*Weight the fragments by the inverse frame size; this prevents HD content
+     from dominating the statistics.*/
+  fragw=1.0/_enc->state.nfrags;
+  for(pli=0;pli<3;pli++){
+    ptrdiff_t ti[64];
+    int       eob_token[64];
+    /*The run lengths are kept as ptrdiff_t, the type oc_dct_token_skip()
+       returns, so that a run that ends all the remaining blocks is not
+       truncated when it is stored.*/
+    ptrdiff_t eob_run[64];
+    /*Set up token indices and eob run counts.
+      We don't bother trying to figure out the real cost of the runs that span
+       coefficients; instead we use the costs that were available when R-D
+       token optimization was done.*/
+    for(zzi=0;zzi<64;zzi++){
+      ti[zzi]=_enc->dct_token_offs[pli][zzi];
+      if(ti[zzi]>0){
+        token=_enc->dct_tokens[pli][zzi][0];
+        eb=_enc->extra_bits[pli][zzi][0];
+        eob_token[zzi]=token;
+        eob_run[zzi]=-oc_dct_token_skip(token,eb);
+      }
+      else{
+        eob_token[zzi]=OC_NDCT_EOB_TOKEN_MAX;
+        eob_run[zzi]=0;
+      }
+    }
+    /*Scan the list of coded fragments for this plane.*/
+    ncoded_fragis+=_enc->state.ncoded_fragis[pli];
+    for(;fragii<ncoded_fragis;fragii++){
+      ptrdiff_t fragi;
+      int       frag_bits;
+      int       huffi;
+      ptrdiff_t skip;
+      int       mb_mode;
+      unsigned  sad;
+      unsigned  satd;
+      double    sqrt_ssd;
+      int       bin;
+      int       qtj;
+      fragi=coded_fragis[fragii];
+      frag_bits=0;
+      for(zzi=0;zzi<64;){
+        if(eob_run[zzi]>0){
+          /*We've reached the end of the block.*/
+          eob_run[zzi]--;
+          break;
+        }
+        huffi=_enc->huff_idxs[qti][zzi>0][pli+1>>1]
+         +OC_ZZI_HUFF_OFFSET[zzi];
+        if(eob_token[zzi]<OC_NDCT_EOB_TOKEN_MAX){
+          /*This token caused an EOB run to be flushed.
+            Therefore it gets the bits associated with it.*/
+          frag_bits+=_enc->huff_codes[huffi][eob_token[zzi]].nbits
+           +OC_DCT_TOKEN_EXTRA_BITS[eob_token[zzi]];
+          eob_token[zzi]=OC_NDCT_EOB_TOKEN_MAX;
+        }
+        token=_enc->dct_tokens[pli][zzi][ti[zzi]];
+        eb=_enc->extra_bits[pli][zzi][ti[zzi]];
+        ti[zzi]++;
+        skip=oc_dct_token_skip(token,eb);
+        if(skip<0){
+          /*An EOB token; the loop will see the run on its next pass.*/
+          eob_token[zzi]=token;
+          eob_run[zzi]=-skip;
+        }
+        else{
+          /*A regular DCT value token; accumulate the bits for it.*/
+          frag_bits+=_enc->huff_codes[huffi][token].nbits
+           +OC_DCT_TOKEN_EXTRA_BITS[token];
+          zzi+=(int)skip;
+        }
+      }
+      mb_mode=frags[fragi].mb_mode;
+      qii=frags[fragi].qii;
+      qi=_enc->state.qis[qii];
+      /*The SAD and SATD of the chroma planes (pli>0) are scaled up by 4.*/
+      sad=frag_sad[fragi]<<(pli+1&2);
+      satd=frag_satd[fragi]<<(pli+1&2);
+      sqrt_ssd=sqrt(frag_ssd[fragi]);
+      qtj=mb_mode!=OC_MODE_INTRA;
+      /*Accumulate statistics.
+        The rate (frag_bits) and RMSE (sqrt(frag_ssd)) are not scaled by
+         OC_BIT_SCALE and OC_RMSE_SCALE; this lets us change the scale factor
+         yet still use old data.*/
+      bin=OC_MINI(satd>>OC_SATD_SHIFT,OC_COMP_BINS-1);
+      oc_mode_metrics_add(
+       OC_MODE_METRICS_SATD[modelines[qii][pli][qtj]][pli][qtj]+bin,
+       fragw,satd,_enc->log_plq[qi][pli][qtj],frag_bits,sqrt_ssd);
+      bin=OC_MINI(sad>>OC_SAD_SHIFT,OC_COMP_BINS-1);
+      oc_mode_metrics_add(
+       OC_MODE_METRICS_SAD[modelines[qii][pli][qtj]][pli][qtj]+bin,
+       fragw,sad,_enc->log_plq[qi][pli][qtj],frag_bits,sqrt_ssd);
+    }
+  }
+  /*Update global SA(T)D/logq/rate/RMSE estimation matrix.*/
+  oc_mode_metrics_update(OC_MODE_METRICS_SATD,4,1,
+   OC_MODE_RD_SATD,OC_SATD_SHIFT,OC_MODE_RD_WEIGHT_SATD);
+  oc_mode_metrics_update(OC_MODE_METRICS_SAD,4,1,
+   OC_MODE_RD_SAD,OC_SAD_SHIFT,OC_MODE_RD_WEIGHT_SAD);
+}
+# endif
+
+#endif

+ 109 - 0
jni/libtheora-1.2.0alpha1/lib/collect.h

@@ -0,0 +1,109 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function: mode selection code
+  last mod: $Id$
+
+ ********************************************************************/
+#if !defined(_collect_H)
+# define _collect_H (1)
+# include "encint.h"
+# if defined(OC_COLLECT_METRICS)
+#  include <stdio.h>
+
+
+
+typedef struct oc_mode_metrics oc_mode_metrics;
+
+
+
+/**Sets the file name to load/store mode metrics from/to.
+ * The file name string is stored by reference, and so must be valid for the
+ *  lifetime of the encoder.
+ * Mode metric collection uses global tables; do not attempt to perform
+ *  multiple collections at once.
+ * \param[in] _buf <tt>char[]</tt> The file name.
+ * \retval TH_EIMPL   Not supported by this implementation.*/
+#define TH_ENCCTL_SET_METRICS_FILE (0x8000)
+
+
+
+/*Accumulates various weighted sums of the measurements.
+  w -> weight
+  s -> SATD
+  q -> log quantizer
+  r -> rate (in bits)
+  d -> RMSE
+  All of the single letters correspond to direct, weighted sums, e.g.,
+   w=sum(w_i), s=sum(s_i*w_i), etc.
+  The others correspond to central moments (or co-moments) of the given order,
+   e.g., sq=sum((s_i-s/w)*(q_i-q/w)*w_i).
+  Because we need some moments up to fourth order, we use central moments to
+   minimize the dynamic range and prevent rounding error from dominating the
+   calculations.*/
+/*The same structure is also used to accumulate SAD-based statistics, in
+   which case s holds the SAD instead of the SATD.*/
+struct oc_mode_metrics{
+  /*Direct weighted sums.*/
+  double w;    /*sum(w_i)*/
+  double s;    /*sum(s_i*w_i)*/
+  double q;    /*sum(q_i*w_i)*/
+  double r;    /*sum(r_i*w_i)*/
+  double d;    /*sum(d_i*w_i)*/
+  /*Second-order central (co-)moments, named for the pair of variables whose
+     deviations from the mean are multiplied together.*/
+  double s2;
+  double sq;
+  double q2;
+  double sr;
+  double qr;
+  double r2;
+  double sd;
+  double qd;
+  double d2;
+  /*Third-order central co-moments.*/
+  double s2q;
+  double sq2;
+  double sqr;
+  double sqd;
+  /*Fourth-order central co-moment.*/
+  double s2q2;
+};
+
+
+# define OC_ZWEIGHT   (0.25)
+
+/*TODO: It may be helpful (for block-level quantizers especially) to separate
+   out the contributions from AC and DC into separate tables.*/
+
+/*The log quantizer for each row of the rate/RMSE tables, and the tables
+   themselves (one built from SATD statistics, one from SAD statistics).*/
+extern ogg_int16_t OC_MODE_LOGQ[OC_LOGQ_BINS][3][2];
+extern oc_mode_rd  OC_MODE_RD_SATD[OC_LOGQ_BINS][3][2][OC_COMP_BINS];
+extern oc_mode_rd  OC_MODE_RD_SAD[OC_LOGQ_BINS][3][2][OC_COMP_BINS];
+
+/*Non-zero once the metrics have been loaded.*/
+extern int              OC_HAS_MODE_METRICS;
+/*The accumulated statistics.
+  These have one fewer row than the tables above: each row of bins lies
+   between two adjacent table rows.*/
+extern oc_mode_metrics  OC_MODE_METRICS_SATD[OC_LOGQ_BINS-1][3][2][OC_COMP_BINS];
+extern oc_mode_metrics  OC_MODE_METRICS_SAD[OC_LOGQ_BINS-1][3][2][OC_COMP_BINS];
+/*The name of the file the metrics are loaded from and dumped to.*/
+extern const char      *OC_MODE_METRICS_FILENAME;
+
+/*Writes the in-memory mode metrics to OC_MODE_METRICS_FILENAME.
+  Declared with (void) so that this is a real prototype matching the
+   definition; an empty parameter list leaves the arguments unchecked.*/
+void oc_mode_metrics_dump(void);
+/*Prints the current mode decision tables as C source.*/
+void oc_mode_metrics_print(FILE *_fout);
+
+/*Adds a single weighted sample to a set of metrics.*/
+void oc_mode_metrics_add(oc_mode_metrics *_metrics,
+ double _w,int _s,int _q,int _r,double _d);
+/*Merges _n sets of metrics from _src into _dst.*/
+void oc_mode_metrics_merge(oc_mode_metrics *_dst,
+ const oc_mode_metrics *_src,int _n);
+/*Solves for the rate and distortion at the corner shared by _n bins.
+  Return: The total weight of the bins that contributed.*/
+double oc_mode_metrics_solve(double *_r,double *_d,
+ const oc_mode_metrics *_metrics,const int *_s0,const int *_s1,
+ const int *_q0,const int *_q1,
+ const double *_ra,const double *_rb,const double *_rc,
+ const double *_da,const double *_db,const double *_dc,int _n);
+/*Rebuilds a rate/RMSE table from a set of accumulated metrics.*/
+void oc_mode_metrics_update(oc_mode_metrics (*_metrics)[3][2][OC_COMP_BINS],
+ int _niters_min,int _reweight,oc_mode_rd (*_table)[3][2][OC_COMP_BINS],
+ int _shift,double (*_weight)[3][2][OC_COMP_BINS]);
+/*Loads the global mode metrics, if they have not been loaded already.*/
+void oc_enc_mode_metrics_load(oc_enc_ctx *_enc);
+/*Adds the statistics of the encoder's current coded fragments to the global
+   mode metrics.*/
+void oc_enc_mode_metrics_collect(oc_enc_ctx *_enc);
+
+# endif
+#endif

+ 31 - 0
jni/libtheora-1.2.0alpha1/lib/dct.h

@@ -0,0 +1,31 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id$
+
+ ********************************************************************/
+
+/*Definitions shared by the forward and inverse DCT transforms.*/
+#if !defined(_dct_H)
+# define _dct_H (1)
+
+/*cos(n*pi/16) (resp. sin(m*pi/16)) scaled by 65536.
+  Each OC_CnSm constant is 65536*cos(n*pi/16) rounded to the nearest integer;
+   since n+m==8 this is the same value as 65536*sin(m*pi/16).*/
+#define OC_C1S7 ((ogg_int32_t)64277)
+#define OC_C2S6 ((ogg_int32_t)60547)
+#define OC_C3S5 ((ogg_int32_t)54491)
+#define OC_C4S4 ((ogg_int32_t)46341)
+#define OC_C5S3 ((ogg_int32_t)36410)
+#define OC_C6S2 ((ogg_int32_t)25080)
+#define OC_C7S1 ((ogg_int32_t)12785)
+
+#endif

+ 193 - 0
jni/libtheora-1.2.0alpha1/lib/decapiwrapper.c

@@ -0,0 +1,193 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: decapiwrapper.c 13596 2007-08-23 20:05:38Z tterribe $
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include "apiwrapper.h"
+#include "decint.h"
+#include "theora/theoradec.h"
+
+/*Releases the setup info and decoder context held by an API wrapper (when
+   present) and then zeroes the wrapper.
+  The wrapper struct itself is not freed here.
+  _api: The wrapper to clear.*/
+static void th_dec_api_clear(th_api_wrapper *_api){
+  if(_api->setup!=NULL){
+    th_setup_free(_api->setup);
+  }
+  if(_api->decode!=NULL){
+    th_decode_free(_api->decode);
+  }
+  memset(_api,0,sizeof(*_api));
+}
+
+/*Clears a legacy decoder state: the attached info struct (if any) is cleared
+   with theora_info_clear(), and then the state itself is zeroed.
+  _td: The decoder state to clear.*/
+static void theora_decode_clear(theora_state *_td){
+  theora_info *info;
+  info=_td->i;
+  if(info!=NULL){
+    theora_info_clear(info);
+  }
+  memset(_td,0,sizeof(*_td));
+}
+
+/*Forwards a legacy control request to th_decode_ctl() on the decoder context
+   stored in the state's API wrapper.
+  _td:     The legacy decoder state.
+  _req:    The control code.
+  _buf:    The parameter buffer for the request.
+  _buf_sz: The size of the parameter buffer.
+  Return: Whatever th_decode_ctl() returns.*/
+static int theora_decode_control(theora_state *_td,int _req,
+ void *_buf,size_t _buf_sz){
+  th_api_wrapper *api;
+  api=(th_api_wrapper *)_td->i->codec_setup;
+  return th_decode_ctl(api->decode,_req,_buf,_buf_sz);
+}
+
+/*Forwards a granule position to th_granule_frame() on the decoder context
+   stored in the state's API wrapper.
+  _td: The legacy decoder state.
+  _gp: The granule position to convert.
+  Return: Whatever th_granule_frame() returns.*/
+static ogg_int64_t theora_decode_granule_frame(theora_state *_td,
+ ogg_int64_t _gp){
+  th_api_wrapper *api;
+  api=(th_api_wrapper *)_td->i->codec_setup;
+  return th_granule_frame(api->decode,_gp);
+}
+
+/*Forwards a granule position to th_granule_time() on the decoder context
+   stored in the state's API wrapper.
+  _td: The legacy decoder state.
+  _gp: The granule position to convert.
+  Return: Whatever th_granule_time() returns.*/
+static double theora_decode_granule_time(theora_state *_td,ogg_int64_t _gp){
+  th_api_wrapper *api;
+  api=(th_api_wrapper *)_td->i->codec_setup;
+  return th_granule_time(api->decode,_gp);
+}
+
+/*Dispatch table of the decoder's legacy-API entry points, in the order clear,
+   control, granule-frame, granule-time.
+  theora_decode_init() stores the address of this table in
+   theora_state::internal_decode.*/
+static const oc_state_dispatch_vtable OC_DEC_DISPATCH_VTBL={
+  (oc_state_clear_func)theora_decode_clear,
+  (oc_state_control_func)theora_decode_control,
+  (oc_state_granule_frame_func)theora_decode_granule_frame,
+  (oc_state_granule_time_func)theora_decode_granule_time,
+};
+
+/*Converts a th_info struct into the legacy theora_info layout.
+  Only the fields assigned below are written; anything else in _ci (e.g.,
+   codec_setup) is left untouched.
+  Note the naming difference between the two APIs: the legacy width/height are
+   the new frame_width/frame_height, while the legacy frame_width/frame_height
+   and offset_x/offset_y are the new pic_width/pic_height and pic_x/pic_y.
+  _ci:   The legacy info struct to update.
+  _info: The info struct to convert from.*/
+static void th_info2theora_info(theora_info *_ci,const th_info *_info){
+  _ci->version_major=_info->version_major;
+  _ci->version_minor=_info->version_minor;
+  _ci->version_subminor=_info->version_subminor;
+  _ci->width=_info->frame_width;
+  _ci->height=_info->frame_height;
+  _ci->frame_width=_info->pic_width;
+  _ci->frame_height=_info->pic_height;
+  _ci->offset_x=_info->pic_x;
+  _ci->offset_y=_info->pic_y;
+  _ci->fps_numerator=_info->fps_numerator;
+  _ci->fps_denominator=_info->fps_denominator;
+  _ci->aspect_numerator=_info->aspect_numerator;
+  _ci->aspect_denominator=_info->aspect_denominator;
+  /*Anything other than the two known color spaces maps to "unspecified".*/
+  switch(_info->colorspace){
+    case TH_CS_ITU_REC_470M:_ci->colorspace=OC_CS_ITU_REC_470M;break;
+    case TH_CS_ITU_REC_470BG:_ci->colorspace=OC_CS_ITU_REC_470BG;break;
+    default:_ci->colorspace=OC_CS_UNSPECIFIED;break;
+  }
+  /*Anything other than the three known pixel formats maps to "reserved".*/
+  switch(_info->pixel_fmt){
+    case TH_PF_420:_ci->pixelformat=OC_PF_420;break;
+    case TH_PF_422:_ci->pixelformat=OC_PF_422;break;
+    case TH_PF_444:_ci->pixelformat=OC_PF_444;break;
+    default:_ci->pixelformat=OC_PF_RSVD;break;
+  }
+  _ci->target_bitrate=_info->target_bitrate;
+  _ci->quality=_info->quality;
+  /*Shift an unsigned 1: the granule shift is stored in 5 bits in the header,
+     so it may be as large as 31, and 1<<31 overflows a signed int.*/
+  _ci->keyframe_frequency_force=
+   (ogg_uint32_t)1<<_info->keyframe_granule_shift;
+}
+
+/*Initializes a legacy decoder state from an info struct that has been filled
+   in by theora_decode_header().
+  _td: The decoder state to initialize.
+  _ci: The info struct; its codec_setup must hold the API wrapper that
+        theora_decode_header() allocates.
+  Return: 0 on success, OC_FAULT if an argument is NULL, if no header has been
+   decoded into _ci yet, or if memory could not be allocated, and OC_EINVAL if
+   th_decode_alloc() rejects the parameters.*/
+int theora_decode_init(theora_state *_td,theora_info *_ci){
+  th_api_info    *apiinfo;
+  th_api_wrapper *api;
+  th_info         info;
+  if(_td==NULL||_ci==NULL)return OC_FAULT;
+  api=(th_api_wrapper *)_ci->codec_setup;
+  /*The wrapper is only created by theora_decode_header(); without it there is
+     no setup info to decode with, so fail instead of dereferencing NULL.*/
+  if(api==NULL)return OC_FAULT;
+  /*Allocate our own combined API wrapper/theora_info struct.
+    We put them both in one malloc'd block so that when the API wrapper is
+     freed, the info struct goes with it.
+    This avoids having to figure out whether or not we need to free the info
+     struct in either theora_info_clear() or theora_clear().*/
+  apiinfo=(th_api_info *)_ogg_calloc(1,sizeof(*apiinfo));
+  if(apiinfo==NULL)return OC_FAULT;
+  /*Make our own copy of the info struct, since its lifetime should be
+     independent of the one we were passed in.*/
+  *&apiinfo->info=*_ci;
+  /*Convert the info struct now instead of saving the one we decoded with
+     theora_decode_header(), since the user might have modified values (i.e.,
+     color space, aspect ratio, etc. can be specified from a higher level).
+    The user also might be doing something "clever" with the header packets if
+     they are not using an Ogg encapsulation.*/
+  oc_theora_info2th_info(&info,_ci);
+  /*Don't bother to copy the setup info; th_decode_alloc() makes its own copy
+     of the stuff it needs.*/
+  apiinfo->api.decode=th_decode_alloc(&info,api->setup);
+  if(apiinfo->api.decode==NULL){
+    _ogg_free(apiinfo);
+    return OC_EINVAL;
+  }
+  apiinfo->api.clear=(oc_setup_clear_func)th_dec_api_clear;
+  _td->internal_encode=NULL;
+  /*Provide entry points for ABI compatibility with old decoder shared libs.*/
+  _td->internal_decode=(void *)&OC_DEC_DISPATCH_VTBL;
+  _td->granulepos=0;
+  /*The state's info pointer refers to our private copy, whose codec_setup in
+     turn points at the wrapper living in the same allocation.*/
+  _td->i=&apiinfo->info;
+  _td->i->codec_setup=&apiinfo->api;
+  return 0;
+}
+
+/*Legacy-API entry point that feeds one header packet to th_decode_headerin().
+  _ci: The legacy info struct; an API wrapper is allocated into its
+        codec_setup on first use.
+  _cc: The legacy comment struct.
+  _op: The header packet.
+  Return: 0 on success, OC_FAULT if the wrapper could not be allocated, or the
+   negative value returned by th_decode_headerin().*/
+int theora_decode_header(theora_info *_ci,theora_comment *_cc,ogg_packet *_op){
+  th_api_wrapper *wrapper;
+  th_info         new_info;
+  int             status;
+  wrapper=(th_api_wrapper *)_ci->codec_setup;
+  if(wrapper==NULL){
+    /*The wrapper is created on demand; unlike the one embedded in a
+       theora_state's private block, it is not paired with a theora_info.*/
+    wrapper=(th_api_wrapper *)_ogg_calloc(1,sizeof(*wrapper));
+    _ci->codec_setup=wrapper;
+    if(wrapper==NULL)return OC_FAULT;
+    wrapper->clear=(oc_setup_clear_func)th_dec_api_clear;
+  }
+  /*Rebuild the th_info from the caller's struct on every call rather than
+     caching one: callers that do not use Ogg encapsulation may manipulate the
+     header data between calls, and that has to keep working.*/
+  oc_theora_info2th_info(&new_info,_ci);
+  /*The cast is only valid because theora_comment and th_comment have identical
+     layouts; keep the two in sync with this code.*/
+  status=th_decode_headerin(&new_info,(th_comment *)_cc,&wrapper->setup,_op);
+  /*Error codes are shared between the two APIs, so negative results pass
+     straight through.
+    That includes OC_NOTFORMAT, even though theora_decode_header() is not
+     documented to return it.*/
+  if(status<0){
+    return status;
+  }
+  th_info2theora_info(_ci,&new_info);
+  return 0;
+}
+
+/*Legacy-API entry point that submits one data packet to the decoder.
+  _td: The legacy decoder state.
+  _op: The packet to decode.
+  Return: 0 on success (the state's granulepos is updated), OC_FAULT if the
+   state is missing or uninitialized, or OC_BADPACKET if th_decode_packetin()
+   reports any error.*/
+int theora_decode_packetin(theora_state *_td,ogg_packet *_op){
+  th_api_wrapper *wrapper;
+  ogg_int64_t     granpos;
+  if(_td==NULL||_td->i==NULL||_td->i->codec_setup==NULL){
+    return OC_FAULT;
+  }
+  wrapper=(th_api_wrapper *)_td->i->codec_setup;
+  /*Every negative result is collapsed into OC_BADPACKET.*/
+  if(th_decode_packetin(wrapper->decode,_op,&granpos)<0){
+    return OC_BADPACKET;
+  }
+  _td->granulepos=granpos;
+  return 0;
+}
+
+/*Legacy-API entry point that exposes the current decoded frame.
+  _td:  The legacy decoder state.
+  _yuv: Filled in with the plane dimensions, strides and data pointers taken
+         from th_decode_ycbcr_out(); left untouched on failure.
+  Return: The (non-negative) result of th_decode_ycbcr_out() on success,
+   OC_FAULT if the state is missing or has no decoder context, or the negative
+   value returned by th_decode_ycbcr_out().*/
+int theora_decode_YUVout(theora_state *_td,yuv_buffer *_yuv){
+  th_api_wrapper  *wrapper;
+  th_dec_ctx      *dec;
+  th_ycbcr_buffer  planes;
+  int              status;
+  if(_td==NULL||_td->i==NULL||_td->i->codec_setup==NULL){
+    return OC_FAULT;
+  }
+  wrapper=(th_api_wrapper *)_td->i->codec_setup;
+  dec=(th_dec_ctx *)wrapper->decode;
+  if(dec==NULL){
+    return OC_FAULT;
+  }
+  status=th_decode_ycbcr_out(dec,planes);
+  if(status<0){
+    return status;
+  }
+  /*Plane 0 supplies the luma geometry; plane 1 supplies the geometry used for
+     both chroma planes.*/
+  _yuv->y_width=planes[0].width;
+  _yuv->y_height=planes[0].height;
+  _yuv->y_stride=planes[0].stride;
+  _yuv->uv_width=planes[1].width;
+  _yuv->uv_height=planes[1].height;
+  _yuv->uv_stride=planes[1].stride;
+  _yuv->y=planes[0].data;
+  _yuv->u=planes[1].data;
+  _yuv->v=planes[2].data;
+  return status;
+}

+ 250 - 0
jni/libtheora-1.2.0alpha1/lib/decinfo.c

@@ -0,0 +1,250 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id$
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include "decint.h"
+
+
+
+/*Reads a run of octets out of the pack buffer and stores them in a byte
+   array.
+  Nothing here checks that the pack buffer still holds _len octets.
+  _opb: The pack buffer to read the octets from.
+  _buf: The byte array to store the unpacked bytes in.
+  _len: The number of octets to unpack.*/
+static void oc_unpack_octets(oc_pack_buf *_opb,char *_buf,size_t _len){
+  size_t i;
+  for(i=0;i<_len;i++){
+    _buf[i]=(char)oc_pack_read(_opb,8);
+  }
+}
+
+/*Unpacks a 32-bit integer encoded by octets in little-endian form.
+  The value is assembled in an unsigned long so that placing the top octet
+   never shifts into the sign bit, which would be undefined behavior where
+   long is only 32 bits wide.
+  _opb: The pack buffer to read the four octets from.
+  Return: The decoded value converted to long.
+          Callers treat a negative result as invalid, which is what the
+           conversion yields on common platforms with a 32-bit long when the
+           top bit is set.*/
+static long oc_unpack_length(oc_pack_buf *_opb){
+  unsigned long ret;
+  int           i;
+  ret=0;
+  /*Octet i supplies bits 8*i through 8*i+7.*/
+  for(i=0;i<4;i++)ret|=(unsigned long)oc_pack_read(_opb,8)<<(i<<3);
+  return (long)ret;
+}
+
+/*Unpacks the body of the info header from the pack buffer into _info.
+  The fields are read strictly in bitstream order, so the statements below
+   must not be reordered.
+  _opb:  The pack buffer to read from.
+  _info: The structure to fill in.
+  Return: 0 on success, TH_EVERSION if the stream's major.minor version is
+   newer than TH_VERSION_MAJOR.TH_VERSION_MINOR, or TH_EBADHEADER if a field
+   fails validation or oc_pack_bytes_left() goes negative (presumably because
+   the packet was too short).*/
+static int oc_info_unpack(oc_pack_buf *_opb,th_info *_info){
+  long val;
+  /*Check the codec bitstream version.*/
+  val=oc_pack_read(_opb,8);
+  _info->version_major=(unsigned char)val;
+  val=oc_pack_read(_opb,8);
+  _info->version_minor=(unsigned char)val;
+  val=oc_pack_read(_opb,8);
+  _info->version_subminor=(unsigned char)val;
+  /*verify we can parse this bitstream version.
+     We accept earlier minors and all subminors, by spec*/
+  if(_info->version_major>TH_VERSION_MAJOR||
+   _info->version_major==TH_VERSION_MAJOR&&
+   _info->version_minor>TH_VERSION_MINOR){
+    return TH_EVERSION;
+  }
+  /*Read the encoded frame description.*/
+  /*The two 16-bit frame dimensions are multiplied by 16 as they are stored.*/
+  val=oc_pack_read(_opb,16);
+  _info->frame_width=(ogg_uint32_t)val<<4;
+  val=oc_pack_read(_opb,16);
+  _info->frame_height=(ogg_uint32_t)val<<4;
+  val=oc_pack_read(_opb,24);
+  _info->pic_width=(ogg_uint32_t)val;
+  val=oc_pack_read(_opb,24);
+  _info->pic_height=(ogg_uint32_t)val;
+  val=oc_pack_read(_opb,8);
+  _info->pic_x=(ogg_uint32_t)val;
+  val=oc_pack_read(_opb,8);
+  _info->pic_y=(ogg_uint32_t)val;
+  val=oc_pack_read(_opb,32);
+  _info->fps_numerator=(ogg_uint32_t)val;
+  val=oc_pack_read(_opb,32);
+  _info->fps_denominator=(ogg_uint32_t)val;
+  /*Reject empty frames, a picture region that does not fit inside the frame,
+     and a zero in either term of the frame rate.*/
+  if(_info->frame_width==0||_info->frame_height==0||
+   _info->pic_width+_info->pic_x>_info->frame_width||
+   _info->pic_height+_info->pic_y>_info->frame_height||
+   _info->fps_numerator==0||_info->fps_denominator==0){
+    return TH_EBADHEADER;
+  }
+  /*Note: The sense of pic_y is inverted in what we pass back to the
+     application compared to how it is stored in the bitstream.
+    This is because the bitstream uses a right-handed coordinate system, while
+     applications expect a left-handed one.*/
+  _info->pic_y=_info->frame_height-_info->pic_height-_info->pic_y;
+  val=oc_pack_read(_opb,24);
+  _info->aspect_numerator=(ogg_uint32_t)val;
+  val=oc_pack_read(_opb,24);
+  _info->aspect_denominator=(ogg_uint32_t)val;
+  val=oc_pack_read(_opb,8);
+  _info->colorspace=(th_colorspace)val;
+  val=oc_pack_read(_opb,24);
+  _info->target_bitrate=(int)val;
+  val=oc_pack_read(_opb,6);
+  _info->quality=(int)val;
+  val=oc_pack_read(_opb,5);
+  _info->keyframe_granule_shift=(int)val;
+  val=oc_pack_read(_opb,2);
+  _info->pixel_fmt=(th_pixel_fmt)val;
+  if(_info->pixel_fmt==TH_PF_RSVD)return TH_EBADHEADER;
+  /*The last 3 bits must be zero.*/
+  val=oc_pack_read(_opb,3);
+  if(val!=0||oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER;
+  return 0;
+}
+
+/*Unpacks the body of the comment header from the pack buffer into _tc.
+  On failure _tc->comments is set to the number of user comments that were
+   fully allocated, so that the caller can release the partial result.
+  _opb: The pack buffer to read from.
+  _tc:  The comment structure to fill in.
+  Return: 0 on success, TH_EBADHEADER if a length field is negative or exceeds
+   the data remaining in the packet, or TH_EFAULT if an allocation fails.*/
+static int oc_comment_unpack(oc_pack_buf *_opb,th_comment *_tc){
+  long len;
+  int  i;
+  /*Read the vendor string.*/
+  len=oc_unpack_length(_opb);
+  if(len<0||len>oc_pack_bytes_left(_opb))return TH_EBADHEADER;
+  _tc->vendor=_ogg_malloc((size_t)len+1);
+  if(_tc->vendor==NULL)return TH_EFAULT;
+  oc_unpack_octets(_opb,_tc->vendor,len);
+  _tc->vendor[len]='\0';
+  /*Read the user comments.*/
+  _tc->comments=(int)oc_unpack_length(_opb);
+  len=_tc->comments;
+  /*Every comment needs at least a 4-byte length field, which bounds how many
+     of them can possibly fit in what remains of the packet.*/
+  if(len<0||len>(LONG_MAX>>2)||len<<2>oc_pack_bytes_left(_opb)){
+    _tc->comments=0;
+    return TH_EBADHEADER;
+  }
+  _tc->comment_lengths=(int *)_ogg_malloc(
+   _tc->comments*sizeof(_tc->comment_lengths[0]));
+  _tc->user_comments=(char **)_ogg_malloc(
+   _tc->comments*sizeof(_tc->user_comments[0]));
+  /*A zero-byte allocation is allowed to return NULL, so a NULL array only
+     counts as an allocation failure when there is at least one comment.
+    Otherwise a perfectly valid header with no user comments would be rejected
+     on such platforms.*/
+  if(_tc->comments>0&&
+   (_tc->comment_lengths==NULL||_tc->user_comments==NULL)){
+    _tc->comments=0;
+    return TH_EFAULT;
+  }
+  for(i=0;i<_tc->comments;i++){
+    len=oc_unpack_length(_opb);
+    if(len<0||len>oc_pack_bytes_left(_opb)){
+      _tc->comments=i;
+      return TH_EBADHEADER;
+    }
+    /*len is bounded by the packet size here, so it fits in an int.*/
+    _tc->comment_lengths[i]=(int)len;
+    _tc->user_comments[i]=_ogg_malloc((size_t)len+1);
+    if(_tc->user_comments[i]==NULL){
+      _tc->comments=i;
+      return TH_EFAULT;
+    }
+    oc_unpack_octets(_opb,_tc->user_comments[i],len);
+    _tc->user_comments[i][len]='\0';
+  }
+  return oc_pack_bytes_left(_opb)<0?TH_EBADHEADER:0;
+}
+
+/*Unpacks the body of the setup header: first the quantizer tables, then the
+   Huffman trees.
+  _opb:   The pack buffer to read from.
+  _setup: The setup info to fill in.
+  Return: The negative result of oc_quant_params_unpack() if it fails,
+   otherwise the result of oc_huff_trees_unpack().*/
+static int oc_setup_unpack(oc_pack_buf *_opb,th_setup_info *_setup){
+  int status;
+  status=oc_quant_params_unpack(_opb,&_setup->qinfo);
+  if(status>=0){
+    status=oc_huff_trees_unpack(_opb,_setup->huff_tables);
+  }
+  return status;
+}
+
+/*Clears the contents of a setup info struct: the quantization parameters
+   first, then the Huffman tables.
+  The struct itself is not freed.
+  _setup: The setup info to clear.*/
+static void oc_setup_clear(th_setup_info *_setup){
+  th_quant_info *qinfo;
+  qinfo=&_setup->qinfo;
+  oc_quant_params_clear(qinfo);
+  oc_huff_trees_clear(_setup->huff_tables);
+}
+
+/*Decodes one header packet that has already been loaded into a pack buffer.
+  _opb:   The pack buffer positioned at the start of the packet.
+  _info:  The info struct; must not be NULL.
+  _tc:    The comment struct.
+  _setup: Where the decoded setup info is stored.
+  _op:    The packet itself; only its b_o_s flag is consulted here.
+  Return: 3, 2 or 1 after successfully decoding the info, comment or setup
+   header, respectively; 0 when a data packet arrives after all three headers;
+   TH_EFAULT if a required pointer is NULL or an allocation fails;
+   TH_ENOTFORMAT if the codec string does not match; TH_EBADHEADER for an
+   unknown, out-of-order or malformed header; or another negative value from
+   the header-specific unpacking routine.*/
+static int oc_dec_headerin(oc_pack_buf *_opb,th_info *_info,
+ th_comment *_tc,th_setup_info **_setup,ogg_packet *_op){
+  char buffer[6];
+  long val;
+  int  packtype;
+  int  ret;
+  val=oc_pack_read(_opb,8);
+  packtype=(int)val;
+  /*If we're at a data packet and we have received all three headers, we're
+     done.
+    _tc and _setup are checked before they are dereferenced, matching the
+     TH_EFAULT handling of the header cases below.*/
+  if(!(packtype&0x80)&&_info->frame_width>0){
+    if(_tc==NULL||_setup==NULL)return TH_EFAULT;
+    if(_tc->vendor!=NULL&&*_setup!=NULL)return 0;
+  }
+  /*Check the codec string.*/
+  oc_unpack_octets(_opb,buffer,6);
+  if(memcmp(buffer,"theora",6)!=0)return TH_ENOTFORMAT;
+  switch(packtype){
+    /*Codec info header.*/
+    case 0x80:{
+      /*This should be the first packet, and we should not already be
+         initialized.*/
+      if(!_op->b_o_s||_info->frame_width>0)return TH_EBADHEADER;
+      ret=oc_info_unpack(_opb,_info);
+      if(ret<0)th_info_clear(_info);
+      else ret=3;
+    }break;
+    /*Comment header.*/
+    case 0x81:{
+      if(_tc==NULL)return TH_EFAULT;
+      /*We should have already decoded the info header, and should not yet have
+         decoded the comment header.*/
+      if(_info->frame_width==0||_tc->vendor!=NULL)return TH_EBADHEADER;
+      ret=oc_comment_unpack(_opb,_tc);
+      if(ret<0)th_comment_clear(_tc);
+      else ret=2;
+    }break;
+    /*Codec setup header.*/
+    case 0x82:{
+      oc_setup_info *setup;
+      if(_tc==NULL||_setup==NULL)return TH_EFAULT;
+      /*We should have already decoded the info header and the comment header,
+         and should not yet have decoded the setup header.*/
+      if(_info->frame_width==0||_tc->vendor==NULL||*_setup!=NULL){
+        return TH_EBADHEADER;
+      }
+      setup=(oc_setup_info *)_ogg_calloc(1,sizeof(*setup));
+      if(setup==NULL)return TH_EFAULT;
+      ret=oc_setup_unpack(_opb,setup);
+      if(ret<0){
+        /*Nothing is published to the caller on failure.*/
+        oc_setup_clear(setup);
+        _ogg_free(setup);
+      }
+      else{
+        *_setup=setup;
+        ret=1;
+      }
+    }break;
+    default:{
+      /*We don't know what this header is.*/
+      return TH_EBADHEADER;
+    }break;
+  }
+  return ret;
+}
+
+
+/*Decodes a single header packet.
+  Call this repeatedly with the packets at the start of the stream until it
+   returns 0.
+  _info:  The info struct to fill in; must not be NULL.
+  _tc:    The comment struct to fill in.
+  _setup: Where the decoded setup info is stored.
+  _op:    The packet to decode.
+  Return: TH_EBADHEADER if _op is NULL, TH_EFAULT if _info is NULL, otherwise
+   the result of decoding the packet.*/
+int th_decode_headerin(th_info *_info,th_comment *_tc,
+ th_setup_info **_setup,ogg_packet *_op){
+  oc_pack_buf reader;
+  if(_op==NULL){
+    return TH_EBADHEADER;
+  }
+  else if(_info==NULL){
+    return TH_EFAULT;
+  }
+  oc_pack_readinit(&reader,_op->packet,_op->bytes);
+  return oc_dec_headerin(&reader,_info,_tc,_setup,_op);
+}
+
+/*Clears and frees a setup info struct.
+  _setup: The struct to release; passing NULL is a no-op.*/
+void th_setup_free(th_setup_info *_setup){
+  if(_setup==NULL)return;
+  oc_setup_clear(_setup);
+  _ogg_free(_setup);
+}

+ 186 - 0
jni/libtheora-1.2.0alpha1/lib/decint.h

@@ -0,0 +1,186 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id$
+
+ ********************************************************************/
+
+#include <limits.h>
+#if !defined(_decint_H)
+# define _decint_H (1)
+# include "theora/theoradec.h"
+# include "state.h"
+# include "bitpack.h"
+# include "huffdec.h"
+# include "dequant.h"
+
+typedef struct th_setup_info         oc_setup_info;
+typedef struct oc_dec_opt_vtable     oc_dec_opt_vtable;
+typedef struct oc_dec_pipeline_state oc_dec_pipeline_state;
+typedef struct th_dec_ctx            oc_dec_ctx;
+
+
+
+/*Decoder-specific accelerated functions.*/
+# if defined(OC_C64X_ASM)
+#  include "c64x/c64xdec.h"
+# endif
+
+# if !defined(oc_dec_accel_init)
+#  define oc_dec_accel_init oc_dec_accel_init_c
+# endif
+# if defined(OC_DEC_USE_VTABLE)
+#  if !defined(oc_dec_dc_unpredict_mcu_plane)
+#   define oc_dec_dc_unpredict_mcu_plane(_dec,_pipe,_pli) \
+ ((*(_dec)->opt_vtable.dc_unpredict_mcu_plane)(_dec,_pipe,_pli))
+#  endif
+# else
+#  if !defined(oc_dec_dc_unpredict_mcu_plane)
+#   define oc_dec_dc_unpredict_mcu_plane oc_dec_dc_unpredict_mcu_plane_c
+#  endif
+# endif
+
+
+
+/*Constants for the packet-in state machine specific to the decoder.*/
+
+/*Next packet to read: Data packet.*/
+#define OC_PACKET_DATA (0)
+
+
+
+/*The decoded contents of the setup header: the Huffman tables and the
+   quantization parameters.*/
+struct th_setup_info{
+  /*The Huffman codes.*/
+  ogg_int16_t   *huff_tables[TH_NHUFFMAN_TABLES];
+  /*The quantization parameters.*/
+  th_quant_info  qinfo;
+};
+
+
+
+/*Decoder specific functions with accelerated variants.
+  When OC_DEC_USE_VTABLE is defined, the oc_dec_dc_unpredict_mcu_plane() macro
+   above dispatches through the matching member of this table.*/
+struct oc_dec_opt_vtable{
+  void (*dc_unpredict_mcu_plane)(oc_dec_ctx *_dec,
+   oc_dec_pipeline_state *_pipe,int _pli);
+};
+
+
+
+struct oc_dec_pipeline_state{
+  /*Decoded DCT coefficients.
+    These are placed here instead of on the stack so that they can persist
+     between blocks, which makes clearing them back to zero much faster when
+     only a few non-zero coefficients were decoded.
+    It requires at least 65 elements because the zig-zag index array uses the
+     65th element as a dumping ground for out-of-range indices to protect us
+     from buffer overflow.
+    We make it fully twice as large so that the second half can serve as the
+     reconstruction buffer, which saves passing another parameter to all the
+     acceleration functions.
+    It also solves problems with 16-byte alignment for NEON on ARM.
+    gcc (as of 4.2.1) only seems to be able to give stack variables 8-byte
+     alignment, and silently produces incorrect results if you ask for 16.
+    Finally, keeping it off the stack means there's less likely to be a data
+     hazard between the NEON co-processor and the regular ARM core, which
+     avoids unnecessary stalls.*/
+  OC_ALIGN16(ogg_int16_t dct_coeffs[128]);
+  OC_ALIGN16(signed char bounding_values[256]);
+  ptrdiff_t           ti[3][64];
+  ptrdiff_t           ebi[3][64];
+  ptrdiff_t           eob_runs[3][64];
+  const ptrdiff_t    *coded_fragis[3];
+  const ptrdiff_t    *uncoded_fragis[3];
+  ptrdiff_t           ncoded_fragis[3];
+  ptrdiff_t           nuncoded_fragis[3];
+  const ogg_uint16_t *dequant[3][3][2];
+  int                 fragy0[3];
+  int                 fragy_end[3];
+  int                 pred_last[3][4];
+  int                 mcu_nvfrags;
+  int                 loop_filter;
+  int                 pp_level;
+};
+
+
+struct th_dec_ctx{
+  /*Shared encoder/decoder state.*/
+  oc_theora_state        state;
+  /*Whether or not packets are ready to be emitted.
+    This takes on negative values while there are remaining header packets to
+     be emitted, reaches 0 when the codec is ready for input, and goes to 1
+     when a frame has been processed and a data packet is ready.*/
+  int                    packet_state;
+  /*Buffer in which to assemble packets.*/
+  oc_pack_buf            opb;
+  /*Huffman decode trees.*/
+  ogg_int16_t           *huff_tables[TH_NHUFFMAN_TABLES];
+  /*The index of the first token in each plane for each coefficient.*/
+  ptrdiff_t              ti0[3][64];
+  /*The number of outstanding EOB runs at the start of each coefficient in each
+     plane.*/
+  ptrdiff_t              eob_runs[3][64];
+  /*The DCT token lists.*/
+  unsigned char         *dct_tokens;
+  /*The extra bits associated with DCT tokens.*/
+  unsigned char         *extra_bits;
+  /*The number of dct tokens unpacked so far.*/
+  int                    dct_tokens_count;
+  /*The out-of-loop post-processing level.*/
+  int                    pp_level;
+  /*The DC scale used for out-of-loop deblocking.*/
+  int                    pp_dc_scale[64];
+  /*The sharpen modifier used for out-of-loop deringing.*/
+  int                    pp_sharp_mod[64];
+  /*The DC quantization index of each block.*/
+  unsigned char         *dc_qis;
+  /*The variance of each block.*/
+  int                   *variances;
+  /*The storage for the post-processed frame buffer.*/
+  unsigned char         *pp_frame_data;
+  /*Whether or not the post-processed frame buffer has space for chroma.*/
+  int                    pp_frame_state;
+  /*The buffer used for the post-processed frame.
+    Note that this is _not_ guaranteed to have the same strides and offsets as
+     the reference frame buffers.*/
+  th_ycbcr_buffer        pp_frame_buf;
+  /*The striped decode callback function.*/
+  th_stripe_callback     stripe_cb;
+  /*The per-frame decoding pipeline state.*/
+  oc_dec_pipeline_state  pipe;
+# if defined(OC_DEC_USE_VTABLE)
+  /*Table for decoder acceleration functions.*/
+  oc_dec_opt_vtable      opt_vtable;
+# endif
+# if defined(HAVE_CAIRO)
+  /*Output metrics for debugging.*/
+  int                    telemetry;
+  int                    telemetry_mbmode;
+  int                    telemetry_mv;
+  int                    telemetry_qi;
+  int                    telemetry_bits;
+  int                    telemetry_frame_bytes;
+  int                    telemetry_coding_bytes;
+  int                    telemetry_mode_bytes;
+  int                    telemetry_mv_bytes;
+  int                    telemetry_qi_bytes;
+  int                    telemetry_dc_bytes;
+  unsigned char         *telemetry_frame_data;
+# endif
+};
+
+/*Default pure-C implementations of decoder-specific accelerated functions.*/
+void oc_dec_accel_init_c(oc_dec_ctx *_dec);
+
+void oc_dec_dc_unpredict_mcu_plane_c(oc_dec_ctx *_dec,
+ oc_dec_pipeline_state *_pipe,int _pli);
+
+#endif

+ 2993 - 0
jni/libtheora-1.2.0alpha1/lib/decode.c

@@ -0,0 +1,2993 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id$
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <ogg/ogg.h>
+#include "decint.h"
+#if defined(OC_DUMP_IMAGES)
+# include <stdio.h>
+# include "png.h"
+#endif
+#if defined(HAVE_CAIRO)
+# include <cairo.h>
+#endif
+
+
+/*No post-processing.*/
+#define OC_PP_LEVEL_DISABLED  (0)
+/*Keep track of DC qi for each block only.*/
+#define OC_PP_LEVEL_TRACKDCQI (1)
+/*Deblock the luma plane.*/
+#define OC_PP_LEVEL_DEBLOCKY  (2)
+/*Dering the luma plane.*/
+#define OC_PP_LEVEL_DERINGY   (3)
+/*Stronger luma plane deringing.*/
+#define OC_PP_LEVEL_SDERINGY  (4)
+/*Deblock the chroma planes.*/
+#define OC_PP_LEVEL_DEBLOCKC  (5)
+/*Dering the chroma planes.*/
+#define OC_PP_LEVEL_DERINGC   (6)
+/*Stronger chroma plane deringing.*/
+#define OC_PP_LEVEL_SDERINGC  (7)
+/*Maximum valid post-processing level.*/
+#define OC_PP_LEVEL_MAX       (7)
+
+
+
+/*The mode alphabets for the various mode coding schemes.
+  Scheme 0 uses a custom alphabet, which is not stored in this table.
+  NOTE(review): with 7 rows and scheme 0 excluded, row i presumably holds the
+   alphabet for scheme i+1; confirm against the lookup in the mode unpacking
+   code.*/
+static const unsigned char OC_MODE_ALPHABETS[7][OC_NMODES]={
+  /*Last MV dominates.*/
+  {
+    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,
+    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
+    OC_MODE_INTER_MV_FOUR
+  },
+  {
+    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_NOMV,
+    OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
+    OC_MODE_INTER_MV_FOUR
+  },
+  {
+    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST2,
+    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
+    OC_MODE_INTER_MV_FOUR
+  },
+  {
+    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_NOMV,
+    OC_MODE_INTER_MV_LAST2,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,
+    OC_MODE_GOLDEN_MV,OC_MODE_INTER_MV_FOUR
+  },
+  /*No MV dominates.*/
+  {
+    OC_MODE_INTER_NOMV,OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,
+    OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
+    OC_MODE_INTER_MV_FOUR
+  },
+  {
+    OC_MODE_INTER_NOMV,OC_MODE_GOLDEN_NOMV,OC_MODE_INTER_MV_LAST,
+    OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_MV,
+    OC_MODE_INTER_MV_FOUR
+  },
+  /*Default ordering.*/
+  {
+    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST,
+    OC_MODE_INTER_MV_LAST2,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
+    OC_MODE_INTER_MV_FOUR
+  }
+};
+
+
+/*The original DCT tokens are extended and reordered during the construction of
+   the Huffman tables.
+  The extension means more bits can be read with fewer calls to the bitpacker
+   during the Huffman decoding process (at the cost of larger Huffman tables),
+   and fewer tokens require additional extra bits (reducing the average storage
+   per decoded token).
+  The revised ordering reveals essential information in the token value
+   itself; specifically, whether or not there are additional extra bits to read
+   and the parameter to which those extra bits are applied.
+  The token is used to fetch a code word from the OC_DCT_CODE_WORD table below.
+  The extra bits are added into code word at the bit position inferred from the
+   token value, giving the final code word from which all required parameters
+   are derived.
+  The number of EOBs and the leading zero run length can be extracted directly.
+  The coefficient magnitude is optionally negated before extraction, according
+   to a 'flip' bit.*/
+
+/*The number of additional extra bits that are decoded with each of the
+   internal DCT tokens.
+  Only the first 15 internal tokens have an entry here.*/
+static const unsigned char OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[15]={
+  12,4,3,3,4,4,5,5,8,8,8,8,3,3,6
+};
+
+/*Whether or not an internal token needs any additional extra bits.
+  This is true exactly when the token indexes into
+   OC_INTERNAL_DCT_TOKEN_EXTRA_BITS.
+  The argument is parenthesized so that expressions using operators that bind
+   more loosely than '<' (e.g., ?: or |) are compared as a whole.*/
+#define OC_DCT_TOKEN_NEEDS_MORE(_token) \
+ ((_token)<(int)(sizeof(OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)/ \
+  sizeof(*OC_INTERNAL_DCT_TOKEN_EXTRA_BITS)))
+
+/*This token (OC_DCT_REPEAT_RUN3_TOKEN) requires more than 8 extra bits.*/
+#define OC_DCT_TOKEN_FAT_EOB (0)
+
+/*The number of EOBs to use for an end-of-frame token.
+  Note: We want to set eobs to PTRDIFF_MAX here, but that requires C99, which
+   is not yet available everywhere; this should be equivalent.*/
+#define OC_DCT_EOB_FINISH (~(size_t)0>>1)
+
+/*The location of the (6) run length bits in the code word.
+  These are placed at index 0 and given 8 bits (even though 6 would suffice)
+   because it may be faster to extract the lower byte on some platforms.*/
+#define OC_DCT_CW_RLEN_SHIFT (0)
+/*The location of the (12) EOB bits in the code word.*/
+#define OC_DCT_CW_EOB_SHIFT  (8)
+/*The location of the (1) flip bit in the code word.
+  This must be right under the magnitude bits.*/
+#define OC_DCT_CW_FLIP_BIT   (20)
+/*The location of the (11) token magnitude bits in the code word.
+  These must be last, and rely on a sign-extending right shift.*/
+#define OC_DCT_CW_MAG_SHIFT  (21)
+
+/*Pack the given fields into a code word.
+  Each field is positioned by multiplying by a power of two rather than by
+   shifting left, because _mag (and _mag-_flip) may be negative and
+   left-shifting a negative value is undefined behavior in C.
+  For non-negative fields the result is identical to the shifted form.*/
+#define OC_DCT_CW_PACK(_eobs,_rlen,_mag,_flip) \
+ ((_eobs)*(1<<OC_DCT_CW_EOB_SHIFT)| \
+ (_rlen)*(1<<OC_DCT_CW_RLEN_SHIFT)| \
+ (_flip)*(1<<OC_DCT_CW_FLIP_BIT)| \
+ ((_mag)-(_flip))*(1<<OC_DCT_CW_MAG_SHIFT))
+
+/*A special code word value that signals the end of the frame (a long EOB run
+   of zero).*/
+#define OC_DCT_CW_FINISH (0)
+
+/*The position at which to insert the extra bits in the code word.
+  We use this formulation because Intel has no useful cmov.
+  A real architecture would probably do better with two of those.
+  This translates to 11 instructions(!), and is _still_ faster than either a
+   table lookup (just barely) or the naive double-ternary implementation (which
+   gcc translates to a jump and a cmov).
+  This assumes OC_DCT_CW_RLEN_SHIFT is zero, but could easily be reworked if
+   you want to make one of the other shifts zero.*/
+#define OC_DCT_TOKEN_EB_POS(_token) \
+ ((OC_DCT_CW_EOB_SHIFT-OC_DCT_CW_MAG_SHIFT&-((_token)<2)) \
+ +(OC_DCT_CW_MAG_SHIFT&-((_token)<12)))
+
+/*The code words for each internal token.
+  See the notes at OC_DCT_TOKEN_MAP for the reasons why things are out of
+   order.
+  The table is indexed by the internal token value.
+  For tokens that take extra bits, the entry is only the base code word: the
+   token unpackers shift the extra bits left by OC_DCT_TOKEN_EB_POS(token) and
+   add them to the entry.*/
+static const ogg_int32_t OC_DCT_CODE_WORD[92]={
+  /*These tokens require additional extra bits for the EOB count.*/
+  /*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/
+  OC_DCT_CW_FINISH,
+  /*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/
+  OC_DCT_CW_PACK(16, 0,  0,0),
+  /*These tokens require additional extra bits for the magnitude.*/
+  /*OC_DCT_VAL_CAT5 (4 extra bits-1 already read)*/
+  OC_DCT_CW_PACK( 0, 0, 13,0),
+  OC_DCT_CW_PACK( 0, 0, 13,1),
+  /*OC_DCT_VAL_CAT6 (5 extra bits-1 already read)*/
+  OC_DCT_CW_PACK( 0, 0, 21,0),
+  OC_DCT_CW_PACK( 0, 0, 21,1),
+  /*OC_DCT_VAL_CAT7 (6 extra bits-1 already read)*/
+  OC_DCT_CW_PACK( 0, 0, 37,0),
+  OC_DCT_CW_PACK( 0, 0, 37,1),
+  /*OC_DCT_VAL_CAT8 (10 extra bits-2 already read)*/
+  OC_DCT_CW_PACK( 0, 0, 69,0),
+  OC_DCT_CW_PACK( 0, 0,325,0),
+  OC_DCT_CW_PACK( 0, 0, 69,1),
+  OC_DCT_CW_PACK( 0, 0,325,1),
+  /*These tokens require additional extra bits for the run length.*/
+  /*OC_DCT_RUN_CAT1C (4 extra bits-1 already read)*/
+  OC_DCT_CW_PACK( 0,10, +1,0),
+  OC_DCT_CW_PACK( 0,10, -1,0),
+  /*OC_DCT_ZRL_TOKEN (6 extra bits)
+    Flip is set to distinguish this from OC_DCT_CW_FINISH.*/
+  OC_DCT_CW_PACK( 0, 0,  0,1),
+  /*The remaining tokens require no additional extra bits.*/
+  /*OC_DCT_EOB1_TOKEN (0 extra bits)*/
+  OC_DCT_CW_PACK( 1, 0,  0,0),
+  /*OC_DCT_EOB2_TOKEN (0 extra bits)*/
+  OC_DCT_CW_PACK( 2, 0,  0,0),
+  /*OC_DCT_EOB3_TOKEN (0 extra bits)*/
+  OC_DCT_CW_PACK( 3, 0,  0,0),
+  /*OC_DCT_RUN_CAT1A (1 extra bit-1 already read)x5*/
+  OC_DCT_CW_PACK( 0, 1, +1,0),
+  OC_DCT_CW_PACK( 0, 1, -1,0),
+  OC_DCT_CW_PACK( 0, 2, +1,0),
+  OC_DCT_CW_PACK( 0, 2, -1,0),
+  OC_DCT_CW_PACK( 0, 3, +1,0),
+  OC_DCT_CW_PACK( 0, 3, -1,0),
+  OC_DCT_CW_PACK( 0, 4, +1,0),
+  OC_DCT_CW_PACK( 0, 4, -1,0),
+  OC_DCT_CW_PACK( 0, 5, +1,0),
+  OC_DCT_CW_PACK( 0, 5, -1,0),
+  /*OC_DCT_RUN_CAT2A (2 extra bits-2 already read)*/
+  OC_DCT_CW_PACK( 0, 1, +2,0),
+  OC_DCT_CW_PACK( 0, 1, +3,0),
+  OC_DCT_CW_PACK( 0, 1, -2,0),
+  OC_DCT_CW_PACK( 0, 1, -3,0),
+  /*OC_DCT_RUN_CAT1B (3 extra bits-3 already read)*/
+  OC_DCT_CW_PACK( 0, 6, +1,0),
+  OC_DCT_CW_PACK( 0, 7, +1,0),
+  OC_DCT_CW_PACK( 0, 8, +1,0),
+  OC_DCT_CW_PACK( 0, 9, +1,0),
+  OC_DCT_CW_PACK( 0, 6, -1,0),
+  OC_DCT_CW_PACK( 0, 7, -1,0),
+  OC_DCT_CW_PACK( 0, 8, -1,0),
+  OC_DCT_CW_PACK( 0, 9, -1,0),
+  /*OC_DCT_RUN_CAT2B (3 extra bits-3 already read)*/
+  OC_DCT_CW_PACK( 0, 2, +2,0),
+  OC_DCT_CW_PACK( 0, 3, +2,0),
+  OC_DCT_CW_PACK( 0, 2, +3,0),
+  OC_DCT_CW_PACK( 0, 3, +3,0),
+  OC_DCT_CW_PACK( 0, 2, -2,0),
+  OC_DCT_CW_PACK( 0, 3, -2,0),
+  OC_DCT_CW_PACK( 0, 2, -3,0),
+  OC_DCT_CW_PACK( 0, 3, -3,0),
+  /*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits-3 already read)
+    Flip is set on the first one to distinguish it from OC_DCT_CW_FINISH.*/
+  OC_DCT_CW_PACK( 0, 0,  0,1),
+  OC_DCT_CW_PACK( 0, 1,  0,0),
+  OC_DCT_CW_PACK( 0, 2,  0,0),
+  OC_DCT_CW_PACK( 0, 3,  0,0),
+  OC_DCT_CW_PACK( 0, 4,  0,0),
+  OC_DCT_CW_PACK( 0, 5,  0,0),
+  OC_DCT_CW_PACK( 0, 6,  0,0),
+  OC_DCT_CW_PACK( 0, 7,  0,0),
+  /*OC_ONE_TOKEN (0 extra bits)*/
+  OC_DCT_CW_PACK( 0, 0, +1,0),
+  /*OC_MINUS_ONE_TOKEN (0 extra bits)*/
+  OC_DCT_CW_PACK( 0, 0, -1,0),
+  /*OC_TWO_TOKEN (0 extra bits)*/
+  OC_DCT_CW_PACK( 0, 0, +2,0),
+  /*OC_MINUS_TWO_TOKEN (0 extra bits)*/
+  OC_DCT_CW_PACK( 0, 0, -2,0),
+  /*OC_DCT_VAL_CAT2 (1 extra bit-1 already read)x4*/
+  OC_DCT_CW_PACK( 0, 0, +3,0),
+  OC_DCT_CW_PACK( 0, 0, -3,0),
+  OC_DCT_CW_PACK( 0, 0, +4,0),
+  OC_DCT_CW_PACK( 0, 0, -4,0),
+  OC_DCT_CW_PACK( 0, 0, +5,0),
+  OC_DCT_CW_PACK( 0, 0, -5,0),
+  OC_DCT_CW_PACK( 0, 0, +6,0),
+  OC_DCT_CW_PACK( 0, 0, -6,0),
+  /*OC_DCT_VAL_CAT3 (2 extra bits-2 already read)*/
+  OC_DCT_CW_PACK( 0, 0, +7,0),
+  OC_DCT_CW_PACK( 0, 0, +8,0),
+  OC_DCT_CW_PACK( 0, 0, -7,0),
+  OC_DCT_CW_PACK( 0, 0, -8,0),
+  /*OC_DCT_VAL_CAT4 (3 extra bits-3 already read)*/
+  OC_DCT_CW_PACK( 0, 0, +9,0),
+  OC_DCT_CW_PACK( 0, 0,+10,0),
+  OC_DCT_CW_PACK( 0, 0,+11,0),
+  OC_DCT_CW_PACK( 0, 0,+12,0),
+  OC_DCT_CW_PACK( 0, 0, -9,0),
+  OC_DCT_CW_PACK( 0, 0,-10,0),
+  OC_DCT_CW_PACK( 0, 0,-11,0),
+  OC_DCT_CW_PACK( 0, 0,-12,0),
+  /*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits-3 already read)*/
+  OC_DCT_CW_PACK( 8, 0,  0,0),
+  OC_DCT_CW_PACK( 9, 0,  0,0),
+  OC_DCT_CW_PACK(10, 0,  0,0),
+  OC_DCT_CW_PACK(11, 0,  0,0),
+  OC_DCT_CW_PACK(12, 0,  0,0),
+  OC_DCT_CW_PACK(13, 0,  0,0),
+  OC_DCT_CW_PACK(14, 0,  0,0),
+  OC_DCT_CW_PACK(15, 0,  0,0),
+  /*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits-2 already read)*/
+  OC_DCT_CW_PACK( 4, 0,  0,0),
+  OC_DCT_CW_PACK( 5, 0,  0,0),
+  OC_DCT_CW_PACK( 6, 0,  0,0),
+  OC_DCT_CW_PACK( 7, 0,  0,0),
+};
+
+
+
+/*Reads one super block run length from the bitstream.
+  Short runs are produced directly by the Huffman lookup; longer ones are
+   assembled from a base offset plus a number of raw bits.
+  Return: The decoded run length.*/
+static int oc_sb_run_unpack(oc_pack_buf *_opb){
+  /*Coding scheme:
+       Codeword            Run Length
+     0                       1
+     10x                     2-3
+     110x                    4-5
+     1110xx                  6-9
+     11110xxx                10-17
+     111110xxxx              18-33
+     111111xxxxxxxxxxxx      34-4129*/
+  static const ogg_int16_t OC_SB_RUN_TREE[22]={
+    4,
+     -(1<<8|1),-(1<<8|1),-(1<<8|1),-(1<<8|1),
+     -(1<<8|1),-(1<<8|1),-(1<<8|1),-(1<<8|1),
+     -(3<<8|2),-(3<<8|2),-(3<<8|3),-(3<<8|3),
+     -(4<<8|4),-(4<<8|5),-(4<<8|2<<4|6-6),17,
+      2,
+       -(2<<8|2<<4|10-6),-(2<<8|2<<4|14-6),-(2<<8|4<<4|18-6),-(2<<8|12<<4|34-6)
+  };
+  int code;
+  int base;
+  int nbits;
+  code=oc_huff_token_decode(_opb,OC_SB_RUN_TREE);
+  /*Anything below 0x10 already is the run length.*/
+  if(code<0x10)return code;
+  /*Otherwise the value packs (extra bit count)<<4|(first run length-6).
+    The offset can spill into bit 4, so it is subtracted before the shift.*/
+  base=code&0x1F;
+  nbits=code-base>>4;
+  return 6+base+(int)oc_pack_read(_opb,nbits);
+}
+
+/*Reads one block-level run length from the bitstream using the fixed code
+   tabulated below.
+  Return: The value oc_huff_token_decode() produces for OC_BLOCK_RUN_TREE,
+   whose leaves hold the run lengths 1 through 30.*/
+static int oc_block_run_unpack(oc_pack_buf *_opb){
+  /*Coding scheme:
+     Codeword             Run Length
+     0x                      1-2
+     10x                     3-4
+     110x                    5-6
+     1110xx                  7-10
+     11110xx                 11-14
+     11111xxxx               15-30*/
+  /*NOTE(review): the layout appears to be a bit count followed by that many
+     (as a power of two) entries, where negative entries are leaves of the
+     form -(code length<<8|value) and positive entries are the index of a
+     child node; confirm against oc_huff_token_decode().*/
+  static const ogg_int16_t OC_BLOCK_RUN_TREE[61]={
+    5,
+     -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
+     -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
+     -(2<<8|2),-(2<<8|2),-(2<<8|2),-(2<<8|2),
+     -(2<<8|2),-(2<<8|2),-(2<<8|2),-(2<<8|2),
+     -(3<<8|3),-(3<<8|3),-(3<<8|3),-(3<<8|3),
+     -(3<<8|4),-(3<<8|4),-(3<<8|4),-(3<<8|4),
+     -(4<<8|5),-(4<<8|5),-(4<<8|6),-(4<<8|6),
+     33,       36,       39,       44,
+      1,-(1<<8|7),-(1<<8|8),
+      1,-(1<<8|9),-(1<<8|10),
+      2,-(2<<8|11),-(2<<8|12),-(2<<8|13),-(2<<8|14),
+      4,
+       -(4<<8|15),-(4<<8|16),-(4<<8|17),-(4<<8|18),
+       -(4<<8|19),-(4<<8|20),-(4<<8|21),-(4<<8|22),
+       -(4<<8|23),-(4<<8|24),-(4<<8|25),-(4<<8|26),
+       -(4<<8|27),-(4<<8|28),-(4<<8|29),-(4<<8|30)
+  };
+  return oc_huff_token_decode(_opb,OC_BLOCK_RUN_TREE);
+}
+
+
+
+/*Installs the plain C implementation of the decoder's replaceable routines.
+  This only does something when OC_DEC_USE_VTABLE is defined, in which case
+   the dc_unpredict_mcu_plane slot of the vtable is pointed at
+   oc_dec_dc_unpredict_mcu_plane_c.*/
+void oc_dec_accel_init_c(oc_dec_ctx *_dec){
+# if defined(OC_DEC_USE_VTABLE)
+  _dec->opt_vtable.dc_unpredict_mcu_plane=
+   oc_dec_dc_unpredict_mcu_plane_c;
+# endif
+}
+
+/*Initializes a decoder context from the info and setup headers.
+  This initializes the shared codec state, copies the Huffman trees, allocates
+   the DCT token buffer, builds the dequantization tables and the
+   post-processing sharpening modifiers, copies the loop filter limits, and
+   resets the post-processing, stripe callback and (with HAVE_CAIRO)
+   telemetry fields.
+  _dec:   The decoder context to initialize.
+  _info:  The stream info passed on to oc_state_init().
+  _setup: The setup info supplying the Huffman tables and quantization info.
+  Return: 0 on success, the negative value returned by oc_state_init() or
+   oc_huff_trees_copy() if either of them fails, or TH_EFAULT if the token
+   buffer cannot be allocated.
+  On failure, whatever was set up before the failing step is released again.*/
+static int oc_dec_init(oc_dec_ctx *_dec,const th_info *_info,
+ const th_setup_info *_setup){
+  int qti;
+  int pli;
+  int qi;
+  int ret;
+  ret=oc_state_init(&_dec->state,_info,3);
+  if(ret<0)return ret;
+  ret=oc_huff_trees_copy(_dec->huff_tables,
+   (const ogg_int16_t *const *)_setup->huff_tables);
+  if(ret<0){
+    oc_state_clear(&_dec->state);
+    return ret;
+  }
+  /*For each fragment, allocate one byte for every DCT coefficient token, plus
+     one byte for extra-bits for each token, plus one more byte for the long
+     EOB run, just in case it's the very last token and has a run length of
+     one.*/
+  _dec->dct_tokens=(unsigned char *)_ogg_malloc((64+64+1)*
+   _dec->state.nfrags*sizeof(_dec->dct_tokens[0]));
+  if(_dec->dct_tokens==NULL){
+    oc_huff_trees_clear(_dec->huff_tables);
+    oc_state_clear(&_dec->state);
+    return TH_EFAULT;
+  }
+  /*Point every dequant_tables entry at its storage in dequant_table_data
+     before the tables are filled in.*/
+  for(qi=0;qi<64;qi++)for(pli=0;pli<3;pli++)for(qti=0;qti<2;qti++){
+    _dec->state.dequant_tables[qi][pli][qti]=
+     _dec->state.dequant_table_data[qi][pli][qti];
+  }
+  oc_dequant_tables_init(_dec->state.dequant_tables,_dec->pp_dc_scale,
+   &_setup->qinfo);
+  /*For each qi, sum the quantizers at indices 12, 17, 18 and 24 over all
+     planes and both quantizer types to derive pp_sharp_mod.
+    Addition binds tighter than the shift, so the whole four-term sum is
+     doubled when pli==0.*/
+  for(qi=0;qi<64;qi++){
+    int qsum;
+    qsum=0;
+    for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){
+      qsum+=_dec->state.dequant_tables[qi][pli][qti][12]+
+       _dec->state.dequant_tables[qi][pli][qti][17]+
+       _dec->state.dequant_tables[qi][pli][qti][18]+
+       _dec->state.dequant_tables[qi][pli][qti][24]<<(pli==0);
+    }
+    _dec->pp_sharp_mod[qi]=-(qsum>>11);
+  }
+  memcpy(_dec->state.loop_filter_limits,_setup->qinfo.loop_filter_limits,
+   sizeof(_dec->state.loop_filter_limits));
+  oc_dec_accel_init(_dec);
+  /*Post-processing starts out disabled, with none of its buffers
+     allocated.*/
+  _dec->pp_level=OC_PP_LEVEL_DISABLED;
+  _dec->dc_qis=NULL;
+  _dec->variances=NULL;
+  _dec->pp_frame_data=NULL;
+  _dec->stripe_cb.ctx=NULL;
+  _dec->stripe_cb.stripe_decoded=NULL;
+#if defined(HAVE_CAIRO)
+  _dec->telemetry=0;
+  _dec->telemetry_bits=0;
+  _dec->telemetry_qi=0;
+  _dec->telemetry_mbmode=0;
+  _dec->telemetry_mv=0;
+  _dec->telemetry_frame_data=NULL;
+#endif
+  return 0;
+}
+
+/*Releases the resources referenced by a decoder context.
+  This frees the telemetry frame data (when HAVE_CAIRO is defined), the
+   post-processing frame data, the variances, the DC qis and the DCT token
+   buffer, then clears the Huffman trees and the shared codec state.*/
+static void oc_dec_clear(oc_dec_ctx *_dec){
+#if defined(HAVE_CAIRO)
+  _ogg_free(_dec->telemetry_frame_data);
+#endif
+  _ogg_free(_dec->pp_frame_data);
+  _ogg_free(_dec->variances);
+  _ogg_free(_dec->dc_qis);
+  _ogg_free(_dec->dct_tokens);
+  oc_huff_trees_clear(_dec->huff_tables);
+  oc_state_clear(&_dec->state);
+}
+
+
+/*Parses the header at the start of a data packet: the packet type bit, the
+   frame type, the list of one to three qi values and, for intra frames,
+   three reserved bits.
+  Return: 0 on success, TH_EBADPACKET if the packet type bit is set, or
+   TH_EIMPL if the reserved intra frame bits are not all zero.*/
+static int oc_dec_frame_header_unpack(oc_dec_ctx *_dec){
+  long bits;
+  int  nqis;
+  /*Check to make sure this is a data packet.*/
+  if(oc_pack_read1(&_dec->opb)!=0)return TH_EBADPACKET;
+  /*Read in the frame type (I or P).*/
+  bits=oc_pack_read1(&_dec->opb);
+  _dec->state.frame_type=(int)bits;
+  /*Read in the qi list: up to three 6-bit values, where each one except the
+     third is followed by a single bit saying whether another follows.*/
+  nqis=0;
+  for(;;){
+    bits=oc_pack_read(&_dec->opb,6);
+    _dec->state.qis[nqis]=(unsigned char)bits;
+    nqis++;
+    if(nqis>=3)break;
+    bits=oc_pack_read1(&_dec->opb);
+    if(!bits)break;
+  }
+  _dec->state.nqis=nqis;
+  if(_dec->state.frame_type!=OC_INTRA_FRAME)return 0;
+  /*Keyframes have 3 unused configuration bits, holdovers from VP3 days.
+    Most of the other unused bits in the VP3 headers were eliminated.
+    I don't know why these remain.*/
+  /*I wanted to eliminate wasted bits, but not all config wiggle room
+     --Monty.*/
+  bits=oc_pack_read(&_dec->opb,3);
+  if(bits!=0)return TH_EIMPL;
+  return 0;
+}
+
+/*Flags every fragment as coded with mode OC_MODE_INTRA and reference
+   OC_FRAME_SELF.
+  The coded fragment list is rebuilt in coded order along the way, together
+   with the per-plane and total coded fragment counts.
+  The coded macro block list and the super block flags are left alone, as
+   those are not used when decoding INTRA frames.*/
+static void oc_dec_mark_all_intra(oc_dec_ctx *_dec){
+  const oc_sb_map   *sb_maps;
+  const oc_sb_flags *sb_flags;
+  oc_fragment       *frags;
+  ptrdiff_t         *coded_fragis;
+  ptrdiff_t          ncoded;
+  ptrdiff_t          plane_start;
+  unsigned           sb_end;
+  unsigned           sbi;
+  int                pli;
+  sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
+  sb_flags=_dec->state.sb_flags;
+  frags=_dec->state.frags;
+  coded_fragis=_dec->state.coded_fragis;
+  ncoded=0;
+  sbi=0;
+  sb_end=0;
+  for(pli=0;pli<3;pli++){
+    /*Remember where this plane's fragments start in the list.*/
+    plane_start=ncoded;
+    /*Super blocks are numbered consecutively across the planes.*/
+    sb_end+=_dec->state.fplanes[pli].nsbs;
+    while(sbi<sb_end){
+      int quadi;
+      for(quadi=0;quadi<4;quadi++){
+        int bi;
+        /*Skip quadrants that are not valid for this super block.*/
+        if(!(sb_flags[sbi].quad_valid&1<<quadi))continue;
+        for(bi=0;bi<4;bi++){
+          ptrdiff_t fragi;
+          fragi=sb_maps[sbi][quadi][bi];
+          /*Negative entries do not name a fragment.*/
+          if(fragi<0)continue;
+          frags[fragi].coded=1;
+          frags[fragi].refi=OC_FRAME_SELF;
+          frags[fragi].mb_mode=OC_MODE_INTRA;
+          coded_fragis[ncoded]=fragi;
+          ncoded++;
+        }
+      }
+      sbi++;
+    }
+    _dec->state.ncoded_fragis[pli]=ncoded-plane_start;
+  }
+  _dec->state.ntotal_coded_fragis=ncoded;
+}
+
+/*Decodes the bit flags indicating whether each super block is partially coded
+   or not.
+  The flags are run-length coded: an initial flag bit is followed by run
+   lengths, and the flag toggles after each run unless the run had the maximum
+   length, in which case the next flag is read explicitly.
+  As a side effect, coded_fully is cleared for every super block.
+  Return: The number of partially coded super blocks.*/
+static unsigned oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){
+  oc_sb_flags *sb_flags;
+  unsigned     nsbs;
+  unsigned     sbi;
+  unsigned     npartial;
+  unsigned     run_count;
+  long         val;
+  int          flag;
+  val=oc_pack_read1(&_dec->opb);
+  flag=(int)val;
+  sb_flags=_dec->state.sb_flags;
+  nsbs=_dec->state.nsbs;
+  sbi=npartial=0;
+  while(sbi<nsbs){
+    int full_run;
+    run_count=oc_sb_run_unpack(&_dec->opb);
+    /*4129 is the longest run the run length code can express.*/
+    full_run=run_count>=4129;
+    /*Apply the run, stopping early if it would go past the last super
+       block.*/
+    do{
+      sb_flags[sbi].coded_partially=flag;
+      sb_flags[sbi].coded_fully=0;
+      npartial+=flag;
+      sbi++;
+    }
+    while(--run_count>0&&sbi<nsbs);
+    /*After a maximum-length run the next flag value is coded explicitly;
+       otherwise it simply alternates.*/
+    if(full_run&&sbi<nsbs){
+      val=oc_pack_read1(&_dec->opb);
+      flag=(int)val;
+    }
+    else flag=!flag;
+  }
+  /*TODO: run_count should be 0 here.
+    If it's not, we should issue a warning of some kind.*/
+  return npartial;
+}
+
+/*Decodes the bit flags for whether or not each non-partially-coded super
+   block is fully coded or not.
+  This function should only be called if there is at least one
+   non-partially-coded super block.
+  Nothing is returned; the decoded flags are stored in the coded_fully field
+   of each non-partially-coded super block.*/
+static void oc_dec_coded_sb_flags_unpack(oc_dec_ctx *_dec){
+  oc_sb_flags *sb_flags;
+  unsigned     nsbs;
+  unsigned     sbi;
+  unsigned     run_count;
+  long         val;
+  int          flag;
+  sb_flags=_dec->state.sb_flags;
+  nsbs=_dec->state.nsbs;
+  /*Skip partially coded super blocks.
+    This scan has no bound of its own: it relies on the precondition that at
+     least one super block is not partially coded.*/
+  for(sbi=0;sb_flags[sbi].coded_partially;sbi++);
+  val=oc_pack_read1(&_dec->opb);
+  flag=(int)val;
+  do{
+    int full_run;
+    run_count=oc_sb_run_unpack(&_dec->opb);
+    /*4129 is the longest run the run length code can express.*/
+    full_run=run_count>=4129;
+    for(;sbi<nsbs;sbi++){
+      /*Partially coded super blocks do not consume any of the run.*/
+      if(sb_flags[sbi].coded_partially)continue;
+      /*run_count is unsigned, so this stops once the run is used up.*/
+      if(run_count--<=0)break;
+      sb_flags[sbi].coded_fully=flag;
+    }
+    /*After a maximum-length run the next flag value is coded explicitly;
+       otherwise it simply alternates.*/
+    if(full_run&&sbi<nsbs){
+      val=oc_pack_read1(&_dec->opb);
+      flag=(int)val;
+    }
+    else flag=!flag;
+  }
+  while(sbi<nsbs);
+  /*TODO: run_count should be 0 here.
+    If it's not, we should issue a warning of some kind.*/
+}
+
+/*Decodes which fragments are coded in the current frame.
+  The super block flags are unpacked first; then every fragment of a fully
+   coded super block is marked coded, every fragment of an uncoded super block
+   is marked uncoded, and the fragments of partially coded super blocks get
+   their flags from a run-length coded bit string.
+  Coded fragments are appended to the front of coded_fragis, while uncoded
+   fragments are written backwards from coded_fragis+nfrags.
+  Every fragment visited has its refi reset to OC_FRAME_NONE, and for the
+   first plane mb_modes records whether each quadrant has a coded block.*/
+static void oc_dec_coded_flags_unpack(oc_dec_ctx *_dec){
+  const oc_sb_map   *sb_maps;
+  const oc_sb_flags *sb_flags;
+  signed char       *mb_modes;
+  oc_fragment       *frags;
+  unsigned           nsbs;
+  unsigned           sbi;
+  unsigned           npartial;
+  long               val;
+  int                pli;
+  int                flag;
+  int                run_count;
+  ptrdiff_t         *coded_fragis;
+  ptrdiff_t         *uncoded_fragis;
+  ptrdiff_t          ncoded_fragis;
+  ptrdiff_t          nuncoded_fragis;
+  ptrdiff_t          prev_ncoded_fragis;
+  npartial=oc_dec_partial_sb_flags_unpack(_dec);
+  if(npartial<_dec->state.nsbs)oc_dec_coded_sb_flags_unpack(_dec);
+  /*The block flag bit string is only present if some super block is
+     partially coded.
+    The initial flag is stored inverted because it is toggled again when the
+     first run is read below.*/
+  if(npartial>0){
+    val=oc_pack_read1(&_dec->opb);
+    flag=!(int)val;
+  }
+  else flag=0;
+  sb_maps=(const oc_sb_map *)_dec->state.sb_maps;
+  sb_flags=_dec->state.sb_flags;
+  mb_modes=_dec->state.mb_modes;
+  frags=_dec->state.frags;
+  sbi=nsbs=run_count=0;
+  coded_fragis=_dec->state.coded_fragis;
+  /*The uncoded list grows downwards from the end of the fragment index
+     array.*/
+  uncoded_fragis=coded_fragis+_dec->state.nfrags;
+  prev_ncoded_fragis=ncoded_fragis=nuncoded_fragis=0;
+  for(pli=0;pli<3;pli++){
+    nsbs+=_dec->state.fplanes[pli].nsbs;
+    for(;sbi<nsbs;sbi++){
+      int quadi;
+      for(quadi=0;quadi<4;quadi++)if(sb_flags[sbi].quad_valid&1<<quadi){
+        int quad_coded;
+        int bi;
+        quad_coded=0;
+        for(bi=0;bi<4;bi++){
+          ptrdiff_t fragi;
+          fragi=sb_maps[sbi][quadi][bi];
+          if(fragi>=0){
+            int coded;
+            if(sb_flags[sbi].coded_fully)coded=1;
+            else if(!sb_flags[sbi].coded_partially)coded=0;
+            else{
+              /*Start a new run of block flags once the current one is used
+                 up; the flag value alternates between runs.*/
+              if(run_count<=0){
+                run_count=oc_block_run_unpack(&_dec->opb);
+                flag=!flag;
+              }
+              run_count--;
+              coded=flag;
+            }
+            if(coded)coded_fragis[ncoded_fragis++]=fragi;
+            else *(uncoded_fragis-++nuncoded_fragis)=fragi;
+            quad_coded|=coded;
+            frags[fragi].coded=coded;
+            frags[fragi].refi=OC_FRAME_NONE;
+          }
+        }
+        /*Remember if there's a coded luma block in this macro block.*/
+        if(!pli)mb_modes[sbi<<2|quadi]=quad_coded;
+      }
+    }
+    _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
+    prev_ncoded_fragis=ncoded_fragis;
+  }
+  _dec->state.ntotal_coded_fragis=ncoded_fragis;
+  /*TODO: run_count should be 0 here.
+    If it's not, we should issue a warning of some kind.*/
+}
+
+
+/*The variable-length code for macro block mode indices.
+  Coding scheme:
+   Codeword            Mode Index
+   0                       0
+   10                      1
+   110                     2
+   1110                    3
+   11110                   4
+   111110                  5
+   1111110                 6
+   1111111                 7
+  NOTE(review): the layout appears to be a bit count followed by that many
+   (as a power of two) entries, where negative entries are leaves of the form
+   -(code length<<8|value) and positive entries are the index of a child
+   node; confirm against oc_huff_token_decode().*/
+static const ogg_int16_t OC_VLC_MODE_TREE[26]={
+  4,
+   -(1<<8|0),-(1<<8|0),-(1<<8|0),-(1<<8|0),
+   -(1<<8|0),-(1<<8|0),-(1<<8|0),-(1<<8|0),
+   -(2<<8|1),-(2<<8|1),-(2<<8|1),-(2<<8|1),
+   -(3<<8|2),-(3<<8|2),-(4<<8|3),17,
+    3,
+     -(1<<8|4),-(1<<8|4),-(1<<8|4),-(1<<8|4),
+     -(2<<8|5),-(2<<8|5),-(3<<8|6),-(3<<8|7)
+};
+
+/*The constant-length code for macro block mode indices: a single 3-bit
+   lookup whose eight entries hold the mode indices 0 through 7.*/
+static const ogg_int16_t OC_CLC_MODE_TREE[9]={
+  3,
+   -(3<<8|0),-(3<<8|1),-(3<<8|2),-(3<<8|3),
+   -(3<<8|4),-(3<<8|5),-(3<<8|6),-(3<<8|7)
+};
+
+/*Unpacks the list of macro block modes for INTER frames.
+  A 3-bit mode scheme is read first.
+  Scheme 0 reads a custom alphabet from the bitstream, while the other schemes
+   select the predefined alphabet OC_MODE_ALPHABETS[mode_scheme-1].
+  Scheme 7 decodes mode indices with OC_CLC_MODE_TREE and every other scheme
+   uses OC_VLC_MODE_TREE.
+  Only macro blocks whose mb_modes entry is greater than zero on entry have a
+   mode decoded; the other entries are left untouched.*/
+static void oc_dec_mb_modes_unpack(oc_dec_ctx *_dec){
+  signed char         *mb_modes;
+  const unsigned char *alphabet;
+  unsigned char        scheme0_alphabet[8];
+  const ogg_int16_t   *mode_tree;
+  size_t               nmbs;
+  size_t               mbi;
+  long                 val;
+  int                  mode_scheme;
+  val=oc_pack_read(&_dec->opb,3);
+  mode_scheme=(int)val;
+  if(mode_scheme==0){
+    int mi;
+    /*Just in case, initialize the modes to something.
+      If the bitstream doesn't contain each index exactly once, it's likely
+       corrupt and the rest of the packet is garbage anyway, but this way we
+       won't crash, and we'll decode SOMETHING.*/
+    /*LOOP VECTORIZES*/
+    for(mi=0;mi<OC_NMODES;mi++)scheme0_alphabet[mi]=OC_MODE_INTER_NOMV;
+    /*Each 3-bit value read gives the alphabet position of the mi'th mode of
+       OC_MODE_ALPHABETS[6].*/
+    for(mi=0;mi<OC_NMODES;mi++){
+      val=oc_pack_read(&_dec->opb,3);
+      scheme0_alphabet[val]=OC_MODE_ALPHABETS[6][mi];
+    }
+    alphabet=scheme0_alphabet;
+  }
+  else alphabet=OC_MODE_ALPHABETS[mode_scheme-1];
+  mode_tree=mode_scheme==7?OC_CLC_MODE_TREE:OC_VLC_MODE_TREE;
+  mb_modes=_dec->state.mb_modes;
+  nmbs=_dec->state.nmbs;
+  for(mbi=0;mbi<nmbs;mbi++){
+    if(mb_modes[mbi]>0){
+      /*We have a coded luma block; decode a mode.*/
+      mb_modes[mbi]=alphabet[oc_huff_token_decode(&_dec->opb,mode_tree)];
+    }
+    /*For other valid macro blocks, INTER_NOMV is forced, but we rely on the
+       fact that OC_MODE_INTER_NOMV is already 0.*/
+  }
+}
+
+
+
+/*The variable-length code for a single motion vector component.
+  The leaves hold the component value plus 32, covering -31 through +31;
+   oc_mv_unpack() subtracts the 32 again after decoding.
+  NOTE(review): the layout appears to be a bit count followed by that many
+   (as a power of two) entries, where negative entries are leaves of the form
+   -(code length<<8|value) and positive entries are the index of a child
+   node; confirm against oc_huff_token_decode().*/
+static const ogg_int16_t OC_VLC_MV_COMP_TREE[101]={
+  5,
+   -(3<<8|32+0),-(3<<8|32+0),-(3<<8|32+0),-(3<<8|32+0),
+   -(3<<8|32+1),-(3<<8|32+1),-(3<<8|32+1),-(3<<8|32+1),
+   -(3<<8|32-1),-(3<<8|32-1),-(3<<8|32-1),-(3<<8|32-1),
+   -(4<<8|32+2),-(4<<8|32+2),-(4<<8|32-2),-(4<<8|32-2),
+   -(4<<8|32+3),-(4<<8|32+3),-(4<<8|32-3),-(4<<8|32-3),
+   33,          36,          39,          42,
+   45,          50,          55,          60,
+   65,          74,          83,          92,
+    1,-(1<<8|32+4),-(1<<8|32-4),
+    1,-(1<<8|32+5),-(1<<8|32-5),
+    1,-(1<<8|32+6),-(1<<8|32-6),
+    1,-(1<<8|32+7),-(1<<8|32-7),
+    2,-(2<<8|32+8),-(2<<8|32-8),-(2<<8|32+9),-(2<<8|32-9),
+    2,-(2<<8|32+10),-(2<<8|32-10),-(2<<8|32+11),-(2<<8|32-11),
+    2,-(2<<8|32+12),-(2<<8|32-12),-(2<<8|32+13),-(2<<8|32-13),
+    2,-(2<<8|32+14),-(2<<8|32-14),-(2<<8|32+15),-(2<<8|32-15),
+    3,
+     -(3<<8|32+16),-(3<<8|32-16),-(3<<8|32+17),-(3<<8|32-17),
+     -(3<<8|32+18),-(3<<8|32-18),-(3<<8|32+19),-(3<<8|32-19),
+    3,
+     -(3<<8|32+20),-(3<<8|32-20),-(3<<8|32+21),-(3<<8|32-21),
+     -(3<<8|32+22),-(3<<8|32-22),-(3<<8|32+23),-(3<<8|32-23),
+    3,
+     -(3<<8|32+24),-(3<<8|32-24),-(3<<8|32+25),-(3<<8|32-25),
+     -(3<<8|32+26),-(3<<8|32-26),-(3<<8|32+27),-(3<<8|32-27),
+    3,
+     -(3<<8|32+28),-(3<<8|32-28),-(3<<8|32+29),-(3<<8|32-29),
+     -(3<<8|32+30),-(3<<8|32-30),-(3<<8|32+31),-(3<<8|32-31)
+};
+
+/*The constant-length code for a single motion vector component: one 6-bit
+   lookup.
+  The entries alternate between the positive and negative value of each
+   magnitude from 0 through 31, each stored with 32 added; oc_mv_unpack()
+   subtracts the 32 again after decoding.
+  The first two entries (32+0 and 32-0) therefore both decode to zero.*/
+static const ogg_int16_t OC_CLC_MV_COMP_TREE[65]={
+  6,
+   -(6<<8|32 +0),-(6<<8|32 -0),-(6<<8|32 +1),-(6<<8|32 -1),
+   -(6<<8|32 +2),-(6<<8|32 -2),-(6<<8|32 +3),-(6<<8|32 -3),
+   -(6<<8|32 +4),-(6<<8|32 -4),-(6<<8|32 +5),-(6<<8|32 -5),
+   -(6<<8|32 +6),-(6<<8|32 -6),-(6<<8|32 +7),-(6<<8|32 -7),
+   -(6<<8|32 +8),-(6<<8|32 -8),-(6<<8|32 +9),-(6<<8|32 -9),
+   -(6<<8|32+10),-(6<<8|32-10),-(6<<8|32+11),-(6<<8|32-11),
+   -(6<<8|32+12),-(6<<8|32-12),-(6<<8|32+13),-(6<<8|32-13),
+   -(6<<8|32+14),-(6<<8|32-14),-(6<<8|32+15),-(6<<8|32-15),
+   -(6<<8|32+16),-(6<<8|32-16),-(6<<8|32+17),-(6<<8|32-17),
+   -(6<<8|32+18),-(6<<8|32-18),-(6<<8|32+19),-(6<<8|32-19),
+   -(6<<8|32+20),-(6<<8|32-20),-(6<<8|32+21),-(6<<8|32-21),
+   -(6<<8|32+22),-(6<<8|32-22),-(6<<8|32+23),-(6<<8|32-23),
+   -(6<<8|32+24),-(6<<8|32-24),-(6<<8|32+25),-(6<<8|32-25),
+   -(6<<8|32+26),-(6<<8|32-26),-(6<<8|32+27),-(6<<8|32-27),
+   -(6<<8|32+28),-(6<<8|32-28),-(6<<8|32+29),-(6<<8|32-29),
+   -(6<<8|32+30),-(6<<8|32-30),-(6<<8|32+31),-(6<<8|32-31)
+};
+
+
+/*Reads one motion vector from the bitstream.
+  Two components are decoded back to back with the given tree, and the bias
+   of 32 carried by the tree's values is removed from each.
+  _opb:  The pack buffer to read from.
+  _tree: The code tree to decode each component with.
+  Return: The two components combined by OC_MV(), first decoded first.*/
+static oc_mv oc_mv_unpack(oc_pack_buf *_opb,const ogg_int16_t *_tree){
+  int mv_x;
+  int mv_y;
+  /*The order of these two reads is the bitstream order; do not swap them.*/
+  mv_x=oc_huff_token_decode(_opb,_tree);
+  mv_x-=32;
+  mv_y=oc_huff_token_decode(_opb,_tree);
+  mv_y-=32;
+  return OC_MV(mv_x,mv_y);
+}
+
+/*Unpacks the list of motion vectors for INTER frames, and propagates the macro
+   block modes and motion vectors to the individual fragments.
+  A single bit at the start selects the constant-length or the
+   variable-length motion vector code.
+  For every macro block whose mode is not OC_MODE_INVALID, this reads whatever
+   motion vectors the mode calls for, updates the last/prior motion vector
+   history, and stores the reference frame index, mode and motion vector in
+   each coded fragment of the macro block.*/
+static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){
+  const oc_mb_map        *mb_maps;
+  const signed char      *mb_modes;
+  oc_set_chroma_mvs_func  set_chroma_mvs;
+  const ogg_int16_t      *mv_comp_tree;
+  oc_fragment            *frags;
+  oc_mv                  *frag_mvs;
+  const unsigned char    *map_idxs;
+  int                     map_nidxs;
+  oc_mv                   last_mv;
+  oc_mv                   prior_mv;
+  oc_mv                   cbmvs[4];
+  size_t                  nmbs;
+  size_t                  mbi;
+  long                    val;
+  /*The chroma MV derivation and the macro block map layout both depend on the
+     pixel format.*/
+  set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_dec->state.info.pixel_fmt];
+  val=oc_pack_read1(&_dec->opb);
+  mv_comp_tree=val?OC_CLC_MV_COMP_TREE:OC_VLC_MV_COMP_TREE;
+  map_idxs=OC_MB_MAP_IDXS[_dec->state.info.pixel_fmt];
+  map_nidxs=OC_MB_MAP_NIDXS[_dec->state.info.pixel_fmt];
+  prior_mv=last_mv=0;
+  frags=_dec->state.frags;
+  frag_mvs=_dec->state.frag_mvs;
+  mb_maps=(const oc_mb_map *)_dec->state.mb_maps;
+  mb_modes=_dec->state.mb_modes;
+  nmbs=_dec->state.nmbs;
+  for(mbi=0;mbi<nmbs;mbi++){
+    int mb_mode;
+    mb_mode=mb_modes[mbi];
+    if(mb_mode!=OC_MODE_INVALID){
+      oc_mv     mbmv;
+      ptrdiff_t fragi;
+      int       mapi;
+      int       mapii;
+      int       refi;
+      if(mb_mode==OC_MODE_INTER_MV_FOUR){
+        oc_mv lbmvs[4];
+        int   bi;
+        prior_mv=last_mv;
+        /*One MV is read for each coded block of the first plane; uncoded
+           blocks contribute a zero MV to the derivation below.*/
+        for(bi=0;bi<4;bi++){
+          fragi=mb_maps[mbi][0][bi];
+          if(frags[fragi].coded){
+            frags[fragi].refi=OC_FRAME_PREV;
+            frags[fragi].mb_mode=OC_MODE_INTER_MV_FOUR;
+            lbmvs[bi]=last_mv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
+            frag_mvs[fragi]=lbmvs[bi];
+          }
+          else lbmvs[bi]=0;
+        }
+        (*set_chroma_mvs)(cbmvs,lbmvs);
+        /*The map indices from 4 on get the derived MVs (presumably these are
+           the chroma blocks; verify against OC_MB_MAP_IDXS).*/
+        for(mapii=4;mapii<map_nidxs;mapii++){
+          mapi=map_idxs[mapii];
+          bi=mapi&3;
+          fragi=mb_maps[mbi][mapi>>2][bi];
+          if(frags[fragi].coded){
+            frags[fragi].refi=OC_FRAME_PREV;
+            frags[fragi].mb_mode=OC_MODE_INTER_MV_FOUR;
+            frag_mvs[fragi]=cbmvs[bi];
+          }
+        }
+      }
+      else{
+        switch(mb_mode){
+          /*A new MV is coded and pushed onto the history.*/
+          case OC_MODE_INTER_MV:{
+            prior_mv=last_mv;
+            last_mv=mbmv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
+          }break;
+          case OC_MODE_INTER_MV_LAST:mbmv=last_mv;break;
+          /*The two history entries swap places.*/
+          case OC_MODE_INTER_MV_LAST2:{
+            mbmv=prior_mv;
+            prior_mv=last_mv;
+            last_mv=mbmv;
+          }break;
+          /*A new MV is coded, but the history is left alone.*/
+          case OC_MODE_GOLDEN_MV:{
+            mbmv=oc_mv_unpack(&_dec->opb,mv_comp_tree);
+          }break;
+          default:mbmv=0;break;
+        }
+        /*Fill in the MVs for the fragments.*/
+        refi=OC_FRAME_FOR_MODE(mb_mode);
+        mapii=0;
+        do{
+          mapi=map_idxs[mapii];
+          fragi=mb_maps[mbi][mapi>>2][mapi&3];
+          if(frags[fragi].coded){
+            frags[fragi].refi=refi;
+            frags[fragi].mb_mode=mb_mode;
+            frag_mvs[fragi]=mbmv;
+          }
+        }
+        while(++mapii<map_nidxs);
+      }
+    }
+  }
+}
+
+/*Unpacks the quantizer index selector (qii) of every coded fragment.
+  With a single qi for the frame nothing is read and every qii is set to 0.
+  Otherwise one run-length coded pass marks each fragment as qii 0 or
+   non-zero, and with three qi values a second pass over the non-zero
+   fragments tells qii 1 from qii 2.
+  Nothing is done if there are no coded fragments.*/
+static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){
+  oc_fragment     *frags;
+  const ptrdiff_t *coded_fragis;
+  ptrdiff_t        ncoded_fragis;
+  ptrdiff_t        fragii;
+  ptrdiff_t        fragi;
+  ncoded_fragis=_dec->state.ntotal_coded_fragis;
+  if(ncoded_fragis<=0)return;
+  frags=_dec->state.frags;
+  coded_fragis=_dec->state.coded_fragis;
+  if(_dec->state.nqis==1){
+    /*If this frame has only a single qi value, then just use it for all coded
+       fragments.*/
+    for(fragii=0;fragii<ncoded_fragis;fragii++){
+      frags[coded_fragis[fragii]].qii=0;
+    }
+  }
+  else{
+    long val;
+    int  flag;
+    int  nqi1;
+    int  run_count;
+    /*Otherwise, we decode a qi index for each fragment, using two passes of
+      the same binary RLE scheme used for super-block coded bits.
+     The first pass marks each fragment as having a qii of 0 or greater than
+      0, and the second pass (if necessary), distinguishes between a qii of
+      1 and 2.
+     At first we just store the qii in the fragment.
+     After all the qii's are decoded, we make a final pass to replace them
+      with the corresponding qi's for this frame.
+     NOTE(review): no such final pass appears in this function; the fragments
+      still hold qii values when it returns.*/
+    val=oc_pack_read1(&_dec->opb);
+    flag=(int)val;
+    nqi1=0;
+    fragii=0;
+    while(fragii<ncoded_fragis){
+      int full_run;
+      run_count=oc_sb_run_unpack(&_dec->opb);
+      /*4129 is the longest run the run length code can express.*/
+      full_run=run_count>=4129;
+      do{
+        frags[coded_fragis[fragii++]].qii=flag;
+        nqi1+=flag;
+      }
+      while(--run_count>0&&fragii<ncoded_fragis);
+      /*After a maximum-length run the next flag value is coded explicitly;
+         otherwise it simply alternates.*/
+      if(full_run&&fragii<ncoded_fragis){
+        val=oc_pack_read1(&_dec->opb);
+        flag=(int)val;
+      }
+      else flag=!flag;
+    }
+    /*TODO: run_count should be 0 here.
+      If it's not, we should issue a warning of some kind.*/
+    /*If we have 3 different qi's for this frame, and there was at least one
+       fragment with a non-zero qi, make the second pass.*/
+    if(_dec->state.nqis==3&&nqi1>0){
+      /*Skip qii==0 fragments.
+        This scan is unbounded, but nqi1>0 guarantees it terminates.*/
+      for(fragii=0;frags[coded_fragis[fragii]].qii==0;fragii++);
+      val=oc_pack_read1(&_dec->opb);
+      flag=(int)val;
+      do{
+        int full_run;
+        run_count=oc_sb_run_unpack(&_dec->opb);
+        full_run=run_count>=4129;
+        for(;fragii<ncoded_fragis;fragii++){
+          fragi=coded_fragis[fragii];
+          /*Fragments with a qii of 0 do not consume any of the run.*/
+          if(frags[fragi].qii==0)continue;
+          if(run_count--<=0)break;
+          /*A set flag promotes the qii from 1 to 2.*/
+          frags[fragi].qii+=flag;
+        }
+        if(full_run&&fragii<ncoded_fragis){
+          val=oc_pack_read1(&_dec->opb);
+          flag=(int)val;
+        }
+        else flag=!flag;
+      }
+      while(fragii<ncoded_fragis);
+      /*TODO: run_count should be 0 here.
+        If it's not, we should issue a warning of some kind.*/
+    }
+  }
+}
+
+
+
+/*Unpacks the DC coefficient tokens.
+  Unlike when unpacking the AC coefficient tokens, we actually need to decode
+   the DC coefficient values now so that we can do DC prediction.
+  The raw tokens and their extra bits are also appended to the DCT token
+   buffer, and the starting token index and EOB run for each plane are
+   recorded in ti0 and eob_runs.
+  _huff_idxs:  The index of the Huffman table to use for each color plane.
+               Entry 0 is used for plane 0 and entry 1 for planes 1 and 2.
+  _ntoks_left: The number of tokens left to be decoded in each color plane for
+                each coefficient.
+               This is updated as EOB tokens and zero run tokens are decoded.
+  Return: The length of any outstanding EOB run.*/
+static ptrdiff_t oc_dec_dc_coeff_unpack(oc_dec_ctx *_dec,int _huff_idxs[2],
+ ptrdiff_t _ntoks_left[3][64]){
+  unsigned char   *dct_tokens;
+  oc_fragment     *frags;
+  const ptrdiff_t *coded_fragis;
+  ptrdiff_t        ncoded_fragis;
+  ptrdiff_t        fragii;
+  ptrdiff_t        eobs;
+  ptrdiff_t        ti;
+  int              pli;
+  dct_tokens=_dec->dct_tokens;
+  frags=_dec->state.frags;
+  coded_fragis=_dec->state.coded_fragis;
+  ncoded_fragis=fragii=eobs=ti=0;
+  for(pli=0;pli<3;pli++){
+    ptrdiff_t run_counts[64];
+    ptrdiff_t eob_count;
+    ptrdiff_t eobi;
+    int       rli;
+    ncoded_fragis+=_dec->state.ncoded_fragis[pli];
+    memset(run_counts,0,sizeof(run_counts));
+    _dec->eob_runs[pli][0]=eobs;
+    _dec->ti0[pli][0]=ti;
+    /*Continue any previous EOB run, if there was one.*/
+    eobi=eobs;
+    if(ncoded_fragis-fragii<eobi)eobi=ncoded_fragis-fragii;
+    eob_count=eobi;
+    eobs-=eobi;
+    while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
+    while(fragii<ncoded_fragis){
+      int token;
+      int cw;
+      int eb;
+      int skip;
+      /*pli+1>>1 is 0 for the first plane and 1 for the other two.*/
+      token=oc_huff_token_decode(&_dec->opb,
+       _dec->huff_tables[_huff_idxs[pli+1>>1]]);
+      dct_tokens[ti++]=(unsigned char)token;
+      if(OC_DCT_TOKEN_NEEDS_MORE(token)){
+        eb=(int)oc_pack_read(&_dec->opb,
+         OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]);
+        dct_tokens[ti++]=(unsigned char)eb;
+        /*This token's extra bits do not fit in one byte, so the bits above
+           the low eight are stored in a second byte.*/
+        if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);
+        eb<<=OC_DCT_TOKEN_EB_POS(token);
+      }
+      else eb=0;
+      /*Combine the base code word with the positioned extra bits, then pull
+         the EOB run length out of it.*/
+      cw=OC_DCT_CODE_WORD[token]+eb;
+      eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
+      if(cw==OC_DCT_CW_FINISH)eobs=OC_DCT_EOB_FINISH;
+      if(eobs){
+        /*Use up as much of the run as fits in this plane; the rest carries
+           over.*/
+        eobi=OC_MINI(eobs,ncoded_fragis-fragii);
+        eob_count+=eobi;
+        eobs-=eobi;
+        while(eobi-->0)frags[coded_fragis[fragii++]].dc=0;
+      }
+      else{
+        int coeff;
+        skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
+        /*If the flip bit is set, invert it and everything above it, which
+           negates the magnitude extracted on the next line.*/
+        cw^=-(cw&1<<OC_DCT_CW_FLIP_BIT);
+        coeff=cw>>OC_DCT_CW_MAG_SHIFT;
+        /*A token with a zero run leaves the DC coefficient itself at zero.*/
+        if(skip)coeff=0;
+        run_counts[skip]++;
+        frags[coded_fragis[fragii++]].dc=coeff;
+      }
+    }
+    /*Add the total EOB count to the longest run length.*/
+    run_counts[63]+=eob_count;
+    /*And convert the run_counts array to a moment table.*/
+    for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
+    /*Finally, subtract off the number of coefficients that have been
+       accounted for by runs started in this coefficient.*/
+    for(rli=64;rli-->0;)_ntoks_left[pli][rli]-=run_counts[rli];
+  }
+  _dec->dct_tokens_count=ti;
+  return eobs;
+}
+
+/*Unpacks the AC coefficient tokens.
+  This can completely discard coefficient values while unpacking, and so is
+   somewhat simpler than unpacking the DC coefficient tokens.
+  The raw tokens and their extra bits are appended to the DCT token buffer,
+   and the starting token index and EOB run for each plane are recorded in ti0
+   and eob_runs for coefficient _zzi.
+  _zzi:        The index of the coefficient whose tokens are being unpacked.
+  _huff_idxs:  The index of the Huffman table to use for each color plane.
+               Entry 0 is used for plane 0 and entry 1 for planes 1 and 2.
+  _ntoks_left: The number of tokens left to be decoded in each color plane for
+                each coefficient.
+               This is updated as EOB tokens and zero run tokens are decoded.
+  _eobs:       The length of any outstanding EOB run from previous
+                coefficients.
+  Return: The length of any outstanding EOB run.
+          This is a ptrdiff_t, like _eobs and the return value of
+           oc_dec_dc_coeff_unpack(): after an end-of-frame token the run is
+           OC_DCT_EOB_FINISH, which does not fit in an int on platforms where
+           ptrdiff_t is wider than int.*/
+static ptrdiff_t oc_dec_ac_coeff_unpack(oc_dec_ctx *_dec,int _zzi,
+ int _huff_idxs[2],ptrdiff_t _ntoks_left[3][64],ptrdiff_t _eobs){
+  unsigned char *dct_tokens;
+  ptrdiff_t      ti;
+  int            pli;
+  dct_tokens=_dec->dct_tokens;
+  ti=_dec->dct_tokens_count;
+  for(pli=0;pli<3;pli++){
+    ptrdiff_t run_counts[64];
+    ptrdiff_t eob_count;
+    size_t    ntoks_left;
+    size_t    ntoks;
+    int       rli;
+    _dec->eob_runs[pli][_zzi]=_eobs;
+    _dec->ti0[pli][_zzi]=ti;
+    ntoks_left=_ntoks_left[pli][_zzi];
+    memset(run_counts,0,sizeof(run_counts));
+    eob_count=0;
+    ntoks=0;
+    /*Keep decoding until the tokens read so far plus the pending EOB run
+       cover everything left for this coefficient in this plane.*/
+    while(ntoks+_eobs<ntoks_left){
+      int token;
+      int cw;
+      int eb;
+      int skip;
+      ntoks+=_eobs;
+      eob_count+=_eobs;
+      /*pli+1>>1 is 0 for the first plane and 1 for the other two.*/
+      token=oc_huff_token_decode(&_dec->opb,
+       _dec->huff_tables[_huff_idxs[pli+1>>1]]);
+      dct_tokens[ti++]=(unsigned char)token;
+      if(OC_DCT_TOKEN_NEEDS_MORE(token)){
+        eb=(int)oc_pack_read(&_dec->opb,
+         OC_INTERNAL_DCT_TOKEN_EXTRA_BITS[token]);
+        dct_tokens[ti++]=(unsigned char)eb;
+        /*This token's extra bits do not fit in one byte, so the bits above
+           the low eight are stored in a second byte.*/
+        if(token==OC_DCT_TOKEN_FAT_EOB)dct_tokens[ti++]=(unsigned char)(eb>>8);
+        eb<<=OC_DCT_TOKEN_EB_POS(token);
+      }
+      else eb=0;
+      /*Combine the base code word with the positioned extra bits, then pull
+         the zero run and EOB run lengths out of it.*/
+      cw=OC_DCT_CODE_WORD[token]+eb;
+      skip=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
+      _eobs=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
+      if(cw==OC_DCT_CW_FINISH)_eobs=OC_DCT_EOB_FINISH;
+      if(_eobs==0){
+        run_counts[skip]++;
+        ntoks++;
+      }
+    }
+    /*Add the portion of the last EOB run actually used by this coefficient.*/
+    eob_count+=ntoks_left-ntoks;
+    /*And remove it from the remaining EOB count.*/
+    _eobs-=ntoks_left-ntoks;
+    /*Add the total EOB count to the longest run length.*/
+    run_counts[63]+=eob_count;
+    /*And convert the run_counts array to a moment table.*/
+    for(rli=63;rli-->0;)run_counts[rli]+=run_counts[rli+1];
+    /*Finally, subtract off the number of coefficients that have been
+       accounted for by runs started in this coefficient.*/
+    for(rli=64-_zzi;rli-->0;)_ntoks_left[pli][_zzi+rli]-=run_counts[rli];
+  }
+  _dec->dct_tokens_count=ti;
+  return _eobs;
+}
+
+/*Tokens describing the DCT coefficients that belong to each fragment are
+   stored in the bitstream grouped by coefficient, not by fragment.
+
+  This means that we either decode all the tokens in order, building up a
+   separate coefficient list for each fragment as we go, and then go back and
+   do the iDCT on each fragment, or we have to create separate lists of tokens
+   for each coefficient, so that we can pull the next token required off the
+   head of the appropriate list when decoding a specific fragment.
+
+  The former was VP3's choice, and it meant 2*w*h extra storage for all the
+   decoded coefficient values.
+
+  We take the second option, which lets us store just one to three bytes per
+   token (generally far fewer than the number of coefficients, due to EOB
+   tokens and zero runs), and which requires us to only maintain a counter for
+   each of the 64 coefficients, instead of a counter for every fragment to
+   determine where the next token goes.
+
+  We actually use 3 counters per coefficient, one for each color plane, so we
+   can decode all color planes simultaneously.
+  This lets color conversion, etc., be done as soon as a full MCU (one or
+   two super block rows) is decoded, while the image data is still in cache.*/
+
+static void oc_dec_residual_tokens_unpack(oc_dec_ctx *_dec){
+  /*One past the last coefficient index served by each Huffman table group.*/
+  static const unsigned char OC_HUFF_LIST_MAX[5]={1,6,15,28,64};
+  ptrdiff_t tokens_left[3][64];
+  ptrdiff_t eob_run;
+  int       tables[2];
+  int       group;
+  int       plane;
+  int       coeff;
+  int       i;
+  /*Every coefficient of a plane starts out with that plane's coded fragment
+     count as its number of outstanding tokens.*/
+  for(plane=0;plane<3;plane++){
+    ptrdiff_t ncoded;
+    ncoded=_dec->state.ncoded_fragis[plane];
+    for(coeff=0;coeff<64;coeff++)tokens_left[plane][coeff]=ncoded;
+  }
+  /*Read the two 4-bit DC table indices, then unpack the DC tokens.*/
+  for(i=0;i<2;i++)tables[i]=(int)oc_pack_read(&_dec->opb,4);
+  _dec->eob_runs[0][0]=0;
+  eob_run=oc_dec_dc_coeff_unpack(_dec,tables,tokens_left);
+#if defined(HAVE_CAIRO)
+  _dec->telemetry_dc_bytes=oc_pack_bytes_left(&_dec->opb);
+#endif
+  /*Read the two 4-bit AC table indices.*/
+  for(i=0;i<2;i++)tables[i]=(int)oc_pack_read(&_dec->opb,4);
+  /*Walk the AC coefficients group by group; each group moves both table
+     indices 16 entries further along.*/
+  coeff=1;
+  for(group=1;group<5;group++){
+    int coeff_end;
+    coeff_end=OC_HUFF_LIST_MAX[group];
+    tables[0]+=16;
+    tables[1]+=16;
+    while(coeff<coeff_end){
+      eob_run=oc_dec_ac_coeff_unpack(_dec,coeff,tables,tokens_left,eob_run);
+      coeff++;
+    }
+  }
+  /*TODO: eobs should be exactly zero, or 4096 or greater.
+    The second case occurs when an EOB run of size zero is encountered, which
+     gets treated as an infinite EOB run (where infinity is PTRDIFF_MAX).
+    If neither of these conditions holds, then a warning should be issued.*/
+}
+
+
+/*Prepares the buffers needed for the current post-processing level.
+  Return: 0 if post-processing buffers are ready for this frame, or 1 if
+           post-processing should not be applied (it is disabled, only DC
+           quantization indices are being tracked, tracking cannot start on
+           this frame, or an allocation failed).*/
+static int oc_dec_postprocess_init(oc_dec_ctx *_dec){
+  int do_chroma;
+  int want_state;
+  /*Level 0 switches post-processing off: release everything we hold.*/
+  if(_dec->pp_level<=OC_PP_LEVEL_DISABLED){
+    if(_dec->dc_qis!=NULL){
+      _ogg_free(_dec->dc_qis);
+      _ogg_free(_dec->variances);
+      _ogg_free(_dec->pp_frame_data);
+      _dec->dc_qis=NULL;
+      _dec->variances=NULL;
+      _dec->pp_frame_data=NULL;
+    }
+    return 1;
+  }
+  if(_dec->dc_qis!=NULL){
+    const ptrdiff_t *coded;
+    unsigned char   *qis;
+    ptrdiff_t        ncoded;
+    unsigned char    qi;
+    /*Already tracking: stamp the frame's first quantization index onto every
+       coded fragment in all three planes.*/
+    qis=_dec->dc_qis;
+    coded=_dec->state.coded_fragis;
+    ncoded=_dec->state.ncoded_fragis[0]+
+     _dec->state.ncoded_fragis[1]+_dec->state.ncoded_fragis[2];
+    qi=(unsigned char)_dec->state.qis[0];
+    while(ncoded-->0)qis[*coded++]=qi;
+  }
+  else{
+    /*Tracking only ever begins on an intra frame; otherwise there's no point
+       in starting now.*/
+    if(_dec->state.frame_type!=OC_INTRA_FRAME)return 1;
+    _dec->dc_qis=(unsigned char *)_ogg_malloc(
+     _dec->state.nfrags*sizeof(_dec->dc_qis[0]));
+    if(_dec->dc_qis==NULL)return 1;
+    memset(_dec->dc_qis,_dec->state.qis[0],_dec->state.nfrags);
+  }
+  /*Level 1 only tracks the DC quantization indices: drop the other buffers.*/
+  if(_dec->pp_level<=OC_PP_LEVEL_TRACKDCQI){
+    if(_dec->variances!=NULL){
+      _ogg_free(_dec->variances);
+      _ogg_free(_dec->pp_frame_data);
+      _dec->variances=NULL;
+      _dec->pp_frame_data=NULL;
+    }
+    return 1;
+  }
+  if(_dec->variances==NULL){
+    size_t luma_sz;
+    size_t chroma_sz;
+    int    chroma_w;
+    int    chroma_h;
+    chroma_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
+    chroma_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
+    luma_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height;
+    chroma_sz=chroma_w*(size_t)chroma_h;
+    /*Room for both chroma planes is always reserved, even when they end up
+       unused; this keeps the allocation state simple at the cost of some
+       memory on systems that don't overcommit pages.*/
+    _dec->pp_frame_data=(unsigned char *)_ogg_malloc(
+     (luma_sz+(chroma_sz<<1))*sizeof(_dec->pp_frame_data[0]));
+    _dec->variances=(int *)_ogg_malloc(
+     _dec->state.nfrags*sizeof(_dec->variances[0]));
+    if(_dec->variances==NULL||_dec->pp_frame_data==NULL){
+      _ogg_free(_dec->pp_frame_data);
+      _ogg_free(_dec->variances);
+      _dec->pp_frame_data=NULL;
+      _dec->variances=NULL;
+      return 1;
+    }
+    /*Fresh storage: make sure the plane pointers get rebuilt below.*/
+    _dec->pp_frame_state=0;
+  }
+  do_chroma=_dec->pp_level>=OC_PP_LEVEL_DEBLOCKC;
+  want_state=1+do_chroma;
+  /*Rebuild the PP plane descriptors if they don't match the current mode.*/
+  if(_dec->pp_frame_state!=want_state){
+    if(do_chroma){
+      unsigned char *plane_data;
+      size_t         y_sz;
+      size_t         c_sz;
+      int            c_w;
+      int            c_h;
+      int            pli;
+      /*Lay out all three PP planes back to back, then flip them.*/
+      c_w=_dec->state.info.frame_width>>!(_dec->state.info.pixel_fmt&1);
+      c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
+      y_sz=_dec->state.info.frame_width*(size_t)_dec->state.info.frame_height;
+      c_sz=c_w*(size_t)c_h;
+      _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
+      _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
+      _dec->pp_frame_buf[0].stride=_dec->pp_frame_buf[0].width;
+      _dec->pp_frame_buf[0].data=_dec->pp_frame_data;
+      plane_data=_dec->pp_frame_buf[0].data+y_sz;
+      for(pli=1;pli<3;pli++){
+        _dec->pp_frame_buf[pli].width=c_w;
+        _dec->pp_frame_buf[pli].height=c_h;
+        _dec->pp_frame_buf[pli].stride=_dec->pp_frame_buf[pli].width;
+        _dec->pp_frame_buf[pli].data=plane_data;
+        plane_data+=c_sz;
+      }
+      oc_ycbcr_buffer_flip(_dec->pp_frame_buf,_dec->pp_frame_buf);
+    }
+    else{
+      /*With chroma processing off only the PP luma plane is used; it is set
+         up with a negative stride, with data pointing at its last row.*/
+      _dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
+      _dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
+      _dec->pp_frame_buf[0].stride=-_dec->pp_frame_buf[0].width;
+      _dec->pp_frame_buf[0].data=_dec->pp_frame_data+
+       (1-_dec->pp_frame_buf[0].height)*(ptrdiff_t)_dec->pp_frame_buf[0].stride;
+    }
+    _dec->pp_frame_state=want_state;
+  }
+  /*When chroma is left alone, borrow the reference frame's chroma planes.*/
+  if(!do_chroma){
+    memcpy(_dec->pp_frame_buf+1,
+     _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]]+1,
+     sizeof(_dec->pp_frame_buf[1])*2);
+  }
+  return 0;
+}
+
+
+/*Set up the per-frame state of the main decoding pipeline: token cursors,
+   EOB runs, fragment list pointers, condensed quantizers, DC predictors, the
+   loop filter, the post-processing level and the coefficient scratch buffer.*/
+static void oc_dec_pipeline_init(oc_dec_ctx *_dec,
+ oc_dec_pipeline_state *_pipe){
+  const ptrdiff_t *coded;
+  const ptrdiff_t *uncoded;
+  int              flimit;
+  int              plane;
+  int              i;
+  /*With vertically sub-sampled chroma, two super block rows of Y' have to be
+     decoded for each super block row of Cb and Cr.*/
+  _pipe->mcu_nvfrags=4<<!(_dec->state.info.pixel_fmt&2);
+  /*Start every plane/coefficient token index where unpacking recorded it.*/
+  memcpy(_pipe->ti,_dec->ti0,sizeof(_pipe->ti));
+  /*Also copy over the initial EOB run counts.*/
+  memcpy(_pipe->eob_runs,_dec->eob_runs,sizeof(_pipe->eob_runs));
+  coded=_dec->state.coded_fragis;
+  uncoded=coded+_dec->state.nfrags;
+  for(plane=0;plane<3;plane++){
+    ptrdiff_t ncoded;
+    int       qii;
+    /*Per-plane pointers into the coded and uncoded fragment lists.*/
+    ncoded=_dec->state.ncoded_fragis[plane];
+    _pipe->coded_fragis[plane]=coded;
+    _pipe->uncoded_fragis[plane]=uncoded;
+    coded+=ncoded;
+    uncoded-=_dec->state.fplanes[plane].nfrags-ncoded;
+    /*Condensed quantizer tables: only the qis actually in use this frame.*/
+    for(qii=0;qii<_dec->state.nqis;qii++){
+      for(i=0;i<2;i++){
+        _pipe->dequant[plane][qii][i]=
+         _dec->state.dequant_tables[_dec->state.qis[qii]][plane][i];
+      }
+    }
+  }
+  /*All DC predictors start at 0, for every color plane and frame type.*/
+  memset(_pipe->pred_last,0,sizeof(_pipe->pred_last));
+  /*The loop filter is only enabled (and its bounding values only built) when
+     the limit for the first quantization index is non-zero.*/
+  flimit=_dec->state.loop_filter_limits[_dec->state.qis[0]];
+  if(flimit!=0){
+    _pipe->loop_filter=1;
+    oc_loop_filter_init(&_dec->state,_pipe->bounding_values,flimit);
+  }
+  else _pipe->loop_filter=0;
+  /*Prepare the post-processing buffers.
+    If that reports post-processing is not possible, disable it regardless of
+     the user-requested level and alias the output to the current frame;
+     otherwise latch the level now, so a change made from a callback cannot
+     affect this frame.*/
+  if(oc_dec_postprocess_init(_dec)){
+    _pipe->pp_level=OC_PP_LEVEL_DISABLED;
+    memcpy(_dec->pp_frame_buf,
+     _dec->state.ref_frame_bufs[_dec->state.ref_frame_idx[OC_FRAME_SELF]],
+     sizeof(_dec->pp_frame_buf[0])*3);
+  }
+  else _pipe->pp_level=_dec->pp_level;
+  /*Zero the first 64 DCT coefficients ready for the first block.*/
+  for(i=64;i-->0;)_pipe->dct_coeffs[i]=0;
+}
+
+/*Undo the DC prediction in a single plane of an MCU (one or two super block
+   rows).
+  As a side effect, the number of coded and uncoded fragments in this plane of
+   the MCU is also computed.
+  _dec:  The decoder context; supplies the fragment array and plane layout.
+  _pipe: The pipeline state; supplies this MCU's fragment row range and the
+          running per-reference predictors, and receives the fragment counts.
+  _pli:  The color plane to process.*/
+void oc_dec_dc_unpredict_mcu_plane_c(oc_dec_ctx *_dec,
+ oc_dec_pipeline_state *_pipe,int _pli){
+  const oc_fragment_plane *fplane;
+  oc_fragment             *frags;
+  int                     *pred_last;
+  ptrdiff_t                ncoded_fragis;
+  ptrdiff_t                fragi;
+  int                      fragx;
+  int                      fragy;
+  int                      fragy0;
+  int                      fragy_end;
+  int                      nhfrags;
+  /*Compute the first and last fragment row of the current MCU for this
+     plane.*/
+  fplane=_dec->state.fplanes+_pli;
+  fragy0=_pipe->fragy0[_pli];
+  fragy_end=_pipe->fragy_end[_pli];
+  nhfrags=fplane->nhfrags;
+  pred_last=_pipe->pred_last[_pli];
+  frags=_dec->state.frags;
+  ncoded_fragis=0;
+  fragi=fplane->froffset+fragy0*(ptrdiff_t)nhfrags; /*Index of the first
+                                                      fragment in row
+                                                      fragy0.*/
+  for(fragy=fragy0;fragy<fragy_end;fragy++){
+    if(fragy==0){
+      /*For the first row, all of the cases reduce to just using the previous
+         predictor for the same reference frame.*/
+      for(fragx=0;fragx<nhfrags;fragx++,fragi++){
+        if(frags[fragi].coded){
+          int refi;
+          refi=frags[fragi].refi;
+          pred_last[refi]=frags[fragi].dc+=pred_last[refi]; /*Add the
+                                                              predictor, then
+                                                              keep the result
+                                                              as the new
+                                                              predictor.*/
+          ncoded_fragis++;
+        }
+      }
+    }
+    else{
+      oc_fragment *u_frags;
+      int          l_ref;
+      int          ul_ref;
+      int          u_ref;
+      u_frags=frags-nhfrags; /*u_frags[i] is the fragment one row above
+                               frags[i].*/
+      l_ref=-1; /*-1 marks a neighbor that is not available; a row starts with
+                  no left or upper-left neighbor.*/
+      ul_ref=-1;
+      u_ref=u_frags[fragi].refi;
+      for(fragx=0;fragx<nhfrags;fragx++,fragi++){
+        int ur_ref;
+        if(fragx+1>=nhfrags)ur_ref=-1;
+        else ur_ref=u_frags[fragi+1].refi;
+        if(frags[fragi].coded){
+          int pred;
+          int refi;
+          refi=frags[fragi].refi;
+          /*We break out a separate case based on which of our neighbors use
+             the same reference frames.
+            This is somewhat faster than trying to make a generic case which
+             handles all of them, since it reduces lots of poorly predicted
+             jumps to one switch statement, and also lets a number of the
+             multiplications be optimized out by strength reduction.
+            The switch value has bit 0 set for a matching left neighbor, bit 1
+             for upper-left, bit 2 for up, and bit 3 for upper-right.*/
+          switch((l_ref==refi)|(ul_ref==refi)<<1|
+           (u_ref==refi)<<2|(ur_ref==refi)<<3){
+            default:pred=pred_last[refi];break;
+            case  1:
+            case  3:pred=frags[fragi-1].dc;break;
+            case  2:pred=u_frags[fragi-1].dc;break;
+            case  4:
+            case  6:
+            case 12:pred=u_frags[fragi].dc;break;
+            case  5:pred=(frags[fragi-1].dc+u_frags[fragi].dc)/2;break;
+            case  8:pred=u_frags[fragi+1].dc;break;
+            case  9:
+            case 11:
+            case 13:{
+              /*The TI compiler mis-compiles this line.*/
+              pred=(75*frags[fragi-1].dc+53*u_frags[fragi+1].dc)/128;
+            }break;
+            case 10:pred=(u_frags[fragi-1].dc+u_frags[fragi+1].dc)/2;break;
+            case 14:{
+              pred=(3*(u_frags[fragi-1].dc+u_frags[fragi+1].dc)
+               +10*u_frags[fragi].dc)/16;
+            }break;
+            case  7:
+            case 15:{
+              int p0;
+              int p1;
+              int p2;
+              p0=frags[fragi-1].dc;
+              p1=u_frags[fragi-1].dc;
+              p2=u_frags[fragi].dc;
+              pred=(29*(p0+p2)-26*p1)/32;
+              if(abs(pred-p2)>128)pred=p2; /*Fall back to a single neighbor
+                                             when the weighted value is more
+                                             than 128 away from it; up is
+                                             tested first, then left, then
+                                             upper-left.*/
+              else if(abs(pred-p0)>128)pred=p0;
+              else if(abs(pred-p1)>128)pred=p1;
+            }break;
+          }
+          pred_last[refi]=frags[fragi].dc+=pred;
+          ncoded_fragis++;
+          l_ref=refi;
+        }
+        else l_ref=-1; /*An uncoded fragment is not used as the next left
+                         neighbor.*/
+        ul_ref=u_ref; /*Slide the upper neighbors one fragment to the right.*/
+        u_ref=ur_ref;
+      }
+    }
+  }
+  _pipe->ncoded_fragis[_pli]=ncoded_fragis;
+  /*Also save the number of uncoded fragments so we know how many to copy.*/
+  _pipe->nuncoded_fragis[_pli]=
+   (fragy_end-fragy0)*(ptrdiff_t)nhfrags-ncoded_fragis;
+}
+
+/*Reconstructs all coded fragments in a single MCU (one or two super block
+   rows).
+  This requires that each coded fragment have a proper macro block mode and
+   motion vector (if not in INTRA mode), and have its DC value decoded, with
+   the DC prediction process reversed, and the number of coded and uncoded
+   fragments in this plane of the MCU be counted.
+  The token lists for each color plane and coefficient should also be filled
+   in, along with initial token offsets, extra bits offsets, and EOB run
+   counts.
+  _dec:  The decoder context.
+  _pipe: The pipeline state; its token offsets, EOB runs and fragment list
+          pointers for this plane are advanced as fragments are consumed.
+  _pli:  The color plane to reconstruct.*/
+static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec,
+ oc_dec_pipeline_state *_pipe,int _pli){
+  unsigned char       *dct_tokens;
+  const unsigned char *dct_fzig_zag;
+  ogg_uint16_t         dc_quant[2];
+  const oc_fragment   *frags;
+  const ptrdiff_t     *coded_fragis;
+  ptrdiff_t            ncoded_fragis;
+  ptrdiff_t            fragii;
+  ptrdiff_t           *ti;
+  ptrdiff_t           *eob_runs;
+  int                  qti;
+  dct_tokens=_dec->dct_tokens;
+  dct_fzig_zag=_dec->state.opt_data.dct_fzig_zag;
+  frags=_dec->state.frags;
+  coded_fragis=_pipe->coded_fragis[_pli];
+  ncoded_fragis=_pipe->ncoded_fragis[_pli];
+  ti=_pipe->ti[_pli];
+  eob_runs=_pipe->eob_runs[_pli];
+  for(qti=0;qti<2;qti++)dc_quant[qti]=_pipe->dequant[_pli][0][qti][0]; /*The DC quantizer always comes from the first quantizer entry (qii 0).*/
+  for(fragii=0;fragii<ncoded_fragis;fragii++){
+    const ogg_uint16_t *ac_quant;
+    ptrdiff_t           fragi;
+    int                 last_zzi;
+    int                 zzi;
+    fragi=coded_fragis[fragii];
+    qti=frags[fragi].mb_mode!=OC_MODE_INTRA; /*0 for INTRA blocks, 1 for
+                                               everything else.*/
+    ac_quant=_pipe->dequant[_pli][frags[fragi].qii][qti];
+    /*Decode the coefficient tokens.
+      The walk starts at index 0, but dct_coeffs[0] is overwritten with the
+       fragment's DC value after the loop.*/
+    for(zzi=0;zzi<64;){
+      int token;
+      last_zzi=zzi;
+      if(eob_runs[zzi]){
+        eob_runs[zzi]--; /*This block uses up one entry of the pending EOB
+                           run and is finished.*/
+        break;
+      }
+      else{
+        ptrdiff_t eob;
+        int       cw;
+        int       rlen;
+        int       coeff;
+        int       lti;
+        lti=ti[zzi];
+        token=dct_tokens[lti++];
+        cw=OC_DCT_CODE_WORD[token];
+        /*These parts could be done branchless, but the branches are fairly
+           predictable and the C code translates into more than a few
+           instructions, so it's worth it to avoid them.*/
+        if(OC_DCT_TOKEN_NEEDS_MORE(token)){
+          cw+=dct_tokens[lti++]<<OC_DCT_TOKEN_EB_POS(token);
+        }
+        eob=cw>>OC_DCT_CW_EOB_SHIFT&0xFFF;
+        if(token==OC_DCT_TOKEN_FAT_EOB){
+          eob+=dct_tokens[lti++]<<8; /*This token stores a second extra bits
+                                       byte.*/
+          if(eob==0)eob=OC_DCT_EOB_FINISH;
+        }
+        rlen=(unsigned char)(cw>>OC_DCT_CW_RLEN_SHIFT);
+        cw^=-(cw&1<<OC_DCT_CW_FLIP_BIT);
+        coeff=cw>>OC_DCT_CW_MAG_SHIFT;
+        eob_runs[zzi]=eob;
+        ti[zzi]=lti; /*Save the advanced token offset for this coefficient
+                       index.*/
+        zzi+=rlen; /*Skip over the zero run.*/
+        _pipe->dct_coeffs[dct_fzig_zag[zzi]]=
+         (ogg_int16_t)(coeff*(int)ac_quant[zzi]);
+        zzi+=!eob; /*Only advance past this position if the token was not an
+                     EOB; otherwise the next iteration consumes the run.*/
+      }
+    }
+    /*TODO: zzi should be exactly 64 here.
+      If it's not, we should report some kind of warning.*/
+    zzi=OC_MINI(zzi,64);
+    _pipe->dct_coeffs[0]=(ogg_int16_t)frags[fragi].dc;
+    /*last_zzi is always initialized.
+      If your compiler thinks otherwise, it is dumb.*/
+    oc_state_frag_recon(&_dec->state,fragi,_pli,
+     _pipe->dct_coeffs,last_zzi,dc_quant[qti]);
+  }
+  _pipe->coded_fragis[_pli]+=ncoded_fragis;
+  /*Right now the reconstructed MCU has only the coded blocks in it.*/
+  /*TODO: We make the decision here to always copy the uncoded blocks into it
+     from the reference frame.
+    We could also copy the coded blocks back over the reference frame, if we
+     wait for an additional MCU to be decoded, which might be faster if only a
+     small number of blocks are coded.
+    However, this introduces more latency, creating a larger cache footprint.
+    It's unknown which decision is better, but this one results in simpler
+     code, and the hard case (high bitrate, high resolution) is handled
+     correctly.*/
+  /*Copy the uncoded blocks from the previous reference frame.*/
+  if(_pipe->nuncoded_fragis[_pli]>0){
+    _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli]; /*The uncoded
+                                                                 list pointer
+                                                                 moves
+                                                                 backwards as
+                                                                 entries are
+                                                                 consumed.*/
+    oc_frag_copy_list(&_dec->state,
+     _dec->state.ref_frame_data[OC_FRAME_SELF],
+     _dec->state.ref_frame_data[OC_FRAME_PREV],
+     _dec->state.ref_ystride[_pli],_pipe->uncoded_fragis[_pli],
+     _pipe->nuncoded_fragis[_pli],_dec->state.frag_buf_offs);
+  }
+}
+
+/*Filter a horizontal block edge.
+  Each of the 8 columns is handled on its own: 10 vertically adjacent source
+   pixels are read, the activity on either side of the edge is added to the
+   two variance accumulators (saturated at 255 per column), and 8 pixels are
+   written to the destination.
+  The written pixels are smoothed only when both activities are below _flimit
+   and the step across the edge is below _qstep; otherwise the middle 8 source
+   pixels are copied unchanged.*/
+static void oc_filter_hedge(unsigned char *_dst,int _dst_ystride,
+ const unsigned char *_src,int _src_ystride,int _qstep,int _flimit,
+ int *_variance0,int *_variance1){
+  int col;
+  for(col=0;col<8;col++){
+    const unsigned char *in;
+    unsigned char       *out;
+    unsigned char        px[8];
+    int                  t[10];
+    int                  act0;
+    int                  act1;
+    int                  i;
+    /*Gather the column of taps that straddles the edge.*/
+    in=_src+col;
+    for(i=0;i<10;i++){
+      t[i]=*in;
+      in+=_src_ystride;
+    }
+    /*Measure the activity above and below the edge.*/
+    act0=0;
+    act1=0;
+    for(i=0;i<4;i++){
+      act0+=abs(t[i+1]-t[i]);
+      act1+=abs(t[i+5]-t[i+6]);
+    }
+    *_variance0+=OC_MINI(255,act0);
+    *_variance1+=OC_MINI(255,act1);
+    if(act0>=_flimit||act1>=_flimit||abs(t[5]-t[4])>=_qstep){
+      /*Too much detail: pass the middle 8 taps through untouched.*/
+      for(i=0;i<8;i++)px[i]=(unsigned char)t[i+1];
+    }
+    else{
+      px[0]=(unsigned char)(3*t[0]+2*t[1]+t[2]+t[3]+t[4]+4>>3);
+      px[1]=(unsigned char)(2*t[0]+t[1]+2*t[2]+t[3]+t[4]+t[5]+4>>3);
+      for(i=0;i<4;i++){
+        px[i+2]=(unsigned char)(t[i]+t[i+1]+t[i+2]+2*t[i+3]+
+         t[i+4]+t[i+5]+t[i+6]+4>>3);
+      }
+      px[6]=(unsigned char)(t[4]+t[5]+t[6]+2*t[7]+t[8]+2*t[9]+4>>3);
+      px[7]=(unsigned char)(t[5]+t[6]+t[7]+2*t[8]+3*t[9]+4>>3);
+    }
+    /*Store the finished column.*/
+    out=_dst+col;
+    for(i=0;i<8;i++){
+      *out=px[i];
+      out+=_dst_ystride;
+    }
+  }
+}
+
+/*Filter a vertical block edge, in place.
+  Each of the 8 rows is handled on its own: 10 horizontally adjacent pixels
+   are read starting one pixel to the left of _dst, the activity on either
+   side of the edge is added to _variances[0] and _variances[1] (saturated at
+   255 per row), and, if both activities are below _flimit and the step across
+   the edge is below _qstep, the 8 pixels starting at _dst are replaced by
+   smoothed values.
+  Rows that fail the test are left untouched.*/
+static void oc_filter_vedge(unsigned char *_dst,int _dst_ystride,
+ int _qstep,int _flimit,int *_variances){
+  unsigned char *row;
+  int            y;
+  row=_dst;
+  for(y=0;y<8;y++,row+=_dst_ystride){
+    int t[10];
+    int act0;
+    int act1;
+    int i;
+    /*Snapshot the taps before anything in this row is overwritten.*/
+    for(i=0;i<10;i++)t[i]=row[i-1];
+    act0=0;
+    act1=0;
+    for(i=0;i<4;i++){
+      act0+=abs(t[i+1]-t[i]);
+      act1+=abs(t[i+5]-t[i+6]);
+    }
+    _variances[0]+=OC_MINI(255,act0);
+    _variances[1]+=OC_MINI(255,act1);
+    /*Too much detail: leave this row as it is.*/
+    if(act0>=_flimit||act1>=_flimit||abs(t[5]-t[4])>=_qstep)continue;
+    row[0]=(unsigned char)(3*t[0]+2*t[1]+t[2]+t[3]+t[4]+4>>3);
+    row[1]=(unsigned char)(2*t[0]+t[1]+2*t[2]+t[3]+t[4]+t[5]+4>>3);
+    for(i=0;i<4;i++){
+      row[i+2]=(unsigned char)(t[i]+t[i+1]+t[i+2]+2*t[i+3]+
+       t[i+4]+t[i+5]+t[i+6]+4>>3);
+    }
+    row[6]=(unsigned char)(t[4]+t[5]+t[6]+2*t[7]+t[8]+2*t[9]+4>>3);
+    row[7]=(unsigned char)(t[5]+t[6]+t[7]+2*t[8]+3*t[9]+4>>3);
+  }
+}
+
+static void oc_dec_deblock_frag_rows(oc_dec_ctx *_dec,
+ th_img_plane *_dst,th_img_plane *_src,int _pli,int _fragy0,
+ int _fragy_end){ /*Writes a deblocked version of plane _pli of _src into
+                    plane _pli of _dst for the fragment rows from _fragy0 up
+                    to (but not including) _fragy_end.
+                   Block edges are filtered with oc_filter_hedge() and
+                    oc_filter_vedge(), using a step size looked up from
+                    _dec->pp_dc_scale via the per-fragment entries of
+                    _dec->dc_qis; the filters also accumulate into
+                    _dec->variances.*/
+  oc_fragment_plane   *fplane;
+  int                 *variance;
+  unsigned char       *dc_qi;
+  unsigned char       *dst;
+  const unsigned char *src;
+  ptrdiff_t            froffset;
+  int                  dst_ystride;
+  int                  src_ystride;
+  int                  nhfrags;
+  int                  width;
+  int                  notstart;
+  int                  notdone;
+  int                  flimit;
+  int                  qstep;
+  int                  y_end;
+  int                  y;
+  int                  x;
+  _dst+=_pli;
+  _src+=_pli;
+  fplane=_dec->state.fplanes+_pli;
+  nhfrags=fplane->nhfrags;
+  froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
+  variance=_dec->variances+froffset;
+  dc_qi=_dec->dc_qis+froffset;
+  notstart=_fragy0>0; /*1 unless this call starts at the first fragment
+                        row.*/
+  notdone=_fragy_end<fplane->nvfrags; /*1 unless this call reaches the last
+                                        fragment row.*/
+  /*We want to clear an extra row of variances, except at the end.*/
+  memset(variance+(nhfrags&-notstart),0,
+   (_fragy_end+notdone-_fragy0-notstart)*(nhfrags*sizeof(variance[0])));
+  /*Except for the first time, we want to point to the middle of the row.*/
+  y=(_fragy0<<3)+(notstart<<2);
+  dst_ystride=_dst->stride;
+  src_ystride=_src->stride;
+  dst=_dst->data+y*(ptrdiff_t)dst_ystride;
+  src=_src->data+y*(ptrdiff_t)src_ystride;
+  width=_dst->width;
+  for(;y<4;y++){ /*Only runs when y starts at 0: the first four pixel rows are
+                   copied unfiltered.*/
+    memcpy(dst,src,width*sizeof(dst[0]));
+    dst+=dst_ystride;
+    src+=src_ystride;
+  }
+  /*We also want to skip the last row in the frame for this loop.*/
+  y_end=_fragy_end-!notdone<<3;
+  for(;y<y_end;y+=8){
+    qstep=_dec->pp_dc_scale[*dc_qi];
+    flimit=(qstep*3)>>2; /*The flatness limit is three quarters of the step
+                           size.*/
+    oc_filter_hedge(dst,dst_ystride,src-src_ystride,src_ystride,
+     qstep,flimit,variance,variance+nhfrags); /*The leftmost block has no
+                                                vertical edge to its left, so
+                                                only the horizontal edge is
+                                                filtered.*/
+    variance++;
+    dc_qi++;
+    for(x=8;x<width;x+=8){
+      qstep=_dec->pp_dc_scale[*dc_qi];
+      flimit=(qstep*3)>>2;
+      oc_filter_hedge(dst+x,dst_ystride,src+x-src_ystride,src_ystride,
+       qstep,flimit,variance,variance+nhfrags);
+      oc_filter_vedge(dst+x-(dst_ystride<<2)-4,dst_ystride,
+       qstep,flimit,variance-1); /*Filters, in place in dst, the vertical
+                                   edge at column x over the 8 rows starting 4
+                                   rows above dst.*/
+      variance++;
+      dc_qi++;
+    }
+    dst+=dst_ystride<<3;
+    src+=src_ystride<<3;
+  }
+  /*And finally, handle the last row in the frame, if it's in the range.*/
+  if(!notdone){
+    int height;
+    height=_dst->height;
+    for(;y<height;y++){ /*Copy the remaining pixel rows unfiltered.*/
+      memcpy(dst,src,width*sizeof(dst[0]));
+      dst+=dst_ystride;
+      src+=src_ystride;
+    }
+    /*Filter the last row of vertical block edges.*/
+    dc_qi++;
+    for(x=8;x<width;x+=8){
+      qstep=_dec->pp_dc_scale[*dc_qi++];
+      flimit=(qstep*3)>>2;
+      oc_filter_vedge(dst+x-(dst_ystride<<3)-4,dst_ystride,
+       qstep,flimit,variance++);
+    }
+  }
+}
+
+static void oc_dering_block(unsigned char *_idata,int _ystride,int _b,
+ int _dc_scale,int _sharp_mod,int _strong){ /*Applies a deringing filter, in
+                                              place, to the 8x8 block whose
+                                              top-left pixel is _idata.
+                                             _ystride:   The offset between
+                                              successive rows.
+                                             _b:         Border flags: bit 0
+                                              (1) left, bit 1 (2) right, bit 2
+                                              (4) top, bit 3 (8) bottom.
+                                              When a flag is set, pixels
+                                              beyond that side of the block
+                                              are not read; the block's own
+                                              edge pixels are used instead.
+                                             _dc_scale:  Added to each weight
+                                              before clamping; also bounds the
+                                              maximum weight (3*_dc_scale).
+                                             _sharp_mod: The weight used where
+                                              the raw weight falls below -64.
+                                             _strong:    Index (0 or 1) into
+                                              OC_MOD_MAX and OC_MOD_SHIFT.*/
+  static const unsigned char OC_MOD_MAX[2]={24,32};
+  static const unsigned char OC_MOD_SHIFT[2]={1,0};
+  const unsigned char *psrc;
+  const unsigned char *src;
+  const unsigned char *nsrc;
+  unsigned char       *dst;
+  int                  vmod[72];
+  int                  hmod[72];
+  int                  mod_hi;
+  int                  by;
+  int                  bx;
+  mod_hi=OC_MINI(3*_dc_scale,OC_MOD_MAX[_strong]);
+  dst=_idata;
+  src=dst;
+  psrc=src-(_ystride&-!(_b&4)); /*The row above, or the first row itself at
+                                  the top border.*/
+  for(by=0;by<9;by++){ /*vmod holds 9 rows of 8 weights, one per pair of
+                         vertically adjacent pixels.*/
+    for(bx=0;bx<8;bx++){
+      int mod;
+      mod=32+_dc_scale-(abs(src[bx]-psrc[bx])<<OC_MOD_SHIFT[_strong]);
+      vmod[(by<<3)+bx]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
+    }
+    psrc=src;
+    src+=_ystride&-(!(_b&8)|by<7); /*Do not step past the last row at the
+                                     bottom border.*/
+  }
+  nsrc=dst;
+  psrc=dst-!(_b&1); /*The column to the left, or the first column itself at
+                      the left border.*/
+  for(bx=0;bx<9;bx++){ /*hmod holds 9 columns of 8 weights, one per pair of
+                         horizontally adjacent pixels.*/
+    src=nsrc;
+    for(by=0;by<8;by++){
+      int mod;
+      mod=32+_dc_scale-(abs(*src-*psrc)<<OC_MOD_SHIFT[_strong]);
+      hmod[(bx<<3)+by]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
+      psrc+=_ystride;
+      src+=_ystride;
+    }
+    psrc=nsrc;
+    nsrc+=!(_b&2)|bx<7; /*Do not step past the last column at the right
+                          border.*/
+  }
+  src=dst;
+  psrc=src-(_ystride&-!(_b&4));
+  nsrc=src+_ystride;
+  for(by=0;by<8;by++){ /*Each output pixel is a weighted sum of itself and its
+                         four neighbors: b accumulates the weighted neighbors
+                         (plus 64 for rounding) and a is what remains of 128
+                         for the pixel itself.*/
+    int a;
+    int b;
+    int w;
+    a=128;
+    b=64;
+    w=hmod[by];
+    a-=w;
+    b+=w**(src-!(_b&1));
+    w=vmod[by<<3];
+    a-=w;
+    b+=w*psrc[0];
+    w=vmod[by+1<<3];
+    a-=w;
+    b+=w*nsrc[0];
+    w=hmod[(1<<3)+by];
+    a-=w;
+    b+=w*src[1];
+    dst[0]=OC_CLAMP255(a*src[0]+b>>7);
+    for(bx=1;bx<7;bx++){
+      a=128;
+      b=64;
+      w=hmod[(bx<<3)+by];
+      a-=w;
+      b+=w*src[bx-1];
+      w=vmod[(by<<3)+bx];
+      a-=w;
+      b+=w*psrc[bx];
+      w=vmod[(by+1<<3)+bx];
+      a-=w;
+      b+=w*nsrc[bx];
+      w=hmod[(bx+1<<3)+by];
+      a-=w;
+      b+=w*src[bx+1];
+      dst[bx]=OC_CLAMP255(a*src[bx]+b>>7);
+    }
+    a=128;
+    b=64;
+    w=hmod[(7<<3)+by];
+    a-=w;
+    b+=w*src[6];
+    w=vmod[(by<<3)+7];
+    a-=w;
+    b+=w*psrc[7];
+    w=vmod[(by+1<<3)+7];
+    a-=w;
+    b+=w*nsrc[7];
+    w=hmod[(8<<3)+by];
+    a-=w;
+    b+=w*src[7+!(_b&2)];
+    dst[7]=OC_CLAMP255(a*src[7]+b>>7);
+    dst+=_ystride;
+    psrc=src; /*Rotate the previous/current/next row pointers down one row.*/
+    src=nsrc;
+    nsrc+=_ystride&-(!(_b&8)|by<6);
+  }
+}
+
+#define OC_DERING_THRESH1 (384) /*Above this variance: one weak pass.*/
+#define OC_DERING_THRESH2 (4*OC_DERING_THRESH1) /*Above this: one strong pass.*/
+#define OC_DERING_THRESH3 (5*OC_DERING_THRESH1) /*Strong threshold, plane 0.*/
+#define OC_DERING_THRESH4 (10*OC_DERING_THRESH1) /*Strong threshold, others.*/
+
+static void oc_dec_dering_frag_rows(oc_dec_ctx *_dec,th_img_plane *_img,
+ int _pli,int _fragy0,int _fragy_end){ /*Applies oc_dering_block(), in place,
+                                         to the blocks of plane _pli of _img
+                                         in the fragment rows from _fragy0 up
+                                         to (but not including) _fragy_end.
+                                        The number and strength of the passes
+                                         for each block is chosen from its
+                                         entry in _dec->variances and the
+                                         current post-processing level.*/
+  th_img_plane      *iplane;
+  oc_fragment_plane *fplane;
+  oc_fragment       *frag;
+  int               *variance;
+  unsigned char     *idata;
+  ptrdiff_t          froffset;
+  int                ystride;
+  int                nhfrags;
+  int                sthresh;
+  int                strong;
+  int                y_end;
+  int                width;
+  int                height;
+  int                y;
+  int                x;
+  iplane=_img+_pli;
+  fplane=_dec->state.fplanes+_pli;
+  nhfrags=fplane->nhfrags;
+  froffset=fplane->froffset+_fragy0*(ptrdiff_t)nhfrags;
+  variance=_dec->variances+froffset;
+  frag=_dec->state.frags+froffset;
+  strong=_dec->pp_level>=(_pli?OC_PP_LEVEL_SDERINGC:OC_PP_LEVEL_SDERINGY); /*The level that enables strong deringing differs between plane 0 and the
+    other planes.*/
+  sthresh=_pli?OC_DERING_THRESH4:OC_DERING_THRESH3;
+  y=_fragy0<<3;
+  ystride=iplane->stride;
+  idata=iplane->data+y*(ptrdiff_t)ystride;
+  y_end=_fragy_end<<3;
+  width=iplane->width;
+  height=iplane->height;
+  for(;y<y_end;y+=8){
+    for(x=0;x<width;x+=8){
+      int b;
+      int qi;
+      int var;
+      qi=_dec->state.qis[frag->qii];
+      var=*variance;
+      b=(x<=0)|(x+8>=width)<<1|(y<=0)<<2|(y+8>=height)<<3; /*Border flags:
+                                                             1 left, 2 right,
+                                                             4 top, 8
+                                                             bottom.*/
+      if(strong&&var>sthresh){
+        oc_dering_block(idata+x,ystride,b,
+         _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
+        if(_pli||!(b&1)&&*(variance-1)>OC_DERING_THRESH4||
+         !(b&2)&&variance[1]>OC_DERING_THRESH4||
+         !(b&4)&&*(variance-nhfrags)>OC_DERING_THRESH4||
+         !(b&8)&&variance[nhfrags]>OC_DERING_THRESH4){ /*For planes other
+                                                         than 0, or when any
+                                                         in-frame neighbor
+                                                         also exceeds
+                                                         OC_DERING_THRESH4,
+                                                         run two more strong
+                                                         passes.*/
+          oc_dering_block(idata+x,ystride,b,
+           _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
+          oc_dering_block(idata+x,ystride,b,
+           _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
+        }
+      }
+      else if(var>OC_DERING_THRESH2){
+        oc_dering_block(idata+x,ystride,b,
+         _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
+      }
+      else if(var>OC_DERING_THRESH1){
+        oc_dering_block(idata+x,ystride,b,
+         _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],0);
+      }
+      frag++;
+      variance++;
+    }
+    idata+=ystride<<3;
+  }
+}
+
+
+
+/*Allocates and initializes a decoder context.
+  _info:  The stream info to decode with.
+  _setup: The setup info to decode with.
+  Return: The new decoder, with its current frame number reset to 0, or NULL
+           if either argument is NULL, the allocation fails, or
+           oc_dec_init() reports an error.*/
+th_dec_ctx *th_decode_alloc(const th_info *_info,const th_setup_info *_setup){
+  oc_dec_ctx *ctx;
+  ctx=NULL;
+  if(_info!=NULL&&_setup!=NULL){
+    ctx=oc_aligned_malloc(sizeof(*ctx),16);
+    if(ctx!=NULL&&oc_dec_init(ctx,_info,_setup)>=0){
+      ctx->state.curframe_num=0;
+    }
+    else{
+      /*Either there was no memory or initialization failed; release whatever
+         we got and report failure.*/
+      oc_aligned_free(ctx);
+      ctx=NULL;
+    }
+  }
+  return ctx;
+}
+
+/*Releases a decoder context: clears its internal state, then frees the
+   context itself.
+  Passing NULL is a no-op.*/
+void th_decode_free(th_dec_ctx *_dec){
+  if(_dec==NULL)return;
+  oc_dec_clear(_dec);
+  oc_aligned_free(_dec);
+}
+
+/*Handles decoder control requests.
+  _dec:    The decoder context.
+  _req:    The TH_DECCTL_* request code.
+  _buf:    The request's argument buffer; values are read from or written to
+            it depending on the request.
+  _buf_sz: The size of _buf in bytes; it must exactly match the size of the
+            type the request expects.
+  Return: 0 on success.
+          TH_EFAULT if _dec or _buf is NULL for a recognized request.
+          TH_EINVAL if _buf_sz is wrong or the supplied value is out of range.
+          TH_EIMPL if the request is not recognized.*/
+int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf,
+ size_t _buf_sz){
+  switch(_req){
+  case TH_DECCTL_GET_PPLEVEL_MAX:{
+    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+    if(_buf_sz!=sizeof(int))return TH_EINVAL;
+    (*(int *)_buf)=OC_PP_LEVEL_MAX;
+    return 0;
+  }break;
+  case TH_DECCTL_SET_PPLEVEL:{
+    int pp_level;
+    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+    if(_buf_sz!=sizeof(int))return TH_EINVAL;
+    pp_level=*(int *)_buf;
+    if(pp_level<0||pp_level>OC_PP_LEVEL_MAX)return TH_EINVAL;
+    _dec->pp_level=pp_level;
+    return 0;
+  }break;
+  case TH_DECCTL_SET_GRANPOS:{
+    ogg_int64_t granpos;
+    ogg_int64_t offset_mask;
+    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+    if(_buf_sz!=sizeof(ogg_int64_t))return TH_EINVAL;
+    granpos=*(ogg_int64_t *)_buf;
+    if(granpos<0)return TH_EINVAL;
+    /*Build the mask for the low keyframe_granule_shift bits in 64-bit
+       arithmetic: shifting a plain int 1 left by 31 overflows a signed int,
+       which is undefined behavior.*/
+    offset_mask=((ogg_int64_t)1<<_dec->state.info.keyframe_granule_shift)-1;
+    _dec->state.granpos=granpos;
+    /*The high bits hold the (biased) key frame number, and the low bits the
+       offset of the current frame from that key frame.*/
+    _dec->state.keyframe_num=(granpos>>_dec->state.info.keyframe_granule_shift)
+     -_dec->state.granpos_bias;
+    _dec->state.curframe_num=_dec->state.keyframe_num+(granpos&offset_mask);
+    return 0;
+  }break;
+  case TH_DECCTL_SET_STRIPE_CB:{
+    th_stripe_callback *cb;
+    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+    if(_buf_sz!=sizeof(th_stripe_callback))return TH_EINVAL;
+    cb=(th_stripe_callback *)_buf;
+    /*Copy the callback by value; the caller's struct is not retained.*/
+    _dec->stripe_cb.ctx=cb->ctx;
+    _dec->stripe_cb.stripe_decoded=cb->stripe_decoded;
+    return 0;
+  }break;
+#ifdef HAVE_CAIRO
+  /*Each telemetry request switches telemetry on and stores its int
+     argument.*/
+  case TH_DECCTL_SET_TELEMETRY_MBMODE:{
+    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+    if(_buf_sz!=sizeof(int))return TH_EINVAL;
+    _dec->telemetry=1;
+    _dec->telemetry_mbmode=*(int *)_buf;
+    return 0;
+  }break;
+  case TH_DECCTL_SET_TELEMETRY_MV:{
+    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+    if(_buf_sz!=sizeof(int))return TH_EINVAL;
+    _dec->telemetry=1;
+    _dec->telemetry_mv=*(int *)_buf;
+    return 0;
+  }break;
+  case TH_DECCTL_SET_TELEMETRY_QI:{
+    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+    if(_buf_sz!=sizeof(int))return TH_EINVAL;
+    _dec->telemetry=1;
+    _dec->telemetry_qi=*(int *)_buf;
+    return 0;
+  }break;
+  case TH_DECCTL_SET_TELEMETRY_BITS:{
+    if(_dec==NULL||_buf==NULL)return TH_EFAULT;
+    if(_buf_sz!=sizeof(int))return TH_EINVAL;
+    _dec->telemetry=1;
+    _dec->telemetry_bits=*(int *)_buf;
+    return 0;
+  }break;
+#endif
+  default:return TH_EIMPL;
+  }
+}
+
+/*Handles an INTER frame that arrives before any reference buffer has been
+   initialized (i.e., decoding did not begin on a key frame).
+  All three reference slots are aimed at buffer 0, the post-processing frame
+   buffer is set to describe the same planes, and the buffer is then filled
+   with 0x80 so that prediction starts from solid gray.*/
+static void oc_dec_init_dummy_frame(th_dec_ctx *_dec){
+  th_info   *hdr;
+  size_t     luma_bytes;
+  size_t     chroma_bytes;
+  ptrdiff_t  fill_back;
+  int        luma_stride;
+  int        luma_rows;
+  int        chroma_stride;
+  int        chroma_rows;
+  /*Size up the planes, including the OC_UMV_PADDING rows above and below.
+    The chroma plane has half as many rows unless bit 1 of the pixel format
+     is set.*/
+  hdr=&_dec->state.info;
+  luma_stride=abs(_dec->state.ref_ystride[0]);
+  chroma_stride=abs(_dec->state.ref_ystride[1]);
+  luma_rows=hdr->frame_height+2*OC_UMV_PADDING;
+  chroma_rows=luma_rows>>!(hdr->pixel_fmt&2);
+  luma_bytes=luma_stride*(size_t)luma_rows+16;
+  chroma_bytes=chroma_stride*(size_t)chroma_rows;
+  /*How far before the luma data pointer the fill has to start.*/
+  fill_back=luma_stride*(ptrdiff_t)(luma_rows-OC_UMV_PADDING-1)
+   +OC_UMV_PADDING;
+  /*Make the golden, previous, and current frames all refer to buffer 0.*/
+  _dec->state.ref_frame_idx[OC_FRAME_SELF]=0;
+  _dec->state.ref_frame_idx[OC_FRAME_PREV]=0;
+  _dec->state.ref_frame_idx[OC_FRAME_GOLD]=0;
+  _dec->state.ref_frame_data[OC_FRAME_SELF]=
+   _dec->state.ref_frame_bufs[0][0].data;
+  _dec->state.ref_frame_data[OC_FRAME_PREV]=
+   _dec->state.ref_frame_bufs[0][0].data;
+  _dec->state.ref_frame_data[OC_FRAME_GOLD]=
+   _dec->state.ref_frame_bufs[0][0].data;
+  /*Copy the three plane descriptors of buffer 0 into the post-processing
+     frame buffer.*/
+  memcpy(_dec->pp_frame_buf,_dec->state.ref_frame_bufs[0],
+   sizeof(_dec->pp_frame_buf[0])*3);
+  /*One fill spanning the luma plane plus two chroma planes' worth of
+     bytes.*/
+  memset(_dec->state.ref_frame_data[0]-fill_back,0x80,
+   luma_bytes+2*chroma_bytes);
+}
+
+#if defined(HAVE_CAIRO)
+/*Draws the filled 11x11 mode box of the macroblock whose top-left corner is
+   at (_x,_y).
+  The outline is stroked in the color (_r,_g,_b) at 50% alpha and the interior
+   is filled in the same color at 25% alpha.*/
+static void oc_telemetry_fill_box(cairo_t *_c,double _x,double _y,
+ double _r,double _g,double _b){
+  cairo_set_source_rgba(_c,_r,_g,_b,.5);
+  cairo_rectangle(_c,_x+2.5,_y+2.5,11,11);
+  cairo_stroke_preserve(_c);
+  cairo_set_source_rgba(_c,_r,_g,_b,.25);
+  cairo_fill(_c);
+}
+
+/*Draws the motion vector (_mvx,_mvy) as a tapered white line.
+  The line starts at (_x+_mvx,_y-_mvy) with a width of 3 and narrows to 2 and
+   then 1 over three equal segments, ending at (_x,_y).
+  The line width of _c is left at 1.*/
+static void oc_telemetry_draw_mv(cairo_t *_c,double _x,double _y,
+ int _mvx,int _mvy){
+  cairo_move_to(_c,_x+_mvx,_y-_mvy);
+  cairo_set_source_rgba(_c,1.,1.,1.,.9);
+  cairo_set_line_width(_c,3.);
+  cairo_line_to(_c,_x+_mvx*.66,_y-_mvy*.66);
+  cairo_stroke_preserve(_c);
+  cairo_set_line_width(_c,2.);
+  cairo_line_to(_c,_x+_mvx*.33,_y-_mvy*.33);
+  cairo_stroke_preserve(_c);
+  cairo_set_line_width(_c,1.);
+  cairo_line_to(_c,_x,_y);
+  cairo_stroke(_c);
+}
+
+/*Overlays the enabled telemetry visualizations on a decoded frame.
+  The frame in _ycbcr is converted to RGB in a temporary cairo image surface,
+   annotated according to the telemetry_mbmode, telemetry_mv, telemetry_qi and
+   telemetry_bits fields of _dec, and converted back to Y'CbCr in
+   _dec->telemetry_frame_data (allocated on first use).
+  _dec:       The decoder context.
+  _ycbcr:     On entry, the frame to annotate.
+              On success, its data pointers and strides are redirected to the
+               annotated copy in _dec->telemetry_frame_data.
+              If the telemetry buffer or the cairo surface data cannot be
+               obtained, it is left untouched.
+  _telemetry: Not referenced by this function.*/
+static void oc_render_telemetry(th_dec_ctx *_dec,th_ycbcr_buffer _ycbcr,
+ int _telemetry){
+  /*Stuff the plane into cairo.*/
+  cairo_surface_t *cs;
+  unsigned char   *data;
+  unsigned char   *y_row;
+  unsigned char   *u_row;
+  unsigned char   *v_row;
+  unsigned char   *rgb_row;
+  int              cstride;
+  int              w;
+  int              h;
+  int              x;
+  int              y;
+  int              hdec;
+  int              vdec;
+  w=_ycbcr[0].width;
+  h=_ycbcr[0].height;
+  /*hdec/vdec are 1 when chroma is decimated in that direction.*/
+  hdec=!(_dec->state.info.pixel_fmt&1);
+  vdec=!(_dec->state.info.pixel_fmt&2);
+  /*Lazy data buffer init.
+    We could try to re-use the post-processing buffer, which would save
+     memory, but complicate the allocation logic there.
+    I don't think anyone cares about memory usage when using telemetry; it is
+     not meant for embedded devices.*/
+  if(_dec->telemetry_frame_data==NULL){
+    _dec->telemetry_frame_data=_ogg_malloc(
+     (w*h+2*(w>>hdec)*(h>>vdec))*sizeof(*_dec->telemetry_frame_data));
+    if(_dec->telemetry_frame_data==NULL)return;
+  }
+  cs=cairo_image_surface_create(CAIRO_FORMAT_RGB24,w,h);
+  /*Sadly, no YUV support in Cairo (yet); convert into the RGB buffer.*/
+  data=cairo_image_surface_get_data(cs);
+  if(data==NULL){
+    cairo_surface_destroy(cs);
+    return;
+  }
+  cstride=cairo_image_surface_get_stride(cs);
+  y_row=_ycbcr[0].data;
+  u_row=_ycbcr[1].data;
+  v_row=_ycbcr[2].data;
+  rgb_row=data;
+  for(y=0;y<h;y++){
+    for(x=0;x<w;x++){
+      int r;
+      int g;
+      int b;
+      r=(1904000*y_row[x]+2609823*v_row[x>>hdec]-363703744)/1635200;
+      g=(3827562*y_row[x]-1287801*u_row[x>>hdec]
+       -2672387*v_row[x>>hdec]+447306710)/3287200;
+      b=(952000*y_row[x]+1649289*u_row[x>>hdec]-225932192)/817600;
+      /*Each pixel occupies 4 bytes, stored here as B, G, R.*/
+      rgb_row[4*x+0]=OC_CLAMP255(b);
+      rgb_row[4*x+1]=OC_CLAMP255(g);
+      rgb_row[4*x+2]=OC_CLAMP255(r);
+    }
+    y_row+=_ycbcr[0].stride;
+    /*With vertical decimation the chroma rows only advance after odd luma
+       rows; otherwise they advance on every row.*/
+    u_row+=_ycbcr[1].stride&-((y&1)|!vdec);
+    v_row+=_ycbcr[2].stride&-((y&1)|!vdec);
+    rgb_row+=cstride;
+  }
+  /*Draw coded identifier for each macroblock (stored in Hilbert order).*/
+  {
+    cairo_t           *c;
+    const oc_fragment *frags;
+    oc_mv             *frag_mvs;
+    const signed char *mb_modes;
+    oc_mb_map         *mb_maps;
+    size_t             nmbs;
+    size_t             mbi;
+    int                row2;
+    int                col2;
+    /*Maps a fragment's qii to a signed rank relative to the first qi:
+       positive when its qi is larger, negative when it is smaller.*/
+    int                qim[3]={0,0,0};
+    if(_dec->state.nqis==2){
+      int bqi;
+      bqi=_dec->state.qis[0];
+      if(_dec->state.qis[1]>bqi)qim[1]=1;
+      if(_dec->state.qis[1]<bqi)qim[1]=-1;
+    }
+    if(_dec->state.nqis==3){
+      int bqi;
+      int cqi;
+      int dqi;
+      bqi=_dec->state.qis[0];
+      cqi=_dec->state.qis[1];
+      dqi=_dec->state.qis[2];
+      if(cqi>bqi&&dqi>bqi){
+        if(dqi>cqi){
+          qim[1]=1;
+          qim[2]=2;
+        }
+        else{
+          qim[1]=2;
+          qim[2]=1;
+        }
+      }
+      else if(cqi<bqi&&dqi<bqi){
+        if(dqi<cqi){
+          qim[1]=-1;
+          qim[2]=-2;
+        }
+        else{
+          qim[1]=-2;
+          qim[2]=-1;
+        }
+      }
+      else{
+        if(cqi<bqi)qim[1]=-1;
+        else qim[1]=1;
+        if(dqi<bqi)qim[2]=-1;
+        else qim[2]=1;
+      }
+    }
+    c=cairo_create(cs);
+    frags=_dec->state.frags;
+    frag_mvs=_dec->state.frag_mvs;
+    mb_modes=_dec->state.mb_modes;
+    mb_maps=_dec->state.mb_maps;
+    nmbs=_dec->state.nmbs;
+    row2=0;
+    col2=0;
+    for(mbi=0;mbi<nmbs;mbi++){
+      float x;
+      float y;
+      int   bi;
+      /*Top-left corner of this macroblock in surface coordinates.*/
+      y=h-(row2+((col2+1>>1)&1))*16-16;
+      x=(col2>>1)*16;
+      cairo_set_line_width(c,1.);
+      /*Keyframe (all intra) red box.*/
+      if(_dec->state.frame_type==OC_INTRA_FRAME){
+        if(_dec->telemetry_mbmode&0x02){
+          oc_telemetry_fill_box(c,x,y,1.,0,0);
+        }
+      }
+      else{
+        ptrdiff_t fragi;
+        int       frag_mvx;
+        int       frag_mvy;
+        /*Take the motion vector from the first coded luma fragment.
+          frag_mvx and frag_mvy are only valid when one was found (bi<4).*/
+        for(bi=0;bi<4;bi++){
+          fragi=mb_maps[mbi][0][bi];
+          if(fragi>=0&&frags[fragi].coded){
+            frag_mvx=OC_MV_X(frag_mvs[fragi]);
+            frag_mvy=OC_MV_Y(frag_mvs[fragi]);
+            break;
+          }
+        }
+        if(bi<4){
+          switch(mb_modes[mbi]){
+            case OC_MODE_INTRA:{
+              if(_dec->telemetry_mbmode&0x02){
+                oc_telemetry_fill_box(c,x,y,1.,0,0);
+              }
+            }break;
+            case OC_MODE_INTER_NOMV:{
+              if(_dec->telemetry_mbmode&0x01){
+                oc_telemetry_fill_box(c,x,y,0,0,1.);
+              }
+            }break;
+            case OC_MODE_INTER_MV:{
+              if(_dec->telemetry_mbmode&0x04){
+                cairo_rectangle(c,x+2.5,y+2.5,11,11);
+                cairo_set_source_rgba(c,0,1.,0,.5);
+                cairo_stroke(c);
+              }
+              if(_dec->telemetry_mv&0x04){
+                oc_telemetry_draw_mv(c,x+8,y+8,frag_mvx,frag_mvy);
+              }
+            }break;
+            case OC_MODE_INTER_MV_LAST:{
+              if(_dec->telemetry_mbmode&0x08){
+                cairo_rectangle(c,x+2.5,y+2.5,11,11);
+                cairo_set_source_rgba(c,0,1.,0,.5);
+                cairo_move_to(c,x+13.5,y+2.5);
+                cairo_line_to(c,x+2.5,y+8);
+                cairo_line_to(c,x+13.5,y+13.5);
+                cairo_stroke(c);
+              }
+              if(_dec->telemetry_mv&0x08){
+                oc_telemetry_draw_mv(c,x+8,y+8,frag_mvx,frag_mvy);
+              }
+            }break;
+            case OC_MODE_INTER_MV_LAST2:{
+              if(_dec->telemetry_mbmode&0x10){
+                cairo_rectangle(c,x+2.5,y+2.5,11,11);
+                cairo_set_source_rgba(c,0,1.,0,.5);
+                cairo_move_to(c,x+8,y+2.5);
+                cairo_line_to(c,x+2.5,y+8);
+                cairo_line_to(c,x+8,y+13.5);
+                cairo_move_to(c,x+13.5,y+2.5);
+                cairo_line_to(c,x+8,y+8);
+                cairo_line_to(c,x+13.5,y+13.5);
+                cairo_stroke(c);
+              }
+              if(_dec->telemetry_mv&0x10){
+                oc_telemetry_draw_mv(c,x+8,y+8,frag_mvx,frag_mvy);
+              }
+            }break;
+            case OC_MODE_GOLDEN_NOMV:{
+              if(_dec->telemetry_mbmode&0x20){
+                oc_telemetry_fill_box(c,x,y,1.,1.,0);
+              }
+            }break;
+            case OC_MODE_GOLDEN_MV:{
+              if(_dec->telemetry_mbmode&0x40){
+                cairo_rectangle(c,x+2.5,y+2.5,11,11);
+                cairo_set_source_rgba(c,1.,1.,0,.5);
+                cairo_stroke(c);
+              }
+              if(_dec->telemetry_mv&0x40){
+                oc_telemetry_draw_mv(c,x+8,y+8,frag_mvx,frag_mvy);
+              }
+            }break;
+            case OC_MODE_INTER_MV_FOUR:{
+              int bj;
+              if(_dec->telemetry_mbmode&0x80){
+                cairo_rectangle(c,x+2.5,y+2.5,4,4);
+                cairo_rectangle(c,x+9.5,y+2.5,4,4);
+                cairo_rectangle(c,x+2.5,y+9.5,4,4);
+                cairo_rectangle(c,x+9.5,y+9.5,4,4);
+                cairo_set_source_rgba(c,0,1.,0,.5);
+                cairo_stroke(c);
+              }
+              /*4mv is odd, coded in raster order.
+                Each coded block gets its own vector, drawn from the center
+                 of its 8x8 quadrant: (4,12), (12,12), (4,4), (12,4).
+                Missing fragments (negative index) are skipped, as in the
+                 other fragment loops in this function.*/
+              for(bj=0;bj<4;bj++){
+                fragi=mb_maps[mbi][0][bj];
+                if(fragi>=0&&frags[fragi].coded&&_dec->telemetry_mv&0x80){
+                  frag_mvx=OC_MV_X(frag_mvs[fragi]);
+                  frag_mvy=OC_MV_Y(frag_mvs[fragi]);
+                  oc_telemetry_draw_mv(c,x+4+(bj&1)*8,y+12-(bj&2)*4,
+                   frag_mvx,frag_mvy);
+                }
+              }
+            }break;
+          }
+        }
+      }
+      /*qii illustration.*/
+      if(_dec->telemetry_qi&0x2){
+        cairo_set_line_cap(c,CAIRO_LINE_CAP_SQUARE);
+        for(bi=0;bi<4;bi++){
+          ptrdiff_t fragi;
+          int       qiv;
+          int       xp;
+          int       yp;
+          xp=x+(bi&1)*8;
+          yp=y+8-(bi&2)*4;
+          fragi=mb_maps[mbi][0][bi];
+          if(fragi>=0&&frags[fragi].coded){
+            qiv=qim[frags[fragi].qii];
+            /*Each symbol is first stroked wide in translucent black, then
+               (after the switch) re-stroked thin in its own color.*/
+            cairo_set_line_width(c,3.);
+            cairo_set_source_rgba(c,0.,0.,0.,.5);
+            switch(qiv){
+              /*Double plus:*/
+              case 2:{
+                if((bi&1)^((bi&2)>>1)){
+                  cairo_move_to(c,xp+2.5,yp+1.5);
+                  cairo_line_to(c,xp+2.5,yp+3.5);
+                  cairo_move_to(c,xp+1.5,yp+2.5);
+                  cairo_line_to(c,xp+3.5,yp+2.5);
+                  cairo_move_to(c,xp+5.5,yp+4.5);
+                  cairo_line_to(c,xp+5.5,yp+6.5);
+                  cairo_move_to(c,xp+4.5,yp+5.5);
+                  cairo_line_to(c,xp+6.5,yp+5.5);
+                  cairo_stroke_preserve(c);
+                  cairo_set_source_rgba(c,0.,1.,1.,1.);
+                }
+                else{
+                  cairo_move_to(c,xp+5.5,yp+1.5);
+                  cairo_line_to(c,xp+5.5,yp+3.5);
+                  cairo_move_to(c,xp+4.5,yp+2.5);
+                  cairo_line_to(c,xp+6.5,yp+2.5);
+                  cairo_move_to(c,xp+2.5,yp+4.5);
+                  cairo_line_to(c,xp+2.5,yp+6.5);
+                  cairo_move_to(c,xp+1.5,yp+5.5);
+                  cairo_line_to(c,xp+3.5,yp+5.5);
+                  cairo_stroke_preserve(c);
+                  cairo_set_source_rgba(c,0.,1.,1.,1.);
+                }
+              }break;
+              /*Double minus:*/
+              case -2:{
+                cairo_move_to(c,xp+2.5,yp+2.5);
+                cairo_line_to(c,xp+5.5,yp+2.5);
+                cairo_move_to(c,xp+2.5,yp+5.5);
+                cairo_line_to(c,xp+5.5,yp+5.5);
+                cairo_stroke_preserve(c);
+                cairo_set_source_rgba(c,1.,1.,1.,1.);
+              }break;
+              /*Plus:*/
+              case 1:{
+                if((bi&2)==0)yp-=2;
+                if((bi&1)==0)xp-=2;
+                cairo_move_to(c,xp+4.5,yp+2.5);
+                cairo_line_to(c,xp+4.5,yp+6.5);
+                cairo_move_to(c,xp+2.5,yp+4.5);
+                cairo_line_to(c,xp+6.5,yp+4.5);
+                cairo_stroke_preserve(c);
+                cairo_set_source_rgba(c,.1,1.,.3,1.);
+              }break;
+              /*Minus:*/
+              case -1:{
+                cairo_move_to(c,xp+2.5,yp+4.5);
+                cairo_line_to(c,xp+6.5,yp+4.5);
+                cairo_stroke_preserve(c);
+                cairo_set_source_rgba(c,1.,.3,.1,1.);
+              }break;
+              /*No marker when the block uses the first qi; move on to the
+                 next block.*/
+              default:continue;
+            }
+            cairo_set_line_width(c,1.);
+            cairo_stroke(c);
+          }
+        }
+      }
+      /*Advance to the next macroblock position.*/
+      col2++;
+      if((col2>>1)>=_dec->state.nhmbs){
+        col2=0;
+        row2+=2;
+      }
+    }
+    /*Bit usage indicator[s]:*/
+    if(_dec->telemetry_bits){
+      int widths[6];
+      int fpsn;
+      int fpsd;
+      int mult;
+      int fullw;
+      int padw;
+      int i;
+      fpsn=_dec->state.info.fps_numerator;
+      fpsd=_dec->state.info.fps_denominator;
+      mult=(_dec->telemetry_bits>=0xFF?1:_dec->telemetry_bits);
+      fullw=250.f*h*fpsd*mult/fpsn;
+      /*fullw is an integer divisor below; it truncates to 0 for small
+         frames at very high frame rates, so keep it non-zero.*/
+      if(fullw==0)fullw=1;
+      padw=w-24;
+      /*Header and coded block bits.*/
+      /*Discard byte counts that are negative or otherwise implausible.*/
+      if(_dec->telemetry_frame_bytes<0||
+       _dec->telemetry_frame_bytes==OC_LOTS_OF_BITS){
+        _dec->telemetry_frame_bytes=0;
+      }
+      if(_dec->telemetry_coding_bytes<0||
+       _dec->telemetry_coding_bytes>_dec->telemetry_frame_bytes){
+        _dec->telemetry_coding_bytes=0;
+      }
+      if(_dec->telemetry_mode_bytes<0||
+       _dec->telemetry_mode_bytes>_dec->telemetry_frame_bytes){
+        _dec->telemetry_mode_bytes=0;
+      }
+      if(_dec->telemetry_mv_bytes<0||
+       _dec->telemetry_mv_bytes>_dec->telemetry_frame_bytes){
+        _dec->telemetry_mv_bytes=0;
+      }
+      if(_dec->telemetry_qi_bytes<0||
+       _dec->telemetry_qi_bytes>_dec->telemetry_frame_bytes){
+        _dec->telemetry_qi_bytes=0;
+      }
+      if(_dec->telemetry_dc_bytes<0||
+       _dec->telemetry_dc_bytes>_dec->telemetry_frame_bytes){
+        _dec->telemetry_dc_bytes=0;
+      }
+      /*Each bar is the difference between two consecutive byte counts,
+         scaled by padw/fullw.*/
+      widths[0]=padw*
+       (_dec->telemetry_frame_bytes-_dec->telemetry_coding_bytes)/fullw;
+      widths[1]=padw*
+       (_dec->telemetry_coding_bytes-_dec->telemetry_mode_bytes)/fullw;
+      widths[2]=padw*
+       (_dec->telemetry_mode_bytes-_dec->telemetry_mv_bytes)/fullw;
+      widths[3]=padw*(_dec->telemetry_mv_bytes-_dec->telemetry_qi_bytes)/fullw;
+      widths[4]=padw*(_dec->telemetry_qi_bytes-_dec->telemetry_dc_bytes)/fullw;
+      widths[5]=padw*(_dec->telemetry_dc_bytes)/fullw;
+      for(i=0;i<6;i++)if(widths[i]>w)widths[i]=w;
+      /*Translucent black backing for all six bars.*/
+      cairo_set_source_rgba(c,.0,.0,.0,.6);
+      cairo_rectangle(c,10,h-33,widths[0]+1,5);
+      cairo_rectangle(c,10,h-29,widths[1]+1,5);
+      cairo_rectangle(c,10,h-25,widths[2]+1,5);
+      cairo_rectangle(c,10,h-21,widths[3]+1,5);
+      cairo_rectangle(c,10,h-17,widths[4]+1,5);
+      cairo_rectangle(c,10,h-13,widths[5]+1,5);
+      cairo_fill(c);
+      /*The bars themselves, each in its own color.*/
+      cairo_set_source_rgb(c,1,0,0);
+      cairo_rectangle(c,10.5,h-32.5,widths[0],4);
+      cairo_fill(c);
+      cairo_set_source_rgb(c,0,1,0);
+      cairo_rectangle(c,10.5,h-28.5,widths[1],4);
+      cairo_fill(c);
+      cairo_set_source_rgb(c,0,0,1);
+      cairo_rectangle(c,10.5,h-24.5,widths[2],4);
+      cairo_fill(c);
+      cairo_set_source_rgb(c,.6,.4,.0);
+      cairo_rectangle(c,10.5,h-20.5,widths[3],4);
+      cairo_fill(c);
+      cairo_set_source_rgb(c,.3,.3,.3);
+      cairo_rectangle(c,10.5,h-16.5,widths[4],4);
+      cairo_fill(c);
+      cairo_set_source_rgb(c,.5,.5,.8);
+      cairo_rectangle(c,10.5,h-12.5,widths[5],4);
+      cairo_fill(c);
+    }
+    /*Master qi indicator[s]:*/
+    if(_dec->telemetry_qi&0x1){
+      cairo_text_extents_t extents;
+      char                 buffer[10];
+      int                  p;
+      int                  y;
+      p=0;
+      y=h-7.5;
+      /*Format up to three qi values as space-separated decimal numbers of
+         one or two digits each (48 is ASCII '0').*/
+      if(_dec->state.qis[0]>=10)buffer[p++]=48+_dec->state.qis[0]/10;
+      buffer[p++]=48+_dec->state.qis[0]%10;
+      if(_dec->state.nqis>=2){
+        buffer[p++]=' ';
+        if(_dec->state.qis[1]>=10)buffer[p++]=48+_dec->state.qis[1]/10;
+        buffer[p++]=48+_dec->state.qis[1]%10;
+      }
+      if(_dec->state.nqis==3){
+        buffer[p++]=' ';
+        if(_dec->state.qis[2]>=10)buffer[p++]=48+_dec->state.qis[2]/10;
+        buffer[p++]=48+_dec->state.qis[2]%10;
+      }
+      buffer[p++]='\0';
+      cairo_select_font_face(c,"sans",
+       CAIRO_FONT_SLANT_NORMAL,CAIRO_FONT_WEIGHT_BOLD);
+      cairo_set_font_size(c,18);
+      cairo_text_extents(c,buffer,&extents);
+      /*White text with a thin black outline, right-aligned 10 units from
+         the edge.*/
+      cairo_set_source_rgb(c,1,1,1);
+      cairo_move_to(c,w-extents.x_advance-10,y);
+      cairo_show_text(c,buffer);
+      cairo_set_source_rgb(c,0,0,0);
+      cairo_move_to(c,w-extents.x_advance-10,y);
+      cairo_text_path(c,buffer);
+      cairo_set_line_width(c,.8);
+      cairo_set_line_join(c,CAIRO_LINE_JOIN_ROUND);
+      cairo_stroke(c);
+    }
+    cairo_destroy(c);
+  }
+  /*Out of the Cairo plane into the telemetry YUV buffer.*/
+  /*The three planes are packed back to back, each with a stride equal to its
+     width.*/
+  _ycbcr[0].data=_dec->telemetry_frame_data;
+  _ycbcr[0].stride=_ycbcr[0].width;
+  _ycbcr[1].data=_ycbcr[0].data+h*_ycbcr[0].stride;
+  _ycbcr[1].stride=_ycbcr[1].width;
+  _ycbcr[2].data=_ycbcr[1].data+(h>>vdec)*_ycbcr[1].stride;
+  _ycbcr[2].stride=_ycbcr[2].width;
+  y_row=_ycbcr[0].data;
+  u_row=_ycbcr[1].data;
+  v_row=_ycbcr[2].data;
+  rgb_row=data;
+  /*This is one of the few places it's worth handling chroma on a
+     case-by-case basis.*/
+  switch(_dec->state.info.pixel_fmt){
+    case TH_PF_420:{
+      /*Two rows and two columns at a time: four luma samples and one
+         chroma pair computed from the sum of the four RGB pixels.*/
+      for(y=0;y<h;y+=2){
+        unsigned char *y_row2;
+        unsigned char *rgb_row2;
+        y_row2=y_row+_ycbcr[0].stride;
+        rgb_row2=rgb_row+cstride;
+        for(x=0;x<w;x+=2){
+          int y;
+          int u;
+          int v;
+          y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
+           +24966*rgb_row[4*x+0]+4207500)/255000;
+          y_row[x]=OC_CLAMP255(y);
+          y=(65481*rgb_row[4*x+6]+128553*rgb_row[4*x+5]
+           +24966*rgb_row[4*x+4]+4207500)/255000;
+          y_row[x+1]=OC_CLAMP255(y);
+          y=(65481*rgb_row2[4*x+2]+128553*rgb_row2[4*x+1]
+           +24966*rgb_row2[4*x+0]+4207500)/255000;
+          y_row2[x]=OC_CLAMP255(y);
+          y=(65481*rgb_row2[4*x+6]+128553*rgb_row2[4*x+5]
+           +24966*rgb_row2[4*x+4]+4207500)/255000;
+          y_row2[x+1]=OC_CLAMP255(y);
+          u=(-8372*(rgb_row[4*x+2]+rgb_row[4*x+6]
+           +rgb_row2[4*x+2]+rgb_row2[4*x+6])
+           -16436*(rgb_row[4*x+1]+rgb_row[4*x+5]
+           +rgb_row2[4*x+1]+rgb_row2[4*x+5])
+           +24808*(rgb_row[4*x+0]+rgb_row[4*x+4]
+           +rgb_row2[4*x+0]+rgb_row2[4*x+4])+29032005)/225930;
+          v=(39256*(rgb_row[4*x+2]+rgb_row[4*x+6]
+           +rgb_row2[4*x+2]+rgb_row2[4*x+6])
+           -32872*(rgb_row[4*x+1]+rgb_row[4*x+5]
+            +rgb_row2[4*x+1]+rgb_row2[4*x+5])
+           -6384*(rgb_row[4*x+0]+rgb_row[4*x+4]
+            +rgb_row2[4*x+0]+rgb_row2[4*x+4])+45940035)/357510;
+          u_row[x>>1]=OC_CLAMP255(u);
+          v_row[x>>1]=OC_CLAMP255(v);
+        }
+        y_row+=_ycbcr[0].stride<<1;
+        u_row+=_ycbcr[1].stride;
+        v_row+=_ycbcr[2].stride;
+        rgb_row+=cstride<<1;
+      }
+    }break;
+    case TH_PF_422:{
+      /*Two columns at a time: two luma samples and one chroma pair computed
+         from the sum of the two RGB pixels.*/
+      for(y=0;y<h;y++){
+        for(x=0;x<w;x+=2){
+          int y;
+          int u;
+          int v;
+          y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
+           +24966*rgb_row[4*x+0]+4207500)/255000;
+          y_row[x]=OC_CLAMP255(y);
+          y=(65481*rgb_row[4*x+6]+128553*rgb_row[4*x+5]
+           +24966*rgb_row[4*x+4]+4207500)/255000;
+          y_row[x+1]=OC_CLAMP255(y);
+          u=(-16744*(rgb_row[4*x+2]+rgb_row[4*x+6])
+           -32872*(rgb_row[4*x+1]+rgb_row[4*x+5])
+           +49616*(rgb_row[4*x+0]+rgb_row[4*x+4])+29032005)/225930;
+          v=(78512*(rgb_row[4*x+2]+rgb_row[4*x+6])
+           -65744*(rgb_row[4*x+1]+rgb_row[4*x+5])
+           -12768*(rgb_row[4*x+0]+rgb_row[4*x+4])+45940035)/357510;
+          u_row[x>>1]=OC_CLAMP255(u);
+          v_row[x>>1]=OC_CLAMP255(v);
+        }
+        y_row+=_ycbcr[0].stride;
+        u_row+=_ycbcr[1].stride;
+        v_row+=_ycbcr[2].stride;
+        rgb_row+=cstride;
+      }
+    }break;
+    /*case TH_PF_444:*/
+    default:{
+      /*One luma sample and one chroma pair per pixel.*/
+      for(y=0;y<h;y++){
+        for(x=0;x<w;x++){
+          int y;
+          int u;
+          int v;
+          y=(65481*rgb_row[4*x+2]+128553*rgb_row[4*x+1]
+           +24966*rgb_row[4*x+0]+4207500)/255000;
+          u=(-33488*rgb_row[4*x+2]-65744*rgb_row[4*x+1]
+           +99232*rgb_row[4*x+0]+29032005)/225930;
+          v=(157024*rgb_row[4*x+2]-131488*rgb_row[4*x+1]
+           -25536*rgb_row[4*x+0]+45940035)/357510;
+          y_row[x]=OC_CLAMP255(y);
+          u_row[x]=OC_CLAMP255(u);
+          v_row[x]=OC_CLAMP255(v);
+        }
+        y_row+=_ycbcr[0].stride;
+        u_row+=_ycbcr[1].stride;
+        v_row+=_ycbcr[2].stride;
+        rgb_row+=cstride;
+      }
+    }break;
+  }
+  /*Finished.
+    Destroy the surface.*/
+  cairo_surface_destroy(cs);
+}
+#endif
+
+int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op,
+ ogg_int64_t *_granpos){
+  int ret;
+  if(_dec==NULL||_op==NULL)return TH_EFAULT;
+  /*A completely empty packet indicates a dropped frame and is treated exactly
+     like an inter frame with no coded blocks.*/
+  if(_op->bytes==0){
+    _dec->state.frame_type=OC_INTER_FRAME;
+    _dec->state.ntotal_coded_fragis=0;
+  }
+  else{
+    oc_pack_readinit(&_dec->opb,_op->packet,_op->bytes);
+    ret=oc_dec_frame_header_unpack(_dec);
+    if(ret<0)return ret;
+    if(_dec->state.frame_type==OC_INTRA_FRAME)oc_dec_mark_all_intra(_dec);
+    else oc_dec_coded_flags_unpack(_dec);
+  }
+  /*If there have been no reference frames, and we need one, initialize one.*/
+  if(_dec->state.frame_type!=OC_INTRA_FRAME&&
+   (_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0||
+   _dec->state.ref_frame_idx[OC_FRAME_PREV]<0)){
+    oc_dec_init_dummy_frame(_dec);
+  }
+  /*If this was an inter frame with no coded blocks...*/
+  if(_dec->state.ntotal_coded_fragis<=0){
+    /*Just update the granule position and return.*/
+    _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<<
+     _dec->state.info.keyframe_granule_shift)
+     +(_dec->state.curframe_num-_dec->state.keyframe_num);
+    _dec->state.curframe_num++;
+    if(_granpos!=NULL)*_granpos=_dec->state.granpos;
+    return TH_DUPFRAME;
+  }
+  else{
+    th_ycbcr_buffer stripe_buf;
+    int             stripe_fragy;
+    int             refi;
+    int             pli;
+    int             notstart;
+    int             notdone;
+#ifdef HAVE_CAIRO
+    int             telemetry;
+    /*Save the current telemetry state.
+      This prevents it from being modified in the middle of decoding this
+       frame, which could cause us to skip calls to the striped decoding
+       callback.*/
+    telemetry=_dec->telemetry;
+#endif
+    /*Select a free buffer to use for the reconstructed version of this frame.*/
+    for(refi=0;refi==_dec->state.ref_frame_idx[OC_FRAME_GOLD]||
+     refi==_dec->state.ref_frame_idx[OC_FRAME_PREV];refi++);
+    _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi;
+    _dec->state.ref_frame_data[OC_FRAME_SELF]=
+     _dec->state.ref_frame_bufs[refi][0].data;
+#if defined(HAVE_CAIRO)
+    _dec->telemetry_frame_bytes=_op->bytes;
+#endif
+    if(_dec->state.frame_type==OC_INTRA_FRAME){
+      _dec->state.keyframe_num=_dec->state.curframe_num;
+#if defined(HAVE_CAIRO)
+      _dec->telemetry_coding_bytes=
+       _dec->telemetry_mode_bytes=
+       _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
+#endif
+    }
+    else{
+#if defined(HAVE_CAIRO)
+      _dec->telemetry_coding_bytes=oc_pack_bytes_left(&_dec->opb);
+#endif
+      oc_dec_mb_modes_unpack(_dec);
+#if defined(HAVE_CAIRO)
+      _dec->telemetry_mode_bytes=oc_pack_bytes_left(&_dec->opb);
+#endif
+      oc_dec_mv_unpack_and_frag_modes_fill(_dec);
+#if defined(HAVE_CAIRO)
+      _dec->telemetry_mv_bytes=oc_pack_bytes_left(&_dec->opb);
+#endif
+    }
+    oc_dec_block_qis_unpack(_dec);
+#if defined(HAVE_CAIRO)
+    _dec->telemetry_qi_bytes=oc_pack_bytes_left(&_dec->opb);
+#endif
+    oc_dec_residual_tokens_unpack(_dec);
+    /*Update granule position.
+      This must be done before the striped decode callbacks so that the
+       application knows what to do with the frame data.*/
+    _dec->state.granpos=(_dec->state.keyframe_num+_dec->state.granpos_bias<<
+     _dec->state.info.keyframe_granule_shift)
+     +(_dec->state.curframe_num-_dec->state.keyframe_num);
+    _dec->state.curframe_num++;
+    if(_granpos!=NULL)*_granpos=_dec->state.granpos;
+    /*All of the rest of the operations -- DC prediction reversal,
+       reconstructing coded fragments, copying uncoded fragments, loop
+       filtering, extending borders, and out-of-loop post-processing -- should
+       be pipelined.
+      I.e., DC prediction reversal, reconstruction, and uncoded fragment
+       copying are done for one or two super block rows, then loop filtering is
+       run as far as it can, then bordering copying, then post-processing.
+      For 4:2:0 video a Minimum Codable Unit or MCU contains two luma super
+       block rows, and one chroma.
+      Otherwise, an MCU consists of one super block row from each plane.
+      Inside each MCU, we perform all of the steps on one color plane before
+       moving on to the next.
+      After reconstruction, the additional filtering stages introduce a delay
+       since they need some pixels from the next fragment row.
+      Thus the actual number of decoded rows available is slightly smaller for
+       the first MCU, and slightly larger for the last.
+
+      This entire process allows us to operate on the data while it is still in
+       cache, resulting in big performance improvements.
+      An application callback allows further application processing (blitting
+       to video memory, color conversion, etc.) to also use the data while it's
+       in cache.*/
+    oc_dec_pipeline_init(_dec,&_dec->pipe);
+    oc_ycbcr_buffer_flip(stripe_buf,_dec->pp_frame_buf);
+    notstart=0;
+    notdone=1;
+    for(stripe_fragy=0;notdone;stripe_fragy+=_dec->pipe.mcu_nvfrags){
+      int avail_fragy0;
+      int avail_fragy_end;
+      avail_fragy0=avail_fragy_end=_dec->state.fplanes[0].nvfrags;
+      notdone=stripe_fragy+_dec->pipe.mcu_nvfrags<avail_fragy_end;
+      for(pli=0;pli<3;pli++){
+        oc_fragment_plane *fplane;
+        int                frag_shift;
+        int                pp_offset;
+        int                sdelay;
+        int                edelay;
+        fplane=_dec->state.fplanes+pli;
+        /*Compute the first and last fragment row of the current MCU for this
+           plane.*/
+        frag_shift=pli!=0&&!(_dec->state.info.pixel_fmt&2);
+        _dec->pipe.fragy0[pli]=stripe_fragy>>frag_shift;
+        _dec->pipe.fragy_end[pli]=OC_MINI(fplane->nvfrags,
+         _dec->pipe.fragy0[pli]+(_dec->pipe.mcu_nvfrags>>frag_shift));
+        oc_dec_dc_unpredict_mcu_plane(_dec,&_dec->pipe,pli);
+        oc_dec_frags_recon_mcu_plane(_dec,&_dec->pipe,pli);
+        sdelay=edelay=0;
+        if(_dec->pipe.loop_filter){
+          sdelay+=notstart;
+          edelay+=notdone;
+          oc_state_loop_filter_frag_rows(&_dec->state,
+           _dec->pipe.bounding_values,OC_FRAME_SELF,pli,
+           _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
+        }
+        /*To fill the borders, we have an additional two pixel delay, since a
+           fragment in the next row could filter its top edge, using two pixels
+           from a fragment in this row.
+          But there's no reason to delay a full fragment between the two.*/
+        oc_state_borders_fill_rows(&_dec->state,refi,pli,
+         (_dec->pipe.fragy0[pli]-sdelay<<3)-(sdelay<<1),
+         (_dec->pipe.fragy_end[pli]-edelay<<3)-(edelay<<1));
+        /*Out-of-loop post-processing.*/
+        pp_offset=3*(pli!=0);
+        if(_dec->pipe.pp_level>=OC_PP_LEVEL_DEBLOCKY+pp_offset){
+          /*Perform de-blocking in one plane.*/
+          sdelay+=notstart;
+          edelay+=notdone;
+          oc_dec_deblock_frag_rows(_dec,_dec->pp_frame_buf,
+           _dec->state.ref_frame_bufs[refi],pli,
+           _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
+          if(_dec->pipe.pp_level>=OC_PP_LEVEL_DERINGY+pp_offset){
+            /*Perform de-ringing in one plane.*/
+            sdelay+=notstart;
+            edelay+=notdone;
+            oc_dec_dering_frag_rows(_dec,_dec->pp_frame_buf,pli,
+             _dec->pipe.fragy0[pli]-sdelay,_dec->pipe.fragy_end[pli]-edelay);
+          }
+        }
+        /*If no post-processing is done, we still need to delay a row for the
+           loop filter, thanks to the strange filtering order VP3 chose.*/
+        else if(_dec->pipe.loop_filter){
+          sdelay+=notstart;
+          edelay+=notdone;
+        }
+        /*Compute the intersection of the available rows in all planes.
+          If chroma is sub-sampled, the effect of each of its delays is
+           doubled, but luma might have more post-processing filters enabled
+           than chroma, so we don't know up front which one is the limiting
+           factor.*/
+        avail_fragy0=OC_MINI(avail_fragy0,
+         _dec->pipe.fragy0[pli]-sdelay<<frag_shift);
+        avail_fragy_end=OC_MINI(avail_fragy_end,
+         _dec->pipe.fragy_end[pli]-edelay<<frag_shift);
+      }
+#ifdef HAVE_CAIRO
+      if(_dec->stripe_cb.stripe_decoded!=NULL&&!telemetry){
+#else
+      if(_dec->stripe_cb.stripe_decoded!=NULL){
+#endif
+        /*The callback might want to use the FPU, so let's make sure they can.
+          We violate all kinds of ABI restrictions by not doing this until
+           now, but none of them actually matter since we don't use floating
+           point ourselves.*/
+        oc_restore_fpu(&_dec->state);
+        /*Make the callback, ensuring we flip the sense of the "start" and
+           "end" of the available region upside down.*/
+        (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,stripe_buf,
+         _dec->state.fplanes[0].nvfrags-avail_fragy_end,
+         _dec->state.fplanes[0].nvfrags-avail_fragy0);
+      }
+      notstart=1;
+    }
+    /*Finish filling in the reference frame borders.*/
+    for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_dec->state,refi,pli);
+    /*Update the reference frame indices.*/
+    if(_dec->state.frame_type==OC_INTRA_FRAME){
+      /*The new frame becomes both the previous and gold reference frames.*/
+      _dec->state.ref_frame_idx[OC_FRAME_GOLD]=
+       _dec->state.ref_frame_idx[OC_FRAME_PREV]=
+       _dec->state.ref_frame_idx[OC_FRAME_SELF];
+      _dec->state.ref_frame_data[OC_FRAME_GOLD]=
+       _dec->state.ref_frame_data[OC_FRAME_PREV]=
+       _dec->state.ref_frame_data[OC_FRAME_SELF];
+    }
+    else{
+      /*Otherwise, just replace the previous reference frame.*/
+      _dec->state.ref_frame_idx[OC_FRAME_PREV]=
+       _dec->state.ref_frame_idx[OC_FRAME_SELF];
+      _dec->state.ref_frame_data[OC_FRAME_PREV]=
+       _dec->state.ref_frame_data[OC_FRAME_SELF];
+    }
+    /*Restore the FPU before dump_frame, since that _does_ use the FPU (for PNG
+       gamma values, if nothing else).*/
+    oc_restore_fpu(&_dec->state);
+#ifdef HAVE_CAIRO
+    /*If telemetry ioctls are active, we need to draw to the output buffer.*/
+    if(telemetry){
+      oc_render_telemetry(_dec,stripe_buf,telemetry);
+      oc_ycbcr_buffer_flip(_dec->pp_frame_buf,stripe_buf);
+      /*If we had a striped decoding callback, we skipped calling it above
+         (because the telemetry wasn't rendered yet).
+        Call it now with the whole frame.*/
+      if(_dec->stripe_cb.stripe_decoded!=NULL){
+        (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,
+         stripe_buf,0,_dec->state.fplanes[0].nvfrags);
+      }
+    }
+#endif
+#if defined(OC_DUMP_IMAGES)
+    /*We only dump images if there were some coded blocks.*/
+    oc_state_dump_frame(&_dec->state,OC_FRAME_SELF,"dec");
+#endif
+    return 0;
+  }
+}
+
+/*Hands the caller a view of the decoder's post-processed frame buffer.
+  _dec:   The decoder context.
+  _ycbcr: Receives the result of oc_ycbcr_buffer_flip() applied to the
+           decoder's pp_frame_buf.
+  Return: 0 on success, or TH_EFAULT if either argument is NULL.*/
+int th_decode_ycbcr_out(th_dec_ctx *_dec,th_ycbcr_buffer _ycbcr){
+  if(_dec!=NULL&&_ycbcr!=NULL){
+    oc_ycbcr_buffer_flip(_ycbcr,_dec->pp_frame_buf);
+    return 0;
+  }
+  return TH_EFAULT;
+}

+ 182 - 0
jni/libtheora-1.2.0alpha1/lib/dequant.c

@@ -0,0 +1,182 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id$
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <ogg/ogg.h>
+#include "dequant.h"
+#include "decint.h"
+
+/*Unpacks the quantization parameters from a pack buffer.
+  _opb:   The pack buffer to read from.
+  _qinfo: Receives the loop filter limits, the AC and DC scale tables, and,
+           for each of the 6 (quantizer type,plane) combinations, the list of
+           range sizes and the base matrices at the range end-points.
+  Return: 0 on success, TH_EFAULT if an allocation fails, or TH_EBADHEADER if
+           a set of range sizes sums to more than 63 or a base matrix index
+           is not less than the number of base matrices read.
+          On failure _qinfo may be left partially filled in (see the notes
+           in the body about who is expected to clean it up).*/
+int oc_quant_params_unpack(oc_pack_buf *_opb,th_quant_info *_qinfo){
+  th_quant_base *base_mats;
+  long           val;
+  int            nbase_mats;
+  int            sizes[64];
+  int            indices[64];
+  int            nbits;
+  int            bmi;
+  int            ci;
+  int            qti;
+  int            pli;
+  int            qri;
+  int            qi;
+  int            i;
+  /*Loop filter limits: a 3-bit field width followed by 64 values.*/
+  val=oc_pack_read(_opb,3);
+  nbits=(int)val;
+  for(qi=0;qi<64;qi++){
+    val=oc_pack_read(_opb,nbits);
+    _qinfo->loop_filter_limits[qi]=(unsigned char)val;
+  }
+  /*AC scale table: a 4-bit (field width minus one) followed by 64 values.*/
+  val=oc_pack_read(_opb,4);
+  nbits=(int)val+1;
+  for(qi=0;qi<64;qi++){
+    val=oc_pack_read(_opb,nbits);
+    _qinfo->ac_scale[qi]=(ogg_uint16_t)val;
+  }
+  /*DC scale table, stored the same way as the AC scale table.*/
+  val=oc_pack_read(_opb,4);
+  nbits=(int)val+1;
+  for(qi=0;qi<64;qi++){
+    val=oc_pack_read(_opb,nbits);
+    _qinfo->dc_scale[qi]=(ogg_uint16_t)val;
+  }
+  /*Base matrices: a 9-bit (count minus one) followed by 64 8-bit entries per
+     matrix.
+    These live in a temporary array that is freed before returning.*/
+  val=oc_pack_read(_opb,9);
+  nbase_mats=(int)val+1;
+  base_mats=_ogg_malloc(nbase_mats*sizeof(base_mats[0]));
+  if(base_mats==NULL)return TH_EFAULT;
+  for(bmi=0;bmi<nbase_mats;bmi++){
+    for(ci=0;ci<64;ci++){
+      val=oc_pack_read(_opb,8);
+      base_mats[bmi][ci]=(unsigned char)val;
+    }
+  }
+  /*Number of bits used to code each base matrix index.*/
+  nbits=oc_ilog(nbase_mats-1);
+  /*Walk the 6 (quantizer type,plane) combinations in order.*/
+  for(i=0;i<6;i++){
+    th_quant_ranges *qranges;
+    th_quant_base   *qrbms;
+    int             *qrsizes;
+    qti=i/3;
+    pli=i%3;
+    qranges=_qinfo->qi_ranges[qti]+pli;
+    if(i>0){
+      /*A 0 bit means this entry re-uses an earlier entry instead of coding a
+         new set of ranges.*/
+      val=oc_pack_read1(_opb);
+      if(!val){
+        int qtj;
+        int plj;
+        if(qti>0){
+          /*For the second quantizer type, one more bit selects between the
+             same plane of the first quantizer type (1) and the immediately
+             preceding entry (0).*/
+          val=oc_pack_read1(_opb);
+          if(val){
+            qtj=qti-1;
+            plj=pli;
+          }
+          else{
+            qtj=(i-1)/3;
+            plj=(i-1)%3;
+          }
+        }
+        else{
+          qtj=(i-1)/3;
+          plj=(i-1)%3;
+        }
+        /*This is a shallow copy: both entries now share the same sizes and
+           base_matrices pointers.*/
+        *qranges=*(_qinfo->qi_ranges[qtj]+plj);
+        continue;
+      }
+    }
+    /*Read the base matrix index for the first end-point, then alternate
+       range sizes and end-point indices until the sizes cover 63 steps.*/
+    val=oc_pack_read(_opb,nbits);
+    indices[0]=(int)val;
+    for(qi=qri=0;qi<63;){
+      val=oc_pack_read(_opb,oc_ilog(62-qi));
+      sizes[qri]=(int)val+1;
+      qi+=(int)val+1;
+      val=oc_pack_read(_opb,nbits);
+      indices[++qri]=(int)val;
+    }
+    /*Note: The caller is responsible for cleaning up any partially
+       constructed qinfo.*/
+    if(qi>63){
+      _ogg_free(base_mats);
+      return TH_EBADHEADER;
+    }
+    qranges->nranges=qri;
+    qranges->sizes=qrsizes=(int *)_ogg_malloc(qri*sizeof(qrsizes[0]));
+    if(qranges->sizes==NULL){
+      /*Note: The caller is responsible for cleaning up any partially
+         constructed qinfo.*/
+      _ogg_free(base_mats);
+      return TH_EFAULT;
+    }
+    memcpy(qrsizes,sizes,qri*sizeof(qrsizes[0]));
+    /*There is one more end-point matrix than there are ranges.*/
+    qrbms=(th_quant_base *)_ogg_malloc((qri+1)*sizeof(qrbms[0]));
+    if(qrbms==NULL){
+      /*Note: The caller is responsible for cleaning up any partially
+         constructed qinfo.*/
+      _ogg_free(base_mats);
+      return TH_EFAULT;
+    }
+    qranges->base_matrices=(const th_quant_base *)qrbms;
+    /*Copy the referenced base matrices, from the last end-point down to the
+       first, validating each index along the way.*/
+    do{
+      bmi=indices[qri];
+      /*Note: The caller is responsible for cleaning up any partially
+         constructed qinfo.*/
+      if(bmi>=nbase_mats){
+        _ogg_free(base_mats);
+        return TH_EBADHEADER;
+      }
+      memcpy(qrbms[qri],base_mats[bmi],sizeof(qrbms[qri]));
+    }
+    while(qri-->0);
+  }
+  _ogg_free(base_mats);
+  return 0;
+}
+
+/*Frees the range size lists and base matrix lists held by a th_quant_info.
+  Entries may share storage with the immediately preceding entry or, for the
+   second quantizer type, with the same plane of the first quantizer type.
+  Walking the entries from last to first and dropping any pointer that
+   matches one of those earlier entries ensures each allocation is freed
+   exactly once.
+  _qinfo: The quantization info whose storage should be released.*/
+void oc_quant_params_clear(th_quant_info *_qinfo){
+  int idx;
+  for(idx=5;idx>=0;idx--){
+    th_quant_ranges *cur;
+    cur=_qinfo->qi_ranges[idx/3]+idx%3;
+    /*Drop pointers shared with the immediately preceding entry.*/
+    if(idx>0){
+      th_quant_ranges *prev;
+      prev=_qinfo->qi_ranges[(idx-1)/3]+(idx-1)%3;
+      if(cur->sizes==prev->sizes)cur->sizes=NULL;
+      if(cur->base_matrices==prev->base_matrices)cur->base_matrices=NULL;
+    }
+    /*Drop pointers shared with the same plane of the first quantizer type.*/
+    if(idx>=3){
+      th_quant_ranges *first;
+      first=_qinfo->qi_ranges[0]+idx%3;
+      if(cur->sizes==first->sizes)cur->sizes=NULL;
+      if(cur->base_matrices==first->base_matrices)cur->base_matrices=NULL;
+    }
+    /*Whatever is left is owned by this entry alone.*/
+    _ogg_free((void *)cur->sizes);
+    _ogg_free((void *)cur->base_matrices);
+  }
+}

+ 27 - 0
jni/libtheora-1.2.0alpha1/lib/dequant.h

@@ -0,0 +1,27 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id$
+
+ ********************************************************************/
+
+#if !defined(_dequant_H)
+# define _dequant_H (1)
+# include "quant.h"
+# include "bitpack.h"
+
+/*Reads the quantization parameters from _opb into _qinfo.
+  Returns 0 on success or a negative TH_E* code (TH_EFAULT, TH_EBADHEADER) on
+   failure.*/
+int oc_quant_params_unpack(oc_pack_buf *_opb,
+ th_quant_info *_qinfo);
+/*Frees the storage referenced by the ranges in _qinfo.*/
+void oc_quant_params_clear(th_quant_info *_qinfo);
+
+#endif

+ 168 - 0
jni/libtheora-1.2.0alpha1/lib/encapiwrapper.c

@@ -0,0 +1,168 @@
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include "apiwrapper.h"
+#include "encint.h"
+#include "theora/theoraenc.h"
+
+
+
+/*Frees the encoder context held by an API wrapper, if there is one, and
+   then zeroes the whole wrapper.*/
+static void th_enc_api_clear(th_api_wrapper *_api){
+  if(_api->encode){
+    th_encode_free(_api->encode);
+  }
+  memset(_api,0,sizeof(*_api));
+}
+
+/*Clears the info struct attached to a legacy encoder state, if any, and then
+   zeroes the state itself.*/
+static void theora_encode_clear(theora_state *_te){
+  theora_info *ti;
+  ti=_te->i;
+  if(ti!=NULL)theora_info_clear(ti);
+  memset(_te,0,sizeof(*_te));
+}
+
+/*Forwards a legacy-API control request to th_encode_ctl() on the wrapped
+   encoder context and returns its result.*/
+static int theora_encode_control(theora_state *_te,int _req,
+ void *_buf,size_t _buf_sz){
+  th_api_wrapper *api;
+  api=(th_api_wrapper *)_te->i->codec_setup;
+  return th_encode_ctl(api->encode,_req,_buf,_buf_sz);
+}
+
+/*Forwards a granule position to th_granule_frame() on the wrapped encoder
+   context and returns its result.*/
+static ogg_int64_t theora_encode_granule_frame(theora_state *_te,
+ ogg_int64_t _gp){
+  th_api_wrapper *api;
+  api=(th_api_wrapper *)_te->i->codec_setup;
+  return th_granule_frame(api->encode,_gp);
+}
+
+/*Forwards a granule position to th_granule_time() on the wrapped encoder
+   context and returns its result.*/
+static double theora_encode_granule_time(theora_state *_te,ogg_int64_t _gp){
+  th_api_wrapper *api;
+  api=(th_api_wrapper *)_te->i->codec_setup;
+  return th_granule_time(api->encode,_gp);
+}
+
+/*The encoder's dispatch table: clear, control, granule-to-frame and
+   granule-to-time entry points, in that order.
+  theora_encode_init() stores a pointer to this table in
+   theora_state::internal_encode.*/
+static const oc_state_dispatch_vtable OC_ENC_DISPATCH_VTBL={
+  (oc_state_clear_func)theora_encode_clear,
+  (oc_state_control_func)theora_encode_control,
+  (oc_state_granule_frame_func)theora_encode_granule_frame,
+  (oc_state_granule_time_func)theora_encode_granule_time,
+};
+
+/*Initializes a legacy-API encoder state.
+  Allocates a combined th_api_info block, copies *_ci into it, creates the
+   underlying encoder with th_encode_alloc(), and wires the result into _te.
+  _te: The legacy encoder state to fill in.
+  _ci: The legacy info struct describing the stream to encode.
+  Return: 0 on success, TH_EFAULT if the wrapper block could not be allocated,
+           or OC_EINVAL if th_encode_alloc() returned NULL.*/
+int theora_encode_init(theora_state *_te,theora_info *_ci){
+  th_api_info *apiinfo;
+  th_info      info;
+  ogg_uint32_t keyframe_frequency_force;
+  /*Allocate our own combined API wrapper/theora_info struct.
+    We put them both in one malloc'd block so that when the API wrapper is
+     freed, the info struct goes with it.
+    This avoids having to figure out whether or not we need to free the info
+     struct in either theora_info_clear() or theora_clear().*/
+  apiinfo=(th_api_info *)_ogg_malloc(sizeof(*apiinfo));
+  if(apiinfo==NULL)return TH_EFAULT;
+  /*Make our own copy of the info struct, since its lifetime should be
+     independent of the one we were passed in.*/
+  *&apiinfo->info=*_ci;
+  /*Convert the legacy info struct to the th_info form th_encode_alloc()
+     takes.*/
+  oc_theora_info2th_info(&info,_ci);
+  apiinfo->api.encode=th_encode_alloc(&info);
+  if(apiinfo->api.encode==NULL){
+    _ogg_free(apiinfo);
+    return OC_EINVAL;
+  }
+  apiinfo->api.clear=(oc_setup_clear_func)th_enc_api_clear;
+  /*Provide entry points for ABI compatibility with old decoder shared libs.*/
+  _te->internal_encode=(void *)&OC_ENC_DISPATCH_VTBL;
+  _te->internal_decode=NULL;
+  _te->granulepos=0;
+  /*Point the state at our private info copy, and hang the API wrapper off
+     that copy's codec_setup field so the other entry points can find it.*/
+  _te->i=&apiinfo->info;
+  _te->i->codec_setup=&apiinfo->api;
+  /*Set the precise requested keyframe frequency.*/
+  keyframe_frequency_force=_ci->keyframe_auto_p?
+   _ci->keyframe_frequency_force:_ci->keyframe_frequency;
+  th_encode_ctl(apiinfo->api.encode,
+   TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE,
+   &keyframe_frequency_force,sizeof(keyframe_frequency_force));
+  /*TODO: Additional codec setup using the extra fields in theora_info.*/
+  return 0;
+}
+
+/*Submits one frame to the encoder through the legacy API.
+  The yuv_buffer is re-described as a th_ycbcr_buffer (both chroma planes
+   share the uv_* dimensions and stride) and passed to th_encode_ycbcr_in().
+  _te:  The legacy encoder state.
+  _yuv: The frame to encode.
+  Return: The value returned by th_encode_ycbcr_in().
+          When that value is non-negative, _te->granulepos is updated from
+           the encoder's granule position first.*/
+int theora_encode_YUVin(theora_state *_te,yuv_buffer *_yuv){
+  th_ycbcr_buffer  planes;
+  th_api_wrapper  *wrapper;
+  int              status;
+  wrapper=(th_api_wrapper *)_te->i->codec_setup;
+  /*Luma plane.*/
+  planes[0].width=_yuv->y_width;
+  planes[0].height=_yuv->y_height;
+  planes[0].stride=_yuv->y_stride;
+  planes[0].data=_yuv->y;
+  /*The two chroma planes differ only in their data pointers.*/
+  planes[1].width=planes[2].width=_yuv->uv_width;
+  planes[1].height=planes[2].height=_yuv->uv_height;
+  planes[1].stride=planes[2].stride=_yuv->uv_stride;
+  planes[1].data=_yuv->u;
+  planes[2].data=_yuv->v;
+  status=th_encode_ycbcr_in(wrapper->encode,planes);
+  if(status>=0)_te->granulepos=wrapper->encode->state.granpos;
+  return status;
+}
+
+/*Retrieves encoded data through the legacy API by forwarding to
+   th_encode_packetout() on the wrapped encoder context.*/
+int theora_encode_packetout(theora_state *_te,int _last_p,ogg_packet *_op){
+  return th_encode_packetout(
+   ((th_api_wrapper *)_te->i->codec_setup)->encode,_last_p,_op);
+}
+
+/*Produces the info header packet through the legacy API.
+  _te: The legacy encoder state.
+  _op: Receives the packet.
+  Return: 0 on success, TH_EINVAL if encoding has already started, or the
+           negative value returned by th_encode_flushheader().*/
+int theora_encode_header(theora_state *_te,ogg_packet *_op){
+  oc_enc_ctx *ctx;
+  int         status;
+  ctx=((th_api_wrapper *)_te->i->codec_setup)->encode;
+  /*Headers may only be requested before any encoding has taken place.*/
+  if(ctx->packet_state>OC_PACKET_EMPTY)return TH_EINVAL;
+  if(ctx->state.granpos!=0)return TH_EINVAL;
+  /*Rewind the packet state so the flush emits the info header.*/
+  ctx->packet_state=OC_PACKET_INFO_HDR;
+  status=th_encode_flushheader(ctx,NULL,_op);
+  if(status<0)return status;
+  return 0;
+}
+
+/*Produces a comment header packet through the legacy API.
+  The packet is built in a temporary oggpack_buffer, so on success its
+   contents are copied into a freshly allocated block, which is what _op is
+   left pointing at.
+  _tc: The comments to encode.
+  _op: Receives the packet.
+  Return: 0 on success, TH_EFAULT if the copy could not be allocated (in
+           which case _op->packet is set to NULL), or the negative value
+           returned by oc_state_flushheader().*/
+int theora_encode_comment(theora_comment *_tc,ogg_packet *_op){
+  oggpack_buffer  writer;
+  void           *copy;
+  int             hdr_state;
+  int             status;
+  hdr_state=OC_PACKET_COMMENT_HDR;
+  oggpackB_writeinit(&writer);
+  status=oc_state_flushheader(NULL,&hdr_state,&writer,NULL,NULL,
+   th_version_string(),(th_comment *)_tc,_op);
+  if(status<0){
+    oggpack_writeclear(&writer);
+    return status;
+  }
+  /*The oggpack_buffer's lifetime ends with this function, so the packet
+     contents have to be copied out.
+    Presumably the application knows it is supposed to free this.
+    This part works nothing like the Vorbis API, and the documentation on it
+     has been wrong for some time, claiming libtheora owned the memory.*/
+  copy=_ogg_malloc(_op->bytes);
+  if(copy!=NULL)memcpy(copy,_op->packet,_op->bytes);
+  _op->packet=copy;
+  oggpack_writeclear(&writer);
+  return copy!=NULL?0:TH_EFAULT;
+}
+
+/*Produces the setup header packet through the legacy API.
+  _te: The legacy encoder state.
+  _op: Receives the packet.
+  Return: 0 on success, TH_EINVAL if encoding has already started, or the
+           negative value returned by th_encode_flushheader().*/
+int theora_encode_tables(theora_state *_te,ogg_packet *_op){
+  oc_enc_ctx *ctx;
+  int         status;
+  ctx=((th_api_wrapper *)_te->i->codec_setup)->encode;
+  /*Headers may only be requested before any encoding has taken place.*/
+  if(ctx->packet_state>OC_PACKET_EMPTY)return TH_EINVAL;
+  if(ctx->state.granpos!=0)return TH_EINVAL;
+  /*Rewind the packet state so the flush emits the setup header.*/
+  ctx->packet_state=OC_PACKET_SETUP_HDR;
+  status=th_encode_flushheader(ctx,NULL,_op);
+  if(status<0)return status;
+  return 0;
+}

+ 379 - 0
jni/libtheora-1.2.0alpha1/lib/encfrag.c

@@ -0,0 +1,379 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id$
+
+ ********************************************************************/
+#include <stdlib.h>
+#include <string.h>
+#include "encint.h"
+
+
+/*Computes the 8x8 difference between a source block and a reference block.
+  _diff:    Receives the 64 differences (_src minus _ref) in raster order.
+  _src:     The top-left pixel of the source block.
+  _ref:     The top-left pixel of the reference block.
+  _ystride: The offset between rows, applied to both _src and _ref.*/
+void oc_enc_frag_sub_c(ogg_int16_t _diff[64],const unsigned char *_src,
+ const unsigned char *_ref,int _ystride){
+  ogg_int16_t *out;
+  int          row;
+  out=_diff;
+  for(row=0;row<8;row++){
+    int col;
+    for(col=0;col<8;col++)*out++=(ogg_int16_t)(_src[col]-_ref[col]);
+    _src+=_ystride;
+    _ref+=_ystride;
+  }
+}
+
+/*Computes the 8x8 difference between a source block and the constant 128.
+  _diff:    Receives the 64 values (_src minus 128) in raster order.
+  _src:     The top-left pixel of the source block.
+  _ystride: The offset between rows of _src.*/
+void oc_enc_frag_sub_128_c(ogg_int16_t *_diff,
+ const unsigned char *_src,int _ystride){
+  ogg_int16_t *out;
+  int          row;
+  out=_diff;
+  for(row=0;row<8;row++){
+    int col;
+    for(col=0;col<8;col++)*out++=(ogg_int16_t)(_src[col]-128);
+    _src+=_ystride;
+  }
+}
+
+/*Computes the sum of absolute differences between two 8x8 blocks.
+  _src:     The top-left pixel of the source block.
+  _ref:     The top-left pixel of the reference block.
+  _ystride: The offset between rows, applied to both blocks.
+  Return: The sum of |_src-_ref| over all 64 pixels.*/
+unsigned oc_enc_frag_sad_c(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride){
+  unsigned total;
+  int      row;
+  total=0;
+  for(row=0;row<8;row++){
+    int col;
+    for(col=0;col<8;col++){
+      int d;
+      d=_src[col]-_ref[col];
+      total+=d<0?-d:d;
+    }
+    _src+=_ystride;
+    _ref+=_ystride;
+  }
+  return total;
+}
+
+/*Computes the sum of absolute differences between two 8x8 blocks, giving up
+   early once the running total exceeds a threshold.
+  _src:     The top-left pixel of the source block.
+  _ref:     The top-left pixel of the reference block.
+  _ystride: The offset between rows, applied to both blocks.
+  _thresh:  The total is checked against this after each complete row.
+  Return: The full SAD, or the partial total at the end of the first row
+           after which it was greater than _thresh.*/
+unsigned oc_enc_frag_sad_thresh_c(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,unsigned _thresh){
+  unsigned total;
+  int      row;
+  total=0;
+  for(row=0;row<8;row++){
+    int col;
+    for(col=0;col<8;col++){
+      int d;
+      d=_src[col]-_ref[col];
+      total+=d<0?-d:d;
+    }
+    if(total>_thresh)return total;
+    _src+=_ystride;
+    _ref+=_ystride;
+  }
+  return total;
+}
+
+/*Computes the sum of absolute differences between an 8x8 source block and
+   the average of two reference blocks, giving up early once the running
+   total exceeds a threshold.
+  _src:     The top-left pixel of the source block.
+  _ref1:    The top-left pixel of the first reference block.
+  _ref2:    The top-left pixel of the second reference block.
+  _ystride: The offset between rows, applied to all three blocks.
+  _thresh:  The total is checked against this after each complete row.
+  Return: The full SAD, or the partial total at the end of the first row
+           after which it was greater than _thresh.*/
+unsigned oc_enc_frag_sad2_thresh_c(const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
+ unsigned _thresh){
+  unsigned total;
+  int      row;
+  total=0;
+  for(row=0;row<8;row++){
+    int col;
+    for(col=0;col<8;col++){
+      int avg;
+      int d;
+      /*The average rounds down.*/
+      avg=(_ref1[col]+_ref2[col])>>1;
+      d=_src[col]-avg;
+      total+=d<0?-d:d;
+    }
+    if(total>_thresh)return total;
+    _src+=_ystride;
+    _ref1+=_ystride;
+    _ref2+=_ystride;
+  }
+  return total;
+}
+
+/*Computes the sum of absolute differences between each pixel of an 8x8 block
+   and the block's rounded mean.
+  _src:     The top-left pixel of the block.
+  _ystride: The offset between rows of _src.
+  Return: The sum of |pixel-mean| over all 64 pixels, where mean is the pixel
+           sum divided by 64 and rounded to nearest.*/
+unsigned oc_enc_frag_intra_sad_c(const unsigned char *_src, int _ystride){
+  const unsigned char *src = _src;
+  /*The mean is kept in a signed int so that the differences below are
+     computed in signed arithmetic.
+    With an unsigned mean, a pixel below the mean would wrap around to a huge
+     unsigned value, and passing that to abs() only works by relying on an
+     implementation-defined conversion back to int.
+    The sum is at most 64*255, so it always fits.*/
+  int      dc;
+  unsigned sad;
+  int      i;
+  dc=0;
+  for(i=8;i-->0;){
+    int j;
+    for(j=0;j<8;j++)dc+=src[j];
+    src+=_ystride;
+  }
+  /*Round to nearest when dividing by the 64 pixels in the block.*/
+  dc=dc+32>>6;
+  sad=0;
+  for(i=8;i-->0;){
+    int j;
+    for(j=0;j<8;j++)sad+=abs(_src[j]-dc);
+    _src+=_ystride;
+  }
+  return sad;
+}
+
+/*Applies an 8-point Hadamard butterfly to each row of the difference between
+   an 8x8 source block and a reference block.
+  The 8 outputs for input row i are written to column i of _buf, i.e., the
+   result is stored transposed.
+  _buf:     Receives the 64 transformed values.
+  _src:     The top-left pixel of the source block.
+  _ref:     The top-left pixel of the reference block.
+  _ystride: The offset between rows, applied to both blocks.*/
+static void oc_diff_hadamard(ogg_int16_t _buf[64],const unsigned char *_src,
+ const unsigned char *_ref,int _ystride){
+  int i;
+  for(i=0;i<8;i++){
+    int t0;
+    int t1;
+    int t2;
+    int t3;
+    int t4;
+    int t5;
+    int t6;
+    int t7;
+    int r;
+    /*Hadamard stage 1:*/
+    /*The source/reference subtraction is folded into this stage.*/
+    t0=_src[0]-_ref[0]+_src[4]-_ref[4];
+    t4=_src[0]-_ref[0]-_src[4]+_ref[4];
+    t1=_src[1]-_ref[1]+_src[5]-_ref[5];
+    t5=_src[1]-_ref[1]-_src[5]+_ref[5];
+    t2=_src[2]-_ref[2]+_src[6]-_ref[6];
+    t6=_src[2]-_ref[2]-_src[6]+_ref[6];
+    t3=_src[3]-_ref[3]+_src[7]-_ref[7];
+    t7=_src[3]-_ref[3]-_src[7]+_ref[7];
+    /*Hadamard stage 2:*/
+    r=t0;
+    t0+=t2;
+    t2=r-t2;
+    r=t1;
+    t1+=t3;
+    t3=r-t3;
+    r=t4;
+    t4+=t6;
+    t6=r-t6;
+    r=t5;
+    t5+=t7;
+    t7=r-t7;
+    /*Hadamard stage 3:*/
+    _buf[0*8+i]=(ogg_int16_t)(t0+t1);
+    _buf[1*8+i]=(ogg_int16_t)(t0-t1);
+    _buf[2*8+i]=(ogg_int16_t)(t2+t3);
+    _buf[3*8+i]=(ogg_int16_t)(t2-t3);
+    _buf[4*8+i]=(ogg_int16_t)(t4+t5);
+    _buf[5*8+i]=(ogg_int16_t)(t4-t5);
+    _buf[6*8+i]=(ogg_int16_t)(t6+t7);
+    _buf[7*8+i]=(ogg_int16_t)(t6-t7);
+    _src+=_ystride;
+    _ref+=_ystride;
+  }
+}
+
+/*Applies an 8-point Hadamard butterfly to each row of the difference between
+   an 8x8 source block and the (rounded-down) average of two reference
+   blocks.
+  The 8 outputs for input row i are written to column i of _buf, i.e., the
+   result is stored transposed.
+  _buf:     Receives the 64 transformed values.
+  _src:     The top-left pixel of the source block.
+  _ref1:    The top-left pixel of the first reference block.
+  _ref2:    The top-left pixel of the second reference block.
+  _ystride: The offset between rows, applied to all three blocks.*/
+static void oc_diff_hadamard2(ogg_int16_t _buf[64],const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride){
+  int i;
+  for(i=0;i<8;i++){
+    int t0;
+    int t1;
+    int t2;
+    int t3;
+    int t4;
+    int t5;
+    int t6;
+    int t7;
+    int r;
+    /*Hadamard stage 1:*/
+    /*r and the odd-half temporary briefly hold the two averaged reference
+       pixels before being overwritten with the butterfly outputs.*/
+    r=_ref1[0]+_ref2[0]>>1;
+    t4=_ref1[4]+_ref2[4]>>1;
+    t0=_src[0]-r+_src[4]-t4;
+    t4=_src[0]-r-_src[4]+t4;
+    r=_ref1[1]+_ref2[1]>>1;
+    t5=_ref1[5]+_ref2[5]>>1;
+    t1=_src[1]-r+_src[5]-t5;
+    t5=_src[1]-r-_src[5]+t5;
+    r=_ref1[2]+_ref2[2]>>1;
+    t6=_ref1[6]+_ref2[6]>>1;
+    t2=_src[2]-r+_src[6]-t6;
+    t6=_src[2]-r-_src[6]+t6;
+    r=_ref1[3]+_ref2[3]>>1;
+    t7=_ref1[7]+_ref2[7]>>1;
+    t3=_src[3]-r+_src[7]-t7;
+    t7=_src[3]-r-_src[7]+t7;
+    /*Hadamard stage 2:*/
+    r=t0;
+    t0+=t2;
+    t2=r-t2;
+    r=t1;
+    t1+=t3;
+    t3=r-t3;
+    r=t4;
+    t4+=t6;
+    t6=r-t6;
+    r=t5;
+    t5+=t7;
+    t7=r-t7;
+    /*Hadamard stage 3:*/
+    _buf[0*8+i]=(ogg_int16_t)(t0+t1);
+    _buf[1*8+i]=(ogg_int16_t)(t0-t1);
+    _buf[2*8+i]=(ogg_int16_t)(t2+t3);
+    _buf[3*8+i]=(ogg_int16_t)(t2-t3);
+    _buf[4*8+i]=(ogg_int16_t)(t4+t5);
+    _buf[5*8+i]=(ogg_int16_t)(t4-t5);
+    _buf[6*8+i]=(ogg_int16_t)(t6+t7);
+    _buf[7*8+i]=(ogg_int16_t)(t6-t7);
+    _src+=_ystride;
+    _ref1+=_ystride;
+    _ref2+=_ystride;
+  }
+}
+
+/*Applies an 8-point Hadamard butterfly to each row of an 8x8 source block
+   (no reference is subtracted).
+  The 8 outputs for input row i are written to column i of _buf, i.e., the
+   result is stored transposed.
+  _buf:     Receives the 64 transformed values.
+  _src:     The top-left pixel of the source block.
+  _ystride: The offset between rows of _src.*/
+static void oc_intra_hadamard(ogg_int16_t _buf[64],const unsigned char *_src,
+ int _ystride){
+  int i;
+  for(i=0;i<8;i++){
+    int t0;
+    int t1;
+    int t2;
+    int t3;
+    int t4;
+    int t5;
+    int t6;
+    int t7;
+    int r;
+    /*Hadamard stage 1:*/
+    t0=_src[0]+_src[4];
+    t4=_src[0]-_src[4];
+    t1=_src[1]+_src[5];
+    t5=_src[1]-_src[5];
+    t2=_src[2]+_src[6];
+    t6=_src[2]-_src[6];
+    t3=_src[3]+_src[7];
+    t7=_src[3]-_src[7];
+    /*Hadamard stage 2:*/
+    r=t0;
+    t0+=t2;
+    t2=r-t2;
+    r=t1;
+    t1+=t3;
+    t3=r-t3;
+    r=t4;
+    t4+=t6;
+    t6=r-t6;
+    r=t5;
+    t5+=t7;
+    t7=r-t7;
+    /*Hadamard stage 3:*/
+    _buf[0*8+i]=(ogg_int16_t)(t0+t1);
+    _buf[1*8+i]=(ogg_int16_t)(t0-t1);
+    _buf[2*8+i]=(ogg_int16_t)(t2+t3);
+    _buf[3*8+i]=(ogg_int16_t)(t2-t3);
+    _buf[4*8+i]=(ogg_int16_t)(t4+t5);
+    _buf[5*8+i]=(ogg_int16_t)(t4-t5);
+    _buf[6*8+i]=(ogg_int16_t)(t6+t7);
+    _buf[7*8+i]=(ogg_int16_t)(t6-t7);
+    _src+=_ystride;
+  }
+}
+
+/*Applies an 8-point Hadamard butterfly to each row of _buf and sums the
+   absolute values of the results.
+  The first output of the first row is masked out of the sum, and a separate
+   DC value is returned through _dc instead.
+  _dc:  Receives the sum of the first 8 entries of _buf.
+  _buf: The 64 input values, e.g., as produced by one of the *_hadamard()
+         helpers in this file.
+  Return: The sum of the absolute values of the other 63 outputs.*/
+unsigned oc_hadamard_sad(int *_dc,const ogg_int16_t _buf[64]){
+  unsigned sad;
+  int      dc;
+  int      t0;
+  int      t1;
+  int      t2;
+  int      t3;
+  int      t4;
+  int      t5;
+  int      t6;
+  int      t7;
+  int      r;
+  int      i;
+  sad=dc=0;
+  for(i=0;i<8;i++){
+    /*Hadamard stage 1:*/
+    t0=_buf[i*8+0]+_buf[i*8+4];
+    t4=_buf[i*8+0]-_buf[i*8+4];
+    t1=_buf[i*8+1]+_buf[i*8+5];
+    t5=_buf[i*8+1]-_buf[i*8+5];
+    t2=_buf[i*8+2]+_buf[i*8+6];
+    t6=_buf[i*8+2]-_buf[i*8+6];
+    t3=_buf[i*8+3]+_buf[i*8+7];
+    t7=_buf[i*8+3]-_buf[i*8+7];
+    /*Hadamard stage 2:*/
+    r=t0;
+    t0+=t2;
+    t2=r-t2;
+    r=t1;
+    t1+=t3;
+    t3=r-t3;
+    r=t4;
+    t4+=t6;
+    t6=r-t6;
+    r=t5;
+    t5+=t7;
+    t7=r-t7;
+    /*Hadamard stage 3:*/
+    /*-(i>0) is 0 for the first row and all ones afterwards, so the mask
+       drops the first output of the first row from the sum.*/
+    r=abs(t0+t1)&-(i>0);
+    r+=abs(t0-t1);
+    r+=abs(t2+t3);
+    r+=abs(t2-t3);
+    r+=abs(t4+t5);
+    r+=abs(t4-t5);
+    r+=abs(t6+t7);
+    r+=abs(t6-t7);
+    sad+=r;
+  }
+  /*This equals the t0+t1 term that was masked out of the first row.*/
+  dc=_buf[0]+_buf[1]+_buf[2]+_buf[3]+_buf[4]+_buf[5]+_buf[6]+_buf[7];
+  *_dc=dc;
+  return sad;
+}
+
+/*Computes the Hadamard-transformed SAD between an 8x8 source block and a
+   reference block.
+  _dc: Receives the DC value reported by oc_hadamard_sad().
+  Return: The value returned by oc_hadamard_sad().*/
+unsigned oc_enc_frag_satd_c(int *_dc,const unsigned char *_src,
+ const unsigned char *_ref,int _ystride){
+  ogg_int16_t coeffs[64];
+  unsigned    satd;
+  oc_diff_hadamard(coeffs,_src,_ref,_ystride);
+  satd=oc_hadamard_sad(_dc,coeffs);
+  return satd;
+}
+
+/*Computes the Hadamard-transformed SAD between an 8x8 source block and the
+   average of two reference blocks.
+  _dc: Receives the DC value reported by oc_hadamard_sad().
+  Return: The value returned by oc_hadamard_sad().*/
+unsigned oc_enc_frag_satd2_c(int *_dc,const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride){
+  ogg_int16_t coeffs[64];
+  unsigned    satd;
+  oc_diff_hadamard2(coeffs,_src,_ref1,_ref2,_ystride);
+  satd=oc_hadamard_sad(_dc,coeffs);
+  return satd;
+}
+
+/*Computes the Hadamard-transformed SAD of an 8x8 source block on its own.
+  _dc: Receives the DC value reported by oc_hadamard_sad().
+  Return: The value returned by oc_hadamard_sad().*/
+unsigned oc_enc_frag_intra_satd_c(int *_dc,
+ const unsigned char *_src,int _ystride){
+  ogg_int16_t coeffs[64];
+  unsigned    satd;
+  oc_intra_hadamard(coeffs,_src,_ystride);
+  satd=oc_hadamard_sad(_dc,coeffs);
+  return satd;
+}
+
+/*Computes the sum of squared differences between two 8x8 blocks.
+  _src:     The top-left pixel of the source block.
+  _ref:     The top-left pixel of the reference block.
+  _ystride: The offset between rows, applied to both blocks.
+  Return: The sum of (_src-_ref)^2 over all 64 pixels.*/
+unsigned oc_enc_frag_ssd_c(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride){
+  unsigned total;
+  int      row;
+  total=0;
+  for(row=0;row<8;row++){
+    int col;
+    for(col=0;col<8;col++){
+      int d;
+      d=_src[col]-_ref[col];
+      total+=d*d;
+    }
+    _src+=_ystride;
+    _ref+=_ystride;
+  }
+  return total;
+}
+
+/*Computes the sum of squared differences between two 8x8 blocks, counting
+   only the pixels selected by a bit mask.
+  _src:     The top-left pixel of the source block.
+  _ref:     The top-left pixel of the reference block.
+  _ystride: The offset between rows, applied to both blocks.
+  _mask:    One bit per pixel in raster order, starting from the least
+             significant bit; a pixel is counted when its bit is set.
+  Return: The sum of (_src-_ref)^2 over the selected pixels.*/
+unsigned oc_enc_frag_border_ssd_c(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,ogg_int64_t _mask){
+  ogg_int64_t bits;
+  unsigned    total;
+  int         row;
+  bits=_mask;
+  total=0;
+  for(row=0;row<8;row++){
+    int col;
+    for(col=0;col<8;col++){
+      if(bits&1){
+        int d;
+        d=_src[col]-_ref[col];
+        total+=d*d;
+      }
+      bits>>=1;
+    }
+    _src+=_ystride;
+    _ref+=_ystride;
+  }
+  return total;
+}
+
+/*Writes the (rounded-down) average of two 8x8 blocks into a third.
+  _dst:     The top-left pixel of the destination block.
+  _src1:    The top-left pixel of the first source block.
+  _src2:    The top-left pixel of the second source block.
+  _ystride: The offset between rows, applied to all three blocks.*/
+void oc_enc_frag_copy2_c(unsigned char *_dst,
+ const unsigned char *_src1,const unsigned char *_src2,int _ystride){
+  int row;
+  for(row=0;row<8;row++){
+    int col;
+    for(col=0;col<8;col++){
+      int sum;
+      sum=_src1[col]+_src2[col];
+      _dst[col]=sum>>1;
+    }
+    _dst+=_ystride;
+    _src1+=_ystride;
+    _src2+=_ystride;
+  }
+}

+ 121 - 0
jni/libtheora-1.2.0alpha1/lib/encinfo.c

@@ -0,0 +1,121 @@
+#include <stdlib.h>
+#include <string.h>
+#include "state.h"
+#include "enquant.h"
+#include "huffenc.h"
+
+
+
+/*Writes a run of bytes to the pack buffer, 8 bits at a time.
+  _opb: The pack buffer to write to.
+  _buf: The bytes to write.
+  _len: How many bytes of _buf to write; nothing is written if this is not
+         positive.*/
+static void oc_pack_octets(oggpack_buffer *_opb,const char *_buf,int _len){
+  int remaining;
+  for(remaining=_len;remaining>0;remaining--){
+    oggpackB_write(_opb,*_buf,8);
+    _buf++;
+  }
+}
+
+
+
+/*Emits the header packet selected by *_packet_state and advances the state.
+  _state:        The codec state; only required for the info header.
+  _packet_state: Selects which header to emit (OC_PACKET_INFO_HDR,
+                  OC_PACKET_COMMENT_HDR or OC_PACKET_SETUP_HDR), and is
+                  incremented after a header is emitted.
+  _opb:          The pack buffer the header is written into.
+  _qinfo:        The quantizer tables (setup header only).
+  _codes:        The Huffman codes (setup header only).
+  _vendor:       The vendor string (comment header only).
+  _tc:           The comments; only required for the comment header.
+  _op:           Receives the packet, which points into _opb's storage.
+  Return: The incremented packet state plus 3 when a header was emitted, 0 if
+           *_packet_state does not name a header, TH_EFAULT if a required
+           argument is NULL or _opb has no buffer, or the negative value
+           returned by oc_huff_codes_pack().*/
+int oc_state_flushheader(oc_theora_state *_state,int *_packet_state,
+ oggpack_buffer *_opb,const th_quant_info *_qinfo,
+ const th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS],
+ const char *_vendor,th_comment *_tc,ogg_packet *_op){
+  unsigned char *packet;
+  int            b_o_s;
+  if(_op==NULL)return TH_EFAULT;
+  switch(*_packet_state){
+    /*Codec info header.*/
+    case OC_PACKET_INFO_HDR:{
+      if(_state==NULL)return TH_EFAULT;
+      oggpackB_reset(_opb);
+      /*Mark this packet as the info header.*/
+      oggpackB_write(_opb,0x80,8);
+      /*Write the codec string.*/
+      oc_pack_octets(_opb,"theora",6);
+      /*Write the codec bitstream version.*/
+      oggpackB_write(_opb,TH_VERSION_MAJOR,8);
+      oggpackB_write(_opb,TH_VERSION_MINOR,8);
+      oggpackB_write(_opb,TH_VERSION_SUB,8);
+      /*Describe the encoded frame.*/
+      /*The frame dimensions are written divided by 16.*/
+      oggpackB_write(_opb,_state->info.frame_width>>4,16);
+      oggpackB_write(_opb,_state->info.frame_height>>4,16);
+      oggpackB_write(_opb,_state->info.pic_width,24);
+      oggpackB_write(_opb,_state->info.pic_height,24);
+      oggpackB_write(_opb,_state->info.pic_x,8);
+      oggpackB_write(_opb,_state->info.pic_y,8);
+      oggpackB_write(_opb,_state->info.fps_numerator,32);
+      oggpackB_write(_opb,_state->info.fps_denominator,32);
+      oggpackB_write(_opb,_state->info.aspect_numerator,24);
+      oggpackB_write(_opb,_state->info.aspect_denominator,24);
+      oggpackB_write(_opb,_state->info.colorspace,8);
+      oggpackB_write(_opb,_state->info.target_bitrate,24);
+      oggpackB_write(_opb,_state->info.quality,6);
+      oggpackB_write(_opb,_state->info.keyframe_granule_shift,5);
+      oggpackB_write(_opb,_state->info.pixel_fmt,2);
+      /*Spare configuration bits.*/
+      oggpackB_write(_opb,0,3);
+      b_o_s=1;
+    }break;
+    /*Comment header.*/
+    case OC_PACKET_COMMENT_HDR:{
+      int vendor_len;
+      int i;
+      if(_tc==NULL)return TH_EFAULT;
+      vendor_len=strlen(_vendor);
+      oggpackB_reset(_opb);
+      /*Mark this packet as the comment header.*/
+      oggpackB_write(_opb,0x81,8);
+      /*Write the codec string.*/
+      oc_pack_octets(_opb,"theora",6);
+      /*Write the vendor string.*/
+      /*Note that the 32-bit lengths and counts in this header are written
+         with oggpack_write(), not oggpackB_write() like everything else.*/
+      oggpack_write(_opb,vendor_len,32);
+      oc_pack_octets(_opb,_vendor,vendor_len);
+      oggpack_write(_opb,_tc->comments,32);
+      for(i=0;i<_tc->comments;i++){
+        if(_tc->user_comments[i]!=NULL){
+          oggpack_write(_opb,_tc->comment_lengths[i],32);
+          oc_pack_octets(_opb,_tc->user_comments[i],_tc->comment_lengths[i]);
+        }
+        /*A NULL comment is written as a zero-length comment.*/
+        else oggpack_write(_opb,0,32);
+      }
+      b_o_s=0;
+    }break;
+    /*Codec setup header.*/
+    case OC_PACKET_SETUP_HDR:{
+      int ret;
+      oggpackB_reset(_opb);
+      /*Mark this packet as the setup header.*/
+      oggpackB_write(_opb,0x82,8);
+      /*Write the codec string.*/
+      oc_pack_octets(_opb,"theora",6);
+      /*Write the quantizer tables.*/
+      oc_quant_params_pack(_opb,_qinfo);
+      /*Write the huffman codes.*/
+      ret=oc_huff_codes_pack(_opb,_codes);
+      /*This should never happen, because we validate the tables when they
+         are set.
+        If you see this, there's a good chance memory is being corrupted.*/
+      if(ret<0)return ret;
+      b_o_s=0;
+    }break;
+    /*No more headers to emit.*/
+    default:return 0;
+  }
+  /*This is kind of fugly: we hand the user a buffer which they do not own.
+    We will overwrite it when the next packet is output, so the user better be
+     done with it by then.
+    Vorbis is little better: it hands back buffers that it will free the next
+     time the headers are requested, or when the encoder is cleared.
+    Hopefully libogg2 will make this much cleaner.*/
+  packet=oggpackB_get_buffer(_opb);
+  /*If there's no packet, malloc failed while writing.*/
+  if(packet==NULL)return TH_EFAULT;
+  _op->packet=packet;
+  _op->bytes=oggpackB_bytes(_opb);
+  _op->b_o_s=b_o_s;
+  _op->e_o_s=0;
+  _op->granulepos=0;
+  /*The packet number is derived from the state before it is advanced; the
+     return value is derived from the state after.*/
+  _op->packetno=*_packet_state+3;
+  return ++(*_packet_state)+3;
+}

+ 845 - 0
jni/libtheora-1.2.0alpha1/lib/encint.h

@@ -0,0 +1,845 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id$
+
+ ********************************************************************/
+#if !defined(_encint_H)
+# define _encint_H (1)
+# include "theora/theoraenc.h"
+# include "state.h"
+# include "mathops.h"
+# include "enquant.h"
+# include "huffenc.h"
+/*# define OC_COLLECT_METRICS*/
+
+
+
+typedef oc_mv                         oc_mv2[2];
+
+typedef struct oc_enc_opt_vtable      oc_enc_opt_vtable;
+typedef struct oc_enc_opt_data        oc_enc_opt_data;
+typedef struct oc_mb_enc_info         oc_mb_enc_info;
+typedef struct oc_mode_scheme_chooser oc_mode_scheme_chooser;
+typedef struct oc_fr_state            oc_fr_state;
+typedef struct oc_qii_state           oc_qii_state;
+typedef struct oc_enc_pipeline_state  oc_enc_pipeline_state;
+typedef struct oc_mode_rd             oc_mode_rd;
+typedef struct oc_iir_filter          oc_iir_filter;
+typedef struct oc_frame_metrics       oc_frame_metrics;
+typedef struct oc_rc_state            oc_rc_state;
+typedef struct th_enc_ctx             oc_enc_ctx;
+typedef struct oc_token_checkpoint    oc_token_checkpoint;
+
+
+
+/*Encoder-specific accelerated functions.*/
+# if defined(OC_X86_ASM)
+#  if defined(_MSC_VER)
+#   include "x86_vc/x86enc.h"
+#  else
+#   include "x86/x86enc.h"
+#  endif
+# endif
+# if defined(OC_ARM_ASM)
+#  include "arm/armenc.h"
+# endif
+
+# if !defined(oc_enc_accel_init)
+#  define oc_enc_accel_init oc_enc_accel_init_c
+# endif
+# if defined(OC_ENC_USE_VTABLE)
+#  if !defined(oc_enc_frag_sub)
+#   define oc_enc_frag_sub(_enc,_diff,_src,_ref,_ystride) \
+  ((*(_enc)->opt_vtable.frag_sub)(_diff,_src,_ref,_ystride))
+#  endif
+#  if !defined(oc_enc_frag_sub_128)
+#   define oc_enc_frag_sub_128(_enc,_diff,_src,_ystride) \
+  ((*(_enc)->opt_vtable.frag_sub_128)(_diff,_src,_ystride))
+#  endif
+#  if !defined(oc_enc_frag_sad)
+#   define oc_enc_frag_sad(_enc,_src,_ref,_ystride) \
+  ((*(_enc)->opt_vtable.frag_sad)(_src,_ref,_ystride))
+#  endif
+#  if !defined(oc_enc_frag_sad_thresh)
+#   define oc_enc_frag_sad_thresh(_enc,_src,_ref,_ystride,_thresh) \
+  ((*(_enc)->opt_vtable.frag_sad_thresh)(_src,_ref,_ystride,_thresh))
+#  endif
+#  if !defined(oc_enc_frag_sad2_thresh)
+#   define oc_enc_frag_sad2_thresh(_enc,_src,_ref1,_ref2,_ystride,_thresh) \
+  ((*(_enc)->opt_vtable.frag_sad2_thresh)(_src,_ref1,_ref2,_ystride,_thresh))
+#  endif
+#  if !defined(oc_enc_frag_intra_sad)
+#   define oc_enc_frag_intra_sad(_enc,_src,_ystride) \
+  ((*(_enc)->opt_vtable.frag_intra_sad)(_src,_ystride))
+#  endif
+#  if !defined(oc_enc_frag_satd)
+#   define oc_enc_frag_satd(_enc,_dc,_src,_ref,_ystride) \
+  ((*(_enc)->opt_vtable.frag_satd)(_dc,_src,_ref,_ystride))
+#  endif
+#  if !defined(oc_enc_frag_satd2)
+#   define oc_enc_frag_satd2(_enc,_dc,_src,_ref1,_ref2,_ystride) \
+  ((*(_enc)->opt_vtable.frag_satd2)(_dc,_src,_ref1,_ref2,_ystride))
+#  endif
+#  if !defined(oc_enc_frag_intra_satd)
+#   define oc_enc_frag_intra_satd(_enc,_dc,_src,_ystride) \
+  ((*(_enc)->opt_vtable.frag_intra_satd)(_dc,_src,_ystride))
+#  endif
+#  if !defined(oc_enc_frag_ssd)
+#   define oc_enc_frag_ssd(_enc,_src,_ref,_ystride) \
+  ((*(_enc)->opt_vtable.frag_ssd)(_src,_ref,_ystride))
+#  endif
+#  if !defined(oc_enc_frag_border_ssd)
+#   define oc_enc_frag_border_ssd(_enc,_src,_ref,_ystride,_mask) \
+  ((*(_enc)->opt_vtable.frag_border_ssd)(_src,_ref,_ystride,_mask))
+#  endif
+#  if !defined(oc_enc_frag_copy2)
+#   define oc_enc_frag_copy2(_enc,_dst,_src1,_src2,_ystride) \
+  ((*(_enc)->opt_vtable.frag_copy2)(_dst,_src1,_src2,_ystride))
+#  endif
+#  if !defined(oc_enc_enquant_table_init)
+#   define oc_enc_enquant_table_init(_enc,_enquant,_dequant) \
+  ((*(_enc)->opt_vtable.enquant_table_init)(_enquant,_dequant))
+#  endif
+#  if !defined(oc_enc_enquant_table_fixup)
+#   define oc_enc_enquant_table_fixup(_enc,_enquant,_nqis) \
+  ((*(_enc)->opt_vtable.enquant_table_fixup)(_enquant,_nqis))
+#  endif
+#  if !defined(oc_enc_quantize)
+#   define oc_enc_quantize(_enc,_qdct,_dct,_dequant,_enquant) \
+  ((*(_enc)->opt_vtable.quantize)(_qdct,_dct,_dequant,_enquant))
+#  endif
+#  if !defined(oc_enc_frag_recon_intra)
+#   define oc_enc_frag_recon_intra(_enc,_dst,_ystride,_residue) \
+  ((*(_enc)->opt_vtable.frag_recon_intra)(_dst,_ystride,_residue))
+#  endif
+#  if !defined(oc_enc_frag_recon_inter)
+#   define oc_enc_frag_recon_inter(_enc,_dst,_src,_ystride,_residue) \
+  ((*(_enc)->opt_vtable.frag_recon_inter)(_dst,_src,_ystride,_residue))
+#  endif
+#  if !defined(oc_enc_fdct8x8)
+#   define oc_enc_fdct8x8(_enc,_y,_x) \
+  ((*(_enc)->opt_vtable.fdct8x8)(_y,_x))
+#  endif
+# else
+#  if !defined(oc_enc_frag_sub)
+#   define oc_enc_frag_sub(_enc,_diff,_src,_ref,_ystride) \
+  oc_enc_frag_sub_c(_diff,_src,_ref,_ystride)
+#  endif
+#  if !defined(oc_enc_frag_sub_128)
+#   define oc_enc_frag_sub_128(_enc,_diff,_src,_ystride) \
+  oc_enc_frag_sub_128_c(_diff,_src,_ystride)
+#  endif
+#  if !defined(oc_enc_frag_sad)
+#   define oc_enc_frag_sad(_enc,_src,_ref,_ystride) \
+  oc_enc_frag_sad_c(_src,_ref,_ystride)
+#  endif
+#  if !defined(oc_enc_frag_sad_thresh)
+#   define oc_enc_frag_sad_thresh(_enc,_src,_ref,_ystride,_thresh) \
+  oc_enc_frag_sad_thresh_c(_src,_ref,_ystride,_thresh)
+#  endif
+#  if !defined(oc_enc_frag_sad2_thresh)
+#   define oc_enc_frag_sad2_thresh(_enc,_src,_ref1,_ref2,_ystride,_thresh) \
+  oc_enc_frag_sad2_thresh_c(_src,_ref1,_ref2,_ystride,_thresh)
+#  endif
+#  if !defined(oc_enc_frag_intra_sad)
+#   define oc_enc_frag_intra_sad(_enc,_src,_ystride) \
+  oc_enc_frag_intra_sad_c(_src,_ystride)
+#  endif
+#  if !defined(oc_enc_frag_satd)
+#   define oc_enc_frag_satd(_enc,_dc,_src,_ref,_ystride) \
+  oc_enc_frag_satd_c(_dc,_src,_ref,_ystride)
+#  endif
+#  if !defined(oc_enc_frag_satd2)
+#   define oc_enc_frag_satd2(_enc,_dc,_src,_ref1,_ref2,_ystride) \
+  oc_enc_frag_satd2_c(_dc,_src,_ref1,_ref2,_ystride)
+#  endif
+#  if !defined(oc_enc_frag_intra_satd)
+#   define oc_enc_frag_intra_satd(_enc,_dc,_src,_ystride) \
+  oc_enc_frag_intra_satd_c(_dc,_src,_ystride)
+#  endif
+#  if !defined(oc_enc_frag_ssd)
+#   define oc_enc_frag_ssd(_enc,_src,_ref,_ystride) \
+  oc_enc_frag_ssd_c(_src,_ref,_ystride)
+#  endif
+#  if !defined(oc_enc_frag_border_ssd)
+#   define oc_enc_frag_border_ssd(_enc,_src,_ref,_ystride,_mask) \
+  oc_enc_frag_border_ssd_c(_src,_ref,_ystride,_mask)
+#  endif
+#  if !defined(oc_enc_frag_copy2)
+#   define oc_enc_frag_copy2(_enc,_dst,_src1,_src2,_ystride) \
+  oc_enc_frag_copy2_c(_dst,_src1,_src2,_ystride)
+#  endif
+#  if !defined(oc_enc_enquant_table_init)
+#   define oc_enc_enquant_table_init(_enc,_enquant,_dequant) \
+  oc_enc_enquant_table_init_c(_enquant,_dequant)
+#  endif
+#  if !defined(oc_enc_enquant_table_fixup)
+#   define oc_enc_enquant_table_fixup(_enc,_enquant,_nqis) \
+  oc_enc_enquant_table_fixup_c(_enquant,_nqis)
+#  endif
+#  if !defined(oc_enc_quantize)
+#   define oc_enc_quantize(_enc,_qdct,_dct,_dequant,_enquant) \
+  oc_enc_quantize_c(_qdct,_dct,_dequant,_enquant)
+#  endif
+#  if !defined(oc_enc_frag_recon_intra)
+#   define oc_enc_frag_recon_intra(_enc,_dst,_ystride,_residue) \
+  oc_frag_recon_intra_c(_dst,_ystride,_residue)
+#  endif
+#  if !defined(oc_enc_frag_recon_inter)
+#   define oc_enc_frag_recon_inter(_enc,_dst,_src,_ystride,_residue) \
+  oc_frag_recon_inter_c(_dst,_src,_ystride,_residue)
+#  endif
+#  if !defined(oc_enc_fdct8x8)
+#   define oc_enc_fdct8x8(_enc,_y,_x) oc_enc_fdct8x8_c(_y,_x)
+#  endif
+# endif
+
+
+
+/*Constants for the packet-out state machine specific to the encoder.*/
+
+/*Next packet to emit: Data packet, but none are ready yet.*/
+#define OC_PACKET_EMPTY (0)
+/*Next packet to emit: Data packet, and one is ready.*/
+#define OC_PACKET_READY (1)
+
+/*All features enabled.*/
+#define OC_SP_LEVEL_SLOW          (0)
+/*Enable early skip.*/
+#define OC_SP_LEVEL_EARLY_SKIP    (1)
+/*Use analysis shortcuts, single quantizer, and faster tokenization.*/
+#define OC_SP_LEVEL_FAST_ANALYSIS (2)
+/*Use SAD instead of SATD.*/
+#define OC_SP_LEVEL_NOSATD        (3)
+/*Disable motion compensation.*/
+#define OC_SP_LEVEL_NOMC          (4)
+/*Maximum valid speed level.*/
+#define OC_SP_LEVEL_MAX           (4)
+
+
+/*The number of extra bits of precision at which to store rate metrics.*/
+# define OC_BIT_SCALE  (6)
+/*The number of extra bits of precision at which to store RMSE metrics.
+  This must be at least half OC_BIT_SCALE (rounded up).*/
+# define OC_RMSE_SCALE (5)
+/*The number of quantizer bins to partition statistics into.*/
+# define OC_LOGQ_BINS  (8)
+/*The number of SAD/SATD bins to partition statistics into.*/
+# define OC_COMP_BINS   (24)
+/*The number of bits of precision to drop from SAD and SATD scores
+   to assign them to a bin.*/
+# define OC_SAD_SHIFT  (6)
+# define OC_SATD_SHIFT (9)
+
+/*Masking is applied by scaling the D used in R-D optimization (via rd_scale)
+   or the lambda parameter (via rd_iscale).
+  These are only equivalent within a single block; when more than one block is
+   being considered, the former is the interpretation used.*/
+
+/*The number of fractional bits dropped by OC_RD_SCALE() after multiplying by
+   an rd_scale value.
+  This must be at least 4 for OC_RD_SKIP_SCALE() to work below, because that
+   macro shifts by OC_RD_SCALE_BITS-4.
+  OC_RD_ISCALE_BITS is the corresponding shift used by OC_RD_ISCALE() for
+   rd_iscale values.*/
+# define OC_RD_SCALE_BITS (12-OC_BIT_SCALE)
+# define OC_RD_ISCALE_BITS (11)
+
+/*This macro is applied to _ssd values with just 4 bits of headroom
+   ((15-OC_RMSE_SCALE)*2+OC_BIT_SCALE+2); since we want to allow rd_scales as
+   large as 16, and need additional fractional bits, our only recourse that
+   doesn't lose precision on blocks with very small SSDs is to use a wider
+   multiply.
+  When long is wider than 32 bits, the product is formed in an unsigned long,
+   half of 1<<OC_RD_SCALE_BITS is added for rounding, and the sum is shifted
+   down by OC_RD_SCALE_BITS before being narrowed back to unsigned.
+  Otherwise, _ssd is split at bit OC_RD_SCALE_BITS: the high part is
+   multiplied by _rd_scale directly, and only the product of the low
+   OC_RD_SCALE_BITS bits is rounded and shifted down.
+  OC_RD_SKIP_SCALE() and OC_RD_ISCALE() below are single multiplies followed
+   by the same add-half-then-shift rounding, using shifts of
+   OC_RD_SCALE_BITS-4 and OC_RD_ISCALE_BITS, respectively.*/
+# if LONG_MAX>2147483647
+#  define OC_RD_SCALE(_ssd,_rd_scale) \
+ ((unsigned)((unsigned long)(_ssd)*(_rd_scale) \
+ +((1<<OC_RD_SCALE_BITS)>>1)>>OC_RD_SCALE_BITS))
+# else
+#  define OC_RD_SCALE(_ssd,_rd_scale) \
+ (((_ssd)>>OC_RD_SCALE_BITS)*(_rd_scale) \
+ +(((_ssd)&(1<<OC_RD_SCALE_BITS)-1)*(_rd_scale) \
+ +((1<<OC_RD_SCALE_BITS)>>1)>>OC_RD_SCALE_BITS))
+# endif
+# define OC_RD_SKIP_SCALE(_ssd,_rd_scale) \
+ ((_ssd)*(_rd_scale)+((1<<OC_RD_SCALE_BITS-4)>>1)>>OC_RD_SCALE_BITS-4)
+# define OC_RD_ISCALE(_lambda,_rd_iscale) \
+ ((_lambda)*(_rd_iscale)+((1<<OC_RD_ISCALE_BITS)>>1)>>OC_RD_ISCALE_BITS)
+
+
+/*The bits used for each of the MB mode codebooks.*/
+extern const unsigned char OC_MODE_BITS[2][OC_NMODES];
+
+/*The bits used for each of the MV codebooks.*/
+extern const unsigned char OC_MV_BITS[2][64];
+
+/*The minimum value that can be stored in a SB run for each codeword.
+  The last entry is the upper bound on the length of a single SB run.*/
+extern const ogg_uint16_t  OC_SB_RUN_VAL_MIN[8];
+/*The bits used for each SB run codeword.*/
+extern const unsigned char OC_SB_RUN_CODE_NBITS[7];
+
+/*The bits used for each block run length (starting with 1).*/
+extern const unsigned char OC_BLOCK_RUN_CODE_NBITS[30];
+
+
+
+/*Encoder specific functions with accelerated variants.
+  When OC_ENC_USE_VTABLE is defined, the oc_enc_*() dispatch macros in this
+   header call through these pointers via (_enc)->opt_vtable, unless a given
+   macro was already defined before the defaults in this header.
+  Otherwise those macros expand directly to the pure-C *_c() functions, and
+   the encoder context does not contain this table.
+  Each entry takes the same arguments as its dispatch macro, minus _enc.*/
+struct oc_enc_opt_vtable{
+  void     (*frag_sub)(ogg_int16_t _diff[64],const unsigned char *_src,
+   const unsigned char *_ref,int _ystride);
+  void     (*frag_sub_128)(ogg_int16_t _diff[64],
+   const unsigned char *_src,int _ystride);
+  unsigned (*frag_sad)(const unsigned char *_src,
+   const unsigned char *_ref,int _ystride);
+  unsigned (*frag_sad_thresh)(const unsigned char *_src,
+   const unsigned char *_ref,int _ystride,unsigned _thresh);
+  unsigned (*frag_sad2_thresh)(const unsigned char *_src,
+   const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
+   unsigned _thresh);
+  unsigned (*frag_intra_sad)(const unsigned char *_src,int _ystride);
+  unsigned (*frag_satd)(int *_dc,const unsigned char *_src,
+   const unsigned char *_ref,int _ystride);
+  unsigned (*frag_satd2)(int *_dc,const unsigned char *_src,
+   const unsigned char *_ref1,const unsigned char *_ref2,int _ystride);
+  unsigned (*frag_intra_satd)(int *_dc,const unsigned char *_src,int _ystride);
+  unsigned (*frag_ssd)(const unsigned char *_src,
+   const unsigned char *_ref,int _ystride);
+  unsigned (*frag_border_ssd)(const unsigned char *_src,
+   const unsigned char *_ref,int _ystride,ogg_int64_t _mask);
+  void     (*frag_copy2)(unsigned char *_dst,
+   const unsigned char *_src1,const unsigned char *_src2,int _ystride);
+  void     (*enquant_table_init)(void *_enquant,
+   const ogg_uint16_t _dequant[64]);
+  void     (*enquant_table_fixup)(void *_enquant[3][3][2],int _nqis);
+  int      (*quantize)(ogg_int16_t _qdct[64],const ogg_int16_t _dct[64],
+   const ogg_uint16_t _dequant[64],const void *_enquant);
+  void     (*frag_recon_intra)(unsigned char *_dst,int _ystride,
+   const ogg_int16_t _residue[64]);
+  void     (*frag_recon_inter)(unsigned char *_dst,
+   const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]);
+  void     (*fdct8x8)(ogg_int16_t _y[64],const ogg_int16_t _x[64]);
+};
+
+
+/*Encoder specific data that varies according to which variants of the above
+   functions are used.*/
+struct oc_enc_opt_data{
+  /*The size of a single quantizer table.
+    This must be a multiple of enquant_table_alignment.*/
+  size_t               enquant_table_size;
+  /*The alignment required for the quantizer tables.
+    This must be a positive power of two.*/
+  int                  enquant_table_alignment;
+};
+
+
+void oc_enc_accel_init(oc_enc_ctx *_enc);
+
+
+
+/*Encoder-specific macroblock information.*/
+struct oc_mb_enc_info{
+  /*Neighboring macro blocks that have MVs available from the current frame.*/
+  unsigned      cneighbors[4];
+  /*Neighboring macro blocks to use for MVs from the previous frame.*/
+  unsigned      pneighbors[4];
+  /*The number of current-frame neighbors.*/
+  unsigned char ncneighbors;
+  /*The number of previous-frame neighbors.*/
+  unsigned char npneighbors;
+  /*Flags indicating which MB modes have been refined.*/
+  unsigned char refined;
+  /*Motion vectors for a macro block for the current frame and the
+     previous two frames.
+    Each is a set of 2 vectors against OC_FRAME_GOLD and OC_FRAME_PREV, which
+     can be used to estimate constant velocity and constant acceleration
+     predictors.
+    Uninitialized MVs are (0,0).*/
+  oc_mv2        analysis_mv[3];
+  /*Current unrefined analysis MVs.*/
+  oc_mv         unref_mv[2];
+  /*Unrefined block MVs.*/
+  oc_mv         block_mv[4];
+  /*Refined block MVs.*/
+  oc_mv         ref_mv[4];
+  /*Minimum motion estimation error from the analysis stage.*/
+  ogg_uint16_t  error[2];
+  /*MB error for half-pel refinement for each frame type.*/
+  unsigned      satd[2];
+  /*Block error for half-pel refinement.*/
+  unsigned      block_satd[4];
+};
+
+
+
+/*State machine to estimate the opportunity cost of coding a MB mode.*/
+struct oc_mode_scheme_chooser{
+  /*Pointers to a list containing the index of each mode in the mode
+     alphabet used by each scheme.
+    The first entry points to the dynamic scheme0_ranks, while the remaining 7
+     point to the constant entries stored in OC_MODE_SCHEMES.*/
+  const unsigned char *mode_ranks[8];
+  /*The ranks for each mode when coded with scheme 0.
+    These are optimized so that the more frequent modes have lower ranks.*/
+  unsigned char        scheme0_ranks[OC_NMODES];
+  /*The list of modes, sorted in descending order of frequency, that
+    corresponds to the ranks above.*/
+  unsigned char        scheme0_list[OC_NMODES];
+  /*The number of times each mode has been chosen so far.*/
+  unsigned             mode_counts[OC_NMODES];
+  /*The list of mode coding schemes, sorted in ascending order of bit cost.*/
+  unsigned char        scheme_list[8];
+  /*The number of bits used by each mode coding scheme.*/
+  ptrdiff_t            scheme_bits[8];
+};
+
+
+void oc_mode_scheme_chooser_init(oc_mode_scheme_chooser *_chooser);
+
+
+
+/*State to track coded block flags and their bit cost.
+  We use opportunity cost to measure the bits required to code or skip the next
+   block, using the cheaper of the cost to code it fully or partially, so long
+   as both are possible.*/
+struct oc_fr_state{
+  /*The number of bits required for the coded block flags so far this frame.*/
+  ptrdiff_t  bits;
+  /*The length of the current run for the partial super block flag, not
+     including the current super block.*/
+  unsigned   sb_partial_count:16;
+  /*The length of the current run for the full super block flag, not
+     including the current super block.*/
+  unsigned   sb_full_count:16;
+  /*The length of the coded block flag run when the current super block
+     started.*/
+  unsigned   b_coded_count_prev:6;
+  /*The coded block flag when the current super block started.*/
+  signed int b_coded_prev:2;
+  /*The length of the current coded block flag run.*/
+  unsigned   b_coded_count:6;
+  /*The current coded block flag.*/
+  signed int b_coded:2;
+  /*The number of blocks processed in the current super block.*/
+  unsigned   b_count:5;
+  /*Whether or not it is cheaper to code the current super block partially,
+     even if it could still be coded fully.*/
+  unsigned   sb_prefer_partial:1;
+  /*Whether the last super block was coded partially.*/
+  signed int sb_partial:2;
+  /*The number of bits required for the flags for the current super block.*/
+  unsigned   sb_bits:6;
+  /*Whether the last non-partial super block was coded fully.*/
+  signed int sb_full:2;
+};
+
+
+
+struct oc_qii_state{
+  ptrdiff_t  bits;
+  unsigned   qi01_count:14;
+  signed int qi01:2;
+  unsigned   qi12_count:14;
+  signed int qi12:2;
+};
+
+
+
+/*Temporary encoder state for the analysis pipeline.*/
+struct oc_enc_pipeline_state{
+  /*DCT coefficient storage.
+    This is kept off the stack because a) gcc can't align things on the stack
+     reliably on ARM, and b) it avoids (unintentional) data hazards between
+     ARM and NEON code.*/
+  OC_ALIGN16(ogg_int16_t dct_data[64*3]);
+  OC_ALIGN16(signed char bounding_values[256]);
+  oc_fr_state         fr[3];
+  oc_qii_state        qs[3];
+  /*Skip SSD storage for the current MCU in each plane.*/
+  unsigned           *skip_ssd[3];
+  /*Coded/uncoded fragment lists for each plane for the current MCU.*/
+  ptrdiff_t          *coded_fragis[3];
+  ptrdiff_t          *uncoded_fragis[3];
+  ptrdiff_t           ncoded_fragis[3];
+  ptrdiff_t           nuncoded_fragis[3];
+  /*The starting fragment for the current MCU in each plane.*/
+  ptrdiff_t           froffset[3];
+  /*The starting row for the current MCU in each plane.*/
+  int                 fragy0[3];
+  /*The ending row for the current MCU in each plane.*/
+  int                 fragy_end[3];
+  /*The starting superblock for the current MCU in each plane.*/
+  unsigned            sbi0[3];
+  /*The ending superblock for the current MCU in each plane.*/
+  unsigned            sbi_end[3];
+  /*The number of tokens for zzi=1 for each color plane.*/
+  int                 ndct_tokens1[3];
+  /*The outstanding eob_run count for zzi=1 for each color plane.*/
+  int                 eob_run1[3];
+  /*Whether or not the loop filter is enabled.*/
+  int                 loop_filter;
+};
+
+
+
+/*Statistics used to estimate R-D cost of a block in a given coding mode.
+  See modedec.h for more details.*/
+struct oc_mode_rd{
+  /*The expected bits used by the DCT tokens, shifted by OC_BIT_SCALE.*/
+  ogg_int16_t rate;
+  /*The expected square root of the sum of squared errors, shifted by
+     OC_RMSE_SCALE.*/
+  ogg_int16_t rmse;
+};
+
+# if defined(OC_COLLECT_METRICS)
+#  include "collect.h"
+# endif
+
+
+
+/*A 2nd order low-pass Bessel follower.
+  We use this for rate control because it has fast reaction time, but is
+   critically damped.*/
+struct oc_iir_filter{
+  ogg_int32_t c[2];
+  ogg_int64_t g;
+  ogg_int32_t x[2];
+  ogg_int32_t y[2];
+};
+
+
+
+/*The 2-pass metrics associated with a single frame.*/
+struct oc_frame_metrics{
+  /*The log base 2 of the scale factor for this frame in Q24 format.*/
+  ogg_int32_t   log_scale;
+  /*The number of application-requested duplicates of this frame.*/
+  unsigned      dup_count:31;
+  /*The frame type from pass 1.*/
+  unsigned      frame_type:1;
+  /*The frame activity average from pass 1.*/
+  unsigned      activity_avg;
+};
+
+
+
+/*Rate control state information.*/
+struct oc_rc_state{
+  /*The target average bits per frame.*/
+  ogg_int64_t        bits_per_frame;
+  /*The current buffer fullness (bits available to be used).*/
+  ogg_int64_t        fullness;
+  /*The target buffer fullness.
+    This is where we'd like to be by the last keyframe that appears in the next
+     buf_delay frames.*/
+  ogg_int64_t        target;
+  /*The maximum buffer fullness (total size of the buffer).*/
+  ogg_int64_t        max;
+  /*The log of the number of pixels in a frame in Q57 format.*/
+  ogg_int64_t        log_npixels;
+  /*The exponent used in the rate model in Q8 format.*/
+  unsigned           exp[2];
+  /*The number of frames to distribute the buffer usage over.*/
+  int                buf_delay;
+  /*The total drop count from the previous frame.
+    This includes duplicates explicitly requested via the
+     TH_ENCCTL_SET_DUP_COUNT API as well as frames we chose to drop ourselves.*/
+  ogg_uint32_t       prev_drop_count;
+  /*The log of an estimated scale factor used to obtain the real framerate, for
+     VFR sources or, e.g., 12 fps content doubled to 24 fps, etc.*/
+  ogg_int64_t        log_drop_scale;
+  /*The log of estimated scale factor for the rate model in Q57 format.*/
+  ogg_int64_t        log_scale[2];
+  /*The log of the target quantizer level in Q57 format.*/
+  ogg_int64_t        log_qtarget;
+  /*Will we drop frames to meet bitrate target?*/
+  unsigned char      drop_frames;
+  /*Do we respect the maximum buffer fullness?*/
+  unsigned char      cap_overflow;
+  /*Can the reservoir go negative?*/
+  unsigned char      cap_underflow;
+  /*Second-order lowpass filters to track scale and VFR.*/
+  oc_iir_filter      scalefilter[2];
+  int                inter_count;
+  int                inter_delay;
+  int                inter_delay_target;
+  oc_iir_filter      vfrfilter;
+  /*Two-pass mode state.
+    0 => 1-pass encoding.
+    1 => 1st pass of 2-pass encoding.
+    2 => 2nd pass of 2-pass encoding.*/
+  int                twopass;
+  /*Buffer for current frame metrics.*/
+  unsigned char      twopass_buffer[48];
+  /*The number of bytes in the frame metrics buffer.
+    When 2-pass encoding is enabled, this is set to 0 after each frame is
+     submitted, and must be non-zero before the next frame will be accepted.*/
+  int                twopass_buffer_bytes;
+  int                twopass_buffer_fill;
+  /*Whether or not to force the next frame to be a keyframe.*/
+  unsigned char      twopass_force_kf;
+  /*The metrics for the previous frame.*/
+  oc_frame_metrics   prev_metrics;
+  /*The metrics for the current frame.*/
+  oc_frame_metrics   cur_metrics;
+  /*The buffered metrics for future frames.*/
+  oc_frame_metrics  *frame_metrics;
+  int                nframe_metrics;
+  int                cframe_metrics;
+  /*The index of the current frame in the circular metric buffer.*/
+  int                frame_metrics_head;
+  /*The frame count of each type (keyframes, delta frames, and dup frames);
+     32 bits limits us to 2.268 years at 60 fps.*/
+  ogg_uint32_t       frames_total[3];
+  /*The number of frames of each type yet to be processed.*/
+  ogg_uint32_t       frames_left[3];
+  /*The sum of the scale values for each frame type.*/
+  ogg_int64_t        scale_sum[2];
+  /*The start of the window over which the current scale sums are taken.*/
+  int                scale_window0;
+  /*The end of the window over which the current scale sums are taken.*/
+  int                scale_window_end;
+  /*The frame count of each type in the current 2-pass window; this does not
+     include dup frames.*/
+  int                nframes[3];
+  /*The total accumulated estimation bias.*/
+  ogg_int64_t        rate_bias;
+};
+
+
+void oc_rc_state_init(oc_rc_state *_rc,oc_enc_ctx *_enc);
+void oc_rc_state_clear(oc_rc_state *_rc);
+
+void oc_enc_rc_resize(oc_enc_ctx *_enc);
+int oc_enc_select_qi(oc_enc_ctx *_enc,int _qti,int _clamp);
+void oc_enc_calc_lambda(oc_enc_ctx *_enc,int _frame_type);
+int oc_enc_update_rc_state(oc_enc_ctx *_enc,
+ long _bits,int _qti,int _qi,int _trial,int _droppable);
+int oc_enc_rc_2pass_out(oc_enc_ctx *_enc,unsigned char **_buf);
+int oc_enc_rc_2pass_in(oc_enc_ctx *_enc,unsigned char *_buf,size_t _bytes);
+
+
+
+/*The internal encoder state.*/
+struct th_enc_ctx{
+  /*Shared encoder/decoder state.*/
+  oc_theora_state          state;
+  /*Buffer in which to assemble packets.*/
+  oggpack_buffer           opb;
+  /*Encoder-specific macroblock information.*/
+  oc_mb_enc_info          *mb_info;
+  /*DC coefficients after prediction.*/
+  ogg_int16_t             *frag_dc;
+  /*The list of coded macro blocks, in coded order.*/
+  unsigned                *coded_mbis;
+  /*The number of coded macro blocks.*/
+  size_t                   ncoded_mbis;
+  /*Whether or not packets are ready to be emitted.
+    This takes on negative values while there are remaining header packets to
+     be emitted, reaches 0 when the codec is ready for input, and becomes
+     positive when a frame has been processed and data packets are ready.*/
+  int                      packet_state;
+  /*The maximum distance between keyframes.*/
+  ogg_uint32_t             keyframe_frequency_force;
+  /*The number of duplicates to produce for the next frame.*/
+  ogg_uint32_t             dup_count;
+  /*The number of duplicates remaining to be emitted for the current frame.*/
+  ogg_uint32_t             nqueued_dups;
+  /*The number of duplicates emitted for the last frame.*/
+  ogg_uint32_t             prev_dup_count;
+  /*The current speed level.*/
+  int                      sp_level;
+  /*Whether or not VP3 compatibility mode has been enabled.*/
+  unsigned char            vp3_compatible;
+  /*Whether or not any INTER frames have been coded.*/
+  unsigned char            coded_inter_frame;
+  /*Whether or not previous frame was dropped.*/
+  unsigned char            prevframe_dropped;
+  /*Stores most recently chosen Huffman tables for each frame type, DC and AC
+     coefficients, and luma and chroma tokens.
+    The actual Huffman table used for a given coefficient depends not only on
+     the choice made here, but also its index in the zig-zag ordering.*/
+  unsigned char            huff_idxs[2][2][2];
+  /*Current count of bits used by each MV coding mode.*/
+  size_t                   mv_bits[2];
+  /*The mode scheme chooser for estimating mode coding costs.*/
+  oc_mode_scheme_chooser   chooser;
+  /*Temporary encoder state for the analysis pipeline.*/
+  oc_enc_pipeline_state    pipe;
+  /*The number of vertical super blocks in an MCU.*/
+  int                      mcu_nvsbs;
+  /*The SSD error for skipping each fragment in the current MCU.*/
+  unsigned                *mcu_skip_ssd;
+  /*The masking scale factors for chroma blocks in the current MCU.*/
+  ogg_uint16_t            *mcu_rd_scale;
+  ogg_uint16_t            *mcu_rd_iscale;
+  /*The DCT token lists for each coefficient and each plane.*/
+  unsigned char          **dct_tokens[3];
+  /*The extra bits associated with each DCT token.*/
+  ogg_uint16_t           **extra_bits[3];
+  /*The number of DCT tokens for each coefficient for each plane.*/
+  ptrdiff_t                ndct_tokens[3][64];
+  /*Pending EOB runs for each coefficient for each plane.*/
+  ogg_uint16_t             eob_run[3][64];
+  /*The offset of the first DCT token for each coefficient for each plane.*/
+  unsigned char            dct_token_offs[3][64];
+  /*The last DC coefficient for each plane and reference frame.*/
+  int                      dc_pred_last[3][4];
+#if defined(OC_COLLECT_METRICS)
+  /*Fragment SAD statistics for MB mode estimation metrics.*/
+  unsigned                *frag_sad;
+  /*Fragment SATD statistics for MB mode estimation metrics.*/
+  unsigned                *frag_satd;
+  /*Fragment SSD statistics for MB mode estimation metrics.*/
+  unsigned                *frag_ssd;
+#endif
+  /*The R-D optimization parameter.*/
+  int                      lambda;
+  /*The average block "activity" of the previous frame.*/
+  unsigned                 activity_avg;
+  /*The average MB luma of the previous frame.*/
+  unsigned                 luma_avg;
+  /*The huffman tables in use.*/
+  th_huff_code             huff_codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS];
+  /*The quantization parameters in use.*/
+  th_quant_info            qinfo;
+  /*The original DC coefficients saved off from the dequantization tables.*/
+  ogg_uint16_t             dequant_dc[64][3][2];
+  /*Condensed dequantization tables.*/
+  const ogg_uint16_t      *dequant[3][3][2];
+  /*Condensed quantization tables.*/
+  void                    *enquant[3][3][2];
+  /*The full set of quantization tables.*/
+  void                    *enquant_tables[64][3][2];
+  /*Storage for the quantization tables.*/
+  unsigned char           *enquant_table_data;
+  /*An "average" quantizer for each frame type (INTRA or INTER) and qi value.
+    This is used to parameterize the rate control decisions.
+    They are kept in the log domain to simplify later processing.
+    These are DCT domain quantizers, and so are scaled by an additional factor
+     of 4 from the pixel domain.*/
+  ogg_int64_t              log_qavg[2][64];
+  /*The "average" quantizer further partitioned by color plane.
+    This is used to parameterize mode decision.
+    These are DCT domain quantizers, and so are scaled by an additional factor
+     of 4 from the pixel domain.*/
+  ogg_int16_t              log_plq[64][3][2];
+  /*The R-D scale factors to apply to chroma blocks for a given frame type
+     (INTRA or INTER) and qi value.
+    The first is the "D" modifier (rd_scale), while the second is the "lambda"
+     modifier (rd_iscale).*/
+  ogg_uint16_t             chroma_rd_scale[2][64][2];
+  /*The interpolated mode decision R-D lookup tables for the current
+     quantizers, color plane, and quantization type.*/
+  oc_mode_rd               mode_rd[3][3][2][OC_COMP_BINS];
+  /*The buffer state used to drive rate control.*/
+  oc_rc_state              rc;
+# if defined(OC_ENC_USE_VTABLE)
+  /*Table for encoder acceleration functions.*/
+  oc_enc_opt_vtable        opt_vtable;
+# endif
+  /*Table for encoder data used by accelerated functions.*/
+  oc_enc_opt_data          opt_data;
+};
+
+
+void oc_enc_analyze_intra(oc_enc_ctx *_enc,int _recode);
+int oc_enc_analyze_inter(oc_enc_ctx *_enc,int _allow_keyframe,int _recode);
+
+
+
+/*Perform fullpel motion search for a single MB against both reference frames.*/
+void oc_mcenc_search(oc_enc_ctx *_enc,int _mbi);
+/*Refine a MB MV for one frame.*/
+void oc_mcenc_refine1mv(oc_enc_ctx *_enc,int _mbi,int _frame);
+/*Refine the block MVs.*/
+void oc_mcenc_refine4mv(oc_enc_ctx *_enc,int _mbi);
+
+
+
+/*Used to roll back a tokenlog transaction when we retroactively decide to skip
+   a fragment.
+  A checkpoint is taken right before each token is added.
+  oc_enc_tokenlog_rollback() takes an array of _n of these as its _stack.*/
+struct oc_token_checkpoint{
+  /*The color plane the token was added to.*/
+  unsigned char pli;
+  /*The zig-zag index the token was added to.*/
+  unsigned char zzi;
+  /*The outstanding EOB run count before the token was added.*/
+  ogg_uint16_t  eob_run;
+  /*The token count before the token was added.*/
+  ptrdiff_t     ndct_tokens;
+};
+
+
+
+void oc_enc_tokenize_start(oc_enc_ctx *_enc);
+int oc_enc_tokenize_ac(oc_enc_ctx *_enc,int _pli,ptrdiff_t _fragi,
+ ogg_int16_t *_qdct_out,const ogg_int16_t *_qdct_in,
+ const ogg_uint16_t *_dequant,const ogg_int16_t *_dct,
+ int _zzi,oc_token_checkpoint **_stack,int _lambda,int _acmin);
+int oc_enc_tokenize_ac_fast(oc_enc_ctx *_enc,int _pli,ptrdiff_t _fragi,
+ ogg_int16_t *_qdct_out,const ogg_int16_t *_qdct_in,
+ const ogg_uint16_t *_dequant,const ogg_int16_t *_dct,
+ int _zzi,oc_token_checkpoint **_stack,int _lambda,int _acmin);
+void oc_enc_tokenlog_rollback(oc_enc_ctx *_enc,
+ const oc_token_checkpoint *_stack,int _n);
+void oc_enc_pred_dc_frag_rows(oc_enc_ctx *_enc,
+ int _pli,int _fragy0,int _frag_yend);
+void oc_enc_tokenize_dc_frag_list(oc_enc_ctx *_enc,int _pli,
+ const ptrdiff_t *_coded_fragis,ptrdiff_t _ncoded_fragis,
+ int _prev_ndct_tokens1,int _prev_eob_run1);
+void oc_enc_tokenize_finish(oc_enc_ctx *_enc);
+
+
+
+/*Utility routine to encode one of the header packets.*/
+int oc_state_flushheader(oc_theora_state *_state,int *_packet_state,
+ oggpack_buffer *_opb,const th_quant_info *_qinfo,
+ const th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS],
+ const char *_vendor,th_comment *_tc,ogg_packet *_op);
+
+
+
+/*Default pure-C implementations of encoder-specific accelerated functions.*/
+void oc_enc_accel_init_c(oc_enc_ctx *_enc);
+
+void oc_enc_frag_sub_c(ogg_int16_t _diff[64],
+ const unsigned char *_src,const unsigned char *_ref,int _ystride);
+void oc_enc_frag_sub_128_c(ogg_int16_t _diff[64],
+ const unsigned char *_src,int _ystride);
+unsigned oc_enc_frag_sad_c(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride);
+unsigned oc_enc_frag_sad_thresh_c(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,unsigned _thresh);
+unsigned oc_enc_frag_sad2_thresh_c(const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride,
+ unsigned _thresh);
+unsigned oc_enc_frag_intra_sad_c(const unsigned char *_src, int _ystride);
+unsigned oc_enc_frag_satd_c(int *_dc,const unsigned char *_src,
+ const unsigned char *_ref,int _ystride);
+unsigned oc_enc_frag_satd2_c(int *_dc,const unsigned char *_src,
+ const unsigned char *_ref1,const unsigned char *_ref2,int _ystride);
+unsigned oc_enc_frag_intra_satd_c(int *_dc,
+ const unsigned char *_src,int _ystride);
+unsigned oc_enc_frag_ssd_c(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride);
+unsigned oc_enc_frag_border_ssd_c(const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,ogg_int64_t _mask);
+void oc_enc_frag_copy2_c(unsigned char *_dst,
+ const unsigned char *_src1,const unsigned char *_src2,int _ystride);
+void oc_enc_enquant_table_init_c(void *_enquant,
+ const ogg_uint16_t _dequant[64]);
+void oc_enc_enquant_table_fixup_c(void *_enquant[3][3][2],int _nqis);
+int oc_enc_quantize_c(ogg_int16_t _qdct[64],const ogg_int16_t _dct[64],
+ const ogg_uint16_t _dequant[64],const void *_enquant);
+void oc_enc_fdct8x8_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]);
+
+#endif

+ 1836 - 0
jni/libtheora-1.2.0alpha1/lib/encode.c

@@ -0,0 +1,1836 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id$
+
+ ********************************************************************/
+#include <stdlib.h>
+#include <string.h>
+#include "encint.h"
+#include "dequant.h"
+
+
+
+/*The default quantization parameters used by VP3.1.
+  TH_VP31_QUANT_INFO below ties these tables together: every entry uses the
+   single range size in OC_VP31_RANGE_SIZES, the first group of three entries
+   uses the _INTRA_Y matrices once and the _INTRA_C matrices twice, and the
+   second group uses the _INTER matrices three times.
+  Each base matrix array holds two 64-entry matrices, and in all three arrays
+   the two matrices are identical.*/
+static const int OC_VP31_RANGE_SIZES[1]={63};
+static const th_quant_base OC_VP31_BASES_INTRA_Y[2]={
+  {
+     16, 11, 10, 16, 24, 40, 51, 61,
+     12, 12, 14, 19, 26, 58, 60, 55,
+     14, 13, 16, 24, 40, 57, 69, 56,
+     14, 17, 22, 29, 51, 87, 80, 62,
+     18, 22, 37, 58, 68,109,103, 77,
+     24, 35, 55, 64, 81,104,113, 92,
+     49, 64, 78, 87,103,121,120,101,
+     72, 92, 95, 98,112,100,103, 99
+  },
+  {
+     16, 11, 10, 16, 24, 40, 51, 61,
+     12, 12, 14, 19, 26, 58, 60, 55,
+     14, 13, 16, 24, 40, 57, 69, 56,
+     14, 17, 22, 29, 51, 87, 80, 62,
+     18, 22, 37, 58, 68,109,103, 77,
+     24, 35, 55, 64, 81,104,113, 92,
+     49, 64, 78, 87,103,121,120,101,
+     72, 92, 95, 98,112,100,103, 99
+  }
+};
+static const th_quant_base OC_VP31_BASES_INTRA_C[2]={
+  {
+     17, 18, 24, 47, 99, 99, 99, 99,
+     18, 21, 26, 66, 99, 99, 99, 99,
+     24, 26, 56, 99, 99, 99, 99, 99,
+     47, 66, 99, 99, 99, 99, 99, 99,
+     99, 99, 99, 99, 99, 99, 99, 99,
+     99, 99, 99, 99, 99, 99, 99, 99,
+     99, 99, 99, 99, 99, 99, 99, 99,
+     99, 99, 99, 99, 99, 99, 99, 99
+  },
+  {
+     17, 18, 24, 47, 99, 99, 99, 99,
+     18, 21, 26, 66, 99, 99, 99, 99,
+     24, 26, 56, 99, 99, 99, 99, 99,
+     47, 66, 99, 99, 99, 99, 99, 99,
+     99, 99, 99, 99, 99, 99, 99, 99,
+     99, 99, 99, 99, 99, 99, 99, 99,
+     99, 99, 99, 99, 99, 99, 99, 99,
+     99, 99, 99, 99, 99, 99, 99, 99
+  }
+};
+static const th_quant_base OC_VP31_BASES_INTER[2]={
+  {
+     16, 16, 16, 20, 24, 28, 32, 40,
+     16, 16, 20, 24, 28, 32, 40, 48,
+     16, 20, 24, 28, 32, 40, 48, 64,
+     20, 24, 28, 32, 40, 48, 64, 64,
+     24, 28, 32, 40, 48, 64, 64, 64,
+     28, 32, 40, 48, 64, 64, 64, 96,
+     32, 40, 48, 64, 64, 64, 96,128,
+     40, 48, 64, 64, 64, 96,128,128
+  },
+  {
+     16, 16, 16, 20, 24, 28, 32, 40,
+     16, 16, 20, 24, 28, 32, 40, 48,
+     16, 20, 24, 28, 32, 40, 48, 64,
+     20, 24, 28, 32, 40, 48, 64, 64,
+     24, 28, 32, 40, 48, 64, 64, 64,
+     28, 32, 40, 48, 64, 64, 64, 96,
+     32, 40, 48, 64, 64, 64, 96,128,
+     40, 48, 64, 64, 64, 96,128,128
+  }
+};
+
+const th_quant_info TH_VP31_QUANT_INFO={
+  {/*Presumably th_quant_info.dc_scale; confirm against theora/codec.h.*/
+    220,200,190,180,170,170,160,160,
+    150,150,140,140,130,130,120,120,
+    110,110,100,100, 90, 90, 90, 80,
+     80, 80, 70, 70, 70, 60, 60, 60,
+     60, 50, 50, 50, 50, 40, 40, 40,
+     40, 40, 30, 30, 30, 30, 30, 30,
+     30, 20, 20, 20, 20, 20, 20, 20,
+     20, 10, 10, 10, 10, 10, 10, 10
+  },
+  {/*Presumably th_quant_info.ac_scale; confirm against theora/codec.h.*/
+    500,450,400,370,340,310,285,265,
+    245,225,210,195,185,180,170,160,
+    150,145,135,130,125,115,110,107,
+    100, 96, 93, 89, 85, 82, 75, 74,
+     70, 68, 64, 60, 57, 56, 52, 50,
+     49, 45, 44, 43, 40, 38, 37, 35,
+     33, 32, 30, 29, 28, 25, 24, 22,
+     21, 19, 18, 17, 15, 13, 12, 10
+  },
+  {/*Presumably th_quant_info.loop_filter_limits; confirm against codec.h.*/
+    30,25,20,20,15,15,14,14,
+    13,13,12,12,11,11,10,10,
+     9, 9, 8, 8, 7, 7, 7, 7,
+     6, 6, 6, 6, 5, 5, 5, 5,
+     4, 4, 4, 4, 3, 3, 3, 3,
+     2, 2, 2, 2, 2, 2, 2, 2,
+     0, 0, 0, 0, 0, 0, 0, 0,
+     0, 0, 0, 0, 0, 0, 0, 0
+  },
+  {/*Two groups of three {count, range sizes, base matrices} entries.*/
+    {/*The group built from the intra base matrices.*/
+      {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTRA_Y},
+      {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTRA_C},
+      {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTRA_C}
+    },
+    {/*The group built from the inter base matrices.*/
+      {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTER},
+      {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTER},
+      {1,OC_VP31_RANGE_SIZES,OC_VP31_BASES_INTER}
+    }
+  }
+};
+
+/*The current default quantization parameters.
+  Three range sizes (32, 16 and 15) are used, with four base matrices in each
+   array.
+  The last matrix in each array is identical to the corresponding VP3.1 matrix
+   above, and TH_DEF_QUANT_INFO below pairs the arrays with the range sizes in
+   the same pattern that TH_VP31_QUANT_INFO uses.*/
+static const int OC_DEF_QRANGE_SIZES[3]={32,16,15};
+static const th_quant_base OC_DEF_BASES_INTRA_Y[4]={
+  {
+     15, 15, 15, 15, 15, 15, 15, 15,
+     15, 15, 15, 15, 15, 15, 15, 15,
+     15, 15, 15, 15, 15, 15, 15, 15,
+     15, 15, 15, 15, 15, 15, 15, 15,
+     15, 15, 15, 15, 15, 15, 15, 15,
+     15, 15, 15, 15, 15, 15, 15, 15,
+     15, 15, 15, 15, 15, 15, 15, 15,
+     15, 15, 15, 15, 15, 15, 15, 15,
+  },
+  {
+     15, 12, 12, 15, 18, 20, 20, 21,
+     13, 13, 14, 17, 18, 21, 21, 20,
+     14, 14, 15, 18, 20, 21, 21, 21,
+     14, 16, 17, 19, 20, 21, 21, 21,
+     16, 17, 20, 21, 21, 21, 21, 21,
+     18, 19, 20, 21, 21, 21, 21, 21,
+     20, 21, 21, 21, 21, 21, 21, 21,
+     21, 21, 21, 21, 21, 21, 21, 21
+  },
+  {
+     16, 12, 11, 16, 20, 25, 27, 28,
+     13, 13, 14, 18, 21, 28, 28, 27,
+     14, 13, 16, 20, 25, 28, 28, 28,
+     14, 16, 19, 22, 27, 29, 29, 28,
+     17, 19, 25, 28, 28, 30, 30, 29,
+     20, 24, 27, 28, 29, 30, 30, 29,
+     27, 28, 29, 29, 30, 30, 30, 30,
+     29, 29, 29, 29, 30, 30, 30, 29
+  },
+  {
+     16, 11, 10, 16, 24, 40, 51, 61,
+     12, 12, 14, 19, 26, 58, 60, 55,
+     14, 13, 16, 24, 40, 57, 69, 56,
+     14, 17, 22, 29, 51, 87, 80, 62,
+     18, 22, 37, 58, 68,109,103, 77,
+     24, 35, 55, 64, 81,104,113, 92,
+     49, 64, 78, 87,103,121,120,101,
+     72, 92, 95, 98,112,100,103, 99
+  }
+};
+static const th_quant_base OC_DEF_BASES_INTRA_C[4]={
+  {
+     19, 19, 19, 19, 19, 19, 19, 19,
+     19, 19, 19, 19, 19, 19, 19, 19,
+     19, 19, 19, 19, 19, 19, 19, 19,
+     19, 19, 19, 19, 19, 19, 19, 19,
+     19, 19, 19, 19, 19, 19, 19, 19,
+     19, 19, 19, 19, 19, 19, 19, 19,
+     19, 19, 19, 19, 19, 19, 19, 19,
+     19, 19, 19, 19, 19, 19, 19, 19
+  },
+  {
+     18, 18, 21, 25, 26, 26, 26, 26,
+     18, 20, 22, 26, 26, 26, 26, 26,
+     21, 22, 25, 26, 26, 26, 26, 26,
+     25, 26, 26, 26, 26, 26, 26, 26,
+     26, 26, 26, 26, 26, 26, 26, 26,
+     26, 26, 26, 26, 26, 26, 26, 26,
+     26, 26, 26, 26, 26, 26, 26, 26,
+     26, 26, 26, 26, 26, 26, 26, 26
+  },
+  {
+     17, 18, 22, 31, 36, 36, 36, 36,
+     18, 20, 24, 34, 36, 36, 36, 36,
+     22, 24, 33, 36, 36, 36, 36, 36,
+     31, 34, 36, 36, 36, 36, 36, 36,
+     36, 36, 36, 36, 36, 36, 36, 36,
+     36, 36, 36, 36, 36, 36, 36, 36,
+     36, 36, 36, 36, 36, 36, 36, 36,
+     36, 36, 36, 36, 36, 36, 36, 36
+  },
+  {
+     17, 18, 24, 47, 99, 99, 99, 99,
+     18, 21, 26, 66, 99, 99, 99, 99,
+     24, 26, 56, 99, 99, 99, 99, 99,
+     47, 66, 99, 99, 99, 99, 99, 99,
+     99, 99, 99, 99, 99, 99, 99, 99,
+     99, 99, 99, 99, 99, 99, 99, 99,
+     99, 99, 99, 99, 99, 99, 99, 99,
+     99, 99, 99, 99, 99, 99, 99, 99
+  }
+};
+static const th_quant_base OC_DEF_BASES_INTER[4]={
+  {
+     21, 21, 21, 21, 21, 21, 21, 21,
+     21, 21, 21, 21, 21, 21, 21, 21,
+     21, 21, 21, 21, 21, 21, 21, 21,
+     21, 21, 21, 21, 21, 21, 21, 21,
+     21, 21, 21, 21, 21, 21, 21, 21,
+     21, 21, 21, 21, 21, 21, 21, 21,
+     21, 21, 21, 21, 21, 21, 21, 21,
+     21, 21, 21, 21, 21, 21, 21, 21
+  },
+  {
+     18, 18, 18, 21, 23, 24, 25, 27,
+     18, 18, 21, 23, 24, 25, 27, 28,
+     18, 21, 23, 24, 25, 27, 28, 29,
+     21, 23, 24, 25, 27, 28, 29, 29,
+     23, 24, 25, 27, 28, 29, 29, 29,
+     24, 25, 27, 28, 29, 29, 29, 30,
+     25, 27, 28, 29, 29, 29, 30, 30,
+     27, 28, 29, 29, 29, 30, 30, 30
+  },
+  {
+     17, 17, 17, 20, 23, 26, 28, 32,
+     17, 17, 20, 23, 26, 28, 32, 34,
+     17, 20, 23, 26, 28, 32, 34, 37,
+     20, 23, 26, 28, 32, 34, 37, 37,
+     23, 26, 28, 32, 34, 37, 37, 37,
+     26, 28, 32, 34, 37, 37, 37, 41,
+     28, 32, 34, 37, 37, 37, 41, 42,
+     32, 34, 37, 37, 37, 41, 42, 42
+  },
+  {
+     16, 16, 16, 20, 24, 28, 32, 40,
+     16, 16, 20, 24, 28, 32, 40, 48,
+     16, 20, 24, 28, 32, 40, 48, 64,
+     20, 24, 28, 32, 40, 48, 64, 64,
+     24, 28, 32, 40, 48, 64, 64, 64,
+     28, 32, 40, 48, 64, 64, 64, 96,
+     32, 40, 48, 64, 64, 64, 96,128,
+     40, 48, 64, 64, 64, 96,128,128
+  }
+};
+
+const th_quant_info TH_DEF_QUANT_INFO={
+  {/*Presumably th_quant_info.dc_scale; confirm against theora/codec.h.*/
+    365,348,333,316,300,287,277,265,
+    252,240,229,219,206,197,189,180,
+    171,168,160,153,146,139,132,127,
+    121,115,110,107,101, 97, 94, 89,
+     85, 83, 78, 73, 72, 67, 66, 62,
+     60, 59, 56, 53, 52, 48, 47, 43,
+     42, 40, 36, 35, 34, 33, 31, 30,
+     28, 25, 24, 22, 20, 17, 14, 10
+  },
+  {/*Presumably th_quant_info.ac_scale; identical to the table before it.*/
+    365,348,333,316,300,287,277,265,
+    252,240,229,219,206,197,189,180,
+    171,168,160,153,146,139,132,127,
+    121,115,110,107,101, 97, 94, 89,
+     85, 83, 78, 73, 72, 67, 66, 62,
+     60, 59, 56, 53, 52, 48, 47, 43,
+     42, 40, 36, 35, 34, 33, 31, 30,
+     28, 25, 24, 22, 20, 17, 14, 10
+  },
+  {/*Presumably th_quant_info.loop_filter_limits; confirm against codec.h.*/
+    15,12, 9, 8, 6, 6, 5, 5,
+     5, 5, 5, 5, 5, 5, 5, 5,
+     4, 4, 4, 4, 4, 4, 3, 3,
+     3, 3, 3, 3, 3, 3, 3, 3,
+     2, 2, 2, 2, 2, 2, 2, 2,
+     2, 2, 2, 2, 2, 2, 2, 0,
+     0, 0, 0, 0, 0, 0, 0, 0,
+     0, 0, 0, 0, 0, 0, 0, 0
+  },
+  {/*Two groups of three {count, range sizes, base matrices} entries.*/
+    {/*The group built from the intra base matrices.*/
+      {3,OC_DEF_QRANGE_SIZES,OC_DEF_BASES_INTRA_Y},
+      {3,OC_DEF_QRANGE_SIZES,OC_DEF_BASES_INTRA_C},
+      {3,OC_DEF_QRANGE_SIZES,OC_DEF_BASES_INTRA_C}
+    },
+    {/*The group built from the inter base matrices.*/
+      {3,OC_DEF_QRANGE_SIZES,OC_DEF_BASES_INTER},
+      {3,OC_DEF_QRANGE_SIZES,OC_DEF_BASES_INTER},
+      {3,OC_DEF_QRANGE_SIZES,OC_DEF_BASES_INTER}
+    }
+  }
+};
+
+
+
+/*The Huffman codes used for macro block modes.
+  OC_MODE_BITS holds the code lengths and OC_MODE_CODES the matching bit
+   patterns; both are indexed first by codebook and then by mode rank.
+  oc_enc_mb_modes_pack() picks the codebook with (scheme+1)>>3, which is 1
+   only for scheme 7 and 0 for every other 3-bit scheme value.*/
+
+const unsigned char OC_MODE_BITS[2][OC_NMODES]={
+  /*Codebook 0: a maximally skewed prefix code.*/
+  {1,2,3,4,5,6,7,7},
+  /*Codebook 1: a fixed-length code.*/
+  {3,3,3,3,3,3,3,3}
+};
+
+static const unsigned char OC_MODE_CODES[2][OC_NMODES]={
+  /*Codebook 0: a maximally skewed prefix code.*/
+  {0x00,0x02,0x06,0x0E,0x1E,0x3E,0x7E,0x7F},
+  /*Codebook 1: a fixed-length code.*/
+  {0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07}
+};
+
+
+/*The Huffman codes used for motion vectors.
+  OC_MV_BITS holds the code lengths and OC_MV_CODES the matching bit patterns.
+  Both are indexed first by codebook and then by MV component plus 31 (see
+   oc_enc_mv_pack()), so the lone middle entry of each row is the code for a
+   zero component.
+  Only 63 of the 64 entries in each row are listed; the last one is left
+   zero-initialized.*/
+
+const unsigned char OC_MV_BITS[2][64]={
+  /*Codebook 0: VLC code.*/
+  {
+      8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+    8,7,7,7,7,7,7,7,7,6,6,6,6,4,4,3,
+    3,
+    3,4,4,6,6,6,6,7,7,7,7,7,7,7,7,8,
+    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8
+  },
+  /*Codebook 1: (5 bit magnitude, 1 bit sign).
+    This wastes a code word (0x01, negative zero), or a bit (0x00, positive
+     zero, requires only 5 bits to uniquely decode), but is hopefully not used
+     very often.*/
+  {
+      6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+    6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+    6,
+    6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+    6,6,6,6,6,6,6,6,6,6,6,6,6,6,6
+  }
+};
+
+static const unsigned char OC_MV_CODES[2][64]={
+  /*Codebook 0: VLC code.*/
+  {
+         0xFF,0xFD,0xFB,0xF9,0xF7,0xF5,0xF3,
+    0xF1,0xEF,0xED,0xEB,0xE9,0xE7,0xE5,0xE3,
+    0xE1,0x6F,0x6D,0x6B,0x69,0x67,0x65,0x63,
+    0x61,0x2F,0x2D,0x2B,0x29,0x09,0x07,0x02,
+    0x00,
+    0x01,0x06,0x08,0x28,0x2A,0x2C,0x2E,0x60,
+    0x62,0x64,0x66,0x68,0x6A,0x6C,0x6E,0xE0,
+    0xE2,0xE4,0xE6,0xE8,0xEA,0xEC,0xEE,0xF0,
+    0xF2,0xF4,0xF6,0xF8,0xFA,0xFC,0xFE
+  },
+  /*Codebook 1: (5 bit magnitude, 1 bit sign).*/
+  {
+         0x3F,0x3D,0x3B,0x39,0x37,0x35,0x33,
+    0x31,0x2F,0x2D,0x2B,0x29,0x27,0x25,0x23,
+    0x21,0x1F,0x1D,0x1B,0x19,0x17,0x15,0x13,
+    0x11,0x0F,0x0D,0x0B,0x09,0x07,0x05,0x03,
+    0x00,
+    0x02,0x04,0x06,0x08,0x0A,0x0C,0x0E,0x10,
+    0x12,0x14,0x16,0x18,0x1A,0x1C,0x1E,0x20,
+    0x22,0x24,0x26,0x28,0x2A,0x2C,0x2E,0x30,
+    0x32,0x34,0x36,0x38,0x3A,0x3C,0x3E
+  }
+};
+
+
+
+/*Super block run coding scheme:
+   Codeword             Run Length
+   0                       1
+   10x                     2-3
+   110x                    4-5
+   1110xx                  6-9
+   11110xxx                10-17
+   111110xxxx              18-33
+   111111xxxxxxxxxxxx      34-4129
+  For code word i, OC_SB_RUN_VAL_MIN[i] is the shortest run it represents (the
+   final 4130 is one past the longest run, and terminates the search in
+   oc_sb_run_pack()), OC_SB_RUN_CODE_PREFIX[i] is the code word with all of its
+   x bits zero, and OC_SB_RUN_CODE_NBITS[i] is its total length in bits.*/
+const ogg_uint16_t    OC_SB_RUN_VAL_MIN[8]={1,2,4,6,10,18,34,4130};
+static const unsigned OC_SB_RUN_CODE_PREFIX[7]={
+  0,4,0xC,0x38,0xF0,0x3E0,0x3F000
+};
+const unsigned char   OC_SB_RUN_CODE_NBITS[7]={1,3,4,6,8,10,18};
+
+
+/*Emits the code word(s) for one super block run.
+  A run of 4129 or more is split into maximal-length pieces, and the flag bit
+   that must follow each maximal-length code word is written here as well.
+  _opb:       The bit packer to append to.
+  _run_count: The length of the run, which must be positive.
+  _flag:      The flag value shared by everything in this run.
+  _done:      Non-zero if this is the last run that will be written.*/
+static void oc_sb_run_pack(oggpack_buffer *_opb,ptrdiff_t _run_count,
+ int _flag,int _done){
+  int codei;
+  while(_run_count>=4129){
+    /*The longest code word: the 6-bit prefix followed by twelve 1 bits.*/
+    oggpackB_write(_opb,0x3FFFF,18);
+    _run_count-=4129;
+    /*A maximal run is followed by an explicit flag bit: the same flag if this
+       run continues, or the flipped one if another run follows it.*/
+    if(_run_count>0)oggpackB_write(_opb,_flag,1);
+    else{
+      if(!_done)oggpackB_write(_opb,!_flag,1);
+      /*The run was an exact multiple of 4129; nothing is left to code.*/
+      return;
+    }
+  }
+  /*Find the code word whose range contains what remains of the run.*/
+  codei=0;
+  while(_run_count>=OC_SB_RUN_VAL_MIN[codei+1])codei++;
+  oggpackB_write(_opb,
+   OC_SB_RUN_CODE_PREFIX[codei]+_run_count-OC_SB_RUN_VAL_MIN[codei],
+   OC_SB_RUN_CODE_NBITS[codei]);
+}
+
+
+
+/*Block run coding scheme:
+   Codeword             Run Length
+   0x                      1-2
+   10x                     3-4
+   110x                    5-6
+   1110xx                  7-10
+   11110xx                 11-14
+   11111xxxx               15-30
+  Both tables below are indexed by the run length minus one:
+   OC_BLOCK_RUN_CODE_NBITS gives the length of the code word in bits and
+   OC_BLOCK_RUN_CODE_PATTERN the complete bit pattern.*/
+const unsigned char OC_BLOCK_RUN_CODE_NBITS[30]={
+  2,2,3,3,4,4,6,6,6,6,7,7,7,7,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9
+};
+static const ogg_uint16_t  OC_BLOCK_RUN_CODE_PATTERN[30]={
+        0x000,0x001,0x004,0x005,0x00C,0x00D,0x038,
+  0x039,0x03A,0x03B,0x078,0x079,0x07A,0x07B,0x1F0,
+  0x1F1,0x1F2,0x1F3,0x1F4,0x1F5,0x1F6,0x1F7,0x1F8,
+  0x1F9,0x1FA,0x1FB,0x1FC,0x1FD,0x1FE,0x1FF
+};
+
+
+/*Emits the code word for a single block run.
+  _opb:       The bit packer to append to.
+  _run_count: The length of the run.
+              This must lie in the range 1...30, since it is used (less one)
+               to index the 30-entry code tables.*/
+static void oc_block_run_pack(oggpack_buffer *_opb,int _run_count){
+  int codei;
+  codei=_run_count-1;
+  oggpackB_write(_opb,OC_BLOCK_RUN_CODE_PATTERN[codei],
+   OC_BLOCK_RUN_CODE_NBITS[codei]);
+}
+
+
+
+/*Writes the frame header: the data packet marker, the frame type, the list
+   of up to three qi values, and (for key frames only) three reserved bits.*/
+static void oc_enc_frame_header_pack(oc_enc_ctx *_enc){
+  oggpack_buffer *opb;
+  int             qii;
+  opb=&_enc->opb;
+  /*A leading 0 bit marks this as a data packet.*/
+  oggpackB_write(opb,0,1);
+  /*The frame type (key frame or delta frame) comes next.*/
+  oggpackB_write(opb,_enc->state.frame_type,1);
+  /*The qi list: each 6-bit qi except the third is followed by a single bit
+     that says whether or not another one follows it.*/
+  for(qii=0;;qii++){
+    int more;
+    oggpackB_write(opb,_enc->state.qis[qii],6);
+    /*The third qi is always the last one, so it gets no continuation bit.*/
+    if(qii>=2)break;
+    more=_enc->state.nqis>qii+1;
+    oggpackB_write(opb,more,1);
+    if(!more)break;
+  }
+  if(_enc->state.frame_type==OC_INTRA_FRAME){
+    /*Key frames carry 3 unused configuration bits left over from VP3.
+      Most of the other unused VP3 header bits were eliminated, but Monty kept
+       these to leave some wiggle room for future expansion, though a single
+       bit in all frames would have been far more useful.*/
+    oggpackB_write(opb,0,3);
+  }
+}
+
+/*Writes the flags that mark which super blocks are partially coded.
+  The value of the first flag is written explicitly; after that only run
+   lengths are stored, with the flag value alternating from one run to the
+   next.
+  Return: The number of partially coded SBs.*/
+static unsigned oc_enc_partial_sb_flags_pack(oc_enc_ctx *_enc){
+  oggpack_buffer    *opb;
+  const oc_sb_flags *sb_flags;
+  unsigned           nsbs;
+  unsigned           sbi;
+  unsigned           npartial;
+  int                flag;
+  opb=&_enc->opb;
+  sb_flags=_enc->state.sb_flags;
+  nsbs=_enc->state.nsbs;
+  flag=sb_flags[0].coded_partially;
+  oggpackB_write(opb,flag,1);
+  npartial=0;
+  sbi=0;
+  do{
+    unsigned run_start;
+    unsigned run_count;
+    /*Advance to the first super block whose flag differs from the current
+       one (or to the end of the list).*/
+    run_start=sbi;
+    while(sbi<nsbs&&sb_flags[sbi].coded_partially==flag)sbi++;
+    run_count=sbi-run_start;
+    /*Each super block in the run contributes the flag value to the total.*/
+    npartial+=run_count*flag;
+    oc_sb_run_pack(opb,run_count,flag,sbi>=nsbs);
+    flag=!flag;
+  }
+  while(sbi<nsbs);
+  return npartial;
+}
+
+/*Writes the fully coded/not coded flag of every super block that is not
+   partially coded.
+  The value of the first flag is written explicitly; after that only run
+   lengths are stored, with the flag value alternating from one run to the
+   next.
+  The search for the first such super block is not bounds-checked, so at least
+   one super block must not be partially coded.*/
+static void oc_enc_coded_sb_flags_pack(oc_enc_ctx *_enc){
+  oggpack_buffer    *opb;
+  const oc_sb_flags *sb_flags;
+  unsigned           nsbs;
+  unsigned           sbi;
+  int                flag;
+  opb=&_enc->opb;
+  sb_flags=_enc->state.sb_flags;
+  nsbs=_enc->state.nsbs;
+  /*Partially coded super blocks already had their flags coded; skip past the
+     leading ones to find the first flag that needs to be written here.*/
+  sbi=0;
+  while(sb_flags[sbi].coded_partially)sbi++;
+  flag=sb_flags[sbi].coded_fully;
+  oggpackB_write(opb,flag,1);
+  do{
+    unsigned run_count;
+    run_count=0;
+    while(sbi<nsbs){
+      /*Partially coded super blocks neither extend nor end a run.*/
+      if(!sb_flags[sbi].coded_partially){
+        if(sb_flags[sbi].coded_fully!=flag)break;
+        run_count++;
+      }
+      sbi++;
+    }
+    oc_sb_run_pack(opb,run_count,flag,sbi>=nsbs);
+    flag=!flag;
+  }
+  while(sbi<nsbs);
+}
+
+/*Writes all of the coded block flags: first the partially coded super block
+   flags, then the fully coded super block flags (unless every super block is
+   partially coded), and finally the run-length coded flags of the individual
+   blocks inside the partially coded super blocks, if there are any.*/
+static void oc_enc_coded_flags_pack(oc_enc_ctx *_enc){
+  const oc_sb_map   *sb_maps;
+  const oc_sb_flags *sb_flags;
+  const oc_fragment *frags;
+  unsigned           npartial;
+  unsigned           nsbs;
+  unsigned           sb_end;
+  unsigned           sbi;
+  int                run_count;
+  int                flag;
+  int                pli;
+  npartial=oc_enc_partial_sb_flags_pack(_enc);
+  if(npartial<_enc->state.nsbs)oc_enc_coded_sb_flags_pack(_enc);
+  sb_maps=(const oc_sb_map *)_enc->state.sb_maps;
+  sb_flags=_enc->state.sb_flags;
+  frags=_enc->state.frags;
+  nsbs=_enc->state.nsbs;
+  /*Look for the first partially coded super block.*/
+  sbi=0;
+  while(sbi<nsbs&&!sb_flags[sbi].coded_partially)sbi++;
+  /*Individual block flags are only stored if there is at least one.*/
+  if(sbi>=nsbs)return;
+  /*The first block of the first partial super block sets the initial flag.*/
+  flag=frags[sb_maps[sbi][0][0]].coded;
+  oggpackB_write(&_enc->opb,flag,1);
+  run_count=0;
+  sb_end=0;
+  sbi=0;
+  for(pli=0;pli<3;pli++){
+    sb_end+=_enc->state.fplanes[pli].nsbs;
+    for(;sbi<sb_end;sbi++){
+      int blocki;
+      if(!sb_flags[sbi].coded_partially)continue;
+      /*Visit the 16 map entries quadrant by quadrant.*/
+      for(blocki=0;blocki<16;blocki++){
+        ptrdiff_t fragi;
+        fragi=sb_maps[sbi][blocki>>2][blocki&3];
+        /*Negative entries do not refer to a fragment and are skipped.*/
+        if(fragi<0)continue;
+        if(frags[fragi].coded==flag)run_count++;
+        else{
+          /*The flag changed: emit the finished run and start a new one with
+             this block.*/
+          oc_block_run_pack(&_enc->opb,run_count);
+          flag=!flag;
+          run_count=1;
+        }
+      }
+    }
+  }
+  /*Flush any trailing block coded run.*/
+  if(run_count>0)oc_block_run_pack(&_enc->opb,run_count);
+}
+
+/*Writes the macro block mode coding scheme, followed by the mode of every
+   coded macro block, using the first scheme in the chooser's scheme list.*/
+static void oc_enc_mb_modes_pack(oc_enc_ctx *_enc){
+  oggpack_buffer      *opb;
+  const unsigned char *mode_ranks;
+  const unsigned char *code_bits;
+  const unsigned char *code_patterns;
+  const unsigned      *coded_mbis;
+  const signed char   *mb_modes;
+  size_t               ncoded_mbis;
+  unsigned             mbii;
+  int                  scheme;
+  int                  codebooki;
+  int                  mb_mode;
+  opb=&_enc->opb;
+  /*Encode the best scheme.*/
+  scheme=_enc->chooser.scheme_list[0];
+  oggpackB_write(opb,scheme,3);
+  /*Scheme 0 is the only one that also sends the mode frequency ordering.*/
+  if(scheme==0){
+    for(mb_mode=0;mb_mode<OC_NMODES;mb_mode++){
+      oggpackB_write(opb,_enc->chooser.scheme0_ranks[mb_mode],3);
+    }
+  }
+  /*Scheme 7 uses codebook 1; all of the other schemes use codebook 0.*/
+  codebooki=(scheme+1)>>3;
+  code_bits=OC_MODE_BITS[codebooki];
+  code_patterns=OC_MODE_CODES[codebooki];
+  mode_ranks=_enc->chooser.mode_ranks[scheme];
+  mb_modes=_enc->state.mb_modes;
+  coded_mbis=_enc->coded_mbis;
+  ncoded_mbis=_enc->ncoded_mbis;
+  for(mbii=0;mbii<ncoded_mbis;mbii++){
+    int rank;
+    /*Each mode is coded by its rank under the chosen scheme.*/
+    mb_mode=mb_modes[coded_mbis[mbii]];
+    rank=mode_ranks[mb_mode];
+    oggpackB_write(opb,code_patterns[rank],code_bits[rank]);
+  }
+}
+
+/*Writes a single motion vector: the code for its X component followed by the
+   code for its Y component.
+  _mv_scheme: The MV codebook to use (the first index of the MV code tables).
+  _mv:        The motion vector to write.*/
+static void oc_enc_mv_pack(oc_enc_ctx *_enc,int _mv_scheme,oc_mv _mv){
+  const unsigned char *patterns;
+  const unsigned char *nbits;
+  int                  dx;
+  int                  dy;
+  patterns=OC_MV_CODES[_mv_scheme];
+  nbits=OC_MV_BITS[_mv_scheme];
+  dx=OC_MV_X(_mv);
+  dy=OC_MV_Y(_mv);
+  /*The code tables are indexed by the component value plus 31.*/
+  oggpackB_write(&_enc->opb,patterns[dx+31],nbits[dx+31]);
+  oggpackB_write(&_enc->opb,patterns[dy+31],nbits[dy+31]);
+}
+
+/*Writes the motion vector coding scheme, followed by the motion vectors of
+   every coded macro block whose mode carries explicit MVs.*/
+static void oc_enc_mvs_pack(oc_enc_ctx *_enc){
+  const unsigned     *coded_mbis;
+  const oc_mb_map    *mb_maps;
+  const signed char  *mb_modes;
+  const oc_fragment  *frags;
+  const oc_mv        *frag_mvs;
+  size_t              ncoded_mbis;
+  unsigned            mbii;
+  int                 mv_scheme;
+  /*Choose the coding scheme: scheme 1 only if it uses strictly fewer bits.*/
+  mv_scheme=_enc->mv_bits[1]<_enc->mv_bits[0];
+  oggpackB_write(&_enc->opb,mv_scheme,1);
+  coded_mbis=_enc->coded_mbis;
+  ncoded_mbis=_enc->ncoded_mbis;
+  mb_modes=_enc->state.mb_modes;
+  mb_maps=(const oc_mb_map *)_enc->state.mb_maps;
+  frags=_enc->state.frags;
+  frag_mvs=_enc->state.frag_mvs;
+  /*Encode the motion vectors.
+    Macro blocks are iterated in Hilbert scan order, but the MVs within the
+     macro block are coded in raster order.*/
+  for(mbii=0;mbii<ncoded_mbis;mbii++){
+    ptrdiff_t fragi;
+    unsigned  mbi;
+    int       mb_mode;
+    int       bi;
+    mbi=coded_mbis[mbii];
+    mb_mode=mb_modes[mbi];
+    if(mb_mode==OC_MODE_INTER_MV||mb_mode==OC_MODE_GOLDEN_MV){
+      /*Only code a single MV for this macro block: that of its first coded
+         block in plane 0.
+        This search is unbounded, so it relies on at least one of those
+         blocks being coded.*/
+      bi=0;
+      while(!frags[mb_maps[mbi][0][bi]].coded)bi++;
+      fragi=mb_maps[mbi][0][bi];
+      oc_enc_mv_pack(_enc,mv_scheme,frag_mvs[fragi]);
+    }
+    else if(mb_mode==OC_MODE_INTER_MV_FOUR){
+      /*Code one MV for each coded block of plane 0.*/
+      for(bi=0;bi<4;bi++){
+        fragi=mb_maps[mbi][0][bi];
+        if(!frags[fragi].coded)continue;
+        oc_enc_mv_pack(_enc,mv_scheme,frag_mvs[fragi]);
+      }
+    }
+    /*No other mode has MVs written for it here.*/
+  }
+}
+
+/*Writes the per-block qi indices (qii) of all the coded fragments.
+  Nothing is written unless the frame uses more than one qi and has at least
+   one coded fragment.
+  The first pass run-length codes whether each fragment's qii is non-zero.
+  With three qis, a second pass run-length codes qii-1 for just the fragments
+   whose qii is non-zero, provided there are any.*/
+static void oc_enc_block_qis_pack(oc_enc_ctx *_enc){
+  oggpack_buffer    *opb;
+  const oc_fragment *frags;
+  const ptrdiff_t   *coded_fragis;
+  ptrdiff_t          ncoded_fragis;
+  ptrdiff_t          fragii;
+  ptrdiff_t          run_count;
+  ptrdiff_t          nqi0;
+  int                flag;
+  if(_enc->state.nqis<=1)return;
+  ncoded_fragis=_enc->state.ntotal_coded_fragis;
+  if(ncoded_fragis<=0)return;
+  opb=&_enc->opb;
+  coded_fragis=_enc->state.coded_fragis;
+  frags=_enc->state.frags;
+  /*First pass: zero vs. non-zero qii.*/
+  flag=!!frags[coded_fragis[0]].qii;
+  oggpackB_write(opb,flag,1);
+  nqi0=0;
+  fragii=0;
+  while(fragii<ncoded_fragis){
+    ptrdiff_t run_start;
+    run_start=fragii;
+    while(fragii<ncoded_fragis&&!!frags[coded_fragis[fragii]].qii==flag){
+      fragii++;
+    }
+    run_count=fragii-run_start;
+    /*Keep track of how many fragments use the first qi.*/
+    if(!flag)nqi0+=run_count;
+    oc_sb_run_pack(opb,run_count,flag,fragii>=ncoded_fragis);
+    flag=!flag;
+  }
+  /*The second pass is only needed if there is a third qi and at least one
+     fragment does not use the first.*/
+  if(_enc->state.nqis<3||nqi0>=ncoded_fragis)return;
+  fragii=0;
+  while(!frags[coded_fragis[fragii]].qii)fragii++;
+  flag=frags[coded_fragis[fragii]].qii-1;
+  oggpackB_write(opb,flag,1);
+  while(fragii<ncoded_fragis){
+    run_count=0;
+    while(fragii<ncoded_fragis){
+      int qii;
+      qii=frags[coded_fragis[fragii]].qii;
+      /*Fragments using the first qi neither extend nor end a run.*/
+      if(qii){
+        if(qii-1!=flag)break;
+        run_count++;
+      }
+      fragii++;
+    }
+    oc_sb_run_pack(opb,run_count,flag,fragii>=ncoded_fragis);
+    flag=!flag;
+  }
+}
+
+/*Tallies how many times each token value occurs in the token lists for a
+   range of zig-zag coefficient indices.
+  Plane 0 is tallied separately from planes 1 and 2, which share one set of
+   counts.
+  Both sets of counts are cleared before counting starts.
+  _zzi_start:      The first zig-zag index to include.
+  _zzi_end:        The first zig-zag index to not include.
+  _token_counts_y: Returns the token counts for the Y' plane.
+  _token_counts_c: Returns the token counts for the Cb and Cr planes.*/
+static void oc_enc_count_tokens(oc_enc_ctx *_enc,int _zzi_start,int _zzi_end,
+ ptrdiff_t _token_counts_y[32],ptrdiff_t _token_counts_c[32]){
+  int pli;
+  memset(_token_counts_y,0,32*sizeof(*_token_counts_y));
+  memset(_token_counts_c,0,32*sizeof(*_token_counts_c));
+  for(pli=0;pli<3;pli++){
+    ptrdiff_t *token_counts;
+    int        zzi;
+    token_counts=pli>0?_token_counts_c:_token_counts_y;
+    for(zzi=_zzi_start;zzi<_zzi_end;zzi++){
+      const unsigned char *tokens;
+      ptrdiff_t            ntokens;
+      ptrdiff_t            ti;
+      tokens=_enc->dct_tokens[pli][zzi];
+      ntokens=_enc->ndct_tokens[pli][zzi];
+      /*Each list is scanned from its recorded starting offset.*/
+      for(ti=_enc->dct_token_offs[pli][zzi];ti<ntokens;ti++){
+        token_counts[tokens[ti]]++;
+      }
+    }
+  }
+}
+
+/*Computes the number of bits each of the 16 candidate Huffman tables of one
+   group would use to code the given list of token counts.
+  The bits are added to whatever the current bit counts are.
+  _hgi:          The Huffman group; its tables start at index _hgi<<4.
+  _token_counts: The number of occurrences of each of the 32 tokens.
+  _bit_counts:   The running totals to update, one for each candidate.*/
+static void oc_enc_count_bits(oc_enc_ctx *_enc,int _hgi,
+ const ptrdiff_t _token_counts[32],size_t _bit_counts[16]){
+  int first_table;
+  int huffi;
+  first_table=_hgi<<4;
+  for(huffi=0;huffi<16;huffi++){
+    const th_huff_code *codes;
+    size_t              bits;
+    int                 token;
+    codes=_enc->huff_codes[huffi+first_table];
+    bits=_bit_counts[huffi];
+    for(token=0;token<32;token++)bits+=_token_counts[token]*codes[token].nbits;
+    _bit_counts[huffi]=bits;
+  }
+}
+
+/*Returns the index of the smallest of the 16 bit counts.
+  When several entries tie for the smallest, the lowest index wins.*/
+static int oc_select_huff_idx(size_t _bit_counts[16]){
+  size_t fewest_bits;
+  int    best_huffi;
+  int    huffi;
+  best_huffi=0;
+  fewest_bits=_bit_counts[0];
+  for(huffi=1;huffi<16;huffi++){
+    /*Only a strictly smaller count replaces the current best.*/
+    if(_bit_counts[huffi]<fewest_bits){
+      fewest_bits=_bit_counts[huffi];
+      best_huffi=huffi;
+    }
+  }
+  return best_huffi;
+}
+
+/*Writes the tokens (and any extra bits they carry) for a range of zig-zag
+   coefficient indices, visiting all three planes for each index in turn.
+  _zzi_start: The first zig-zag index to include.
+  _zzi_end:   The first zig-zag index to not include.
+  _huff_idxs: The Huffman tables to use: entry 0 for plane 0 and entry 1 for
+               planes 1 and 2.*/
+static void oc_enc_huff_group_pack(oc_enc_ctx *_enc,
+ int _zzi_start,int _zzi_end,const int _huff_idxs[2]){
+  int zzi;
+  int pli;
+  for(zzi=_zzi_start;zzi<_zzi_end;zzi++){
+    for(pli=0;pli<3;pli++){
+      const th_huff_code  *codes;
+      const unsigned char *tokens;
+      const ogg_uint16_t  *ebits;
+      ptrdiff_t            ntokens;
+      ptrdiff_t            ti;
+      /*(pli+1)>>1 maps plane 0 to entry 0 and planes 1 and 2 to entry 1.*/
+      codes=_enc->huff_codes[_huff_idxs[(pli+1)>>1]];
+      tokens=_enc->dct_tokens[pli][zzi];
+      ebits=_enc->extra_bits[pli][zzi];
+      ntokens=_enc->ndct_tokens[pli][zzi];
+      for(ti=_enc->dct_token_offs[pli][zzi];ti<ntokens;ti++){
+        const th_huff_code *code;
+        int                 neb;
+        code=codes+tokens[ti];
+        oggpackB_write(&_enc->opb,code->pattern,code->nbits);
+        /*Some tokens are followed by a fixed number of extra bits.*/
+        neb=OC_DCT_TOKEN_EXTRA_BITS[tokens[ti]];
+        if(neb!=0)oggpackB_write(&_enc->opb,ebits[ti],neb);
+      }
+    }
+  }
+}
+
+/*Chooses the Huffman tables that code the DCT tokens in the fewest bits, then
+   writes the table indices and the tokens themselves.
+  One pair of tables (Y' and chroma) is chosen for the DC tokens and a second
+   pair for all four AC groups combined.
+  The chosen indices are also saved in _enc->huff_idxs for this frame type.*/
+static void oc_enc_residual_tokens_pack(oc_enc_ctx *_enc){
+  /*Group hgi covers the zig-zag indices from OC_HUFF_GROUP_BOUNDS[hgi] up to,
+     but not including, OC_HUFF_GROUP_BOUNDS[hgi+1].*/
+  static const unsigned char OC_HUFF_GROUP_BOUNDS[6]={0,1,6,15,28,64};
+  ptrdiff_t token_counts_y[32];
+  ptrdiff_t token_counts_c[32];
+  size_t    bits_y[16];
+  size_t    bits_c[16];
+  int       dc_idxs[2];
+  int       ac_idxs[2];
+  int       frame_type;
+  int       hgi;
+  frame_type=_enc->state.frame_type;
+  /*Choose which Huffman tables to use for the DC token list.*/
+  memset(bits_y,0,sizeof(bits_y));
+  memset(bits_c,0,sizeof(bits_c));
+  oc_enc_count_tokens(_enc,0,1,token_counts_y,token_counts_c);
+  oc_enc_count_bits(_enc,0,token_counts_y,bits_y);
+  oc_enc_count_bits(_enc,0,token_counts_c,bits_c);
+  dc_idxs[0]=oc_select_huff_idx(bits_y);
+  dc_idxs[1]=oc_select_huff_idx(bits_c);
+  /*Write the DC token list with the chosen tables.*/
+  oggpackB_write(&_enc->opb,dc_idxs[0],4);
+  oggpackB_write(&_enc->opb,dc_idxs[1],4);
+  _enc->huff_idxs[frame_type][0][0]=(unsigned char)dc_idxs[0];
+  _enc->huff_idxs[frame_type][0][1]=(unsigned char)dc_idxs[1];
+  oc_enc_huff_group_pack(_enc,0,1,dc_idxs);
+  /*Choose which Huffman tables to use for the AC token lists.
+    The bit counts are accumulated over all four AC groups, so the same
+     relative table index is used in each of them.*/
+  memset(bits_y,0,sizeof(bits_y));
+  memset(bits_c,0,sizeof(bits_c));
+  for(hgi=1;hgi<5;hgi++){
+    oc_enc_count_tokens(_enc,
+     OC_HUFF_GROUP_BOUNDS[hgi],OC_HUFF_GROUP_BOUNDS[hgi+1],
+     token_counts_y,token_counts_c);
+    oc_enc_count_bits(_enc,hgi,token_counts_y,bits_y);
+    oc_enc_count_bits(_enc,hgi,token_counts_c,bits_c);
+  }
+  ac_idxs[0]=oc_select_huff_idx(bits_y);
+  ac_idxs[1]=oc_select_huff_idx(bits_c);
+  /*Write the AC token lists using the chosen tables.*/
+  oggpackB_write(&_enc->opb,ac_idxs[0],4);
+  oggpackB_write(&_enc->opb,ac_idxs[1],4);
+  _enc->huff_idxs[frame_type][1][0]=(unsigned char)ac_idxs[0];
+  _enc->huff_idxs[frame_type][1][1]=(unsigned char)ac_idxs[1];
+  for(hgi=1;hgi<5;hgi++){
+    int group_idxs[2];
+    /*Each group's tables start 16 entries after the previous group's.*/
+    group_idxs[0]=ac_idxs[0]+(hgi<<4);
+    group_idxs[1]=ac_idxs[1]+(hgi<<4);
+    oc_enc_huff_group_pack(_enc,
+     OC_HUFF_GROUP_BOUNDS[hgi],OC_HUFF_GROUP_BOUNDS[hgi+1],group_idxs);
+  }
+}
+
+/*Packs an explicit drop frame (an inter frame with no coded blocks), instead
+   of using the more efficient 0-byte packet.
+  This is only enabled in VP3-compatibility mode, even though it is not
+   strictly required for VP3 compatibility (VP3 could be encoded in AVI, which
+   also supports dropping frames by inserting 0 byte packets).
+  However, almost every _Theora_ player used to get this wrong (and many still
+   do), and it wasn't until we started shipping a post-VP3 encoder that
+   actually used non-VP3 features that this began to be discovered and fixed,
+   despite being in the standard since 2004.
+  The pack buffer must be reset before calling this function.*/
+static void oc_enc_drop_frame_pack(oc_enc_ctx *_enc){
+  oggpack_buffer *opb;
+  unsigned        nsbs;
+  int             groupi;
+  int             chromai;
+  opb=&_enc->opb;
+  nsbs=_enc->state.nsbs;
+  /*Mark this as a data packet.*/
+  oggpackB_write(opb,0,1);
+  /*Output the frame type: a drop frame is always a delta frame.*/
+  oggpackB_write(opb,OC_INTER_FRAME,1);
+  /*Write out the current qi list.
+    We always use just 1 qi, to avoid wasting bits on the others.*/
+  oggpackB_write(opb,_enc->state.qis[0],6);
+  oggpackB_write(opb,0,1);
+  /*Coded block flags: everything is uncoded.
+    First, a single run saying that no SBs are partially coded.*/
+  oggpackB_write(opb,0,1);
+  oc_sb_run_pack(opb,nsbs,0,1);
+  /*Then a single run saying that no SBs are fully coded, either.*/
+  oggpackB_write(opb,0,1);
+  oc_sb_run_pack(opb,nsbs,0,1);
+  /*MB modes: we just need to write which scheme to use.
+    Since we have no coded MBs, we can pick any of them except 0, which would
+     require writing out an additional mode list.*/
+  oggpackB_write(opb,7,3);
+  /*MVs: we just need to write which scheme to use.
+    We can pick either one, since we have no MVs.*/
+  oggpackB_write(opb,1,1);
+  /*Write the saved inter frame token table indices: the DC pair first, and
+     then the AC pair.*/
+  for(groupi=0;groupi<2;groupi++){
+    for(chromai=0;chromai<2;chromai++){
+      oggpackB_write(opb,_enc->huff_idxs[OC_INTER_FRAME][groupi][chromai],4);
+    }
+  }
+}
+
+/*Packs the current frame into the encoder's bit packer and marks the packet
+   as ready.
+  A frame with no coded blocks produces a 0 byte packet, or an explicit drop
+   frame in VP3-compatibility mode.*/
+static void oc_enc_frame_pack(oc_enc_ctx *_enc){
+  oggpackB_reset(&_enc->opb);
+  if(_enc->state.ntotal_coded_fragis<=0){
+    /*If there are no coded blocks, we can drop this frame simply by emitting
+       a 0 byte packet.
+      We emit an inter frame with no coded blocks in VP3-compatibility mode.*/
+    if(_enc->vp3_compatible)oc_enc_drop_frame_pack(_enc);
+  }
+  else{
+    /*We have some coded blocks, so write out a complete frame.*/
+    oc_enc_frame_header_pack(_enc);
+    /*Coded block flags, MB modes, and MVs are only needed for delta frames.*/
+    if(_enc->state.frame_type==OC_INTER_FRAME){
+      oc_enc_coded_flags_pack(_enc);
+      oc_enc_mb_modes_pack(_enc);
+      oc_enc_mvs_pack(_enc);
+    }
+    oc_enc_block_qis_pack(_enc);
+    oc_enc_tokenize_finish(_enc);
+    oc_enc_residual_tokens_pack(_enc);
+  }
+  /*Success: Mark the packet as ready to be flushed.*/
+  _enc->packet_state=OC_PACKET_READY;
+#if defined(OC_COLLECT_METRICS)
+  oc_enc_mode_metrics_collect(_enc);
+#endif
+}
+
+
+/*Installs the default pure-C implementations of the accelerated functions
+   and sets the size and alignment of the enquant tables to match.
+  The function pointers are only stored when OC_ENC_USE_VTABLE is defined.*/
+void oc_enc_accel_init_c(oc_enc_ctx *_enc){
+# ifdef OC_ENC_USE_VTABLE
+  /*The implementations prefixed with oc_enc_ are encoder-specific.*/
+  /*Fragment differencing and copying.*/
+  _enc->opt_vtable.frag_sub=oc_enc_frag_sub_c;
+  _enc->opt_vtable.frag_sub_128=oc_enc_frag_sub_128_c;
+  _enc->opt_vtable.frag_copy2=oc_enc_frag_copy2_c;
+  /*SAD metrics.*/
+  _enc->opt_vtable.frag_sad=oc_enc_frag_sad_c;
+  _enc->opt_vtable.frag_sad_thresh=oc_enc_frag_sad_thresh_c;
+  _enc->opt_vtable.frag_sad2_thresh=oc_enc_frag_sad2_thresh_c;
+  _enc->opt_vtable.frag_intra_sad=oc_enc_frag_intra_sad_c;
+  /*SATD metrics.*/
+  _enc->opt_vtable.frag_satd=oc_enc_frag_satd_c;
+  _enc->opt_vtable.frag_satd2=oc_enc_frag_satd2_c;
+  _enc->opt_vtable.frag_intra_satd=oc_enc_frag_intra_satd_c;
+  /*SSD metrics.*/
+  _enc->opt_vtable.frag_ssd=oc_enc_frag_ssd_c;
+  _enc->opt_vtable.frag_border_ssd=oc_enc_frag_border_ssd_c;
+  /*Quantization and the forward DCT.*/
+  _enc->opt_vtable.enquant_table_init=oc_enc_enquant_table_init_c;
+  _enc->opt_vtable.enquant_table_fixup=oc_enc_enquant_table_fixup_c;
+  _enc->opt_vtable.quantize=oc_enc_quantize_c;
+  _enc->opt_vtable.fdct8x8=oc_enc_fdct8x8_c;
+  /*The rest we re-use from the decoder.*/
+  _enc->opt_vtable.frag_recon_intra=oc_frag_recon_intra_c;
+  _enc->opt_vtable.frag_recon_inter=oc_frag_recon_inter_c;
+# endif
+  /*Each enquant table holds 64 oc_iquant entries and is 16-byte aligned.*/
+  _enc->opt_data.enquant_table_alignment=16;
+  _enc->opt_data.enquant_table_size=64*sizeof(oc_iquant);
+}
+
+/*Initialize the macro block neighbor lists for MC analysis.
+  This assumes that the entire mb_info memory region has been initialized with
+   zeros.
+  For every macro block whose mode is not OC_MODE_INVALID, the indices of its
+   neighbors that lie inside the frame and are themselves valid are appended
+   to cneighbors[] and pneighbors[], and ncneighbors and npneighbors are
+   incremented accordingly (which is why they must start out as zero).*/
+static void oc_enc_mb_info_init(oc_enc_ctx *_enc){
+  oc_mb_enc_info    *embs;
+  const signed char *mb_modes;
+  unsigned           nhsbs;
+  unsigned           nvsbs;
+  unsigned           nhmbs;
+  unsigned           nvmbs;
+  unsigned           sby;
+  mb_modes=_enc->state.mb_modes;
+  embs=_enc->mb_info;
+  nhsbs=_enc->state.fplanes[0].nhsbs;
+  nvsbs=_enc->state.fplanes[0].nvsbs;
+  nhmbs=_enc->state.nhmbs;
+  nvmbs=_enc->state.nvmbs;
+  for(sby=0;sby<nvsbs;sby++){
+    unsigned sbx;
+    for(sbx=0;sbx<nhsbs;sbx++){
+      int quadi;
+      for(quadi=0;quadi<4;quadi++){
+        /*Because of the Hilbert curve ordering the macro blocks are
+           visited in, the available neighbors change depending on where in
+           a super block the macro block is located.
+          Only the first three vectors are used in the median calculation
+           for the optimal predictor, and so the most important should be
+           listed first.
+          Additional vectors are used, so there will always be at least 3,
+           except for in the upper-left most macro block.
+          Below, mbi is the index of the macro block in that ordering (four
+           per super block) and (mbx,mby) are its raster coordinates: quadi
+           values 0, 1, 2, 3 map to the offsets (0,0), (0,1), (1,1), (1,0)
+           within the super block.*/
+        /*The number of current neighbors for each macro block position.*/
+        static const unsigned char NCNEIGHBORS[4]={4,3,2,4};
+        /*The offset of each current neighbor in the X direction.*/
+        static const signed char   CDX[4][4]={
+          {-1,0,1,-1},
+          {-1,0,-1,},
+          {-1,-1},
+          {-1,0,0,1}
+        };
+        /*The offset of each current neighbor in the Y direction.*/
+        static const signed char   CDY[4][4]={
+          {0,-1,-1,-1},
+          {0,-1,-1},
+          {0,-1},
+          {0,-1,1,-1}
+        };
+        /*The offset of each previous neighbor in the X direction.*/
+        static const signed char   PDX[4]={-1,0,1,0};
+        /*The offset of each previous neighbor in the Y direction.*/
+        static const signed char   PDY[4]={0,-1,0,1};
+        unsigned mbi;
+        int      mbx;
+        int      mby;
+        unsigned nmbi;
+        int      nmbx;
+        int      nmby;
+        int      ni;
+        mbi=(sby*nhsbs+sbx<<2)+quadi;
+        if(mb_modes[mbi]==OC_MODE_INVALID)continue;
+        mbx=2*sbx+(quadi>>1);
+        mby=2*sby+(quadi+1>>1&1);
+        /*Fill in the neighbors with current motion vectors available.
+          Candidates outside the frame or with an invalid mode are skipped.*/
+        for(ni=0;ni<NCNEIGHBORS[quadi];ni++){
+          nmbx=mbx+CDX[quadi][ni];
+          nmby=mby+CDY[quadi][ni];
+          if(nmbx<0||nmbx>=nhmbs||nmby<0||nmby>=nvmbs)continue;
+          nmbi=(nmby&~1)*nhmbs+((nmbx&~1)<<1)+OC_MB_MAP[nmby&1][nmbx&1];
+          if(mb_modes[nmbi]==OC_MODE_INVALID)continue;
+          embs[mbi].cneighbors[embs[mbi].ncneighbors++]=nmbi;
+        }
+        /*Fill in the neighbors with previous motion vectors available.
+          All four direct neighbors (left, above, right, below) are
+           candidates, subject to the same two checks as above.*/
+        for(ni=0;ni<4;ni++){
+          nmbx=mbx+PDX[ni];
+          nmby=mby+PDY[ni];
+          if(nmbx<0||nmbx>=nhmbs||nmby<0||nmby>=nvmbs)continue;
+          nmbi=(nmby&~1)*nhmbs+((nmbx&~1)<<1)+OC_MB_MAP[nmby&1][nmbx&1];
+          if(mb_modes[nmbi]==OC_MODE_INVALID)continue;
+          embs[mbi].pneighbors[embs[mbi].npneighbors++]=nmbi;
+        }
+      }
+    }
+  }
+}
+
+/*Installs the Huffman codes used to encode the DCT tokens.
+  This is only allowed while the packet state has not advanced past
+   OC_PACKET_SETUP_HDR.
+  _codes: The codes to use, or NULL to select TH_VP31_HUFF_CODES.
+  Return: 0 on success, TH_EFAULT if _enc is NULL, TH_EINVAL if it is too
+           late to change the codes, or the negative value returned by
+           oc_huff_codes_pack() if the codes could not be packed.*/
+static int oc_enc_set_huffman_codes(oc_enc_ctx *_enc,
+ const th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]){
+  const th_huff_code (*codes)[TH_NDCT_TOKENS];
+  int                 err;
+  if(_enc==NULL)return TH_EFAULT;
+  if(_enc->packet_state>OC_PACKET_SETUP_HDR)return TH_EINVAL;
+  codes=_codes!=NULL?_codes:TH_VP31_HUFF_CODES;
+  /*Do a trial packing of the codes to validate them before they are
+     installed.*/
+  oggpackB_reset(&_enc->opb);
+  err=oc_huff_codes_pack(&_enc->opb,codes);
+  if(err<0)return err;
+  memcpy(_enc->huff_codes,codes,sizeof(_enc->huff_codes));
+  return 0;
+}
+
+/*Builds the dequantization tables for _qinfo, saves their DC values, and
+   lays out and initializes the quantization tables inside
+   _enc->enquant_table_data.
+  The buffer first holds one table for each of the 64*3*2 (qi,pli,qti)
+   combinations, followed by 3*3*2 per-frame working tables.*/
+static void oc_enc_enquant_tables_init(oc_enc_ctx *_enc,
+ const th_quant_info *_qinfo){
+  unsigned char *table;
+  size_t         table_size;
+  int            pad;
+  int            qi;
+  int            pli;
+  int            qti;
+  int            qii;
+  /*Point each dequantization table at its backing storage.*/
+  for(qi=0;qi<64;qi++){
+    for(pli=0;pli<3;pli++){
+      for(qti=0;qti<2;qti++){
+        _enc->state.dequant_tables[qi][pli][qti]=
+         _enc->state.dequant_table_data[qi][pli][qti];
+      }
+    }
+  }
+  /*Fill in the dequantization tables.*/
+  oc_dequant_tables_init(_enc->state.dequant_tables,NULL,_qinfo);
+  /*Keep a copy of the DC entry of every table.*/
+  for(qi=0;qi<64;qi++){
+    for(pli=0;pli<3;pli++){
+      for(qti=0;qti<2;qti++){
+        _enc->dequant_dc[qi][pli][qti]=
+         _enc->state.dequant_tables[qi][pli][qti][0];
+      }
+    }
+  }
+  /*Advance to the first suitably aligned address in the storage buffer.*/
+  table=_enc->enquant_table_data;
+  table_size=_enc->opt_data.enquant_table_size;
+  pad=(-(table-(unsigned char *)0))&(_enc->opt_data.enquant_table_alignment-1);
+  table+=pad;
+  /*The main tables come first.*/
+  for(qi=0;qi<64;qi++){
+    for(pli=0;pli<3;pli++){
+      for(qti=0;qti<2;qti++){
+        _enc->enquant_tables[qi][pli][qti]=table;
+        oc_enc_enquant_table_init(_enc,table,
+         _enc->state.dequant_tables[qi][pli][qti]);
+        table+=table_size;
+      }
+    }
+  }
+  /*They are followed by the local copies that get modified for each frame.*/
+  for(pli=0;pli<3;pli++){
+    for(qii=0;qii<3;qii++){
+      for(qti=0;qti<2;qti++){
+        _enc->enquant[pli][qii][qti]=table;
+        table+=table_size;
+      }
+    }
+  }
+}
+
+/*Brings the state derived from the quantization parameters up to date after
+   those parameters have changed: the (de)quantization tables, the loop filter
+   limits, and the quantizer averages.
+  _qinfo: The quantization parameters now in effect.*/
+static void oc_enc_quant_params_updated(oc_enc_ctx *_enc,
+ const th_quant_info *_qinfo){
+  size_t limits_size;
+  limits_size=sizeof(_enc->state.loop_filter_limits);
+  /*Rebuild the tables first; the averages below are computed from them.*/
+  oc_enc_enquant_tables_init(_enc,_qinfo);
+  memcpy(_enc->state.loop_filter_limits,_qinfo->loop_filter_limits,
+   limits_size);
+  oc_enquant_qavg_init(_enc->log_qavg,_enc->log_plq,_enc->chroma_rd_scale,
+   _enc->state.dequant_tables,_enc->state.info.pixel_fmt);
+}
+
+/*Installs the quantization parameters to encode with.
+  This is only allowed before the setup header has been written; when called
+   several times, the last successful call wins.
+  _qinfo: The quantization parameters (see theoraenc.h for details), or NULL
+           to select the defaults (TH_DEF_QUANT_INFO).
+  Return: 0 on success, TH_EFAULT if _enc is NULL, TH_EINVAL if it is too
+           late to change the parameters, or the negative value returned by
+           oc_quant_params_clone(), in which case the previous parameters are
+           left in place.*/
+static int oc_enc_set_quant_params(oc_enc_ctx *_enc,
+ const th_quant_info *_qinfo){
+  th_quant_info saved_qinfo;
+  int           err;
+  if(_enc==NULL)return TH_EFAULT;
+  if(_enc->packet_state>OC_PACKET_SETUP_HDR)return TH_EINVAL;
+  if(_qinfo==NULL)_qinfo=&TH_DEF_QUANT_INFO;
+  /*Hold on to the current parameters until we know the copy succeeded.*/
+  memcpy(&saved_qinfo,&_enc->qinfo,sizeof(saved_qinfo));
+  err=oc_quant_params_clone(&_enc->qinfo,_qinfo);
+  if(err>=0){
+    /*The old parameters are no longer needed.*/
+    oc_quant_params_clear(&saved_qinfo);
+    oc_enc_quant_params_updated(_enc,_qinfo);
+    return 0;
+  }
+  /*Throw away the partial copy and put the old parameters back.*/
+  oc_quant_params_clear(&_enc->qinfo);
+  memcpy(&_enc->qinfo,&saved_qinfo,sizeof(saved_qinfo));
+  return err;
+}
+
+static void oc_enc_clear(oc_enc_ctx *_enc);
+
+/*Initializes an encoder context.
+  _enc:  The context to set up.
+  _info: The requested stream parameters.
+         A cleaned-up copy is used: the version fields are overwritten, the
+          quality is limited to 63 (negative values become 32), and a
+          negative target bitrate becomes 0.
+  Return: 0 on success, the negative value returned by oc_state_init() if the
+           shared encoder/decoder state could not be set up, or TH_EFAULT if
+           a buffer could not be allocated or the default quantization
+           parameters could not be installed.
+          In the TH_EFAULT case the context has already been cleared with
+           oc_enc_clear().*/
+static int oc_enc_init(oc_enc_ctx *_enc,const th_info *_info){
+  th_info   info;
+  size_t    mcu_nmbs;
+  ptrdiff_t mcu_ncfrags;
+  ptrdiff_t mcu_nfrags;
+  int       hdec;
+  int       vdec;
+  int       ret;
+  int       pli;
+  /*Clean up the requested settings.*/
+  memcpy(&info,_info,sizeof(info));
+  info.version_major=TH_VERSION_MAJOR;
+  info.version_minor=TH_VERSION_MINOR;
+  info.version_subminor=TH_VERSION_SUB;
+  if(info.quality>63)info.quality=63;
+  if(info.quality<0)info.quality=32;
+  if(info.target_bitrate<0)info.target_bitrate=0;
+  /*Initialize the shared encoder/decoder state.*/
+  ret=oc_state_init(&_enc->state,&info,6);
+  if(ret<0)return ret;
+  oc_enc_accel_init(_enc);
+  /*All of the allocations below are checked together further down, so that
+     every pointer has a defined value by the time oc_enc_clear() might be
+     called.*/
+  _enc->mb_info=_ogg_calloc(_enc->state.nmbs,sizeof(*_enc->mb_info));
+  _enc->frag_dc=_ogg_calloc(_enc->state.nfrags,sizeof(*_enc->frag_dc));
+  _enc->coded_mbis=
+   (unsigned *)_ogg_malloc(_enc->state.nmbs*sizeof(*_enc->coded_mbis));
+  hdec=!(_enc->state.info.pixel_fmt&1);
+  vdec=!(_enc->state.info.pixel_fmt&2);
+  /*If chroma is sub-sampled in the vertical direction, we have to encode two
+     super block rows of Y' for each super block row of Cb and Cr.*/
+  _enc->mcu_nvsbs=1<<vdec;
+  mcu_nmbs=_enc->mcu_nvsbs*_enc->state.fplanes[0].nhsbs*(size_t)4;
+  /*Each macro block has 8 chroma fragments (both planes together) without
+     sub-sampling; every decimated direction halves that.*/
+  mcu_ncfrags=mcu_nmbs<<(3-(hdec+vdec));
+  mcu_nfrags=4*mcu_nmbs+mcu_ncfrags;
+  _enc->mcu_skip_ssd=(unsigned *)_ogg_malloc(
+   mcu_nfrags*sizeof(*_enc->mcu_skip_ssd));
+  _enc->mcu_rd_scale=(ogg_uint16_t *)_ogg_malloc(
+   (mcu_ncfrags>>1)*sizeof(*_enc->mcu_rd_scale));
+  _enc->mcu_rd_iscale=(ogg_uint16_t *)_ogg_malloc(
+   (mcu_ncfrags>>1)*sizeof(*_enc->mcu_rd_iscale));
+  for(pli=0;pli<3;pli++){
+    _enc->dct_tokens[pli]=(unsigned char **)oc_malloc_2d(64,
+     _enc->state.fplanes[pli].nfrags,sizeof(**_enc->dct_tokens));
+    _enc->extra_bits[pli]=(ogg_uint16_t **)oc_malloc_2d(64,
+     _enc->state.fplanes[pli].nfrags,sizeof(**_enc->extra_bits));
+  }
+#if defined(OC_COLLECT_METRICS)
+  _enc->frag_sad=_ogg_calloc(_enc->state.nfrags,sizeof(*_enc->frag_sad));
+  _enc->frag_satd=_ogg_calloc(_enc->state.nfrags,sizeof(*_enc->frag_satd));
+  _enc->frag_ssd=_ogg_calloc(_enc->state.nfrags,sizeof(*_enc->frag_ssd));
+#endif
+  /*Room for the 64*3*2 main tables plus the 3*3*2 per-frame copies, with
+     enough slack to align the first table.*/
+  _enc->enquant_table_data=(unsigned char *)_ogg_malloc(
+   (64+3)*3*2*_enc->opt_data.enquant_table_size
+   +_enc->opt_data.enquant_table_alignment-1);
+  /*Shift an unsigned value: the granule shift may be as large as 31, which
+     would overflow a signed int.*/
+  _enc->keyframe_frequency_force=
+   (ogg_uint32_t)1U<<_enc->state.info.keyframe_granule_shift;
+  _enc->state.qis[0]=_enc->state.info.quality;
+  _enc->state.nqis=1;
+  _enc->activity_avg=90<<12;
+  _enc->luma_avg=128<<8;
+  oc_rc_state_init(&_enc->rc,_enc);
+  oggpackB_writeinit(&_enc->opb);
+  memcpy(_enc->huff_codes,TH_VP31_HUFF_CODES,sizeof(_enc->huff_codes));
+  /*Zero these so that clearing qinfo is safe before anything was cloned into
+     it.*/
+  memset(_enc->qinfo.qi_ranges,0,sizeof(_enc->qinfo.qi_ranges));
+  /*Reset the packet-out state machine.*/
+  _enc->packet_state=OC_PACKET_INFO_HDR;
+  _enc->dup_count=0;
+  _enc->nqueued_dups=0;
+  _enc->prev_dup_count=0;
+  /*Enable speed optimizations up through early skip by default.*/
+  _enc->sp_level=OC_SP_LEVEL_EARLY_SKIP;
+  /*Disable VP3 compatibility by default.*/
+  _enc->vp3_compatible=0;
+  /*No INTER frames coded yet.*/
+  _enc->coded_inter_frame=0;
+  /*Check every allocation made above.
+    enquant_table_data must be tested before oc_enc_set_quant_params() runs,
+     because that call writes the quantization tables into it.*/
+  if(_enc->mb_info==NULL||_enc->frag_dc==NULL||_enc->coded_mbis==NULL
+   ||_enc->mcu_skip_ssd==NULL||_enc->mcu_rd_scale==NULL
+   ||_enc->mcu_rd_iscale==NULL||_enc->dct_tokens[0]==NULL
+   ||_enc->dct_tokens[1]==NULL||_enc->dct_tokens[2]==NULL
+   ||_enc->extra_bits[0]==NULL||_enc->extra_bits[1]==NULL
+   ||_enc->extra_bits[2]==NULL
+#if defined(OC_COLLECT_METRICS)
+   ||_enc->frag_sad==NULL||_enc->frag_satd==NULL||_enc->frag_ssd==NULL
+#endif
+   ||_enc->enquant_table_data==NULL
+   ||oc_enc_set_quant_params(_enc,NULL)<0){
+    oc_enc_clear(_enc);
+    return TH_EFAULT;
+  }
+  oc_mode_scheme_chooser_init(&_enc->chooser);
+  oc_enc_mb_info_init(_enc);
+  memset(_enc->huff_idxs,0,sizeof(_enc->huff_idxs));
+  return 0;
+}
+
+/*Releases everything owned by an encoder context, roughly in the reverse of
+   the order in which oc_enc_init() set it up.
+  The context structure itself is not freed.*/
+static void oc_enc_clear(oc_enc_ctx *_enc){
+  int pli;
+  oc_rc_state_clear(&_enc->rc);
+  oggpackB_writeclear(&_enc->opb);
+  oc_quant_params_clear(&_enc->qinfo);
+  _ogg_free(_enc->enquant_table_data);
+#if defined(OC_COLLECT_METRICS)
+  /*Write out the metrics collected during this run.
+    tools/process_modedec_stats turns the resulting file into modedec.h.*/
+  oc_mode_metrics_dump();
+  _ogg_free(_enc->frag_ssd);
+  _ogg_free(_enc->frag_satd);
+  _ogg_free(_enc->frag_sad);
+#endif
+  /*Free the per-plane token buffers, last plane first.*/
+  for(pli=2;pli>=0;pli--){
+    oc_free_2d(_enc->extra_bits[pli]);
+    oc_free_2d(_enc->dct_tokens[pli]);
+  }
+  _ogg_free(_enc->mcu_rd_iscale);
+  _ogg_free(_enc->mcu_rd_scale);
+  _ogg_free(_enc->mcu_skip_ssd);
+  _ogg_free(_enc->coded_mbis);
+  _ogg_free(_enc->frag_dc);
+  _ogg_free(_enc->mb_info);
+  /*The shared encoder/decoder state goes last.*/
+  oc_state_clear(&_enc->state);
+}
+
+/*Drops the current frame: the previous frame's reconstruction stands in for
+   it and the packet is emptied (or, in VP3-compatibility mode, replaced by
+   an inter frame with no coded blocks).*/
+static void oc_enc_drop_frame(th_enc_ctx *_enc){
+  /*Let motion vector analysis know that this frame was dropped.*/
+  _enc->prevframe_dropped=1;
+  /*The previous frame's reconstruction becomes this frame's.*/
+  _enc->state.ref_frame_data[OC_FRAME_SELF]=
+   _enc->state.ref_frame_data[OC_FRAME_PREV];
+  _enc->state.ref_frame_idx[OC_FRAME_SELF]=
+   _enc->state.ref_frame_idx[OC_FRAME_PREV];
+  /*Discard whatever was packed for this frame.*/
+  oggpackB_reset(&_enc->opb);
+  /*Outside of VP3-compatibility mode the empty packet is all we need.*/
+  if(!_enc->vp3_compatible)return;
+  oc_enc_drop_frame_pack(_enc);
+}
+
+/*Compresses the current frame as a keyframe.
+  _recode: Passed on to oc_enc_analyze_intra().
+           When this is zero and the frame is the very first one, the frame
+            is compressed a second time with _recode set.*/
+static void oc_enc_compress_keyframe(oc_enc_ctx *_enc,int _recode){
+  /*With a target bitrate, rate control chooses the quantizer.*/
+  if(_enc->state.info.target_bitrate>0){
+    _enc->state.qis[0]=oc_enc_select_qi(_enc,OC_INTRA_FRAME,
+     _enc->state.curframe_num>0);
+    _enc->state.nqis=1;
+  }
+  oc_enc_calc_lambda(_enc,OC_INTRA_FRAME);
+  oc_enc_analyze_intra(_enc,_recode);
+  oc_enc_frame_pack(_enc);
+  /*Everything but the first pass over the first frame is done now.*/
+  if(_recode||_enc->state.curframe_num!=0)return;
+  /*What we just did was only a dry-run to prime the feed-forward statistics
+     (and rate control, if it is active); now encode the frame for real.*/
+  if(_enc->state.info.target_bitrate>0){
+    oc_enc_update_rc_state(_enc,oggpackB_bytes(&_enc->opb)<<3,
+                           OC_INTRA_FRAME,_enc->state.qis[0],1,0);
+  }
+  oc_enc_compress_keyframe(_enc,1);
+}
+
+/*Compresses the current frame as an inter frame, falling back to a keyframe
+   if mode analysis asks for one.
+  _recode: Passed on to oc_enc_analyze_inter().
+           The first inter frame of the stream is compressed a second time
+            with _recode set.*/
+static void oc_enc_compress_frame(oc_enc_ctx *_enc,int _recode){
+  /*With a target bitrate, rate control chooses the quantizer.*/
+  if(_enc->state.info.target_bitrate>0){
+    _enc->state.qis[0]=oc_enc_select_qi(_enc,OC_INTER_FRAME,1);
+    _enc->state.nqis=1;
+  }
+  oc_enc_calc_lambda(_enc,OC_INTER_FRAME);
+  if(oc_enc_analyze_inter(_enc,_enc->rc.twopass!=2,_recode)){
+    /*Mode analysis decided that this should have been a keyframe, so start
+       over as one.*/
+    oc_enc_compress_keyframe(_enc,1);
+    return;
+  }
+  oc_enc_frame_pack(_enc);
+  /*Once an INTER frame has been coded there is nothing left to prime.*/
+  if(_enc->coded_inter_frame)return;
+  /*This was the first INTER frame, so what we just did was only a dry-run to
+     prime the feed-forward statistics.*/
+  _enc->coded_inter_frame=1;
+  /*Rate control needs priming as well.*/
+  if(_enc->state.info.target_bitrate>0){
+    oc_enc_update_rc_state(_enc,oggpackB_bytes(&_enc->opb)<<3,
+     OC_INTER_FRAME,_enc->state.qis[0],1,0);
+  }
+  oc_enc_compress_frame(_enc,1);
+}
+
+/*Computes the granule position of the next packet to be output from the
+   current internal state and stores it in _enc->state.granpos.*/
+static void oc_enc_set_granpos(oc_enc_ctx *_enc){
+  unsigned ndups_emitted;
+  /*Account for the duplicate frames that have been emitted so far.*/
+  ndups_emitted=_enc->prev_dup_count-_enc->nqueued_dups;
+  if(_enc->state.frame_type!=OC_INTRA_FRAME){
+    /*Not a keyframe: the high part holds the last keyframe, and the low part
+       the distance of the current frame from it.*/
+    _enc->state.granpos=
+     ((_enc->state.keyframe_num+_enc->state.granpos_bias)<<
+     _enc->state.info.keyframe_granule_shift)
+     +_enc->state.curframe_num-_enc->state.keyframe_num+ndups_emitted;
+  }
+  else{
+    /*A keyframe: the current frame itself goes in the high part.*/
+    _enc->state.granpos=
+     ((_enc->state.curframe_num+_enc->state.granpos_bias)<<
+     _enc->state.info.keyframe_granule_shift)+ndups_emitted;
+  }
+}
+
+
+/*Allocates and initializes an encoder instance.
+  _info: The stream parameters to encode with.
+  Return: The new encoder context, or NULL if _info is NULL, the context
+           could not be allocated, or its initialization failed.*/
+th_enc_ctx *th_encode_alloc(const th_info *_info){
+  oc_enc_ctx *ctx;
+  if(_info==NULL)return NULL;
+  ctx=oc_aligned_malloc(sizeof(*ctx),16);
+  if(ctx!=NULL&&oc_enc_init(ctx,_info)>=0)return ctx;
+  /*Allocation or initialization failed; give back whatever we obtained.*/
+  oc_aligned_free(ctx);
+  return NULL;
+}
+
+/*Frees an encoder instance, including everything it owns.
+  _enc: The encoder context to free; NULL is ignored.*/
+void th_encode_free(th_enc_ctx *_enc){
+  if(_enc==NULL)return;
+  oc_enc_clear(_enc);
+  oc_aligned_free(_enc);
+}
+/*Handles encoder control requests: dispatches on _req and reads or writes
+   the request-specific parameter in _buf (whose size is _buf_sz).
+  Unrecognized requests return TH_EIMPL.
+  Most requests return TH_EFAULT when _enc or _buf is NULL and TH_EINVAL when
+   _buf_sz does not match the size of the parameter they expect.
+  TH_ENCCTL_SET_HUFFMAN_CODES and TH_ENCCTL_SET_QUANT_PARAMS also accept a
+   NULL _buf together with a _buf_sz of 0, which selects the default tables.
+  TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE, TH_ENCCTL_SET_VP3_COMPATIBLE, and
+   TH_ENCCTL_SET_RATE_BUFFER write their outcome back to _buf.
+  NOTE(review): TH_ENCCTL_SET_QUALITY and TH_ENCCTL_SET_BITRATE dereference
+   _buf without checking _buf_sz, unlike the other requests; confirm whether
+   that is intentional.
+  NOTE(review): in TH_ENCCTL_SET_DUP_COUNT, if keyframe_frequency_force is an
+   unsigned field, a negative dup_count is rejected by the range check and
+   the clamp to 0 after it never takes effect; verify against the struct.
+  NOTE(review): TH_ENCCTL_SET_METRICS_FILE stores the _buf pointer itself and
+   checks neither _enc nor _buf; presumably the string has to stay valid for
+   as long as metrics are collected, verify against the metrics code.*/
+int th_encode_ctl(th_enc_ctx *_enc,int _req,void *_buf,size_t _buf_sz){
+  switch(_req){
+    case TH_ENCCTL_SET_HUFFMAN_CODES:{
+      if(_buf==NULL&&_buf_sz!=0||
+       _buf!=NULL&&_buf_sz!=sizeof(th_huff_table)*TH_NHUFFMAN_TABLES){
+        return TH_EINVAL;
+      }
+      return oc_enc_set_huffman_codes(_enc,(const th_huff_table *)_buf);
+    }break;
+    case TH_ENCCTL_SET_QUANT_PARAMS:{
+      if(_buf==NULL&&_buf_sz!=0||
+       _buf!=NULL&&_buf_sz!=sizeof(th_quant_info)){
+        return TH_EINVAL;
+      }
+      return oc_enc_set_quant_params(_enc,(th_quant_info *)_buf);
+    }break;
+    case TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE:{
+      ogg_uint32_t keyframe_frequency_force;
+      if(_enc==NULL||_buf==NULL)return TH_EFAULT;
+      if(_buf_sz!=sizeof(keyframe_frequency_force))return TH_EINVAL;
+      keyframe_frequency_force=*(ogg_uint32_t *)_buf;
+      if(keyframe_frequency_force<=0)keyframe_frequency_force=1;
+      if(_enc->packet_state==OC_PACKET_INFO_HDR){
+        /*It's still early enough to enlarge keyframe_granule_shift.
+          It is never reduced here, and is limited to 31.*/
+        _enc->state.info.keyframe_granule_shift=OC_CLAMPI(
+         _enc->state.info.keyframe_granule_shift,
+         OC_ILOG_32(keyframe_frequency_force-1),31);
+      }
+      _enc->keyframe_frequency_force=OC_MINI(keyframe_frequency_force,
+       (ogg_uint32_t)1U<<_enc->state.info.keyframe_granule_shift);
+      *(ogg_uint32_t *)_buf=_enc->keyframe_frequency_force;
+      return 0;
+    }break;
+    case TH_ENCCTL_SET_VP3_COMPATIBLE:{
+      int vp3_compatible;
+      int ret;
+      if(_enc==NULL||_buf==NULL)return TH_EFAULT;
+      if(_buf_sz!=sizeof(vp3_compatible))return TH_EINVAL;
+      /*Try this before we change anything else, because it can fail.*/
+      ret=oc_enc_set_quant_params(_enc,&TH_VP31_QUANT_INFO);
+      /*If we can't allocate enough memory, don't change any of the state.
+        Any other error falls through: the request is still recorded in
+         _enc->vp3_compatible, but 0 is reported back to the caller.*/
+      if(ret==TH_EFAULT)return ret;
+      vp3_compatible=*(int *)_buf;
+      _enc->vp3_compatible=vp3_compatible;
+      if(oc_enc_set_huffman_codes(_enc,TH_VP31_HUFF_CODES)<0)vp3_compatible=0;
+      if(ret<0)vp3_compatible=0;
+      if(_enc->state.info.pixel_fmt!=TH_PF_420||
+       _enc->state.info.pic_width<_enc->state.info.frame_width||
+       _enc->state.info.pic_height<_enc->state.info.frame_height||
+      /*If we have more than 4095 super blocks, VP3's RLE coding might
+         overflow.
+        We could overcome this by ensuring we flip the coded/not-coded flags on
+         at least one super block in the frame, but we pick the simple solution
+         of just telling the user the stream will be incompatible instead.
+        It's unlikely the old VP3 codec would be able to decode streams at this
+         resolution in real time in the first place.*/
+       _enc->state.nsbs>4095){
+        vp3_compatible=0;
+      }
+      *(int *)_buf=vp3_compatible;
+      return 0;
+    }break;
+    case TH_ENCCTL_GET_SPLEVEL_MAX:{
+      if(_enc==NULL||_buf==NULL)return TH_EFAULT;
+      if(_buf_sz!=sizeof(int))return TH_EINVAL;
+      *(int *)_buf=OC_SP_LEVEL_MAX;
+      return 0;
+    }break;
+    case TH_ENCCTL_SET_SPLEVEL:{
+      int speed;
+      if(_enc==NULL||_buf==NULL)return TH_EFAULT;
+      if(_buf_sz!=sizeof(speed))return TH_EINVAL;
+      speed=*(int *)_buf;
+      if(speed<0||speed>OC_SP_LEVEL_MAX)return TH_EINVAL;
+      _enc->sp_level=speed;
+      return 0;
+    }break;
+    case TH_ENCCTL_GET_SPLEVEL:{
+      if(_enc==NULL||_buf==NULL)return TH_EFAULT;
+      if(_buf_sz!=sizeof(int))return TH_EINVAL;
+      *(int *)_buf=_enc->sp_level;
+      return 0;
+    }
+    case TH_ENCCTL_SET_DUP_COUNT:{
+      int dup_count;
+      if(_enc==NULL||_buf==NULL)return TH_EFAULT;
+      if(_buf_sz!=sizeof(dup_count))return TH_EINVAL;
+      dup_count=*(int *)_buf;
+      if(dup_count>=_enc->keyframe_frequency_force)return TH_EINVAL;
+      _enc->dup_count=OC_MAXI(dup_count,0);
+      return 0;
+    }break;
+    case TH_ENCCTL_SET_QUALITY:{
+      int qi;
+      if(_enc==NULL||_buf==NULL)return TH_EFAULT;
+      if(_enc->state.info.target_bitrate>0)return TH_EINVAL;
+      qi=*(int *)_buf;
+      if(qi<0||qi>63)return TH_EINVAL;
+      _enc->state.info.quality=qi;
+      _enc->state.qis[0]=(unsigned char)qi;
+      _enc->state.nqis=1;
+      return 0;
+    }break;
+    case TH_ENCCTL_SET_BITRATE:{
+      long bitrate;
+      int  reset;
+      if(_enc==NULL||_buf==NULL)return TH_EFAULT;
+      bitrate=*(long *)_buf;
+      if(bitrate<=0)return TH_EINVAL;
+      reset=_enc->state.info.target_bitrate<=0;
+      _enc->state.info.target_bitrate=bitrate>INT_MAX?INT_MAX:bitrate;
+      if(reset)oc_rc_state_init(&_enc->rc,_enc);
+      else oc_enc_rc_resize(_enc);
+      return 0;
+    }break;
+    case TH_ENCCTL_SET_RATE_FLAGS:{
+      int set;
+      if(_enc==NULL||_buf==NULL)return TH_EFAULT;
+      if(_buf_sz!=sizeof(set))return TH_EINVAL;
+      if(_enc->state.info.target_bitrate<=0)return TH_EINVAL;
+      set=*(int *)_buf;
+      _enc->rc.drop_frames=set&TH_RATECTL_DROP_FRAMES;
+      _enc->rc.cap_overflow=set&TH_RATECTL_CAP_OVERFLOW;
+      _enc->rc.cap_underflow=set&TH_RATECTL_CAP_UNDERFLOW;
+      return 0;
+    }break;
+    case TH_ENCCTL_SET_RATE_BUFFER:{
+      int set;
+      if(_enc==NULL||_buf==NULL)return TH_EFAULT;
+      if(_buf_sz!=sizeof(set))return TH_EINVAL;
+      if(_enc->state.info.target_bitrate<=0)return TH_EINVAL;
+      set=*(int *)_buf;
+      _enc->rc.buf_delay=set;
+      oc_enc_rc_resize(_enc);
+      *(int *)_buf=_enc->rc.buf_delay;
+      return 0;
+    }break;
+    case TH_ENCCTL_2PASS_OUT:{
+      if(_enc==NULL||_buf==NULL)return TH_EFAULT;
+      if(_enc->state.info.target_bitrate<=0||
+       _enc->state.curframe_num>=0&&_enc->rc.twopass!=1||
+       _buf_sz!=sizeof(unsigned char *)){
+        return TH_EINVAL;
+      }
+      return oc_enc_rc_2pass_out(_enc,(unsigned char **)_buf);
+    }break;
+    case TH_ENCCTL_2PASS_IN:{
+      if(_enc==NULL)return TH_EFAULT;
+      if(_enc->state.info.target_bitrate<=0||
+       _enc->state.curframe_num>=0&&_enc->rc.twopass!=2){
+        return TH_EINVAL;
+      }
+      return oc_enc_rc_2pass_in(_enc,_buf,_buf_sz);
+    }break;
+    case TH_ENCCTL_SET_COMPAT_CONFIG:{
+      unsigned char buf[7];
+      oc_pack_buf   opb;
+      th_quant_info qinfo;
+      th_huff_code  huff_codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS];
+      int           ret;
+      int           i;
+      if(_enc==NULL||_buf==NULL)return TH_EFAULT;
+      if(_enc->packet_state>OC_PACKET_SETUP_HDR)return TH_EINVAL;
+      oc_pack_readinit(&opb,_buf,_buf_sz);
+      /*Validate the setup packet header.
+        It is a header-type byte (which must be 0x82) followed by the six
+         characters "theora".*/
+      for(i=0;i<7;i++)buf[i]=(unsigned char)oc_pack_read(&opb,8);
+      if(!(buf[0]&0x80)||memcmp(buf+1,"theora",6)!=0)return TH_ENOTFORMAT;
+      if(buf[0]!=0x82)return TH_EBADHEADER;
+      /*Reads its contents.
+        Both tables are unpacked into locals first, so that the encoder is
+         left untouched if either of them fails to parse.*/
+      ret=oc_quant_params_unpack(&opb,&qinfo);
+      if(ret<0){
+        oc_quant_params_clear(&qinfo);
+        return ret;
+      }
+      ret=oc_huff_codes_unpack(&opb,huff_codes);
+      if(ret<0){
+        oc_quant_params_clear(&qinfo);
+        return ret;
+      }
+      /*Install the new state.
+        The encoder takes over the contents of qinfo, so it is not cleared
+         here.*/
+      oc_quant_params_clear(&_enc->qinfo);
+      memcpy(&_enc->qinfo,&qinfo,sizeof(qinfo));
+      oc_enc_quant_params_updated(_enc,&qinfo);
+      memcpy(_enc->huff_codes,huff_codes,sizeof(_enc->huff_codes));
+      return 0;
+    }
+#if defined(OC_COLLECT_METRICS)
+    case TH_ENCCTL_SET_METRICS_FILE:{
+      OC_MODE_METRICS_FILENAME=(const char *)_buf;
+      return 0;
+    }
+#endif
+    default:return TH_EIMPL;
+  }
+}
+
+/*Produces the next header packet of the stream.
+  The work is delegated to oc_state_flushheader(), which is handed the
+   encoder's packet state, packet buffer, quantization parameters, Huffman
+   codes, and the library version string.
+  _tc: The comment structure passed through to oc_state_flushheader().
+  _op: The packet structure passed through to oc_state_flushheader().
+  Return: TH_EFAULT if _enc is NULL, otherwise the value returned by
+           oc_state_flushheader().*/
+int th_encode_flushheader(th_enc_ctx *_enc,th_comment *_tc,ogg_packet *_op){
+  if(_enc!=NULL){
+    return oc_state_flushheader(&_enc->state,&_enc->packet_state,&_enc->opb,
+     &_enc->qinfo,(const th_huff_table *)_enc->huff_codes,
+     th_version_string(),_tc,_op);
+  }
+  return TH_EFAULT;
+}
+/*Copies the picture region of one image plane into a frame-sized plane and
+   synthesizes the samples outside of that region.
+  _dst:        The destination plane; its width and height are taken as the
+                frame size.
+  _src:        The source plane, addressed with the same coordinates as _dst.
+               Only the picture region is read from it.
+  _pic_x:      The X offset of the picture region.
+  _pic_y:      The Y offset of the picture region.
+  _pic_width:  The width of the picture region.
+  _pic_height: The height of the picture region.*/
+static void oc_img_plane_copy_pad(th_img_plane *_dst,th_img_plane *_src,
+ ogg_int32_t _pic_x,ogg_int32_t _pic_y,
+ ogg_int32_t _pic_width,ogg_int32_t _pic_height){
+  unsigned char *dst;
+  int            dstride;
+  ogg_uint32_t   frame_width;
+  ogg_uint32_t   frame_height;
+  ogg_uint32_t   y;
+  frame_width=_dst->width;
+  frame_height=_dst->height;
+  /*If we have _no_ data, just encode a dull green.
+    Every sample of the plane is set to 0; _src is not read at all.*/
+  if(_pic_width==0||_pic_height==0){
+    dst=_dst->data;
+    dstride=_dst->stride;
+    for(y=0;y<frame_height;y++){
+      memset(dst,0,frame_width*sizeof(*dst));
+      dst+=dstride;
+    }
+  }
+  /*Otherwise, copy what we do have, and add our own padding.*/
+  else{
+    unsigned char *dst_data;
+    unsigned char *src_data;
+    unsigned char *src;
+    int            sstride;
+    ogg_uint32_t   x;
+    /*Step 1: Copy the data we do have.*/
+    dstride=_dst->stride;
+    sstride=_src->stride;
+    dst_data=_dst->data;
+    src_data=_src->data;
+    dst=dst_data+_pic_y*(ptrdiff_t)dstride+_pic_x;
+    src=src_data+_pic_y*(ptrdiff_t)sstride+_pic_x;
+    for(y=0;y<_pic_height;y++){
+      memcpy(dst,src,_pic_width);
+      dst+=dstride;
+      src+=sstride;
+    }
+    /*Step 2: Perform a low-pass extension into the padding region.
+      Every new sample is the rounded (1,2,1)/4 weighted average of the three
+       nearest samples in the adjacent column or row that has already been
+       filled in; at the ends of that column or row the missing neighbor is
+       replaced by the center sample.*/
+    /*Left side.
+      Columns are filled from the picture edge outwards, over the picture
+       rows only.*/
+    for(x=_pic_x;x-->0;){
+      dst=dst_data+_pic_y*(ptrdiff_t)dstride+x;
+      for(y=0;y<_pic_height;y++){
+        dst[0]=(dst[1]<<1)+(dst-(dstride&-(y>0)))[1]
+         +(dst+(dstride&-(y+1<_pic_height)))[1]+2>>2;
+        dst+=dstride;
+      }
+    }
+    /*Right side.
+      Here dst points at the column to the left of the one being written.*/
+    for(x=_pic_x+_pic_width;x<frame_width;x++){
+      dst=dst_data+_pic_y*(ptrdiff_t)dstride+x-1;
+      for(y=0;y<_pic_height;y++){
+        dst[1]=(dst[0]<<1)+(dst-(dstride&-(y>0)))[0]
+         +(dst+(dstride&-(y+1<_pic_height)))[0]+2>>2;
+        dst+=dstride;
+      }
+    }
+    /*Top.
+      These rows span the full frame width, so they also cover the corners.*/
+    dst=dst_data+_pic_y*(ptrdiff_t)dstride;
+    for(y=_pic_y;y-->0;){
+      for(x=0;x<frame_width;x++){
+        (dst-dstride)[x]=(dst[x]<<1)+dst[x-(x>0)]
+         +dst[x+(x+1<frame_width)]+2>>2;
+      }
+      dst-=dstride;
+    }
+    /*Bottom.
+      Likewise over the full frame width, working downwards.*/
+    dst=dst_data+(_pic_y+_pic_height)*(ptrdiff_t)dstride;
+    for(y=_pic_y+_pic_height;y<frame_height;y++){
+      for(x=0;x<frame_width;x++){
+        dst[x]=((dst-dstride)[x]<<1)+(dst-dstride)[x-(x>0)]
+         +(dst-dstride)[x+(x+1<frame_width)]+2>>2;
+      }
+      dst+=dstride;
+    }
+  }
+}
+/*Submits one uncompressed frame to the encoder and compresses it.
+  The input is copied into an internal, padded buffer, the reference frame
+   bookkeeping is advanced, and the frame is compressed as a keyframe or an
+   inter frame; afterwards the packet state is OC_PACKET_READY.
+  _img: The image to encode.
+        Its planes must match either the frame size or the picture size.
+  Return: 0 on success, TH_EFAULT if _enc or _img is NULL, or TH_EINVAL if
+           the packet state is OC_PACKET_DONE, if two-pass mode is active
+           with twopass_buffer_bytes equal to 0, or if the image matches
+           neither the frame size nor the picture size.*/
+int th_encode_ycbcr_in(th_enc_ctx *_enc,th_ycbcr_buffer _img){
+  th_ycbcr_buffer img;
+  int             frame_width;
+  int             frame_height;
+  int             pic_width;
+  int             pic_height;
+  int             pic_x;
+  int             pic_y;
+  int             cframe_width;
+  int             cframe_height;
+  int             cpic_width;
+  int             cpic_height;
+  int             cpic_x;
+  int             cpic_y;
+  int             hdec;
+  int             vdec;
+  int             pli;
+  int             refi;
+  int             drop;
+  /*Step 1: validate parameters.
+    The chroma dimensions computed here are the luma ones divided by the
+     decimation in each direction, with the picture region rounded outwards.*/
+  if(_enc==NULL||_img==NULL)return TH_EFAULT;
+  if(_enc->packet_state==OC_PACKET_DONE)return TH_EINVAL;
+  if(_enc->rc.twopass&&_enc->rc.twopass_buffer_bytes==0)return TH_EINVAL;
+  hdec=!(_enc->state.info.pixel_fmt&1);
+  vdec=!(_enc->state.info.pixel_fmt&2);
+  frame_width=_enc->state.info.frame_width;
+  frame_height=_enc->state.info.frame_height;
+  pic_x=_enc->state.info.pic_x;
+  pic_y=_enc->state.info.pic_y;
+  pic_width=_enc->state.info.pic_width;
+  pic_height=_enc->state.info.pic_height;
+  cframe_width=frame_width>>hdec;
+  cframe_height=frame_height>>vdec;
+  cpic_x=pic_x>>hdec;
+  cpic_y=pic_y>>vdec;
+  cpic_width=(pic_x+pic_width+hdec>>hdec)-cpic_x;
+  cpic_height=(pic_y+pic_height+vdec>>vdec)-cpic_y;
+  /*Flip the input buffer upside down.*/
+  oc_ycbcr_buffer_flip(img,_img);
+  if(img[0].width!=frame_width||img[0].height!=frame_height||
+   img[1].width!=cframe_width||img[2].width!=cframe_width||
+   img[1].height!=cframe_height||img[2].height!=cframe_height){
+    /*The buffer does not match the frame size.
+      Check to see if it matches the picture size.*/
+    if(img[0].width!=pic_width||img[0].height!=pic_height||
+     img[1].width!=cpic_width||img[2].width!=cpic_width||
+     img[1].height!=cpic_height||img[2].height!=cpic_height){
+      /*It doesn't; we don't know how to handle it.*/
+      return TH_EINVAL;
+    }
+    /*Adjust the pointers to address a full frame.
+      We still only use the picture region, however.*/
+    img[0].data-=pic_y*(ptrdiff_t)img[0].stride+pic_x;
+    img[1].data-=cpic_y*(ptrdiff_t)img[1].stride+cpic_x;
+    img[2].data-=cpic_y*(ptrdiff_t)img[2].stride+cpic_x;
+  }
+  /*Step 2: Update the buffer state.
+    The frame that was encoded last becomes the previous reference, and, if
+     it was a keyframe, the golden reference as well.*/
+  if(_enc->state.ref_frame_idx[OC_FRAME_SELF]>=0){
+    _enc->state.ref_frame_idx[OC_FRAME_PREV]=
+     _enc->state.ref_frame_idx[OC_FRAME_SELF];
+    _enc->state.ref_frame_data[OC_FRAME_PREV]=
+     _enc->state.ref_frame_data[OC_FRAME_SELF];
+    if(_enc->state.frame_type==OC_INTRA_FRAME){
+      /*The new frame becomes both the previous and gold reference frames.*/
+      _enc->state.keyframe_num=_enc->state.curframe_num;
+      _enc->state.ref_frame_idx[OC_FRAME_GOLD]=
+       _enc->state.ref_frame_idx[OC_FRAME_SELF];
+      _enc->state.ref_frame_data[OC_FRAME_GOLD]=
+       _enc->state.ref_frame_data[OC_FRAME_SELF];
+    }
+  }
+  if(_enc->state.ref_frame_idx[OC_FRAME_IO]>=0&&_enc->prevframe_dropped==0){
+    _enc->state.ref_frame_idx[OC_FRAME_PREV_ORIG]=
+     _enc->state.ref_frame_idx[OC_FRAME_IO];
+    _enc->state.ref_frame_data[OC_FRAME_PREV_ORIG]=
+     _enc->state.ref_frame_data[OC_FRAME_IO];
+    if(_enc->state.frame_type==OC_INTRA_FRAME){
+      /*The new input frame becomes both the previous and gold
+         original-reference frames.*/
+      _enc->state.ref_frame_idx[OC_FRAME_GOLD_ORIG]=
+       _enc->state.ref_frame_idx[OC_FRAME_IO];
+      _enc->state.ref_frame_data[OC_FRAME_GOLD_ORIG]=
+       _enc->state.ref_frame_data[OC_FRAME_IO];
+    }
+  }
+  /*Select a free buffer to use for the incoming frame.
+    The search starts at index 3 and skips the buffers that hold the golden
+     and previous original frames.*/
+  for(refi=3;refi==_enc->state.ref_frame_idx[OC_FRAME_GOLD_ORIG]||
+   refi==_enc->state.ref_frame_idx[OC_FRAME_PREV_ORIG];refi++);
+  _enc->state.ref_frame_idx[OC_FRAME_IO]=refi;
+  _enc->state.ref_frame_data[OC_FRAME_IO]=
+   _enc->state.ref_frame_bufs[refi][0].data;
+  /*Step 3: Copy the input to our internal buffer.
+    This lets us add padding, so we don't have to worry about dereferencing
+     possibly invalid addresses, and allows us to use the same strides and
+     fragment offsets for both the input frame and the reference frames.*/
+  oc_img_plane_copy_pad(_enc->state.ref_frame_bufs[refi]+0,img+0,
+   pic_x,pic_y,pic_width,pic_height);
+  oc_state_borders_fill_rows(&_enc->state,refi,0,0,frame_height);
+  oc_state_borders_fill_caps(&_enc->state,refi,0);
+  for(pli=1;pli<3;pli++){
+    oc_img_plane_copy_pad(_enc->state.ref_frame_bufs[refi]+pli,img+pli,
+     cpic_x,cpic_y,cpic_width,cpic_height);
+    oc_state_borders_fill_rows(&_enc->state,refi,pli,0,cframe_height);
+    oc_state_borders_fill_caps(&_enc->state,refi,pli);
+  }
+  /*Select a free buffer to use for the reconstructed version of this frame.
+    The search starts at index 0 and skips the buffers that hold the golden
+     and previous reference frames.*/
+  for(refi=0;refi==_enc->state.ref_frame_idx[OC_FRAME_GOLD]||
+   refi==_enc->state.ref_frame_idx[OC_FRAME_PREV];refi++);
+  _enc->state.ref_frame_idx[OC_FRAME_SELF]=refi;
+  _enc->state.ref_frame_data[OC_FRAME_SELF]=
+   _enc->state.ref_frame_bufs[refi][0].data;
+  _enc->state.curframe_num+=_enc->prev_dup_count+1;
+  /*Step 4: Compress the frame.*/
+  /*Start with a keyframe, and don't allow the generation of invalid files that
+     overflow the keyframe_granule_shift.
+    A keyframe is also coded when two-pass rate control forces one.*/
+  if(_enc->rc.twopass_force_kf||_enc->state.curframe_num==0||
+   _enc->state.curframe_num-_enc->state.keyframe_num+_enc->dup_count>=
+   _enc->keyframe_frequency_force){
+    oc_enc_compress_keyframe(_enc,0);
+    drop=0;
+  }
+  else{
+    oc_enc_compress_frame(_enc,0);
+    drop=1;
+  }
+  oc_restore_fpu(&_enc->state);
+  /*drop currently indicates if the frame is droppable.
+    Frames are never dropped when there is no target bitrate.*/
+  if(_enc->state.info.target_bitrate>0){
+    drop=oc_enc_update_rc_state(_enc,oggpackB_bytes(&_enc->opb)<<3,
+     _enc->state.frame_type,_enc->state.qis[0],0,drop);
+  }
+  else drop=0;
+  /*drop now indicates if the frame was dropped.*/
+  if(drop)oc_enc_drop_frame(_enc);
+  else _enc->prevframe_dropped=0;
+  _enc->packet_state=OC_PACKET_READY;
+  _enc->prev_dup_count=_enc->nqueued_dups=_enc->dup_count;
+  _enc->dup_count=0;
+#if defined(OC_DUMP_IMAGES)
+  oc_enc_set_granpos(_enc);
+  oc_state_dump_frame(&_enc->state,OC_FRAME_IO,"src");
+  oc_state_dump_frame(&_enc->state,OC_FRAME_SELF,"rec");
+#endif
+  return 0;
+}
+/*Retrieves the next packet of encoded video data, if there is one.
+  When the packet state is OC_PACKET_READY, the packet for the frame that was
+   just compressed is returned and the state becomes OC_PACKET_EMPTY.
+  When the state is OC_PACKET_EMPTY, one packet is returned for each
+   duplicate frame that is still queued.
+  _last_p: Non-zero if no more frames will be submitted; the state becomes
+            OC_PACKET_DONE once the last packet has been handed out.
+  _op:     The packet structure to fill in.
+           The packet data points into the encoder's own buffer, or is NULL
+            for 0 byte packets.
+  Return: TH_EFAULT if _enc or _op is NULL or if the packet buffer could not
+           be retrieved, 0 if there is no packet to return, and otherwise 1
+           plus the number of duplicate packets that are still queued.*/
+int th_encode_packetout(th_enc_ctx *_enc,int _last_p,ogg_packet *_op){
+  unsigned char *packet;
+  if(_enc==NULL||_op==NULL)return TH_EFAULT;
+  if(_enc->packet_state==OC_PACKET_READY){
+    _enc->packet_state=OC_PACKET_EMPTY;
+    if(_enc->rc.twopass!=1){
+      packet=oggpackB_get_buffer(&_enc->opb);
+      /*If there's no packet, malloc failed while writing; it's lost forever.*/
+      if(packet==NULL)return TH_EFAULT;
+      _op->packet=packet;
+      _op->bytes=oggpackB_bytes(&_enc->opb);
+    }
+    /*For the first pass in 2-pass mode, don't emit any packet data.*/
+    else{
+      _op->packet=NULL;
+      _op->bytes=0;
+    }
+  }
+  else if(_enc->packet_state==OC_PACKET_EMPTY){
+    if(_enc->nqueued_dups>0){
+      _enc->nqueued_dups--;
+      /*Emit an inter frame with no coded blocks in VP3-compatibility mode.*/
+      if(_enc->vp3_compatible){
+        oggpackB_reset(&_enc->opb);
+        oc_enc_drop_frame_pack(_enc);
+        packet=oggpackB_get_buffer(&_enc->opb);
+        /*If there's no packet, malloc failed while writing; it's lost
+           forever.*/
+        if(packet==NULL)return TH_EFAULT;
+        _op->packet=packet;
+        _op->bytes=oggpackB_bytes(&_enc->opb);
+      }
+      /*Otherwise emit a 0-byte packet.*/
+      else{
+        _op->packet=NULL;
+        _op->bytes=0;
+      }
+    }
+    else{
+      if(_last_p)_enc->packet_state=OC_PACKET_DONE;
+      return 0;
+    }
+  }
+  else return 0;
+  _last_p=_last_p&&_enc->nqueued_dups<=0;
+  _op->b_o_s=0;
+  _op->e_o_s=_last_p;
+  oc_enc_set_granpos(_enc);
+  _op->packetno=th_granule_frame(_enc,_enc->state.granpos)+3;
+  _op->granulepos=_enc->state.granpos;
+  if(_last_p)_enc->packet_state=OC_PACKET_DONE;
+  return 1+_enc->nqueued_dups;
+}

+ 67 - 0
jni/libtheora-1.2.0alpha1/lib/encoder_disabled.c

@@ -0,0 +1,67 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id$
+
+ ********************************************************************/
+#include "apiwrapper.h"
+#include "encint.h"
+
+/*Stand-ins for the th_encode_* entry points, from encoder_disabled.c.
+  Allocation always fails and every other call reports OC_DISABLED.*/
+
+/*Always fails: returns NULL without examining _info.*/
+th_enc_ctx *th_encode_alloc(const th_info *_info){
+  (void)_info;
+  return NULL;
+}
+
+/*Nothing to release; _enc is ignored.*/
+void th_encode_free(th_enc_ctx *_enc){
+  (void)_enc;
+}
+
+
+/*Not available: ignores all arguments and returns OC_DISABLED.*/
+int th_encode_ctl(th_enc_ctx *_enc,int _req,void *_buf,size_t _buf_sz){
+  (void)_enc;
+  (void)_req;
+  (void)_buf;
+  (void)_buf_sz;
+  return OC_DISABLED;
+}
+
+/*Not available: ignores all arguments and returns OC_DISABLED.*/
+int th_encode_flushheader(th_enc_ctx *_enc,th_comment *_tc,ogg_packet *_op){
+  (void)_enc;
+  (void)_tc;
+  (void)_op;
+  return OC_DISABLED;
+}
+
+/*Not available: ignores all arguments and returns OC_DISABLED.*/
+int th_encode_ycbcr_in(th_enc_ctx *_enc,th_ycbcr_buffer _img){
+  (void)_enc;
+  (void)_img;
+  return OC_DISABLED;
+}
+
+/*Not available: ignores all arguments and returns OC_DISABLED.*/
+int th_encode_packetout(th_enc_ctx *_enc,int _last_p,ogg_packet *_op){
+  (void)_enc;
+  (void)_last_p;
+  (void)_op;
+  return OC_DISABLED;
+}
+
+
+
+/*Stand-ins for the theora_encode_* entry points, from encoder_disabled.c.
+  Each one ignores its arguments and returns OC_DISABLED.*/
+
+int theora_encode_init(theora_state *_te,theora_info *_ci){
+  (void)_te;
+  (void)_ci;
+  return OC_DISABLED;
+}
+
+int theora_encode_YUVin(theora_state *_te,yuv_buffer *_yuv){
+  (void)_te;
+  (void)_yuv;
+  return OC_DISABLED;
+}
+
+int theora_encode_packetout(theora_state *_te,int _last_p,ogg_packet *_op){
+  (void)_te;
+  (void)_last_p;
+  (void)_op;
+  return OC_DISABLED;
+}
+
+int theora_encode_header(theora_state *_te,ogg_packet *_op){
+  (void)_te;
+  (void)_op;
+  return OC_DISABLED;
+}
+
+int theora_encode_comment(theora_comment *_tc,ogg_packet *_op){
+  (void)_tc;
+  (void)_op;
+  return OC_DISABLED;
+}
+
+int theora_encode_tables(theora_state *_te,ogg_packet *_op){
+  (void)_te;
+  (void)_op;
+  return OC_DISABLED;
+}

+ 370 - 0
jni/libtheora-1.2.0alpha1/lib/enquant.c

@@ -0,0 +1,370 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id$
+
+ ********************************************************************/
+#include <stdlib.h>
+#include <string.h>
+#include "encint.h"
+
+
+
+/*Makes a deep copy of a set of quantization parameters.
+  When the sizes or base_matrices pointer of an entry in _src is the same as
+   that of the preceding entry, or of the entry for the same plane at index 0
+   of the first dimension (and the range counts permit it), the copy shares
+   the corresponding pointer in _dst instead of allocating a new array.
+  These are the duplicates that oc_quant_params_clear() can handle cleanly.
+  _dst: The structure to copy into.
+  _src: The structure to copy from.
+  Return: 0 on success, or TH_EFAULT if an allocation fails.
+          In the latter case the caller is responsible for cleaning up the
+           partially constructed _dst.*/
+int oc_quant_params_clone(th_quant_info *_dst,const th_quant_info *_src){
+  int idx;
+  memcpy(_dst,_src,sizeof(*_dst));
+  memset(_dst->qi_ranges,0,sizeof(_dst->qi_ranges));
+  for(idx=0;idx<6;idx++){
+    const th_quant_ranges *src_cur;
+    th_quant_ranges       *dst_cur;
+    int                    count;
+    int                    type;
+    int                    plane;
+    int                    prev_type;
+    int                    prev_plane;
+    int                    share_prev;
+    int                    share_first;
+    type=idx/3;
+    plane=idx%3;
+    prev_type=(idx-1)/3;
+    prev_plane=(idx-1)%3;
+    src_cur=_src->qi_ranges[type]+plane;
+    dst_cur=_dst->qi_ranges[type]+plane;
+    count=src_cur->nranges;
+    /*The previous entry is only consulted when there is one (idx>0).*/
+    share_prev=idx>0&&count<=_src->qi_ranges[prev_type][prev_plane].nranges;
+    share_first=type>0&&count<=_src->qi_ranges[0][plane].nranges;
+    dst_cur->nranges=count;
+    /*The range sizes.*/
+    if(share_prev&&
+     src_cur->sizes==_src->qi_ranges[prev_type][prev_plane].sizes){
+      dst_cur->sizes=_dst->qi_ranges[prev_type][prev_plane].sizes;
+    }
+    else if(share_first&&src_cur->sizes==_src->qi_ranges[0][plane].sizes){
+      dst_cur->sizes=_dst->qi_ranges[0][plane].sizes;
+    }
+    else{
+      size_t  nbytes;
+      int    *copy;
+      nbytes=count*sizeof(*copy);
+      copy=(int *)_ogg_malloc(nbytes);
+      /*Note: The caller is responsible for cleaning up any partially
+         constructed qinfo.*/
+      if(copy==NULL)return TH_EFAULT;
+      memcpy(copy,src_cur->sizes,nbytes);
+      dst_cur->sizes=copy;
+    }
+    /*The base matrices: one more than the number of ranges.*/
+    if(share_prev&&src_cur->base_matrices==
+     _src->qi_ranges[prev_type][prev_plane].base_matrices){
+      dst_cur->base_matrices=
+       _dst->qi_ranges[prev_type][prev_plane].base_matrices;
+    }
+    else if(share_first&&
+     src_cur->base_matrices==_src->qi_ranges[0][plane].base_matrices){
+      dst_cur->base_matrices=_dst->qi_ranges[0][plane].base_matrices;
+    }
+    else{
+      size_t         nbytes;
+      th_quant_base *copy;
+      nbytes=(count+1)*sizeof(*copy);
+      copy=(th_quant_base *)_ogg_malloc(nbytes);
+      /*Note: The caller is responsible for cleaning up any partially
+         constructed qinfo.*/
+      if(copy==NULL)return TH_EFAULT;
+      memcpy(copy,src_cur->base_matrices,nbytes);
+      dst_cur->base_matrices=(const th_quant_base *)copy;
+    }
+  }
+  return 0;
+}
+
+void oc_quant_params_pack(oggpack_buffer *_opb,const th_quant_info *_qinfo){/*
+  Writes the quantization parameters in _qinfo to the bit packer _opb, in this
+   order: the 64 loop filter limits, the 64 AC scale values, the 64 DC scale
+   values, the list of unique base matrices, and then the quant ranges for
+   each of the 6 qi_ranges entries.
+  _opb:   The oggpack_buffer to write to.
+  _qinfo: The quantization parameters to write.*/
+  const th_quant_ranges *qranges;
+  const th_quant_base   *base_mats[2*3*64];
+  int                    indices[2][3][64];
+  int                    nbase_mats;
+  int                    nbits;
+  int                    ci;
+  int                    qi;
+  int                    qri;
+  int                    qti;
+  int                    pli;
+  int                    qtj;
+  int                    plj;
+  int                    bmi;
+  int                    i;
+  i=_qinfo->loop_filter_limits[0];
+  for(qi=1;qi<64;qi++)i=OC_MAXI(i,_qinfo->loop_filter_limits[qi]);
+  nbits=OC_ILOG_32(i);/*Field width derived from the largest limit.*/
+  oggpackB_write(_opb,nbits,3);/*The width itself is sent in 3 bits.*/
+  for(qi=0;qi<64;qi++){
+    oggpackB_write(_opb,_qinfo->loop_filter_limits[qi],nbits);
+  }
+  /*580 bits for VP3.*/
+  i=1;
+  for(qi=0;qi<64;qi++)i=OC_MAXI(_qinfo->ac_scale[qi],i);
+  nbits=OC_ILOGNZ_32(i);
+  oggpackB_write(_opb,nbits-1,4);/*The width minus one is sent in 4 bits.*/
+  for(qi=0;qi<64;qi++)oggpackB_write(_opb,_qinfo->ac_scale[qi],nbits);
+  /*516 bits for VP3.*/
+  i=1;
+  for(qi=0;qi<64;qi++)i=OC_MAXI(_qinfo->dc_scale[qi],i);
+  nbits=OC_ILOGNZ_32(i);
+  oggpackB_write(_opb,nbits-1,4);/*The width minus one is sent in 4 bits.*/
+  for(qi=0;qi<64;qi++)oggpackB_write(_opb,_qinfo->dc_scale[qi],nbits);
+  /*Consolidate any duplicate base matrices.
+    Each of the nranges+1 matrices of every entry is compared by content
+     against the unique matrices collected so far; indices[qti][pli][qri]
+     records which unique matrix it maps to.*/
+  nbase_mats=0;
+  for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){
+    qranges=_qinfo->qi_ranges[qti]+pli;
+    for(qri=0;qri<=qranges->nranges;qri++){
+      for(bmi=0;;bmi++){
+        if(bmi>=nbase_mats){/*No match found: register a new unique matrix.*/
+          base_mats[bmi]=qranges->base_matrices+qri;
+          indices[qti][pli][qri]=nbase_mats++;
+          break;
+        }
+        else if(memcmp(base_mats[bmi][0],qranges->base_matrices[qri],
+         sizeof(base_mats[bmi][0]))==0){
+          indices[qti][pli][qri]=bmi;
+          break;
+        }
+      }
+    }
+  }
+  /*Write out the list of unique base matrices.
+    1545 bits for VP3 matrices.
+    The count minus one is sent in 9 bits, followed by 64 8-bit values per
+     matrix.*/
+  oggpackB_write(_opb,nbase_mats-1,9);
+  for(bmi=0;bmi<nbase_mats;bmi++){
+    for(ci=0;ci<64;ci++)oggpackB_write(_opb,base_mats[bmi][0][ci],8);
+  }
+  /*Now store quant ranges and their associated indices into the base matrix
+     list.
+    46 bits for VP3 matrices.
+    For every entry after the first, a short flag says whether the entry is
+     written explicitly or repeats an earlier one (same range count, same
+     sizes and same matrix indices).*/
+  nbits=OC_ILOG_32(nbase_mats-1);
+  for(i=0;i<6;i++){
+    qti=i/3;
+    pli=i%3;
+    qranges=_qinfo->qi_ranges[qti]+pli;
+    if(i>0){
+      if(qti>0){
+        if(qranges->nranges==_qinfo->qi_ranges[qti-1][pli].nranges&&
+         memcmp(qranges->sizes,_qinfo->qi_ranges[qti-1][pli].sizes,
+         qranges->nranges*sizeof(qranges->sizes[0]))==0&&
+         memcmp(indices[qti][pli],indices[qti-1][pli],
+         (qranges->nranges+1)*sizeof(indices[qti][pli][0]))==0){
+          oggpackB_write(_opb,1,2);/*Same as entry [qti-1][pli]: value 1 in 2
+                                      bits.*/
+          continue;
+        }
+      }
+      qtj=(i-1)/3;
+      plj=(i-1)%3;
+      if(qranges->nranges==_qinfo->qi_ranges[qtj][plj].nranges&&
+       memcmp(qranges->sizes,_qinfo->qi_ranges[qtj][plj].sizes,
+       qranges->nranges*sizeof(qranges->sizes[0]))==0&&
+       memcmp(indices[qti][pli],indices[qtj][plj],
+       (qranges->nranges+1)*sizeof(indices[qti][pli][0]))==0){
+        oggpackB_write(_opb,0,1+(qti>0));/*Same as the preceding entry: value 0
+                                            in 1 bit, or 2 bits when qti>0.*/
+        continue;
+      }
+      oggpackB_write(_opb,1,1);/*A single 1 bit: the entry follows explicitly.*/
+    }
+    oggpackB_write(_opb,indices[qti][pli][0],nbits);
+    for(qi=qri=0;qi<63;qri++){/*Walk the ranges until their sizes sum to 63.*/
+      oggpackB_write(_opb,qranges->sizes[qri]-1,OC_ILOG_32(62-qi));
+      qi+=qranges->sizes[qri];
+      oggpackB_write(_opb,indices[qti][pli][qri+1],nbits);
+    }
+  }
+}
+
+/*Initializes the multiplier/shift pair stored in an oc_iquant from a divisor.
+  The divisor is doubled (within 16 bits) before the pair is derived from it.
+  _this: The oc_iquant to fill in.
+  _d:    The divisor.*/
+void oc_iquant_init(oc_iquant *_this,ogg_uint16_t _d){
+  ogg_uint32_t recip;
+  int          shift;
+  _d=(ogg_uint16_t)(_d<<1);
+  shift=OC_ILOGNZ_32(_d)-1;
+  recip=((ogg_uint32_t)1<<(16+shift))/_d+1;
+  _this->l=shift;
+  /*Only the part of the reciprocal beyond 2**16 is stored.*/
+  _this->m=(ogg_int16_t)(recip-0x10000);
+}
+
+/*Builds a table of 64 oc_iquant entries from a table of 64 dequantizers.
+  _enquant: The table to fill in; it is treated as an array of oc_iquant.
+  _dequant: The dequantizer for each of the 64 entries.*/
+void oc_enc_enquant_table_init_c(void *_enquant,
+ const ogg_uint16_t _dequant[64]){
+  oc_iquant *entry;
+  int        ci;
+  /*The original VP3.2 code controlled the rounding offset and the size of the
+     dead zone around 0 with a "sharpness" parameter.
+    Tokens are now R-D optimized for each block after quantization, so the
+     rounding offset should always be 1/2 and no explicit dead zone is needed.
+    All of that VP3.2 code is therefore gone, and the remaining floating point
+     code has been replaced by equivalent integer code with exact precision.*/
+  entry=(oc_iquant *)_enquant;
+  for(ci=0;ci<64;ci++,entry++)oc_iquant_init(entry,_dequant[ci]);
+}
+
+/*For every value of the first and last index, copies the first oc_iquant
+   entry of the table at middle index 0 over the first entry of the tables at
+   middle indices 1 through _nqis-1.
+  _enquant: The tables to fix up; each is treated as an array of oc_iquant.
+  _nqis:    The number of middle indices in use.*/
+void oc_enc_enquant_table_fixup_c(void *_enquant[3][3][2],int _nqis){
+  int plane;
+  for(plane=0;plane<3;plane++){
+    int level;
+    for(level=1;level<_nqis;level++){
+      int type;
+      for(type=0;type<2;type++){
+        const oc_iquant *first;
+        oc_iquant       *cur;
+        first=(const oc_iquant *)_enquant[plane][0][type];
+        cur=(oc_iquant *)_enquant[plane][level][type];
+        *cur=*first;
+      }
+    }
+  }
+}
+
+/*Quantizes a block of 64 coefficients.
+  _qdct:    Receives the quantized coefficients.
+  _dct:     The coefficients to quantize.
+  _dequant: The dequantizer for each coefficient.
+  _enquant: The matching table of oc_iquant entries.
+  Return: The index of the last coefficient whose doubled magnitude was at
+           least its dequantizer, or 0 if there was none.*/
+int oc_enc_quantize_c(ogg_int16_t _qdct[64],const ogg_int16_t _dct[64],
+ const ogg_uint16_t _dequant[64],const void *_enquant){
+  const oc_iquant *iq;
+  int              last;
+  int              ci;
+  iq=(const oc_iquant *)_enquant;
+  last=0;
+  for(ci=0;ci<64;ci++){
+    int x;
+    int q;
+    int sign;
+    q=_dequant[ci];
+    x=_dct[ci]<<1;
+    /*Anything smaller than the dequantizer is quantized to zero.*/
+    if(abs(x)<q){
+      _qdct[ci]=0;
+      continue;
+    }
+    sign=OC_SIGNMASK(x);
+    /*The bias added here rounds ties away from zero, since token
+       optimization can only decrease the magnitude of the quantized
+       value.*/
+    x+=(q+sign)^sign;
+    /*Note the arithmetic right shift is not guaranteed by ANSI C.
+      Hopefully no one still uses ones-complement architectures.*/
+    x=((((iq[ci].m*(ogg_int32_t)x)>>16)+x)>>iq[ci].l)-sign;
+    _qdct[ci]=(ogg_int16_t)x;
+    last=ci;
+  }
+  return last;
+}
+
+
+
+/*This table gives the square root of the fraction of the squared magnitude of
+   each DCT coefficient relative to the total, scaled by 2**16, for both INTRA
+   and INTER modes.
+  These values were measured after motion-compensated prediction, before
+   quantization, over a large set of test video (from QCIF to 1080p) encoded at
+   all possible rates.
+  The DC coefficient takes into account the DPCM prediction (using the
+   quantized values from neighboring blocks, as the encoder does, but still
+   before quantization of the coefficient in the current block).
+  The results differ significantly from the expected variance (e.g., using an
+   AR(1) model of the signal with rho=0.95, as is frequently done to compute
+   the coding gain of the DCT).
+  We use them to estimate an "average" quantizer for a given quantizer matrix,
+   as this is used to parameterize a number of the rate control decisions.
+  These values are themselves probably quantizer-matrix dependent, since the
+   shape of the matrix affects the noise distribution in the reference frames,
+   but they should at least give us _some_ amount of adaptivity to different
+   matrices, as opposed to hard-coding a table of average Q values for the
+   current set.
+  The main features they capture are that a) only a few of the quantizers in
+   the upper-left corner contribute anything significant at all (though INTER
+   mode is significantly flatter) and b) the DPCM prediction of the DC
+   coefficient gives a very minor improvement in the INTRA case and a quite
+   significant one in the INTER case (over the expected variance).*/
+static const ogg_uint16_t OC_RPSD[2][64]={
+  {
+    52725,17370,10399, 6867, 5115, 3798, 2942, 2076,
+    17370, 9900, 6948, 4994, 3836, 2869, 2229, 1619,
+    10399, 6948, 5516, 4202, 3376, 2573, 2015, 1461,
+     6867, 4994, 4202, 3377, 2800, 2164, 1718, 1243,
+     5115, 3836, 3376, 2800, 2391, 1884, 1530, 1091,
+     3798, 2869, 2573, 2164, 1884, 1495, 1212,  873,
+     2942, 2229, 2015, 1718, 1530, 1212, 1001,  704,
+     2076, 1619, 1461, 1243, 1091,  873,  704,  474
+  },
+  {
+    23411,15604,13529,11601,10683, 8958, 7840, 6142,
+    15604,11901,10718, 9108, 8290, 6961, 6023, 4487,
+    13529,10718, 9961, 8527, 7945, 6689, 5742, 4333,
+    11601, 9108, 8527, 7414, 7084, 5923, 5175, 3743,
+    10683, 8290, 7945, 7084, 6771, 5754, 4793, 3504,
+     8958, 6961, 6689, 5923, 5754, 4679, 3936, 2989,
+     7840, 6023, 5742, 5175, 4793, 3936, 3522, 2558,
+     6142, 4487, 4333, 3743, 3504, 2989, 2558, 1829
+  }
+};
+
+/*The fraction of the squared magnitude of the residuals in each color channel
+   relative to the total, scaled by 2**16, for each pixel format.
+  These values were measured after motion-compensated prediction, before
+   quantization, over a large set of test video encoded at all possible rates.
+  TODO: These values are only from INTER frames; they should be re-measured for
+   INTRA frames.*/
+static const ogg_uint16_t OC_PCD[4][3]={
+  {59926, 3038, 2572},
+  {55201, 5597, 4738},
+  {55201, 5597, 4738},
+  {47682, 9669, 8185}
+};
+
+
+/*Compute "average" quantizers for each qi level to use for rate control.
+  We do one for each color channel, as well as an average across color
+   channels, separately for INTER and INTRA, since their behavior is very
+   different.
+  The basic approach is to compute a harmonic average of the squared quantizer,
+   weighted by the expected squared magnitude of the DCT coefficients.
+  Under the (not quite true) assumption that DCT coefficients are
+   Laplacian-distributed, this preserves the product Q*lambda, where
+   lambda=sqrt(2/sigma**2) is the Laplacian distribution parameter (not to be
+   confused with the lambda used in R-D optimization throughout most of the
+   rest of the code), when the distributions from multiple coefficients are
+   pooled.
+  The value Q*lambda completely determines the entropy of coefficients drawn
+   from a Laplacian distribution, and thus the expected bitrate.
+  _log_qavg:        Receives OC_Q57(48)-oc_blog64(q2)>>1 for each [qti][qi],
+                     where q2 is the OC_PCD-weighted sum of the per-plane
+                     values qp[].
+  _log_plq:         Receives OC_Q10(32)-oc_blog32_q10(qp[pli])>>1 for each
+                     [qi][pli][qti].
+  _chroma_rd_scale: Receives, for each [qti][qi], the clamped ratio cqp/qp[0]
+                     at index 0 and the clamped ratio qp[0]/cqp at index 1.
+  _dequant:         The dequantization tables to average, indexed
+                     [qi][pli][qti].
+  _pixel_fmt:       Selects the row of OC_PCD used to weight the planes.*/
+void oc_enquant_qavg_init(ogg_int64_t _log_qavg[2][64],
+ ogg_int16_t _log_plq[64][3][2],ogg_uint16_t _chroma_rd_scale[2][64][2],
+ ogg_uint16_t *_dequant[64][3][2],int _pixel_fmt){
+  int qi;
+  int pli;
+  int qti;
+  int ci;
+  for(qti=0;qti<2;qti++)for(qi=0;qi<64;qi++){
+    ogg_int64_t  q2;
+    ogg_uint32_t qp[3];
+    ogg_uint32_t cqp;
+    ogg_uint32_t d;
+    q2=0;
+    for(pli=0;pli<3;pli++){
+      qp[pli]=0;
+      for(ci=0;ci<64;ci++){
+        unsigned rq;
+        unsigned qd;
+        qd=_dequant[qi][pli][qti][OC_IZIG_ZAG[ci]];
+        rq=(OC_RPSD[qti][ci]+(qd>>1))/qd;/*OC_RPSD entry divided by the
+                                            dequantizer, rounded to nearest.*/
+        qp[pli]+=rq*(ogg_uint32_t)rq;/*Accumulate the squares.*/
+      }
+      q2+=OC_PCD[_pixel_fmt][pli]*(ogg_int64_t)qp[pli];
+      /*plq=1.0/sqrt(qp)*/
+      _log_plq[qi][pli][qti]=
+       (ogg_int16_t)(OC_Q10(32)-oc_blog32_q10(qp[pli])>>1);
+    }
+    /*cqp is the OC_PCD-weighted average of the two chroma values qp[1] and
+       qp[2], rounded to nearest.*/
+    d=OC_PCD[_pixel_fmt][1]+OC_PCD[_pixel_fmt][2];
+    cqp=(ogg_uint32_t)((OC_PCD[_pixel_fmt][1]*(ogg_int64_t)qp[1]+
+     OC_PCD[_pixel_fmt][2]*(ogg_int64_t)qp[2]+(d>>1))/d);
+    /*chroma_rd_scale=clamp(0.25,cqp/qp[0],4)*/
+    d=OC_MAXI(qp[0]+(1<<OC_RD_SCALE_BITS-1)>>OC_RD_SCALE_BITS,1);
+    d=OC_CLAMPI(1<<OC_RD_SCALE_BITS-2,(cqp+(d>>1))/d,4<<OC_RD_SCALE_BITS);
+    _chroma_rd_scale[qti][qi][0]=(ogg_int16_t)d;/*NOTE(review): the cast is to
+                                                   ogg_int16_t although the
+                                                   destination is ogg_uint16_t;
+                                                   presumably harmless given
+                                                   the clamp above - confirm.*/
+    /*chroma_rd_iscale=clamp(0.25,qp[0]/cqp,4)*/
+    d=OC_MAXI(OC_RD_ISCALE(cqp,1),1);
+    d=OC_CLAMPI(1<<OC_RD_ISCALE_BITS-2,(qp[0]+(d>>1))/d,4<<OC_RD_ISCALE_BITS);
+    _chroma_rd_scale[qti][qi][1]=(ogg_int16_t)d;
+    /*qavg=1.0/sqrt(q2).*/
+    _log_qavg[qti][qi]=OC_Q57(48)-oc_blog64(q2)>>1;
+  }
+}

+ 26 - 0
jni/libtheora-1.2.0alpha1/lib/enquant.h

@@ -0,0 +1,26 @@
+#if !defined(_enquant_H)
+# define _enquant_H (1)
+# include "quant.h"
+
+typedef struct oc_iquant oc_iquant;
+
+#define OC_QUANT_MAX_LOG (OC_Q57(OC_STATIC_ILOG_32(OC_QUANT_MAX)-1))
+
+/*Used to compute x/d via ((x*m>>16)+x>>l)+(x<0)
+   (i.e., one 16x16->16 mul, 2 shifts, and 2 adds).
+  This is not an approximation; for 16-bit x and d, it is exact.*/
+struct oc_iquant{
+  ogg_int16_t m;
+  ogg_int16_t l;
+};
+
+
+
+int oc_quant_params_clone(th_quant_info *_dst,const th_quant_info *_src);
+void oc_quant_params_pack(oggpack_buffer *_opb,const th_quant_info *_qinfo);
+void oc_iquant_init(oc_iquant *_this,ogg_uint16_t _d);
+void oc_enquant_qavg_init(ogg_int64_t _log_qavg[2][64],
+ ogg_int16_t _log_plq[64][3][2],ogg_uint16_t _pl_rd_scale[2][64][2],
+ ogg_uint16_t *_dequant[64][3][2],int _pixel_fmt);
+
+#endif

+ 417 - 0
jni/libtheora-1.2.0alpha1/lib/fdct.c

@@ -0,0 +1,417 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id$
+
+ ********************************************************************/
+#include "encint.h"
+#include "dct.h"
+
+
+
+/*Performs a forward 8 point Type-II DCT transform.
+  The output is scaled by a factor of 2 from the orthonormal version of the
+   transform.
+  All arithmetic is done in int; the results are narrowed to ogg_int16_t only
+   when they are stored in _y.
+  _y: The buffer to store the result in.
+      Data will be placed in the first 8 entries (e.g., in a row of an 8x8
+       block).
+  _x: The input coefficients.
+      Every 8th entry is used (e.g., from a column of an 8x8 block).*/
+static void oc_fdct8(ogg_int16_t _y[8],const ogg_int16_t *_x){
+  int t0;
+  int t1;
+  int t2;
+  int t3;
+  int t4;
+  int t5;
+  int t6;
+  int t7;
+  int r;
+  int s;
+  int u;
+  int v;
+  /*Stage 1:*/
+  /*0-7 butterfly.*/
+  t0=_x[0<<3]+(int)_x[7<<3];
+  t7=_x[0<<3]-(int)_x[7<<3];
+  /*1-6 butterfly.*/
+  t1=_x[1<<3]+(int)_x[6<<3];
+  t6=_x[1<<3]-(int)_x[6<<3];
+  /*2-5 butterfly.*/
+  t2=_x[2<<3]+(int)_x[5<<3];
+  t5=_x[2<<3]-(int)_x[5<<3];
+  /*3-4 butterfly.*/
+  t3=_x[3<<3]+(int)_x[4<<3];
+  t4=_x[3<<3]-(int)_x[4<<3];
+  /*Stage 2:*/
+  /*0-3 butterfly.*/
+  r=t0+t3;
+  t3=t0-t3;
+  t0=r;
+  /*1-2 butterfly.*/
+  r=t1+t2;
+  t2=t1-t2;
+  t1=r;
+  /*6-5 butterfly.*/
+  r=t6+t5;
+  t5=t6-t5;
+  t6=r;
+  /*Stages 3 and 4 are where all the approximation occurs.
+    These are chosen to be as close to an exact inverse of the approximations
+     made in the iDCT as possible, while still using mostly 16-bit arithmetic.
+    We use some 16x16->32 signed MACs, but those still commonly execute in 1
+     cycle on a 16-bit DSP.
+    For example, s=(27146*t5+0x4000>>16)+t5+(t5!=0) is an exact inverse of
+     t5=(OC_C4S4*s>>16).
+    That is, applying the latter to the output of the former will recover t5
+     exactly (over the valid input range of t5, -23171...23169).
+    We increase the rounding bias to 0xB500 in this particular case so that
+     errors inverting the subsequent butterfly are not one-sided (e.g., the
+     mean error is very close to zero).
+    The (t5!=0) term could be replaced simply by 1, but we want to send 0 to 0.
+    The fDCT of an all-zeros block will still not be zero, because of the
+     biases we added at the very beginning of the process, but it will be close
+     enough that it is guaranteed to round to zero.
+    Note that + binds tighter than >> in the expressions below: the trailing
+     >>1 (or >>16, etc.) applies to the whole sum to its left.*/
+  /*Stage 3:*/
+  /*4-5 butterfly.*/
+  s=(27146*t5+0xB500>>16)+t5+(t5!=0)>>1;
+  r=t4+s;
+  t5=t4-s;
+  t4=r;
+  /*7-6 butterfly.*/
+  s=(27146*t6+0xB500>>16)+t6+(t6!=0)>>1;
+  r=t7+s;
+  t6=t7-s;
+  t7=r;
+  /*Stage 4:*/
+  /*0-1 butterfly.*/
+  r=(27146*t0+0x4000>>16)+t0+(t0!=0);
+  s=(27146*t1+0xB500>>16)+t1+(t1!=0);
+  u=r+s>>1;/*(r+s)>>1.*/
+  v=r-u;
+  _y[0]=u;
+  _y[4]=v;
+  /*3-2 rotation by 6pi/16*/
+  u=(OC_C6S2*t2+OC_C2S6*t3+0x6CB7>>16)+(t3!=0);
+  s=(OC_C6S2*u>>16)-t2;
+  v=(s*21600+0x2800>>18)+s+(s!=0);
+  _y[2]=u;
+  _y[6]=v;
+  /*6-5 rotation by 3pi/16*/
+  u=(OC_C5S3*t6+OC_C3S5*t5+0x0E3D>>16)+(t5!=0);
+  s=t6-(OC_C5S3*u>>16);
+  v=(s*26568+0x3400>>17)+s+(s!=0);
+  _y[5]=u;
+  _y[3]=v;
+  /*7-4 rotation by 7pi/16*/
+  u=(OC_C7S1*t4+OC_C1S7*t7+0x7B1B>>16)+(t7!=0);
+  s=(OC_C7S1*u>>16)-t4;
+  v=(s*20539+0x3000>>20)+s+(s!=0);
+  _y[1]=u;
+  _y[7]=v;
+}
+
+/*Performs a forward 8x8 Type-II DCT transform.
+  The result is scaled by a factor of 4 relative to the orthonormal version of
+   the transform, and is stored in the order given by OC_FZIG_ZAG.
+  _y: The buffer to store the result in.
+      This may be the same as _x, since the input is copied to a temporary
+       buffer before anything is written.
+  _x: The input coefficients.*/
+void oc_enc_fdct8x8_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]){
+  ogg_int16_t tmp[64];
+  int         k;
+  /*Work with two extra bits of precision to improve accuracy; any more and
+     we could overflow.*/
+  for(k=0;k<64;k++)tmp[k]=_x[k]<<2;
+  /*These biases correct for some systematic error that remains in the full
+     fDCT->iDCT round trip.*/
+  tmp[0]+=(tmp[0]!=0)+1;
+  tmp[1]+=1;
+  tmp[8]-=1;
+  /*Transform columns of tmp into rows of _y.*/
+  for(k=0;k<8;k++)oc_fdct8(_y+(k<<3),tmp+k);
+  /*Transform columns of _y into rows of tmp.*/
+  for(k=0;k<8;k++)oc_fdct8(tmp+(k<<3),_y+k);
+  /*Round the result back to the external working precision (which is still
+     scaled by four relative to the orthogonal result).
+    TODO: We should just update the external working precision.*/
+  for(k=0;k<64;k++)_y[k]=(tmp[OC_FZIG_ZAG[k]]+2)>>2;
+}
+
+
+
+/*This does not seem to outperform simple LFE border padding before MC.
+  It yields higher PSNR, but much higher bitrate usage.*/
+#if 0
+typedef struct oc_extension_info oc_extension_info;
+
+
+
+/*Information needed to pad boundary blocks.
+  We multiply each row/column by an extension matrix that fills in the padding
+   values as a linear combination of the active values, so that an equivalent
+   number of coefficients are forced to zero.
+  This costs at most 16 multiplies, the same as a 1-D fDCT itself, and as
+   little as 7 multiplies.
+  We compute the extension matrices for every possible shape in advance, as
+   there are only 35.
+  The coefficients for all matrices are stored in a single array to take
+   advantage of the overlap and repetitiveness of many of the shapes.
+  A similar technique is applied to the offsets into this array.
+  This reduces the required table storage by about 48%.
+  See tools/extgen.c for details.
+  We could conceivably do the same for all 256 possible shapes.*/
+struct oc_extension_info{
+  /*The mask of the active pixels in the shape.*/
+  short                     mask;
+  /*The number of active pixels in the shape.*/
+  short                     na;
+  /*The extension matrix.
+    This is (8-na)xna*/
+  const ogg_int16_t *const *ext;
+  /*The pixel indices: na active pixels followed by 8-na padding pixels.*/
+  unsigned char             pi[8];
+  /*The coefficient indices: na unconstrained coefficients followed by 8-na
+     coefficients to be forced to zero.*/
+  unsigned char             ci[8];
+};
+
+
+/*The number of shapes we need.*/
+#define OC_NSHAPES   (35)
+
+static const ogg_int16_t OC_EXT_COEFFS[229]={
+  0x7FFF,0xE1F8,0x6903,0xAA79,0x5587,0x7FFF,0x1E08,0x7FFF,
+  0x5587,0xAA79,0x6903,0xE1F8,0x7FFF,0x0000,0x0000,0x0000,
+  0x7FFF,0x0000,0x0000,0x7FFF,0x8000,0x7FFF,0x0000,0x0000,
+  0x7FFF,0xE1F8,0x1E08,0xB0A7,0xAA1D,0x337C,0x7FFF,0x4345,
+  0x2267,0x4345,0x7FFF,0x337C,0xAA1D,0xB0A7,0x8A8C,0x4F59,
+  0x03B4,0xE2D6,0x7FFF,0x2CF3,0x7FFF,0xE2D6,0x03B4,0x4F59,
+  0x8A8C,0x1103,0x7AEF,0x5225,0xDF60,0xC288,0xDF60,0x5225,
+  0x7AEF,0x1103,0x668A,0xD6EE,0x3A16,0x0E6C,0xFA07,0x0E6C,
+  0x3A16,0xD6EE,0x668A,0x2A79,0x2402,0x980F,0x50F5,0x4882,
+  0x50F5,0x980F,0x2402,0x2A79,0xF976,0x2768,0x5F22,0x2768,
+  0xF976,0x1F91,0x76C1,0xE9AE,0x76C1,0x1F91,0x7FFF,0xD185,
+  0x0FC8,0xD185,0x7FFF,0x4F59,0x4345,0xED62,0x4345,0x4F59,
+  0xF574,0x5D99,0x2CF3,0x5D99,0xF574,0x5587,0x3505,0x30FC,
+  0xF482,0x953C,0xEAC4,0x7FFF,0x4F04,0x7FFF,0xEAC4,0x953C,
+  0xF482,0x30FC,0x4F04,0x273D,0xD8C3,0x273D,0x1E09,0x61F7,
+  0x1E09,0x273D,0xD8C3,0x273D,0x4F04,0x30FC,0xA57E,0x153C,
+  0x6AC4,0x3C7A,0x1E08,0x3C7A,0x6AC4,0x153C,0xA57E,0x7FFF,
+  0xA57E,0x5A82,0x6AC4,0x153C,0xC386,0xE1F8,0xC386,0x153C,
+  0x6AC4,0x5A82,0xD8C3,0x273D,0x7FFF,0xE1F7,0x7FFF,0x273D,
+  0xD8C3,0x4F04,0x30FC,0xD8C3,0x273D,0xD8C3,0x30FC,0x4F04,
+  0x1FC8,0x67AD,0x1853,0xE038,0x1853,0x67AD,0x1FC8,0x4546,
+  0xE038,0x1FC8,0x3ABA,0x1FC8,0xE038,0x4546,0x3505,0x5587,
+  0xF574,0xBC11,0x78F4,0x4AFB,0xE6F3,0x4E12,0x3C11,0xF8F4,
+  0x4AFB,0x3C7A,0xF88B,0x3C11,0x78F4,0xCAFB,0x7FFF,0x08CC,
+  0x070C,0x236D,0x5587,0x236D,0x070C,0xF88B,0x3C7A,0x4AFB,
+  0xF8F4,0x3C11,0x7FFF,0x153C,0xCAFB,0x153C,0x7FFF,0x1E08,
+  0xE1F8,0x7FFF,0x08CC,0x7FFF,0xCAFB,0x78F4,0x3C11,0x4E12,
+  0xE6F3,0x4AFB,0x78F4,0xBC11,0xFE3D,0x7FFF,0xFE3D,0x2F3A,
+  0x7FFF,0x2F3A,0x89BC,0x7FFF,0x89BC
+};
+
+static const ogg_int16_t *const OC_EXT_ROWS[96]={
+  OC_EXT_COEFFS+   0,OC_EXT_COEFFS+   0,OC_EXT_COEFFS+   0,OC_EXT_COEFFS+   0,
+  OC_EXT_COEFFS+   0,OC_EXT_COEFFS+   0,OC_EXT_COEFFS+   0,OC_EXT_COEFFS+   6,
+  OC_EXT_COEFFS+  27,OC_EXT_COEFFS+  38,OC_EXT_COEFFS+  43,OC_EXT_COEFFS+  32,
+  OC_EXT_COEFFS+  49,OC_EXT_COEFFS+  58,OC_EXT_COEFFS+  67,OC_EXT_COEFFS+  71,
+  OC_EXT_COEFFS+  62,OC_EXT_COEFFS+  53,OC_EXT_COEFFS+  12,OC_EXT_COEFFS+  15,
+  OC_EXT_COEFFS+  14,OC_EXT_COEFFS+  13,OC_EXT_COEFFS+  76,OC_EXT_COEFFS+  81,
+  OC_EXT_COEFFS+  86,OC_EXT_COEFFS+  91,OC_EXT_COEFFS+  96,OC_EXT_COEFFS+  98,
+  OC_EXT_COEFFS+  93,OC_EXT_COEFFS+  88,OC_EXT_COEFFS+  83,OC_EXT_COEFFS+  78,
+  OC_EXT_COEFFS+  12,OC_EXT_COEFFS+  15,OC_EXT_COEFFS+  15,OC_EXT_COEFFS+  12,
+  OC_EXT_COEFFS+  12,OC_EXT_COEFFS+  15,OC_EXT_COEFFS+  12,OC_EXT_COEFFS+  15,
+  OC_EXT_COEFFS+  15,OC_EXT_COEFFS+  12,OC_EXT_COEFFS+ 103,OC_EXT_COEFFS+ 108,
+  OC_EXT_COEFFS+ 126,OC_EXT_COEFFS+  16,OC_EXT_COEFFS+ 137,OC_EXT_COEFFS+ 141,
+  OC_EXT_COEFFS+  20,OC_EXT_COEFFS+ 130,OC_EXT_COEFFS+ 113,OC_EXT_COEFFS+ 116,
+  OC_EXT_COEFFS+ 146,OC_EXT_COEFFS+ 153,OC_EXT_COEFFS+ 160,OC_EXT_COEFFS+ 167,
+  OC_EXT_COEFFS+ 170,OC_EXT_COEFFS+ 163,OC_EXT_COEFFS+ 156,OC_EXT_COEFFS+ 149,
+  OC_EXT_COEFFS+ 119,OC_EXT_COEFFS+ 122,OC_EXT_COEFFS+ 174,OC_EXT_COEFFS+ 177,
+  OC_EXT_COEFFS+ 182,OC_EXT_COEFFS+ 187,OC_EXT_COEFFS+ 192,OC_EXT_COEFFS+ 197,
+  OC_EXT_COEFFS+ 202,OC_EXT_COEFFS+ 207,OC_EXT_COEFFS+ 210,OC_EXT_COEFFS+ 215,
+  OC_EXT_COEFFS+ 179,OC_EXT_COEFFS+ 189,OC_EXT_COEFFS+  24,OC_EXT_COEFFS+ 204,
+  OC_EXT_COEFFS+ 184,OC_EXT_COEFFS+ 194,OC_EXT_COEFFS+ 212,OC_EXT_COEFFS+ 199,
+  OC_EXT_COEFFS+ 217,OC_EXT_COEFFS+ 100,OC_EXT_COEFFS+ 134,OC_EXT_COEFFS+ 135,
+  OC_EXT_COEFFS+ 135,OC_EXT_COEFFS+  12,OC_EXT_COEFFS+  15,OC_EXT_COEFFS+ 134,
+  OC_EXT_COEFFS+ 134,OC_EXT_COEFFS+ 135,OC_EXT_COEFFS+ 220,OC_EXT_COEFFS+ 223,
+  OC_EXT_COEFFS+ 226,OC_EXT_COEFFS+ 227,OC_EXT_COEFFS+ 224,OC_EXT_COEFFS+ 221
+};
+
+static const oc_extension_info OC_EXTENSION_INFO[OC_NSHAPES]={
+  {0x7F,7,OC_EXT_ROWS+  0,{0,1,2,3,4,5,6,7},{0,1,2,4,5,6,7,3}},
+  {0xFE,7,OC_EXT_ROWS+  7,{1,2,3,4,5,6,7,0},{0,1,2,4,5,6,7,3}},
+  {0x3F,6,OC_EXT_ROWS+  8,{0,1,2,3,4,5,7,6},{0,1,3,4,6,7,5,2}},
+  {0xFC,6,OC_EXT_ROWS+ 10,{2,3,4,5,6,7,1,0},{0,1,3,4,6,7,5,2}},
+  {0x1F,5,OC_EXT_ROWS+ 12,{0,1,2,3,4,7,6,5},{0,2,3,5,7,6,4,1}},
+  {0xF8,5,OC_EXT_ROWS+ 15,{3,4,5,6,7,2,1,0},{0,2,3,5,7,6,4,1}},
+  {0x0F,4,OC_EXT_ROWS+ 18,{0,1,2,3,7,6,5,4},{0,2,4,6,7,5,3,1}},
+  {0xF0,4,OC_EXT_ROWS+ 18,{4,5,6,7,3,2,1,0},{0,2,4,6,7,5,3,1}},
+  {0x07,3,OC_EXT_ROWS+ 22,{0,1,2,7,6,5,4,3},{0,3,6,7,5,4,2,1}},
+  {0xE0,3,OC_EXT_ROWS+ 27,{5,6,7,4,3,2,1,0},{0,3,6,7,5,4,2,1}},
+  {0x03,2,OC_EXT_ROWS+ 32,{0,1,7,6,5,4,3,2},{0,4,7,6,5,3,2,1}},
+  {0xC0,2,OC_EXT_ROWS+ 32,{6,7,5,4,3,2,1,0},{0,4,7,6,5,3,2,1}},
+  {0x01,1,OC_EXT_ROWS+  0,{0,7,6,5,4,3,2,1},{0,7,6,5,4,3,2,1}},
+  {0x80,1,OC_EXT_ROWS+  0,{7,6,5,4,3,2,1,0},{0,7,6,5,4,3,2,1}},
+  {0x7E,6,OC_EXT_ROWS+ 42,{1,2,3,4,5,6,7,0},{0,1,2,5,6,7,4,3}},
+  {0x7C,5,OC_EXT_ROWS+ 44,{2,3,4,5,6,7,1,0},{0,1,4,5,7,6,3,2}},
+  {0x3E,5,OC_EXT_ROWS+ 47,{1,2,3,4,5,7,6,0},{0,1,4,5,7,6,3,2}},
+  {0x78,4,OC_EXT_ROWS+ 50,{3,4,5,6,7,2,1,0},{0,4,5,7,6,3,2,1}},
+  {0x3C,4,OC_EXT_ROWS+ 54,{2,3,4,5,7,6,1,0},{0,3,4,7,6,5,2,1}},
+  {0x1E,4,OC_EXT_ROWS+ 58,{1,2,3,4,7,6,5,0},{0,4,5,7,6,3,2,1}},
+  {0x70,3,OC_EXT_ROWS+ 62,{4,5,6,7,3,2,1,0},{0,5,7,6,4,3,2,1}},
+  {0x38,3,OC_EXT_ROWS+ 67,{3,4,5,7,6,2,1,0},{0,5,6,7,4,3,2,1}},
+  {0x1C,3,OC_EXT_ROWS+ 72,{2,3,4,7,6,5,1,0},{0,5,6,7,4,3,2,1}},
+  {0x0E,3,OC_EXT_ROWS+ 77,{1,2,3,7,6,5,4,0},{0,5,7,6,4,3,2,1}},
+  {0x60,2,OC_EXT_ROWS+ 82,{5,6,7,4,3,2,1,0},{0,2,7,6,5,4,3,1}},
+  {0x30,2,OC_EXT_ROWS+ 36,{4,5,7,6,3,2,1,0},{0,4,7,6,5,3,2,1}},
+  {0x18,2,OC_EXT_ROWS+ 90,{3,4,7,6,5,2,1,0},{0,1,7,6,5,4,3,2}},
+  {0x0C,2,OC_EXT_ROWS+ 34,{2,3,7,6,5,4,1,0},{0,4,7,6,5,3,2,1}},
+  {0x06,2,OC_EXT_ROWS+ 84,{1,2,7,6,5,4,3,0},{0,2,7,6,5,4,3,1}},
+  {0x40,1,OC_EXT_ROWS+  0,{6,7,5,4,3,2,1,0},{0,7,6,5,4,3,2,1}},
+  {0x20,1,OC_EXT_ROWS+  0,{5,7,6,4,3,2,1,0},{0,7,6,5,4,3,2,1}},
+  {0x10,1,OC_EXT_ROWS+  0,{4,7,6,5,3,2,1,0},{0,7,6,5,4,3,2,1}},
+  {0x08,1,OC_EXT_ROWS+  0,{3,7,6,5,4,2,1,0},{0,7,6,5,4,3,2,1}},
+  {0x04,1,OC_EXT_ROWS+  0,{2,7,6,5,4,3,1,0},{0,7,6,5,4,3,2,1}},
+  {0x02,1,OC_EXT_ROWS+  0,{1,7,6,5,4,3,2,0},{0,7,6,5,4,3,2,1}}
+};
+
+
+
+/*Pads a single column of a partial block and then performs a forward Type-II
+   DCT on the result.
+  The input is scaled by a factor of 4 and biased appropriately for the current
+   fDCT implementation.
+  The output is scaled by an additional factor of 2 from the orthonormal
+   version of the transform.
+  _y: The buffer to store the result in.
+      Data will be placed in the first 8 entries (e.g., in a row of an 8x8
+       block).
+  _x: The input coefficients.
+      Every 8th entry is used (e.g., from a column of an 8x8 block).
+      When the shape has more than one active pixel, the padding entries of
+       this column are overwritten with the extended values.
+  _e: The extension information for the shape.*/
+static void oc_fdct8_ext(ogg_int16_t _y[8],ogg_int16_t *_x,
+ const oc_extension_info *_e){
+  const unsigned char *pi;
+  int                  na;
+  na=_e->na;
+  pi=_e->pi;
+  if(na==1){
+    int ci;
+    /*While the branch below is still correct for shapes with na==1, we can
+       perform the entire transform with just 1 multiply in this case instead
+       of 23.
+      The single active pixel is read with the same stride of 8 that the
+       general branch and oc_fdct8() use for the column.*/
+    _y[0]=(ogg_int16_t)(OC_DIV2_16(OC_C4S4*(_x[pi[0]<<3])));
+    for(ci=1;ci<8;ci++)_y[ci]=0;
+  }
+  else{
+    const ogg_int16_t *const *ext;
+    int                       zpi;
+    int                       api;
+    int                       nz;
+    /*First multiply by the extension matrix to compute the padding values.
+      Row zpi of the matrix gives the weights of the na active pixels for
+       padding pixel pi[na+zpi].*/
+    nz=8-na;
+    ext=_e->ext;
+    for(zpi=0;zpi<nz;zpi++){
+      ogg_int32_t v;
+      v=0;
+      for(api=0;api<na;api++){
+        v+=ext[zpi][api]*(ogg_int32_t)(_x[pi[api]<<3]<<1);
+      }
+      _x[pi[na+zpi]<<3]=(ogg_int16_t)(v+0x8000>>16)+1>>1;
+    }
+    oc_fdct8(_y,_x);
+  }
+}
+
+/*Performs a forward 8x8 Type-II DCT transform on blocks which overlap the
+   border of the picture region.
+  This method ONLY works with rectangular regions.
+  If the row or column shape cannot be found among the OC_NSHAPES entries of
+   OC_EXTENSION_INFO, it falls back to oc_enc_fdct8x8_c().
+  This function sits inside an "#if 0" section, so it is not currently
+   compiled.
+  _border: A description of which pixels are inside the border.
+  _y:      The buffer to store the result in.
+           This may be the same as _x.
+  _x:      The input pixel values.
+           Pixel values outside the border will be ignored.*/
+void oc_fdct8x8_border(const oc_border_info *_border,
+ ogg_int16_t _y[64],const ogg_int16_t _x[64]){
+  ogg_int16_t             *in;
+  ogg_int16_t             *out;
+  ogg_int16_t              w[64];
+  ogg_int64_t              mask;
+  const oc_extension_info *cext;
+  const oc_extension_info *rext;
+  int                      cmask;
+  int                      rmask;
+  int                      ri;
+  int                      ci;
+  /*Identify the shapes of the non-zero rows and columns.
+    Each byte of the 64-bit mask is consumed in turn: bit ri of cmask is set
+     when byte ri is non-zero, and rmask is the OR of all the bytes.*/
+  rmask=cmask=0;
+  mask=_border->mask;
+  for(ri=0;ri<8;ri++){
+    /*This aggregation is _only_ correct for rectangular masks.*/
+    cmask|=((mask&0xFF)!=0)<<ri;
+    rmask|=mask&0xFF;
+    mask>>=8;
+  }
+  /*Find the associated extension info for these shapes.
+    A mask of 0xFF needs no padding, which is signalled by a NULL pointer.*/
+  if(cmask==0xFF)cext=NULL;
+  else for(cext=OC_EXTENSION_INFO;cext->mask!=cmask;){
+    /*If we somehow can't find the shape, then just do an unpadded fDCT.
+      It won't be efficient, but it should still be correct.*/
+    if(++cext>=OC_EXTENSION_INFO+OC_NSHAPES){
+      oc_enc_fdct8x8_c(_y,_x);
+      return;
+    }
+  }
+  if(rmask==0xFF)rext=NULL;
+  else for(rext=OC_EXTENSION_INFO;rext->mask!=rmask;){
+    /*If we somehow can't find the shape, then just do an unpadded fDCT.
+      It won't be efficient, but it should still be correct.*/
+    if(++rext>=OC_EXTENSION_INFO+OC_NSHAPES){
+      oc_enc_fdct8x8_c(_y,_x);
+      return;
+    }
+  }
+  /*Add two extra bits of working precision to improve accuracy; any more and
+     we could overflow.*/
+  for(ci=0;ci<64;ci++)w[ci]=_x[ci]<<2;
+  /*These biases correct for some systematic error that remains in the full
+     fDCT->iDCT round trip.
+    We can safely add them before padding, since if these pixel values are
+     overwritten, we didn't care what they were anyway (and the unbiased values
+     will usually yield smaller DCT coefficient magnitudes).*/
+  w[0]+=(w[0]!=0)+1;
+  w[1]++;
+  w[8]--;
+  /*Transform the columns.
+    We can ignore zero columns without a problem.
+    NOTE(review): when cext is not NULL, the rows of _y for skipped columns
+     are not written here, yet the row pass below reads every entry of _y;
+     confirm this is intended before re-enabling the code.*/
+  in=w;
+  out=_y;
+  if(cext==NULL)for(ci=0;ci<8;ci++)oc_fdct8(out+(ci<<3),in+ci);
+  else for(ci=0;ci<8;ci++)if(rmask&(1<<ci))oc_fdct8_ext(out+(ci<<3),in+ci,cext);
+  /*Transform the rows.
+    We transform even rows that are supposedly zero, because rounding errors
+     may make them slightly non-zero, and this will give a more precise
+     reconstruction with very small quantizers.*/
+  in=_y;
+  out=w;
+  if(rext==NULL)for(ri=0;ri<8;ri++)oc_fdct8(out+(ri<<3),in+ri);
+  else for(ri=0;ri<8;ri++)oc_fdct8_ext(out+(ri<<3),in+ri,rext);
+  /*Round the result back to the external working precision (which is still
+     scaled by four relative to the orthogonal result).
+    TODO: We should just update the external working precision.
+    NOTE(review): the result is stored in natural order here, whereas
+     oc_enc_fdct8x8_c() indexes its temporary buffer through OC_FZIG_ZAG;
+     confirm which order callers expect before re-enabling the code.*/
+  for(ci=0;ci<64;ci++)_y[ci]=w[ci]+2>>2;
+}
+#endif

+ 82 - 0
jni/libtheora-1.2.0alpha1/lib/fragment.c

@@ -0,0 +1,82 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id$
+
+ ********************************************************************/
+#include <string.h>
+#include "internal.h"
+
+/*Copies one 8x8 fragment between two buffers that share a row stride.
+  _dst:     The first row of the destination fragment.
+  _src:     The first row of the source fragment.
+  _ystride: The offset added to both pointers to step to the next row.*/
+void oc_frag_copy_c(unsigned char *_dst,const unsigned char *_src,int _ystride){
+  int rows_left;
+  rows_left=8;
+  while(rows_left>0){
+    /*Each row of a fragment is 8 pixels wide.*/
+    memcpy(_dst,_src,8*sizeof(*_dst));
+    _src+=_ystride;
+    _dst+=_ystride;
+    rows_left--;
+  }
+}
+
+/*Copies a list of fragments from one reference frame to another.
+  Every listed fragment is copied with oc_frag_copy_c(), using the same buffer
+   offset in the source and destination frames.
+  _dst_frame:     The reference frame to copy to.
+  _src_frame:     The reference frame to copy from.
+  _ystride:       The row stride of the reference frames.
+  _fragis:        A pointer to a list of fragment indices.
+  _nfragis:       The number of fragment indices to copy.
+  _frag_buf_offs: The offsets of fragments in the reference frames.*/
+void oc_frag_copy_list_c(unsigned char *_dst_frame,
+ const unsigned char *_src_frame,int _ystride,
+ const ptrdiff_t *_fragis,ptrdiff_t _nfragis,const ptrdiff_t *_frag_buf_offs){
+  ptrdiff_t n;
+  n=0;
+  while(n<_nfragis){
+    ptrdiff_t fragi;
+    ptrdiff_t off;
+    /*Look up the fragment index, then where that fragment lives in a frame.*/
+    fragi=_fragis[n];
+    off=_frag_buf_offs[fragi];
+    oc_frag_copy_c(_dst_frame+off,_src_frame+off,_ystride);
+    n++;
+  }
+}
+
+/*Reconstructs an intra fragment: every output pixel is the residue value plus
+   128, passed through OC_CLAMP255().
+  _dst:     The first row of the destination fragment.
+  _ystride: The offset between consecutive rows of _dst.
+  _residue: The 64 residue values, stored one 8-value row after another.*/
+void oc_frag_recon_intra_c(unsigned char *_dst,int _ystride,
+ const ogg_int16_t _residue[64]){
+  int y;
+  for(y=0;y<8;y++){
+    const ogg_int16_t *res_row;
+    int                x;
+    res_row=_residue+(y<<3);
+    for(x=0;x<8;x++)_dst[x]=OC_CLAMP255(res_row[x]+128);
+    _dst+=_ystride;
+  }
+}
+
+/*Reconstructs an inter fragment: every output pixel is the residue value plus
+   the co-located predictor pixel, passed through OC_CLAMP255().
+  _dst:     The first row of the destination fragment.
+  _src:     The first row of the predictor fragment.
+  _ystride: The offset between consecutive rows, applied to both _dst and
+             _src.
+  _residue: The 64 residue values, stored one 8-value row after another.*/
+void oc_frag_recon_inter_c(unsigned char *_dst,
+ const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]){
+  int y;
+  for(y=0;y<8;y++){
+    const ogg_int16_t *res_row;
+    int                x;
+    res_row=_residue+(y<<3);
+    for(x=0;x<8;x++)_dst[x]=OC_CLAMP255(res_row[x]+_src[x]);
+    _src+=_ystride;
+    _dst+=_ystride;
+  }
+}
+
+/*Reconstructs an inter fragment from two predictors: every output pixel is
+   the residue value plus the truncated average of the two co-located
+   predictor pixels, passed through OC_CLAMP255().
+  _dst:     The first row of the destination fragment.
+  _src1:    The first row of the first predictor fragment.
+  _src2:    The first row of the second predictor fragment.
+  _ystride: The offset between consecutive rows, applied to all three
+             buffers.
+  _residue: The 64 residue values, stored one 8-value row after another.*/
+void oc_frag_recon_inter2_c(unsigned char *_dst,const unsigned char *_src1,
+ const unsigned char *_src2,int _ystride,const ogg_int16_t _residue[64]){
+  int y;
+  for(y=0;y<8;y++){
+    const ogg_int16_t *res_row;
+    int                x;
+    res_row=_residue+(y<<3);
+    for(x=0;x<8;x++){
+      _dst[x]=OC_CLAMP255(res_row[x]+((_src1[x]+_src2[x])>>1));
+    }
+    _src1+=_ystride;
+    _src2+=_ystride;
+    _dst+=_ystride;
+  }
+}
+
+/*The plain C version of the FPU restore routine: it has nothing to do.*/
+void oc_restore_fpu_c(void){
+}

+ 515 - 0
jni/libtheora-1.2.0alpha1/lib/huffdec.c

@@ -0,0 +1,515 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id$
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <ogg/ogg.h>
+#include "huffdec.h"
+#include "decint.h"
+
+
+
+/*Instead of storing every branching in the tree, subtrees can be collapsed
+   into one node, with a table of size 1<<nbits pointing directly to its
+   descendants nbits levels down.
+  This allows more than one bit to be read at a time, and avoids following all
+   the intermediate branches with next to no increased code complexity once
+   the collapsed tree has been built.
+  We do _not_ require that a subtree be complete to be collapsed, but instead
+   store duplicate pointers in the table, and record the actual depth of the
+   node below its parent.
+  This tells us the number of bits to advance the stream after reaching it.
+
+  This turns out to be equivalent to the method described in \cite{Hash95},
+   without the requirement that codewords be sorted by length.
+  If the codewords were sorted by length (so-called ``canonical-codes''), they
+   could be decoded much faster via either Lindell and Moffat's approach or
+   Hashemian's Condensed Huffman Code approach, the latter of which has an
+   extremely small memory footprint.
+  We can't use Choueka et al.'s finite state machine approach, which is
+   extremely fast, because we can't allow multiple symbols to be output at a
+   time; the codebook can and does change between symbols.
+  It also has very large memory requirements, which impairs cache coherency.
+
+  We store the tree packed in an array of 16-bit integers (words).
+  Each node consists of a single word, followed consecutively by two or more
+   indices of its children.
+  Let n be the value of this first word.
+  This is the number of bits that need to be read to traverse the node, and
+   must be positive.
+  1<<n entries follow in the array, each an index to a child node.
+  If the child is positive, then it is the index of another internal node in
+   the table.
+  If the child is negative or zero, then it is a leaf node.
+  These are stored directly in the child pointer to save space, since they only
+   require a single word.
+  If a leaf node would have been encountered before reading n bits, then it is
+   duplicated the necessary number of times in this table.
+  Leaf nodes pack both a token value and their actual depth in the tree.
+  The token in the leaf node is (-leaf&255).
+  The number of bits that need to be consumed to reach the leaf, starting from
+   the current node, is (-leaf>>8).
+
+  @ARTICLE{Hash95,
+    author="Reza Hashemian",
+    title="Memory Efficient and High-Speed Search {Huffman} Coding",
+    journal="{IEEE} Transactions on Communications",
+    volume=43,
+    number=10,
+    pages="2576--2581",
+    month=Oct,
+    year=1995
+  }*/
+
+
+
+/*The map from external spec-defined tokens to internal tokens.
+  The table is indexed by spec token value, and each entry is the first
+   internal token index for that spec token; when extra bits are folded into
+   the token, oc_huff_tree_unpack() hands out consecutive internal indices
+   starting from this entry.
+  This is constructed so that any extra bits read with the original token value
+   can be masked off the least significant bits of its internal token index.
+  In addition, all of the tokens which require additional extra bits are placed
+   at the start of the list, and grouped by type.
+  OC_DCT_REPEAT_RUN3_TOKEN is placed first, as it is an extra-special case, so
+   giving it index 0 may simplify comparisons on some architectures.
+  These requirements require some substantial reordering.*/
+static const unsigned char OC_DCT_TOKEN_MAP[TH_NDCT_TOKENS]={
+  /*OC_DCT_EOB1_TOKEN (0 extra bits)*/
+  15,
+  /*OC_DCT_EOB2_TOKEN (0 extra bits)*/
+  16,
+  /*OC_DCT_EOB3_TOKEN (0 extra bits)*/
+  17,
+  /*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits)*/
+  88,
+  /*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits)*/
+  80,
+  /*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/
+   1,
+  /*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/
+   0,
+  /*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits)*/
+  48,
+  /*OC_DCT_ZRL_TOKEN (6 extra bits)*/
+  14,
+  /*OC_ONE_TOKEN (0 extra bits)*/
+  56,
+  /*OC_MINUS_ONE_TOKEN (0 extra bits)*/
+  57,
+  /*OC_TWO_TOKEN (0 extra bits)*/
+  58,
+  /*OC_MINUS_TWO_TOKEN (0 extra bits)*/
+  59,
+  /*OC_DCT_VAL_CAT2 (1 extra bit)*/
+  60,
+  62,
+  64,
+  66,
+  /*OC_DCT_VAL_CAT3 (2 extra bits)*/
+  68,
+  /*OC_DCT_VAL_CAT4 (3 extra bits)*/
+  72,
+  /*OC_DCT_VAL_CAT5 (4 extra bits)*/
+   2,
+  /*OC_DCT_VAL_CAT6 (5 extra bits)*/
+   4,
+  /*OC_DCT_VAL_CAT7 (6 extra bits)*/
+   6,
+  /*OC_DCT_VAL_CAT8 (10 extra bits)*/
+   8,
+  /*OC_DCT_RUN_CAT1A (1 extra bit)*/
+  18,
+  20,
+  22,
+  24,
+  26,
+  /*OC_DCT_RUN_CAT1B (3 extra bits)*/
+  32,
+  /*OC_DCT_RUN_CAT1C (4 extra bits)*/
+  12,
+  /*OC_DCT_RUN_CAT2A (2 extra bits)*/
+  28,
+  /*OC_DCT_RUN_CAT2B (3 extra bits)*/
+  40
+};
+
+/*The log base 2 of number of internal tokens associated with each of the spec
+   tokens (i.e., how many of the extra bits are folded into the token value).
+  This table is indexed by spec token value, in parallel with
+   OC_DCT_TOKEN_MAP; oc_huff_tree_unpack() emits 1<<entry consecutive internal
+   tokens for the spec token and lengthens their codewords by entry bits.
+  Increasing the maximum value beyond 3 will enlarge the amount of stack
+   required for tree construction.*/
+static const unsigned char OC_DCT_TOKEN_MAP_LOG_NENTRIES[TH_NDCT_TOKENS]={
+  0,0,0,2,3,0,0,3,0,0,0,0,0,1,1,1,1,2,3,1,1,1,2,1,1,1,1,1,3,1,2,3
+};
+
+
+/*The size a lookup table is allowed to grow to relative to the number of
+   unique nodes it contains.
+  oc_huff_tree_collapse_depth() applies this limit to every node whose depth
+   is greater than zero.
+  E.g., if OC_HUFF_SLUSH is 4, then at most 75% of the space in the tree is
+   wasted (1/4 of the space must be used).
+  Larger numbers can decode tokens with fewer read operations, while smaller
+   numbers may save more space.
+  With a sample file:
+  32233473 read calls are required when no tree collapsing is done (100.0%).
+  19269269 read calls are required when OC_HUFF_SLUSH is 1 (59.8%).
+  11144969 read calls are required when OC_HUFF_SLUSH is 2 (34.6%).
+  10538563 read calls are required when OC_HUFF_SLUSH is 4 (32.7%).
+  10192578 read calls are required when OC_HUFF_SLUSH is 8 (31.6%).
+  Since a value of 2 gets us the vast majority of the speed-up with only a
+   small amount of wasted memory, this is what we use.
+  This value must be less than 128, or you could create a tree with more than
+   32767 entries, which would overflow the 16-bit words used to index it.*/
+#define OC_HUFF_SLUSH (2)
+/*The root of the tree is on the fast path, and a larger value here is more
+   beneficial than elsewhere in the tree.
+  oc_huff_tree_collapse_depth() uses this value in place of OC_HUFF_SLUSH when
+   the node being sized is at depth zero.
+  7 appears to give the best performance, trading off between increased use of
+   the single-read fast path and cache footprint for the tables, though
+   obviously this will depend on your cache size.
+  Using 7 here, the VP3 tables are about twice as large compared to using 2.*/
+#define OC_ROOT_HUFF_SLUSH (7)
+
+
+
+/*Unpacks a Huffman codebook.
+  The codebook is read one bit at a time as a depth-first walk of the tree: a
+   0 bit descends one level into an internal node, and a 1 bit marks a leaf,
+   which is followed by OC_NDCT_TOKEN_BITS bits holding its spec token.
+  Each spec token is expanded into its internal tokens via OC_DCT_TOKEN_MAP and
+   OC_DCT_TOKEN_MAP_LOG_NENTRIES.
+  _opb:    The buffer to unpack from.
+  _tokens: Stores a list of internal tokens, in the order they were found in
+            the codebook, and the lengths of their corresponding codewords.
+           This is enough to completely define the codebook, while minimizing
+            stack usage and avoiding temporary allocations (for platforms
+            where free() is a no-op).
+  Return: The number of internal tokens in the codebook, or a negative value
+   on error.*/
+int oc_huff_tree_unpack(oc_pack_buf *_opb,unsigned char _tokens[256][2]){
+  ogg_uint32_t code;
+  int          len;
+  int          ntokens;
+  int          nleaves;
+  code=0;
+  len=ntokens=nleaves=0;
+  for(;;){
+    long bits;
+    bits=oc_pack_read1(_opb);
+    /*Only process nodes so long as there's more bits in the buffer.*/
+    if(oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER;
+    /*Read an internal node:*/
+    if(!bits){
+      len++;
+      /*Don't allow codewords longer than 32 bits.*/
+      if(len>32)return TH_EBADHEADER;
+    }
+    /*Read a leaf node:*/
+    else{
+      ogg_uint32_t code_bit;
+      int          neb;
+      int          nentries;
+      int          token;
+      /*Don't allow more than 32 spec-tokens per codebook.*/
+      if(++nleaves>32)return TH_EBADHEADER;
+      bits=oc_pack_read(_opb,OC_NDCT_TOKEN_BITS);
+      neb=OC_DCT_TOKEN_MAP_LOG_NENTRIES[bits];
+      token=OC_DCT_TOKEN_MAP[bits];
+      nentries=1<<neb;
+      /*Emit every internal token of this spec token; the folded-in extra bits
+         make each of their codewords neb bits longer.*/
+      while(nentries-->0){
+        _tokens[ntokens][0]=(unsigned char)token++;
+        _tokens[ntokens][1]=(unsigned char)(len+neb);
+        ntokens++;
+      }
+      /*A leaf found at depth zero means the whole tree is this single node,
+         so we are done.
+        Stop before forming the code bit: with len equal to 0 the shift below
+         would be by -1, which is undefined behavior.*/
+      if(len<=0)break;
+      code_bit=0x80000000U>>len-1;
+      /*Back up past every level whose 1-branch we have just completed.*/
+      while(len>0&&(code&code_bit)){
+        code^=code_bit;
+        code_bit<<=1;
+        len--;
+      }
+      /*Backing up past the root means the tree is complete.*/
+      if(len<=0)break;
+      /*Otherwise continue with the 1-branch at this level.*/
+      code|=code_bit;
+    }
+  }
+  return ntokens;
+}
+
+/*Count how many tokens would be required to fill a subtree at depth _depth.
+  The subtree's code space is tracked in code, scaled so that 0x80000000
+   stands for the whole subtree: a token whose codeword is k bits longer than
+   _depth contributes 0x80000000>>k, and counting stops as soon as the total
+   reaches 0x80000000.
+  _tokens: A list of internal tokens, in the order they are found in the
+            codebook, and the lengths of their corresponding codewords.
+  _depth:  The depth of the desired node in the corresponding tree structure.
+  Return: The number of tokens that belong to that subtree.*/
+static int oc_huff_subtree_tokens(unsigned char _tokens[][2],int _depth){
+  ogg_uint32_t code;
+  int          ti;
+  code=0;
+  ti=0;
+  do{
+    if(_tokens[ti][1]-_depth<32)code+=0x80000000U>>_tokens[ti++][1]-_depth;
+    else{
+      /*Because of the expanded internal tokens, we can have codewords as long
+         as 35 bits.
+        A single recursion here is enough to advance past them.
+        The increment of code accounts for one complete subtree rooted 31
+         levels below _depth (0x80000000U>>31 is 1), and the recursive call
+         counts the tokens inside it.*/
+      code++;
+      ti+=oc_huff_subtree_tokens(_tokens+ti,_depth+31);
+    }
+  }
+  while(code<0x80000000U);
+  return ti;
+}
+
+/*Compute the number of bits to use for a collapsed tree node at the given
+   depth.
+  Starting from one bit, wider nodes are tried for as long as each extra bit
+   increases the number of distinct children and at least 1/slush of the
+   1<<nbits table entries would be distinct.
+  The result is the widest accepted width at which some codeword ends exactly
+   at that depth, or 1 if there is none.
+  _tokens:  A list of internal tokens, in the order they are found in the
+             codebook, and the lengths of their corresponding codewords.
+  _ntokens: The number of tokens corresponding to this tree node.
+  _depth:   The depth of this tree node.
+  Return: The number of bits to use for a collapsed tree node rooted here.
+          This is always at least one, even if this was a leaf node.*/
+static int oc_huff_tree_collapse_depth(unsigned char _tokens[][2],
+ int _ntokens,int _depth){
+  int got_leaves;
+  int loccupancy;
+  int occupancy;
+  int slush;
+  int nbits;
+  int best_nbits;
+  slush=_depth>0?OC_HUFF_SLUSH:OC_ROOT_HUFF_SLUSH; /*The root gets its own limit.*/
+  /*It's legal to have a tree with just a single node, which requires no bits
+     to decode and always returns the same token.
+    However, no encoder actually does this (yet).
+    To avoid a special case in oc_huff_token_decode(), we force the number of
+     lookahead bits to be at least one.
+    This will produce a tree that looks ahead one bit and then advances the
+     stream zero bits.*/
+  nbits=1;
+  occupancy=2;
+  got_leaves=1;
+  do{
+    int ti;
+    if(got_leaves)best_nbits=nbits; /*Always taken on the first pass.*/
+    nbits++;
+    got_leaves=0;
+    loccupancy=occupancy;
+    /*Count the distinct children the node would have with nbits bits: each
+       pass of this loop accounts for exactly one child.*/
+    for(occupancy=ti=0;ti<_ntokens;occupancy++){
+      if(_tokens[ti][1]<_depth+nbits)ti++; /*A shorter codeword: one leaf.*/
+      else if(_tokens[ti][1]==_depth+nbits){
+        got_leaves=1;
+        ti++;
+      }
+      else ti+=oc_huff_subtree_tokens(_tokens+ti,_depth+nbits); /*Skip a whole subtree.*/
+    }
+  }
+  while(occupancy>loccupancy&&occupancy*slush>=1<<nbits);
+  return best_nbits;
+}
+
+/*Computes how many words one collapsed tree node occupies: a single header
+   word followed by one word for each of its 1<<_nbits children.
+  _nbits: The depth of the subtree.
+          This must be greater than zero.
+  Return: The number of words required to store the node.*/
+static size_t oc_huff_node_size(int _nbits){
+  int nchildren;
+  nchildren=1<<_nbits;
+  return nchildren+1;
+}
+
+/*Produces a collapsed-tree representation of the given token list.
+  The recursion over subtrees is unrolled into three explicit stacks indexed
+   by the nesting level l: node[l] is the next word to write in the node being
+   filled at that level, depth[l] is that node's depth in bits, and last[l] is
+   the index of the last token that belongs to it.
+  _tree: The storage for the collapsed Huffman tree.
+         This may be NULL to compute the required storage size instead of
+          constructing the tree.
+  _tokens:  A list of internal tokens, in the order they are found in the
+             codebook, and the lengths of their corresponding codewords.
+  _ntokens: The number of tokens corresponding to this tree node.
+  Return: The number of words required to store the tree.*/
+static size_t oc_huff_tree_collapse(ogg_int16_t *_tree,
+ unsigned char _tokens[][2],int _ntokens){
+  ogg_int16_t   node[34];
+  unsigned char depth[34];
+  unsigned char last[34];
+  size_t        ntree;
+  int           ti;
+  int           l;
+  depth[0]=0;
+  last[0]=(unsigned char)(_ntokens-1);
+  ntree=0;
+  ti=0;
+  l=0;
+  do{
+    int nbits;
+    nbits=oc_huff_tree_collapse_depth(_tokens+ti,last[l]+1-ti,depth[l]);
+    node[l]=(ogg_int16_t)ntree; /*A new node starts at the end of the tree.*/
+    ntree+=oc_huff_node_size(nbits);
+    if(_tree!=NULL)_tree[node[l]++]=(ogg_int16_t)nbits; /*The header word.*/
+    do{
+      while(ti<=last[l]&&_tokens[ti][1]<=depth[l]+nbits){
+        if(_tree!=NULL){
+          ogg_int16_t leaf;
+          int         nentries;
+          nentries=1<<depth[l]+nbits-_tokens[ti][1]; /*Duplicates for short codes.*/
+          leaf=(ogg_int16_t)-(_tokens[ti][1]-depth[l]<<8|_tokens[ti][0]);
+          while(nentries-->0)_tree[node[l]++]=leaf;
+        }
+        ti++;
+      }
+      if(ti<=last[l]){
+        /*We need to recurse*/
+        depth[l+1]=(unsigned char)(depth[l]+nbits);
+        if(_tree!=NULL)_tree[node[l]++]=(ogg_int16_t)ntree; /*Index of the child.*/
+        l++;
+        last[l]=
+         (unsigned char)(ti+oc_huff_subtree_tokens(_tokens+ti,depth[l])-1);
+        break;
+      }
+      /*Pop back up a level of recursion.*/
+      else if(l-->0)nbits=depth[l+1]-depth[l];
+    }
+    while(l>=0);
+  }
+  while(l>=0);
+  return ntree;
+}
+
+/*Reads every Huffman table from the buffer and stores each one in its
+   collapsed form.
+  _opb:   The buffer to unpack the trees from.
+  _nodes: The table to fill with the Huffman trees; each entry receives a
+           newly allocated collapsed tree.
+  Return: 0 on success, or a negative value on error.
+          On failure the entries of _nodes filled in so far are left in place;
+           the caller is responsible for cleaning up any partially
+           initialized _nodes.*/
+int oc_huff_trees_unpack(oc_pack_buf *_opb,
+ ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]){
+  int ti;
+  ti=0;
+  while(ti<TH_NHUFFMAN_TABLES){
+    unsigned char  token_list[256][2];
+    ogg_int16_t   *collapsed;
+    size_t         nwords;
+    int            ntokens;
+    /*First expand the codebook into a flat list of tokens.*/
+    ntokens=oc_huff_tree_unpack(_opb,token_list);
+    if(ntokens<0)return ntokens;
+    /*A dry run with no destination tells us how many words to allocate.*/
+    nwords=oc_huff_tree_collapse(NULL,token_list,ntokens);
+    /*This should never happen; if it does it means you set OC_HUFF_SLUSH or
+       OC_ROOT_HUFF_SLUSH too large.*/
+    if(nwords>32767)return TH_EIMPL;
+    collapsed=(ogg_int16_t *)_ogg_malloc(nwords*sizeof(*collapsed));
+    if(collapsed==NULL)return TH_EFAULT;
+    /*Now construct the collapsed tree for real.*/
+    oc_huff_tree_collapse(collapsed,token_list,ntokens);
+    _nodes[ti]=collapsed;
+    ti++;
+  }
+  return 0;
+}
+
+/*Measures a collapsed Huffman subtree by walking it recursively.
+  The size is that of the node itself plus the sizes of all internal nodes
+   reachable through its child table; leaves live inside the table and add
+   nothing.
+  _tree: The complete Huffman tree.
+  _node: The index of the root of the desired subtree.
+  Return: The number of words required to store the tree.*/
+static size_t oc_huff_tree_size(const ogg_int16_t *_tree,int _node){
+  const ogg_int16_t *children;
+  size_t             nwords;
+  int                nbits;
+  int                nchildren;
+  int                ci;
+  nbits=_tree[_node];
+  nchildren=1<<nbits;
+  nwords=oc_huff_node_size(nbits);
+  /*The child table starts right after the header word.*/
+  children=_tree+_node+1;
+  ci=0;
+  do{
+    int child;
+    child=children[ci];
+    if(child>0){
+      /*An internal node: add its whole subtree.*/
+      nwords+=oc_huff_tree_size(_tree,child);
+      ci++;
+    }
+    else{
+      int leaf_bits;
+      /*A leaf: skip all of its duplicate entries in one step.*/
+      leaf_bits=-child>>8;
+      ci+=1<<(nbits-leaf_bits);
+    }
+  }
+  while(ci<nchildren);
+  return nwords;
+}
+
+/*Makes a copy of the given set of Huffman trees.
+  The size of each source tree is measured with oc_huff_tree_size(), and a
+   buffer of exactly that many words is allocated for the copy.
+  _dst: The array to store the copy in.
+  _src: The array of trees to copy.
+  Return: 0 on success, or TH_EFAULT if an allocation fails.
+          On failure, every tree allocated by this call has already been
+           freed; the corresponding entries of _dst are not reset and must
+           not be used or freed again.*/
+int oc_huff_trees_copy(ogg_int16_t *_dst[TH_NHUFFMAN_TABLES],
+ const ogg_int16_t *const _src[TH_NHUFFMAN_TABLES]){
+  int i;
+  for(i=0;i<TH_NHUFFMAN_TABLES;i++){
+    size_t size;
+    size=oc_huff_tree_size(_src[i],0);
+    _dst[i]=(ogg_int16_t *)_ogg_malloc(size*sizeof(*_dst[i]));
+    if(_dst[i]==NULL){
+      /*Release the copies made before the one that failed.*/
+      while(i-->0)_ogg_free(_dst[i]);
+      return TH_EFAULT;
+    }
+    memcpy(_dst[i],_src[i],size*sizeof(*_dst[i]));
+  }
+  return 0;
+}
+
+/*Releases every tree in a set of Huffman trees by passing each entry to
+   _ogg_free().
+  The entries themselves are not modified.
+  _nodes: The array of trees to free.*/
+void oc_huff_trees_clear(ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]){
+  int ti;
+  ti=0;
+  while(ti<TH_NHUFFMAN_TABLES){
+    _ogg_free(_nodes[ti]);
+    ti++;
+  }
+}
+
+
+/*Unpacks a single token using the given Huffman tree.
+  The reader state (ptr, window, bits) is copied into locals for the walk down
+   the tree and written back to _opb before returning.
+  _opb:  The buffer to unpack the token from.
+  _tree: The tree to unpack the token with.
+  Return: The token value.*/
+int oc_huff_token_decode_c(oc_pack_buf *_opb,const ogg_int16_t *_tree){
+  const unsigned char *ptr;
+  const unsigned char *stop;
+  oc_pb_window         window;
+  int                  available;
+  long                 bits;
+  int                  node;
+  int                  n;
+  ptr=_opb->ptr;
+  window=_opb->window;
+  stop=_opb->stop;
+  available=_opb->bits;
+  node=0; /*Start at the root, which is stored at index 0.*/
+  for(;;){
+    n=_tree[node]; /*The number of bits this node looks ahead.*/
+    if(n>available){
+      unsigned shift;
+      shift=OC_PB_WINDOW_SIZE-available;
+      do{
+        /*We don't bother setting eof because we won't check for it after we've
+           started decoding DCT tokens.*/
+        if(ptr>=stop){
+          shift=(unsigned)-OC_LOTS_OF_BITS;
+          break;
+        }
+        shift-=8;
+        window|=(oc_pb_window)*ptr++<<shift;
+      }
+      while(shift>=8);
+      /*Note: We never request more than 24 bits, so there's no need to fill in
+         the last partial byte here.*/
+      available=OC_PB_WINDOW_SIZE-shift;
+    }
+    bits=window>>OC_PB_WINDOW_SIZE-n; /*The top n bits of the window.*/
+    node=_tree[node+1+bits]; /*The child table follows the header word.*/
+    if(node<=0)break; /*Entries that are zero or negative are leaves.*/
+    window<<=n;
+    available-=n;
+  }
+  node=-node;
+  n=node>>8; /*The number of bits actually used to reach this leaf.*/
+  window<<=n;
+  available-=n;
+  _opb->ptr=ptr;
+  _opb->window=window;
+  _opb->bits=available;
+  return node&255; /*The low 8 bits of the leaf hold the token.*/
+}

+ 32 - 0
jni/libtheora-1.2.0alpha1/lib/huffdec.h

@@ -0,0 +1,32 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id$
+
+ ********************************************************************/
+
+#if !defined(_huffdec_H)
+# define _huffdec_H (1)
+# include "huffman.h"
+# include "bitpack.h"
+
+
+
+/*Unpacks a set of Huffman trees into their collapsed representation.
+  Returns 0 on success, or a negative value on error.*/
+int oc_huff_trees_unpack(oc_pack_buf *_opb,
+ ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]);
+/*Makes a copy of a set of Huffman trees.
+  Returns 0 on success, or a negative value on error.*/
+int oc_huff_trees_copy(ogg_int16_t *_dst[TH_NHUFFMAN_TABLES],
+ const ogg_int16_t *const _src[TH_NHUFFMAN_TABLES]);
+/*Frees the memory used by a set of Huffman trees.*/
+void oc_huff_trees_clear(ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]);
+/*Unpacks a single token from _opb using the collapsed tree _tree, and returns
+   the token value.*/
+int oc_huff_token_decode_c(oc_pack_buf *_opb,const ogg_int16_t *_tree);
+
+#endif

+ 966 - 0
jni/libtheora-1.2.0alpha1/lib/huffenc.c

@@ -0,0 +1,966 @@
+#include <stdlib.h>
+#include <string.h>
+#include <ogg/ogg.h>
+#include "huffenc.h"
+
+
+
+/*The default Huffman codes used for VP3.1.*/
+const th_huff_code TH_VP31_HUFF_CODES[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]={
+  {
+    {0x002D, 6},{0x0026, 7},{0x0166, 9},{0x004E, 8},
+    {0x02CE,10},{0x059E,11},{0x027D,11},{0x0008, 5},
+    {0x04F9,12},{0x000F, 4},{0x000E, 4},{0x001B, 5},
+    {0x0006, 4},{0x0008, 4},{0x0005, 4},{0x001A, 5},
+    {0x0015, 5},{0x0007, 4},{0x000C, 4},{0x0001, 3},
+    {0x0000, 3},{0x0009, 4},{0x0017, 5},{0x0029, 6},
+    {0x0028, 6},{0x00B2, 8},{0x04F8,12},{0x059F,11},
+    {0x009E, 9},{0x013F,10},{0x0012, 6},{0x0058, 7}
+  },
+  {
+    {0x0010, 5},{0x0047, 7},{0x01FF, 9},{0x008C, 8},
+    {0x03FC,10},{0x046A,11},{0x0469,11},{0x0022, 6},
+    {0x11A1,13},{0x000E, 4},{0x000D, 4},{0x0004, 4},
+    {0x0005, 4},{0x0009, 4},{0x0006, 4},{0x001E, 5},
+    {0x0016, 5},{0x0007, 4},{0x000C, 4},{0x0001, 3},
+    {0x0000, 3},{0x000A, 4},{0x0017, 5},{0x007D, 7},
+    {0x007E, 7},{0x011B, 9},{0x08D1,12},{0x03FD,10},
+    {0x046B,11},{0x11A0,13},{0x007C, 7},{0x00FE, 8}
+  },
+  {
+    {0x0016, 5},{0x0020, 6},{0x0086, 8},{0x0087, 8},
+    {0x0367,10},{0x06CC,11},{0x06CB,11},{0x006E, 7},
+    {0x366D,14},{0x000F, 4},{0x000E, 4},{0x0004, 4},
+    {0x0005, 4},{0x000A, 4},{0x0006, 4},{0x001A, 5},
+    {0x0011, 5},{0x0007, 4},{0x000C, 4},{0x0001, 3},
+    {0x0000, 3},{0x0009, 4},{0x0017, 5},{0x006F, 7},
+    {0x006D, 7},{0x0364,10},{0x0D9A,12},{0x06CA,11},
+    {0x1B37,13},{0x366C,14},{0x0042, 7},{0x00D8, 8}
+  },
+  {
+    {0x0000, 4},{0x002D, 6},{0x00F7, 8},{0x0058, 7},
+    {0x0167, 9},{0x02CB,10},{0x02CA,10},{0x000E, 6},
+    {0x1661,13},{0x0003, 3},{0x0002, 3},{0x0008, 4},
+    {0x0009, 4},{0x000D, 4},{0x0002, 4},{0x001F, 5},
+    {0x0017, 5},{0x0001, 4},{0x000C, 4},{0x000E, 4},
+    {0x000A, 4},{0x0006, 5},{0x0078, 7},{0x000F, 6},
+    {0x007A, 7},{0x0164, 9},{0x0599,11},{0x02CD,10},
+    {0x0B31,12},{0x1660,13},{0x0079, 7},{0x00F6, 8}
+  },
+  {
+    {0x0003, 4},{0x003C, 6},{0x000F, 7},{0x007A, 7},
+    {0x001D, 8},{0x0020, 9},{0x0072,10},{0x0006, 6},
+    {0x0399,13},{0x0004, 3},{0x0005, 3},{0x0005, 4},
+    {0x0006, 4},{0x000E, 4},{0x0004, 4},{0x0000, 4},
+    {0x0019, 5},{0x0002, 4},{0x000D, 4},{0x0007, 4},
+    {0x001F, 5},{0x0030, 6},{0x0011, 8},{0x0031, 6},
+    {0x0005, 6},{0x0021, 9},{0x00E7,11},{0x0038, 9},
+    {0x01CD,12},{0x0398,13},{0x007B, 7},{0x0009, 7}
+  },
+  {
+    {0x0009, 4},{0x0002, 5},{0x0074, 7},{0x0007, 6},
+    {0x00EC, 8},{0x00D1, 9},{0x01A6,10},{0x0006, 6},
+    {0x0D21,13},{0x0005, 3},{0x0006, 3},{0x0008, 4},
+    {0x0007, 4},{0x000F, 4},{0x0004, 4},{0x0000, 4},
+    {0x001C, 5},{0x0002, 4},{0x0005, 4},{0x0003, 4},
+    {0x000C, 5},{0x0035, 7},{0x01A7,10},{0x001B, 6},
+    {0x0077, 7},{0x01A5,10},{0x0349,11},{0x00D0, 9},
+    {0x0691,12},{0x0D20,13},{0x0075, 7},{0x00ED, 8}
+  },
+  {
+    {0x000A, 4},{0x000C, 5},{0x0012, 6},{0x001B, 6},
+    {0x00B7, 8},{0x016C, 9},{0x0099, 9},{0x005A, 7},
+    {0x16D8,13},{0x0007, 3},{0x0006, 3},{0x0009, 4},
+    {0x0008, 4},{0x0000, 3},{0x0005, 4},{0x0017, 5},
+    {0x000E, 5},{0x0002, 4},{0x0003, 4},{0x000F, 5},
+    {0x001A, 6},{0x004D, 8},{0x2DB3,14},{0x002C, 6},
+    {0x0011, 6},{0x02DA,10},{0x05B7,11},{0x0098, 9},
+    {0x0B6D,12},{0x2DB2,14},{0x0010, 6},{0x0027, 7}
+  },
+  {
+    {0x000D, 4},{0x000F, 5},{0x001D, 6},{0x0008, 5},
+    {0x0051, 7},{0x0056, 8},{0x00AF, 9},{0x002A, 7},
+    {0x148A,13},{0x0007, 3},{0x0000, 2},{0x0008, 4},
+    {0x0009, 4},{0x000C, 4},{0x0006, 4},{0x0017, 5},
+    {0x000B, 5},{0x0016, 5},{0x0015, 5},{0x0009, 5},
+    {0x0050, 7},{0x00AE, 9},{0x2917,14},{0x001C, 6},
+    {0x0014, 6},{0x0290,10},{0x0523,11},{0x0149, 9},
+    {0x0A44,12},{0x2916,14},{0x0053, 7},{0x00A5, 8}
+  },
+  {
+    {0x0001, 4},{0x001D, 6},{0x00F5, 8},{0x00F4, 8},
+    {0x024D,10},{0x0499,11},{0x0498,11},{0x0001, 5},
+    {0x0021, 6},{0x0006, 3},{0x0005, 3},{0x0006, 4},
+    {0x0005, 4},{0x0002, 4},{0x0007, 5},{0x0025, 6},
+    {0x007B, 7},{0x001C, 6},{0x0020, 6},{0x000D, 6},
+    {0x0048, 7},{0x0092, 8},{0x0127, 9},{0x000E, 4},
+    {0x0004, 4},{0x0011, 5},{0x000C, 6},{0x003C, 6},
+    {0x000F, 5},{0x0000, 5},{0x001F, 5},{0x0013, 5}
+  },
+  {
+    {0x0005, 4},{0x003C, 6},{0x0040, 7},{0x000D, 7},
+    {0x0031, 9},{0x0061,10},{0x0060,10},{0x0002, 5},
+    {0x00F5, 8},{0x0006, 3},{0x0005, 3},{0x0007, 4},
+    {0x0006, 4},{0x0002, 4},{0x0009, 5},{0x0025, 6},
+    {0x0007, 6},{0x0021, 6},{0x0024, 6},{0x0010, 6},
+    {0x0041, 7},{0x00F4, 8},{0x0019, 8},{0x000E, 4},
+    {0x0003, 4},{0x0011, 5},{0x0011, 6},{0x003F, 6},
+    {0x003E, 6},{0x007B, 7},{0x0000, 4},{0x0013, 5}
+  },
+  {
+    {0x000A, 4},{0x0007, 5},{0x0001, 6},{0x0009, 6},
+    {0x0131, 9},{0x0261,10},{0x0260,10},{0x0015, 6},
+    {0x0001, 7},{0x0007, 3},{0x0006, 3},{0x0008, 4},
+    {0x0007, 4},{0x0006, 4},{0x0012, 5},{0x002F, 6},
+    {0x0014, 6},{0x0027, 6},{0x002D, 6},{0x0016, 6},
+    {0x004D, 7},{0x0099, 8},{0x0000, 7},{0x0004, 4},
+    {0x0001, 4},{0x0005, 5},{0x0017, 6},{0x002E, 6},
+    {0x002C, 6},{0x0008, 6},{0x0006, 5},{0x0001, 5}
+  },
+  {
+    {0x0000, 3},{0x000E, 5},{0x0017, 6},{0x002A, 6},
+    {0x0010, 7},{0x00F9,10},{0x00F8,10},{0x001E, 7},
+    {0x003F, 8},{0x0007, 3},{0x0006, 3},{0x0009, 4},
+    {0x0008, 4},{0x0006, 4},{0x000F, 5},{0x0005, 5},
+    {0x0016, 6},{0x0029, 6},{0x002B, 6},{0x0015, 6},
+    {0x0050, 7},{0x0011, 7},{0x007D, 9},{0x0004, 4},
+    {0x0017, 5},{0x0006, 5},{0x0014, 6},{0x002C, 6},
+    {0x002D, 6},{0x000E, 6},{0x0009, 6},{0x0051, 7}
+  },
+  {
+    {0x0002, 3},{0x0018, 5},{0x002F, 6},{0x000D, 5},
+    {0x0053, 7},{0x0295,10},{0x0294,10},{0x00A4, 8},
+    {0x007C, 8},{0x0000, 2},{0x0007, 3},{0x0009, 4},
+    {0x0008, 4},{0x001B, 5},{0x000C, 5},{0x0028, 6},
+    {0x006A, 7},{0x001E, 6},{0x001D, 6},{0x0069, 7},
+    {0x00D7, 8},{0x007D, 8},{0x014B, 9},{0x0019, 5},
+    {0x0016, 5},{0x002E, 6},{0x001C, 6},{0x002B, 6},
+    {0x002A, 6},{0x0068, 7},{0x003F, 7},{0x00D6, 8}
+  },
+  {
+    {0x0002, 3},{0x001B, 5},{0x000C, 5},{0x0018, 5},
+    {0x0029, 6},{0x007F, 8},{0x02F0,10},{0x0198, 9},
+    {0x0179, 9},{0x0000, 2},{0x0007, 3},{0x0009, 4},
+    {0x0008, 4},{0x001A, 5},{0x000D, 5},{0x002A, 6},
+    {0x0064, 7},{0x001E, 6},{0x0067, 7},{0x005F, 7},
+    {0x00CD, 8},{0x007E, 8},{0x02F1,10},{0x0016, 5},
+    {0x000E, 5},{0x002E, 6},{0x0065, 7},{0x002B, 6},
+    {0x0028, 6},{0x003E, 7},{0x00BD, 8},{0x0199, 9}
+  },
+  {
+    {0x0002, 3},{0x0007, 4},{0x0016, 5},{0x0006, 4},
+    {0x0036, 6},{0x005C, 7},{0x015D, 9},{0x015C, 9},
+    {0x02BF,10},{0x0000, 2},{0x0007, 3},{0x0009, 4},
+    {0x0008, 4},{0x0018, 5},{0x0034, 6},{0x002A, 6},
+    {0x005E, 7},{0x006A, 7},{0x0064, 7},{0x005D, 7},
+    {0x00CB, 8},{0x00AD, 8},{0x02BE,10},{0x0014, 5},
+    {0x0033, 6},{0x006E, 7},{0x005F, 7},{0x006F, 7},
+    {0x006B, 7},{0x00CA, 8},{0x00AC, 8},{0x015E, 9}
+  },
+  {
+    {0x000F, 4},{0x001D, 5},{0x0018, 5},{0x000B, 4},
+    {0x0019, 5},{0x0029, 6},{0x00D6, 8},{0x0551,11},
+    {0x0AA1,12},{0x0001, 2},{0x0000, 2},{0x0009, 4},
+    {0x0008, 4},{0x001B, 5},{0x0038, 6},{0x0028, 6},
+    {0x0057, 7},{0x006A, 7},{0x0068, 7},{0x0056, 7},
+    {0x00E5, 8},{0x0155, 9},{0x0AA0,12},{0x0073, 7},
+    {0x0069, 7},{0x00D7, 8},{0x00AB, 8},{0x00E4, 8},
+    {0x00A9, 8},{0x0151, 9},{0x0150, 9},{0x02A9,10}
+  },
+  {
+    {0x0008, 5},{0x0025, 7},{0x017A, 9},{0x02F7,10},
+    {0x0BDB,12},{0x17B4,13},{0x2F6B,14},{0x001D, 5},
+    {0x2F6A,14},{0x0008, 4},{0x0007, 4},{0x0001, 4},
+    {0x0002, 4},{0x000A, 4},{0x0006, 4},{0x0000, 4},
+    {0x001C, 5},{0x0009, 4},{0x000D, 4},{0x000F, 4},
+    {0x000C, 4},{0x0003, 4},{0x000A, 5},{0x0016, 5},
+    {0x0013, 6},{0x005D, 7},{0x0024, 7},{0x00BC, 8},
+    {0x005C, 7},{0x05EC,11},{0x000B, 5},{0x005F, 7}
+  },
+  {
+    {0x000F, 5},{0x0010, 6},{0x004B, 8},{0x00C6, 8},
+    {0x031D,10},{0x0C71,12},{0x0C70,12},{0x0001, 4},
+    {0x0C73,12},{0x0008, 4},{0x0009, 4},{0x0002, 4},
+    {0x0003, 4},{0x000B, 4},{0x0006, 4},{0x0000, 4},
+    {0x001C, 5},{0x0005, 4},{0x000D, 4},{0x000F, 4},
+    {0x000A, 4},{0x0019, 5},{0x0013, 6},{0x001D, 5},
+    {0x0030, 6},{0x0062, 7},{0x0024, 7},{0x004A, 8},
+    {0x018F, 9},{0x0C72,12},{0x000E, 5},{0x0011, 6}
+  },
+  {
+    {0x001B, 5},{0x0003, 6},{0x008D, 8},{0x0040, 7},
+    {0x0239,10},{0x0471,11},{0x08E0,12},{0x0003, 4},
+    {0x11C3,13},{0x000A, 4},{0x0009, 4},{0x0004, 4},
+    {0x0005, 4},{0x000E, 4},{0x0007, 4},{0x0001, 4},
+    {0x001E, 5},{0x0006, 4},{0x000C, 4},{0x000B, 4},
+    {0x0002, 4},{0x0000, 5},{0x0041, 7},{0x001F, 5},
+    {0x0022, 6},{0x0002, 6},{0x008F, 8},{0x008C, 8},
+    {0x011D, 9},{0x11C2,13},{0x001A, 5},{0x0021, 6}
+  },
+  {
+    {0x001F, 5},{0x0003, 6},{0x0003, 7},{0x0043, 7},
+    {0x000B, 9},{0x0015,10},{0x0051,12},{0x0003, 4},
+    {0x0050,12},{0x000D, 4},{0x000C, 4},{0x0004, 4},
+    {0x0006, 4},{0x000E, 4},{0x000A, 4},{0x0001, 4},
+    {0x001E, 5},{0x0005, 4},{0x0009, 4},{0x0007, 4},
+    {0x0011, 5},{0x0002, 6},{0x0004, 8},{0x0002, 4},
+    {0x002D, 6},{0x0020, 6},{0x0042, 7},{0x0001, 7},
+    {0x0000, 7},{0x0029,11},{0x0017, 5},{0x002C, 6}
+  },
+  {
+    {0x0003, 4},{0x001F, 6},{0x003A, 7},{0x005D, 7},
+    {0x0173, 9},{0x02E4,10},{0x172D,13},{0x0004, 4},
+    {0x172C,13},{0x000F, 4},{0x000E, 4},{0x0009, 4},
+    {0x0008, 4},{0x000C, 4},{0x000A, 4},{0x0001, 4},
+    {0x0016, 5},{0x0002, 4},{0x0005, 4},{0x001A, 5},
+    {0x002F, 6},{0x0038, 7},{0x05CA,11},{0x0006, 4},
+    {0x0037, 6},{0x001E, 6},{0x003B, 7},{0x0039, 7},
+    {0x00B8, 8},{0x0B97,12},{0x0000, 4},{0x0036, 6}
+  },
+  {
+    {0x0006, 4},{0x0037, 6},{0x005D, 7},{0x000C, 6},
+    {0x00B9, 8},{0x02E3,10},{0x05C4,11},{0x0004, 4},
+    {0x1715,13},{0x0000, 3},{0x000F, 4},{0x0008, 4},
+    {0x0007, 4},{0x000C, 4},{0x0009, 4},{0x001D, 5},
+    {0x0016, 5},{0x001C, 5},{0x001A, 5},{0x000B, 5},
+    {0x005E, 7},{0x0170, 9},{0x1714,13},{0x000A, 4},
+    {0x000A, 5},{0x0036, 6},{0x005F, 7},{0x001B, 7},
+    {0x001A, 7},{0x0B8B,12},{0x0002, 4},{0x0007, 5}
+  },
+  {
+    {0x000C, 4},{0x000B, 5},{0x0079, 7},{0x0022, 6},
+    {0x00F0, 8},{0x0119, 9},{0x0230,10},{0x001D, 5},
+    {0x08C4,12},{0x0001, 3},{0x0000, 3},{0x000A, 4},
+    {0x0009, 4},{0x000B, 4},{0x0007, 4},{0x001C, 5},
+    {0x003D, 6},{0x000D, 5},{0x0008, 5},{0x0015, 6},
+    {0x008D, 8},{0x118B,13},{0x118A,13},{0x000D, 4},
+    {0x0010, 5},{0x0009, 5},{0x0014, 6},{0x0047, 7},
+    {0x00F1, 8},{0x0463,11},{0x001F, 5},{0x000C, 5}
+  },
+  {
+    {0x0000, 3},{0x001A, 5},{0x0033, 6},{0x000C, 5},
+    {0x0046, 7},{0x01E3, 9},{0x03C5,10},{0x0017, 5},
+    {0x1E21,13},{0x0002, 3},{0x0001, 3},{0x0009, 4},
+    {0x000A, 4},{0x0007, 4},{0x001B, 5},{0x003D, 6},
+    {0x001B, 6},{0x0022, 6},{0x0079, 7},{0x00F0, 8},
+    {0x1E20,13},{0x1E23,13},{0x1E22,13},{0x000E, 4},
+    {0x0016, 5},{0x0018, 5},{0x0032, 6},{0x001A, 6},
+    {0x0047, 7},{0x0789,11},{0x001F, 5},{0x0010, 5}
+  },
+  {
+    {0x001D, 5},{0x0061, 7},{0x004E, 8},{0x009E, 9},
+    {0x027C,11},{0x09F5,13},{0x09F4,13},{0x0003, 4},
+    {0x0060, 7},{0x0000, 3},{0x000F, 4},{0x000B, 4},
+    {0x000A, 4},{0x0009, 4},{0x0005, 4},{0x000D, 5},
+    {0x0031, 6},{0x0008, 5},{0x0038, 6},{0x0012, 6},
+    {0x0026, 7},{0x013F,10},{0x04FB,12},{0x000D, 4},
+    {0x0002, 4},{0x000C, 5},{0x0039, 6},{0x001C, 6},
+    {0x000F, 5},{0x001D, 6},{0x0008, 4},{0x0019, 5}
+  },
+  {
+    {0x0007, 4},{0x0019, 6},{0x00AB, 8},{0x00AA, 8},
+    {0x0119,10},{0x0461,12},{0x0460,12},{0x001B, 5},
+    {0x0047, 8},{0x0001, 3},{0x0000, 3},{0x000C, 4},
+    {0x000B, 4},{0x0009, 4},{0x0005, 4},{0x000D, 5},
+    {0x0035, 6},{0x003D, 6},{0x003C, 6},{0x0018, 6},
+    {0x0022, 7},{0x008D, 9},{0x0231,11},{0x000E, 4},
+    {0x001F, 5},{0x0009, 5},{0x002B, 6},{0x0010, 6},
+    {0x0034, 6},{0x0054, 7},{0x0008, 4},{0x0014, 5}
+  },
+  {
+    {0x000C, 4},{0x0005, 5},{0x0008, 6},{0x005B, 7},
+    {0x004D, 9},{0x0131,11},{0x0261,12},{0x001A, 5},
+    {0x0012, 7},{0x0000, 3},{0x000F, 4},{0x000A, 4},
+    {0x0009, 4},{0x0006, 4},{0x001B, 5},{0x0006, 5},
+    {0x001C, 6},{0x002C, 6},{0x0015, 6},{0x005A, 7},
+    {0x0027, 8},{0x0099,10},{0x0260,12},{0x000E, 4},
+    {0x0004, 4},{0x000F, 5},{0x0007, 5},{0x001D, 6},
+    {0x000B, 5},{0x0014, 6},{0x0008, 4},{0x0017, 5}
+  },
+  {
+    {0x000F, 4},{0x0013, 5},{0x0075, 7},{0x0024, 6},
+    {0x0095, 8},{0x0251,10},{0x04A0,11},{0x0010, 5},
+    {0x00C8, 8},{0x0002, 3},{0x0001, 3},{0x0001, 4},
+    {0x0000, 4},{0x001A, 5},{0x0011, 5},{0x002C, 6},
+    {0x0065, 7},{0x0074, 7},{0x004B, 7},{0x00C9, 8},
+    {0x0129, 9},{0x0943,12},{0x0942,12},{0x0003, 3},
+    {0x000A, 4},{0x001C, 5},{0x0018, 5},{0x0033, 6},
+    {0x0017, 5},{0x002D, 6},{0x001B, 5},{0x003B, 6}
+  },
+  {
+    {0x0003, 3},{0x001A, 5},{0x002D, 6},{0x0038, 6},
+    {0x0028, 7},{0x0395,10},{0x0E51,12},{0x0037, 6},
+    {0x00E4, 8},{0x0001, 3},{0x0000, 3},{0x001F, 5},
+    {0x001E, 5},{0x0017, 5},{0x003A, 6},{0x0073, 7},
+    {0x002A, 7},{0x002B, 7},{0x0029, 7},{0x01CB, 9},
+    {0x0729,11},{0x1CA1,13},{0x1CA0,13},{0x0004, 3},
+    {0x000A, 4},{0x0004, 4},{0x0018, 5},{0x0036, 6},
+    {0x000B, 5},{0x002C, 6},{0x0019, 5},{0x003B, 6}
+  },
+  {
+    {0x0004, 3},{0x0004, 4},{0x003F, 6},{0x0017, 5},
+    {0x0075, 7},{0x01F5, 9},{0x07D1,11},{0x0017, 6},
+    {0x01F6, 9},{0x0001, 3},{0x0000, 3},{0x001B, 5},
+    {0x001A, 5},{0x000A, 5},{0x0032, 6},{0x0074, 7},
+    {0x00F8, 8},{0x00F9, 8},{0x01F7, 9},{0x03E9,10},
+    {0x0FA0,12},{0x1F43,13},{0x1F42,13},{0x0003, 3},
+    {0x000A, 4},{0x001E, 5},{0x001C, 5},{0x003B, 6},
+    {0x0018, 5},{0x0016, 6},{0x0016, 5},{0x0033, 6}
+  },
+  {
+    {0x0004, 3},{0x0007, 4},{0x0018, 5},{0x001E, 5},
+    {0x0036, 6},{0x0031, 7},{0x0177, 9},{0x0077, 7},
+    {0x0176, 9},{0x0001, 3},{0x0000, 3},{0x001A, 5},
+    {0x0019, 5},{0x003A, 6},{0x0019, 6},{0x005C, 7},
+    {0x00BA, 8},{0x0061, 8},{0x00C1, 9},{0x0180,10},
+    {0x0302,11},{0x0607,12},{0x0606,12},{0x0002, 3},
+    {0x000A, 4},{0x001F, 5},{0x001C, 5},{0x0037, 6},
+    {0x0016, 5},{0x0076, 7},{0x000D, 5},{0x002F, 6}
+  },
+  {
+    {0x0000, 3},{0x000A, 4},{0x001A, 5},{0x000C, 4},
+    {0x001D, 5},{0x0039, 6},{0x0078, 7},{0x005E, 7},
+    {0x0393,11},{0x0002, 3},{0x0001, 3},{0x0016, 5},
+    {0x000F, 5},{0x002E, 6},{0x005F, 7},{0x0073, 8},
+    {0x00E5, 9},{0x01C8,10},{0x0E4A,13},{0x1C97,14},
+    {0x1C96,14},{0x0E49,13},{0x0E48,13},{0x0004, 3},
+    {0x0006, 4},{0x001F, 5},{0x001B, 5},{0x001D, 6},
+    {0x0038, 6},{0x0038, 7},{0x003D, 6},{0x0079, 7}
+  },
+  {
+    {0x000B, 5},{0x002B, 7},{0x0054, 8},{0x01B7, 9},
+    {0x06D9,11},{0x0DB1,12},{0x0DB0,12},{0x0002, 4},
+    {0x00AB, 9},{0x0009, 4},{0x000A, 4},{0x0007, 4},
+    {0x0008, 4},{0x000F, 4},{0x000C, 4},{0x0003, 4},
+    {0x001D, 5},{0x0004, 4},{0x000B, 4},{0x0006, 4},
+    {0x001A, 5},{0x0003, 6},{0x00AA, 9},{0x0001, 4},
+    {0x0000, 5},{0x0014, 6},{0x006C, 7},{0x00DA, 8},
+    {0x0002, 6},{0x036D,10},{0x001C, 5},{0x0037, 6}
+  },
+  {
+    {0x001D, 5},{0x0004, 6},{0x00B6, 8},{0x006A, 8},
+    {0x05B9,11},{0x16E1,13},{0x16E0,13},{0x0007, 4},
+    {0x016F, 9},{0x000C, 4},{0x000D, 4},{0x0009, 4},
+    {0x0008, 4},{0x000F, 4},{0x000A, 4},{0x0003, 4},
+    {0x0017, 5},{0x0002, 4},{0x0004, 4},{0x001C, 5},
+    {0x002C, 6},{0x006B, 8},{0x0B71,12},{0x0005, 4},
+    {0x0003, 5},{0x001B, 6},{0x005A, 7},{0x0034, 7},
+    {0x0005, 6},{0x02DD,10},{0x0000, 4},{0x000C, 5}
+  },
+  {
+    {0x0003, 4},{0x007F, 7},{0x00A1, 8},{0x00A0, 8},
+    {0x020C,10},{0x0834,12},{0x106B,13},{0x0007, 4},
+    {0x0082, 8},{0x000E, 4},{0x000D, 4},{0x000B, 4},
+    {0x000C, 4},{0x0000, 3},{0x0009, 4},{0x0002, 4},
+    {0x0011, 5},{0x001E, 5},{0x0015, 5},{0x003E, 6},
+    {0x0040, 7},{0x041B,11},{0x106A,13},{0x0006, 4},
+    {0x000A, 5},{0x0029, 6},{0x007E, 7},{0x0051, 7},
+    {0x0021, 6},{0x0107, 9},{0x0004, 4},{0x000B, 5}
+  },
+  {
+    {0x0007, 4},{0x001B, 6},{0x00F6, 8},{0x00E9, 8},
+    {0x03A1,10},{0x0740,11},{0x0E82,12},{0x001F, 5},
+    {0x01EF, 9},{0x0001, 3},{0x0002, 3},{0x000B, 4},
+    {0x000C, 4},{0x000D, 4},{0x0008, 4},{0x001C, 5},
+    {0x0003, 5},{0x0012, 5},{0x0002, 5},{0x0075, 7},
+    {0x01D1, 9},{0x1D07,13},{0x1D06,13},{0x000A, 4},
+    {0x0013, 5},{0x003B, 6},{0x001A, 6},{0x007A, 7},
+    {0x003C, 6},{0x01EE, 9},{0x0000, 4},{0x000C, 5}
+  },
+  {
+    {0x000D, 4},{0x003D, 6},{0x0042, 7},{0x0037, 7},
+    {0x00D9, 9},{0x0362,11},{0x06C6,12},{0x001F, 5},
+    {0x0086, 8},{0x0001, 3},{0x0002, 3},{0x000C, 4},
+    {0x000B, 4},{0x000A, 4},{0x0001, 4},{0x000F, 5},
+    {0x0025, 6},{0x003C, 6},{0x001A, 6},{0x0087, 8},
+    {0x01B0,10},{0x0D8F,13},{0x0D8E,13},{0x000E, 4},
+    {0x0013, 5},{0x000C, 5},{0x0024, 6},{0x0020, 6},
+    {0x0011, 5},{0x006D, 8},{0x0000, 4},{0x000E, 5}
+  },
+  {
+    {0x0000, 3},{0x0012, 5},{0x0076, 7},{0x0077, 7},
+    {0x014D, 9},{0x0533,11},{0x14C9,13},{0x0013, 5},
+    {0x00A5, 8},{0x0002, 3},{0x0003, 3},{0x000B, 4},
+    {0x000C, 4},{0x0008, 4},{0x001A, 5},{0x002B, 6},
+    {0x0075, 7},{0x0074, 7},{0x00A7, 8},{0x0298,10},
+    {0x14C8,13},{0x14CB,13},{0x14CA,13},{0x000F, 4},
+    {0x001C, 5},{0x0007, 5},{0x002A, 6},{0x0028, 6},
+    {0x001B, 5},{0x00A4, 8},{0x0002, 4},{0x0006, 5}
+  },
+  {
+    {0x0002, 3},{0x001A, 5},{0x002B, 6},{0x003A, 6},
+    {0x00ED, 8},{0x0283,10},{0x0A0A,12},{0x0004, 5},
+    {0x00A1, 8},{0x0004, 3},{0x0003, 3},{0x000B, 4},
+    {0x000C, 4},{0x001F, 5},{0x0006, 5},{0x0077, 7},
+    {0x00A3, 8},{0x00A2, 8},{0x0140, 9},{0x1417,13},
+    {0x1416,13},{0x0A09,12},{0x0A08,12},{0x0000, 3},
+    {0x001E, 5},{0x0007, 5},{0x002A, 6},{0x0029, 6},
+    {0x001C, 5},{0x00EC, 8},{0x001B, 5},{0x0005, 5}
+  },
+  {
+    {0x0002, 3},{0x0002, 4},{0x0018, 5},{0x001D, 5},
+    {0x0035, 6},{0x00E4, 8},{0x01CF,11},{0x001D, 7},
+    {0x0072, 9},{0x0004, 3},{0x0005, 3},{0x0006, 4},
+    {0x0007, 4},{0x0006, 5},{0x0073, 7},{0x0038, 8},
+    {0x01CE,11},{0x039B,12},{0x0398,12},{0x0733,13},
+    {0x0732,13},{0x0735,13},{0x0734,13},{0x0000, 3},
+    {0x001F, 5},{0x001B, 5},{0x0034, 6},{0x000F, 6},
+    {0x001E, 5},{0x00E5, 8},{0x0019, 5},{0x0038, 6}
+  },
+  {
+    {0x0016, 5},{0x0050, 7},{0x0172, 9},{0x02E7,10},
+    {0x1732,13},{0x2E67,14},{0x2E66,14},{0x0006, 4},
+    {0x0051, 7},{0x0001, 3},{0x0000, 3},{0x000D, 4},
+    {0x000C, 4},{0x0009, 4},{0x001C, 5},{0x0009, 5},
+    {0x001C, 6},{0x001D, 6},{0x005D, 7},{0x00B8, 8},
+    {0x05CD,11},{0x1731,13},{0x1730,13},{0x000F, 4},
+    {0x0005, 4},{0x000F, 5},{0x0008, 5},{0x0029, 6},
+    {0x001D, 5},{0x002F, 6},{0x0008, 4},{0x0015, 5}
+  },
+  {
+    {0x0009, 4},{0x0021, 6},{0x0040, 7},{0x00AD, 8},
+    {0x02B0,10},{0x1589,13},{0x1588,13},{0x001C, 5},
+    {0x005F, 7},{0x0000, 3},{0x000F, 4},{0x000D, 4},
+    {0x000C, 4},{0x0006, 4},{0x0011, 5},{0x002A, 6},
+    {0x0057, 7},{0x005E, 7},{0x0041, 7},{0x0159, 9},
+    {0x0563,11},{0x158B,13},{0x158A,13},{0x0001, 3},
+    {0x0005, 4},{0x0014, 5},{0x003B, 6},{0x002E, 6},
+    {0x0004, 4},{0x003A, 6},{0x0007, 4},{0x0016, 5}
+  },
+  {
+    {0x000E, 4},{0x0007, 5},{0x0046, 7},{0x0045, 7},
+    {0x0064, 9},{0x032A,12},{0x0657,13},{0x0018, 5},
+    {0x000D, 6},{0x0000, 3},{0x000F, 4},{0x000A, 4},
+    {0x000B, 4},{0x001A, 5},{0x0036, 6},{0x0047, 7},
+    {0x0044, 7},{0x0018, 7},{0x0033, 8},{0x00CB,10},
+    {0x0656,13},{0x0329,12},{0x0328,12},{0x0002, 3},
+    {0x0006, 4},{0x0019, 5},{0x000E, 5},{0x0037, 6},
+    {0x0009, 4},{0x000F, 5},{0x0002, 4},{0x0010, 5}
+  },
+  {
+    {0x0003, 3},{0x0018, 5},{0x0023, 6},{0x0077, 7},
+    {0x0194, 9},{0x1956,13},{0x32AF,14},{0x003A, 6},
+    {0x0076, 7},{0x0002, 3},{0x0001, 3},{0x001F, 5},
+    {0x001E, 5},{0x0014, 5},{0x0022, 6},{0x0064, 7},
+    {0x0197, 9},{0x0196, 9},{0x032B,10},{0x0654,11},
+    {0x32AE,14},{0x1955,13},{0x1954,13},{0x0000, 3},
+    {0x0009, 4},{0x001C, 5},{0x0015, 5},{0x0010, 5},
+    {0x000D, 4},{0x0017, 5},{0x0016, 5},{0x0033, 6}
+  },
+  {
+    {0x0005, 3},{0x0006, 4},{0x003E, 6},{0x0010, 5},
+    {0x0048, 7},{0x093F,12},{0x24FA,14},{0x0032, 6},
+    {0x0067, 7},{0x0002, 3},{0x0001, 3},{0x001B, 5},
+    {0x001E, 5},{0x0034, 6},{0x0066, 7},{0x0092, 8},
+    {0x0126, 9},{0x024E,10},{0x049E,11},{0x49F7,15},
+    {0x49F6,15},{0x24F9,14},{0x24F8,14},{0x0000, 3},
+    {0x0007, 4},{0x0018, 5},{0x0011, 5},{0x003F, 6},
+    {0x000E, 4},{0x0013, 5},{0x0035, 6},{0x0025, 6}
+  },
+  {
+    {0x0005, 3},{0x0008, 4},{0x0012, 5},{0x001C, 5},
+    {0x001C, 6},{0x00EA, 9},{0x1D75,14},{0x001E, 6},
+    {0x0066, 7},{0x0001, 3},{0x0002, 3},{0x001B, 5},
+    {0x001A, 5},{0x001F, 6},{0x003B, 7},{0x0074, 8},
+    {0x01D6,10},{0x03AF,11},{0x1D74,14},{0x1D77,14},
+    {0x1D76,14},{0x0EB9,13},{0x0EB8,13},{0x000F, 4},
+    {0x0006, 4},{0x0013, 5},{0x003B, 6},{0x003A, 6},
+    {0x0000, 3},{0x0018, 5},{0x0032, 6},{0x0067, 7}
+  },
+  {
+    {0x0004, 3},{0x000A, 4},{0x001B, 5},{0x000C, 4},
+    {0x000D, 5},{0x00E6, 8},{0x0684,11},{0x0072, 7},
+    {0x00E7, 8},{0x0002, 3},{0x0001, 3},{0x0017, 5},
+    {0x0016, 5},{0x0018, 6},{0x00D1, 8},{0x01A0, 9},
+    {0x0686,11},{0x0D0F,12},{0x0D0A,12},{0x1A17,13},
+    {0x1A16,13},{0x1A1D,13},{0x1A1C,13},{0x000F, 4},
+    {0x001D, 5},{0x000E, 5},{0x0035, 6},{0x0038, 6},
+    {0x0000, 3},{0x000F, 5},{0x0019, 6},{0x0069, 7}
+  },
+  {
+    {0x0003, 3},{0x000C, 4},{0x001B, 5},{0x0000, 3},
+    {0x0003, 4},{0x002E, 6},{0x0051, 9},{0x00BC, 8},
+    {0x0053, 9},{0x0004, 3},{0x0002, 3},{0x0016, 5},
+    {0x0015, 5},{0x0015, 7},{0x0050, 9},{0x00A4,10},
+    {0x0294,12},{0x052B,13},{0x052A,13},{0x052D,13},
+    {0x052C,13},{0x052F,13},{0x052E,13},{0x000E, 4},
+    {0x001A, 5},{0x0004, 5},{0x0028, 6},{0x0029, 6},
+    {0x000F, 4},{0x000B, 6},{0x005F, 7},{0x00BD, 8}
+  },
+  {
+    {0x0003, 4},{0x0009, 6},{0x00D0, 8},{0x01A3, 9},
+    {0x0344,10},{0x0D14,12},{0x1A2B,13},{0x0004, 4},
+    {0x0015, 7},{0x0000, 3},{0x000F, 4},{0x000B, 4},
+    {0x000C, 4},{0x000E, 4},{0x0009, 4},{0x001B, 5},
+    {0x000A, 5},{0x0014, 5},{0x000D, 5},{0x002A, 6},
+    {0x0014, 7},{0x068B,11},{0x1A2A,13},{0x0008, 4},
+    {0x000B, 5},{0x002B, 6},{0x000B, 6},{0x0069, 7},
+    {0x0035, 6},{0x0008, 6},{0x0007, 4},{0x000C, 5}
+  },
+  {
+    {0x000A, 4},{0x003C, 6},{0x0032, 7},{0x0030, 7},
+    {0x00C5, 9},{0x0621,12},{0x0620,12},{0x001F, 5},
+    {0x0033, 7},{0x0001, 3},{0x0000, 3},{0x000E, 4},
+    {0x000D, 4},{0x000C, 4},{0x0004, 4},{0x000D, 5},
+    {0x0026, 6},{0x0027, 6},{0x0014, 6},{0x0063, 8},
+    {0x0189,10},{0x0623,12},{0x0622,12},{0x000B, 4},
+    {0x0012, 5},{0x003D, 6},{0x0022, 6},{0x0015, 6},
+    {0x000B, 5},{0x0023, 6},{0x0007, 4},{0x0010, 5}
+  },
+  {
+    {0x000F, 4},{0x000C, 5},{0x0043, 7},{0x0010, 6},
+    {0x0044, 8},{0x0114,10},{0x0455,12},{0x0018, 5},
+    {0x0023, 7},{0x0001, 3},{0x0000, 3},{0x000E, 4},
+    {0x000D, 4},{0x0009, 4},{0x0019, 5},{0x0009, 5},
+    {0x0017, 6},{0x0016, 6},{0x0042, 7},{0x008B, 9},
+    {0x0454,12},{0x0457,12},{0x0456,12},{0x000B, 4},
+    {0x0015, 5},{0x000A, 5},{0x0029, 6},{0x0020, 6},
+    {0x000D, 5},{0x0028, 6},{0x0007, 4},{0x0011, 5}
+  },
+  {
+    {0x0001, 3},{0x001A, 5},{0x0029, 6},{0x002A, 6},
+    {0x00A0, 8},{0x0285,10},{0x1425,13},{0x0002, 5},
+    {0x0000, 7},{0x0002, 3},{0x0003, 3},{0x000C, 4},
+    {0x000B, 4},{0x0008, 4},{0x0012, 5},{0x0001, 6},
+    {0x0051, 7},{0x0001, 7},{0x0143, 9},{0x0508,11},
+    {0x1424,13},{0x1427,13},{0x1426,13},{0x000F, 4},
+    {0x001C, 5},{0x0003, 5},{0x0037, 6},{0x002B, 6},
+    {0x0013, 5},{0x0036, 6},{0x001D, 5},{0x0001, 5}
+  },
+  {
+    {0x0004, 3},{0x001F, 5},{0x003D, 6},{0x0006, 5},
+    {0x0016, 7},{0x0053, 9},{0x014A,11},{0x0034, 6},
+    {0x002A, 8},{0x0002, 3},{0x0003, 3},{0x000B, 4},
+    {0x000C, 4},{0x001C, 5},{0x0037, 6},{0x0017, 7},
+    {0x002B, 8},{0x0028, 8},{0x00A4,10},{0x052D,13},
+    {0x052C,13},{0x052F,13},{0x052E,13},{0x0000, 3},
+    {0x001D, 5},{0x0007, 5},{0x0004, 5},{0x0035, 6},
+    {0x0014, 5},{0x0036, 6},{0x0015, 5},{0x003C, 6}
+  },
+  {
+    {0x0004, 3},{0x000A, 4},{0x0007, 5},{0x001D, 5},
+    {0x0009, 6},{0x01F3, 9},{0x07C7,11},{0x0008, 6},
+    {0x01F0, 9},{0x0003, 3},{0x0002, 3},{0x000D, 4},
+    {0x000C, 4},{0x0017, 5},{0x007D, 7},{0x01F2, 9},
+    {0x07C6,11},{0x07C5,11},{0x1F12,13},{0x3E27,14},
+    {0x3E26,14},{0x1F11,13},{0x1F10,13},{0x0000, 3},
+    {0x001E, 5},{0x0006, 5},{0x0039, 6},{0x0038, 6},
+    {0x003F, 6},{0x002C, 6},{0x0005, 5},{0x002D, 6}
+  },
+  {
+    {0x0002, 3},{0x0007, 4},{0x0018, 5},{0x0003, 4},
+    {0x0005, 5},{0x0035, 7},{0x004F, 9},{0x0012, 7},
+    {0x04E5,13},{0x0005, 3},{0x0004, 3},{0x000D, 4},
+    {0x000E, 4},{0x0033, 6},{0x0026, 8},{0x009D,10},
+    {0x04E4,13},{0x04E7,13},{0x04E6,13},{0x04E1,13},
+    {0x04E0,13},{0x04E3,13},{0x04E2,13},{0x0000, 3},
+    {0x001F, 5},{0x000C, 5},{0x003D, 6},{0x003C, 6},
+    {0x0032, 6},{0x0034, 7},{0x001B, 6},{0x0008, 6}
+  },
+  {
+    {0x0000, 3},{0x0004, 4},{0x001C, 5},{0x000F, 4},
+    {0x0002, 4},{0x0007, 5},{0x0075, 7},{0x00E8, 8},
+    {0x1D2A,13},{0x0005, 3},{0x0004, 3},{0x000D, 4},
+    {0x000C, 4},{0x0077, 7},{0x0E96,12},{0x3A57,14},
+    {0x3A56,14},{0x3A5D,14},{0x3A5C,14},{0x3A5F,14},
+    {0x3A5E,14},{0x1D29,13},{0x1D28,13},{0x0003, 3},
+    {0x0006, 5},{0x000A, 5},{0x002C, 7},{0x0017, 6},
+    {0x0076, 7},{0x01D3, 9},{0x03A4,10},{0x002D, 7}
+  },
+  {
+    {0x000A, 4},{0x0024, 6},{0x00BF, 8},{0x0085, 8},
+    {0x0211,10},{0x0842,12},{0x1087,13},{0x0018, 5},
+    {0x0020, 6},{0x0001, 3},{0x0002, 3},{0x000E, 4},
+    {0x000D, 4},{0x0007, 4},{0x0013, 5},{0x0025, 6},
+    {0x005E, 7},{0x0043, 7},{0x00BE, 8},{0x0109, 9},
+    {0x1086,13},{0x0841,12},{0x0840,12},{0x000F, 4},
+    {0x0001, 4},{0x0011, 5},{0x0000, 5},{0x002E, 6},
+    {0x0019, 5},{0x0001, 5},{0x0006, 4},{0x0016, 5}
+  },
+  {
+    {0x0002, 3},{0x000F, 5},{0x006F, 7},{0x0061, 7},
+    {0x0374,10},{0x1BA8,13},{0x3753,14},{0x0012, 5},
+    {0x0036, 6},{0x0000, 3},{0x0001, 3},{0x000A, 4},
+    {0x000B, 4},{0x001A, 5},{0x0031, 6},{0x0060, 7},
+    {0x00DC, 8},{0x01BB, 9},{0x06EB,11},{0x1BAB,13},
+    {0x3752,14},{0x3755,14},{0x3754,14},{0x000E, 4},
+    {0x0006, 4},{0x0013, 5},{0x000E, 5},{0x003E, 6},
+    {0x0008, 4},{0x001E, 5},{0x0019, 5},{0x003F, 6}
+  },
+  {
+    {0x0003, 3},{0x001C, 5},{0x0025, 6},{0x0024, 6},
+    {0x01DA, 9},{0x1DBD,13},{0x3B7C,14},{0x003C, 6},
+    {0x003D, 6},{0x0000, 3},{0x0001, 3},{0x000B, 4},
+    {0x000A, 4},{0x000B, 5},{0x0077, 7},{0x00EC, 8},
+    {0x03B6,10},{0x076E,11},{0x1DBF,13},{0x76FB,15},
+    {0x76FA,15},{0x3B79,14},{0x3B78,14},{0x000D, 4},
+    {0x001F, 5},{0x0013, 5},{0x000A, 5},{0x0008, 5},
+    {0x000C, 4},{0x0008, 4},{0x0009, 5},{0x003A, 6}
+  },
+  {
+    {0x0005, 3},{0x0003, 4},{0x0004, 5},{0x0010, 5},
+    {0x008F, 8},{0x0475,11},{0x11D1,13},{0x0079, 7},
+    {0x0027, 6},{0x0002, 3},{0x0003, 3},{0x0001, 4},
+    {0x0000, 4},{0x0026, 6},{0x0046, 7},{0x011C, 9},
+    {0x0477,11},{0x08ED,12},{0x11D0,13},{0x11D3,13},
+    {0x11D2,13},{0x11D9,13},{0x11D8,13},{0x000D, 4},
+    {0x001F, 5},{0x0012, 5},{0x0005, 5},{0x003D, 6},
+    {0x000C, 4},{0x000E, 4},{0x0022, 6},{0x0078, 7}
+  },
+  {
+    {0x0005, 3},{0x000C, 4},{0x001B, 5},{0x0000, 4},
+    {0x0006, 6},{0x03E2,10},{0x3E3D,14},{0x000F, 7},
+    {0x0034, 6},{0x0003, 3},{0x0002, 3},{0x001E, 5},
+    {0x001D, 5},{0x007D, 7},{0x01F0, 9},{0x07C6,11},
+    {0x3E3C,14},{0x3E3F,14},{0x3E3E,14},{0x3E39,14},
+    {0x3E38,14},{0x3E3B,14},{0x3E3A,14},{0x0008, 4},
+    {0x001C, 5},{0x0002, 5},{0x003F, 6},{0x0035, 6},
+    {0x0009, 4},{0x0001, 3},{0x000E, 7},{0x00F9, 8}
+  },
+  {
+    {0x0004, 3},{0x000B, 4},{0x0001, 4},{0x000A, 4},
+    {0x001E, 6},{0x00E0, 9},{0x0E1E,13},{0x0071, 8},
+    {0x0039, 7},{0x0007, 3},{0x0006, 3},{0x000D, 5},
+    {0x000C, 5},{0x0020, 7},{0x01C2,10},{0x1C3F,14},
+    {0x1C3E,14},{0x0E19,13},{0x0E18,13},{0x0E1B,13},
+    {0x0E1A,13},{0x0E1D,13},{0x0E1C,13},{0x0000, 4},
+    {0x0009, 5},{0x001D, 6},{0x001F, 6},{0x0011, 6},
+    {0x0005, 4},{0x0001, 3},{0x0043, 8},{0x0042, 8}
+  },
+  {
+    {0x0004, 3},{0x000D, 4},{0x0007, 4},{0x0002, 3},
+    {0x0014, 5},{0x016C, 9},{0x16D1,13},{0x02DF,10},
+    {0x016E, 9},{0x0000, 2},{0x0007, 3},{0x002C, 6},
+    {0x002B, 6},{0x02DE,10},{0x16D0,13},{0x16D3,13},
+    {0x16D2,13},{0x2DB5,14},{0x2DB4,14},{0x2DB7,14},
+    {0x2DB6,14},{0x16D9,13},{0x16D8,13},{0x000C, 5},
+    {0x002A, 6},{0x005A, 7},{0x001B, 6},{0x001A, 6},
+    {0x0017, 5},{0x000C, 4},{0x05B7,11},{0x05B5,11}
+  },
+  {
+    {0x0002, 2},{0x000F, 4},{0x001C, 5},{0x000C, 4},
+    {0x003B, 6},{0x01AC, 9},{0x1AD8,13},{0x35B3,14},
+    {0x35B2,14},{0x0001, 2},{0x0000, 2},{0x0069, 7},
+    {0x0068, 7},{0x35BD,14},{0x35BC,14},{0x35BF,14},
+    {0x35BE,14},{0x35B9,14},{0x35B8,14},{0x35BB,14},
+    {0x35BA,14},{0x35B5,14},{0x35B4,14},{0x01A9, 9},
+    {0x01A8, 9},{0x035A,10},{0x00D7, 8},{0x00D5, 8},
+    {0x003A, 6},{0x001B, 5},{0x35B7,14},{0x35B6,14}
+  },
+  {
+    {0x0000, 3},{0x0010, 5},{0x0072, 7},{0x0071, 7},
+    {0x0154, 9},{0x0AAB,12},{0x0AA8,12},{0x0014, 5},
+    {0x0070, 7},{0x0002, 3},{0x0003, 3},{0x000C, 4},
+    {0x000B, 4},{0x0003, 4},{0x0011, 5},{0x0073, 7},
+    {0x0054, 7},{0x00AB, 8},{0x02AB,10},{0x1553,13},
+    {0x1552,13},{0x1555,13},{0x1554,13},{0x000D, 4},
+    {0x001E, 5},{0x0012, 5},{0x003E, 6},{0x002B, 6},
+    {0x0002, 4},{0x003F, 6},{0x001D, 5},{0x0013, 5}
+  },
+  {
+    {0x0003, 3},{0x001F, 5},{0x0029, 6},{0x003D, 6},
+    {0x000C, 7},{0x0069,10},{0x0345,13},{0x0002, 5},
+    {0x0028, 6},{0x0002, 3},{0x0001, 3},{0x000E, 4},
+    {0x000C, 4},{0x0015, 5},{0x0007, 6},{0x001B, 8},
+    {0x006B,10},{0x006A,10},{0x0344,13},{0x0347,13},
+    {0x0346,13},{0x01A1,12},{0x01A0,12},{0x000B, 4},
+    {0x001A, 5},{0x0012, 5},{0x0000, 5},{0x003C, 6},
+    {0x0008, 4},{0x001B, 5},{0x0013, 5},{0x0001, 5}
+  },
+  {
+    {0x0004, 3},{0x0004, 4},{0x003F, 6},{0x0014, 5},
+    {0x0056, 7},{0x015C, 9},{0x15D5,13},{0x003C, 6},
+    {0x002A, 6},{0x0000, 3},{0x0001, 3},{0x000E, 4},
+    {0x000D, 4},{0x000C, 5},{0x00AF, 8},{0x02BB,10},
+    {0x15D4,13},{0x15D7,13},{0x15D6,13},{0x15D1,13},
+    {0x15D0,13},{0x15D3,13},{0x15D2,13},{0x000B, 4},
+    {0x0019, 5},{0x000D, 5},{0x003E, 6},{0x0031, 6},
+    {0x0007, 4},{0x0005, 4},{0x003D, 6},{0x0030, 6}
+  },
+  {
+    {0x0005, 3},{0x0008, 4},{0x001A, 5},{0x0000, 4},
+    {0x0036, 6},{0x0011, 8},{0x0106,12},{0x000A, 7},
+    {0x006E, 7},{0x0002, 3},{0x0003, 3},{0x0003, 4},
+    {0x0002, 4},{0x006F, 7},{0x0021, 9},{0x020F,13},
+    {0x020E,13},{0x0101,12},{0x0100,12},{0x0103,12},
+    {0x0102,12},{0x0105,12},{0x0104,12},{0x000C, 4},
+    {0x001E, 5},{0x0003, 5},{0x003E, 6},{0x003F, 6},
+    {0x0009, 4},{0x000E, 4},{0x000B, 7},{0x0009, 7}
+  },
+  {
+    {0x0002, 3},{0x000E, 4},{0x001E, 5},{0x000C, 4},
+    {0x001F, 5},{0x006E, 7},{0x00AD,10},{0x00AF,10},
+    {0x0014, 7},{0x0004, 3},{0x0003, 3},{0x001A, 5},
+    {0x0017, 5},{0x002A, 8},{0x0576,13},{0x0AEF,14},
+    {0x0AEE,14},{0x0571,13},{0x0570,13},{0x0573,13},
+    {0x0572,13},{0x0575,13},{0x0574,13},{0x0003, 4},
+    {0x0016, 5},{0x0004, 5},{0x0036, 6},{0x000B, 6},
+    {0x000A, 4},{0x0000, 3},{0x006F, 7},{0x00AC,10}
+  },
+  {
+    {0x0004, 3},{0x0005, 4},{0x0003, 3},{0x0001, 3},
+    {0x0004, 4},{0x002F, 6},{0x0526,11},{0x1495,13},
+    {0x00A6, 8},{0x0007, 3},{0x0006, 3},{0x002D, 6},
+    {0x002C, 6},{0x1494,13},{0x1497,13},{0x1496,13},
+    {0x1491,13},{0x1490,13},{0x1493,13},{0x1492,13},
+    {0x293D,14},{0x293C,14},{0x293F,14},{0x0000, 3},
+    {0x0028, 6},{0x00A5, 8},{0x0148, 9},{0x00A7, 8},
+    {0x002E, 6},{0x0015, 5},{0x0A4E,12},{0x293E,14}
+  },
+  {
+    {0x0004, 3},{0x0005, 4},{0x0003, 3},{0x0001, 3},
+    {0x0004, 4},{0x002F, 6},{0x0526,11},{0x1495,13},
+    {0x00A6, 8},{0x0007, 3},{0x0006, 3},{0x002D, 6},
+    {0x002C, 6},{0x1494,13},{0x1497,13},{0x1496,13},
+    {0x1491,13},{0x1490,13},{0x1493,13},{0x1492,13},
+    {0x293D,14},{0x293C,14},{0x293F,14},{0x0000, 3},
+    {0x0028, 6},{0x00A5, 8},{0x0148, 9},{0x00A7, 8},
+    {0x002E, 6},{0x0015, 5},{0x0A4E,12},{0x293E,14}
+  },
+  {
+    {0x0004, 3},{0x0005, 4},{0x0003, 3},{0x0001, 3},
+    {0x0004, 4},{0x002F, 6},{0x0526,11},{0x1495,13},
+    {0x00A6, 8},{0x0007, 3},{0x0006, 3},{0x002D, 6},
+    {0x002C, 6},{0x1494,13},{0x1497,13},{0x1496,13},
+    {0x1491,13},{0x1490,13},{0x1493,13},{0x1492,13},
+    {0x293D,14},{0x293C,14},{0x293F,14},{0x0000, 3},
+    {0x0028, 6},{0x00A5, 8},{0x0148, 9},{0x00A7, 8},
+    {0x002E, 6},{0x0015, 5},{0x0A4E,12},{0x293E,14}
+  },
+  {
+    {0x0003, 3},{0x0011, 5},{0x0020, 6},{0x0074, 7},
+    {0x010D, 9},{0x0863,12},{0x0860,12},{0x000A, 5},
+    {0x0075, 7},{0x0001, 3},{0x0000, 3},{0x000B, 4},
+    {0x000A, 4},{0x0018, 5},{0x0038, 6},{0x0042, 7},
+    {0x010F, 9},{0x010E, 9},{0x0219,10},{0x10C3,13},
+    {0x10C2,13},{0x10C5,13},{0x10C4,13},{0x000F, 4},
+    {0x0004, 4},{0x0019, 5},{0x000B, 5},{0x0039, 6},
+    {0x0009, 4},{0x001B, 5},{0x001A, 5},{0x003B, 6}
+  },
+  {
+    {0x0005, 3},{0x0001, 4},{0x003E, 6},{0x0001, 5},
+    {0x00E2, 8},{0x1C6F,13},{0x38D9,14},{0x0039, 6},
+    {0x001F, 6},{0x0002, 3},{0x0001, 3},{0x0009, 4},
+    {0x0008, 4},{0x0000, 5},{0x0070, 7},{0x01C7, 9},
+    {0x038C,10},{0x071A,11},{0x38D8,14},{0x38DB,14},
+    {0x38DA,14},{0x38DD,14},{0x38DC,14},{0x000D, 4},
+    {0x001D, 5},{0x000E, 5},{0x003F, 6},{0x003C, 6},
+    {0x000C, 4},{0x0006, 4},{0x003D, 6},{0x001E, 6}
+  },
+  {
+    {0x0006, 3},{0x000B, 4},{0x0011, 5},{0x001E, 5},
+    {0x0074, 7},{0x03AA,10},{0x1D5C,13},{0x0001, 6},
+    {0x0021, 6},{0x0001, 3},{0x0002, 3},{0x0007, 4},
+    {0x0006, 4},{0x003E, 6},{0x00EB, 8},{0x01D4, 9},
+    {0x0EAF,12},{0x3ABB,14},{0x3ABA,14},{0x1D59,13},
+    {0x1D58,13},{0x1D5B,13},{0x1D5A,13},{0x000A, 4},
+    {0x001C, 5},{0x0001, 5},{0x003F, 6},{0x003B, 6},
+    {0x0001, 4},{0x0009, 4},{0x0020, 6},{0x0000, 6}
+  },
+  {
+    {0x0004, 3},{0x000A, 4},{0x0017, 5},{0x0004, 4},
+    {0x0016, 6},{0x016A, 9},{0x16B1,13},{0x0017, 7},
+    {0x005B, 7},{0x0006, 3},{0x0007, 3},{0x0001, 4},
+    {0x0000, 4},{0x000A, 6},{0x02D7,10},{0x0B5A,12},
+    {0x16B0,13},{0x16B3,13},{0x16B2,13},{0x2D6D,14},
+    {0x2D6C,14},{0x2D6F,14},{0x2D6E,14},{0x0006, 4},
+    {0x000A, 5},{0x0004, 5},{0x002C, 6},{0x0017, 6},
+    {0x0003, 4},{0x0007, 4},{0x0016, 7},{0x00B4, 8}
+  },
+  {
+    {0x0005, 3},{0x000D, 4},{0x0005, 4},{0x0009, 4},
+    {0x0033, 6},{0x0193, 9},{0x192C,13},{0x0061, 8},
+    {0x0031, 7},{0x0000, 2},{0x0007, 3},{0x0010, 5},
+    {0x0011, 5},{0x00C8, 8},{0x192F,13},{0x325B,14},
+    {0x325A,14},{0x1929,13},{0x1928,13},{0x192B,13},
+    {0x192A,13},{0x325D,14},{0x325C,14},{0x0018, 5},
+    {0x001A, 6},{0x001B, 6},{0x0065, 7},{0x0019, 6},
+    {0x0004, 4},{0x0007, 4},{0x0060, 8},{0x0324,10}
+  },
+  {
+    {0x0006, 3},{0x0000, 3},{0x0002, 4},{0x000F, 4},
+    {0x0039, 6},{0x01D9, 9},{0x1D82,13},{0x0761,11},
+    {0x03BE,10},{0x0001, 2},{0x0002, 2},{0x000F, 6},
+    {0x000E, 6},{0x0762,11},{0x3B07,14},{0x3B06,14},
+    {0x3B1D,14},{0x3B1C,14},{0x3B1F,14},{0x3B1E,14},
+    {0x3B19,14},{0x3B18,14},{0x3B1B,14},{0x0038, 6},
+    {0x01DE, 9},{0x00ED, 8},{0x03BF,10},{0x00EE, 8},
+    {0x003A, 6},{0x0006, 5},{0x0EC0,12},{0x3B1A,14}
+  },
+  {
+    {0x0000, 2},{0x0002, 3},{0x000F, 5},{0x0006, 4},
+    {0x001C, 6},{0x01D0,10},{0x0E8C,13},{0x1D1B,14},
+    {0x1D1A,14},{0x0003, 2},{0x0002, 2},{0x00EA, 9},
+    {0x00E9, 9},{0x0E89,13},{0x0E88,13},{0x0E8B,13},
+    {0x0E8A,13},{0x1D65,14},{0x1D64,14},{0x1D67,14},
+    {0x1D66,14},{0x1D61,14},{0x1D60,14},{0x03AD,11},
+    {0x1D63,14},{0x1D62,14},{0x1D1D,14},{0x1D1C,14},
+    {0x003B, 7},{0x01D7,10},{0x1D1F,14},{0x1D1E,14}
+  },
+  {
+    {0x0002, 2},{0x000F, 4},{0x001C, 5},{0x000C, 4},
+    {0x003B, 6},{0x01AC, 9},{0x1AD8,13},{0x35B3,14},
+    {0x35B2,14},{0x0001, 2},{0x0000, 2},{0x0069, 7},
+    {0x0068, 7},{0x35BD,14},{0x35BC,14},{0x35BF,14},
+    {0x35BE,14},{0x35B9,14},{0x35B8,14},{0x35BB,14},
+    {0x35BA,14},{0x35B5,14},{0x35B4,14},{0x01A9, 9},
+    {0x01A8, 9},{0x035A,10},{0x00D7, 8},{0x00D5, 8},
+    {0x003A, 6},{0x001B, 5},{0x35B7,14},{0x35B6,14}
+  }
+};
+
+
+
+/*A description of a Huffman code value used when encoding the tree.
+  oc_huff_codes_pack() fills in one of these for every token of a table and
+   then sorts them by pattern with huff_entry_cmp().*/
+typedef struct{
+  /*The bit pattern, left-shifted so that the MSB of all patterns is
+     aligned.*/
+  ogg_uint32_t pattern;
+  /*The amount the bit pattern was shifted.
+    oc_huff_codes_pack() computes this as the longest code length in the table
+     minus the length of this code.*/
+  int          shift;
+  /*The token this bit pattern represents.
+    oc_huff_codes_pack() stores the token's index in the table here, so the
+     token can still be identified after sorting.*/
+  int          token;
+}oc_huff_entry;
+
+
+
+/*Comparison callback that orders oc_huff_entry structures by ascending bit
+   pattern.
+  _c1: The first entry to compare.
+  _c2: The second entry to compare.
+  Return: -1 if the pattern of _c1 is smaller than that of _c2, 1 if it is
+   larger, and 0 if the two patterns are equal.*/
+static int huff_entry_cmp(const void *_c1,const void *_c2){
+  const oc_huff_entry *lhs;
+  const oc_huff_entry *rhs;
+  lhs=(const oc_huff_entry *)_c1;
+  rhs=(const oc_huff_entry *)_c2;
+  if(lhs->pattern<rhs->pattern)return -1;
+  if(lhs->pattern>rhs->pattern)return 1;
+  return 0;
+}
+
+/*Encodes a description of the given Huffman tables.
+  Although the codes are stored in the encoder as flat arrays, in the bit
+   stream and in the decoder they are structured as a tree.
+  This function recovers the tree structure from the flat array and then
+   writes it out.
+  Note that the codes MUST form a Huffman code, and not merely a prefix-free
+   code, since the binary tree is assumed to be full.
+  Every token of every table must have a code of between 1 and 32 bits.
+  _opb:   The buffer to store the tree in.
+  _codes: The Huffman tables to pack.
+  Return: 0 on success, or TH_EINVAL if one of the given Huffman tables does
+   not form a full, prefix-free code.
+          On failure _opb may already contain part of the description.*/
+int oc_huff_codes_pack(oggpack_buffer *_opb,
+ const th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]){
+  int i;
+  for(i=0;i<TH_NHUFFMAN_TABLES;i++){
+    oc_huff_entry entries[TH_NDCT_TOKENS];
+    ogg_uint32_t  mask;
+    int           bpos;
+    int           maxlen;
+    int           j;
+    /*First, find the maximum code length so we can align all the bit
+       patterns.*/
+    maxlen=_codes[i][0].nbits;
+    for(j=1;j<TH_NDCT_TOKENS;j++)maxlen=OC_MAXI(_codes[i][j].nbits,maxlen);
+    /*It's improbable that a code with more than 32 bits could pass the
+       validation below, but abort early in any case.
+      A table whose longest code has no bits can never be valid either, and
+       rejecting it here keeps the shift counts below non-negative.*/
+    if(maxlen<1||maxlen>32)return TH_EINVAL;
+    /*Build a mask covering the low maxlen bits.
+      The shift is split in two so that maxlen==32 never shifts by the full
+       width of the type, and it is done in unsigned arithmetic so that
+       reaching the top bit does not overflow a signed int.*/
+    mask=((ogg_uint32_t)1<<(maxlen>>1)<<(maxlen+1>>1))-1;
+    /*Copy over the codes into our temporary workspace.
+      The bit patterns are aligned, and the original entry each code is from
+       is stored as well.*/
+    for(j=0;j<TH_NDCT_TOKENS;j++){
+      /*Fail if this code has no bits at all.
+        Technically a codebook with a single 0-bit entry is legal, but the
+         encoder currently does not support codebooks which do not contain all
+         the tokens.
+        Checking before the shift also guarantees the shift amount is less
+         than maxlen, and thus less than 32.*/
+      if(_codes[i][j].nbits<1)return TH_EINVAL;
+      entries[j].shift=maxlen-_codes[i][j].nbits;
+      entries[j].pattern=_codes[i][j].pattern<<entries[j].shift&mask;
+      entries[j].token=j;
+    }
+    /*Sort the codes into ascending order.
+      This is the order the leaves of the tree will be traversed.*/
+    qsort(entries,TH_NDCT_TOKENS,sizeof(entries[0]),huff_entry_cmp);
+    /*For each leaf of the tree:*/
+    bpos=maxlen;
+    for(j=0;j<TH_NDCT_TOKENS;j++){
+      ogg_uint32_t bit;
+      /*Descend into the tree, writing a bit for each branch.*/
+      for(;bpos>entries[j].shift;bpos--)oggpackB_write(_opb,0,1);
+      /*Mark this as a leaf node, and write its value.*/
+      oggpackB_write(_opb,1,1);
+      oggpackB_write(_opb,entries[j].token,OC_NDCT_TOKEN_BITS);
+      /*For each 1 branch we've descended, back up the tree until we reach a
+         0 branch.
+        bpos is less than maxlen here, so the initial shift is in range.*/
+      bit=(ogg_uint32_t)1<<bpos;
+      for(;entries[j].pattern&bit;bpos++)bit<<=1;
+      /*Validate the code.*/
+      if(j+1<TH_NDCT_TOKENS){
+        /*Select every bit strictly above the branch we stopped at.*/
+        mask=~(bit-1)<<1;
+        /*The next entry should have a 1 bit where we had a 0, and should
+           match our code above that bit.
+          This verifies both fullness and prefix-freeness simultaneously.*/
+        if(!(entries[j+1].pattern&bit)||
+         (entries[j].pattern&mask)!=(entries[j+1].pattern&mask)){
+          return TH_EINVAL;
+        }
+      }
+      /*If there are no more codes, we should have ascended back to the top
+         of the tree.*/
+      else if(bpos<maxlen)return TH_EINVAL;
+    }
+  }
+  return 0;
+}
+
+/*This is used to copy the configuration of an existing setup header for use by
+   the encoder.
+  The decoder uses a completely different data structure for the Huffman
+   codebooks.
+  Each codebook is read as a tree: a 0 bit descends one level, and a 1 bit
+   marks a leaf and is followed by the token stored there.
+  _opb:   The buffer to read the codebooks from.
+  _codes: Returns the bit pattern and length of the code for each token of
+           each table.
+  Return: 0 on success, TH_EBADHEADER if the buffer runs out of data or a
+   codebook has a codeword longer than 32 bits or more than 32 leaves, or
+   TH_EINVAL if a codebook does not contain each token exactly once.*/
+int oc_huff_codes_unpack(oc_pack_buf *_opb,
+ th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]){
+  int i;
+  for(i=0;i<TH_NHUFFMAN_TABLES;i++){
+    /*The path to the current node, stored in the most significant bits.*/
+    ogg_uint32_t code;
+    /*The depth of the current node.*/
+    int          len;
+    int          nleaves;
+    code=0;
+    len=nleaves=0;
+    memset(_codes[i],0,TH_NDCT_TOKENS*sizeof(*_codes[i]));
+    for(;;){
+      long bits;
+      bits=oc_pack_read1(_opb);
+      /*Only process nodes so long as there's more bits in the buffer.*/
+      if(oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER;
+      /*Read an internal node:*/
+      if(!bits){
+        len++;
+        /*Don't allow codewords longer than 32 bits.*/
+        if(len>32)return TH_EBADHEADER;
+      }
+      /*Read a leaf node:*/
+      else{
+        ogg_uint32_t code_bit;
+        /*Don't allow more than 32 tokens per codebook.*/
+        if(++nleaves>32)return TH_EBADHEADER;
+        bits=oc_pack_read(_opb,OC_NDCT_TOKEN_BITS);
+        /*A leaf at the root is a codebook with a single 0-bit code.
+          The current encoder does not support codebooks that do not contain
+           all of the tokens, so reject it here, before the shifts below are
+           evaluated with out-of-range counts (32 and -1).*/
+        if(len<1)return TH_EINVAL;
+        /*This token has already been assigned a code.
+          Since a codebook may have at most 32 leaves, a repeated token means
+           some other token is missing, which the current encoder does not
+           support.*/
+        if(_codes[i][bits].nbits>0)return TH_EINVAL;
+        _codes[i][bits].pattern=code>>32-len;
+        _codes[i][bits].nbits=len;
+        /*Back up the tree past every 1 branch we have taken, then take the 1
+           branch of the first node where we had taken the 0 branch.*/
+        code_bit=0x80000000U>>len-1;
+        while(len>0&&(code&code_bit)){
+          code^=code_bit;
+          code_bit<<=1;
+          len--;
+        }
+        /*Backing up past the root means the tree is complete.*/
+        if(len<=0)break;
+        code|=code_bit;
+      }
+    }
+    /*The current encoder does not support codebooks that do not contain all of
+       the tokens.*/
+    if(nleaves<32)return TH_EINVAL;
+  }
+  return 0;
+}

+ 22 - 0
jni/libtheora-1.2.0alpha1/lib/huffenc.h

@@ -0,0 +1,22 @@
+#if !defined(_huffenc_H)
+# define _huffenc_H (1)
+# include "huffman.h"
+# include "bitpack.h"
+
+
+
+/*One complete codebook: a codeword for each of the TH_NDCT_TOKENS tokens.*/
+typedef th_huff_code                  th_huff_table[TH_NDCT_TOKENS];
+
+
+
+/*A full set of TH_NHUFFMAN_TABLES codebooks.
+  NOTE(review): by its name these are presumably the codebooks used by VP3.1;
+   the definition is not visible from this header -- confirm.*/
+extern const th_huff_code
+ TH_VP31_HUFF_CODES[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS];
+
+
+
+/*NOTE(review): by its name and signature this presumably writes the set of
+   codebooks in _codes to _opb; its definition is not fully visible here --
+   confirm against huffenc.c.*/
+int oc_huff_codes_pack(oggpack_buffer *_opb,
+ const th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]);
+/*Reads a set of codebooks from _opb into _codes.
+  Return: 0 on success, or TH_EBADHEADER or TH_EINVAL on failure.*/
+int oc_huff_codes_unpack(oc_pack_buf *_opb,
+ th_huff_code _codes[TH_NHUFFMAN_TABLES][TH_NDCT_TOKENS]);
+
+#endif

+ 70 - 0
jni/libtheora-1.2.0alpha1/lib/huffman.h

@@ -0,0 +1,70 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id$
+
+ ********************************************************************/
+
+#if !defined(_huffman_H)
+# define _huffman_H (1)
+# include "theora/codec.h"
+# include "ocintrin.h"
+
+/*The range of valid quantized DCT coefficient values.
+  VP3 used 511 in the encoder, but the bitstream is capable of 580.*/
+#define OC_DCT_VAL_RANGE         (580)
+
+/*The number of bits used to store a token value in a packed codebook.*/
+#define OC_NDCT_TOKEN_BITS       (5)
+
+/*The DCT token values.
+  NOTE(review): the gaps in the numbering (13 to 17 and 23 to 28) suggest that
+   OC_DCT_VAL_CAT2 and OC_DCT_RUN_CAT1A each name the first of a run of
+   consecutive tokens -- confirm against the Theora specification.*/
+#define OC_DCT_EOB1_TOKEN        (0)
+#define OC_DCT_EOB2_TOKEN        (1)
+#define OC_DCT_EOB3_TOKEN        (2)
+#define OC_DCT_REPEAT_RUN0_TOKEN (3)
+#define OC_DCT_REPEAT_RUN1_TOKEN (4)
+#define OC_DCT_REPEAT_RUN2_TOKEN (5)
+#define OC_DCT_REPEAT_RUN3_TOKEN (6)
+
+#define OC_DCT_SHORT_ZRL_TOKEN   (7)
+#define OC_DCT_ZRL_TOKEN         (8)
+
+#define OC_ONE_TOKEN             (9)
+#define OC_MINUS_ONE_TOKEN       (10)
+#define OC_TWO_TOKEN             (11)
+#define OC_MINUS_TWO_TOKEN       (12)
+
+#define OC_DCT_VAL_CAT2          (13)
+#define OC_DCT_VAL_CAT3          (17)
+#define OC_DCT_VAL_CAT4          (18)
+#define OC_DCT_VAL_CAT5          (19)
+#define OC_DCT_VAL_CAT6          (20)
+#define OC_DCT_VAL_CAT7          (21)
+#define OC_DCT_VAL_CAT8          (22)
+
+#define OC_DCT_RUN_CAT1A         (23)
+#define OC_DCT_RUN_CAT1B         (28)
+#define OC_DCT_RUN_CAT1C         (29)
+#define OC_DCT_RUN_CAT2A         (30)
+#define OC_DCT_RUN_CAT2B         (31)
+
+/*NOTE(review): each of these appears to be one past the last token value of a
+   group of tokens (e.g., the EOB tokens are 0...6 and OC_NDCT_EOB_TOKEN_MAX
+   is 7) -- confirm against the code that uses them.*/
+#define OC_NDCT_EOB_TOKEN_MAX    (7)
+#define OC_NDCT_ZRL_TOKEN_MAX    (9)
+#define OC_NDCT_VAL_MAX          (23)
+#define OC_NDCT_VAL_CAT1_MAX     (13)
+#define OC_NDCT_VAL_CAT2_MAX     (17)
+/*The number of token values from OC_DCT_VAL_CAT2 up to, but not including,
+   OC_NDCT_VAL_CAT2_MAX.*/
+#define OC_NDCT_VAL_CAT2_SIZE    (OC_NDCT_VAL_CAT2_MAX-OC_DCT_VAL_CAT2)
+#define OC_NDCT_RUN_MAX          (32)
+#define OC_NDCT_RUN_CAT1A_MAX    (28)
+
+/*The number of extra bits that are coded with each of the DCT tokens, indexed
+   by token value.*/
+extern const unsigned char OC_DCT_TOKEN_EXTRA_BITS[TH_NDCT_TOKENS];
+
+#endif

+ 330 - 0
jni/libtheora-1.2.0alpha1/lib/idct.c

@@ -0,0 +1,330 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id$
+
+ ********************************************************************/
+
+#include <string.h>
+#include "internal.h"
+#include "dct.h"
+
+/*Performs an inverse 8 point Type-II DCT transform.
+  The output is scaled by a factor of 2 relative to the orthonormal version of
+   the transform.
+  NOTE(review): the OC_C*S* constants come from dct.h, which is not visible
+   here; the >>16 after each product suggests they are cosines scaled by
+   2**16 -- confirm.
+  _y: The buffer to store the result in.
+      Data will be placed in every 8th entry (e.g., in a column of an 8x8
+       block).
+  _x: The input coefficients.
+      The first 8 entries are used (e.g., from a row of an 8x8 block).*/
+static void idct8(ogg_int16_t *_y,const ogg_int16_t _x[8]){
+  ogg_int32_t t[8];
+  ogg_int32_t r;
+  /*Stage 1:*/
+  /*0-1 butterfly.*/
+  /*The sum and difference are truncated to 16 bits before being scaled.*/
+  t[0]=OC_C4S4*(ogg_int16_t)(_x[0]+_x[4])>>16;
+  t[1]=OC_C4S4*(ogg_int16_t)(_x[0]-_x[4])>>16;
+  /*2-3 rotation by 6pi/16.*/
+  t[2]=(OC_C6S2*_x[2]>>16)-(OC_C2S6*_x[6]>>16);
+  t[3]=(OC_C2S6*_x[2]>>16)+(OC_C6S2*_x[6]>>16);
+  /*4-7 rotation by 7pi/16.*/
+  t[4]=(OC_C7S1*_x[1]>>16)-(OC_C1S7*_x[7]>>16);
+  /*5-6 rotation by 3pi/16.*/
+  t[5]=(OC_C3S5*_x[5]>>16)-(OC_C5S3*_x[3]>>16);
+  t[6]=(OC_C5S3*_x[5]>>16)+(OC_C3S5*_x[3]>>16);
+  t[7]=(OC_C1S7*_x[1]>>16)+(OC_C7S1*_x[7]>>16);
+  /*Stage 2:*/
+  /*4-5 butterfly.*/
+  /*r holds the sum until the difference, which needs the old t[4], has been
+     computed.*/
+  r=t[4]+t[5];
+  t[5]=OC_C4S4*(ogg_int16_t)(t[4]-t[5])>>16;
+  t[4]=r;
+  /*7-6 butterfly.*/
+  r=t[7]+t[6];
+  t[6]=OC_C4S4*(ogg_int16_t)(t[7]-t[6])>>16;
+  t[7]=r;
+  /*Stage 3:*/
+  /*0-3 butterfly.*/
+  r=t[0]+t[3];
+  t[3]=t[0]-t[3];
+  t[0]=r;
+  /*1-2 butterfly.*/
+  r=t[1]+t[2];
+  t[2]=t[1]-t[2];
+  t[1]=r;
+  /*6-5 butterfly.*/
+  r=t[6]+t[5];
+  t[5]=t[6]-t[5];
+  t[6]=r;
+  /*Stage 4:*/
+  /*Output k is written to _y[k*8], i.e., with a stride of 8 entries.*/
+  /*0-7 butterfly.*/
+  _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
+  /*1-6 butterfly.*/
+  _y[1<<3]=(ogg_int16_t)(t[1]+t[6]);
+  /*2-5 butterfly.*/
+  _y[2<<3]=(ogg_int16_t)(t[2]+t[5]);
+  /*3-4 butterfly.*/
+  _y[3<<3]=(ogg_int16_t)(t[3]+t[4]);
+  _y[4<<3]=(ogg_int16_t)(t[3]-t[4]);
+  _y[5<<3]=(ogg_int16_t)(t[2]-t[5]);
+  _y[6<<3]=(ogg_int16_t)(t[1]-t[6]);
+  _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
+}
+
+/*Performs an inverse 8 point Type-II DCT transform.
+  The output is scaled by a factor of 2 relative to the orthonormal version of
+   the transform.
+  This follows the same four stages as idct8(), with every term that depends
+   on _x[4] through _x[7] dropped.
+  _y: The buffer to store the result in.
+      Data will be placed in every 8th entry (e.g., in a column of an 8x8
+       block).
+  _x: The input coefficients.
+      Only the first 4 entries are used.
+      The other 4 are assumed to be 0.*/
+static void idct8_4(ogg_int16_t *_y,const ogg_int16_t _x[8]){
+  ogg_int32_t t[8];
+  ogg_int32_t r;
+  /*Stage 1:*/
+  /*With _x[4] equal to 0, both outputs of the 0-1 butterfly are the same, so
+     only t[0] is computed; it stands in for t[1] in stage 3.*/
+  t[0]=OC_C4S4*_x[0]>>16;
+  /*With _x[5], _x[6], and _x[7] equal to 0, each rotation reduces to a single
+     product per output.*/
+  t[2]=OC_C6S2*_x[2]>>16;
+  t[3]=OC_C2S6*_x[2]>>16;
+  t[4]=OC_C7S1*_x[1]>>16;
+  t[5]=-(OC_C5S3*_x[3]>>16);
+  t[6]=OC_C3S5*_x[3]>>16;
+  t[7]=OC_C1S7*_x[1]>>16;
+  /*Stage 2:*/
+  /*4-5 butterfly.*/
+  r=t[4]+t[5];
+  t[5]=OC_C4S4*(ogg_int16_t)(t[4]-t[5])>>16;
+  t[4]=r;
+  /*7-6 butterfly.*/
+  r=t[7]+t[6];
+  t[6]=OC_C4S4*(ogg_int16_t)(t[7]-t[6])>>16;
+  t[7]=r;
+  /*Stage 3:*/
+  /*1-2 butterfly, using t[0] in place of the identical t[1].
+    This must come before the 0-3 butterfly, which overwrites t[0].*/
+  t[1]=t[0]+t[2];
+  t[2]=t[0]-t[2];
+  /*0-3 butterfly.*/
+  r=t[0]+t[3];
+  t[3]=t[0]-t[3];
+  t[0]=r;
+  /*6-5 butterfly.*/
+  r=t[6]+t[5];
+  t[5]=t[6]-t[5];
+  t[6]=r;
+  /*Stage 4:*/
+  _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
+  _y[1<<3]=(ogg_int16_t)(t[1]+t[6]);
+  _y[2<<3]=(ogg_int16_t)(t[2]+t[5]);
+  _y[3<<3]=(ogg_int16_t)(t[3]+t[4]);
+  _y[4<<3]=(ogg_int16_t)(t[3]-t[4]);
+  _y[5<<3]=(ogg_int16_t)(t[2]-t[5]);
+  _y[6<<3]=(ogg_int16_t)(t[1]-t[6]);
+  _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
+}
+
+/*Performs an inverse 8 point Type-II DCT transform.
+  The output is scaled by a factor of 2 relative to the orthonormal version of
+   the transform.
+  This follows the same four stages as idct8(), with every term that depends
+   on _x[3] through _x[7] dropped.
+  _y: The buffer to store the result in.
+      Data will be placed in every 8th entry (e.g., in a column of an 8x8
+       block).
+  _x: The input coefficients.
+      Only the first 3 entries are used.
+      The other 5 are assumed to be 0.*/
+static void idct8_3(ogg_int16_t *_y,const ogg_int16_t _x[8]){
+  ogg_int32_t t[8];
+  ogg_int32_t r;
+  /*Stage 1:*/
+  /*t[1] is identical to t[0] (since _x[4] is 0) and is filled in at stage 3;
+     t[5] and t[6] would be 0 here (since _x[3] and _x[5] are 0).*/
+  t[0]=OC_C4S4*_x[0]>>16;
+  t[2]=OC_C6S2*_x[2]>>16;
+  t[3]=OC_C2S6*_x[2]>>16;
+  t[4]=OC_C7S1*_x[1]>>16;
+  t[7]=OC_C1S7*_x[1]>>16;
+  /*Stage 2:*/
+  /*With t[5] and t[6] equal to 0 on entry, the 4-5 and 7-6 butterflies leave
+     t[4] and t[7] unchanged and reduce to scaling them by OC_C4S4.*/
+  t[5]=OC_C4S4*t[4]>>16;
+  t[6]=OC_C4S4*t[7]>>16;
+  /*Stage 3:*/
+  /*1-2 butterfly, using t[0] in place of the identical t[1].
+    This must come before the 0-3 butterfly, which overwrites t[0].*/
+  t[1]=t[0]+t[2];
+  t[2]=t[0]-t[2];
+  /*0-3 butterfly.*/
+  r=t[0]+t[3];
+  t[3]=t[0]-t[3];
+  t[0]=r;
+  /*6-5 butterfly.*/
+  r=t[6]+t[5];
+  t[5]=t[6]-t[5];
+  t[6]=r;
+  /*Stage 4:*/
+  _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
+  _y[1<<3]=(ogg_int16_t)(t[1]+t[6]);
+  _y[2<<3]=(ogg_int16_t)(t[2]+t[5]);
+  _y[3<<3]=(ogg_int16_t)(t[3]+t[4]);
+  _y[4<<3]=(ogg_int16_t)(t[3]-t[4]);
+  _y[5<<3]=(ogg_int16_t)(t[2]-t[5]);
+  _y[6<<3]=(ogg_int16_t)(t[1]-t[6]);
+  _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
+}
+
+/*Performs an inverse 8 point Type-II DCT transform.
+  The output is scaled by a factor of 2 relative to the orthonormal version of
+   the transform.
+  This follows the same four stages as idct8(), with every term that depends
+   on _x[2] through _x[7] dropped.
+  _y: The buffer to store the result in.
+      Data will be placed in every 8th entry (e.g., in a column of an 8x8
+       block).
+  _x: The input coefficients.
+      Only the first 2 entries are used.
+      The other 6 are assumed to be 0.*/
+static void idct8_2(ogg_int16_t *_y,const ogg_int16_t _x[8]){
+  ogg_int32_t t[8];
+  ogg_int32_t r;
+  /*Stage 1:*/
+  /*Only t[0], t[4], and t[7] are non-zero; t[1], t[2], and t[3] are never
+     stored, since t[0] stands in for all of them in stage 4.*/
+  t[0]=OC_C4S4*_x[0]>>16;
+  t[4]=OC_C7S1*_x[1]>>16;
+  t[7]=OC_C1S7*_x[1]>>16;
+  /*Stage 2:*/
+  /*With t[5] and t[6] equal to 0 on entry, the 4-5 and 7-6 butterflies leave
+     t[4] and t[7] unchanged and reduce to scaling them by OC_C4S4.*/
+  t[5]=OC_C4S4*t[4]>>16;
+  t[6]=OC_C4S4*t[7]>>16;
+  /*Stage 3:*/
+  /*6-5 butterfly.
+    The 0-3 and 1-2 butterflies are no-ops, since t[2] and t[3] are 0.*/
+  r=t[6]+t[5];
+  t[5]=t[6]-t[5];
+  t[6]=r;
+  /*Stage 4:*/
+  _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
+  _y[1<<3]=(ogg_int16_t)(t[0]+t[6]);
+  _y[2<<3]=(ogg_int16_t)(t[0]+t[5]);
+  _y[3<<3]=(ogg_int16_t)(t[0]+t[4]);
+  _y[4<<3]=(ogg_int16_t)(t[0]-t[4]);
+  _y[5<<3]=(ogg_int16_t)(t[0]-t[5]);
+  _y[6<<3]=(ogg_int16_t)(t[0]-t[6]);
+  _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
+}
+
+/*Inverse 8 point Type-II DCT of an input whose only non-zero coefficient is
+   the first one.
+  As with the other idct8 variants, the output is scaled by a factor of 2
+   relative to the orthonormal version of the transform.
+  _y: Where to store the result.
+      One value is written to every 8th entry (e.g., down a column of an 8x8
+       block).
+  _x: The input coefficients.
+      Just the first entry is read; the remaining 7 are taken to be 0.*/
+static void idct8_1(ogg_int16_t *_y,const ogg_int16_t _x[1]){
+  ogg_int16_t dc;
+  int         k;
+  /*All 8 outputs share the same value.*/
+  dc=(ogg_int16_t)(OC_C4S4*_x[0]>>16);
+  for(k=0;k<8;k++)_y[k<<3]=dc;
+}
+
+/*Performs an inverse 8x8 Type-II DCT transform.
+  The input is assumed to be scaled by a factor of 4 relative to orthonormal
+   version of the transform.
+  All coefficients but the first 3 in zig-zag scan order are assumed to be 0:
+   x  x  0  0  0  0  0  0
+   x  0  0  0  0  0  0  0
+   0  0  0  0  0  0  0  0
+   0  0  0  0  0  0  0  0
+   0  0  0  0  0  0  0  0
+   0  0  0  0  0  0  0  0
+   0  0  0  0  0  0  0  0
+   0  0  0  0  0  0  0  0
+  _y: The buffer to store the result in.
+      This may be the same as _x.
+  _x: The input coefficients.
+      When this is a different buffer than _y, the coefficients marked above
+       are reset to 0 on return, ready for the next block.*/
+static void oc_idct8x8_3(ogg_int16_t _y[64],ogg_int16_t _x[64]){
+  ogg_int16_t w[64];
+  int         i;
+  /*Transform rows of x into columns of w.
+    Only the first two columns of w are filled in.*/
+  idct8_2(w,_x);
+  idct8_1(w+1,_x+8);
+  /*Transform rows of w into columns of y.
+    Only the first two entries of each row of w are read.*/
+  for(i=0;i<8;i++)idct8_2(_y+i,w+i*8);
+  /*Adjust for the scale factor.*/
+  for(i=0;i<64;i++)_y[i]=(ogg_int16_t)(_y[i]+8>>4);
+  /*Clear input data for next block.
+    When the transform is done in place, _x holds the result, and clearing it
+     would destroy the output we just computed.*/
+  if(_x!=_y)_x[0]=_x[1]=_x[8]=0;
+}
+
+/*Performs an inverse 8x8 Type-II DCT transform.
+  The input is assumed to be scaled by a factor of 4 relative to orthonormal
+   version of the transform.
+  All coefficients but the first 10 in zig-zag scan order are assumed to be 0:
+   x  x  x  x  0  0  0  0
+   x  x  x  0  0  0  0  0
+   x  x  0  0  0  0  0  0
+   x  0  0  0  0  0  0  0
+   0  0  0  0  0  0  0  0
+   0  0  0  0  0  0  0  0
+   0  0  0  0  0  0  0  0
+   0  0  0  0  0  0  0  0
+  _y: The buffer to store the result in.
+      This may be the same as _x.
+  _x: The input coefficients.
+      When this is a different buffer than _y, the coefficients marked above
+       are reset to 0 on return, ready for the next block.*/
+static void oc_idct8x8_10(ogg_int16_t _y[64],ogg_int16_t _x[64]){
+  ogg_int16_t w[64];
+  int         i;
+  /*Transform rows of x into columns of w.
+    Only the first four columns of w are filled in.*/
+  idct8_4(w,_x);
+  idct8_3(w+1,_x+8);
+  idct8_2(w+2,_x+16);
+  idct8_1(w+3,_x+24);
+  /*Transform rows of w into columns of y.
+    Only the first four entries of each row of w are read.*/
+  for(i=0;i<8;i++)idct8_4(_y+i,w+i*8);
+  /*Adjust for the scale factor.*/
+  for(i=0;i<64;i++)_y[i]=(ogg_int16_t)(_y[i]+8>>4);
+  /*Clear input data for next block.
+    When the transform is done in place, _x holds the result, and clearing it
+     would destroy the output we just computed.*/
+  if(_x!=_y){
+    _x[0]=_x[1]=_x[2]=_x[3]=_x[8]=_x[9]=_x[10]=_x[16]=_x[17]=_x[24]=0;
+  }
+}
+
+/*Performs an inverse 8x8 Type-II DCT transform.
+  The input is assumed to be scaled by a factor of 4 relative to orthonormal
+   version of the transform.
+  _y: The buffer to store the result in.
+      This may be the same as _x.
+  _x: The input coefficients.
+      When this is a different buffer than _y, all 64 coefficients are reset
+       to 0 on return, ready for the next block.*/
+static void oc_idct8x8_slow(ogg_int16_t _y[64],ogg_int16_t _x[64]){
+  ogg_int16_t w[64];
+  int         i;
+  /*Transform rows of x into columns of w.*/
+  for(i=0;i<8;i++)idct8(w+i,_x+i*8);
+  /*Transform rows of w into columns of y.*/
+  for(i=0;i<8;i++)idct8(_y+i,w+i*8);
+  /*Adjust for the scale factor.*/
+  for(i=0;i<64;i++)_y[i]=(ogg_int16_t)(_y[i]+8>>4);
+  /*Clear input data for next block.
+    When the transform is done in place, _x holds the result, and clearing it
+     would destroy the output we just computed.*/
+  if(_x!=_y)for(i=0;i<64;i++)_x[i]=0;
+}
+
+/*Performs an inverse 8x8 Type-II DCT transform, picking the cheapest routine
+   that _last_zzi allows.
+  The input is assumed to be scaled by a factor of 4 relative to orthonormal
+   version of the transform.
+  _y:        The buffer to store the result in.
+  _x:        The input coefficients.
+  _last_zzi: The value of zzi BEFORE the final token in the block was
+              decoded.*/
+void oc_idct8x8_c(ogg_int16_t _y[64],ogg_int16_t _x[64],int _last_zzi){
+  /*_last_zzi is not quite a count of the coefficients decoded for this
+     block: it is the zig-zag index reached just before the block's final
+     token was decoded.
+    Usually that final token is an EOB token (a continuation of an EOB run
+     from an earlier block counts as one), and then the two are the same.
+    They differ when the final token was NOT an EOB token, but instead filled
+     the block up to exactly 64 coefficients; _last_zzi is then less than 64.
+    If that token was anything but a pure zero run, _last_zzi is at least 46,
+     which makes no difference to the thresholds used below.
+    If it WAS a pure zero run of length 63, however, _last_zzi is 1 even
+     though 64 coefficients were decoded, so we take the small-transform path
+     where a true coefficient count would not.
+    Similarly, a zero run of length 64 yields a _last_zzi of 0, yet the DC
+     coefficient still gets processed, as DC prediction may have made it
+     non-zero.
+    Convoluted as it is, this is arguably the right behavior: a block that
+     ends in a long zero run rather than a normal EOB token still gets the
+     smaller transform.
+    It could be smarter... several separate zero runs at the end of a block
+     will defeat it, but an encoder that produces those really deserves what
+     it gets.
+    Needless to say we inherited this approach from VP3.*/
+  if(_last_zzi>10){
+    /*General case: any of the 64 coefficients may be non-zero.*/
+    oc_idct8x8_slow(_y,_x);
+    return;
+  }
+  if(_last_zzi>3)oc_idct8x8_10(_y,_x);
+  else oc_idct8x8_3(_y,_x);
+}

+ 131 - 0
jni/libtheora-1.2.0alpha1/lib/info.c

@@ -0,0 +1,131 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id$
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+#include "internal.h"
+
+
+
+/*This is more or less the same as strncasecmp, but that doesn't exist
+   everywhere, and this is a fairly trivial function, so we include it.
+  Note: We take advantage of the fact that we know _n is less than or equal to
+   the length of at least one of the strings.
+  _s1: The comment to test, in "TAG=value" form.
+  _s2: The tag name to look for.
+  _n:  The number of characters of the tag name to compare.
+  Return: 0 if the first _n characters of _s1 match _s2, ignoring case, and
+           are immediately followed by '=', or non-zero otherwise.*/
+static int oc_tagcompare(const char *_s1,const char *_s2,int _n){
+  int c;
+  for(c=0;c<_n;c++){
+    /*toupper() is only defined for EOF and for values representable as an
+       unsigned char, so a plain (possibly signed) char must be converted
+       before being passed to it.*/
+    if(toupper((unsigned char)_s1[c])!=toupper((unsigned char)_s2[c])){
+      return !0;
+    }
+  }
+  return _s1[c]!='=';
+}
+
+
+
+/*Resets _info to all zeros, then fills in the bitstream version this library
+   implements (TH_VERSION_MAJOR.TH_VERSION_MINOR.TH_VERSION_SUB) and a
+   keyframe_granule_shift of 6.*/
+void th_info_init(th_info *_info){
+  memset(_info,0,sizeof(*_info));
+  _info->version_major=TH_VERSION_MAJOR;
+  _info->version_minor=TH_VERSION_MINOR;
+  _info->version_subminor=TH_VERSION_SUB;
+  _info->keyframe_granule_shift=6;
+}
+
+/*Resets _info to all zeros.
+  Nothing is freed here.*/
+void th_info_clear(th_info *_info){
+  memset(_info,0,sizeof(*_info));
+}
+
+
+
+/*Resets _tc to all zeros, i.e., to an empty comment list.
+  Nothing is freed here; use th_comment_clear() on a structure that already
+   holds comments.*/
+void th_comment_init(th_comment *_tc){
+  memset(_tc,0,sizeof(*_tc));
+}
+
+/*Appends a copy of the string _comment to the end of _tc's comment list.
+  The list is kept NULL-terminated, so room is always made for one more entry
+   than the number of comments.
+  If any allocation fails, the comment is not added and the comment count is
+   left unchanged.*/
+void th_comment_add(th_comment *_tc,const char *_comment){
+  char **entries;
+  int   *lengths;
+  char  *copy;
+  int    len;
+  int    idx;
+  /*The slot the new comment will occupy.*/
+  idx=_tc->comments;
+  /*Grow both arrays to hold the new entry plus the NULL terminator.*/
+  entries=_ogg_realloc(_tc->user_comments,
+   (idx+2)*sizeof(*_tc->user_comments));
+  if(entries==NULL)return;
+  _tc->user_comments=entries;
+  lengths=_ogg_realloc(_tc->comment_lengths,
+   (idx+2)*sizeof(*_tc->comment_lengths));
+  if(lengths==NULL)return;
+  _tc->comment_lengths=lengths;
+  len=strlen(_comment);
+  lengths[idx]=len;
+  /*+1 for the terminating '\0', which is copied along with the text.*/
+  copy=_ogg_malloc(len+1);
+  entries[idx]=copy;
+  if(copy==NULL)return;
+  memcpy(copy,_comment,len+1);
+  _tc->comments=idx+1;
+  _tc->user_comments[idx+1]=NULL;
+}
+
+/*Builds the string "_tag=_val" and appends it to _tc's comment list via
+   th_comment_add().
+  If the temporary string cannot be allocated, nothing is added.*/
+void th_comment_add_tag(th_comment *_tc,const char *_tag,const char *_val){
+  char *entry;
+  char *dst;
+  int   ntag;
+  int   nval;
+  ntag=strlen(_tag);
+  nval=strlen(_val);
+  /*Room for the tag, the '=' separator, the value, and the final '\0'.*/
+  entry=_ogg_malloc(ntag+nval+2);
+  if(entry!=NULL){
+    dst=entry;
+    memcpy(dst,_tag,ntag);
+    dst+=ntag;
+    *dst++='=';
+    /*Copying nval+1 bytes brings the value's '\0' terminator along.*/
+    memcpy(dst,_val,nval+1);
+    /*th_comment_add() makes its own copy, so ours can be released.*/
+    th_comment_add(_tc,entry);
+    _ogg_free(entry);
+  }
+}
+
+/*Looks up the value of the _count'th comment (counting from 0) whose tag
+   matches _tag, ignoring case.
+  Return: A pointer to the text following the '=' inside the stored comment
+           (not a copy), or NULL if there are not that many matches.*/
+char *th_comment_query(th_comment *_tc,const char *_tag,int _count){
+  long ci;
+  int  nmatches;
+  int  tag_len;
+  tag_len=strlen(_tag);
+  nmatches=0;
+  for(ci=0;ci<_tc->comments;ci++){
+    char *entry;
+    entry=_tc->user_comments[ci];
+    /*oc_tagcompare() returns non-zero for comments with a different tag.*/
+    if(oc_tagcompare(entry,_tag,tag_len))continue;
+    /*Skip over the tag and the '=' to reach the value.*/
+    if(nmatches==_count)return entry+tag_len+1;
+    nmatches++;
+  }
+  /*Ran out of comments before reaching the requested match.*/
+  return NULL;
+}
+
+/*Counts the comments in _tc whose tag matches _tag, ignoring case.
+  Return: The number of matching comments.*/
+int th_comment_query_count(th_comment *_tc,const char *_tag){
+  long ci;
+  int  tag_len;
+  int  nmatches;
+  tag_len=strlen(_tag);
+  nmatches=0;
+  /*oc_tagcompare() returns 0 on a match, so its negation is 1 exactly when
+     the comment should be counted.*/
+  for(ci=0;ci<_tc->comments;ci++){
+    nmatches+=!oc_tagcompare(_tc->user_comments[ci],_tag,tag_len);
+  }
+  return nmatches;
+}
+
+/*Frees every comment string, the comment and length arrays, and the vendor
+   string owned by _tc, then resets the structure to all zeros.
+  A NULL _tc is ignored.*/
+void th_comment_clear(th_comment *_tc){
+  long ci;
+  if(_tc==NULL)return;
+  /*Release the individual strings before the array that holds them.*/
+  for(ci=0;ci<_tc->comments;ci++){
+    _ogg_free(_tc->user_comments[ci]);
+  }
+  _ogg_free(_tc->user_comments);
+  _ogg_free(_tc->comment_lengths);
+  _ogg_free(_tc->vendor);
+  memset(_tc,0,sizeof(*_tc));
+}

+ 210 - 0
jni/libtheora-1.2.0alpha1/lib/internal.c

@@ -0,0 +1,210 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id$
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+#include "internal.h"
+
+
+
+/*A map from the index in the zig zag scan to the coefficient number in a
+   block.
+  All zig zag indices beyond 63 are sent to coefficient 64, so that zero runs
+   past the end of a block in bogus streams get mapped to a known location.
+  Note that coefficient 64 lies one past the end of a 64-entry block, so any
+   buffer indexed through this table needs room for at least 65 entries.*/
+const unsigned char OC_FZIG_ZAG[128]={
+   0, 1, 8,16, 9, 2, 3,10,
+  17,24,32,25,18,11, 4, 5,
+  12,19,26,33,40,48,41,34,
+  27,20,13, 6, 7,14,21,28,
+  35,42,49,56,57,50,43,36,
+  29,22,15,23,30,37,44,51,
+  58,59,52,45,38,31,39,46,
+  53,60,61,54,47,55,62,63,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64
+};
+
+/*A map from the coefficient number in a block to its index in the zig zag
+   scan.
+  This is the inverse of the first 64 entries of OC_FZIG_ZAG.*/
+const unsigned char OC_IZIG_ZAG[64]={
+   0, 1, 5, 6,14,15,27,28,
+   2, 4, 7,13,16,26,29,42,
+   3, 8,12,17,25,30,41,43,
+   9,11,18,24,31,40,44,53,
+  10,19,23,32,39,45,52,54,
+  20,22,33,38,46,51,55,60,
+  21,34,37,47,50,56,59,61,
+  35,36,48,49,57,58,62,63
+};
+
+/*A map from physical macro block ordering to bitstream macro block
+   ordering within a super block.*/
+const unsigned char OC_MB_MAP[2][2]={{0,3},{1,2}};
+
+/*A list of the indices in the oc_mb.map array that can be valid for each of
+   the various chroma decimation types.
+  There is one row per pixel format.
+  Rows with fewer than 12 valid indices are padded with zeros by the
+   initializer; OC_MB_MAP_NIDXS gives the number of leading entries in each
+   row that are meaningful.*/
+const unsigned char OC_MB_MAP_IDXS[TH_PF_NFORMATS][12]={
+  {0,1,2,3,4,8},
+  {0,1,2,3,4,5,8,9},
+  {0,1,2,3,4,6,8,10},
+  {0,1,2,3,4,5,6,7,8,9,10,11}
+};
+
+/*The number of indices in the oc_mb.map array that can be valid for each of
+   the various chroma decimation types.
+  Entry i is the number of valid entries in row i of OC_MB_MAP_IDXS.*/
+const unsigned char OC_MB_MAP_NIDXS[TH_PF_NFORMATS]={6,8,8,12};
+
+/*The number of extra bits that are coded with each of the DCT tokens.
+  Each DCT token has some fixed number of additional bits (possibly 0) stored
+   after the token itself, containing, for example, coefficient magnitude,
+   sign bits, etc.
+  The table is indexed by token value.*/
+const unsigned char OC_DCT_TOKEN_EXTRA_BITS[TH_NDCT_TOKENS]={
+  0,0,0,2,3,4,12,3,6,
+  0,0,0,0,
+  1,1,1,1,2,3,4,5,6,10,
+  1,1,1,1,1,3,4,
+  2,3
+};
+
+
+
+/*Returns the number of bits needed to represent _v: one more than the index
+   of its highest set bit, or 0 if _v is 0.*/
+int oc_ilog(unsigned _v){
+  int nbits;
+  nbits=0;
+  /*Shift bits out of the bottom until none are left, counting as we go.*/
+  while(_v!=0){
+    _v>>=1;
+    nbits++;
+  }
+  return nbits;
+}
+
+
+
+/*Allocates _sz bytes whose address is a multiple of _align.
+  _align must be a power of two no larger than UCHAR_MAX+1, since the amount
+   of padding inserted before the returned pointer is recorded in a single
+   byte.
+  The result must be released with oc_aligned_free().
+  Return: The aligned pointer, or NULL if _align is invalid, _sz+_align would
+           overflow, or the underlying allocation fails.*/
+void *oc_aligned_malloc(size_t _sz,size_t _align){
+  unsigned char *p;
+  if(_align-1>UCHAR_MAX||(_align&_align-1)||_sz>~(size_t)0-_align)return NULL;
+  p=(unsigned char *)_ogg_malloc(_sz+_align);
+  if(p!=NULL){
+    int offs;
+    /*We need offs+1 bytes of padding (at least one, to hold offs itself) such
+       that p+offs+1 is a multiple of _align, i.e., offs must be congruent to
+       -p-1 modulo _align.
+      Using p-1 here instead only yields an aligned result when p is already a
+       multiple of _align/2.*/
+    offs=(int)(-(p-(unsigned char *)0)-1&_align-1);
+    /*Record the padding in the byte just before the aligned pointer, where
+       oc_aligned_free() looks for it.*/
+    p[offs]=offs;
+    p+=offs+1;
+  }
+  return p;
+}
+
+/*Releases a buffer obtained from oc_aligned_malloc().
+  The byte just before _ptr holds the number of padding bytes that precede
+   it, which is used to recover the start of the underlying allocation.
+  A NULL _ptr is ignored.*/
+void oc_aligned_free(void *_ptr){
+  unsigned char *user;
+  if(_ptr==NULL)return;
+  user=(unsigned char *)_ptr;
+  /*Step back over the offset byte, then over the padding it records.*/
+  _ogg_free(user-1-user[-1]);
+}
+
+
+/*Allocates a 2D array as a single block: a table of _height row pointers
+   followed by _height rows of _width elements of _sz bytes each.
+  The row pointers are set up to point at their rows; the element data itself
+   is left uninitialized.
+  The array must be released with oc_free_2d().
+  Return: The table of row pointers, or NULL if the allocation fails or the
+           total size does not fit in a size_t.*/
+void **oc_malloc_2d(size_t _height,size_t _width,size_t _sz){
+  size_t  rowsz;
+  size_t  colsz;
+  size_t  datsz;
+  char   *ret;
+  /*Refuse dimensions whose byte counts would wrap around: the block would
+     be too small for the rows the pointers are set up to address.*/
+  if(_height>~(size_t)0/sizeof(void *))return NULL;
+  colsz=_height*sizeof(void *);
+  if(_width>0&&_sz>~(size_t)0/_width)return NULL;
+  rowsz=_sz*_width;
+  if(rowsz>0&&_height>~(size_t)0/rowsz)return NULL;
+  datsz=rowsz*_height;
+  if(datsz>~(size_t)0-colsz)return NULL;
+  /*Alloc array and row pointers.*/
+  ret=(char *)_ogg_malloc(datsz+colsz);
+  /*Initialize the array.*/
+  if(ret!=NULL){
+    size_t   i;
+    void   **p;
+    char    *datptr;
+    p=(void **)ret;
+    i=_height;
+    /*The row data starts right after the table of row pointers.*/
+    for(datptr=ret+colsz;i-->0;p++,datptr+=rowsz)*p=(void *)datptr;
+  }
+  return (void **)ret;
+}
+
+/*Allocates a 2D array as a single block: a table of _height row pointers
+   followed by _height rows of _width elements of _sz bytes each.
+  The row pointers are set up to point at their rows, and the element data is
+   initialized to all zeros.
+  The array must be released with oc_free_2d().
+  Return: The table of row pointers, or NULL if the allocation fails or the
+           total size does not fit in a size_t.*/
+void **oc_calloc_2d(size_t _height,size_t _width,size_t _sz){
+  size_t  colsz;
+  size_t  rowsz;
+  size_t  datsz;
+  char   *ret;
+  /*Refuse dimensions whose byte counts would wrap around: the block would
+     be too small for the rows the pointers are set up to address.*/
+  if(_height>~(size_t)0/sizeof(void *))return NULL;
+  colsz=_height*sizeof(void *);
+  if(_width>0&&_sz>~(size_t)0/_width)return NULL;
+  rowsz=_sz*_width;
+  if(rowsz>0&&_height>~(size_t)0/rowsz)return NULL;
+  datsz=rowsz*_height;
+  if(datsz>~(size_t)0-colsz)return NULL;
+  /*Alloc array and row pointers.*/
+  ret=(char *)_ogg_calloc(datsz+colsz,1);
+  /*Initialize the array.*/
+  if(ret!=NULL){
+    size_t   i;
+    void   **p;
+    char    *datptr;
+    p=(void **)ret;
+    i=_height;
+    /*The row data starts right after the table of row pointers.*/
+    for(datptr=ret+colsz;i-->0;p++,datptr+=rowsz)*p=(void *)datptr;
+  }
+  return (void **)ret;
+}
+
+/*Releases an array allocated by oc_malloc_2d() or oc_calloc_2d().
+  The row pointers and the row data live in one allocation, so a single free
+   releases both.*/
+void oc_free_2d(void *_ptr){
+  _ogg_free(_ptr);
+}
+
+/*Makes _dst describe the same image data as _src, but traversed in the
+   opposite vertical direction.
+  For each of the 3 planes the stride is negated and the data pointer is moved
+   to the start of what was the last row; no pixel data is copied.
+  _dst: The destination buffer.
+        This can be the same as _src.
+  _src: The source buffer.*/
+void oc_ycbcr_buffer_flip(th_ycbcr_buffer _dst,
+ const th_ycbcr_buffer _src){
+  int pli;
+  for(pli=0;pli<3;pli++){
+    int height;
+    int stride;
+    /*Read these up front, since _dst may alias _src.*/
+    height=_src[pli].height;
+    stride=-_src[pli].stride;
+    _dst[pli].width=_src[pli].width;
+    _dst[pli].height=height;
+    _dst[pli].stride=stride;
+    /*(1-height)*stride is (height-1) rows forward in terms of the original
+       stride.*/
+    _dst[pli].data=_src[pli].data+(1-height)*(ptrdiff_t)stride;
+  }
+}
+
+/*Returns this library's vendor string (OC_VENDOR_STRING).
+  The string is a constant and must not be modified or freed.*/
+const char *th_version_string(void){
+  return OC_VENDOR_STRING;
+}
+
+/*Returns the bitstream version this library implements, packed into a single
+   integer as (major<<16)+(minor<<8)+subminor.*/
+ogg_uint32_t th_version_number(void){
+  return (TH_VERSION_MAJOR<<16)+(TH_VERSION_MINOR<<8)+TH_VERSION_SUB;
+}
+
+/*Classifies a packet as a header or a video data packet by looking at the
+   top bit of its first byte.
+  A 0-byte packet is treated as a video data packet.
+  Return: 1 for a header packet, 0 for a data packet.*/
+int th_packet_isheader(ogg_packet *_op){
+  /*An empty packet has no first byte to inspect.*/
+  if(_op->bytes<=0)return 0;
+  return _op->packet[0]>>7;
+}
+
+/*Classifies a video data packet as a key frame or a delta frame by looking
+   at the top two bits of its first byte.
+  A 0-byte packet is treated as a delta frame.
+  Return: 1 for a key frame, 0 for a delta frame, and -1 for a header
+           packet.*/
+int th_packet_iskeyframe(ogg_packet *_op){
+  /*An empty packet has no first byte to inspect.*/
+  if(_op->bytes<=0)return 0;
+  /*The top bit marks a header packet.*/
+  if(_op->packet[0]&0x80)return -1;
+  /*In a data packet, a clear 0x40 bit marks a key frame.*/
+  return !(_op->packet[0]&0x40);
+}

+ 116 - 0
jni/libtheora-1.2.0alpha1/lib/internal.h

@@ -0,0 +1,116 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id$
+
+ ********************************************************************/
+#if !defined(_internal_H)
+# define _internal_H (1)
+# include <stdlib.h>
+# include <limits.h>
+# if defined(HAVE_CONFIG_H)
+#  include "config.h"
+# endif
+# include "theora/codec.h"
+# include "theora/theora.h"
+# include "ocintrin.h"
+
+/*Provide __GNUC_PREREQ(_maj,_min) when the system headers have not already
+   defined it.
+  It is true if the compiler defines __GNUC__ and __GNUC_MINOR__ with a
+   version of at least _maj._min, and 0 for compilers that do not define
+   them.*/
+# if !defined(__GNUC_PREREQ)
+#  if defined(__GNUC__)&&defined(__GNUC_MINOR__)
+#   define __GNUC_PREREQ(_maj,_min) \
+ ((__GNUC__<<16)+__GNUC_MINOR__>=((_maj)<<16)+(_min))
+#  else
+#   define __GNUC_PREREQ(_maj,_min) 0
+#  endif
+# endif
+
+# if defined(_MSC_VER)
+/*Disable missing EMMS warnings.*/
+#  pragma warning(disable:4799)
+/*Thank you Microsoft, I know the order of operations.*/
+#  pragma warning(disable:4554)
+# endif
+/*You, too, gcc.*/
+# if __GNUC_PREREQ(4,2)
+#  pragma GCC diagnostic ignored "-Wparentheses"
+# endif
+
+/*Some assembly constructs require aligned operands.
+  The following macros are _only_ intended for structure member declarations.
+  Although they will sometimes work on stack variables, gcc will often silently
+   ignore them.
+  A separate set of macros could be made for manual stack alignment, but we
+   don't actually require it anywhere.*/
+# if defined(OC_X86_ASM)||defined(OC_ARM_ASM)
+#  if defined(__GNUC__)
+#   define OC_ALIGN8(expr) expr __attribute__((aligned(8)))
+#   define OC_ALIGN16(expr) expr __attribute__((aligned(16)))
+#  elif defined(_MSC_VER)
+#   define OC_ALIGN8(expr) __declspec (align(8)) expr
+#   define OC_ALIGN16(expr) __declspec (align(16)) expr
+#  else
+#   error "Alignment macros required for this platform."
+#  endif
+# endif
+# if !defined(OC_ALIGN8)
+#  define OC_ALIGN8(expr) expr
+# endif
+# if !defined(OC_ALIGN16)
+#  define OC_ALIGN16(expr) expr
+# endif
+
+
+
+/*This library's version.*/
+# define OC_VENDOR_STRING "Xiph.Org libtheora 1.2.0alpha 20100924 (Ptalarbvorm)"
+
+/*Theora bitstream version.*/
+# define TH_VERSION_MAJOR (3)
+# define TH_VERSION_MINOR (2)
+# define TH_VERSION_SUB   (1)
+/*Tests whether the version recorded in _info (a pointer to a structure with
+   version_major, version_minor, and version_subminor fields) is at least
+   _maj._min._sub.
+  The comparison is lexicographic: major first, then minor, then subminor.
+  Note that _info is evaluated more than once.*/
+# define TH_VERSION_CHECK(_info,_maj,_min,_sub) \
+ ((_info)->version_major>(_maj)||(_info)->version_major==(_maj)&& \
+ ((_info)->version_minor>(_min)||(_info)->version_minor==(_min)&& \
+ (_info)->version_subminor>=(_sub)))
+
+
+
+/*A map from the index in the zig zag scan to the coefficient number in a
+   block.*/
+extern const unsigned char OC_FZIG_ZAG[128];
+/*A map from the coefficient number in a block to its index in the zig zag
+   scan.*/
+extern const unsigned char OC_IZIG_ZAG[64];
+/*A map from physical macro block ordering to bitstream macro block
+   ordering within a super block.*/
+extern const unsigned char OC_MB_MAP[2][2];
+/*A list of the indices in the oc_mb_map array that can be valid for each of
+   the various chroma decimation types.*/
+extern const unsigned char OC_MB_MAP_IDXS[TH_PF_NFORMATS][12];
+/*The number of indices in the oc_mb_map array that can be valid for each of
+   the various chroma decimation types.*/
+extern const unsigned char OC_MB_MAP_NIDXS[TH_PF_NFORMATS];
+
+
+
+/*Returns the number of bits needed to represent _v (0 if _v is 0).*/
+int oc_ilog(unsigned _v);
+/*Allocates _sz bytes aligned to _align, which must be a power of two no
+   larger than UCHAR_MAX+1; release the result with oc_aligned_free().*/
+void *oc_aligned_malloc(size_t _sz,size_t _align);
+/*Releases a buffer obtained from oc_aligned_malloc(); NULL is ignored.*/
+void oc_aligned_free(void *_ptr);
+/*Allocate a _height by _width array of _sz-byte elements, addressed through
+   a table of row pointers; the calloc version zeros the element data.
+  Release the result with oc_free_2d().*/
+void **oc_malloc_2d(size_t _height,size_t _width,size_t _sz);
+void **oc_calloc_2d(size_t _height,size_t _width,size_t _sz);
+void oc_free_2d(void *_ptr);
+
+/*Makes _dst describe the same image data as _src, but with the opposite
+   vertical orientation; _dst may be the same as _src.*/
+void oc_ycbcr_buffer_flip(th_ycbcr_buffer _dst,
+ const th_ycbcr_buffer _src);
+
+#endif

+ 314 - 0
jni/libtheora-1.2.0alpha1/lib/mathops.c

@@ -0,0 +1,314 @@
+#include "internal.h"
+#include "mathops.h"
+
+/*The fastest fallback strategy for platforms with fast multiplication appears
+   to be based on de Bruijn sequences~\cite{LP98}.
+  Define OC_ILOG_NODEBRUIJN to use a simpler fallback on platforms where
+   multiplication or table lookups are too expensive.
+
+  @UNPUBLISHED{LP98,
+    author="Charles E. Leiserson and Harald Prokop",
+    title="Using de {Bruijn} Sequences to Index a 1 in a Computer Word",
+    month=Jun,
+    year=1998,
+    note="\url{http://supertech.csail.mit.edu/papers/debruijn.pdf}"
+  }*/
+#if !defined(OC_ILOG_NODEBRUIJN)&&!defined(OC_CLZ32)
+static const unsigned char OC_DEBRUIJN_IDX32[32]={
+   0, 1,28, 2,29,14,24, 3,30,22,20,15,25,17, 4, 8,
+  31,27,13,23,21,19,16, 7,26,12,18, 6,11, 5,10, 9
+};
+#endif
+
+/*Returns the number of significant bits in _v: floor(log2(_v))+1, or 0 when
+   _v is 0.*/
+int oc_ilog32(ogg_uint32_t _v){
+#if defined(OC_CLZ32)
+  /*Derive the bit count from the leading-zero count.
+    The -!!_v mask is all ones for a non-zero _v and zero otherwise, which
+     forces the result to 0 when _v is 0.*/
+  return (OC_CLZ32_OFFS-OC_CLZ32(_v))&-!!_v;
+#else
+# if defined(OC_ILOG_NODEBRUIJN)
+  /*Branchless binary search that needs neither a multiply nor a table.
+    Each step tests whether the upper half of the remaining range is occupied,
+     shifts it down if so, and records the shift amount in the result.
+    The original authors measured this as the fastest multiplication-free
+     variant on a Pentium M.*/
+  int nbits;
+  int shift;
+  nbits=_v>0;
+  shift=(_v>0xFFFFU)<<4;
+  _v>>=shift;
+  nbits|=shift;
+  shift=(_v>0xFFU)<<3;
+  _v>>=shift;
+  nbits|=shift;
+  shift=(_v>0xFU)<<2;
+  _v>>=shift;
+  nbits|=shift;
+  shift=(_v>3)<<1;
+  _v>>=shift;
+  nbits|=shift;
+  nbits+=_v>1;
+  return nbits;
+# else
+  /*De Bruijn sequence variant, preferable when multiplication is cheap.
+    First copy the highest set bit into every lower position.*/
+  int nbits;
+  _v|=_v>>1;
+  _v|=_v>>2;
+  _v|=_v>>4;
+  _v|=_v>>8;
+  _v|=_v>>16;
+  /*The low bit is now set exactly when the input was non-zero.*/
+  nbits=_v&1;
+  /*Reduce to a single set bit and look up its position via the top five bits
+     of the product.*/
+  _v=(_v>>1)+1;
+  nbits+=OC_DEBRUIJN_IDX32[((_v*0x77CB531U)>>27)&0x1F];
+  return nbits;
+# endif
+#endif
+}
+
+/*64-bit counterpart of oc_ilog32(): the number of significant bits in _v.*/
+int oc_ilog64(ogg_int64_t _v){
+#if defined(OC_CLZ64)
+  /*The -!!_v mask forces the result to 0 when _v is 0.*/
+  return (OC_CLZ64_OFFS-OC_CLZ64(_v))&-!!_v;
+#else
+/*Without a fast 64-bit primitive, pick the 32-bit half that holds the top set
+   bit and process that half on its own.*/
+# if defined(OC_ILOG_NODEBRUIJN)|| \
+ defined(OC_CLZ32)||LONG_MAX<9223372036854775807LL
+  ogg_uint32_t half;
+  int          nbits;
+  int          shift;
+  /*shift is 32 when anything above the low 32 bits is set, otherwise 0.*/
+  shift=(_v>0xFFFFFFFFU)<<5;
+  half=(ogg_uint32_t)(_v>>shift);
+#  if defined(OC_CLZ32)
+  nbits=(shift+OC_CLZ32_OFFS-OC_CLZ32(half))&-!!half;
+#  elif defined(OC_ILOG_NODEBRUIJN)
+  /*Same branchless binary search as the 32-bit routine, seeded with the
+     contribution of the selected half.*/
+  nbits=(half>0)|shift;
+  shift=(half>0xFFFFU)<<4;
+  half>>=shift;
+  nbits|=shift;
+  shift=(half>0xFFU)<<3;
+  half>>=shift;
+  nbits|=shift;
+  shift=(half>0xFU)<<2;
+  half>>=shift;
+  nbits|=shift;
+  shift=(half>3)<<1;
+  half>>=shift;
+  nbits|=shift;
+  nbits+=half>1;
+#  else
+  /*32-bit de Bruijn lookup on the selected half.*/
+  half|=half>>1;
+  half|=half>>2;
+  half|=half>>4;
+  half|=half>>8;
+  half|=half>>16;
+  nbits=(half&1)|shift;
+  half=(half>>1)+1;
+  nbits+=OC_DEBRUIJN_IDX32[((half*0x77CB531U)>>27)&0x1F];
+#  endif
+  return nbits;
+/*Otherwise a single 64-bit multiply does the whole job.*/
+# else
+  static const unsigned char OC_DEBRUIJN_IDX64[64]={
+     0, 1, 2, 7, 3,13, 8,19, 4,25,14,28, 9,34,20,40,
+     5,17,26,38,15,46,29,48,10,31,35,54,21,50,41,57,
+    63, 6,12,18,24,27,33,39,16,37,45,47,30,53,49,56,
+    62,11,23,32,36,44,52,55,61,22,43,51,60,42,59,58
+  };
+  int nbits;
+  /*Copy the highest set bit into every lower position.*/
+  _v|=_v>>1;
+  _v|=_v>>2;
+  _v|=_v>>4;
+  _v|=_v>>8;
+  _v|=_v>>16;
+  _v|=_v>>32;
+  nbits=((int)_v)&1;
+  _v=(_v>>1)+1;
+  nbits+=OC_DEBRUIJN_IDX64[((_v*0x218A392CD3D5DBF)>>58)&0x3F];
+  return nbits;
+# endif
+#endif
+}
+
+/*round(2**(62+i)*atanh(2**(-(i+1)))/log(2))*/
+static const ogg_int64_t OC_ATANH_LOG2[32]={
+  0x32B803473F7AD0F4LL,0x2F2A71BD4E25E916LL,0x2E68B244BB93BA06LL,
+  0x2E39FB9198CE62E4LL,0x2E2E683F68565C8FLL,0x2E2B850BE2077FC1LL,
+  0x2E2ACC58FE7B78DBLL,0x2E2A9E2DE52FD5F2LL,0x2E2A92A338D53EECLL,
+  0x2E2A8FC08F5E19B6LL,0x2E2A8F07E51A485ELL,0x2E2A8ED9BA8AF388LL,
+  0x2E2A8ECE2FE7384ALL,0x2E2A8ECB4D3E4B1ALL,0x2E2A8ECA94940FE8LL,
+  0x2E2A8ECA6669811DLL,0x2E2A8ECA5ADEDD6ALL,0x2E2A8ECA57FC347ELL,
+  0x2E2A8ECA57438A43LL,0x2E2A8ECA57155FB4LL,0x2E2A8ECA5709D510LL,
+  0x2E2A8ECA5706F267LL,0x2E2A8ECA570639BDLL,0x2E2A8ECA57060B92LL,
+  0x2E2A8ECA57060008LL,0x2E2A8ECA5705FD25LL,0x2E2A8ECA5705FC6CLL,
+  0x2E2A8ECA5705FC3ELL,0x2E2A8ECA5705FC33LL,0x2E2A8ECA5705FC30LL,
+  0x2E2A8ECA5705FC2FLL,0x2E2A8ECA5705FC2FLL
+};
+
+/*Computes the binary exponential of _z, a log base 2 in Q57 format.
+  The integer part of the logarithm is _z>>57.
+  Returns 0 when that integer part is negative and 0x7FFFFFFFFFFFFFFF when it
+   is 63 or larger.
+  Otherwise the fractional part is exponentiated with CORDIC iterations and
+   the result is scaled by the integer part, with rounding, before being
+   returned.*/
+ogg_int64_t oc_bexp64(ogg_int64_t _z){
+  ogg_int64_t w;
+  ogg_int64_t z;
+  int         ipart;
+  ipart=(int)(_z>>57);
+  if(ipart<0)return 0;
+  if(ipart>=63)return 0x7FFFFFFFFFFFFFFFLL;
+  z=_z-OC_Q57(ipart);/*The fractional part of the logarithm, still in Q57.*/
+  if(z){
+    ogg_int64_t mask;
+    long        wlo;
+    int         i;
+    /*C doesn't give us 64x64->128 muls, so we use CORDIC.
+      This is not particularly fast, but it's not being used in time-critical
+       code; it is very accurate.*/
+    /*z is the fractional part of the log in Q62 format.
+      We need 1 bit of headroom since the magnitude can get larger than 1
+       during the iteration, and a sign bit.*/
+    z<<=5;
+    /*w is the exponential in Q61 format (since it also needs headroom and can
+       get as large as 2.0); we could get another bit if we dropped the sign,
+       but we'll recover that bit later anyway.
+      Ideally this should start out as
+        \lim_{n->\infty} 2^{61}/\product_{i=1}^n \sqrt{1-2^{-2i}}
+       but in order to guarantee convergence we have to repeat iterations 4,
+        13 (=3*4+1), and 40 (=3*13+1, etc.), so it winds up somewhat larger.*/
+    w=0x26A3D0E401DD846DLL;
+    for(i=0;;i++){
+      mask=-(z<0);/*All ones when z is negative, zero otherwise.*/
+      w+=(w>>i+1)+mask^mask;/*(x+mask)^mask negates x when mask is all ones.*/
+      z-=OC_ATANH_LOG2[i]+mask^mask;
+      /*Repeat iteration 4.
+        Breaking here skips both the i++ and the z<<=1 below, so the next loop
+         runs index 3 a second time on the unscaled z.*/
+      if(i>=3)break;
+      z<<=1;
+    }
+    for(;;i++){
+      mask=-(z<0);
+      w+=(w>>i+1)+mask^mask;
+      z-=OC_ATANH_LOG2[i]+mask^mask;
+      /*Repeat iteration 13.*/
+      if(i>=12)break;
+      z<<=1;
+    }
+    for(;i<32;i++){
+      mask=-(z<0);
+      w+=(w>>i+1)+mask^mask;
+      z=z-(OC_ATANH_LOG2[i]+mask^mask)<<1;
+    }
+    wlo=0;
+    /*Skip the remaining iterations unless we really require that much
+       precision.
+      We could have bailed out earlier for smaller iparts, but that would
+       require initializing w from a table, as the limit doesn't converge to
+       61-bit precision until n=30.*/
+    if(ipart>30){
+      /*For these iterations, we just update the low bits, as the high bits
+         can't possibly be affected.
+        OC_ATANH_LOG2 has also converged (it actually did so one iteration
+         earlier, but that's no reason for an extra special case).*/
+      for(;;i++){
+        mask=-(z<0);
+        wlo+=(w>>i)+mask^mask;
+        z-=OC_ATANH_LOG2[31]+mask^mask;
+        /*Repeat iteration 40.*/
+        if(i>=39)break;
+        z<<=1;
+      }
+      for(;i<61;i++){
+        mask=-(z<0);
+        wlo+=(w>>i)+mask^mask;
+        z=z-(OC_ATANH_LOG2[31]+mask^mask)<<1;
+      }
+    }
+    w=(w<<1)+wlo;/*Promote w from Q61 to Q62 and fold in the low-order bits.*/
+  }
+  else w=(ogg_int64_t)1<<62;/*Zero fractional part: exactly 1.0 in Q62.*/
+  if(ipart<62)w=(w>>61-ipart)+1>>1;/*Shift right by 62-ipart, rounding.*/
+  return w;
+}
+
+/*Computes the binary logarithm of _w, returned in Q57 format.
+  Returns -1 when _w is zero or negative.
+  The integer part is OC_ILOGNZ_64(_w)-1; _w is then normalized so that its top
+   set bit sits at bit 61, and the fractional part is found with CORDIC
+   iterations unless the normalized value is an exact power of two.*/
+ogg_int64_t oc_blog64(ogg_int64_t _w){
+  ogg_int64_t z;
+  int         ipart;
+  if(_w<=0)return -1;
+  ipart=OC_ILOGNZ_64(_w)-1;
+  if(ipart>61)_w>>=ipart-61;
+  else _w<<=61-ipart;
+  z=0;
+  if(_w&_w-1){/*True when more than one bit is set, i.e. not a power of two.*/
+    ogg_int64_t x;
+    ogg_int64_t y;
+    ogg_int64_t u;
+    ogg_int64_t mask;
+    int         i;
+    /*C doesn't give us 64x64->128 muls, so we use CORDIC.
+      This is not particularly fast, but it's not being used in time-critical
+       code; it is very accurate.*/
+    /*z is the fractional part of the log in Q61 format.*/
+    /*x and y are the cosh() and sinh(), respectively, in Q61 format.
+      We are computing z=2*atanh(y/x)=2*atanh((_w-1)/(_w+1)).*/
+    x=_w+((ogg_int64_t)1<<61);
+    y=_w-((ogg_int64_t)1<<61);
+    for(i=0;i<4;i++){
+      mask=-(y<0);/*All ones when y is negative, zero otherwise.*/
+      z+=(OC_ATANH_LOG2[i]>>i)+mask^mask;/*(v+mask)^mask negates v if mask set.*/
+      u=x>>i+1;
+      x-=(y>>i+1)+mask^mask;
+      y-=u+mask^mask;
+    }
+    /*Repeat iteration 4.
+      The i-- in the loop header steps back to the index that just ran.*/
+    for(i--;i<13;i++){
+      mask=-(y<0);
+      z+=(OC_ATANH_LOG2[i]>>i)+mask^mask;
+      u=x>>i+1;
+      x-=(y>>i+1)+mask^mask;
+      y-=u+mask^mask;
+    }
+    /*Repeat iteration 13.*/
+    for(i--;i<32;i++){
+      mask=-(y<0);
+      z+=(OC_ATANH_LOG2[i]>>i)+mask^mask;
+      u=x>>i+1;
+      x-=(y>>i+1)+mask^mask;
+      y-=u+mask^mask;
+    }
+    /*OC_ATANH_LOG2 has converged.*/
+    for(;i<40;i++){
+      mask=-(y<0);
+      z+=(OC_ATANH_LOG2[31]>>i)+mask^mask;
+      u=x>>i+1;
+      x-=(y>>i+1)+mask^mask;
+      y-=u+mask^mask;
+    }
+    /*Repeat iteration 40.*/
+    for(i--;i<62;i++){
+      mask=-(y<0);
+      z+=(OC_ATANH_LOG2[31]>>i)+mask^mask;
+      u=x>>i+1;
+      x-=(y>>i+1)+mask^mask;
+      y-=u+mask^mask;
+    }
+    z=z+8>>4;/*Round the Q61 fraction to Q57.*/
+  }
+  return OC_Q57(ipart)+z;
+}
+
+/*Approximates 2**_z with a fixed-point polynomial.
+  _z is in Q10 format; the result is a plain (Q0) integer.*/
+ogg_uint32_t oc_bexp32_q10(int _z){
+  unsigned frac;
+  unsigned mant;
+  int      shift;
+  /*The result is the mantissa shifted right by 14 minus the integer part of
+     _z.*/
+  shift=14-(_z>>10);
+  /*Scale the 10 fractional bits up by 4 more bits for the polynomial.*/
+  frac=(_z&((1<<10)-1))<<4;
+  /*Evaluate the polynomial in Horner form, one coefficient per step.*/
+  mant=((frac*3548)>>15)+6817;
+  mant=((frac*mant)>>15)+15823;
+  mant=((frac*mant)>>15)+22708;
+  mant=((frac*mant)>>15)+16384;
+  /*Shift right with rounding, or left when the integer part exceeds 13.*/
+  if(shift>0)return (mant+(1<<(shift-1)))>>shift;
+  return mant<<(-shift);
+}
+
+/*Approximates log2(_w) with a fixed-point polynomial.
+  _w is a plain (Q0) integer; the result is in Q10 format.
+  Returns -1 when _w is 0.*/
+int oc_blog32_q10(ogg_uint32_t _w){
+  ogg_uint32_t norm;
+  int          nbits;
+  int          x;
+  int          poly;
+  if(_w<=0)return -1;
+  nbits=OC_ILOGNZ_32(_w);
+  /*Normalize _w so that its highest set bit lands on bit 15.*/
+  if(nbits-16>0)norm=_w>>(nbits-16);
+  else norm=_w<<(16-nbits);
+  /*Re-center the normalized value before evaluating the polynomial.*/
+  x=norm-32768-16384;
+  /*Evaluate the polynomial in Horner form, one coefficient per step.*/
+  poly=((x*-1402)>>15)+2546;
+  poly=((x*poly)>>15)-5216;
+  poly=((x*poly)>>15)+15745;
+  poly=((x*poly)>>15)-6793;
+  return (nbits<<10)+(poly>>4);
+}

+ 143 - 0
jni/libtheora-1.2.0alpha1/lib/mathops.h

@@ -0,0 +1,143 @@
+#if !defined(_mathops_H)
+# define _mathops_H (1)
+# include <ogg/ogg.h>
+
+/*Detect count-leading-zeros builtins and publish them as OC_CLZ32 and
+   OC_CLZ64, together with the bit width of the type each one operates on
+   (OC_CLZ32_OFFS and OC_CLZ64_OFFS).
+  __GNUC_PREREQ is not provided by every toolchain, and using it as a
+   function-like macro in an #if while it is undefined is a preprocessing
+   error, so test for its presence first.
+  When it is missing, none of the OC_CLZ* macros are defined and callers fall
+   back to the portable oc_ilog32() and oc_ilog64() routines.*/
+# if defined(__GNUC_PREREQ)
+#  if __GNUC_PREREQ(3,4)
+#   include <limits.h>
+/*Note the casts to (int) below: this prevents OC_CLZ{32|64}_OFFS from
+   "upgrading" the type of an entire expression to an (unsigned) size_t.*/
+#   if INT_MAX>=2147483647
+#    define OC_CLZ32_OFFS ((int)sizeof(unsigned)*CHAR_BIT)
+#    define OC_CLZ32(_x) (__builtin_clz(_x))
+#   elif LONG_MAX>=2147483647L
+#    define OC_CLZ32_OFFS ((int)sizeof(unsigned long)*CHAR_BIT)
+#    define OC_CLZ32(_x) (__builtin_clzl(_x))
+#   endif
+#   if INT_MAX>=9223372036854775807LL
+#    define OC_CLZ64_OFFS ((int)sizeof(unsigned)*CHAR_BIT)
+#    define OC_CLZ64(_x) (__builtin_clz(_x))
+#   elif LONG_MAX>=9223372036854775807LL
+#    define OC_CLZ64_OFFS ((int)sizeof(unsigned long)*CHAR_BIT)
+#    define OC_CLZ64(_x) (__builtin_clzl(_x))
+#   elif LLONG_MAX>=9223372036854775807LL|| \
+    __LONG_LONG_MAX__>=9223372036854775807LL
+#    define OC_CLZ64_OFFS ((int)sizeof(unsigned long long)*CHAR_BIT)
+#    define OC_CLZ64(_x) (__builtin_clzll(_x))
+#   endif
+#  endif
+# endif
+
+
+
+/**
+ * oc_ilog32 - Integer binary logarithm of a 32-bit value.
+ * @_v: A 32-bit value.
+ * Returns floor(log2(_v))+1, or 0 if _v==0.
+ * This is the number of bits that would be required to represent _v in two's
+ *  complement notation with all of the leading zeros stripped.
+ * The OC_ILOG_32() or OC_ILOGNZ_32() macros may be able to use a builtin
+ *  function instead, which should be faster.
+ */
+int oc_ilog32(ogg_uint32_t _v);
+/**
+ * oc_ilog64 - Integer binary logarithm of a 64-bit value.
+ * @_v: A 64-bit value.
+ * Returns floor(log2(_v))+1, or 0 if _v==0.
+ * This is the number of bits that would be required to represent _v in two's
+ *  complement notation with all of the leading zeros stripped.
+ * The OC_ILOG_64() or OC_ILOGNZ_64() macros may be able to use a builtin
+ *  function instead, which should be faster.
+ */
+int oc_ilog64(ogg_int64_t _v);
+
+
+# if defined(OC_CLZ32)
+/**
+ * OC_ILOGNZ_32 - Integer binary logarithm of a non-zero 32-bit value.
+ * @_v: A non-zero 32-bit value.
+ * Returns floor(log2(_v))+1.
+ * This is the number of bits that would be required to represent _v in two's
+ *  complement notation with all of the leading zeros stripped.
+ * If _v is zero, the return value is undefined; use OC_ILOG_32() instead.
+ */
+#  define OC_ILOGNZ_32(_v) (OC_CLZ32_OFFS-OC_CLZ32(_v))
+/**
+ * OC_ILOG_32 - Integer binary logarithm of a 32-bit value.
+ * @_v: A 32-bit value.
+ * Returns floor(log2(_v))+1, or 0 if _v==0.
+ * This is the number of bits that would be required to represent _v in two's
+ *  complement notation with all of the leading zeros stripped.
+ */
+#  define OC_ILOG_32(_v)   (OC_ILOGNZ_32(_v)&-!!(_v))
+# else
+#  define OC_ILOGNZ_32(_v) (oc_ilog32(_v))
+#  define OC_ILOG_32(_v)   (oc_ilog32(_v))
+# endif
+
+/*The builtin is published as OC_CLZ64 and OC_CLZ64_OFFS, so those are the
+   names that must be tested and used here; a bare CLZ64 is never defined and
+   would leave this fast path permanently disabled.*/
+# if defined(OC_CLZ64)
+/**
+ * OC_ILOGNZ_64 - Integer binary logarithm of a non-zero 64-bit value.
+ * @_v: A non-zero 64-bit value.
+ * Returns floor(log2(_v))+1.
+ * This is the number of bits that would be required to represent _v in two's
+ *  complement notation with all of the leading zeros stripped.
+ * If _v is zero, the return value is undefined; use OC_ILOG_64() instead.
+ */
+#  define OC_ILOGNZ_64(_v) (OC_CLZ64_OFFS-OC_CLZ64(_v))
+/**
+ * OC_ILOG_64 - Integer binary logarithm of a 64-bit value.
+ * @_v: A 64-bit value.
+ * Returns floor(log2(_v))+1, or 0 if _v==0.
+ * This is the number of bits that would be required to represent _v in two's
+ *  complement notation with all of the leading zeros stripped.
+ */
+#  define OC_ILOG_64(_v)   (OC_ILOGNZ_64(_v)&-!!(_v))
+# else
+#  define OC_ILOGNZ_64(_v) (oc_ilog64(_v))
+#  define OC_ILOG_64(_v)   (oc_ilog64(_v))
+# endif
+
+# define OC_STATIC_ILOG0(_v) (!!(_v))
+# define OC_STATIC_ILOG1(_v) (((_v)&0x2)?2:OC_STATIC_ILOG0(_v))
+# define OC_STATIC_ILOG2(_v) \
+ (((_v)&0xC)?2+OC_STATIC_ILOG1((_v)>>2):OC_STATIC_ILOG1(_v))
+# define OC_STATIC_ILOG3(_v) \
+ (((_v)&0xF0)?4+OC_STATIC_ILOG2((_v)>>4):OC_STATIC_ILOG2(_v))
+# define OC_STATIC_ILOG4(_v) \
+ (((_v)&0xFF00)?8+OC_STATIC_ILOG3((_v)>>8):OC_STATIC_ILOG3(_v))
+# define OC_STATIC_ILOG5(_v) \
+ (((_v)&0xFFFF0000)?16+OC_STATIC_ILOG4((_v)>>16):OC_STATIC_ILOG4(_v))
+# define OC_STATIC_ILOG6(_v) \
+ (((_v)&0xFFFFFFFF00000000ULL)?32+OC_STATIC_ILOG5((_v)>>32):OC_STATIC_ILOG5(_v))
+/**
+ * OC_STATIC_ILOG_32 - The integer logarithm of an (unsigned, 32-bit) constant.
+ * @_v: A non-negative 32-bit constant.
+ * Returns floor(log2(_v))+1, or 0 if _v==0.
+ * This is the number of bits that would be required to represent _v in two's
+ *  complement notation with all of the leading zeros stripped.
+ * This macro is suitable for evaluation at compile time, but it should not be
+ *  used on values that can change at runtime, as it operates via exhaustive
+ *  search.
+ * It expands to OC_STATIC_ILOG5(), which tests whether any of the upper 16
+ *  bits are set and then applies OC_STATIC_ILOG4() down to OC_STATIC_ILOG0()
+ *  to whichever half holds the highest set bit, halving the width each time.
+ * The argument appears several times in the expansion, so it must not have
+ *  side effects.
+ */
+# define OC_STATIC_ILOG_32(_v) (OC_STATIC_ILOG5((ogg_uint32_t)(_v)))
+/**
+ * OC_STATIC_ILOG_64 - The integer logarithm of an (unsigned, 64-bit) constant.
+ * @_v: A non-negative 64-bit constant.
+ * Returns floor(log2(_v))+1, or 0 if _v==0.
+ * This is the number of bits that would be required to represent _v in two's
+ *  complement notation with all of the leading zeros stripped.
+ * This macro is suitable for evaluation at compile time, but it should not be
+ *  used on values that can change at runtime, as it operates via exhaustive
+ *  search.
+ * It expands to OC_STATIC_ILOG6(), which adds one more halving step for the
+ *  upper 32 bits on top of the chain used by OC_STATIC_ILOG_32().
+ */
+# define OC_STATIC_ILOG_64(_v) (OC_STATIC_ILOG6((ogg_int64_t)(_v)))
+
+#define OC_Q57(_v) ((ogg_int64_t)(_v)<<57)/*_v as a Q57 ogg_int64_t value.*/
+#define OC_Q10(_v) ((_v)<<10)/*_v as a Q10 value, in the type of _v.*/
+
+ogg_int64_t oc_bexp64(ogg_int64_t _z);/*Binary exponential of a Q57 log.*/
+ogg_int64_t oc_blog64(ogg_int64_t _w);/*Binary log of _w, in Q57.*/
+
+ogg_uint32_t oc_bexp32_q10(int _z);/*Approximate 2**_z: Q10 in, Q0 out.*/
+int oc_blog32_q10(ogg_uint32_t _w);/*Approximate log2(_w): Q0 in, Q10 out.*/
+
+#endif

+ 792 - 0
jni/libtheora-1.2.0alpha1/lib/mcenc.c

@@ -0,0 +1,792 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id$
+
+ ********************************************************************/
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+#include "encint.h"
+
+
+
+typedef struct oc_mcenc_ctx           oc_mcenc_ctx;
+
+
+
+/*Temporary state used for motion estimation.
+  It is filled in by oc_mcenc_find_candidates_a() and
+   oc_mcenc_find_candidates_b().*/
+struct oc_mcenc_ctx{
+  /*The candidate motion vectors.
+    Element [i][0] is the X component and [i][1] the Y component of
+     candidate i.
+    Candidate 0 holds the median predictor; set A starts at candidate 1.*/
+  int                candidates[13][2];
+  /*The start of the Set B candidates.
+    This equals the number of entries written by
+     oc_mcenc_find_candidates_a().*/
+  int                setb0;
+  /*The total number of candidates.
+    This is only written by oc_mcenc_find_candidates_b().*/
+  int                ncandidates;
+};
+
+
+
+/*The maximum Y plane SAD value for accepting the median predictor.*/
+#define OC_YSAD_THRESH1            (256)
+/*The amount to right shift the minimum error by when inflating it for
+   computing the second maximum Y plane SAD threshold.*/
+#define OC_YSAD_THRESH2_SCALE_BITS (4)
+/*The amount to add to the second maximum Y plane threshold when inflating
+   it.*/
+#define OC_YSAD_THRESH2_OFFSET     (64)
+
+/*The vector offsets in the X direction for each search site in the square
+   pattern.*/
+static const int OC_SQUARE_DX[9]={-1,0,1,-1,0,1,-1,0,1};
+/*The vector offsets in the Y direction for each search site in the square
+   pattern.*/
+static const int OC_SQUARE_DY[9]={-1,-1,-1,0,0,0,1,1,1};
+/*The number of sites to search for each boundary condition in the square
+   pattern.
+  Bit flags for the boundary conditions are as follows:
+  1: -16==dx
+  2:      dx==15(.5)
+  4: -16==dy
+  8:      dy==15(.5)*/
+static const int OC_SQUARE_NSITES[11]={8,5,5,0,5,3,3,0,5,3,3};
+/*The list of sites to search for each boundary condition in the square
+   pattern.
+  The first index is the boundary condition bit mask, and each listed value is
+   an index into OC_SQUARE_DX and OC_SQUARE_DY.
+  Only the first OC_SQUARE_NSITES[b] values of row b are used.
+  The rows containing {-1} are placeholders for masks that have both
+   horizontal flags set at once; their OC_SQUARE_NSITES entry is 0.*/
+static const int OC_SQUARE_SITES[11][8]={
+  /* -15.5<dx<31,       -15.5<dy<15(.5)*/
+  {0,1,2,3,5,6,7,8},
+  /*-15.5==dx,          -15.5<dy<15(.5)*/
+  {1,2,5,7,8},
+  /*     dx==15(.5),    -15.5<dy<15(.5)*/
+  {0,1,3,6,7},
+  /*-15.5==dx==15(.5),  -15.5<dy<15(.5)*/
+  {-1},
+  /* -15.5<dx<15(.5),  -15.5==dy*/
+  {3,5,6,7,8},
+  /*-15.5==dx,         -15.5==dy*/
+  {5,7,8},
+  /*     dx==15(.5),   -15.5==dy*/
+  {3,6,7},
+  /*-15.5==dx==15(.5), -15.5==dy*/
+  {-1},
+  /* -15.5<dx<15(.5),         dy==15(.5)*/
+  {0,1,2,3,5},
+  /*-15.5==dx,                dy==15(.5)*/
+  {1,2,5},
+  /*       dx==15(.5),        dy==15(.5)*/
+  {0,1,3}
+};
+
+
+/*Builds candidate set A for macro block _mbi and derives the median
+   predictor from it.
+  Starting at slot 1, _mcenc->candidates receives, in order: the
+   analysis_mv[0][_frame] vector of every macro block listed in cneighbors,
+   the accumulated vector _accum, this block's analysis_mv[1][_frame] offset by
+   _accum (each component clamped to [-31,31]), and the zero vector.
+  Slot 0 then receives the component-wise median of slots 1 through 3, and
+   _mcenc->setb0 is set to the number of slots filled.*/
+static void oc_mcenc_find_candidates_a(oc_enc_ctx *_enc,oc_mcenc_ctx *_mcenc,
+ oc_mv _accum,int _mbi,int _frame){
+  oc_mb_enc_info *mbs;
+  oc_mb_enc_info *cur;
+  int             med[3][2];
+  int             ax;
+  int             ay;
+  int             n;
+  int             ni;
+  int             c;
+  mbs=_enc->mb_info;
+  cur=mbs+_mbi;
+  ax=OC_MV_X(_accum);
+  ay=OC_MV_Y(_accum);
+  /*Slot 0 is reserved for the median predictor, so filling starts at 1.*/
+  n=1;
+  /*First part of set A: the vectors of the listed neighbors.*/
+  for(ni=0;ni<cur->ncneighbors;ni++){
+    unsigned nmbi;
+    nmbi=cur->cneighbors[ni];
+    _mcenc->candidates[n][0]=OC_MV_X(mbs[nmbi].analysis_mv[0][_frame]);
+    _mcenc->candidates[n][1]=OC_MV_Y(mbs[nmbi].analysis_mv[0][_frame]);
+    n++;
+  }
+  /*Then the accumulated vector on its own...*/
+  _mcenc->candidates[n][0]=ax;
+  _mcenc->candidates[n][1]=ay;
+  n++;
+  /*...the block's previous vector offset by the accumulator...*/
+  _mcenc->candidates[n][0]=OC_CLAMPI(-31,
+   OC_MV_X(cur->analysis_mv[1][_frame])+ax,31);
+  _mcenc->candidates[n][1]=OC_CLAMPI(-31,
+   OC_MV_Y(cur->analysis_mv[1][_frame])+ay,31);
+  n++;
+  /*...and finally the (0,0) vector.*/
+  _mcenc->candidates[n][0]=0;
+  _mcenc->candidates[n][1]=0;
+  n++;
+  /*The predictor is the median of the first three set A vectors, taken
+     separately for each component with a three-element sorting network.*/
+  memcpy(med,_mcenc->candidates+1,sizeof(med));
+  for(c=0;c<2;c++){
+    OC_SORT2I(med[0][c],med[1][c]);
+    OC_SORT2I(med[1][c],med[2][c]);
+    OC_SORT2I(med[0][c],med[1][c]);
+    _mcenc->candidates[0][c]=med[1][c];
+  }
+  _mcenc->setb0=n;
+}
+
+/*Appends candidate set B after set A and records the total candidate count.
+  Set B is a single accelerated predictor for the current macro block:
+   2*analysis_mv[1][_frame]-analysis_mv[2][_frame] offset by _accum, with each
+   component clamped to [-31,31].
+  Only the current block is used; per the original authors, using more did not
+   appear to help with the current selection logic because it escaped the
+   local search too quickly.*/
+static void oc_mcenc_find_candidates_b(oc_enc_ctx *_enc,oc_mcenc_ctx *_mcenc,
+ oc_mv _accum,int _mbi,int _frame){
+  oc_mb_enc_info *cur;
+  int             ax;
+  int             ay;
+  int             n;
+  cur=_enc->mb_info+_mbi;
+  ax=OC_MV_X(_accum);
+  ay=OC_MV_Y(_accum);
+  /*Set B begins where set A ended.*/
+  n=_mcenc->setb0;
+  _mcenc->candidates[n][0]=OC_CLAMPI(-31,
+   2*OC_MV_X(cur->analysis_mv[1][_frame])
+   -OC_MV_X(cur->analysis_mv[2][_frame])+ax,31);
+  _mcenc->candidates[n][1]=OC_CLAMPI(-31,
+   2*OC_MV_Y(cur->analysis_mv[1][_frame])
+   -OC_MV_Y(cur->analysis_mv[2][_frame])+ay,31);
+  _mcenc->ncandidates=n+1;
+}
+
+/*Sums oc_enc_frag_sad2_thresh() over the four fragments listed in _fragis.
+  Each fragment of _src is compared against the two reference positions given
+   by _mvoffset0 and _mvoffset1, and the threshold handed to each call is what
+   remains of _best_err after the fragments already processed.*/
+static unsigned oc_sad16_halfpel(const oc_enc_ctx *_enc,
+ const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],
+ int _mvoffset0,int _mvoffset1,const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,unsigned _best_err){
+  unsigned total;
+  int      k;
+  total=0;
+  for(k=0;k<4;k++){
+    const unsigned char *cur;
+    const unsigned char *base;
+    ptrdiff_t            offs;
+    offs=_frag_buf_offs[_fragis[k]];
+    cur=_src+offs;
+    base=_ref+offs;
+    total+=oc_enc_frag_sad2_thresh(_enc,cur,base+_mvoffset0,base+_mvoffset1,
+     _ystride,_best_err-total);
+  }
+  return total;
+}
+
+/*Sums oc_enc_frag_satd2() plus the magnitude of the DC term it reports over
+   the four fragments listed in _fragis.
+  _best_err is accepted for signature parity with oc_sad16_halfpel() but is
+   not used.*/
+static unsigned oc_satd16_halfpel(const oc_enc_ctx *_enc,
+ const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],
+ int _mvoffset0,int _mvoffset1,const unsigned char *_src,
+ const unsigned char *_ref,int _ystride,unsigned _best_err){
+  unsigned total;
+  int      k;
+  total=0;
+  for(k=0;k<4;k++){
+    const unsigned char *cur;
+    const unsigned char *base;
+    ptrdiff_t            offs;
+    int                  dc;
+    offs=_frag_buf_offs[_fragis[k]];
+    cur=_src+offs;
+    base=_ref+offs;
+    total+=oc_enc_frag_satd2(_enc,&dc,cur,base+_mvoffset0,base+_mvoffset1,
+     _ystride);
+    /*dc is only valid once the call above has completed.*/
+    total+=abs(dc);
+  }
+  return total;
+}
+
+/*Computes the SAD of the four fragments listed in _fragis against _ref
+   displaced by the full-pel vector (_dx,_dy).
+  The per-fragment values are stored in _block_err and their sum is
+   returned.*/
+static unsigned oc_mcenc_ysad_check_mbcandidate_fullpel(const oc_enc_ctx *_enc,
+ const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],int _dx,int _dy,
+ const unsigned char *_src,const unsigned char *_ref,int _ystride,
+ unsigned _block_err[4]){
+  unsigned total;
+  int      disp;
+  int      k;
+  /*Byte displacement of the candidate vector within the reference plane.*/
+  disp=_dy*_ystride+_dx;
+  total=0;
+  for(k=0;k<4;k++){
+    ptrdiff_t offs;
+    offs=_frag_buf_offs[_fragis[k]];
+    _block_err[k]=oc_enc_frag_sad(_enc,_src+offs,_ref+offs+disp,_ystride);
+    total+=_block_err[k];
+  }
+  return total;
+}
+
+/*Scores the four fragments listed in _fragis against _ref displaced by the
+   full-pel vector (_dx,_dy) and returns the sum.
+  When _enc->sp_level is below OC_SP_LEVEL_NOSATD each fragment contributes
+   its SATD plus the magnitude of the reported DC term; otherwise plain SAD is
+   used.*/
+static int oc_mcenc_ysatd_check_mbcandidate_fullpel(const oc_enc_ctx *_enc,
+ const ptrdiff_t *_frag_buf_offs,const ptrdiff_t _fragis[4],int _dx,int _dy,
+ const unsigned char *_src,const unsigned char *_ref,int _ystride){
+  int use_satd;
+  int total;
+  int disp;
+  int k;
+  use_satd=_enc->sp_level<OC_SP_LEVEL_NOSATD;
+  disp=_dy*_ystride+_dx;
+  total=0;
+  for(k=0;k<4;k++){
+    ptrdiff_t offs;
+    offs=_frag_buf_offs[_fragis[k]];
+    if(!use_satd){
+      total+=oc_enc_frag_sad(_enc,_src+offs,_ref+offs+disp,_ystride);
+    }
+    else{
+      int dc;
+      total+=oc_enc_frag_satd(_enc,&dc,_src+offs,_ref+offs+disp,_ystride);
+      total+=abs(dc);
+    }
+  }
+  return total;
+}
+
+/*Scores a single fragment at _frag_offs against _ref displaced by the
+   full-pel vector (_dx,_dy): its SATD plus the magnitude of the DC term
+   reported by oc_enc_frag_satd().*/
+static unsigned oc_mcenc_ysatd_check_bcandidate_fullpel(const oc_enc_ctx *_enc,
+ ptrdiff_t _frag_offs,int _dx,int _dy,
+ const unsigned char *_src,const unsigned char *_ref,int _ystride){
+  const unsigned char *cur;
+  const unsigned char *cand;
+  unsigned             satd;
+  int                  dc;
+  cur=_src+_frag_offs;
+  cand=_ref+_frag_offs+_dx+_dy*_ystride;
+  satd=oc_enc_frag_satd(_enc,&dc,cur,cand,_ystride);
+  return satd+abs(dc);
+}
+
+/*Perform a motion vector search for this macro block against a single
+   reference frame.
+  As a bonus, individual block motion vectors are computed as well, as much of
+   the work can be shared.
+  The actual motion vector is stored in the appropriate place in the
+   oc_mb_enc_info structure.
+  _accum:      Drop frame/golden MV accumulators.
+  _mbi:        The macro block index.
+  _frame:      The frame to use for SATD calculations and refinement,
+                either OC_FRAME_PREV or OC_FRAME_GOLD.
+  _frame_full: The frame to perform the 1px search on, one of OC_FRAME_PREV,
+                OC_FRAME_GOLD, OC_FRAME_PREV_ORIG, or OC_FRAME_GOLD_ORIG.*/
+void oc_mcenc_search_frame(oc_enc_ctx *_enc,oc_mv _accum,int _mbi,int _frame,
+ int _frame_full){
+  /*Note: Traditionally this search is done using a rate-distortion objective
+     function of the form D+lambda*R.
+    However, xiphmont tested this and found it produced a small degradation,
+     while requiring extra computation.
+    This is most likely due to Theora's peculiar MV encoding scheme: MVs are
+     not coded relative to a predictor, and the only truly cheap way to use a
+     MV is in the LAST or LAST2 MB modes, which are not being considered here.
+    Therefore if we use the MV found here, it's only because both LAST and
+     LAST2 performed poorly, and therefore the MB is not likely to be uniform
+     or suffer from the aperture problem.
+    Furthermore we would like to re-use the MV found here for as many MBs as
+     possible, so picking a slightly sub-optimal vector to save a bit or two
+     may cause increased degradation in many blocks to come.
+    We could artificially reduce lambda to compensate, but it's faster to just
+     disable it entirely, and use D (the distortion) as the sole criterion.*/
+  oc_mcenc_ctx         mcenc;
+  const ptrdiff_t     *frag_buf_offs;
+  const ptrdiff_t     *fragis;
+  const unsigned char *src;
+  const unsigned char *ref;
+  const unsigned char *satd_ref;
+  int                  ystride;
+  oc_mb_enc_info      *embs;
+  ogg_int32_t          hit_cache[31];
+  ogg_int32_t          hitbit;
+  unsigned             best_block_err[4];
+  unsigned             block_err[4];
+  unsigned             best_err;
+  int                  best_vec[2];
+  int                  best_block_vec[4][2];
+  int                  candx;
+  int                  candy;
+  int                  bi;
+  embs=_enc->mb_info;
+  /*Find some candidate motion vectors.*/
+  oc_mcenc_find_candidates_a(_enc,&mcenc,_accum,_mbi,_frame);
+  /*Clear the cache of locations we've examined.*/
+  memset(hit_cache,0,sizeof(hit_cache));
+  /*Start with the median predictor.*/
+  candx=OC_DIV2(mcenc.candidates[0][0]);
+  candy=OC_DIV2(mcenc.candidates[0][1]);
+  hit_cache[candy+15]|=(ogg_int32_t)1<<candx+15;
+  frag_buf_offs=_enc->state.frag_buf_offs;
+  fragis=_enc->state.mb_maps[_mbi][0];
+  src=_enc->state.ref_frame_data[OC_FRAME_IO];
+  ref=_enc->state.ref_frame_data[_frame_full];
+  satd_ref=_enc->state.ref_frame_data[_frame];
+  ystride=_enc->state.ref_ystride[0];
+  /*TODO: customize error function for speed/(quality+size) tradeoff.*/
+  best_err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
+   frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
+  best_vec[0]=candx;
+  best_vec[1]=candy;
+  if(_frame==OC_FRAME_PREV){
+    for(bi=0;bi<4;bi++){
+      best_block_err[bi]=block_err[bi];
+      best_block_vec[bi][0]=candx;
+      best_block_vec[bi][1]=candy;
+    }
+  }
+  /*If this predictor fails, move on to set A.*/
+  if(best_err>OC_YSAD_THRESH1){
+    unsigned err;
+    unsigned t2;
+    int      ncs;
+    int      ci;
+    /*Compute the early termination threshold for set A.*/
+    t2=embs[_mbi].error[_frame];
+    ncs=OC_MINI(3,embs[_mbi].ncneighbors);
+    for(ci=0;ci<ncs;ci++){
+      t2=OC_MAXI(t2,embs[embs[_mbi].cneighbors[ci]].error[_frame]);
+    }
+    t2+=(t2>>OC_YSAD_THRESH2_SCALE_BITS)+OC_YSAD_THRESH2_OFFSET;
+    /*Examine the candidates in set A.*/
+    for(ci=1;ci<mcenc.setb0;ci++){
+      candx=OC_DIV2(mcenc.candidates[ci][0]);
+      candy=OC_DIV2(mcenc.candidates[ci][1]);
+      /*If we've already examined this vector, then we would be using it if it
+         was better than what we are using.*/
+      hitbit=(ogg_int32_t)1<<candx+15;
+      if(hit_cache[candy+15]&hitbit)continue;
+      hit_cache[candy+15]|=hitbit;
+      err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
+       frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
+      if(err<best_err){
+        best_err=err;
+        best_vec[0]=candx;
+        best_vec[1]=candy;
+      }
+      if(_frame==OC_FRAME_PREV){
+        for(bi=0;bi<4;bi++)if(block_err[bi]<best_block_err[bi]){
+          best_block_err[bi]=block_err[bi];
+          best_block_vec[bi][0]=candx;
+          best_block_vec[bi][1]=candy;
+        }
+      }
+    }
+    if(best_err>t2){
+      oc_mcenc_find_candidates_b(_enc,&mcenc,_accum,_mbi,_frame);
+      /*Examine the candidates in set B.*/
+      for(;ci<mcenc.ncandidates;ci++){
+        candx=OC_DIV2(mcenc.candidates[ci][0]);
+        candy=OC_DIV2(mcenc.candidates[ci][1]);
+        hitbit=(ogg_int32_t)1<<candx+15;
+        if(hit_cache[candy+15]&hitbit)continue;
+        hit_cache[candy+15]|=hitbit;
+        err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
+         frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
+        if(err<best_err){
+          best_err=err;
+          best_vec[0]=candx;
+          best_vec[1]=candy;
+        }
+        if(_frame==OC_FRAME_PREV){
+          for(bi=0;bi<4;bi++)if(block_err[bi]<best_block_err[bi]){
+            best_block_err[bi]=block_err[bi];
+            best_block_vec[bi][0]=candx;
+            best_block_vec[bi][1]=candy;
+          }
+        }
+      }
+      /*Use the same threshold for set B as in set A.*/
+      if(best_err>t2){
+        int best_site;
+        int nsites;
+        int sitei;
+        int site;
+        int b;
+        /*Square pattern search.*/
+        for(;;){
+          best_site=4;
+          /*Compose the bit flags for boundary conditions.*/
+          b=OC_DIV16(-best_vec[0]+1)|OC_DIV16(best_vec[0]+1)<<1|
+           OC_DIV16(-best_vec[1]+1)<<2|OC_DIV16(best_vec[1]+1)<<3;
+          nsites=OC_SQUARE_NSITES[b];
+          for(sitei=0;sitei<nsites;sitei++){
+            site=OC_SQUARE_SITES[b][sitei];
+            candx=best_vec[0]+OC_SQUARE_DX[site];
+            candy=best_vec[1]+OC_SQUARE_DY[site];
+            hitbit=(ogg_int32_t)1<<candx+15;
+            if(hit_cache[candy+15]&hitbit)continue;
+            hit_cache[candy+15]|=hitbit;
+            err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
+             frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
+            if(err<best_err){
+              best_err=err;
+              best_site=site;
+            }
+            if(_frame==OC_FRAME_PREV){
+              for(bi=0;bi<4;bi++)if(block_err[bi]<best_block_err[bi]){
+                best_block_err[bi]=block_err[bi];
+                best_block_vec[bi][0]=candx;
+                best_block_vec[bi][1]=candy;
+              }
+            }
+          }
+          if(best_site==4)break;
+          best_vec[0]+=OC_SQUARE_DX[best_site];
+          best_vec[1]+=OC_SQUARE_DY[best_site];
+        }
+        /*Final 4-MV search.*/
+        /*Simply use 1/4 of the macro block set A and B threshold as the
+           individual block threshold.*/
+        if(_frame==OC_FRAME_PREV){
+          t2>>=2;
+          for(bi=0;bi<4;bi++){
+            if(best_block_err[bi]>t2){
+              /*Square pattern search.
+                We do this in a slightly interesting manner.
+                We continue to check the SAD of all four blocks in the
+                 macro block.
+                This gives us two things:
+                 1) We can continue to use the hit_cache to avoid duplicate
+                     checks.
+                    Otherwise we could continue to read it, but not write to it
+                     without saving and restoring it for each block.
+                    Note that we could still eliminate a large number of
+                     duplicate checks by taking into account the site we came
+                     from when choosing the site list.
+                    We can still do that to avoid extra hit_cache queries, and
+                     it might even be a speed win.
+                 2) It gives us a slightly better chance of escaping local
+                     minima.
+                    We would not be here if we weren't doing a fairly bad job
+                     in finding a good vector, and checking these vectors can
+                     save us from 100 to several thousand points off our SAD 1
+                     in 15 times.
+                TODO: Is this a good idea?
+                Who knows.
+                It needs more testing.*/
+              for(;;){
+                int bestx;
+                int besty;
+                int bj;
+                bestx=best_block_vec[bi][0];
+                besty=best_block_vec[bi][1];
+                /*Compose the bit flags for boundary conditions.*/
+                b=OC_DIV16(-bestx+1)|OC_DIV16(bestx+1)<<1|
+                 OC_DIV16(-besty+1)<<2|OC_DIV16(besty+1)<<3;
+                nsites=OC_SQUARE_NSITES[b];
+                for(sitei=0;sitei<nsites;sitei++){
+                  site=OC_SQUARE_SITES[b][sitei];
+                  candx=bestx+OC_SQUARE_DX[site];
+                  candy=besty+OC_SQUARE_DY[site];
+                  hitbit=(ogg_int32_t)1<<candx+15;
+                  if(hit_cache[candy+15]&hitbit)continue;
+                  hit_cache[candy+15]|=hitbit;
+                  err=oc_mcenc_ysad_check_mbcandidate_fullpel(_enc,
+                   frag_buf_offs,fragis,candx,candy,src,ref,ystride,block_err);
+                  if(err<best_err){
+                    best_err=err;
+                    best_vec[0]=candx;
+                    best_vec[1]=candy;
+                  }
+                  for(bj=0;bj<4;bj++)if(block_err[bj]<best_block_err[bj]){
+                    best_block_err[bj]=block_err[bj];
+                    best_block_vec[bj][0]=candx;
+                    best_block_vec[bj][1]=candy;
+                  }
+                }
+                if(best_block_vec[bi][0]==bestx&&best_block_vec[bi][1]==besty){
+                  break;
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  embs[_mbi].error[_frame]=(ogg_uint16_t)best_err;
+  candx=best_vec[0];
+  candy=best_vec[1];
+  embs[_mbi].satd[_frame]=oc_mcenc_ysatd_check_mbcandidate_fullpel(_enc,
+   frag_buf_offs,fragis,candx,candy,src,satd_ref,ystride);
+  embs[_mbi].analysis_mv[0][_frame]=OC_MV(candx<<1,candy<<1);
+  if(_frame==OC_FRAME_PREV&&_enc->sp_level<OC_SP_LEVEL_FAST_ANALYSIS){
+    for(bi=0;bi<4;bi++){
+      candx=best_block_vec[bi][0];
+      candy=best_block_vec[bi][1];
+      embs[_mbi].block_satd[bi]=oc_mcenc_ysatd_check_bcandidate_fullpel(_enc,
+       frag_buf_offs[fragis[bi]],candx,candy,src,satd_ref,ystride);
+      embs[_mbi].block_mv[bi]=OC_MV(candx<<1,candy<<1);
+    }
+  }
+}
+
+void oc_mcenc_search(oc_enc_ctx *_enc,int _mbi){/*
+  Runs the motion search for macro block _mbi against both reference frames:
+   first OC_FRAME_PREV, then OC_FRAME_GOLD.
+  Before searching, the stored analysis_mv history is reshuffled: entries move
+   from index 0 to 1 to 2, and are rewritten into the relative form that the
+   comments below describe.
+  The GOLDEN entries are converted back to absolute offsets before returning.
+  _enc: The encoder context; the analysis_mv history of mb_info[_mbi] is
+         updated in place.
+  _mbi: The index of the macro block to search.*/
+  oc_mv2 *mvs;
+  oc_mv   accum_p;
+  oc_mv   accum_g;
+  oc_mv   mv2_p;
+  mvs=_enc->mb_info[_mbi].analysis_mv;/*
+  accum_p is the current mvs[0] PREV vector if the previous frame was dropped,
+   and zero otherwise.
+  accum_g is the mvs[2] GOLDEN vector as it was on entry, before the shift.*/
+  if(_enc->prevframe_dropped)accum_p=mvs[0][OC_FRAME_PREV];
+  else accum_p=0;
+  accum_g=mvs[2][OC_FRAME_GOLD];
+  /*Move the motion vector predictors back a frame.
+    The old mvs[2][OC_FRAME_PREV] is saved first, because the new
+     mvs[1][OC_FRAME_PREV] is stored as the difference between the old
+     mvs[0][OC_FRAME_PREV] and that saved value.*/
+  mv2_p=mvs[2][OC_FRAME_PREV];
+  mvs[2][OC_FRAME_GOLD]=mvs[1][OC_FRAME_GOLD];
+  mvs[2][OC_FRAME_PREV]=mvs[1][OC_FRAME_PREV];
+  mvs[1][OC_FRAME_GOLD]=mvs[0][OC_FRAME_GOLD];
+  mvs[1][OC_FRAME_PREV]=OC_MV_SUB(mvs[0][OC_FRAME_PREV],mv2_p);
+  /*Search the last frame.
+    Afterwards mvs[2][OC_FRAME_PREV] is overwritten with accum_p, which is the
+     value a later call for this macro block will read back as mv2_p.*/
+  oc_mcenc_search_frame(_enc,accum_p,_mbi,OC_FRAME_PREV,OC_FRAME_PREV_ORIG);
+  mvs[2][OC_FRAME_PREV]=accum_p;
+  /*GOLDEN MVs are different from PREV MVs in that they're each absolute
+     offsets from some frame in the past rather than relative offsets from the
+     frame before.
+    For predictor calculation to make sense, we need them to be in the same
+     form as PREV MVs.
+    The order matters here: mvs[1] must be made relative to the still-absolute
+     mvs[2] before mvs[2] itself is made relative to accum_g.*/
+  mvs[1][OC_FRAME_GOLD]=OC_MV_SUB(mvs[1][OC_FRAME_GOLD],mvs[2][OC_FRAME_GOLD]);
+  mvs[2][OC_FRAME_GOLD]=OC_MV_SUB(mvs[2][OC_FRAME_GOLD],accum_g);
+  /*Search the golden frame.*/
+  oc_mcenc_search_frame(_enc,accum_g,_mbi,OC_FRAME_GOLD,OC_FRAME_GOLD_ORIG);
+  /*Put GOLDEN MVs back into absolute offset form.
+    The newest MV is already an absolute offset.
+    This undoes the two subtractions above in the reverse order: mvs[2] is
+     restored first so that it can be added back onto mvs[1].*/
+  mvs[2][OC_FRAME_GOLD]=OC_MV_ADD(mvs[2][OC_FRAME_GOLD],accum_g);
+  mvs[1][OC_FRAME_GOLD]=OC_MV_ADD(mvs[1][OC_FRAME_GOLD],mvs[2][OC_FRAME_GOLD]);
+}
+
+#if 0
+/*Disabled SAD-only variant of the macro block half-pel refinement, kept for
+   reference.
+  Tries the 8 sites of OC_SQUARE_SITES[0] around the full-pel vector _vec and
+   keeps the one with the lowest error.
+  _enc:      The encoder context.
+  _mbi:      The index of the macro block being refined.
+  _vec:      In: the full-pel motion vector.
+             Out: the vector doubled into half-pel units, plus the offset of
+              the best site.
+  _best_err: The error of the unrefined vector; only strictly smaller errors
+              replace it.
+  _frame:    The index of the reference frame to predict from.
+  Return: The lowest error found, or _best_err if no site improved on it.*/
+static int oc_mcenc_ysad_halfpel_mbrefine(const oc_enc_ctx *_enc,int _mbi,
+ int _vec[2],int _best_err,int _frame){
+  const unsigned char *src;
+  const unsigned char *ref;
+  const ptrdiff_t     *frag_buf_offs;
+  const ptrdiff_t     *fragis;
+  int                  offset_y[9];
+  int                  ystride;
+  int                  mvoffset_base;
+  int                  best_site;
+  int                  sitei;
+  int                  err;
+  src=_enc->state.ref_frame_data[OC_FRAME_IO];
+  ref=_enc->state.ref_frame_data[_frame];
+  frag_buf_offs=_enc->state.frag_buf_offs;
+  fragis=_enc->state.mb_maps[_mbi][0];
+  ystride=_enc->state.ref_ystride[0];
+  mvoffset_base=_vec[0]+_vec[1]*ystride;
+  /*Row offset for each site: sites 0-2 are one row up, 3 and 5 stay on the
+     same row, and 6-8 are one row down.
+    Entry 4 is left unset; this assumes OC_SQUARE_SITES[0] never yields site 4
+     (TODO: confirm against the table).*/
+  offset_y[0]=offset_y[1]=offset_y[2]=-ystride;
+  offset_y[3]=offset_y[5]=0;
+  offset_y[6]=offset_y[7]=offset_y[8]=ystride;
+  /*Site 4 doubles as the "no site was better" marker.*/
+  best_site=4;
+  for(sitei=0;sitei<8;sitei++){
+    int site;
+    int xmask;
+    int ymask;
+    int dx;
+    int dy;
+    int mvoffset0;
+    int mvoffset1;
+    site=OC_SQUARE_SITES[0][sitei];
+    dx=OC_SQUARE_DX[site];
+    dy=OC_SQUARE_DY[site];
+    /*The following code SHOULD be equivalent to
+        oc_state_get_mv_offsets(&_mcenc->enc.state,&mvoffset0,&mvoffset1,
+         (_vec[0]<<1)+dx,(_vec[1]<<1)+dy,ref_ystride,0);
+      However, it should also be much faster, as it involves no multiplies and
+       doesn't have to handle chroma vectors.*/
+    xmask=OC_SIGNMASK(((_vec[0]<<1)+dx)^dx);
+    ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy);
+    mvoffset0=mvoffset_base+(dx&xmask)+(offset_y[site]&ymask);
+    mvoffset1=mvoffset_base+(dx&~xmask)+(offset_y[site]&~ymask);
+    err=oc_sad16_halfpel(_enc,frag_buf_offs,fragis,
+     mvoffset0,mvoffset1,src,ref,ystride,_best_err);
+    if(err<_best_err){
+      _best_err=err;
+      best_site=site;
+    }
+  }
+  /*Convert to half-pel units and apply the winning offset.*/
+  _vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[best_site];
+  _vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[best_site];
+  return _best_err;
+}
+#endif
+
+/*Refines a full-pel macro block motion vector to half-pel precision.
+  Each of the 8 sites in OC_SQUARE_SITES[0] is tried around _vec, and the one
+   with the lowest error (if any beats _best_err) is kept.
+  The error is measured with oc_satd16_halfpel() when
+   _enc->sp_level<OC_SP_LEVEL_NOSATD, and with oc_sad16_halfpel() otherwise.
+  _enc:      The encoder context.
+  _mbi:      The index of the macro block being refined.
+  _vec:      In: the full-pel motion vector.
+             Out: the vector doubled into half-pel units, plus the offset of
+              the winning site.
+  _best_err: The error of the unrefined vector; only strictly smaller errors
+              replace it.
+  _frame:    The index of the reference frame to predict from.
+  Return: The lowest error found, or _best_err if no site improved on it.*/
+static unsigned oc_mcenc_ysatd_halfpel_mbrefine(const oc_enc_ctx *_enc,
+ int _mbi,int _vec[2],unsigned _best_err,int _frame){
+  const unsigned char *cur;
+  const unsigned char *pred;
+  const ptrdiff_t     *buf_offs;
+  const ptrdiff_t     *mb_fragis;
+  int                  row_step[9];
+  int                  stride;
+  int                  base_offs;
+  int                  use_sad;
+  int                  winner;
+  int                  k;
+  stride=_enc->state.ref_ystride[0];
+  buf_offs=_enc->state.frag_buf_offs;
+  mb_fragis=_enc->state.mb_maps[_mbi][0];
+  cur=_enc->state.ref_frame_data[OC_FRAME_IO];
+  pred=_enc->state.ref_frame_data[_frame];
+  /*Row offset for each site: sites 0-2 are one row up, 3 and 5 stay on the
+     same row, and 6-8 are one row down.
+    Entry 4 is left unset; this assumes OC_SQUARE_SITES[0] never yields site 4
+     (TODO: confirm against the table).*/
+  for(k=0;k<3;k++){
+    row_step[k]=-stride;
+    row_step[k+6]=stride;
+  }
+  row_step[3]=row_step[5]=0;
+  base_offs=_vec[0]+_vec[1]*stride;
+  /*The choice of metric does not depend on the site, so make it once.*/
+  use_sad=_enc->sp_level>=OC_SP_LEVEL_NOSATD;
+  /*Site 4 doubles as the "no site was better" marker.*/
+  winner=4;
+  for(k=0;k<8;k++){
+    int site;
+    int dx;
+    int dy;
+    int xmask;
+    int ymask;
+    int offs0;
+    int offs1;
+    int err;
+    site=OC_SQUARE_SITES[0][k];
+    dx=OC_SQUARE_DX[site];
+    dy=OC_SQUARE_DY[site];
+    /*This is meant to produce the same two offsets as
+       oc_state_get_mv_offsets() would for the luma vector
+       ((_vec[0]<<1)+dx,(_vec[1]<<1)+dy).
+      Doing it with sign masks avoids the multiplies, and there are no chroma
+       vectors to worry about here.*/
+    xmask=OC_SIGNMASK(((_vec[0]<<1)+dx)^dx);
+    ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy);
+    offs0=base_offs+(dx&xmask)+(row_step[site]&ymask);
+    offs1=base_offs+(dx&~xmask)+(row_step[site]&~ymask);
+    if(use_sad){
+      err=oc_sad16_halfpel(_enc,buf_offs,mb_fragis,
+       offs0,offs1,cur,pred,stride,_best_err);
+    }
+    else{
+      err=oc_satd16_halfpel(_enc,buf_offs,mb_fragis,
+       offs0,offs1,cur,pred,stride,_best_err);
+    }
+    if(err<_best_err){
+      _best_err=err;
+      winner=site;
+    }
+  }
+  /*Convert to half-pel units and apply the winning offset.*/
+  _vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[winner];
+  _vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[winner];
+  return _best_err;
+}
+
+/*Refines the single analysis motion vector of a macro block to half-pel
+   precision.
+  The stored vector is halved with OC_DIV2() before it is handed to
+   oc_mcenc_ysatd_halfpel_mbrefine(); the refined vector and the error that
+   function returns are written back to the macro block's analysis_mv[0] and
+   satd entries for _frame.
+  _enc:   The encoder context.
+  _mbi:   The index of the macro block to refine.
+  _frame: The index of the reference frame the vector refers to.*/
+void oc_mcenc_refine1mv(oc_enc_ctx *_enc,int _mbi,int _frame){
+  oc_mb_enc_info *mb;
+  oc_mv           stored_mv;
+  int             mv[2];
+  mb=_enc->mb_info+_mbi;
+  stored_mv=mb->analysis_mv[0][_frame];
+  mv[0]=OC_DIV2(OC_MV_X(stored_mv));
+  mv[1]=OC_DIV2(OC_MV_Y(stored_mv));
+  /*The current error serves as the threshold a half-pel site has to beat.*/
+  mb->satd[_frame]=oc_mcenc_ysatd_halfpel_mbrefine(_enc,_mbi,mv,
+   mb->satd[_frame],_frame);
+  mb->analysis_mv[0][_frame]=OC_MV(mv[0],mv[1]);
+}
+
+#if 0
+/*Disabled SAD-only variant of the single-block half-pel refinement, kept for
+   reference.
+  Tries the 8 sites of OC_SQUARE_SITES[0] around the full-pel vector _vec and
+   keeps the one with the lowest error.
+  _enc:      The encoder context.
+  _vec:      In: the full-pel motion vector.
+             Out: the vector doubled into half-pel units, plus the offset of
+              the best site.
+  _src:      The source block.
+  _ref:      The co-located block in the reference frame.
+  _ystride:  The stride passed on to the SAD routine and used to turn the
+              vertical vector component into an offset.
+  _offset_y: The row offset to use for each of the 9 sites.
+  _best_err: The error of the unrefined vector; only strictly smaller errors
+              replace it.
+  Return: The lowest error found, or _best_err if no site improved on it.*/
+static int oc_mcenc_ysad_halfpel_brefine(const oc_enc_ctx *_enc,
+ int _vec[2],const unsigned char *_src,const unsigned char *_ref,int _ystride,
+ int _offset_y[9],unsigned _best_err){
+  int mvoffset_base;
+  int best_site;
+  int sitei;
+  mvoffset_base=_vec[0]+_vec[1]*_ystride;
+  /*Site 4 doubles as the "no site was better" marker.*/
+  best_site=4;
+  for(sitei=0;sitei<8;sitei++){
+    unsigned err;
+    int      site;
+    int      xmask;
+    int      ymask;
+    int      dx;
+    int      dy;
+    int      mvoffset0;
+    int      mvoffset1;
+    site=OC_SQUARE_SITES[0][sitei];
+    dx=OC_SQUARE_DX[site];
+    dy=OC_SQUARE_DY[site];
+    /*The following code SHOULD be equivalent to
+        oc_state_get_mv_offsets(&_mcenc->enc.state,&mvoffset0,&mvoffset1,
+         (_vec[0]<<1)+dx,(_vec[1]<<1)+dy,ref_ystride,0);
+      However, it should also be much faster, as it involves no multiplies and
+       doesn't have to handle chroma vectors.*/
+    xmask=OC_SIGNMASK(((_vec[0]<<1)+dx)^dx);
+    ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy);
+    mvoffset0=mvoffset_base+(dx&xmask)+(_offset_y[site]&ymask);
+    mvoffset1=mvoffset_base+(dx&~xmask)+(_offset_y[site]&~ymask);
+    err=oc_enc_frag_sad2_thresh(_enc,_src,
+     _ref+mvoffset0,_ref+mvoffset1,_ystride,_best_err);
+    if(err<_best_err){
+      _best_err=err;
+      best_site=site;
+    }
+  }
+  /*Convert to half-pel units and apply the winning offset.*/
+  _vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[best_site];
+  _vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[best_site];
+  return _best_err;
+}
+#endif
+
+/*Refines the full-pel motion vector of a single block to half-pel precision.
+  Each of the 8 sites in OC_SQUARE_SITES[0] is tried around _vec, and the one
+   with the lowest error (if any beats _best_err) is kept.
+  The error of a site is the value returned by oc_enc_frag_satd2() plus the
+   magnitude of the DC term it reports.
+  _enc:      The encoder context.
+  _vec:      In: the full-pel motion vector.
+             Out: the vector doubled into half-pel units, plus the offset of
+              the winning site.
+  _src:      The source block.
+  _ref:      The co-located block in the reference frame.
+  _ystride:  The stride passed on to oc_enc_frag_satd2() and used to turn the
+              vertical vector component into an offset.
+  _offset_y: The row offset to use for each of the 9 sites.
+  _best_err: The error of the unrefined vector; only strictly smaller errors
+              replace it.
+  Return: The lowest error found, or _best_err if no site improved on it.*/
+static unsigned oc_mcenc_ysatd_halfpel_brefine(const oc_enc_ctx *_enc,
+ int _vec[2],const unsigned char *_src,const unsigned char *_ref,int _ystride,
+ int _offset_y[9],unsigned _best_err){
+  int base_offs;
+  int winner;
+  int k;
+  /*Site 4 doubles as the "no site was better" marker.*/
+  winner=4;
+  base_offs=_vec[0]+_vec[1]*_ystride;
+  for(k=0;k<8;k++){
+    unsigned err;
+    int      dc;
+    int      site;
+    int      dx;
+    int      dy;
+    int      xmask;
+    int      ymask;
+    int      offs0;
+    int      offs1;
+    site=OC_SQUARE_SITES[0][k];
+    dx=OC_SQUARE_DX[site];
+    dy=OC_SQUARE_DY[site];
+    /*This is meant to produce the same two offsets as
+       oc_state_get_mv_offsets() would for the luma vector
+       ((_vec[0]<<1)+dx,(_vec[1]<<1)+dy).
+      Doing it with sign masks avoids the multiplies, and there are no chroma
+       vectors to worry about here.*/
+    xmask=OC_SIGNMASK(((_vec[0]<<1)+dx)^dx);
+    ymask=OC_SIGNMASK(((_vec[1]<<1)+dy)^dy);
+    offs0=base_offs+(dx&xmask)+(_offset_y[site]&ymask);
+    offs1=base_offs+(dx&~xmask)+(_offset_y[site]&~ymask);
+    /*dc is filled in by the call, so it must only be read afterwards.*/
+    err=oc_enc_frag_satd2(_enc,&dc,_src,_ref+offs0,_ref+offs1,_ystride);
+    err+=abs(dc);
+    if(err<_best_err){
+      _best_err=err;
+      winner=site;
+    }
+  }
+  /*Convert to half-pel units and apply the winning offset.*/
+  _vec[0]=(_vec[0]<<1)+OC_SQUARE_DX[winner];
+  _vec[1]=(_vec[1]<<1)+OC_SQUARE_DY[winner];
+  return _best_err;
+}
+
+/*Refines the four per-block motion vectors of a macro block to half-pel
+   precision against OC_FRAME_PREV.
+  For each block, the stored block_mv is halved with OC_DIV2() and handed to
+   oc_mcenc_ysatd_halfpel_brefine(); the error it returns replaces
+   block_satd[bi], and the refined vector is stored in ref_mv[bi].
+  block_mv itself is not modified.
+  _enc: The encoder context.
+  _mbi: The index of the macro block to refine.*/
+void oc_mcenc_refine4mv(oc_enc_ctx *_enc,int _mbi){
+  oc_mb_enc_info      *mb;
+  const ptrdiff_t     *buf_offs;
+  const ptrdiff_t     *mb_fragis;
+  const unsigned char *cur;
+  const unsigned char *pred;
+  int                  row_step[9];
+  int                  stride;
+  int                  bi;
+  int                  k;
+  mb=_enc->mb_info+_mbi;
+  stride=_enc->state.ref_ystride[0];
+  buf_offs=_enc->state.frag_buf_offs;
+  mb_fragis=_enc->state.mb_maps[_mbi][0];
+  cur=_enc->state.ref_frame_data[OC_FRAME_IO];
+  pred=_enc->state.ref_frame_data[OC_FRAME_PREV];
+  /*Row offset for each site: sites 0-2 are one row up, 3 and 5 stay on the
+     same row, and 6-8 are one row down.
+    Entry 4 is left unset, exactly as before.*/
+  for(k=0;k<3;k++){
+    row_step[k]=-stride;
+    row_step[k+6]=stride;
+  }
+  row_step[3]=row_step[5]=0;
+  for(bi=0;bi<4;bi++){
+    ptrdiff_t blk_offs;
+    int       mv[2];
+    blk_offs=buf_offs[mb_fragis[bi]];
+    mv[0]=OC_DIV2(OC_MV_X(mb->block_mv[bi]));
+    mv[1]=OC_DIV2(OC_MV_Y(mb->block_mv[bi]));
+    /*The current block error serves as the threshold a site has to beat.*/
+    mb->block_satd[bi]=oc_mcenc_ysatd_halfpel_brefine(_enc,mv,
+     cur+blk_offs,pred+blk_offs,stride,row_step,mb->block_satd[bi]);
+    mb->ref_mv[bi]=OC_MV(mv[0],mv[1]);
+  }
+}

+ 1030 - 0
jni/libtheora-1.2.0alpha1/lib/modedec.h

@@ -0,0 +1,1030 @@
+/*File generated by libtheora with OC_COLLECT_METRICS defined at compile time.*/
+#if !defined(_modedec_H)
+# define _modedec_H (1)
+# include "encint.h"
+
+
+
+/*The log of the average quantizer for each of the OC_MODE_RD table rows
+   (i.e., for the represented qi's, and each pli and qti), in Q10 format.
+  The actual statistics used by the encoder will be interpolated from
+   that table based on log_plq for the actual quantization matrix used.
+  The dimensions are [OC_LOGQ_BINS][3][2]; presumably the indices are the qi
+   bin, then pli, then qti, in the order listed above (TODO: confirm).
+  The table is only declared static const when OC_COLLECT_METRICS is not
+   defined; with it defined this is a plain, non-const definition.*/
+# if !defined(OC_COLLECT_METRICS)
+static const
+# endif
+ogg_int16_t OC_MODE_LOGQ[OC_LOGQ_BINS][3][2]={
+  { {0x1F05,0x2101},{0x206E,0x2101},{0x206E,0x2101} },
+  { {0x1C9A,0x1EAC},{0x1E0E,0x1EAC},{0x1E0E,0x1EAC} },
+  { {0x1A31,0x1C48},{0x1B6F,0x1C48},{0x1B6F,0x1C48} },
+  { {0x17B0,0x19E7},{0x1938,0x19E7},{0x1938,0x19E7} },
+  { {0x152F,0x178F},{0x16AB,0x178F},{0x16AB,0x178F} },
+  { {0x12F1,0x1534},{0x145D,0x1534},{0x145D,0x1534} },
+  { {0x0FF3,0x1321},{0x11BE,0x1321},{0x11BE,0x1321} },
+  { {0x0E1F,0x1073},{0x0E93,0x1073},{0x0E93,0x1073} }
+};
+
+# if !defined(OC_COLLECT_METRICS)
+static const
+# endif
+oc_mode_rd OC_MODE_RD_SATD[OC_LOGQ_BINS][3][2][OC_COMP_BINS]={
+  {
+    {
+      /*Y'  qi=0  INTRA*/
+      {
+        {   57, 1550},{  121, 2460},{  185, 3901},{  336, 5189},
+        {  406, 6243},{  501, 7329},{  565, 8292},{  674, 9257},
+        {  746,10219},{  843,11056},{  961,11822},{ 1120,12512},
+        { 1208,13233},{ 1394,13600},{ 1409,14381},{ 1492,15129},
+        { 1593,15804},{ 1639,16573},{ 1731,17161},{ 1844,17707},
+        { 1949,18300},{ 2073,18654},{ 2140,19465},{ 2278,19794}
+      },
+      /*Y'  qi=0  INTER*/
+      {
+        {  -18, 1274},{   23, 2505},{   32, 3612},{   57, 5153},
+        {   79, 6636},{   97, 8082},{  109, 9505},{  122,10924},
+        {  134,12293},{  145,13634},{  158,14942},{  172,16212},
+        {  186,17422},{  198,18604},{  209,19757},{  218,20875},
+        {  235,21980},{  253,23056},{  276,24121},{  305,25184},
+        {  342,26202},{  393,27140},{  439,28140},{  556,28659}
+      }
+    },
+    {
+      /*Cb  qi=0  INTRA*/
+      {
+        {   32, 1763},{   56, 2150},{   78, 2336},{   88, 2608},
+        {  105, 2975},{  121, 3297},{  113, 3460},{  126, 3993},
+        {  142, 4432},{  177, 4733},{  185, 5058},{  194, 5447},
+        {  220, 5812},{  227, 6202},{  246, 6415},{  269, 6821},
+        {  279, 7026},{  313, 7313},{  321, 7708},{  316, 8021},
+        {  370, 8203},{  389, 8573},{  410, 8607},{  431, 8816}
+      },
+      /*Cb  qi=0  INTER*/
+      {
+        {    3,  282},{    3, 1200},{    3, 1605},{    6, 2190},
+        {   15, 2519},{   18, 2798},{   21, 3115},{   25, 3460},
+        {   33, 3839},{   40, 4217},{   47, 4592},{   51, 4958},
+        {   56, 5326},{   59, 5710},{   63, 6066},{   65, 6412},
+        {   67, 6762},{   68, 7104},{   70, 7461},{   72, 7829},
+        {   77, 8200},{   80, 8566},{   86, 8906},{   90, 9203}
+      }
+    },
+    {
+      /*Cr  qi=0  INTRA*/
+      {
+        {   27, 1720},{   44, 1920},{   66, 2255},{   73, 2429},
+        {   95, 2988},{  103, 3279},{  123, 3691},{  129, 4012},
+        {  151, 4415},{  150, 4760},{  183, 5008},{  193, 5351},
+        {  211, 5788},{  235, 6134},{  263, 6400},{  276, 6711},
+        {  291, 7100},{  346, 7285},{  329, 7616},{  387, 7827},
+        {  361, 8214},{  430, 8534},{  429, 8608},{  450, 8823}
+      },
+      /*Cr  qi=0  INTER*/
+      {
+        {    4,  439},{    2, 1131},{    3, 1593},{    6, 2130},
+        {   14, 2535},{   17, 2786},{   21, 3128},{   27, 3494},
+        {   35, 3875},{   42, 4256},{   48, 4637},{   53, 5019},
+        {   57, 5395},{   61, 5777},{   64, 6156},{   66, 6512},
+        {   68, 6853},{   71, 7183},{   77, 7511},{   81, 7841},
+        {   83, 8192},{   88, 8510},{   93, 8834},{   98, 9138}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=9  INTRA*/
+      {
+        {   76,  777},{  178, 1995},{  340, 3162},{  591, 4097},
+        {  746, 4973},{  916, 5847},{ 1047, 6687},{ 1218, 7430},
+        { 1385, 8079},{ 1566, 8685},{ 1755, 9167},{ 1992, 9572},
+        { 2164,10023},{ 2395,10270},{ 2536,10755},{ 2694,11285},
+        { 2895,11580},{ 3029,12143},{ 3182,12543},{ 3377,12800},
+        { 3525,13228},{ 3718,13463},{ 3878,13852},{ 4077,14001}
+      },
+      /*Y'  qi=9  INTER*/
+      {
+        {   10,  770},{   45, 1845},{   59, 3227},{   99, 4708},
+        {  135, 6092},{  164, 7425},{  190, 8729},{  218, 9991},
+        {  246,11234},{  281,12427},{  315,13573},{  354,14678},
+        {  402,15734},{  467,16728},{  543,17709},{  639,18610},
+        {  736,19503},{  855,20312},{  995,21033},{ 1151,21656},
+        { 1341,22130},{ 1525,22582},{ 1735,22922},{ 1922,23102}
+      }
+    },
+    {
+      /*Cb  qi=9  INTRA*/
+      {
+        {   41, 1227},{   70, 1452},{  102, 1697},{  110, 1967},
+        {  134, 2326},{  153, 2695},{  160, 3007},{  196, 3393},
+        {  232, 3769},{  266, 4067},{  297, 4376},{  326, 4728},
+        {  351, 5040},{  390, 5299},{  398, 5538},{  443, 5900},
+        {  448, 6107},{  506, 6370},{  519, 6636},{  525, 6953},
+        {  567, 7177},{  625, 7386},{  622, 7613},{  654, 7764}
+      },
+      /*Cb  qi=9  INTER*/
+      {
+        {    7,  377},{    2, 1102},{    7, 1262},{   19, 1693},
+        {   22, 1957},{   27, 2302},{   35, 2654},{   43, 3034},
+        {   52, 3431},{   58, 3826},{   63, 4207},{   67, 4570},
+        {   71, 4927},{   75, 5283},{   79, 5624},{   82, 5944},
+        {   85, 6279},{   88, 6616},{   94, 6955},{  102, 7284},
+        {  108, 7622},{  116, 7944},{  124, 8293},{  133, 8568}
+      }
+    },
+    {
+      /*Cr  qi=9  INTRA*/
+      {
+        {   38, 1217},{   61, 1473},{   88, 1650},{  100, 1908},
+        {  137, 2400},{  147, 2777},{  176, 3149},{  205, 3433},
+        {  227, 3772},{  249, 4092},{  286, 4370},{  313, 4746},
+        {  342, 5053},{  368, 5261},{  411, 5530},{  442, 5859},
+        {  494, 6061},{  526, 6340},{  532, 6646},{  580, 6799},
+        {  567, 7203},{  649, 7357},{  625, 7559},{  660, 7709}
+      },
+      /*Cr  qi=9  INTER*/
+      {
+        {    5,  408},{    3, 1197},{    7, 1275},{   16, 1695},
+        {   22, 1979},{   30, 2324},{   38, 2691},{   47, 3071},
+        {   53, 3462},{   59, 3857},{   64, 4255},{   69, 4612},
+        {   74, 4975},{   76, 5347},{   81, 5694},{   86, 6020},
+        {   91, 6357},{   96, 6687},{  102, 7020},{  108, 7351},
+        {  115, 7663},{  122, 7979},{  125, 8298},{  136, 8576}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=18  INTRA*/
+      {
+        {   83,  534},{  261, 1697},{  507, 2691},{  852, 3418},
+        { 1127, 4094},{ 1378, 4775},{ 1626, 5442},{ 1905, 5975},
+        { 2164, 6468},{ 2445, 6913},{ 2704, 7301},{ 3001, 7631},
+        { 3285, 7934},{ 3536, 8217},{ 3837, 8489},{ 4076, 8814},
+        { 4325, 9046},{ 4590, 9313},{ 4794, 9546},{ 5062, 9751},
+        { 5285, 9963},{ 5578,10079},{ 5777,10302},{ 6054,10296}
+      },
+      /*Y'  qi=18  INTER*/
+      {
+        {   33,  490},{   62, 1599},{   96, 3015},{  164, 4378},
+        {  225, 5633},{  285, 6831},{  351, 7999},{  427, 9133},
+        {  526,10181},{  652,11141},{  829,11991},{ 1049,12732},
+        { 1310,13367},{ 1592,13896},{ 1881,14350},{ 2207,14667},
+        { 2529,14877},{ 2873,14980},{ 3231,14949},{ 3571,14926},
+        { 3922,14816},{ 4246,14715},{ 4559,14579},{ 4778,14590}
+      }
+    },
+    {
+      /*Cb  qi=18  INTRA*/
+      {
+        {   55,  825},{   95, 1021},{  131, 1276},{  150, 1618},
+        {  180, 1958},{  220, 2306},{  256, 2608},{  322, 2939},
+        {  385, 3239},{  436, 3530},{  475, 3771},{  518, 4078},
+        {  557, 4348},{  604, 4592},{  620, 4851},{  676, 5083},
+        {  704, 5363},{  739, 5582},{  788, 5782},{  819, 6000},
+        {  893, 6158},{  940, 6418},{  984, 6499},{ 1035, 6596}
+      },
+      /*Cb  qi=18  INTER*/
+      {
+        {   -2,  642},{   12,  771},{   20, 1054},{   29, 1394},
+        {   35, 1721},{   45, 2080},{   53, 2450},{   63, 2835},
+        {   73, 3225},{   81, 3596},{   87, 3952},{   95, 4300},
+        {  102, 4634},{  109, 4959},{  115, 5283},{  120, 5608},
+        {  130, 5931},{  139, 6254},{  152, 6571},{  163, 6887},
+        {  179, 7204},{  191, 7508},{  198, 7834},{  224, 8066}
+      }
+    },
+    {
+      /*Cr  qi=18  INTRA*/
+      {
+        {   49,  780},{   86,  986},{  120, 1261},{  137, 1588},
+        {  183, 1998},{  228, 2339},{  291, 2670},{  334, 2938},
+        {  376, 3239},{  412, 3522},{  459, 3783},{  490, 4113},
+        {  547, 4321},{  593, 4571},{  640, 4828},{  675, 5137},
+        {  730, 5254},{  774, 5524},{  821, 5754},{  859, 5911},
+        {  887, 6178},{  982, 6266},{  941, 6536},{  996, 6630}
+      },
+      /*Cr  qi=18  INTER*/
+      {
+        {    0,  741},{    9,  743},{   16, 1034},{   26, 1385},
+        {   39, 1741},{   48, 2090},{   56, 2459},{   64, 2850},
+        {   72, 3242},{   81, 3622},{   89, 3980},{   98, 4323},
+        {  104, 4667},{  110, 5005},{  118, 5337},{  126, 5675},
+        {  137, 5998},{  146, 6311},{  156, 6621},{  170, 6914},
+        {  181, 7205},{  196, 7490},{  203, 7779},{  232, 8012}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=27  INTRA*/
+      {
+        {  121,  378},{  379, 1464},{  810, 2335},{ 1447, 2725},
+        { 1851, 3194},{ 2311, 3655},{ 2747, 4081},{ 3211, 4393},
+        { 3640, 4672},{ 4056, 4933},{ 4427, 5150},{ 4842, 5259},
+        { 5220, 5381},{ 5584, 5443},{ 5925, 5648},{ 6233, 5783},
+        { 6547, 5944},{ 6905, 6056},{ 7203, 6181},{ 7526, 6207},
+        { 7800, 6330},{ 8175, 6312},{ 8415, 6437},{ 8705, 6459}
+      },
+      /*Y'  qi=27  INTER*/
+      {
+        {   48,  199},{   90, 1458},{  167, 2824},{  291, 4050},
+        {  434, 5144},{  638, 6133},{  901, 7011},{ 1249, 7743},
+        { 1726, 8280},{ 2317, 8616},{ 2957, 8789},{ 3561, 8896},
+        { 4126, 8936},{ 4646, 8933},{ 5115, 8931},{ 5579, 8890},
+        { 6008, 8804},{ 6411, 8744},{ 6774, 8646},{ 7153, 8549},
+        { 7475, 8462},{ 7790, 8372},{ 8069, 8280},{ 8299, 8278}
+      }
+    },
+    {
+      /*Cb  qi=27  INTRA*/
+      {
+        {   75,  612},{  117,  751},{  160, 1068},{  195, 1406},
+        {  240, 1741},{  305, 2066},{  364, 2359},{  454, 2639},
+        {  538, 2899},{  609, 3149},{  664, 3384},{  730, 3625},
+        {  785, 3860},{  836, 4094},{  872, 4312},{  948, 4507},
+        { 1023, 4677},{ 1081, 4843},{ 1165, 4985},{ 1238, 5092},
+        { 1316, 5235},{ 1418, 5345},{ 1430, 5478},{ 1505, 5538}
+      },
+      /*Cb  qi=27  INTER*/
+      {
+        {   16,  637},{   13,  634},{   32,  869},{   46, 1230},
+        {   55, 1583},{   67, 1950},{   79, 2320},{   93, 2690},
+        {  107, 3052},{  120, 3399},{  133, 3733},{  146, 4054},
+        {  162, 4367},{  175, 4679},{  191, 4984},{  211, 5285},
+        {  232, 5581},{  252, 5875},{  276, 6155},{  305, 6433},
+        {  333, 6706},{  364, 6967},{  398, 7244},{  474, 7394}
+      }
+    },
+    {
+      /*Cr  qi=27  INTRA*/
+      {
+        {   64,  632},{  107,  763},{  147, 1054},{  176, 1411},
+        {  255, 1770},{  324, 2079},{  411, 2359},{  475, 2621},
+        {  545, 2880},{  590, 3158},{  647, 3425},{  709, 3648},
+        {  766, 3878},{  831, 4082},{  911, 4260},{  960, 4493},
+        { 1042, 4558},{ 1115, 4760},{ 1200, 4852},{ 1280, 4950},
+        { 1327, 5186},{ 1445, 5157},{ 1443, 5431},{ 1518, 5493}
+      },
+      /*Cr  qi=27  INTER*/
+      {
+        {   12,  688},{   11,  660},{   28,  869},{   46, 1227},
+        {   60, 1598},{   68, 1954},{   79, 2318},{   93, 2693},
+        {  108, 3054},{  123, 3406},{  138, 3748},{  151, 4078},
+        {  165, 4400},{  180, 4716},{  197, 5024},{  217, 5314},
+        {  243, 5599},{  275, 5866},{  301, 6128},{  327, 6394},
+        {  352, 6644},{  375, 6894},{  376, 7180},{  458, 7334}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=36  INTRA*/
+      {
+        {  156,  263},{  484, 1370},{ 1174, 2110},{ 1914, 2456},
+        { 2601, 2695},{ 3221, 2984},{ 3865, 3284},{ 4450, 3530},
+        { 4979, 3739},{ 5470, 3928},{ 5905, 4080},{ 6375, 4200},
+        { 6761, 4373},{ 7175, 4429},{ 7615, 4616},{ 8069, 4687},
+        { 8417, 4820},{ 8813, 4908},{ 9211, 5001},{ 9508, 5073},
+        { 9888, 5133},{10209, 5140},{10529, 5196},{10830, 5173}
+      },
+      /*Y'  qi=36  INTER*/
+      {
+        {   68,  151},{  107, 1413},{  262, 2665},{  542, 3715},
+        {  946, 4584},{ 1508, 5279},{ 2167, 5829},{ 2968, 6179},
+        { 3758, 6392},{ 4481, 6517},{ 5139, 6577},{ 5706, 6636},
+        { 6271, 6612},{ 6746, 6585},{ 7216, 6533},{ 7622, 6496},
+        { 8045, 6403},{ 8393, 6389},{ 8799, 6272},{ 9062, 6281},
+        { 9436, 6184},{ 9637, 6238},{ 9864, 6215},{10147, 6215}
+      }
+    },
+    {
+      /*Cb  qi=36  INTRA*/
+      {
+        {   91,  385},{  138,  613},{  205,  932},{  265, 1239},
+        {  353, 1549},{  443, 1839},{  518, 2104},{  655, 2341},
+        {  764, 2559},{  876, 2756},{  967, 2950},{ 1088, 3107},
+        { 1184, 3266},{ 1295, 3396},{ 1375, 3548},{ 1502, 3664},
+        { 1610, 3764},{ 1731, 3844},{ 1839, 3938},{ 1954, 4016},
+        { 2069, 4100},{ 2207, 4167},{ 2274, 4253},{ 2374, 4289}
+      },
+      /*Cb  qi=36  INTER*/
+      {
+        {   59,   18},{   56,  463},{   50,  790},{   76, 1155},
+        {   90, 1515},{  108, 1877},{  125, 2226},{  150, 2562},
+        {  177, 2890},{  203, 3203},{  231, 3501},{  259, 3789},
+        {  289, 4074},{  325, 4348},{  367, 4608},{  418, 4857},
+        {  486, 5093},{  574, 5307},{  677, 5494},{  784, 5688},
+        {  914, 5844},{ 1033, 6004},{ 1142, 6179},{ 1307, 6220}
+      }
+    },
+    {
+      /*Cr  qi=36  INTRA*/
+      {
+        {   87,  376},{  132,  616},{  190,  931},{  268, 1260},
+        {  358, 1550},{  457, 1833},{  592, 2082},{  685, 2318},
+        {  781, 2548},{  867, 2757},{  968, 2953},{ 1080, 3124},
+        { 1173, 3255},{ 1282, 3390},{ 1410, 3477},{ 1528, 3593},
+        { 1645, 3612},{ 1766, 3739},{ 1885, 3789},{ 1954, 3892},
+        { 2115, 3987},{ 2202, 4052},{ 2280, 4172},{ 2379, 4213}
+      },
+      /*Cr  qi=36  INTER*/
+      {
+        {   53,   45},{   50,  467},{   45,  789},{   76, 1150},
+        {   92, 1531},{  107, 1877},{  125, 2219},{  147, 2561},
+        {  176, 2893},{  206, 3209},{  231, 3514},{  260, 3808},
+        {  298, 4085},{  350, 4344},{  411, 4587},{  475, 4814},
+        {  532, 5037},{  587, 5261},{  647, 5480},{  707, 5694},
+        {  793, 5900},{  891, 6093},{ 1017, 6292},{ 1205, 6307}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=45  INTRA*/
+      {
+        {   47,  170},{  955, 1217},{ 1713, 2014},{ 3050, 2094},
+        { 3954, 2179},{ 4801, 2357},{ 5629, 2494},{ 6313, 2614},
+        { 6962, 2716},{ 7566, 2820},{ 8138, 2886},{ 8613, 2949},
+        { 9097, 3031},{ 9574, 3044},{10053, 3142},{10514, 3134},
+        {10897, 3241},{11397, 3275},{11775, 3297},{12200, 3350},
+        {12527, 3350},{12959, 3393},{13246, 3401},{13573, 3397}
+      },
+      /*Y'  qi=45  INTER*/
+      {
+        {   53,   73},{  175, 1343},{  649, 2439},{ 1339, 3250},
+        { 2297, 3837},{ 3395, 4203},{ 4438, 4400},{ 5401, 4529},
+        { 6222, 4588},{ 7018, 4564},{ 7713, 4532},{ 8378, 4464},
+        { 8959, 4414},{ 9464, 4364},{ 9980, 4315},{10401, 4291},
+        {10805, 4260},{11172, 4260},{11501, 4231},{11798, 4248},
+        {12082, 4254},{12381, 4262},{12572, 4285},{12877, 4289}
+      }
+    },
+    {
+      /*Cb  qi=45  INTRA*/
+      {
+        {  112,  -14},{  173,  495},{  260,  827},{  355, 1122},
+        {  451, 1420},{  579, 1695},{  697, 1934},{  917, 2101},
+        { 1104, 2244},{ 1266, 2381},{ 1417, 2520},{ 1609, 2611},
+        { 1801, 2689},{ 1973, 2764},{ 2108, 2864},{ 2298, 2948},
+        { 2452, 3008},{ 2588, 3080},{ 2732, 3161},{ 2888, 3203},
+        { 3052, 3266},{ 3240, 3294},{ 3342, 3351},{ 3467, 3373}
+      },
+      /*Cb  qi=45  INTER*/
+      {
+        {   41,  -49},{   52,  385},{   87,  743},{  110, 1102},
+        {  135, 1453},{  162, 1788},{  207, 2096},{  272, 2391},
+        {  330, 2677},{  392, 2950},{  464, 3205},{  556, 3442},
+        {  674, 3656},{  827, 3847},{ 1030, 4006},{ 1275, 4132},
+        { 1544, 4234},{ 1809, 4317},{ 2089, 4408},{ 2377, 4456},
+        { 2647, 4532},{ 2919, 4595},{ 3256, 4659},{ 3465, 4657}
+      }
+    },
+    {
+      /*Cr  qi=45  INTRA*/
+      {
+        {   99,  -14},{  164,  493},{  247,  832},{  358, 1123},
+        {  468, 1416},{  599, 1680},{  795, 1886},{  958, 2063},
+        { 1133, 2211},{ 1300, 2345},{ 1480, 2461},{ 1664, 2554},
+        { 1807, 2656},{ 1995, 2742},{ 2146, 2799},{ 2331, 2856},
+        { 2440, 2894},{ 2592, 2996},{ 2751, 3033},{ 2865, 3112},
+        { 3073, 3162},{ 3210, 3208},{ 3330, 3306},{ 3454, 3332}
+      },
+      /*Cr  qi=45  INTER*/
+      {
+        {   39,  -33},{   48,  403},{   86,  744},{  110, 1101},
+        {  134, 1461},{  165, 1779},{  205, 2095},{  259, 2401},
+        {  318, 2686},{  386, 2958},{  481, 3204},{  610, 3415},
+        {  753, 3603},{  908, 3780},{ 1055, 3959},{ 1220, 4132},
+        { 1422, 4281},{ 1656, 4419},{ 1939, 4512},{ 2259, 4574},
+        { 2593, 4593},{ 2950, 4569},{ 3339, 4505},{ 3542, 4497}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=54  INTRA*/
+      {
+        {  339,   30},{  785, 1251},{ 2395, 1971},{ 4075, 2063},
+        { 4924, 2135},{ 5806, 2270},{ 6604, 2372},{ 7224, 2497},
+        { 7879, 2608},{ 8400, 2729},{ 8951, 2829},{ 9379, 2864},
+        { 9782, 2955},{10230, 3020},{10704, 3132},{11264, 3272},
+        {11618, 3284},{12034, 3394},{12500, 3482},{12767, 3484},
+        {13162, 3580},{13552, 3565},{13997, 3732},{14320, 3715}
+      },
+      /*Y'  qi=54  INTER*/
+      {
+        {   65,   95},{  269, 1312},{ 1152, 2242},{ 2336, 2863},
+        { 3728, 3239},{ 4944, 3439},{ 6034, 3543},{ 7064, 3580},
+        { 7991, 3586},{ 8849, 3568},{ 9605, 3561},{10306, 3550},
+        {10919, 3544},{11466, 3530},{11972, 3528},{12401, 3536},
+        {12818, 3511},{13185, 3522},{13523, 3505},{13827, 3505},
+        {14114, 3522},{14395, 3521},{14625, 3533},{14909, 3532}
+      }
+    },
+    {
+      /*Cb  qi=54  INTRA*/
+      {
+        {  148,   -3},{  218,  480},{  351,  787},{  437, 1069},
+        {  550, 1350},{  730, 1592},{  931, 1784},{ 1243, 1884},
+        { 1499, 1984},{ 1680, 2115},{ 1864, 2244},{ 2062, 2334},
+        { 2278, 2407},{ 2442, 2496},{ 2602, 2603},{ 2783, 2686},
+        { 2928, 2771},{ 3073, 2856},{ 3207, 2938},{ 3368, 2998},
+        { 3516, 3077},{ 3699, 3122},{ 3818, 3202},{ 3939, 3230}
+      },
+      /*Cb  qi=54  INTER*/
+      {
+        {   48,  -11},{   54,  407},{   86,  743},{  122, 1083},
+        {  176, 1400},{  241, 1699},{  347, 1968},{  496, 2208},
+        {  664, 2431},{  863, 2637},{ 1120, 2816},{ 1442, 2961},
+        { 1835, 3066},{ 2261, 3140},{ 2676, 3203},{ 3092, 3245},
+        { 3480, 3266},{ 3862, 3286},{ 4254, 3305},{ 4604, 3316},
+        { 4989, 3335},{ 5306, 3351},{ 5654, 3339},{ 5855, 3345}
+      }
+    },
+    {
+      /*Cr  qi=54  INTRA*/
+      {
+        {  137,   10},{  212,  492},{  315,  795},{  470, 1061},
+        {  612, 1333},{  821, 1539},{ 1105, 1680},{ 1335, 1811},
+        { 1566, 1927},{ 1773, 2038},{ 1973, 2153},{ 2148, 2259},
+        { 2311, 2352},{ 2474, 2460},{ 2647, 2516},{ 2810, 2607},
+        { 2928, 2638},{ 3085, 2742},{ 3232, 2815},{ 3348, 2899},
+        { 3533, 2993},{ 3679, 3029},{ 3803, 3138},{ 3925, 3170}
+      },
+      /*Cr  qi=54  INTER*/
+      {
+        {   46,    2},{   47,  419},{   87,  746},{  125, 1083},
+        {  177, 1401},{  249, 1687},{  342, 1964},{  453, 2226},
+        {  627, 2454},{  869, 2641},{ 1152, 2800},{ 1455, 2942},
+        { 1776, 3077},{ 2135, 3187},{ 2524, 3287},{ 2984, 3325},
+        { 3425, 3344},{ 3881, 3328},{ 4313, 3274},{ 4701, 3218},
+        { 5027, 3171},{ 5299, 3130},{ 5597, 3107},{ 5791, 3120}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=63  INTRA*/
+      {
+        {  -86,  167},{ 2070, 1104},{ 5138, 1428},{ 7014, 1535},
+        { 8430, 1629},{ 9663, 1690},{10576, 1745},{11277, 1809},
+        {12003, 1869},{12663, 1925},{13258, 1983},{13701, 2016},
+        {14228, 2073},{14756, 2088},{15203, 2164},{15993, 2175},
+        {16378, 2256},{16917, 2240},{17361, 2332},{17782, 2312},
+        {18376, 2381},{18728, 2362},{19224, 2408},{19705, 2392}
+      },
+      /*Y'  qi=63  INTER*/
+      {
+        { -529,  154},{  967, 1233},{ 4201, 1610},{ 6285, 1800},
+        { 8058, 1908},{ 9439, 1968},{10737, 1987},{11999, 1979},
+        {13003, 1972},{13854, 1963},{14584, 1965},{15217, 1955},
+        {15773, 1956},{16229, 1949},{16735, 1952},{17085, 1956},
+        {17508, 1956},{17821, 1961},{18191, 1961},{18465, 1982},
+        {18792, 1975},{19158, 1995},{19378, 2010},{19817, 2021}
+      }
+    },
+    {
+      /*Cb  qi=63  INTRA*/
+      {
+        {  136,    4},{  338,  438},{  593,  730},{  835,  974},
+        { 1168, 1188},{ 1602, 1345},{ 2004, 1467},{ 2465, 1505},
+        { 2799, 1574},{ 3091, 1669},{ 3384, 1758},{ 3673, 1817},
+        { 3950, 1861},{ 4190, 1924},{ 4444, 1993},{ 4701, 2051},
+        { 4915, 2123},{ 5119, 2166},{ 5329, 2231},{ 5576, 2259},
+        { 5793, 2310},{ 6001, 2334},{ 6198, 2384},{ 6344, 2401}
+      },
+      /*Cb  qi=63  INTER*/
+      {
+        {   49,    4},{   51,  403},{   98,  729},{  185, 1034},
+        {  352, 1304},{  622, 1533},{ 1068, 1696},{ 1604, 1821},
+        { 2203, 1924},{ 2890, 1988},{ 3622, 2017},{ 4359, 2019},
+        { 5025, 2005},{ 5586, 2002},{ 6090, 1989},{ 6519, 1977},
+        { 6927, 1977},{ 7305, 1968},{ 7730, 1984},{ 8087, 1981},
+        { 8435, 1991},{ 8822, 1987},{ 9155, 2008},{ 9392, 2011}
+      }
+    },
+    {
+      /*Cr  qi=63  INTRA*/
+      {
+        {  131,   11},{  334,  448},{  569,  739},{  929,  946},
+        { 1285, 1145},{ 1718, 1274},{ 2176, 1343},{ 2531, 1424},
+        { 2866, 1504},{ 3176, 1580},{ 3475, 1657},{ 3736, 1728},
+        { 3962, 1807},{ 4232, 1872},{ 4425, 1921},{ 4657, 1976},
+        { 4817, 2009},{ 5063, 2082},{ 5281, 2129},{ 5480, 2199},
+        { 5743, 2258},{ 5887, 2283},{ 6124, 2358},{ 6273, 2378}
+      },
+      /*Cr  qi=63  INTER*/
+      {
+        {   47,   15},{   40,  405},{  100,  730},{  189, 1037},
+        {  351, 1303},{  625, 1526},{  984, 1719},{ 1512, 1862},
+        { 2189, 1947},{ 2895, 2003},{ 3576, 2046},{ 4249, 2072},
+        { 4901, 2068},{ 5514, 2043},{ 6079, 2009},{ 6528, 1977},
+        { 6927, 1940},{ 7274, 1915},{ 7580, 1894},{ 7910, 1910},
+        { 8211, 1902},{ 8472, 1920},{ 8742, 1926},{ 8981, 1930}
+      }
+    }
+  }
+};
+
+# if !defined(OC_COLLECT_METRICS)
+static const
+# endif
+oc_mode_rd OC_MODE_RD_SAD[OC_LOGQ_BINS][3][2][OC_COMP_BINS]={
+  {
+    {
+      /*Y'  qi=0  INTRA*/
+      {
+        {   33,  122},{   57, 1297},{   13, 2226},{  157, 3890},
+        {  227, 3682},{  169, 3084},{  197, 2700},{  227, 3238},
+        {  290, 4294},{  354, 5230},{  406, 5615},{  417, 5322},
+        {  452, 5462},{  455, 5683},{  493, 5938},{  553, 6374},
+        {  558, 6464},{  606, 6493},{  616, 6417},{  643, 6557},
+        {  641, 6664},{  716, 7285},{  748, 7518},{  747, 7502}
+      },
+      /*Y'  qi=0  INTER*/
+      {
+        {   16,  205},{    5, 1338},{   16, 2554},{    6, 3809},
+        {    9, 5188},{   58, 6446},{   76, 7561},{   95, 8648},
+        {  124, 9713},{  158,10787},{  193,11887},{  233,12991},
+        {  270,14116},{  307,15236},{  341,16346},{  372,17426},
+        {  398,18499},{  422,19594},{  448,20669},{  479,21732},
+        {  526,22720},{  583,23572},{  655,24516},{  758,24647}
+      }
+    },
+    {
+      /*Cb  qi=0  INTRA*/
+      {
+        {   26,   40},{   23,  589},{   27,  784},{   27, 1079},
+        {   24, 1186},{   25, 1641},{   25, 1915},{   29, 2207},
+        {   39, 2361},{   39, 2746},{   32, 3020},{   16, 3387},
+        {   31, 3604},{   36, 4076},{   69, 4426},{  102, 4724},
+        {  139, 4923},{  196, 5061},{  211, 5103},{  214, 5063},
+        {  161, 4466},{  208, 4793},{  218, 4537},{  219, 4539}
+      },
+      /*Cb  qi=0  INTER*/
+      {
+        {    3,  164},{    1,  535},{    1,  779},{    2, 1048},
+        {    3, 1267},{    1, 1625},{    2, 1921},{    5, 2224},
+        {    8, 2481},{    8, 2813},{    4, 3089},{   -2, 3386},
+        {   -9, 3642},{  -14, 3993},{  -11, 4300},{   -6, 4628},
+        {    4, 4929},{   25, 5299},{   44, 5623},{   83, 5915},
+        {   93, 6186},{   91, 6483},{   90, 6775},{   95, 6952}
+      }
+    },
+    {
+      /*Cr  qi=0  INTRA*/
+      {
+        {   22,   49},{   26,  579},{   23,  762},{   15, 1050},
+        {   20, 1191},{   24, 1608},{   26, 1875},{   35, 2173},
+        {   39, 2359},{   30, 2736},{   16, 2987},{    0, 3334},
+        {   14, 3625},{   11, 4095},{   57, 4512},{   95, 4793},
+        {  141, 4949},{  206, 5242},{  230, 5191},{  242, 5177},
+        {  178, 4775},{  237, 5010},{  223, 4656},{  224, 4657}
+      },
+      /*Cr  qi=0  INTER*/
+      {
+        {    3,  163},{    1,  536},{    1,  773},{    3, 1023},
+        {    2, 1225},{    1, 1607},{    1, 1900},{    5, 2204},
+        {    9, 2453},{    8, 2781},{    3, 3049},{   -5, 3338},
+        {  -13, 3570},{  -17, 3950},{  -13, 4255},{   -6, 4596},
+        {    7, 4893},{   33, 5300},{   53, 5632},{   97, 5942},
+        {  103, 6216},{   96, 6522},{   91, 6849},{   98, 6995}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=9  INTRA*/
+      {
+        {   47,  152},{   50, 1213},{  144, 2543},{  242, 2332},
+        {  210, 1894},{  250, 2386},{  328, 3094},{  407, 3419},
+        {  464, 3507},{  522, 3770},{  613, 4194},{  657, 4618},
+        {  753, 5137},{  796, 5248},{  842, 5110},{  927, 5330},
+        {  994, 5487},{ 1008, 5463},{ 1101, 5794},{ 1169, 5966},
+        { 1208, 6121},{ 1331, 6447},{ 1445, 6618},{ 1449, 6616}
+      },
+      /*Y'  qi=9  INTER*/
+      {
+        {    4,  218},{   16, 1314},{    4, 2563},{   37, 3882},
+        {   83, 5058},{  109, 6184},{  161, 7292},{  224, 8389},
+        {  287, 9485},{  349,10565},{  411,11608},{  464,12648},
+        {  518,13664},{  575,14650},{  649,15585},{  742,16451},
+        {  862,17214},{ 1003,17860},{ 1179,18325},{ 1372,18648},
+        { 1576,18878},{ 1795,18903},{ 2040,18880},{ 2116,18759}
+      }
+    },
+    {
+      /*Cb  qi=9  INTRA*/
+      {
+        {   27,   42},{   23,  587},{   34,  782},{   37, 1079},
+        {   34, 1204},{   42, 1630},{   37, 1887},{   25, 2210},
+        {   40, 2455},{   71, 2880},{  112, 3193},{  156, 3427},
+        {  168, 3403},{  217, 3488},{  203, 3335},{  224, 3200},
+        {  191, 2742},{  195, 2810},{  207, 2665},{  201, 2661},
+        {  169, 2078},{  211, 2720},{  226, 2813},{  228, 2824}
+      },
+      /*Cb  qi=9  INTER*/
+      {
+        {    4,  158},{    2,  537},{    3,  779},{    2, 1045},
+        {    3, 1284},{    7, 1629},{    7, 1917},{    1, 2218},
+        {   -4, 2497},{   -3, 2845},{    6, 3162},{   23, 3482},
+        {   42, 3788},{   62, 4116},{   76, 4416},{   84, 4700},
+        {   91, 4975},{   95, 5259},{   97, 5518},{   94, 5790},
+        {   99, 6052},{  111, 6311},{  126, 6601},{  136, 6719}
+      }
+    },
+    {
+      /*Cr  qi=9  INTRA*/
+      {
+        {   25,   50},{   32,  576},{   32,  762},{   21, 1049},
+        {   28, 1207},{   41, 1603},{   36, 1839},{   26, 2170},
+        {   34, 2462},{   59, 2872},{  109, 3176},{  157, 3364},
+        {  188, 3397},{  231, 3418},{  250, 3341},{  261, 3228},
+        {  222, 2814},{  258, 3091},{  234, 2915},{  228, 3042},
+        {  210, 2610},{  273, 3210},{  274, 3231},{  276, 3239}
+      },
+      /*Cr  qi=9  INTER*/
+      {
+        {    4,  156},{    2,  538},{    3,  772},{    2, 1028},
+        {    3, 1254},{    7, 1613},{    7, 1893},{    0, 2191},
+        {   -8, 2454},{   -4, 2811},{    7, 3121},{   27, 3442},
+        {   48, 3749},{   72, 4101},{   88, 4410},{   91, 4698},
+        {   99, 4988},{   99, 5279},{  101, 5542},{   95, 5813},
+        {   99, 6088},{  114, 6367},{  125, 6683},{  137, 6761}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=18  INTRA*/
+      {
+        {   51,   88},{   88, 1344},{  258, 1643},{  228, 1325},
+        {  372, 2208},{  443, 2371},{  520, 2382},{  584, 2477},
+        {  739, 2906},{  859, 3348},{ 1008, 3697},{ 1131, 3884},
+        { 1278, 4110},{ 1349, 4229},{ 1431, 4329},{ 1544, 4395},
+        { 1602, 4439},{ 1669, 4535},{ 1814, 4656},{ 1883, 4716},
+        { 1957, 4940},{ 2101, 5019},{ 2259, 5249},{ 2265, 5246}
+      },
+      /*Y'  qi=18  INTER*/
+      {
+        {   26,  195},{    1, 1317},{   45, 2595},{  103, 3750},
+        {  168, 4903},{  281, 6007},{  397, 7062},{  513, 8064},
+        {  630, 9010},{  758, 9902},{  906,10732},{ 1095,11463},
+        { 1338,12060},{ 1629,12490},{ 1969,12724},{ 2313,12842},
+        { 2666,12828},{ 2993,12747},{ 3294,12670},{ 3558,12553},
+        { 3813,12440},{ 3990,12379},{ 4177,12291},{ 4226,12265}
+      }
+    },
+    {
+      /*Cb  qi=18  INTRA*/
+      {
+        {   31,   43},{   33,  585},{   40,  781},{   58, 1077},
+        {   45, 1189},{   58, 1655},{   66, 1983},{  123, 2221},
+        {  168, 2193},{  227, 2321},{  241, 2246},{  250, 2208},
+        {  221, 1786},{  250, 2087},{  247, 2036},{  250, 2164},
+        {  241, 2054},{  287, 2453},{  302, 2551},{  335, 2758},
+        {  279, 2511},{  379, 2973},{  404, 3028},{  406, 3029}
+      },
+      /*Cb  qi=18  INTER*/
+      {
+        {    7,  153},{    4,  537},{    3,  777},{    9, 1034},
+        {    6, 1282},{    0, 1630},{    0, 1943},{   21, 2252},
+        {   48, 2567},{   67, 2881},{   83, 3178},{   89, 3463},
+        {   92, 3738},{   99, 4024},{  114, 4289},{  131, 4552},
+        {  153, 4814},{  179, 5081},{  207, 5333},{  241, 5581},
+        {  273, 5822},{  303, 6068},{  335, 6368},{  353, 6432}
+      }
+    },
+    {
+      /*Cr  qi=18  INTRA*/
+      {
+        {   31,   49},{   42,  575},{   42,  763},{   38, 1045},
+        {   41, 1184},{   56, 1631},{   87, 1968},{  163, 2177},
+        {  191, 2188},{  236, 2264},{  240, 2101},{  234, 2047},
+        {  206, 1651},{  222, 1966},{  238, 2013},{  240, 2176},
+        {  229, 2098},{  321, 2592},{  341, 2748},{  378, 3025},
+        {  367, 2849},{  442, 3283},{  453, 3315},{  455, 3313}
+      },
+      /*Cr  qi=18  INTER*/
+      {
+        {    6,  151},{    3,  539},{    3,  775},{    8, 1027},
+        {    6, 1260},{   -3, 1619},{    0, 1927},{   24, 2238},
+        {   58, 2558},{   76, 2871},{   92, 3173},{   96, 3461},
+        {   98, 3742},{  104, 4032},{  116, 4306},{  136, 4578},
+        {  158, 4839},{  185, 5123},{  217, 5383},{  250, 5642},
+        {  279, 5910},{  306, 6169},{  333, 6502},{  350, 6522}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=27  INTRA*/
+      {
+        {   10,   85},{  280, 1349},{  278,  815},{  497, 1699},
+        {  600, 1569},{  744, 1944},{  894, 2114},{ 1040, 2292},
+        { 1216, 2484},{ 1485, 2816},{ 1778, 3065},{ 1990, 3243},
+        { 2199, 3381},{ 2326, 3515},{ 2370, 3422},{ 2512, 3581},
+        { 2548, 3526},{ 2656, 3615},{ 2803, 3679},{ 2946, 3766},
+        { 3023, 3824},{ 3179, 3908},{ 3374, 4035},{ 3377, 4030}
+      },
+      /*Y'  qi=27  INTER*/
+      {
+        {   -2,  172},{   31, 1347},{  117, 2488},{  245, 3651},
+        {  448, 4719},{  668, 5679},{  918, 6524},{ 1204, 7255},
+        { 1557, 7848},{ 1998, 8281},{ 2511, 8531},{ 3055, 8642},
+        { 3582, 8648},{ 4062, 8611},{ 4482, 8582},{ 4845, 8560},
+        { 5140, 8560},{ 5423, 8581},{ 5645, 8596},{ 5855, 8586},
+        { 6061, 8608},{ 6211, 8558},{ 6402, 8583},{ 6472, 8575}
+      }
+    },
+    {
+      /*Cb  qi=27  INTRA*/
+      {
+        {   47,   49},{   35,  580},{   64,  778},{   69, 1071},
+        {   98, 1289},{  186, 1556},{  177, 1654},{  197, 1736},
+        {  211, 1373},{  284, 1742},{  321, 1840},{  344, 2024},
+        {  321, 1969},{  386, 2254},{  397, 2281},{  425, 2320},
+        {  396, 2088},{  448, 2284},{  462, 2213},{  482, 2274},
+        {  410, 1894},{  513, 2310},{  546, 2332},{  549, 2334}
+      },
+      /*Cb  qi=27  INTER*/
+      {
+        {   11,  145},{    5,  539},{   11,  771},{    0, 1033},
+        {    9, 1334},{   44, 1644},{   70, 1934},{   87, 2227},
+        {   96, 2508},{  113, 2812},{  139, 3085},{  174, 3352},
+        {  216, 3614},{  261, 3873},{  305, 4123},{  349, 4372},
+        {  396, 4611},{  442, 4853},{  493, 5088},{  543, 5313},
+        {  600, 5537},{  662, 5752},{  737, 6018},{  775, 6037}
+      }
+    },
+    {
+      /*Cr  qi=27  INTRA*/
+      {
+        {   49,   52},{   57,  570},{   61,  762},{   44, 1048},
+        {   80, 1291},{  196, 1513},{  224, 1522},{  242, 1532},
+        {  213, 1293},{  260, 1639},{  253, 1691},{  291, 1915},
+        {  294, 1897},{  367, 2178},{  395, 2258},{  432, 2310},
+        {  407, 2105},{  503, 2369},{  492, 2293},{  552, 2421},
+        {  496, 2099},{  598, 2549},{  624, 2531},{  627, 2532}
+      },
+      /*Cr  qi=27  INTER*/
+      {
+        {   10,  147},{    4,  538},{   11,  769},{    0, 1022},
+        {    9, 1318},{   51, 1635},{   80, 1925},{   97, 2214},
+        {  101, 2493},{  115, 2805},{  143, 3083},{  182, 3361},
+        {  226, 3625},{  270, 3898},{  319, 4157},{  366, 4405},
+        {  418, 4649},{  467, 4904},{  509, 5157},{  548, 5412},
+        {  589, 5659},{  636, 5909},{  683, 6208},{  710, 6190}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=36  INTRA*/
+      {
+        {   86,  252},{  345,  662},{  476, 1143},{  698, 1169},
+        {  894, 1457},{ 1218, 1728},{ 1465, 1849},{ 1731, 2019},
+        { 2183, 2298},{ 2666, 2511},{ 3116, 2731},{ 3371, 2813},
+        { 3621, 2923},{ 3675, 2949},{ 3710, 2921},{ 3740, 2896},
+        { 3746, 2895},{ 3886, 2978},{ 4069, 2991},{ 4229, 3016},
+        { 4338, 3102},{ 4530, 3124},{ 4751, 3248},{ 4753, 3244}
+      },
+      /*Y'  qi=36  INTER*/
+      {
+        {    0,  208},{   73, 1293},{  248, 2449},{  616, 3461},
+        { 1061, 4329},{ 1601, 4986},{ 2189, 5447},{ 2875, 5723},
+        { 3620, 5844},{ 4328, 5879},{ 4954, 5880},{ 5490, 5890},
+        { 5934, 5901},{ 6353, 5926},{ 6706, 5924},{ 7036, 5930},
+        { 7338, 5938},{ 7600, 5930},{ 7870, 5939},{ 8065, 5921},
+        { 8318, 5914},{ 8451, 5912},{ 8648, 5923},{ 8734, 5926}
+      }
+    },
+    {
+      /*Cb  qi=36  INTRA*/
+      {
+        {   52,   54},{   52,  575},{  103,  776},{  185, 1072},
+        {  172, 1069},{  211, 1302},{  217, 1413},{  285, 1586},
+        {  330, 1463},{  453, 1694},{  500, 1741},{  545, 1852},
+        {  501, 1650},{  584, 1874},{  587, 1856},{  638, 1919},
+        {  581, 1742},{  670, 1953},{  688, 1934},{  731, 2030},
+        {  637, 1794},{  806, 2123},{  840, 2091},{  843, 2091}
+      },
+      /*Cb  qi=36  INTER*/
+      {
+        {   19,  142},{   17,  534},{    6,  772},{   44, 1023},
+        {   82, 1296},{   94, 1614},{  117, 1903},{  158, 2187},
+        {  218, 2450},{  285, 2703},{  352, 2943},{  421, 3181},
+        {  489, 3415},{  564, 3644},{  647, 3861},{  748, 4060},
+        {  861, 4246},{  993, 4419},{ 1132, 4576},{ 1282, 4744},
+        { 1445, 4894},{ 1600, 5034},{ 1782, 5211},{ 1837, 5200}
+      }
+    },
+    {
+      /*Cr  qi=36  INTRA*/
+      {
+        {   62,   55},{   90,  561},{   56,  767},{  148, 1014},
+        {  207,  981},{  258, 1216},{  273, 1253},{  326, 1392},
+        {  338, 1383},{  417, 1613},{  443, 1629},{  497, 1734},
+        {  466, 1525},{  561, 1778},{  577, 1787},{  631, 1892},
+        {  591, 1706},{  715, 1980},{  730, 1958},{  822, 2113},
+        {  755, 1935},{  928, 2228},{  935, 2205},{  938, 2205}
+      },
+      /*Cr  qi=36  INTER*/
+      {
+        {   14,  145},{   16,  535},{    5,  772},{   44, 1017},
+        {   91, 1296},{  100, 1605},{  122, 1891},{  163, 2174},
+        {  225, 2443},{  294, 2707},{  362, 2962},{  436, 3210},
+        {  518, 3437},{  607, 3664},{  702, 3876},{  795, 4094},
+        {  886, 4310},{  980, 4538},{ 1089, 4749},{ 1216, 4927},
+        { 1357, 5116},{ 1506, 5247},{ 1758, 5338},{ 1787, 5306}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=45  INTRA*/
+      {
+        {  185,  246},{  513,  647},{  883,  891},{ 1313, 1142},
+        { 1760, 1351},{ 2368, 1595},{ 2828, 1718},{ 3097, 1780},
+        { 3762, 1951},{ 4454, 2121},{ 4986, 2227},{ 5281, 2281},
+        { 5477, 2299},{ 5431, 2288},{ 5425, 2283},{ 5439, 2290},
+        { 5324, 2249},{ 5509, 2279},{ 5703, 2321},{ 5896, 2348},
+        { 6049, 2370},{ 6253, 2425},{ 6415, 2432},{ 6419, 2430}
+      },
+      /*Y'  qi=45  INTER*/
+      {
+        {    6,  215},{  152, 1261},{  691, 2314},{ 1538, 3095},
+        { 2505, 3632},{ 3475, 3935},{ 4355, 4084},{ 5209, 4139},
+        { 5985, 4162},{ 6644, 4185},{ 7235, 4190},{ 7768, 4196},
+        { 8266, 4200},{ 8736, 4210},{ 9143, 4207},{ 9511, 4215},
+        { 9828, 4209},{10112, 4224},{10374, 4226},{10642, 4232},
+        {10842, 4219},{10971, 4208},{11200, 4211},{11299, 4216}
+      }
+    },
+    {
+      /*Cb  qi=45  INTRA*/
+      {
+        {   58,   71},{   66,  548},{  155,  762},{  213,  944},
+        {  192,  731},{  324, 1147},{  401, 1366},{  481, 1480},
+        {  508, 1238},{  657, 1522},{  727, 1563},{  794, 1611},
+        {  761, 1470},{  885, 1710},{  893, 1700},{  958, 1760},
+        {  893, 1543},{  985, 1719},{ 1014, 1732},{ 1082, 1784},
+        {  963, 1519},{ 1152, 1800},{ 1221, 1830},{ 1226, 1830}
+      },
+      /*Cb  qi=45  INTER*/
+      {
+        {   35,  135},{   12,  532},{   54,  769},{  106, 1007},
+        {  127, 1258},{  198, 1565},{  289, 1832},{  398, 2082},
+        {  520, 2302},{  653, 2511},{  800, 2705},{  956, 2897},
+        { 1143, 3064},{ 1358, 3220},{ 1623, 3335},{ 1913, 3444},
+        { 2198, 3534},{ 2502, 3626},{ 2787, 3711},{ 3114, 3783},
+        { 3454, 3831},{ 3711, 3871},{ 4163, 3901},{ 4221, 3890}
+      }
+    },
+    {
+      /*Cr  qi=45  INTRA*/
+      {
+        {   93,   68},{   72,  541},{  154,  769},{  239,  848},
+        {  214,  623},{  377, 1060},{  437, 1200},{  514, 1280},
+        {  512, 1160},{  625, 1453},{  657, 1470},{  718, 1516},
+        {  692, 1331},{  831, 1617},{  875, 1609},{  944, 1678},
+        {  886, 1469},{ 1061, 1699},{ 1082, 1714},{ 1226, 1823},
+        { 1113, 1581},{ 1324, 1872},{ 1370, 1925},{ 1374, 1924}
+      },
+      /*Cr  qi=45  INTER*/
+      {
+        {   31,  140},{   13,  533},{   52,  770},{  109, 1000},
+        {  134, 1253},{  201, 1555},{  298, 1821},{  411, 2076},
+        {  525, 2314},{  659, 2545},{  828, 2747},{ 1019, 2918},
+        { 1205, 3082},{ 1405, 3266},{ 1609, 3443},{ 1847, 3606},
+        { 2085, 3730},{ 2404, 3835},{ 2709, 3876},{ 3049, 3886},
+        { 3381, 3821},{ 3708, 3780},{ 4026, 3663},{ 4043, 3646}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=54  INTRA*/
+      {
+        {  316,  203},{  720,  585},{ 1596, 1077},{ 2316, 1289},
+        { 2687, 1439},{ 3133, 1593},{ 3495, 1706},{ 3836, 1775},
+        { 4249, 1892},{ 4804, 2031},{ 5320, 2139},{ 5617, 2203},
+        { 5726, 2199},{ 5726, 2176},{ 5682, 2146},{ 5677, 2127},
+        { 5717, 2124},{ 5707, 2129},{ 5853, 2148},{ 6110, 2180},
+        { 6454, 2247},{ 6714, 2287},{ 6845, 2304},{ 6854, 2303}
+      },
+      /*Y'  qi=54  INTER*/
+      {
+        {  -48,  217},{  314, 1261},{ 1450, 2126},{ 2761, 2728},
+        { 4275, 3012},{ 5408, 3167},{ 6305, 3245},{ 7165, 3290},
+        { 7966, 3325},{ 8698, 3359},{ 9352, 3377},{ 9907, 3391},
+        {10389, 3390},{10856, 3395},{11170, 3385},{11530, 3385},
+        {11780, 3362},{12018, 3362},{12266, 3361},{12443, 3339},
+        {12683, 3342},{12713, 3317},{12967, 3325},{13082, 3332}
+      }
+    },
+    {
+      /*Cb  qi=54  INTRA*/
+      {
+        {   94,   73},{   83,  557},{  152,  818},{  304,  919},
+        {  341,  819},{  506, 1128},{  593, 1281},{  700, 1389},
+        {  714, 1225},{  907, 1502},{  981, 1549},{ 1062, 1641},
+        { 1032, 1523},{ 1170, 1710},{ 1217, 1727},{ 1258, 1714},
+        { 1216, 1575},{ 1309, 1682},{ 1331, 1656},{ 1393, 1712},
+        { 1247, 1456},{ 1469, 1728},{ 1530, 1711},{ 1532, 1711}
+      },
+      /*Cb  qi=54  INTER*/
+      {
+        {   33,  133},{   12,  532},{   70,  770},{  171,  996},
+        {  279, 1233},{  427, 1503},{  600, 1736},{  824, 1939},
+        { 1101, 2097},{ 1411, 2237},{ 1735, 2374},{ 2097, 2493},
+        { 2486, 2606},{ 2916, 2691},{ 3297, 2771},{ 3715, 2826},
+        { 4088, 2855},{ 4460, 2886},{ 4849, 2911},{ 5198, 2932},
+        { 5489, 2940},{ 5875, 2981},{ 6208, 3017},{ 6270, 3012}
+      }
+    },
+    {
+      /*Cr  qi=54  INTRA*/
+      {
+        {  103,   63},{   83,  580},{  258,  796},{  301,  802},
+        {  361,  675},{  538, 1001},{  625, 1097},{  713, 1171},
+        {  699, 1103},{  868, 1380},{  915, 1400},{  970, 1491},
+        {  923, 1365},{ 1070, 1603},{ 1154, 1655},{ 1206, 1677},
+        { 1157, 1541},{ 1366, 1736},{ 1391, 1723},{ 1506, 1797},
+        { 1388, 1556},{ 1616, 1828},{ 1655, 1797},{ 1658, 1796}
+      },
+      /*Cr  qi=54  INTER*/
+      {
+        {   30,  138},{   14,  532},{   63,  771},{  176,  990},
+        {  299, 1226},{  438, 1496},{  606, 1735},{  814, 1950},
+        { 1089, 2127},{ 1417, 2281},{ 1761, 2421},{ 2104, 2571},
+        { 2467, 2701},{ 2881, 2827},{ 3303, 2900},{ 3735, 2917},
+        { 4183, 2913},{ 4529, 2882},{ 4915, 2844},{ 5168, 2796},
+        { 5410, 2763},{ 5562, 2753},{ 5815, 2764},{ 5832, 2755}
+      }
+    }
+  },
+  {
+    {
+      /*Y'  qi=63  INTRA*/
+      {
+        {  421,  194},{ 1272,  564},{ 3016,  943},{ 3831, 1079},
+        { 4282, 1174},{ 4799, 1290},{ 5166, 1348},{ 5259, 1350},
+        { 5720, 1426},{ 6501, 1539},{ 7048, 1606},{ 7328, 1642},
+        { 7374, 1622},{ 7349, 1612},{ 7192, 1578},{ 7207, 1571},
+        { 7161, 1555},{ 7259, 1573},{ 7432, 1592},{ 7710, 1613},
+        { 8167, 1672},{ 8425, 1697},{ 8597, 1710},{ 8602, 1710}
+      },
+      /*Y'  qi=63  INTER*/
+      {
+        { -584,  286},{ 1231, 1186},{ 3939, 1663},{ 6096, 1865},
+        { 7849, 1929},{ 8934, 1995},{ 9962, 2039},{11038, 2078},
+        {12016, 2092},{12889, 2100},{13617, 2096},{14221, 2089},
+        {14743, 2083},{15240, 2081},{15619, 2074},{15992, 2065},
+        {16314, 2065},{16529, 2059},{16822, 2056},{17041, 2049},
+        {17321, 2052},{17408, 2043},{17670, 2051},{17801, 2053}
+      }
+    },
+    {
+      /*Cb  qi=63  INTRA*/
+      {
+        {  154,   55},{  280,  582},{  507,  731},{  788,  853},
+        {  763,  738},{ 1141, 1008},{ 1323, 1090},{ 1540, 1220},
+        { 1487, 1089},{ 1861, 1322},{ 1983, 1347},{ 2145, 1425},
+        { 2047, 1317},{ 2334, 1475},{ 2352, 1413},{ 2458, 1467},
+        { 2243, 1270},{ 2464, 1413},{ 2423, 1335},{ 2506, 1385},
+        { 2182, 1180},{ 2565, 1376},{ 2555, 1321},{ 2557, 1321}
+      },
+      /*Cb  qi=63  INTER*/
+      {
+        {   34,  133},{    6,  531},{  139,  767},{  344,  975},
+        {  608, 1180},{ 1048, 1367},{ 1651, 1495},{ 2376, 1572},
+        { 3103, 1609},{ 3752, 1646},{ 4373, 1680},{ 4980, 1718},
+        { 5540, 1744},{ 6023, 1764},{ 6431, 1766},{ 6800, 1769},
+        { 7149, 1775},{ 7529, 1777},{ 7920, 1817},{ 8198, 1808},
+        { 8691, 1848},{ 8965, 1845},{ 9372, 1865},{ 9459, 1863}
+      }
+    },
+    {
+      /*Cr  qi=63  INTRA*/
+      {
+        {  121,   59},{  392,  570},{  609,  654},{  800,  760},
+        {  720,  598},{ 1192,  892},{ 1298,  897},{ 1470, 1027},
+        { 1411,  962},{ 1761, 1184},{ 1826, 1197},{ 1981, 1308},
+        { 1854, 1198},{ 2229, 1427},{ 2269, 1365},{ 2428, 1453},
+        { 2217, 1265},{ 2558, 1435},{ 2541, 1356},{ 2660, 1417},
+        { 2337, 1199},{ 2688, 1382},{ 2603, 1301},{ 2605, 1300}
+      },
+      /*Cr  qi=63  INTER*/
+      {
+        {   31,  137},{   10,  531},{  136,  768},{  360,  971},
+        {  638, 1166},{ 1029, 1373},{ 1604, 1519},{ 2351, 1595},
+        { 3129, 1640},{ 3861, 1691},{ 4491, 1751},{ 5101, 1783},
+        { 5635, 1784},{ 6136, 1779},{ 6550, 1763},{ 6905, 1746},
+        { 7172, 1726},{ 7495, 1732},{ 7738, 1735},{ 7949, 1735},
+        { 8211, 1744},{ 8424, 1740},{ 8779, 1764},{ 8812, 1760}
+      }
+    }
+  }
+};
+
+#endif

+ 128 - 0
jni/libtheora-1.2.0alpha1/lib/ocintrin.h

@@ -0,0 +1,128 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id$
+
+ ********************************************************************/
+
+/*Some common macros for potential platform-specific optimization.*/
+#include <math.h>
+#if !defined(_ocintrin_H)
+# define _ocintrin_H (1)
+
+/*Some specific platforms may have optimized intrinsic or inline assembly
+   versions of these functions which can substantially improve performance.
+  We define macros for them to allow easy incorporation of these non-ANSI
+   features.*/
+
+/*Note that we do not provide a macro for abs(), because it is provided as a
+   library function, which we assume is translated into an intrinsic to avoid
+   the function call overhead and then implemented in the smartest way for the
+   target platform.
+  With modern gcc (4.x), this is true: it uses cmov instructions if the
+   architecture supports it and branchless bit-twiddling if it does not (the
+   speed difference between the two approaches is not measurable).
+  Interestingly, the bit-twiddling method was patented in 2000 (US 6,073,150)
+   by Sun Microsystems, despite prior art dating back to at least 1996:
+   http://web.archive.org/web/19961201174141/www.x86.org/ftp/articles/pentopt/PENTOPT.TXT
+  On gcc 3.x, however, our assumption is not true, as abs() is translated to a
+   conditional jump, which is horrible on deeply pipelined architectures (e.g.,
+   all consumer architectures for the past decade or more).
+  Also be warned that -C*abs(x) where C is a constant is mis-optimized as
+   abs(C*x) on every gcc release before 4.2.3.
+  See bug http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34130 */
+
+/*Branchless integer maximum and minimum.
+  Modern gcc (4.x) can compile the naive versions of min and max with cmov if
+   given an appropriate architecture, but the branchless bit-twiddling versions
+   are just as fast, and do not require any special target architecture.
+  Earlier gcc versions (3.x) compiled both forms to the same assembly
+   instructions, because of the way they represented ((_b)>(_a)) internally.
+  The comparison yields 0 or 1, so its negation is 0 or -1, which is used as a
+   mask that either discards or keeps the difference between the arguments.
+  Each argument is expanded more than once, so it must be free of side
+   effects, and the difference of the two arguments is always computed, so it
+   must not overflow.*/
+/*Evaluates to _b if _b>_a, and to _a otherwise.*/
+#define OC_MAXI(_a,_b)      ((_a)-((_a)-(_b)&-((_b)>(_a))))
+/*Evaluates to _b if _b<_a, and to _a otherwise.*/
+#define OC_MINI(_a,_b)      ((_a)+((_b)-(_a)&-((_b)<(_a))))
+/*Clamps an integer into the given range.
+  If _a>_c, then the lower bound _a is respected over the upper bound _c (this
+   behavior is required to meet our documented API behavior).
+  This follows from the expansion, which takes the minimum of _b and _c first
+   and then the maximum of that result and _a.
+  _a: The lower bound.
+  _b: The value to clamp.
+  _c: The upper bound.*/
+#define OC_CLAMPI(_a,_b,_c) (OC_MAXI(_a,OC_MINI(_b,_c)))
+/*Clamps an integer into the range of an unsigned char without branching.
+  ((_x)<0)-1 is 0 for negative _x and -1 otherwise, so negative inputs are
+   masked down to 0.
+  -((_x)>255) is -1 for inputs above 255, so OR-ing it in sets every bit, and
+   the final cast to unsigned char keeps only the low bits (255 for an 8-bit
+   char).
+  Inputs already in 0...255 pass through unchanged.
+  _x is expanded three times, so it must be free of side effects.*/
+#define OC_CLAMP255(_x)     ((unsigned char)((((_x)<0)-1)&((_x)|-((_x)>255))))
+/*Evaluates to 1, 0, or -1 when _a is positive, zero, or negative,
+   respectively, as the difference of two 0/1 comparison results.
+  This has a chance of compiling branchless, and is just as fast as the
+   bit-twiddling method, which is slightly less portable, since it relies on a
+   sign-extended rightshift, which is not guaranteed by ANSI (but present on
+   every relevant platform).
+  _a is expanded twice, so it must be free of side effects.*/
+#define OC_SIGNI(_a)        (((_a)>0)-((_a)<0))
+/*Evaluates to -1 if _a is negative, and to 0 otherwise.
+  Slightly more portable than relying on a sign-extended right-shift (which is
+   not guaranteed by ANSI), and just as fast, since gcc (3.x and 4.x both)
+   compile it into the right-shift anyway.*/
+#define OC_SIGNMASK(_a)     (-((_a)<0))
+/*Divides an integer by a power of two, truncating towards 0.
+  A plain right shift would round negative values down, so _rmask is added to
+   negative dividends (and nothing to non-negative ones) before shifting.
+  The sum can still be negative when it is shifted, so the result depends on
+   the compiler sign-extending right shifts of negative values.
+  _dividend is expanded twice, so it must be free of side effects.
+  _dividend: The integer to divide.
+  _shift:    The non-negative power of two to divide by.
+  _rmask:    (1<<_shift)-1*/
+#define OC_DIV_POW2(_dividend,_shift,_rmask)\
+  ((_dividend)+(OC_SIGNMASK(_dividend)&(_rmask))>>(_shift))
+/*The wrappers below fix _shift and pass the matching (1<<_shift)-1 mask.*/
+/*Divides _x by 65536, truncating towards 0.*/
+#define OC_DIV2_16(_x) OC_DIV_POW2(_x,16,0xFFFF)
+/*Divides _x by 2, truncating towards 0.*/
+#define OC_DIV2(_x) OC_DIV_POW2(_x,1,0x1)
+/*Divides _x by 8, truncating towards 0.*/
+#define OC_DIV8(_x) OC_DIV_POW2(_x,3,0x7)
+/*Divides _x by 16, truncating towards 0.*/
+#define OC_DIV16(_x) OC_DIV_POW2(_x,4,0xF)
+/*Right shifts _dividend by _shift, adding _rval, and subtracting one for
+   negative dividends first.
+  When _rval is (1<<_shift-1), this is equivalent to division with rounding
+   ties away from zero.
+  By C operator precedence that expression is 1<<(_shift-1), i.e., half of the
+   divisor, not (1<<_shift)-1.
+  _dividend is expanded twice, so it must be free of side effects.
+  _dividend: The integer to divide.
+  _shift:    The number of bits to shift right by.
+  _rval:     The rounding offset added before the shift.*/
+#define OC_DIV_ROUND_POW2(_dividend,_shift,_rval)\
+  ((_dividend)+OC_SIGNMASK(_dividend)+(_rval)>>(_shift))
+/*Divides _x by 2, rounding towards even numbers.
+  Bit 1 of _x (the low bit of the truncated quotient) is added before the
+   shift, so an exact half is only rounded up when that makes the result even.
+  _x is expanded twice, so it must be free of side effects.*/
+#define OC_DIV2_RE(_x) ((_x)+((_x)>>1&1)>>1)
+/*Divides _x by (1<<(_shift)), rounding towards even numbers.
+  The value added before the shift is the low bit of the truncated quotient
+   plus ((1<<(_shift))-1>>1), which is one less than half of the divisor when
+   _shift is at least 1.
+  NOTE(review): for _shift==0 the expression reduces to _x+(_x&1), not _x, so
+   this appears to expect _shift>=1; confirm against the callers.
+  Both arguments are expanded more than once, so they must be free of side
+   effects.*/
+#define OC_DIV_POW2_RE(_x,_shift) \
+  ((_x)+((_x)>>(_shift)&1)+((1<<(_shift))-1>>1)>>(_shift))
+/*Swaps two integers _a and _b if _a>_b.
+  The exchange is done without a branch: the XOR of the two values is masked
+   with -((_b)<(_a)), which is -1 when they are out of order and 0 otherwise,
+   and the masked value is then XOR-ed into both operands.
+  Both arguments are assigned to, so they must be modifiable lvalues, and they
+   are expanded more than once, so they must be free of side effects.
+  The temporary is declared as int.
+  The do{}while(0) wrapper makes the expansion behave as a single statement.*/
+#define OC_SORT2I(_a,_b) \
+  do{ \
+    int t__; \
+    t__=((_a)^(_b))&-((_b)<(_a)); \
+    (_a)^=t__; \
+    (_b)^=t__; \
+  } \
+  while(0)
+
+/*Accesses one of four (signed) bytes given an index.
+  This can be used to avoid small lookup tables.
+  The four values are packed into an unsigned word (the 0xFFU masks convert
+   int operands to unsigned), because shifting a set bit into the sign bit of
+   a signed int, as <<24 can do for byte values of 0x80 and above, is
+   undefined behavior in C.
+  The selected byte is then sign-extended with the portable (v^0x80)-0x80
+   idiom instead of depending on a sign-extending right shift.
+  All arguments may be expanded more than once, so they must be free of side
+   effects.
+  _a: The byte returned for _i==0.
+  _b: The byte returned for _i==1.
+  _c: The byte returned for _i==2.
+  _d: The byte returned for _i==3.
+  _i: The index of the byte to return, in the range 0...3.
+  Return: The low 8 bits of the selected argument, as a signed char.*/
+#define OC_BYTE_TABLE32(_a,_b,_c,_d,_i) \
+  ((signed char)((int)(((( \
+   (_a)&0xFFU|((_b)&0xFFU)<<8|((_c)&0xFFU)<<16|((_d)&0xFFU)<<24) \
+   >>(_i)*8)&0xFF)^0x80)-0x80))
+/*Accesses one of eight (unsigned) nibbles given an index.
+  This can be used to avoid small lookup tables.
+  The eight values are packed into an unsigned word (the 0xFU masks convert
+   int operands to unsigned), because shifting a set bit into the sign bit of
+   a signed int, as <<28 can do for nibble values of 8 and above, is undefined
+   behavior in C.
+  The result is cast back to int so that expressions using the macro keep the
+   signed arithmetic they had before.
+  _a..._h: The nibbles returned for _i==0 through _i==7, in that order.
+  _i:      The index of the nibble to return, in the range 0...7.
+  Return: The low 4 bits of the selected argument, as an int in 0...15.*/
+#define OC_UNIBBLE_TABLE32(_a,_b,_c,_d,_e,_f,_g,_h,_i) \
+  ((int)((((_a)&0xFU|((_b)&0xFU)<<4|((_c)&0xFU)<<8|((_d)&0xFU)<<12| \
+   ((_e)&0xFU)<<16|((_f)&0xFU)<<20|((_g)&0xFU)<<24|((_h)&0xFU)<<28) \
+   >>(_i)*4)&0xF))
+
+
+
+/*All of these macros should expect floats as arguments.
+  Arguments of the comparison-based macros are expanded more than once, so
+   they must be free of side effects.*/
+/*Evaluates to _b if _a<_b, and to _a otherwise.*/
+#define OC_MAXF(_a,_b)      ((_a)<(_b)?(_b):(_a))
+/*Evaluates to _b if _a>_b, and to _a otherwise.*/
+#define OC_MINF(_a,_b)      ((_a)>(_b)?(_b):(_a))
+/*Evaluates to the minimum of _a and the maximum of _b and _c.
+  NOTE(review): this is the mirror image of OC_CLAMPI, which takes the maximum
+   of _a and the minimum of _b and _c with _a as the lower bound.
+  As written, _a caps the result from above and _c bounds _b from below, so
+   passing (lower, value, upper) always yields the lower bound when
+   lower<=upper; confirm the intended argument order against the callers.*/
+#define OC_CLAMPF(_a,_b,_c) (OC_MINF(_a,OC_MAXF(_b,_c)))
+/*The remaining macros call the double-precision <math.h> functions and cast
+   the result back to float (or to int for the floor and ceiling variants).*/
+#define OC_FABSF(_f)        ((float)fabs(_f))
+#define OC_SQRTF(_f)        ((float)sqrt(_f))
+#define OC_POWF(_b,_e)      ((float)pow(_b,_e))
+#define OC_LOGF(_f)         ((float)log(_f))
+#define OC_IFLOORF(_f)      ((int)floor(_f))
+#define OC_ICEILF(_f)       ((int)ceil(_f))
+
+#endif

+ 127 - 0
jni/libtheora-1.2.0alpha1/lib/quant.c

@@ -0,0 +1,127 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id$
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <ogg/ogg.h>
+#include "quant.h"
+#include "decint.h"
+
+/*The maximum output of the DCT with +/- 255 inputs is +/- 8157.
+  These minimum quantizers ensure the result after quantization (and after
+   prediction for DC) will be no more than +/- 510.
+  The tokenization system can handle values up to +/- 580, so there is no need
+   to do any coefficient clamping.
+  I would rather have allowed smaller quantizers and had to clamp, but these
+   minimums were required when constructing the original VP3 matrices and have
+   been formalized in the spec.
+  Both tables are indexed by the coding mode (qti) and carry the same <<2
+   scaling as the quantizers they are used to clamp in
+   oc_dequant_tables_init().*/
+static const unsigned OC_DC_QUANT_MIN[2]={4<<2,8<<2};
+static const unsigned OC_AC_QUANT_MIN[2]={2<<2,4<<2};
+
+/*Initializes the dequantization tables from a set of quantizer info.
+  Currently the dequantizer (and elsewhere enquantizer) tables are expected to
+   be initialized as pointing to the storage reserved for them in the
+   oc_theora_state (resp. oc_enc_ctx) structure.
+  If some tables are duplicates of others, the pointers will be adjusted to
+   point to a single copy of the tables, but the storage for them will not be
+   freed.
+  If you're concerned about the memory footprint, the obvious thing to do is
+   to move the storage out of its fixed place in the structures and allocate
+   it on demand.
+  However, a much, much better option is to only store the quantization
+   matrices being used for the current frame, and to recalculate these as the
+   qi values change between frames (this is what VP3 did).
+  _dequant:     For each quality index, color plane, and coding mode, a
+                 pointer to the 64-entry table to fill in.
+                On return, an entry that duplicates a table built earlier for
+                 the same quality index points at that earlier table instead.
+  _pp_dc_scale: If non-NULL, receives one DC scale value per quality index for
+                 use by post-processing.
+  _qinfo:       The quantization parameters to build the tables from.*/
+void oc_dequant_tables_init(ogg_uint16_t *_dequant[64][3][2],
+ int _pp_dc_scale[64],const th_quant_info *_qinfo){
+  /*Coding mode: intra or inter.*/
+  int          qti;
+  /*Y', C_b, C_r*/
+  int          pli;
+  for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){
+    /*Quality index.*/
+    int qi;
+    /*Range iterator.*/
+    int qri;
+    for(qi=0,qri=0;qri<=_qinfo->qi_ranges[qti][pli].nranges;qri++){
+      th_quant_base base;
+      ogg_uint32_t  q;
+      int           qi_start;
+      int           qi_end;
+      memcpy(base,_qinfo->qi_ranges[qti][pli].base_matrices[qri],
+       sizeof(base));
+      qi_start=qi;
+      if(qri==_qinfo->qi_ranges[qti][pli].nranges)qi_end=qi+1;
+      else qi_end=qi+_qinfo->qi_ranges[qti][pli].sizes[qri];
+      /*Iterate over quality indices in this range.
+        The final pass (qri==nranges) covers just one quality index, using
+         the last base matrix as-is.*/
+      for(;;){
+        ogg_uint32_t qfac;
+        int          zzi;
+        int          ci;
+        /*In the original VP3.2 code, the rounding offset and the size of the
+           dead zone around 0 were controlled by a "sharpness" parameter.
+          The size of our dead zone is now controlled by the per-coefficient
+           quality thresholds returned by our HVS module.
+          We round down from a more accurate value when the quality of the
+           reconstruction does not fall below our threshold and it saves bits.
+          Hence, all of that VP3.2 code is gone from here, and the remaining
+           floating point code has been implemented as equivalent integer code
+           with exact precision.*/
+        qfac=(ogg_uint32_t)_qinfo->dc_scale[qi]*base[0];
+        /*For postprocessing, not dequantization.*/
+        if(_pp_dc_scale!=NULL)_pp_dc_scale[qi]=(int)(qfac/160);
+        /*Scale the DC coefficient from the proper table.*/
+        q=(qfac/100)<<2;
+        q=OC_CLAMPI(OC_DC_QUANT_MIN[qti],q,OC_QUANT_MAX);
+        _dequant[qi][pli][qti][0]=(ogg_uint16_t)q;
+        /*Now scale AC coefficients from the proper table.
+          The base matrix is read through OC_FZIG_ZAG, while the output is
+           stored at index zzi.*/
+        for(zzi=1;zzi<64;zzi++){
+          q=((ogg_uint32_t)_qinfo->ac_scale[qi]*base[OC_FZIG_ZAG[zzi]]/100)<<2;
+          q=OC_CLAMPI(OC_AC_QUANT_MIN[qti],q,OC_QUANT_MAX);
+          _dequant[qi][pli][qti][zzi]=(ogg_uint16_t)q;
+        }
+        /*If this is a duplicate of a previous matrix, use that instead.
+          This simple check helps us improve cache coherency later.
+          Only tables already built for this qi are searched: every plane of
+           the earlier coding modes, then the earlier planes of this one.*/
+        {
+          int dupe;
+          int qtj;
+          int plj;
+          dupe=0;
+          for(qtj=0;qtj<=qti;qtj++){
+            for(plj=0;plj<(qtj<qti?3:pli);plj++){
+              if(!memcmp(_dequant[qi][pli][qti],_dequant[qi][plj][qtj],
+               sizeof(oc_quant_table))){
+                dupe=1;
+                break;
+              }
+            }
+            if(dupe)break;
+          }
+          if(dupe)_dequant[qi][pli][qti]=_dequant[qi][plj][qtj];
+        }
+        if(++qi>=qi_end)break;
+        /*Interpolate the next base matrix.
+          This is a linear blend of base matrices qri and qri+1, weighted by
+           the position of qi in the range and rounded to nearest.*/
+        for(ci=0;ci<64;ci++){
+          base[ci]=(unsigned char)(
+           (2*((qi_end-qi)*_qinfo->qi_ranges[qti][pli].base_matrices[qri][ci]+
+           (qi-qi_start)*_qinfo->qi_ranges[qti][pli].base_matrices[qri+1][ci])
+           +_qinfo->qi_ranges[qti][pli].sizes[qri])/
+           (2*_qinfo->qi_ranges[qti][pli].sizes[qri]));
+        }
+      }
+    }
+  }
+}

+ 33 - 0
jni/libtheora-1.2.0alpha1/lib/quant.h

@@ -0,0 +1,33 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id$
+
+ ********************************************************************/
+
+#if !defined(_quant_H)
+# define _quant_H (1)
+# include "theora/codec.h"
+# include "ocintrin.h"
+
+/*A table of 64 quantizer values, one for each coefficient index.*/
+typedef ogg_uint16_t   oc_quant_table[64];
+
+
+/*Maximum scaled quantizer value.
+  The <<2 matches the scaling oc_dequant_tables_init() applies to every
+   quantizer before clamping it against this limit.*/
+#define OC_QUANT_MAX          (1024<<2)
+
+
+void oc_dequant_tables_init(ogg_uint16_t *_dequant[64][3][2],
+ int _pp_dc_scale[64],const th_quant_info *_qinfo);
+
+#endif

+ 1147 - 0
jni/libtheora-1.2.0alpha1/lib/rate.c

@@ -0,0 +1,1147 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id$
+
+ ********************************************************************/
+#include <stdlib.h>
+#include <string.h>
+#include "encint.h"
+
+/*A rough lookup table for tan(x), 0<=x<pi/2.
+  The values are Q12 fixed-point and spaced at 5 degree intervals, i.e., entry
+   i holds 4096*tan(5*i degrees) (entry 9, 45 degrees, is exactly 4096).
+  These decisions are somewhat arbitrary, but sufficient for the 2nd order
+   Bessel follower below.
+  Values of x larger than 85 degrees are extrapolated from the last interval,
+   which is way off, but "good enough".*/
+static unsigned short OC_ROUGH_TAN_LOOKUP[18]={
+      0,  358,  722, 1098, 1491, 1910,
+   2365, 2868, 3437, 4096, 4881, 5850,
+   7094, 8784,11254,15286,23230,46817
+};
+
+/*Approximates tan(pi*_alpha) by linear interpolation in OC_ROUGH_TAN_LOOKUP.
+  _alpha: A Q24 value in the range [0,0.5).
+  Return: The tangent in 5.12 fixed-point format.*/
+static int oc_warp_alpha(int _alpha){
+  int idx;
+  int frac;
+  int lo;
+  int hi;
+  /*Each table step is 5 degrees, so 36 steps span _alpha==1.0.*/
+  idx=_alpha*36>>24;
+  /*Past the last interval we extrapolate from it instead of reading beyond
+     the end of the table.*/
+  if(idx>=17)idx=16;
+  /*The (Q24) distance from the start of the chosen interval.*/
+  frac=_alpha*36-(idx<<24);
+  lo=OC_ROUGH_TAN_LOOKUP[idx];
+  hi=OC_ROUGH_TAN_LOOKUP[idx+1];
+  return (int)((((ogg_int64_t)lo<<32)+((hi-lo)<<8)*(ogg_int64_t)frac)>>32);
+}
+
+/*Recomputes the Bessel filter coefficients for the specified delay.
+  Only c[0], c[1], and g are rewritten; the x/y history is left alone, so the
+   filter keeps its state but changes its reaction time.
+  Altering the time constant of a reactive filter without altering its
+   internal state is something that has to be done carefully, but our design
+   operates at high enough delays and with small enough time constant changes
+   to make it safe.
+  _f:     The filter to update.
+  _delay: The new delay; it is used as a divisor, so it must not be zero.*/
+static void oc_iir_filter_reinit(oc_iir_filter *_f,int _delay){
+  ogg_int64_t one48;
+  ogg_int64_t w;
+  ogg_int64_t k1;
+  ogg_int64_t k2;
+  ogg_int64_t denom;
+  ogg_int64_t gain;
+  ogg_int64_t inv_k2;
+  ogg_int64_t b1;
+  ogg_int64_t b2;
+  int         alpha;
+  /*This borrows some code from an unreleased version of Postfish.
+    See the recipe at http://unicorn.us.com/alex/2polefilters.html for details
+     on deriving the filter coefficients.*/
+  one48=(ogg_int64_t)1<<48;
+  /*alpha is Q24.*/
+  alpha=(1<<24)/_delay;
+  /*w is 7.12; it is kept at 1 or more so that k2 below is never zero.*/
+  w=oc_warp_alpha(alpha);
+  if(w<1)w=1;
+  /*k1 is 9.12.*/
+  k1=3*w;
+  /*k2 is 16.24.*/
+  k2=k1*w;
+  /*denom is 16.15.*/
+  denom=((((1<<12)+k1)<<12)+k2+256)>>9;
+  /*gain is 0.32, since denom is larger than both 1.0 and k2.*/
+  gain=(k2<<23)/denom;
+  /*inv_k2 is 25.24.*/
+  inv_k2=one48/k2;
+  /*b1 and b2 are Q56; in practice, the integer part of each ranges between
+     -2 and 2.*/
+  b1=2*gain*(inv_k2-(1<<24));
+  b2=(one48<<8)-((4*gain)<<24)-b1;
+  /*All of the stored filter parameters are Q24, rounded to nearest.*/
+  _f->g=(ogg_int32_t)((gain+128)>>8);
+  _f->c[0]=(ogg_int32_t)((b1+((ogg_int64_t)1<<31))>>32);
+  _f->c[1]=(ogg_int32_t)((b2+((ogg_int64_t)1<<31))>>32);
+}
+
+/*Sets up a 2nd order low-pass Bessel filter with the given delay, and fills
+   its whole input and output history with a single starting value.
+  _f:     The filter to initialize.
+  _delay: The filter delay, passed on to oc_iir_filter_reinit().
+  _value: The starting value, in Q24.*/
+static void oc_iir_filter_init(oc_iir_filter *_f,int _delay,ogg_int32_t _value){
+  oc_iir_filter_reinit(_f,_delay);
+  _f->x[0]=_value;
+  _f->x[1]=_value;
+  _f->y[0]=_value;
+  _f->y[1]=_value;
+}
+
+/*Feeds one new input sample to the filter and returns the new output.
+  The stored input and output histories are each advanced by one step.
+  _f: The filter to update.
+  _x: The new input sample.
+  Return: The new output, before it is narrowed to 32 bits for storage.*/
+static ogg_int64_t oc_iir_filter_update(oc_iir_filter *_f,ogg_int32_t _x){
+  ogg_int64_t xprev0;
+  ogg_int64_t xprev1;
+  ogg_int64_t yprev0;
+  ogg_int64_t yprev1;
+  ogg_int64_t acc;
+  ogg_int64_t ynew;
+  xprev0=_f->x[0];
+  xprev1=_f->x[1];
+  yprev0=_f->y[0];
+  yprev1=_f->y[1];
+  /*All products are formed in 64 bits.*/
+  acc=(_x+xprev0*2+xprev1)*(ogg_int64_t)_f->g;
+  acc+=yprev0*(ogg_int64_t)_f->c[0];
+  acc+=yprev1*(ogg_int64_t)_f->c[1];
+  /*Round to nearest while dropping the 24 extra fractional bits.*/
+  acc+=1<<23;
+  ynew=acc>>24;
+  _f->x[1]=(ogg_int32_t)xprev0;
+  _f->x[0]=_x;
+  _f->y[1]=(ogg_int32_t)yprev0;
+  _f->y[0]=(ogg_int32_t)ynew;
+  return ynew;
+}
+
+
+
+/*Finds the quality index whose average log quantizer is nearest the target.
+  No ordering of log_qavg is assumed, so every qi from _qi_min through 63 is
+   examined.
+  When two candidates are equally near, the one closer to _qi_old wins, and
+   when that is also a tie the one found first is kept.
+  _enc:         The encoder context supplying the log_qavg table.
+  _qti:         The frame type used to index log_qavg.
+  _qi_old:      The previous quality index, used only to break ties.
+  _qi_min:      The smallest quality index to consider.
+  _log_qtarget: The desired log quantizer, in the same format as log_qavg.
+  Return: The selected quality index.*/
+static int oc_enc_find_qi_for_target(oc_enc_ctx *_enc,int _qti,int _qi_old,
+ int _qi_min,ogg_int64_t _log_qtarget){
+  ogg_int64_t min_err;
+  ogg_int64_t err;
+  ogg_int64_t sign;
+  int         winner;
+  int         cand;
+  winner=_qi_min;
+  /*Branch-free absolute value: (x+sign)^sign, with sign 0 or -1.*/
+  min_err=_enc->log_qavg[_qti][winner]-_log_qtarget;
+  sign=OC_SIGNMASK(min_err);
+  min_err=(min_err+sign)^sign;
+  for(cand=_qi_min+1;cand<64;cand++){
+    err=_enc->log_qavg[_qti][cand]-_log_qtarget;
+    sign=OC_SIGNMASK(err);
+    err=(err+sign)^sign;
+    /*Strictly worse candidates never replace the current winner.*/
+    if(err>min_err)continue;
+    /*Equally good candidates only win if they are nearer the old qi.*/
+    if(err==min_err&&abs(cand-_qi_old)>=abs(winner-_qi_old))continue;
+    winner=cand;
+    min_err=err;
+  }
+  return winner;
+}
+
+/*Computes the R-D lambda for the current frame and chooses which additional
+   quantizers, if any, may be used within the frame.
+  This writes _enc->lambda, _enc->state.nqis, and the entries of
+   _enc->state.qis[] after the first.
+  _enc: The encoder context.
+  _qti: The frame type used to index the log_qavg table.*/
+void oc_enc_calc_lambda(oc_enc_ctx *_enc,int _qti){
+  ogg_int64_t log_q;
+  int         base_qi;
+  int         alt_qi;
+  int         count;
+  base_qi=_enc->state.qis[0];
+  /*For now, lambda is fixed depending on the qi value and frame type:
+      lambda=qscale*(qavg[qti][qi]**2),
+     where qscale=0.2125.
+    This was derived by exhaustively searching for the optimal quantizer for
+     the AC coefficients in each block from a number of test sequences for a
+     number of fixed lambda values and fitting the peaks of the resulting
+     histograms (on the log(qavg) scale).
+    The same model applies to both inter and intra frames.
+    A more adaptive scheme might perform better.
+
+    If rate control is active, the lambda for the _target_ quantizer is used
+     instead of the one for the actual qi.
+    This allows us to scale to rates slightly lower than we'd normally be able
+     to reach, and gives the rate control a semblance of "fractional qi"
+     precision.
+    TODO: Add API for changing QI, and allow extra precision.*/
+  log_q=_enc->state.info.target_bitrate>0?
+   _enc->rc.log_qtarget:_enc->log_qavg[_qti][base_qi];
+  /*The constant is -log2(qscale) in Q57.
+    The resulting lambda value is less than 0x500000.*/
+  _enc->lambda=(int)oc_bexp64(2*log_q-0x4780BD468D6B62BLL);
+  /*Select additional quantizers.
+    The R-D optimal block AC quantizer statistics suggest that the distribution
+     is roughly Gaussian-like with a slight positive skew.
+    K-means clustering on log_qavg to select 3 quantizers produces cluster
+     centers of {log_qavg-0.6,log_qavg,log_qavg+0.7}.
+    Experiments confirm these are relatively good choices.
+
+    Although we do greedy R-D optimization of the qii flags to avoid switching
+     too frequently, this becomes ineffective at low rates, either because we
+     do a poor job of predicting the actual R-D cost, or the greedy
+     optimization is not sufficient.
+    Therefore adaptive quantization is disabled above an (experimentally
+     suggested) threshold of log_qavg=7.00 (e.g., below INTRA qi=12 or
+     INTER qi=20 with current matrices).
+    This may need to be revised if the R-D cost estimation or qii flag
+     optimization strategies change.*/
+  count=1;
+  if(log_q<(OC_Q57(56)>>3)&&!_enc->vp3_compatible&&
+   _enc->sp_level<OC_SP_LEVEL_FAST_ANALYSIS){
+    /*The coarser alternative, 0.7 above the base log quantizer.*/
+    alt_qi=oc_enc_find_qi_for_target(_enc,_qti,OC_MAXI(base_qi-1,0),0,
+     log_q+(OC_Q57(7)+5)/10);
+    if(alt_qi!=base_qi){
+      _enc->state.qis[count]=alt_qi;
+      count++;
+    }
+    /*The finer alternative, 0.6 below the base log quantizer.
+      It is skipped if it matches either quantizer already in the list.*/
+    alt_qi=oc_enc_find_qi_for_target(_enc,_qti,OC_MINI(base_qi+1,63),0,
+     log_q-(OC_Q57(6)+5)/10);
+    if(alt_qi!=base_qi&&alt_qi!=_enc->state.qis[count-1]){
+      _enc->state.qis[count]=alt_qi;
+      count++;
+    }
+  }
+  _enc->state.nqis=count;
+}
+
+/*Computes 2**_log_scale in Q24 fixed-point, with saturation.
+  _log_scale: A binary logarithm in Q24 format.
+  Return: The binary exponential in Q24 format, or 2**47-1 if the result
+   would be that large or larger.*/
+static ogg_int64_t oc_bexp_q24(ogg_int32_t _log_scale){
+  ogg_int64_t ret;
+  /*Inputs of 23.0 or more always saturate.*/
+  if(_log_scale>=(ogg_int32_t)23<<24)return 0x7FFFFFFFFFFFLL;
+  /*Widen the input from Q24 to Q57, and add 24 to the logarithm so that the
+     result comes out scaled by 2**24.*/
+  ret=oc_bexp64(((ogg_int64_t)_log_scale<<33)+OC_Q57(24));
+  if(ret>=0x7FFFFFFFFFFFLL)ret=0x7FFFFFFFFFFFLL;
+  return ret;
+}
+
+/*Converts a Q57 value to Q24, rounding to nearest and saturating to the
+   range of a signed 32-bit integer.
+  _in: The input in Q57 format.
+  Return: The same number in Q24 format.*/
+static ogg_int32_t oc_q57_to_q24(ogg_int64_t _in){
+  ogg_int64_t q24;
+  /*Add half of the 33 bits being discarded before shifting them away.*/
+  q24=(_in+((ogg_int64_t)1<<32))>>33;
+  /*The lower bound is written as -0x7FFFFFFF-1 because 0x80000000 is
+     automatically converted to unsigned on 32-bit systems, which would
+     "promote" the whole expression to unsigned.*/
+  return (ogg_int32_t)OC_CLAMPI(-0x7FFFFFFF-1,q24,0x7FFFFFFF);
+}
+
+/*Computes 2**_log_scale in Q24 fixed-point, with saturation.
+  _log_scale: A binary logarithm in Q57 format.
+  Return: The binary exponential in Q24 format, or 2**31-1 if the result
+   would be that large or larger.*/
+static ogg_int32_t oc_bexp64_q24(ogg_int64_t _log_scale){
+  ogg_int64_t ret;
+  /*Inputs of 8.0 or more always saturate.*/
+  if(_log_scale>=OC_Q57(8))return 0x7FFFFFFF;
+  /*Adding 24 to the logarithm scales the result by 2**24.*/
+  ret=oc_bexp64(_log_scale+OC_Q57(24));
+  if(ret>=0x7FFFFFFF)return 0x7FFFFFFF;
+  return (ogg_int32_t)ret;
+}
+
+
+/*Resets the rate controller to its starting state for the current bitrate,
+   frame rate, frame size, and buffer delay.
+  This recomputes the per-frame bit budget and the buffer bounds, puts the
+   buffer fullness back at its initial target, picks the initial rate model
+   parameters, and re-initializes the scale and drop filters.*/
+static void oc_enc_rc_reset(oc_enc_ctx *_enc){
+  ogg_int64_t bits_per_frame;
+  ogg_int64_t npixels;
+  ogg_int64_t ibpp;
+  int         exp0;
+  int         scale0;
+  int         exp1;
+  int         scale1;
+  int         delay_target;
+  /*TODO: These parameters should be exposed in a th_encode_ctl() API.*/
+  bits_per_frame=(_enc->state.info.target_bitrate*
+   (ogg_int64_t)_enc->state.info.fps_denominator)/
+   _enc->state.info.fps_numerator;
+  /*Insane framerates or frame sizes mean insane bitrates.
+    Let's not get carried away.*/
+  if(bits_per_frame>0x400000000000LL){
+    bits_per_frame=(ogg_int64_t)0x400000000000LL;
+  }
+  else if(bits_per_frame<32)bits_per_frame=32;
+  _enc->rc.bits_per_frame=bits_per_frame;
+  /*The buffer always holds at least 12 frames.*/
+  _enc->rc.buf_delay=OC_MAXI(_enc->rc.buf_delay,12);
+  _enc->rc.max=_enc->rc.bits_per_frame*_enc->rc.buf_delay;
+  /*Start with a buffer fullness of 50% plus 25% of the amount we plan to spend
+     on a single keyframe interval.
+    We can require fully half the bits in an interval for a keyframe, so this
+     initial level gives us maximum flexibility for over/under-shooting in
+     subsequent frames.*/
+  _enc->rc.target=((_enc->rc.max+1)>>1)+((_enc->rc.bits_per_frame+2)>>2)*
+   OC_MINI(_enc->keyframe_frequency_force,_enc->rc.buf_delay);
+  _enc->rc.fullness=_enc->rc.target;
+  /*Pick exponents and initial scales for quantizer selection, based on the
+     (integer) number of pixels per bit.*/
+  npixels=_enc->state.info.frame_width*
+   (ogg_int64_t)_enc->state.info.frame_height;
+  _enc->rc.log_npixels=oc_blog64(npixels);
+  ibpp=npixels/_enc->rc.bits_per_frame;
+  /*Model parameters for frame type 0.*/
+  if(ibpp<1){
+    exp0=59;
+    scale0=1997;
+  }
+  else if(ibpp<2){
+    exp0=55;
+    scale0=1604;
+  }
+  else{
+    exp0=48;
+    scale0=834;
+  }
+  _enc->rc.exp[0]=exp0;
+  _enc->rc.log_scale[0]=oc_blog64(scale0)-OC_Q57(8);
+  /*Model parameters for frame type 1.*/
+  if(ibpp<4){
+    exp1=100;
+    scale1=2249;
+  }
+  else if(ibpp<8){
+    exp1=95;
+    scale1=1751;
+  }
+  else{
+    exp1=73;
+    scale1=1260;
+  }
+  _enc->rc.exp[1]=exp1;
+  _enc->rc.log_scale[1]=oc_blog64(scale1)-OC_Q57(8);
+  _enc->rc.prev_drop_count=0;
+  _enc->rc.log_drop_scale=OC_Q57(0);
+  /*Set up second order followers, initialized according to corresponding
+     time constants.*/
+  oc_iir_filter_init(&_enc->rc.scalefilter[0],4,
+   oc_q57_to_q24(_enc->rc.log_scale[0]));
+  delay_target=(_enc->rc.twopass?
+   OC_MAXI(_enc->keyframe_frequency_force,12):_enc->rc.buf_delay)>>1;
+  _enc->rc.inter_count=0;
+  /*The filter itself starts at a delay of 10, with the desired delay only
+     recorded as a target, to work within the range of values where later
+     incrementing the delay works as designed.
+    10 is not an exact choice, but rather a good working trade-off.*/
+  _enc->rc.inter_delay=10;
+  _enc->rc.inter_delay_target=delay_target;
+  oc_iir_filter_init(&_enc->rc.scalefilter[1],_enc->rc.inter_delay,
+   oc_q57_to_q24(_enc->rc.log_scale[1]));
+  oc_iir_filter_init(&_enc->rc.vfrfilter,4,
+   oc_bexp64_q24(_enc->rc.log_drop_scale));
+}
+
+/*Initializes the rate control state to its defaults.
+  The two-pass fields and the rate bias are always cleared.
+  The buffer parameters are only set up, and the controller reset, when a
+   target bitrate has been configured.
+  _rc:  The rate control state to initialize.
+  _enc: The encoder context the settings are read from.*/
+void oc_rc_state_init(oc_rc_state *_rc,oc_enc_ctx *_enc){
+  _rc->rate_bias=0;
+  _rc->frame_metrics=NULL;
+  _rc->twopass_force_kf=0;
+  _rc->twopass_buffer_bytes=0;
+  _rc->twopass=0;
+  /*Nothing else to do unless rate control is active.*/
+  if(_enc->state.info.target_bitrate<=0)return;
+  /*The buffer size is set equal to the keyframe interval, clamped to the
+     range [12,256] frames.
+    The 12 frame minimum gives us some chance to distribute bit estimation
+     errors; it is applied by oc_enc_rc_reset() below.
+    The 256 frame maximum means we'll require 8-10 seconds of pre-buffering
+     at 24-30 fps, which is not unreasonable.*/
+  if(_enc->keyframe_frequency_force>256)_rc->buf_delay=256;
+  else _rc->buf_delay=_enc->keyframe_frequency_force;
+  /*By default, enforce all buffer constraints.*/
+  _rc->cap_underflow=0;
+  _rc->cap_overflow=1;
+  _rc->drop_frames=1;
+  oc_enc_rc_reset(_enc);
+}
+
+/*Releases the frame metrics buffer held by the rate control state.
+  No other field of _rc is modified.*/
+void oc_rc_state_clear(oc_rc_state *_rc){
+  _ogg_free(_rc->frame_metrics);
+}
+
+/*Brings the rate control state in line with the current bitrate, frame rate,
+   and buffer delay settings.
+  If encoding has not yet begun, this is a full reset.
+  Otherwise only the buffer bounds and the INTER scale filter delay are
+   updated; the current buffer fullness is left alone.
+  In pass-2 mode the circular frame metrics buffer is also grown, if
+   necessary, so that it can hold statistics for the whole buffer window.
+  If that allocation fails, the buffer delay falls back to the previous size
+   (or to whole-file buffering) and the function re-runs itself.*/
+void oc_enc_rc_resize(oc_enc_ctx *_enc){
+  /*If encoding has not yet begun, reset the buffer state.*/
+  if(_enc->state.curframe_num<0)oc_enc_rc_reset(_enc);
+  else{
+    int idt;
+    /*Otherwise, update the bounds on the buffer, but not the current
+       fullness.*/
+    _enc->rc.bits_per_frame=(_enc->state.info.target_bitrate*
+     (ogg_int64_t)_enc->state.info.fps_denominator)/
+     _enc->state.info.fps_numerator;
+    /*Insane framerates or frame sizes mean insane bitrates.
+      Let's not get carried away.*/
+    if(_enc->rc.bits_per_frame>0x400000000000LL){
+      _enc->rc.bits_per_frame=(ogg_int64_t)0x400000000000LL;
+    }
+    else if(_enc->rc.bits_per_frame<32)_enc->rc.bits_per_frame=32;
+    _enc->rc.buf_delay=OC_MAXI(_enc->rc.buf_delay,12);
+    _enc->rc.max=_enc->rc.bits_per_frame*_enc->rc.buf_delay;
+    _enc->rc.target=(_enc->rc.max+1>>1)+(_enc->rc.bits_per_frame+2>>2)*
+     OC_MINI(_enc->keyframe_frequency_force,_enc->rc.buf_delay);
+    /*Update the INTER-frame scale filter delay.
+      We jump to it immediately if we've already seen enough frames; otherwise
+       it is simply set as the new target.*/
+    _enc->rc.inter_delay_target=idt=OC_MAXI(_enc->rc.buf_delay>>1,10);
+    if(idt<OC_MINI(_enc->rc.inter_delay,_enc->rc.inter_count)){
+      oc_iir_filter_init(&_enc->rc.scalefilter[1],idt,
+       _enc->rc.scalefilter[1].y[0]);
+      _enc->rc.inter_delay=idt;
+    }
+  }
+  /*If we're in pass-2 mode, make sure the frame metrics array is big enough
+     to hold frame statistics for the full buffer.*/
+  if(_enc->rc.twopass==2){
+    int cfm;
+    int buf_delay;
+    int reset_window;
+    buf_delay=_enc->rc.buf_delay;
+    /*We switch from whole-file buffering to a finite window if no metrics
+       buffer exists yet and either the pass-1 header has not been read or
+       the new delay is shorter than the whole file.*/
+    reset_window=_enc->rc.frame_metrics==NULL&&(_enc->rc.frames_total[0]==0||
+     buf_delay<_enc->rc.frames_total[0]+_enc->rc.frames_total[1]
+     +_enc->rc.frames_total[2]);
+    cfm=_enc->rc.cframe_metrics;
+    /*Only try to resize the frame metrics buffer if a) it's too small and
+       b) we were using a finite buffer, or are about to start.*/
+    if(cfm<buf_delay&&(_enc->rc.frame_metrics!=NULL||reset_window)){
+      oc_frame_metrics *fm;
+      int               nfm;
+      int               fmh;
+      fm=(oc_frame_metrics *)_ogg_realloc(_enc->rc.frame_metrics,
+       buf_delay*sizeof(*_enc->rc.frame_metrics));
+      if(fm==NULL){
+        /*We failed to allocate a finite buffer.*/
+        /*If we don't have a valid 2-pass header yet, just return; we'll reset
+           the buffer size when we read the header.*/
+        if(_enc->rc.frames_total[0]==0)return;
+        /*Otherwise revert to the largest finite buffer previously set, or to
+           whole-file buffering if we were still using that.*/
+        _enc->rc.buf_delay=_enc->rc.frame_metrics!=NULL?
+         cfm:_enc->rc.frames_total[0]+_enc->rc.frames_total[1]
+         +_enc->rc.frames_total[2];
+        oc_enc_rc_resize(_enc);
+        return;
+      }
+      _enc->rc.frame_metrics=fm;
+      _enc->rc.cframe_metrics=buf_delay;
+      /*Re-organize the circular buffer.*/
+      fmh=_enc->rc.frame_metrics_head;
+      nfm=_enc->rc.nframe_metrics;
+      if(fmh+nfm>cfm){
+        int shift;
+        /*The entries that had wrapped around to the front of the old buffer
+           are moved into the newly added space at the end, as many as fit.*/
+        shift=OC_MINI(fmh+nfm-cfm,buf_delay-cfm);
+        memcpy(fm+cfm,fm,shift*sizeof(*fm));
+        /*Any wrapped entries that did not fit slide down to the front.
+          memmove() takes a size in bytes, so the entry count must be scaled
+           by the entry size.*/
+        if(fmh+nfm>buf_delay){
+          memmove(fm,fm+shift,(fmh+nfm-buf_delay)*sizeof(*fm));
+        }
+      }
+    }
+    /*We were using whole-file buffering; now we're not.*/
+    if(reset_window){
+      _enc->rc.nframes[0]=_enc->rc.nframes[1]=_enc->rc.nframes[2]=0;
+      _enc->rc.scale_sum[0]=_enc->rc.scale_sum[1]=0;
+      _enc->rc.scale_window_end=_enc->rc.scale_window0=
+       _enc->state.curframe_num+_enc->prev_dup_count+1;
+      if(_enc->rc.twopass_buffer_bytes){
+        int qti;
+        /*We already read the metrics for the first frame in the window.*/
+        *(_enc->rc.frame_metrics)=*&_enc->rc.cur_metrics;
+        _enc->rc.nframe_metrics++;
+        qti=_enc->rc.cur_metrics.frame_type;
+        _enc->rc.nframes[qti]++;
+        _enc->rc.nframes[2]+=_enc->rc.cur_metrics.dup_count;
+        _enc->rc.scale_sum[qti]+=oc_bexp_q24(_enc->rc.cur_metrics.log_scale);
+        _enc->rc.scale_window_end+=_enc->rc.cur_metrics.dup_count+1;
+        if(_enc->rc.scale_window_end-_enc->rc.scale_window0<buf_delay){
+          /*We need more frame data.*/
+          _enc->rc.twopass_buffer_bytes=0;
+        }
+      }
+    }
+    /*Otherwise, we could shrink the size of the current window, if necessary,
+       but leaving it like it is lets us adapt to the new buffer size more
+       gracefully.*/
+  }
+}
+
+/*Scales a frame count down by the expected number of drops/duplicates.
+  _rc:      The rate control state holding the drop statistics.
+  _nframes: The nominal number of frames.
+  Return: The adjusted number of frames; a non-zero count is never reduced to
+   zero, and the count is returned unchanged if no drops are expected.*/
+static int oc_rc_scale_drop(oc_rc_state *_rc,int _nframes){
+  ogg_int64_t dup_scale;
+  int         dup_scalei;
+  /*With no recent drops and no positive drop scale there is nothing to do.*/
+  if(_rc->prev_drop_count<=0&&_rc->log_drop_scale<=OC_Q57(0))return _nframes;
+  /*Average (in the log domain) the filtered drop scale with the most recent
+     drop count, producing a result with 8 fractional bits.*/
+  dup_scale=oc_bexp64(((_rc->log_drop_scale
+   +oc_blog64(_rc->prev_drop_count+1))>>1)+OC_Q57(8));
+  /*A scale at least as large as the count itself collapses it to a single
+     frame (or zero, if there were none to begin with).*/
+  if(dup_scale>=_nframes<<8)return !!_nframes;
+  dup_scalei=(int)dup_scale;
+  /*Divide, rounding up.*/
+  if(dup_scalei>0)_nframes=((_nframes<<8)+dup_scalei-1)/dup_scalei;
+  return _nframes;
+}
+
+int oc_enc_select_qi(oc_enc_ctx *_enc,int _qti,int _clamp){
+  ogg_int64_t  rate_total;
+  ogg_int64_t  rate_bias;
+  int          nframes[2];
+  int          buf_delay;
+  int          buf_pad;
+  ogg_int64_t  log_qtarget;
+  ogg_int64_t  log_scale0;
+  ogg_int64_t  log_cur_scale;
+  ogg_int64_t  log_qexp;
+  int          exp0;
+  int          old_qi;
+  int          qi;
+  /*Figure out how to re-distribute bits so that we hit our fullness target
+     before the last keyframe in our current buffer window (after the current
+     frame), or the end of the buffer window, whichever comes first.*/
+  log_cur_scale=(ogg_int64_t)_enc->rc.scalefilter[_qti].y[0]<<33;
+  buf_pad=0;
+  switch(_enc->rc.twopass){
+    default:{
+      ogg_uint32_t next_key_frame;
+      /*Single pass mode: assume only forced keyframes and attempt to estimate
+         the drop count for VFR content.*/
+      next_key_frame=_qti?_enc->keyframe_frequency_force
+       -(_enc->state.curframe_num-_enc->state.keyframe_num):0;
+      nframes[0]=(_enc->rc.buf_delay-OC_MINI(next_key_frame,_enc->rc.buf_delay)
+       +_enc->keyframe_frequency_force-1)/_enc->keyframe_frequency_force;
+      if(nframes[0]+_qti>1){
+        nframes[0]--;
+        buf_delay=next_key_frame+nframes[0]*_enc->keyframe_frequency_force;
+      }
+      else buf_delay=_enc->rc.buf_delay;
+      nframes[1]=buf_delay-nframes[0];
+      /*Downgrade the delta frame rate to correspond to the recent drop count
+         history.*/
+      nframes[1]=oc_rc_scale_drop(&_enc->rc,nframes[1]);
+    }break;
+    case 1:{
+      /*Pass 1 mode: use a fixed qi value.*/
+      qi=_enc->state.qis[0];
+      _enc->rc.log_qtarget=_enc->log_qavg[_qti][qi];
+      return qi;
+    }break;
+    case 2:{
+      ogg_int64_t scale_sum[2];
+      int         qti;
+      /*Pass 2 mode: we know exactly how much of each frame type there is in
+         the current buffer window, and have estimates for the scales.*/
+      nframes[0]=_enc->rc.nframes[0];
+      nframes[1]=_enc->rc.nframes[1];
+      scale_sum[0]=_enc->rc.scale_sum[0];
+      scale_sum[1]=_enc->rc.scale_sum[1];
+      /*The window size can be slightly larger than the buffer window for VFR
+         content; clamp it down, if appropriate (the excess will all be dup
+         frames).*/
+      buf_delay=OC_MINI(_enc->rc.scale_window_end-_enc->rc.scale_window0,
+       _enc->rc.buf_delay);
+      /*If we're approaching the end of the file, add some slack to keep us
+         from slamming into a rail.
+        Our rate accuracy goes down, but it keeps the result sensible.
+        We position the target where the first forced keyframe beyond the end
+         of the file would be (for consistency with 1-pass mode).*/
+      buf_pad=OC_MINI(_enc->rc.buf_delay,_enc->state.keyframe_num
+       +_enc->keyframe_frequency_force-_enc->rc.scale_window0);
+      if(buf_delay<buf_pad)buf_pad-=buf_delay;
+      else{
+        /*Otherwise, search for the last keyframe in the buffer window and
+           target that.*/
+        buf_pad=0;
+        /*TODO: Currently we only do this when using a finite buffer; we could
+           save the position of the last keyframe in the summary data and do it
+           with a whole-file buffer as well, but it isn't likely to make a
+           difference.*/
+        if(_enc->rc.frame_metrics!=NULL){
+          int fmi;
+          int fm_tail;
+          fm_tail=_enc->rc.frame_metrics_head+_enc->rc.nframe_metrics;
+          if(fm_tail>=_enc->rc.cframe_metrics)fm_tail-=_enc->rc.cframe_metrics;
+          for(fmi=fm_tail;;){
+            oc_frame_metrics *m;
+            fmi--;
+            if(fmi<0)fmi+=_enc->rc.cframe_metrics;
+            /*Stop before we remove the first frame.*/
+            if(fmi==_enc->rc.frame_metrics_head)break;
+            m=_enc->rc.frame_metrics+fmi;
+            /*If we find a keyframe, remove it and everything past it.*/
+            if(m->frame_type==OC_INTRA_FRAME){
+              do{
+                qti=m->frame_type;
+                nframes[qti]--;
+                scale_sum[qti]-=oc_bexp_q24(m->log_scale);
+                buf_delay-=m->dup_count+1;
+                fmi++;
+                if(fmi>=_enc->rc.cframe_metrics)fmi=0;
+                m=_enc->rc.frame_metrics+fmi;
+              }
+              while(fmi!=fm_tail);
+              /*And stop scanning backwards.*/
+              break;
+            }
+          }
+        }
+      }
+      /*If we're not using the same frame type as in pass 1 (because someone
+         changed the keyframe interval), remove that scale estimate.
+        We'll add in a replacement for the correct frame type below.*/
+      qti=_enc->rc.cur_metrics.frame_type;
+      if(qti!=_qti){
+        nframes[qti]--;
+        scale_sum[qti]-=oc_bexp_q24(_enc->rc.cur_metrics.log_scale);
+      }
+      /*Compute log_scale estimates for each frame type from the pass-1 scales
+         we measured in the current window.*/
+      for(qti=0;qti<2;qti++){
+        _enc->rc.log_scale[qti]=nframes[qti]>0?
+         oc_blog64(scale_sum[qti])-oc_blog64(nframes[qti])-OC_Q57(24):
+         -_enc->rc.log_npixels;
+      }
+      /*If we're not using the same frame type as in pass 1, add a scale
+         estimate for the corresponding frame using the current low-pass
+         filter value.
+        This is mostly to ensure we have a valid estimate even when pass 1 had
+         no frames of this type in the buffer window.
+        TODO: We could also plan ahead and figure out how many keyframes we'll
+         be forced to add in the current buffer window.*/
+      qti=_enc->rc.cur_metrics.frame_type;
+      if(qti!=_qti){
+        ogg_int64_t scale;
+        scale=_enc->rc.log_scale[_qti]<OC_Q57(23)?
+         oc_bexp64(_enc->rc.log_scale[_qti]+OC_Q57(24)):0x7FFFFFFFFFFFLL;
+        scale*=nframes[_qti];
+        nframes[_qti]++;
+        scale+=oc_bexp_q24(log_cur_scale>>33);
+        _enc->rc.log_scale[_qti]=oc_blog64(scale)
+         -oc_blog64(nframes[qti])-OC_Q57(24);
+      }
+      else log_cur_scale=(ogg_int64_t)_enc->rc.cur_metrics.log_scale<<33;
+      /*Add the padding from above.
+        This basically reverts to 1-pass estimations in the last keyframe
+         interval.*/
+      if(buf_pad>0){
+        ogg_int64_t scale;
+        int         nextra_frames;
+        /*Extend the buffer.*/
+        buf_delay+=buf_pad;
+        /*Add virtual delta frames according to the estimated drop count.*/
+        nextra_frames=oc_rc_scale_drop(&_enc->rc,buf_pad);
+        /*And blend in the low-pass filtered scale according to how many frames
+           we added.*/
+        scale=
+         oc_bexp64(_enc->rc.log_scale[1]+OC_Q57(24))*(ogg_int64_t)nframes[1]
+         +oc_bexp_q24(_enc->rc.scalefilter[1].y[0])*(ogg_int64_t)nextra_frames;
+        nframes[1]+=nextra_frames;
+        _enc->rc.log_scale[1]=oc_blog64(scale)-oc_blog64(nframes[1])-OC_Q57(24);
+      }
+    }break;
+  }
+  /*If we've been missing our target, add a penalty term.*/
+  rate_bias=(_enc->rc.rate_bias/(_enc->state.curframe_num+1000))*
+   (buf_delay-buf_pad);
+  /*rate_total is the total bits available over the next buf_delay frames.*/
+  rate_total=_enc->rc.fullness-_enc->rc.target+rate_bias
+   +buf_delay*_enc->rc.bits_per_frame;
+  log_scale0=_enc->rc.log_scale[_qti]+_enc->rc.log_npixels;
+  /*If there aren't enough bits to achieve our desired fullness level, use the
+     minimum quality permitted.*/
+  if(rate_total<=buf_delay)log_qtarget=OC_QUANT_MAX_LOG;
+  else{
+    static const ogg_int64_t LOG_KEY_RATIO=0x0137222BB70747BALL;
+    ogg_int64_t log_scale1;
+    ogg_int64_t rlo;
+    ogg_int64_t rhi;
+    log_scale1=_enc->rc.log_scale[1-_qti]+_enc->rc.log_npixels;
+    rlo=0;
+    rhi=(rate_total+nframes[_qti]-1)/nframes[_qti];
+    while(rlo<rhi){
+      ogg_int64_t curr;
+      ogg_int64_t rdiff;
+      ogg_int64_t log_rpow;
+      ogg_int64_t rscale;
+      curr=rlo+rhi>>1;
+      log_rpow=oc_blog64(curr)-log_scale0;
+      log_rpow=(log_rpow+(_enc->rc.exp[_qti]>>1))/_enc->rc.exp[_qti];
+      if(_qti)log_rpow+=LOG_KEY_RATIO>>6;
+      else log_rpow-=LOG_KEY_RATIO>>6;
+      log_rpow*=_enc->rc.exp[1-_qti];
+      rscale=nframes[1-_qti]*oc_bexp64(log_scale1+log_rpow);
+      rdiff=nframes[_qti]*curr+rscale-rate_total;
+      if(rdiff<0)rlo=curr+1;
+      else if(rdiff>0)rhi=curr-1;
+      else break;
+    }
+    log_qtarget=OC_Q57(2)-((oc_blog64(rlo)-log_scale0+(_enc->rc.exp[_qti]>>1))/
+     _enc->rc.exp[_qti]<<6);
+    log_qtarget=OC_MINI(log_qtarget,OC_QUANT_MAX_LOG);
+  }
+  /*The above allocation looks only at the total rate we'll accumulate in the
+     next buf_delay frames.
+    However, we could overflow the buffer on the very next frame, so check for
+     that here, if we're not using a soft target.*/
+  exp0=_enc->rc.exp[_qti];
+  if(_enc->rc.cap_overflow){
+    ogg_int64_t margin;
+    ogg_int64_t soft_limit;
+    ogg_int64_t log_soft_limit;
+    /*Allow 3% of the buffer for prediction error.
+      This should be plenty, and we don't mind if we go a bit over; we only
+       want to keep these bits from being completely wasted.*/
+    margin=_enc->rc.max+31>>5;
+    /*We want to use at least this many bits next frame.*/
+    soft_limit=_enc->rc.fullness+_enc->rc.bits_per_frame-(_enc->rc.max-margin);
+    log_soft_limit=oc_blog64(soft_limit);
+    /*If we're predicting we won't use that many...*/
+    log_qexp=(log_qtarget-OC_Q57(2)>>6)*exp0;
+    if(log_scale0-log_qexp<log_soft_limit){
+      /*Scale the adjustment based on how far into the margin we are.*/
+      log_qexp+=(log_scale0-log_soft_limit-log_qexp>>32)*
+       ((OC_MINI(margin,soft_limit)<<32)/margin);
+      log_qtarget=((log_qexp+(exp0>>1))/exp0<<6)+OC_Q57(2);
+    }
+  }
+  /*If this was not one of the initial frames, limit the change in quality.*/
+  old_qi=_enc->state.qis[0];
+  if(_clamp){
+    ogg_int64_t log_qmin;
+    ogg_int64_t log_qmax;
+    /*Clamp the target quantizer to within [0.8*Q,1.2*Q], where Q is the
+       current quantizer.
+      TODO: With user-specified quant matrices, we need to enlarge these limits
+       if they don't actually let us change qi values.*/
+    log_qmin=_enc->log_qavg[_qti][old_qi]-0x00A4D3C25E68DC58LL;
+    log_qmax=_enc->log_qavg[_qti][old_qi]+0x00A4D3C25E68DC58LL;
+    log_qtarget=OC_CLAMPI(log_qmin,log_qtarget,log_qmax);
+  }
+  /*The above allocation looks only at the total rate we'll accumulate in the
+     next buf_delay frames.
+    However, we could bust the budget on the very next frame, so check for that
+     here, if we're not using a soft target.*/
+  /* Disabled when our minimum qi > 0; if we saturate log_qtarget to
+     the maximum possible size when we have a minimum qi, the
+     resulting lambda will interact very strangely with SKIP.  The
+     resulting artifacts look like waterfalls. */
+  if(_enc->state.info.quality==0){
+    ogg_int64_t log_hard_limit;
+    /*Compute the maximum number of bits we can use in the next frame.
+      Allow 50% of the rate for a single frame for prediction error.
+      This may not be enough for keyframes or sudden changes in complexity.*/
+    log_hard_limit=oc_blog64(_enc->rc.fullness+(_enc->rc.bits_per_frame>>1));
+    /*If we're predicting we'll use more than this...*/
+    log_qexp=(log_qtarget-OC_Q57(2)>>6)*exp0;
+    if(log_scale0-log_qexp>log_hard_limit){
+      /*Force the target to hit our limit exactly.*/
+      log_qexp=log_scale0-log_hard_limit;
+      log_qtarget=((log_qexp+(exp0>>1))/exp0<<6)+OC_Q57(2);
+      /*If that target is unreasonable, oh well; we'll have to drop.*/
+      log_qtarget=OC_MINI(log_qtarget,OC_QUANT_MAX_LOG);
+    }
+  }
+  /*Compute a final estimate of the number of bits we plan to use.*/
+  log_qexp=(log_qtarget-OC_Q57(2)>>6)*_enc->rc.exp[_qti];
+  _enc->rc.rate_bias+=oc_bexp64(log_cur_scale+_enc->rc.log_npixels-log_qexp);
+  qi=oc_enc_find_qi_for_target(_enc,_qti,old_qi,
+   _enc->state.info.quality,log_qtarget);
+  /*Save the quantizer target for lambda calculations.*/
+  _enc->rc.log_qtarget=log_qtarget;
+  return qi;
+}
+
+/*Updates the rate control state after a frame has been encoded (or trially
+   encoded).
+  _enc:       The encoder context whose rc state is updated.
+  _bits:      The number of bits used by the frame.
+              Values <=0 are treated as "no blocks coded".
+  _qti:       The frame type index used to select the per-type exponent, scale
+               filter and log_scale entry.
+  _qi:        Not referenced by this function.
+  _trial:     Non-zero if this was a trial encode: the scale filter is seeded
+               directly with the measured scale, and the buffer fullness, rate
+               bias and pass 2 window are left untouched.
+  _droppable: Non-zero if the frame may be dropped when it busts the budget.
+              This is overridden to 0 when frame dropping is disabled, or in
+               two-pass mode without a frame_metrics buffer.
+  Return: 1 if the frame must be dropped, or 0 otherwise.*/
+int oc_enc_update_rc_state(oc_enc_ctx *_enc,
+ long _bits,int _qti,int _qi,int _trial,int _droppable){
+  ogg_int64_t buf_delta;
+  ogg_int64_t log_scale;
+  int         dropped;
+  dropped=0;
+  /* Drop frames also disabled for now in the case of infinite-buffer
+     two-pass mode */
+  if(!_enc->rc.drop_frames||_enc->rc.twopass&&_enc->rc.frame_metrics==NULL){
+    _droppable=0;
+  }
+  /*The buffer gains one frame's worth of bits for this frame plus one for
+     each duplicate of it.*/
+  buf_delta=_enc->rc.bits_per_frame*(1+_enc->dup_count);
+  if(_bits<=0){
+    /*We didn't code any blocks in this frame.*/
+    log_scale=OC_Q57(-64);
+    _bits=0;
+  }
+  else{
+    ogg_int64_t log_bits;
+    ogg_int64_t log_qexp;
+    /*Compute the estimated scale factor for this frame type.*/
+    log_bits=oc_blog64(_bits);
+    log_qexp=_enc->rc.log_qtarget-OC_Q57(2);
+    log_qexp=(log_qexp>>6)*(_enc->rc.exp[_qti]);
+    log_scale=OC_MINI(log_bits-_enc->rc.log_npixels+log_qexp,OC_Q57(16));
+  }
+  /*Special two-pass processing.*/
+  switch(_enc->rc.twopass){
+    case 1:{
+      /*Pass 1 mode: save the metrics for this frame.*/
+      _enc->rc.cur_metrics.log_scale=oc_q57_to_q24(log_scale);
+      _enc->rc.cur_metrics.dup_count=_enc->dup_count;
+      _enc->rc.cur_metrics.frame_type=_enc->state.frame_type;
+      _enc->rc.cur_metrics.activity_avg=_enc->activity_avg;
+      _enc->rc.twopass_buffer_bytes=0;
+    }break;
+    case 2:{
+      /*Pass 2 mode:*/
+      if(!_trial){
+        ogg_int64_t next_frame_num;
+        int         qti;
+        /*Move the current metrics back one frame.*/
+        *&_enc->rc.prev_metrics=*&_enc->rc.cur_metrics;
+        next_frame_num=_enc->state.curframe_num+_enc->dup_count+1;
+        /*Back out the last frame's statistics from the sliding window.*/
+        qti=_enc->rc.prev_metrics.frame_type;
+        _enc->rc.frames_left[qti]--;
+        _enc->rc.frames_left[2]-=_enc->rc.prev_metrics.dup_count;
+        _enc->rc.nframes[qti]--;
+        _enc->rc.nframes[2]-=_enc->rc.prev_metrics.dup_count;
+        _enc->rc.scale_sum[qti]-=oc_bexp_q24(_enc->rc.prev_metrics.log_scale);
+        _enc->rc.scale_window0=(int)next_frame_num;
+        /*Free the corresponding entry in the circular buffer.*/
+        if(_enc->rc.frame_metrics!=NULL){
+          _enc->rc.nframe_metrics--;
+          _enc->rc.frame_metrics_head++;
+          if(_enc->rc.frame_metrics_head>=_enc->rc.cframe_metrics){
+            _enc->rc.frame_metrics_head=0;
+          }
+        }
+        /*Mark us ready for the next 2-pass packet.*/
+        _enc->rc.twopass_buffer_bytes=0;
+        /*Update state, so the user doesn't have to keep calling 2pass_in after
+           they've fed in all the data when we're using a finite buffer.*/
+        _enc->prev_dup_count=_enc->dup_count;
+        oc_enc_rc_2pass_in(_enc,NULL,0);
+      }
+    }break;
+  }
+  /*Common to all passes:*/
+  if(_bits>0){
+    if(_trial){
+      oc_iir_filter *f;
+      /*Use the estimated scale factor directly if this was a trial.*/
+      f=_enc->rc.scalefilter+_qti;
+      f->y[1]=f->y[0]=f->x[1]=f->x[0]=oc_q57_to_q24(log_scale);
+      _enc->rc.log_scale[_qti]=log_scale;
+    }
+    else{
+      /*Lengthen the time constant for the INTER filter as we collect more
+         frame statistics, until we reach our target.*/
+      if(_enc->rc.inter_delay<_enc->rc.inter_delay_target&&
+       _enc->rc.inter_count>=_enc->rc.inter_delay&&_qti==OC_INTER_FRAME){
+        oc_iir_filter_reinit(&_enc->rc.scalefilter[1],++_enc->rc.inter_delay);
+      }
+      /*Otherwise update the low-pass scale filter for this frame type,
+         regardless of whether or not we dropped this frame.*/
+      _enc->rc.log_scale[_qti]=oc_iir_filter_update(
+       _enc->rc.scalefilter+_qti,oc_q57_to_q24(log_scale))<<33;
+      /*If this frame busts our budget, it must be dropped.*/
+      if(_droppable&&_enc->rc.fullness+buf_delta<_bits){
+        _enc->rc.prev_drop_count+=1+_enc->dup_count;
+        _bits=0;
+        dropped=1;
+      }
+      else{
+        ogg_uint32_t drop_count;
+        /*Update a low-pass filter to estimate the "real" frame rate taking
+           drops and duplicates into account.
+          This is only done if the frame is coded, as it needs the final
+           count of dropped frames.*/
+        drop_count=_enc->rc.prev_drop_count+1;
+        if(drop_count>0x7F)drop_count=0x7FFFFFFF;
+        else drop_count<<=24;
+        _enc->rc.log_drop_scale=oc_blog64(oc_iir_filter_update(
+         &_enc->rc.vfrfilter,drop_count))-OC_Q57(24);
+        /*Initialize the drop count for this frame to the user-requested dup
+           count.
+          It will be increased if we drop more frames.*/
+        _enc->rc.prev_drop_count=_enc->dup_count;
+      }
+    }
+    /*Increment the INTER frame count, for filter adaptation purposes.
+      Adding _qti itself means only frames with a non-zero type index are
+       counted.*/
+    if(_enc->rc.inter_count<INT_MAX)_enc->rc.inter_count+=_qti;
+  }
+  /*Increase the drop count.*/
+  else _enc->rc.prev_drop_count+=1+_enc->dup_count;
+  /*And update the buffer fullness level.*/
+  if(!_trial){
+    _enc->rc.fullness+=buf_delta-_bits;
+    /*If we're too quick filling the buffer and overflow is capped,
+      that rate is lost forever.*/
+    if(_enc->rc.cap_overflow&&_enc->rc.fullness>_enc->rc.max){
+      _enc->rc.fullness=_enc->rc.max;
+    }
+    /*If we're too quick draining the buffer and underflow is capped,
+      don't try to make up that rate later.*/
+    if(_enc->rc.cap_underflow&&_enc->rc.fullness<0){
+      _enc->rc.fullness=0;
+    }
+    /*Adjust the bias for the real bits we've used.*/
+    _enc->rc.rate_bias-=_bits;
+  }
+  return dropped;
+}
+
+/*The version number written into, and required from, the two-pass summary
+   header.*/
+#define OC_RC_2PASS_VERSION   (2)
+/*The size of the two-pass summary header in bytes: five 4-byte fields, two
+   1-byte fields and two 8-byte fields (5*4+2*1+2*8).*/
+#define OC_RC_2PASS_HDR_SZ    (38)
+/*The size of one per-frame two-pass packet in bytes: three 4-byte fields.*/
+#define OC_RC_2PASS_PACKET_SZ (12)
+
+/*Appends the _bytes least-significant bytes of a value to the two-pass buffer,
+   least-significant byte first.
+  _rc:    The rate control state whose twopass_buffer is appended to;
+           twopass_buffer_bytes is advanced by one per byte written.
+  _val:   The value to serialize.
+  _bytes: The number of bytes to emit; nothing is written if this is <=0.*/
+static void oc_rc_buffer_val(oc_rc_state *_rc,ogg_int64_t _val,int _bytes){
+  int bi;
+  for(bi=0;bi<_bytes;bi++){
+    unsigned char octet;
+    octet=(unsigned char)(_val&0xFF);
+    _rc->twopass_buffer[_rc->twopass_buffer_bytes]=octet;
+    _rc->twopass_buffer_bytes++;
+    /*Expose the next byte.*/
+    _val>>=8;
+  }
+}
+
+/*Produces the pass 1 statistics data for the caller to save.
+  The first call (while two-pass mode is still disabled) switches the encoder
+   into pass 1 mode and emits a placeholder summary header.
+  Later calls emit one packet of metrics per frame, and, once the encoder has
+   reached OC_PACKET_DONE, the real summary header.
+  _enc: The encoder context.
+  _buf: Returns a pointer to the encoder's internal two-pass buffer, or NULL if
+         the data for this frame has already been retrieved.
+  Return: The number of bytes available at *_buf, or 0 if there are none.*/
+int oc_enc_rc_2pass_out(oc_enc_ctx *_enc,unsigned char **_buf){
+  if(_enc->rc.twopass_buffer_bytes==0){
+    if(_enc->rc.twopass==0){
+      int qi;
+      /*Pick first-pass qi for scale calculations.*/
+      qi=oc_enc_select_qi(_enc,0,0);
+      _enc->state.nqis=1;
+      _enc->state.qis[0]=qi;
+      _enc->rc.twopass=1;
+      _enc->rc.frames_total[0]=_enc->rc.frames_total[1]=
+       _enc->rc.frames_total[2]=0;
+      _enc->rc.scale_sum[0]=_enc->rc.scale_sum[1]=0;
+      /*Fill in dummy summary values.
+        The magic value is serialized least-significant byte first, i.e., as
+         the bytes 'O','T','2','P'.*/
+      oc_rc_buffer_val(&_enc->rc,0x5032544F,4);
+      oc_rc_buffer_val(&_enc->rc,OC_RC_2PASS_VERSION,4);
+      oc_rc_buffer_val(&_enc->rc,0,OC_RC_2PASS_HDR_SZ-8);
+    }
+    else{
+      int qti;
+      /*Accumulate this frame into the running totals for the summary header,
+         then emit its per-frame packet.*/
+      qti=_enc->rc.cur_metrics.frame_type;
+      _enc->rc.scale_sum[qti]+=oc_bexp_q24(_enc->rc.cur_metrics.log_scale);
+      _enc->rc.frames_total[qti]++;
+      _enc->rc.frames_total[2]+=_enc->rc.cur_metrics.dup_count;
+      /*Pack the frame type into bit 31 above the dup count.
+        The shift is done in an unsigned 32-bit type: if frame_type promotes to
+         int (as a narrow bit-field would), shifting a 1 into the sign bit
+         would be undefined behavior.*/
+      oc_rc_buffer_val(&_enc->rc,
+       (ogg_uint32_t)_enc->rc.cur_metrics.dup_count
+       |(ogg_uint32_t)_enc->rc.cur_metrics.frame_type<<31,4);
+      oc_rc_buffer_val(&_enc->rc,_enc->rc.cur_metrics.log_scale,4);
+      oc_rc_buffer_val(&_enc->rc,_enc->rc.cur_metrics.activity_avg,4);
+    }
+  }
+  else if(_enc->packet_state==OC_PACKET_DONE&&
+   _enc->rc.twopass_buffer_bytes!=OC_RC_2PASS_HDR_SZ){
+    /*Encoding is done and the buffer does not already hold a header:
+       replace its contents with the final summary header.*/
+    _enc->rc.twopass_buffer_bytes=0;
+    oc_rc_buffer_val(&_enc->rc,0x5032544F,4);
+    oc_rc_buffer_val(&_enc->rc,OC_RC_2PASS_VERSION,4);
+    oc_rc_buffer_val(&_enc->rc,_enc->rc.frames_total[0],4);
+    oc_rc_buffer_val(&_enc->rc,_enc->rc.frames_total[1],4);
+    oc_rc_buffer_val(&_enc->rc,_enc->rc.frames_total[2],4);
+    oc_rc_buffer_val(&_enc->rc,_enc->rc.exp[0],1);
+    oc_rc_buffer_val(&_enc->rc,_enc->rc.exp[1],1);
+    oc_rc_buffer_val(&_enc->rc,_enc->rc.scale_sum[0],8);
+    oc_rc_buffer_val(&_enc->rc,_enc->rc.scale_sum[1],8);
+  }
+  else{
+    /*The data for this frame has already been retrieved.*/
+    *_buf=NULL;
+    return 0;
+  }
+  *_buf=_enc->rc.twopass_buffer;
+  return _enc->rc.twopass_buffer_bytes;
+}
+
+/*Copies input bytes into the two-pass buffer until it holds _goal bytes or the
+   input is exhausted, whichever comes first.
+  _rc:       The rate control state whose twopass_buffer is filled;
+              twopass_buffer_fill is advanced by one per byte copied.
+  _buf:      The input data.
+  _bytes:    The total number of bytes in _buf.
+  _consumed: The number of bytes of _buf that have already been used.
+  _goal:     The desired fill level of the two-pass buffer.
+  Return: The updated number of bytes of _buf that have been used.*/
+static size_t oc_rc_buffer_fill(oc_rc_state *_rc,
+ unsigned char *_buf,size_t _bytes,size_t _consumed,size_t _goal){
+  for(;;){
+    /*Stop once the buffer is full enough...*/
+    if(_rc->twopass_buffer_fill>=_goal)break;
+    /*...or once we run out of input.*/
+    if(_consumed>=_bytes)break;
+    _rc->twopass_buffer[_rc->twopass_buffer_fill]=_buf[_consumed];
+    _rc->twopass_buffer_fill++;
+    _consumed++;
+  }
+  return _consumed;
+}
+
+/*Reads a value of _bytes bytes from the two-pass buffer, least-significant
+   byte first.
+  _rc:    The rate control state whose twopass_buffer is read;
+           twopass_buffer_bytes is advanced by one per byte read.
+  _bytes: The number of bytes to read; 0 is returned if this is <=0.
+  Return: The assembled value.*/
+static ogg_int64_t oc_rc_unbuffer_val(oc_rc_state *_rc,int _bytes){
+  ogg_int64_t val;
+  int         bi;
+  val=0;
+  for(bi=0;bi<_bytes;bi++){
+    ogg_int64_t octet;
+    octet=(ogg_int64_t)_rc->twopass_buffer[_rc->twopass_buffer_bytes];
+    _rc->twopass_buffer_bytes++;
+    /*Byte bi lands at bit position 8*bi.*/
+    val|=octet<<(bi<<3);
+  }
+  return val;
+}
+
+/*Feeds pass 1 statistics to the encoder for use in pass 2, or queries how much
+   data is wanted.
+  The first call (while two-pass mode is still disabled) switches the encoder
+   into pass 2 mode.
+  _enc:   The encoder context.
+  _buf:   The pass 1 data to consume, or NULL to query how many bytes are
+           currently wanted.
+  _bytes: The number of bytes available in _buf.
+  Return: With a non-NULL _buf, the number of bytes consumed from it.
+          With a NULL _buf, the number of bytes still wanted (0 if nothing
+           more is wanted right now).
+          TH_ENOTFORMAT if the header magic or version do not match,
+           TH_EBADHEADER if the header's frame counts are invalid, or
+           TH_EINVAL if the dup count of the frame just encoded differs from
+           the one recorded in pass 1.*/
+int oc_enc_rc_2pass_in(oc_enc_ctx *_enc,unsigned char *_buf,size_t _bytes){
+  size_t consumed;
+  consumed=0;
+  /*Enable pass 2 mode if this is the first call.*/
+  if(_enc->rc.twopass==0){
+    _enc->rc.twopass=2;
+    _enc->rc.twopass_buffer_fill=0;
+    _enc->rc.frames_total[0]=0;
+    _enc->rc.nframe_metrics=0;
+    _enc->rc.cframe_metrics=0;
+    _enc->rc.frame_metrics_head=0;
+    _enc->rc.scale_window0=0;
+    _enc->rc.scale_window_end=0;
+  }
+  /*If we haven't got a valid summary header yet, try to parse one.*/
+  if(_enc->rc.frames_total[0]==0){
+    if(!_buf){
+      int frames_needed;
+      /*If we're using a whole-file buffer, we just need the first frame.
+        Otherwise, we may need as many as one per buffer slot.*/
+      frames_needed=_enc->rc.frame_metrics==NULL?1:_enc->rc.buf_delay;
+      return OC_RC_2PASS_HDR_SZ+frames_needed*OC_RC_2PASS_PACKET_SZ
+       -_enc->rc.twopass_buffer_fill;
+    }
+    consumed=oc_rc_buffer_fill(&_enc->rc,
+     _buf,_bytes,consumed,OC_RC_2PASS_HDR_SZ);
+    if(_enc->rc.twopass_buffer_fill>=OC_RC_2PASS_HDR_SZ){
+      ogg_int64_t scale_sum[2];
+      int         exp[2];
+      int         buf_delay;
+      /*Read the summary header data.*/
+      /*Check the magic value and version number.*/
+      if(oc_rc_unbuffer_val(&_enc->rc,4)!=0x5032544F||
+       oc_rc_unbuffer_val(&_enc->rc,4)!=OC_RC_2PASS_VERSION){
+        _enc->rc.twopass_buffer_bytes=0;
+        return TH_ENOTFORMAT;
+      }
+      _enc->rc.frames_total[0]=(ogg_uint32_t)oc_rc_unbuffer_val(&_enc->rc,4);
+      _enc->rc.frames_total[1]=(ogg_uint32_t)oc_rc_unbuffer_val(&_enc->rc,4);
+      _enc->rc.frames_total[2]=(ogg_uint32_t)oc_rc_unbuffer_val(&_enc->rc,4);
+      exp[0]=(int)oc_rc_unbuffer_val(&_enc->rc,1);
+      exp[1]=(int)oc_rc_unbuffer_val(&_enc->rc,1);
+      scale_sum[0]=oc_rc_unbuffer_val(&_enc->rc,8);
+      scale_sum[1]=oc_rc_unbuffer_val(&_enc->rc,8);
+      /*Make sure the file claims to have at least one frame.
+        Otherwise we probably got the placeholder data from an aborted pass 1.
+        Also make sure the total frame count doesn't overflow an integer.*/
+      buf_delay=_enc->rc.frames_total[0]+_enc->rc.frames_total[1]
+       +_enc->rc.frames_total[2];
+      if(_enc->rc.frames_total[0]==0||buf_delay<0||
+       (ogg_uint32_t)buf_delay<_enc->rc.frames_total[0]||
+       (ogg_uint32_t)buf_delay<_enc->rc.frames_total[1]){
+        _enc->rc.frames_total[0]=0;
+        _enc->rc.twopass_buffer_bytes=0;
+        return TH_EBADHEADER;
+      }
+      /*Got a valid header; set up pass 2.*/
+      _enc->rc.frames_left[0]=_enc->rc.frames_total[0];
+      _enc->rc.frames_left[1]=_enc->rc.frames_total[1];
+      _enc->rc.frames_left[2]=_enc->rc.frames_total[2];
+      /*If the user hasn't specified a buffer size, use the whole file.*/
+      if(_enc->rc.frame_metrics==NULL){
+        _enc->rc.buf_delay=buf_delay;
+        _enc->rc.nframes[0]=_enc->rc.frames_total[0];
+        _enc->rc.nframes[1]=_enc->rc.frames_total[1];
+        _enc->rc.nframes[2]=_enc->rc.frames_total[2];
+        _enc->rc.scale_sum[0]=scale_sum[0];
+        _enc->rc.scale_sum[1]=scale_sum[1];
+        _enc->rc.scale_window_end=buf_delay;
+        oc_enc_rc_reset(_enc);
+      }
+      _enc->rc.exp[0]=exp[0];
+      _enc->rc.exp[1]=exp[1];
+      /*Clear the header data from the buffer to make room for packet data.*/
+      _enc->rc.twopass_buffer_fill=0;
+      _enc->rc.twopass_buffer_bytes=0;
+    }
+  }
+  if(_enc->rc.frames_total[0]!=0){
+    ogg_int64_t curframe_num;
+    int         nframes_total;
+    curframe_num=_enc->state.curframe_num;
+    if(curframe_num>=0){
+      /*We just encoded a frame; make sure things matched.*/
+      if(_enc->rc.prev_metrics.dup_count!=_enc->prev_dup_count){
+        _enc->rc.twopass_buffer_bytes=0;
+        return TH_EINVAL;
+      }
+    }
+    curframe_num+=_enc->prev_dup_count+1;
+    nframes_total=_enc->rc.frames_total[0]+_enc->rc.frames_total[1]
+     +_enc->rc.frames_total[2];
+    if(curframe_num>=nframes_total){
+      /*We don't want any more data after the last frame, and we don't want to
+         allow any more frames to be encoded.*/
+      _enc->rc.twopass_buffer_bytes=0;
+    }
+    else if(_enc->rc.twopass_buffer_bytes==0){
+      if(_enc->rc.frame_metrics==NULL){
+        /*We're using a whole-file buffer:*/
+        if(!_buf)return OC_RC_2PASS_PACKET_SZ-_enc->rc.twopass_buffer_fill;
+        consumed=oc_rc_buffer_fill(&_enc->rc,
+         _buf,_bytes,consumed,OC_RC_2PASS_PACKET_SZ);
+        if(_enc->rc.twopass_buffer_fill>=OC_RC_2PASS_PACKET_SZ){
+          ogg_uint32_t dup_count;
+          ogg_int32_t  log_scale;
+          unsigned     activity;
+          int          qti;
+          int          arg;
+          /*Read the metrics for the next frame.*/
+          dup_count=oc_rc_unbuffer_val(&_enc->rc,4);
+          log_scale=oc_rc_unbuffer_val(&_enc->rc,4);
+          activity=oc_rc_unbuffer_val(&_enc->rc,4);
+          _enc->rc.cur_metrics.log_scale=log_scale;
+          /*The frame type is stored in the top bit of the dup count field.*/
+          qti=(dup_count&0x80000000)>>31;
+          _enc->rc.cur_metrics.dup_count=dup_count&0x7FFFFFFF;
+          _enc->rc.cur_metrics.frame_type=qti;
+          _enc->rc.twopass_force_kf=qti==OC_INTRA_FRAME;
+          _enc->activity_avg=_enc->rc.cur_metrics.activity_avg=activity;
+          /*"Helpfully" set the dup count back to what it was in pass 1.*/
+          arg=_enc->rc.cur_metrics.dup_count;
+          th_encode_ctl(_enc,TH_ENCCTL_SET_DUP_COUNT,&arg,sizeof(arg));
+          /*Clear the buffer for the next frame.*/
+          _enc->rc.twopass_buffer_fill=0;
+        }
+      }
+      else{
+        int frames_needed;
+        /*We're using a finite buffer:*/
+        frames_needed=OC_CLAMPI(0,_enc->rc.buf_delay
+         -(_enc->rc.scale_window_end-_enc->rc.scale_window0),
+         _enc->rc.frames_left[0]+_enc->rc.frames_left[1]
+         -_enc->rc.nframes[0]-_enc->rc.nframes[1]);
+        while(frames_needed>0){
+          if(!_buf){
+            return OC_RC_2PASS_PACKET_SZ*frames_needed
+           -_enc->rc.twopass_buffer_fill;
+          }
+          consumed=oc_rc_buffer_fill(&_enc->rc,
+           _buf,_bytes,consumed,OC_RC_2PASS_PACKET_SZ);
+          if(_enc->rc.twopass_buffer_fill>=OC_RC_2PASS_PACKET_SZ){
+            oc_frame_metrics *m;
+            int               fmi;
+            ogg_uint32_t      dup_count;
+            ogg_int32_t       log_scale;
+            int               qti;
+            unsigned          activity;
+            /*Read the metrics for the next frame.*/
+            dup_count=oc_rc_unbuffer_val(&_enc->rc,4);
+            log_scale=oc_rc_unbuffer_val(&_enc->rc,4);
+            activity=oc_rc_unbuffer_val(&_enc->rc,4);
+            /*Add them to the circular buffer.*/
+            fmi=_enc->rc.frame_metrics_head+_enc->rc.nframe_metrics++;
+            if(fmi>=_enc->rc.cframe_metrics)fmi-=_enc->rc.cframe_metrics;
+            m=_enc->rc.frame_metrics+fmi;
+            m->log_scale=log_scale;
+            /*The frame type is stored in the top bit of the dup count field.*/
+            qti=(dup_count&0x80000000)>>31;
+            m->dup_count=dup_count&0x7FFFFFFF;
+            m->frame_type=qti;
+            m->activity_avg=activity;
+            /*And accumulate the statistics over the window.*/
+            _enc->rc.nframes[qti]++;
+            _enc->rc.nframes[2]+=m->dup_count;
+            _enc->rc.scale_sum[qti]+=oc_bexp_q24(m->log_scale);
+            _enc->rc.scale_window_end+=m->dup_count+1;
+            /*Compute an upper bound on the number of remaining packets needed
+               for the current window.*/
+            frames_needed=OC_CLAMPI(0,_enc->rc.buf_delay
+             -(_enc->rc.scale_window_end-_enc->rc.scale_window0),
+             _enc->rc.frames_left[0]+_enc->rc.frames_left[1]
+             -_enc->rc.nframes[0]-_enc->rc.nframes[1]);
+            /*Clear the buffer for the next frame.*/
+            _enc->rc.twopass_buffer_fill=0;
+            _enc->rc.twopass_buffer_bytes=0;
+          }
+          /*Go back for more data.*/
+          else break;
+        }
+        /*If we've got all the frames we need, fill in the current metrics.
+          We're ready to go.*/
+        if(frames_needed<=0){
+          int arg;
+          *&_enc->rc.cur_metrics=
+           *(_enc->rc.frame_metrics+_enc->rc.frame_metrics_head);
+          _enc->rc.twopass_force_kf=
+           _enc->rc.cur_metrics.frame_type==OC_INTRA_FRAME;
+          _enc->activity_avg=_enc->rc.cur_metrics.activity_avg;
+          /*"Helpfully" set the dup count back to what it was in pass 1.*/
+          arg=_enc->rc.cur_metrics.dup_count;
+          th_encode_ctl(_enc,TH_ENCCTL_SET_DUP_COUNT,&arg,sizeof(arg));
+          /*Mark us ready for the next frame.*/
+          _enc->rc.twopass_buffer_bytes=1;
+        }
+      }
+    }
+  }
+  return (int)consumed;
+}

+ 1267 - 0
jni/libtheora-1.2.0alpha1/lib/state.c

@@ -0,0 +1,1267 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id$
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include "state.h"
+#if defined(OC_DUMP_IMAGES)
+# include <stdio.h>
+# include "png.h"
+# include "zlib.h"
+#endif
+
+/*Fills in the chroma plane motion vector for a macro block that has 4
+   different motion vectors in the luma plane.
+  This version is for use with chroma decimated in the X and Y directions
+   (4:2:0): all four luma vectors are averaged into a single chroma vector.
+  _cbmvs: The chroma block-level motion vectors to fill in.
+          Only entry 0 is written.
+  _lbmvs: The luma block-level motion vectors.*/
+static void oc_set_chroma_mvs00(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
+  int sum_x;
+  int sum_y;
+  int bi;
+  sum_x=sum_y=0;
+  /*Accumulate the components of all four luma vectors.*/
+  for(bi=0;bi<4;bi++){
+    sum_x+=OC_MV_X(_lbmvs[bi]);
+    sum_y+=OC_MV_Y(_lbmvs[bi]);
+  }
+  _cbmvs[0]=OC_MV(OC_DIV_ROUND_POW2(sum_x,2,2),OC_DIV_ROUND_POW2(sum_y,2,2));
+}
+
+/*Fills in the chroma plane motion vectors for a macro block that has 4
+   different motion vectors in the luma plane.
+  This version is for use with chroma decimated in the Y direction: luma
+   vectors 0 and 2 are averaged into chroma vector 0, and luma vectors 1 and 3
+   into chroma vector 1.
+  _cbmvs: The chroma block-level motion vectors to fill in.
+          Only entries 0 and 1 are written.
+  _lbmvs: The luma block-level motion vectors.*/
+static void oc_set_chroma_mvs01(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
+  int ci;
+  for(ci=0;ci<2;ci++){
+    int sum_x;
+    int sum_y;
+    /*Pair each luma block with the one two entries after it.*/
+    sum_x=OC_MV_X(_lbmvs[ci])+OC_MV_X(_lbmvs[ci+2]);
+    sum_y=OC_MV_Y(_lbmvs[ci])+OC_MV_Y(_lbmvs[ci+2]);
+    _cbmvs[ci]=OC_MV(OC_DIV_ROUND_POW2(sum_x,1,1),
+     OC_DIV_ROUND_POW2(sum_y,1,1));
+  }
+}
+
+/*Fills in the chroma plane motion vectors for a macro block that has 4
+   different motion vectors in the luma plane.
+  This version is for use with chroma decimated in the X direction (4:2:2):
+   luma vectors 0 and 1 are averaged into chroma vector 0, and luma vectors 2
+   and 3 into chroma vector 2.
+  _cbmvs: The chroma block-level motion vectors to fill in.
+          Only entries 0 and 2 are written.
+  _lbmvs: The luma block-level motion vectors.*/
+static void oc_set_chroma_mvs10(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
+  int ci;
+  for(ci=0;ci<4;ci+=2){
+    int sum_x;
+    int sum_y;
+    /*Pair each even luma block with the entry right after it.*/
+    sum_x=OC_MV_X(_lbmvs[ci])+OC_MV_X(_lbmvs[ci+1]);
+    sum_y=OC_MV_Y(_lbmvs[ci])+OC_MV_Y(_lbmvs[ci+1]);
+    _cbmvs[ci]=OC_MV(OC_DIV_ROUND_POW2(sum_x,1,1),
+     OC_DIV_ROUND_POW2(sum_y,1,1));
+  }
+}
+
+/*Fills in the chroma plane motion vectors for a macro block that has 4
+   different motion vectors in the luma plane.
+  This version is for use with no chroma decimation (4:4:4): each chroma
+   vector is a copy of the corresponding luma vector.
+  _cbmvs: The chroma block-level motion vectors to fill in.
+  _lbmvs: The luma block-level motion vectors.*/
+static void oc_set_chroma_mvs11(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
+  int bi;
+  for(bi=0;bi<4;bi++)_cbmvs[bi]=_lbmvs[bi];
+}
+
+/*A table of functions used to fill in the chroma plane motion vectors for a
+   macro block when 4 different motion vectors are specified in the luma
+   plane.
+  There is one entry per pixel format (TH_PF_NFORMATS in total), listed in the
+   order: X and Y decimation, Y decimation only, X decimation only, and no
+   decimation.*/
+const oc_set_chroma_mvs_func OC_SET_CHROMA_MVS_TABLE[TH_PF_NFORMATS]={
+  (oc_set_chroma_mvs_func)oc_set_chroma_mvs00,
+  (oc_set_chroma_mvs_func)oc_set_chroma_mvs01,
+  (oc_set_chroma_mvs_func)oc_set_chroma_mvs10,
+  (oc_set_chroma_mvs_func)oc_set_chroma_mvs11
+};
+
+
+
+/*Looks up the fragment index of the top-left block of one macro block
+   (quadrant) of a super block.
+  This can be used to test whether or not the whole macro block is valid.
+  _sb_map: The super block map.
+  _quadi:  The quadrant number.
+  Return: The index of the fragment of the upper left block in the macro
+   block, or -1 if the block lies outside the coded frame.*/
+static ptrdiff_t oc_sb_quad_top_left_frag(oc_sb_map_quad _sb_map[4],int _quadi){
+  int bi;
+  /*Under the Hilbert curve ordering used for super blocks, the upper-left
+     block of a macro block is at index 0, except in macro block 3, where it
+     is at index 2.
+    The expression below evaluates to 2 for quadrant 3 and to 0 for quadrants
+     0, 1 and 2.*/
+  bi=_quadi&(_quadi<<1);
+  return _sb_map[_quadi][bi];
+}
+
+/*Fills in the mapping from block positions to fragment numbers for a single
+   color plane.
+  This function also fills in the "valid" flag of each quadrant in the super
+   block flags.
+  The valid bits are OR-ed into the existing flags, so this presumably expects
+   the caller to have cleared them beforehand (not visible from here).
+  _sb_maps:  The array of super block maps for the color plane.
+  _sb_flags: The array of super block flags for the color plane.
+  _frag0:    The index of the first fragment in the plane.
+  _hfrags:   The number of horizontal fragments in a coded frame.
+  _vfrags:   The number of vertical fragments in a coded frame.*/
+static void oc_sb_create_plane_mapping(oc_sb_map _sb_maps[],
+ oc_sb_flags _sb_flags[],ptrdiff_t _frag0,int _hfrags,int _vfrags){
+  /*Contains the (macro_block,block) indices for a 4x4 grid of
+     fragments.
+    The pattern is a 4x4 Hilbert space-filling curve.
+    A Hilbert curve has the nice property that as the curve grows larger, its
+     fractal dimension approaches 2.
+    The intuition is that nearby blocks in the curve are also close spatially,
+     with the previous element always an immediate neighbor, so that runs of
+     blocks should be well correlated.*/
+  static const int SB_MAP[4][4][2]={
+    {{0,0},{0,1},{3,2},{3,3}},
+    {{0,3},{0,2},{3,1},{3,0}},
+    {{1,0},{1,3},{2,0},{2,3}},
+    {{1,1},{1,2},{2,1},{2,2}}
+  };
+  ptrdiff_t  yfrag;
+  unsigned   sbi;
+  int        y;
+  sbi=0;
+  yfrag=_frag0;
+  for(y=0;;y+=4){
+    int imax;
+    int x;
+    /*Figure out how many rows of blocks in this super block lie within the
+       image.
+      The loop ends once we step past the last row of fragments.*/
+    imax=_vfrags-y;
+    if(imax>4)imax=4;
+    else if(imax<=0)break;
+    for(x=0;;x+=4,sbi++){
+      ptrdiff_t xfrag;
+      int       jmax;
+      int       quadi;
+      int       i;
+      /*Figure out how many columns of blocks in this super block lie within
+         the image.
+        The loop ends once we step past the last column of fragments.*/
+      jmax=_hfrags-x;
+      if(jmax>4)jmax=4;
+      else if(jmax<=0)break;
+      /*By default, set all fragment indices to -1.*/
+      memset(_sb_maps[sbi],0xFF,sizeof(_sb_maps[sbi]));
+      /*Fill in the fragment map for this super block.*/
+      xfrag=yfrag+x;
+      for(i=0;i<imax;i++){
+        int j;
+        for(j=0;j<jmax;j++){
+          _sb_maps[sbi][SB_MAP[i][j][0]][SB_MAP[i][j][1]]=xfrag+j;
+        }
+        xfrag+=_hfrags;
+      }
+      /*Mark which quadrants of this super block lie within the image.*/
+      for(quadi=0;quadi<4;quadi++){
+        _sb_flags[sbi].quad_valid|=
+         (oc_sb_quad_top_left_frag(_sb_maps[sbi],quadi)>=0)<<quadi;
+      }
+    }
+    /*Advance to the first fragment of the next row of super blocks (4 rows of
+       fragments down).*/
+    yfrag+=_hfrags<<2;
+  }
+}
+
+/*Fills in the Y plane fragment map for a macro block given the fragment
+   coordinates of its upper-left hand corner.
+  The four luma entries are stored in raster order: entry (row<<1|col) holds
+   the fragment at (_xfrag0+col,_yfrag0+row).
+  _mb_map: The macro block map to fill.
+  _fplane: The description of the Y plane.
+  _xfrag0: The X location of the upper-left hand fragment in the luma plane.
+  _yfrag0: The Y location of the upper-left hand fragment in the luma plane.*/
+static void oc_mb_fill_ymapping(oc_mb_map_plane _mb_map[3],
+ const oc_fragment_plane *_fplane,int _xfrag0,int _yfrag0){
+  int bi;
+  for(bi=0;bi<4;bi++){
+    int row;
+    int col;
+    row=bi>>1;
+    col=bi&1;
+    _mb_map[0][bi]=(_yfrag0+row)*(ptrdiff_t)_fplane->nhfrags+_xfrag0+col;
+  }
+}
+
+/*Fills in the chroma plane fragment maps for a macro block.
+  This version is for use with chroma decimated in the X and Y directions
+   (4:2:0), where each macro block has a single fragment per chroma plane.
+  Only entry 0 of each chroma plane map is written.
+  _mb_map:  The macro block map to fill.
+  _fplanes: The descriptions of the fragment planes.
+  _xfrag0:  The X location of the upper-left hand fragment in the luma plane.
+  _yfrag0:  The Y location of the upper-left hand fragment in the luma plane.*/
+static void oc_mb_fill_cmapping00(oc_mb_map_plane _mb_map[3],
+ const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
+  ptrdiff_t fragi;
+  /*Halve both luma coordinates to get the chroma fragment position.*/
+  fragi=(_yfrag0>>1)*(ptrdiff_t)_fplanes[1].nhfrags+(_xfrag0>>1);
+  _mb_map[1][0]=fragi+_fplanes[1].froffset;
+  _mb_map[2][0]=fragi+_fplanes[2].froffset;
+}
+
+/*Fills in the chroma plane fragment maps for a macro block.
+  This version is for use with chroma decimated in the Y direction, where each
+   macro block has two horizontally adjacent fragments per chroma plane.
+  Only entries 0 and 1 of each chroma plane map are written.
+  _mb_map:  The macro block map to fill.
+  _fplanes: The descriptions of the fragment planes.
+  _xfrag0:  The X location of the upper-left hand fragment in the luma plane.
+  _yfrag0:  The Y location of the upper-left hand fragment in the luma plane.*/
+static void oc_mb_fill_cmapping01(oc_mb_map_plane _mb_map[3],
+ const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
+  ptrdiff_t fragi;
+  int       col;
+  /*Only the Y coordinate is halved.*/
+  fragi=(_yfrag0>>1)*(ptrdiff_t)_fplanes[1].nhfrags+_xfrag0;
+  for(col=0;col<2;col++,fragi++){
+    _mb_map[1][col]=fragi+_fplanes[1].froffset;
+    _mb_map[2][col]=fragi+_fplanes[2].froffset;
+  }
+}
+
+/*Computes the chroma fragment indices of a macro block when chroma is
+   decimated in the X direction only (4:2:2).
+  The macro block covers two vertically adjacent fragments in each chroma
+   plane, stored in entries 0 and 2 of planes 1 and 2.
+  _mb_map:  The macro block map to fill.
+  _fplanes: The descriptions of the fragment planes.
+  _xfrag0:  The X location of the upper-left hand fragment in the luma plane.
+  _yfrag0:  The Y location of the upper-left hand fragment in the luma plane.*/
+static void oc_mb_fill_cmapping10(oc_mb_map_plane _mb_map[3],
+ const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
+  ptrdiff_t cfragi;
+  int       bi;
+  /*Only the column coordinate is halved; the row is shared with luma.*/
+  cfragi=_yfrag0*(ptrdiff_t)_fplanes[1].nhfrags+(_xfrag0>>1);
+  /*Entries 0 and 2 are the left-hand column of the 2x2 block layout.*/
+  for(bi=0;bi<4;bi+=2){
+    _mb_map[1][bi]=cfragi+_fplanes[1].froffset;
+    _mb_map[2][bi]=cfragi+_fplanes[2].froffset;
+    /*Step down one fragment row in the chroma plane.*/
+    cfragi+=_fplanes[1].nhfrags;
+  }
+}
+
+/*Computes the chroma fragment indices of a macro block when chroma is not
+   decimated at all (4:4:4).
+  Each chroma entry is the corresponding luma entry shifted by the chroma
+   plane's fragment offset, so the luma plane of _mb_map must already have
+   been filled in.
+  _mb_map:  The macro block map to fill.
+  _fplanes: The descriptions of the fragment planes.
+  _xfrag0:  Unused; present only to match oc_mb_fill_cmapping_func.
+  _yfrag0:  Unused; present only to match oc_mb_fill_cmapping_func.*/
+static void oc_mb_fill_cmapping11(oc_mb_map_plane _mb_map[3],
+ const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0){
+  int pli;
+  int bi;
+  (void)_xfrag0;
+  (void)_yfrag0;
+  for(pli=1;pli<3;pli++){
+    for(bi=0;bi<4;bi++){
+      _mb_map[pli][bi]=_mb_map[0][bi]+_fplanes[pli].froffset;
+    }
+  }
+}
+
+/*The function type used to fill in the chroma plane fragment maps for a
+   macro block.
+  All four implementations above share this signature, even the one that
+   ignores the fragment coordinates.
+  _mb_map:  The macro block map to fill.
+  _fplanes: The descriptions of the fragment planes.
+  _xfrag0:  The X location of the upper-left hand fragment in the luma plane.
+  _yfrag0:  The Y location of the upper-left hand fragment in the luma plane.*/
+typedef void (*oc_mb_fill_cmapping_func)(oc_mb_map_plane _mb_map[3],
+ const oc_fragment_plane _fplanes[3],int _xfrag0,int _yfrag0);
+
+/*A table of functions used to fill in the chroma plane fragment maps for a
+   macro block for each type of chrominance decimation.
+  oc_mb_create_mapping() indexes this table directly with the pixel format
+   value, so the order of the entries must match the pixel format numbering:
+   entry 0 decimates X and Y, entry 1 decimates Y only, entry 2 decimates X
+   only, and entry 3 does not decimate.*/
+static const oc_mb_fill_cmapping_func OC_MB_FILL_CMAPPING_TABLE[4]={
+  oc_mb_fill_cmapping00,
+  oc_mb_fill_cmapping01,
+  oc_mb_fill_cmapping10,
+  oc_mb_fill_cmapping11
+};
+
+/*Builds the table that maps every macro block to the fragments it covers in
+   each of the three planes.
+  Macro blocks are visited one luma super block at a time; every map entry is
+   first set to -1, and macro blocks that start outside the coded frame keep
+   those values and are flagged OC_MODE_INVALID.
+  _mb_maps:   The list of macro block maps.
+  _mb_modes:  The list of macro block modes; macro blocks completely outside
+               the coded region are marked invalid.
+  _fplanes:   The descriptions of the fragment planes.
+  _pixel_fmt: The chroma decimation type; selects the chroma fill routine.*/
+static void oc_mb_create_mapping(oc_mb_map _mb_maps[],
+ signed char _mb_modes[],const oc_fragment_plane _fplanes[3],int _pixel_fmt){
+  oc_mb_fill_cmapping_func  fill_chroma;
+  unsigned                  sbi;
+  int                       sby;
+  int                       sbx;
+  fill_chroma=OC_MB_FILL_CMAPPING_TABLE[_pixel_fmt];
+  sbi=0;
+  /*Walk the luma super blocks in raster order (4 fragments per step).*/
+  for(sby=0;sby<_fplanes[0].nvfrags;sby+=4){
+    for(sbx=0;sbx<_fplanes[0].nhfrags;sbx+=4){
+      int quad;
+      /*Visit the four macro blocks of this super block in display order:
+         quad>>1 is the macro block row and quad&1 the macro block column.*/
+      for(quad=0;quad<4;quad++){
+        unsigned mbi;
+        int      mbx;
+        int      mby;
+        mbi=sbi<<2|OC_MB_MAP[quad>>1][quad&1];
+        mbx=sbx|(quad&1)<<1;
+        mby=sby|(quad>>1)<<1;
+        /*Start with every fragment index set to -1.*/
+        memset(_mb_maps[mbi],0xFF,sizeof(_mb_maps[mbi]));
+        if(mbx<_fplanes[0].nhfrags&&mby<_fplanes[0].nvfrags){
+          /*Inside the coded frame: luma first, since the 4:4:4 chroma
+             routine reads the luma entries.*/
+          oc_mb_fill_ymapping(_mb_maps[mbi],_fplanes,mbx,mby);
+          (*fill_chroma)(_mb_maps[mbi],_fplanes,mbx,mby);
+        }
+        else _mb_modes[mbi]=OC_MODE_INVALID;
+      }
+      sbi++;
+    }
+  }
+}
+
+/*Marks the fragments which fall all or partially outside the displayable
+   region of the frame.
+  Fragments completely outside the region have their invalid flag set.
+  Fragments that straddle its edge get a borderi that indexes the matching
+   entry of _state->borders[]; every other fragment gets a borderi of -1.
+  _state: The Theora state containing the fragments to be marked.*/
+static void oc_state_border_init(oc_theora_state *_state){
+  oc_fragment       *frag;
+  oc_fragment       *yfrag_end;
+  oc_fragment       *xfrag_end;
+  oc_fragment_plane *fplane;
+  int                crop_x0;
+  int                crop_y0;
+  int                crop_xf;
+  int                crop_yf;
+  int                pli;
+  int                y;
+  int                x;
+  /*The method we use here is slow, but the code is dead simple and handles
+     all the special cases easily.
+    We only ever need to do it once.*/
+  /*Loop through the fragments, marking those completely outside the
+     displayable region and constructing a border mask for those that straddle
+     the border.*/
+  _state->nborders=0;
+  yfrag_end=frag=_state->frags;
+  for(pli=0;pli<3;pli++){
+    fplane=_state->fplanes+pli;
+    /*Set up the cropping rectangle for this plane.
+      crop_x0/crop_y0 are inclusive and crop_xf/crop_yf are exclusive pixel
+       coordinates.*/
+    crop_x0=_state->info.pic_x;
+    crop_xf=_state->info.pic_x+_state->info.pic_width;
+    crop_y0=_state->info.pic_y;
+    crop_yf=_state->info.pic_y+_state->info.pic_height;
+    /*For the chroma planes, halve the rectangle in each decimated direction,
+       rounding the far edge up.*/
+    if(pli>0){
+      if(!(_state->info.pixel_fmt&1)){
+        crop_x0=crop_x0>>1;
+        crop_xf=crop_xf+1>>1;
+      }
+      if(!(_state->info.pixel_fmt&2)){
+        crop_y0=crop_y0>>1;
+        crop_yf=crop_yf+1>>1;
+      }
+    }
+    y=0;
+    /*frag itself is advanced by the inner loop; the outer loop only tracks
+       the pixel row and the end of this plane's fragments.*/
+    for(yfrag_end+=fplane->nfrags;frag<yfrag_end;y+=8){
+      x=0;
+      for(xfrag_end=frag+fplane->nhfrags;frag<xfrag_end;frag++,x+=8){
+        /*First check to see if this fragment is completely outside the
+           displayable region.*/
+        /*Note the special checks for an empty cropping rectangle.
+          This guarantees that if we count a fragment as straddling the
+           border below, at least one pixel in the fragment will be inside
+           the displayable region.*/
+        if(x+8<=crop_x0||crop_xf<=x||y+8<=crop_y0||crop_yf<=y||
+         crop_x0>=crop_xf||crop_y0>=crop_yf){
+          frag->invalid=1;
+        }
+        /*Otherwise, check to see if it straddles the border.*/
+        else if(x<crop_x0&&crop_x0<x+8||x<crop_xf&&crop_xf<x+8||
+         y<crop_y0&&crop_y0<y+8||y<crop_yf&&crop_yf<y+8){
+          ogg_int64_t mask;
+          int         npixels;
+          int         i;
+          /*Bit (i<<3|j) of the mask is set when pixel (x+j,y+i) lies inside
+             the cropping rectangle; npixels counts the set bits.*/
+          mask=npixels=0;
+          for(i=0;i<8;i++){
+            int j;
+            for(j=0;j<8;j++){
+              if(x+j>=crop_x0&&x+j<crop_xf&&y+i>=crop_y0&&y+i<crop_yf){
+                mask|=(ogg_int64_t)1<<(i<<3|j);
+                npixels++;
+              }
+            }
+          }
+          /*Search the fragment array for border info with the same pattern.
+            In general, there will be at most 8 different patterns (per
+             plane).
+            If no existing entry matches, a new one is appended.
+            NOTE(review): no check against the capacity of _state->borders[]
+             is visible here; confirm the array is large enough for all
+             three planes.*/
+          for(i=0;;i++){
+            if(i>=_state->nborders){
+              _state->nborders++;
+              _state->borders[i].mask=mask;
+              _state->borders[i].npixels=npixels;
+            }
+            else if(_state->borders[i].mask!=mask)continue;
+            frag->borderi=i;
+            break;
+          }
+        }
+        else frag->borderi=-1;
+      }
+    }
+  }
+}
+
+/*Computes the fragment, super block and macro block layout of the frame and
+   allocates the arrays that are indexed by them.
+  If an allocation fails, the arrays that were allocated successfully are left
+   in _state; oc_state_init() releases them with oc_state_frarray_clear().
+  _state: The state to set up; _state->info must already be filled in.
+  Return: 0 on success, TH_EIMPL if one of the counts overflows, or TH_EFAULT
+   if an allocation fails.*/
+static int oc_state_frarray_init(oc_theora_state *_state){
+  int       yhfrags;
+  int       yvfrags;
+  int       chfrags;
+  int       cvfrags;
+  ptrdiff_t yfrags;
+  ptrdiff_t cfrags;
+  ptrdiff_t nfrags;
+  unsigned  yhsbs;
+  unsigned  yvsbs;
+  unsigned  chsbs;
+  unsigned  cvsbs;
+  unsigned  ysbs;
+  unsigned  csbs;
+  unsigned  nsbs;
+  size_t    nmbs;
+  int       hdec;
+  int       vdec;
+  int       pli;
+  /*Figure out the number of fragments in each plane.*/
+  /*These parameters have already been validated to be multiples of 16.*/
+  yhfrags=_state->info.frame_width>>3;
+  yvfrags=_state->info.frame_height>>3;
+  /*hdec and vdec are 1 when chroma is decimated in that direction.*/
+  hdec=!(_state->info.pixel_fmt&1);
+  vdec=!(_state->info.pixel_fmt&2);
+  chfrags=yhfrags+hdec>>hdec;
+  cvfrags=yvfrags+vdec>>vdec;
+  yfrags=yhfrags*(ptrdiff_t)yvfrags;
+  cfrags=chfrags*(ptrdiff_t)cvfrags;
+  nfrags=yfrags+2*cfrags;
+  /*Figure out the number of super blocks in each plane.
+    A super block is 4 fragments on a side, so round up to a multiple of 4.*/
+  yhsbs=yhfrags+3>>2;
+  yvsbs=yvfrags+3>>2;
+  chsbs=chfrags+3>>2;
+  cvsbs=cvfrags+3>>2;
+  ysbs=yhsbs*yvsbs;
+  csbs=chsbs*cvsbs;
+  nsbs=ysbs+2*csbs;
+  /*Four macro blocks per luma super block.*/
+  nmbs=(size_t)ysbs<<2;
+  /*Check for overflow.
+    We support the ridiculous upper limits of the specification (1048560 by
+     1048560, or 3 TB frames) if the target architecture has 64-bit pointers,
+     but for those with 32-bit pointers (or smaller!) we have to check.
+    If the caller wants to prevent denial-of-service by imposing a more
+     reasonable upper limit on the size of attempted allocations, they must do
+     so themselves; we have no platform independent way to determine how much
+     system memory there is nor an application-independent way to decide what a
+     "reasonable" allocation is.*/
+  if(yfrags/yhfrags!=yvfrags||2*cfrags<cfrags||nfrags<yfrags||
+   ysbs/yhsbs!=yvsbs||2*csbs<csbs||nsbs<ysbs||nmbs>>2!=ysbs){
+    return TH_EIMPL;
+  }
+  /*Initialize the fragment array.
+    The planes are laid out back to back: Y first, then the two chroma
+     planes, for both the fragment and the super block arrays.*/
+  _state->fplanes[0].nhfrags=yhfrags;
+  _state->fplanes[0].nvfrags=yvfrags;
+  _state->fplanes[0].froffset=0;
+  _state->fplanes[0].nfrags=yfrags;
+  _state->fplanes[0].nhsbs=yhsbs;
+  _state->fplanes[0].nvsbs=yvsbs;
+  _state->fplanes[0].sboffset=0;
+  _state->fplanes[0].nsbs=ysbs;
+  _state->fplanes[1].nhfrags=_state->fplanes[2].nhfrags=chfrags;
+  _state->fplanes[1].nvfrags=_state->fplanes[2].nvfrags=cvfrags;
+  _state->fplanes[1].froffset=yfrags;
+  _state->fplanes[2].froffset=yfrags+cfrags;
+  _state->fplanes[1].nfrags=_state->fplanes[2].nfrags=cfrags;
+  _state->fplanes[1].nhsbs=_state->fplanes[2].nhsbs=chsbs;
+  _state->fplanes[1].nvsbs=_state->fplanes[2].nvsbs=cvsbs;
+  _state->fplanes[1].sboffset=ysbs;
+  _state->fplanes[2].sboffset=ysbs+csbs;
+  _state->fplanes[1].nsbs=_state->fplanes[2].nsbs=csbs;
+  _state->nfrags=nfrags;
+  _state->frags=_ogg_calloc(nfrags,sizeof(*_state->frags));
+  _state->frag_mvs=_ogg_malloc(nfrags*sizeof(*_state->frag_mvs));
+  _state->nsbs=nsbs;
+  _state->sb_maps=_ogg_malloc(nsbs*sizeof(*_state->sb_maps));
+  _state->sb_flags=_ogg_calloc(nsbs,sizeof(*_state->sb_flags));
+  _state->nhmbs=yhsbs<<1;
+  _state->nvmbs=yvsbs<<1;
+  _state->nmbs=nmbs;
+  _state->mb_maps=_ogg_calloc(nmbs,sizeof(*_state->mb_maps));
+  _state->mb_modes=_ogg_calloc(nmbs,sizeof(*_state->mb_modes));
+  _state->coded_fragis=_ogg_malloc(nfrags*sizeof(*_state->coded_fragis));
+  /*All allocations are attempted before any result is checked, so every
+     pointer holds either a valid block or NULL when we return.*/
+  if(_state->frags==NULL||_state->frag_mvs==NULL||_state->sb_maps==NULL||
+   _state->sb_flags==NULL||_state->mb_maps==NULL||_state->mb_modes==NULL||
+   _state->coded_fragis==NULL){
+    return TH_EFAULT;
+  }
+  /*Create the mapping from super blocks to fragments.*/
+  for(pli=0;pli<3;pli++){
+    oc_fragment_plane *fplane;
+    fplane=_state->fplanes+pli;
+    oc_sb_create_plane_mapping(_state->sb_maps+fplane->sboffset,
+     _state->sb_flags+fplane->sboffset,fplane->froffset,
+     fplane->nhfrags,fplane->nvfrags);
+  }
+  /*Create the mapping from macro blocks to fragments.*/
+  oc_mb_create_mapping(_state->mb_maps,_state->mb_modes,
+   _state->fplanes,_state->info.pixel_fmt);
+  /*Initialize the invalid and borderi fields of each fragment.*/
+  oc_state_border_init(_state);
+  return 0;
+}
+
+/*Frees the arrays allocated by oc_state_frarray_init().
+  _state: The state whose fragment, super block and macro block arrays are to
+           be released.*/
+static void oc_state_frarray_clear(oc_theora_state *_state){
+  /*Each array is a separate allocation, so they are simply released in the
+     order they were allocated.*/
+  _ogg_free(_state->frags);
+  _ogg_free(_state->frag_mvs);
+  _ogg_free(_state->sb_maps);
+  _ogg_free(_state->sb_flags);
+  _ogg_free(_state->mb_maps);
+  _ogg_free(_state->mb_modes);
+  _ogg_free(_state->coded_fragis);
+}
+
+
+/*Initializes the buffers used for reconstructed frames.
+  These buffers are padded with 16 extra pixels on each side, to allow
+   unrestricted motion vectors without special casing the boundary.
+  If chroma is decimated in either direction, the padding is reduced by a
+   factor of 2 on the appropriate sides.
+  All reference frames live in one aligned allocation, whose base pointer is
+   kept in _state->ref_frame_handle; the table of per-fragment buffer offsets
+   is allocated separately.
+  On failure, no allocation made here is left behind and
+   _state->frag_buf_offs is NULL.
+  _state: The state to set up; the fragment planes must already be
+           initialized.
+  _nrefs: The number of reference buffers to init; must be in the range 3...6.
+  Return: 0 on success, TH_EINVAL if _nrefs is out of range, TH_EIMPL if a
+   buffer size overflows, or TH_EFAULT if an allocation fails.*/
+static int oc_state_ref_bufs_init(oc_theora_state *_state,int _nrefs){
+  th_info       *info;
+  unsigned char *ref_frame_data;
+  size_t         ref_frame_data_sz;
+  size_t         ref_frame_sz;
+  size_t         yplane_sz;
+  size_t         cplane_sz;
+  int            yhstride;
+  int            yheight;
+  int            chstride;
+  int            cheight;
+  ptrdiff_t      align;
+  ptrdiff_t      yoffset;
+  ptrdiff_t      coffset;
+  ptrdiff_t     *frag_buf_offs;
+  ptrdiff_t      fragi;
+  int            hdec;
+  int            vdec;
+  int            rfi;
+  int            pli;
+  if(_nrefs<3||_nrefs>6)return TH_EINVAL;
+  info=&_state->info;
+  /*Compute the image buffer parameters for each plane.*/
+  hdec=!(info->pixel_fmt&1);
+  vdec=!(info->pixel_fmt&2);
+  yhstride=info->frame_width+2*OC_UMV_PADDING;
+  yheight=info->frame_height+2*OC_UMV_PADDING;
+  /*Require 16-byte aligned rows in the chroma planes.*/
+  chstride=(yhstride>>hdec)+15&~15;
+  cheight=yheight>>vdec;
+  yplane_sz=yhstride*(size_t)yheight;
+  cplane_sz=chstride*(size_t)cheight;
+  /*Offsets from the start of a padded plane to its first image pixel.*/
+  yoffset=OC_UMV_PADDING+OC_UMV_PADDING*(ptrdiff_t)yhstride;
+  coffset=(OC_UMV_PADDING>>hdec)+(OC_UMV_PADDING>>vdec)*(ptrdiff_t)chstride;
+  /*Although we guarantee the rows of the chroma planes are a multiple of 16
+     bytes, the initial padding on the first row may only be 8 bytes.
+    Compute the offset needed to the actual image data to a multiple of 16.*/
+  align=-coffset&15;
+  /*The extra 16 bytes per frame leave room for the alignment adjustment.*/
+  ref_frame_sz=yplane_sz+2*cplane_sz+16;
+  ref_frame_data_sz=_nrefs*ref_frame_sz;
+  /*Check for overflow.
+    The same caveats apply as for oc_state_frarray_init().*/
+  if(yplane_sz/yhstride!=(size_t)yheight||2*cplane_sz+16<cplane_sz||
+   ref_frame_sz<yplane_sz||ref_frame_data_sz/_nrefs!=ref_frame_sz){
+    return TH_EIMPL;
+  }
+  ref_frame_data=oc_aligned_malloc(ref_frame_data_sz,16);
+  frag_buf_offs=_state->frag_buf_offs=
+   _ogg_malloc(_state->nfrags*sizeof(*frag_buf_offs));
+  if(ref_frame_data==NULL||frag_buf_offs==NULL){
+    _ogg_free(frag_buf_offs);
+    oc_aligned_free(ref_frame_data);
+    /*Do not leave a dangling pointer in the state: a later call to
+       oc_state_ref_bufs_clear() would otherwise free it a second time.*/
+    _state->frag_buf_offs=NULL;
+    return TH_EFAULT;
+  }
+  /*Set up the width, height and stride for the image buffers.*/
+  _state->ref_frame_bufs[0][0].width=info->frame_width;
+  _state->ref_frame_bufs[0][0].height=info->frame_height;
+  _state->ref_frame_bufs[0][0].stride=yhstride;
+  _state->ref_frame_bufs[0][1].width=_state->ref_frame_bufs[0][2].width=
+   info->frame_width>>hdec;
+  _state->ref_frame_bufs[0][1].height=_state->ref_frame_bufs[0][2].height=
+   info->frame_height>>vdec;
+  _state->ref_frame_bufs[0][1].stride=_state->ref_frame_bufs[0][2].stride=
+   chstride;
+  /*Every reference frame shares the geometry of the first one.*/
+  for(rfi=1;rfi<_nrefs;rfi++){
+    memcpy(_state->ref_frame_bufs[rfi],_state->ref_frame_bufs[0],
+     sizeof(_state->ref_frame_bufs[0]));
+  }
+  /*Remember the base of the allocation so it can be freed later;
+     ref_frame_data itself is advanced below.*/
+  _state->ref_frame_handle=ref_frame_data;
+  /*Set up the data pointers for the image buffers.*/
+  for(rfi=0;rfi<_nrefs;rfi++){
+    _state->ref_frame_bufs[rfi][0].data=ref_frame_data+yoffset;
+    ref_frame_data+=yplane_sz+align;
+    _state->ref_frame_bufs[rfi][1].data=ref_frame_data+coffset;
+    ref_frame_data+=cplane_sz;
+    _state->ref_frame_bufs[rfi][2].data=ref_frame_data+coffset;
+    /*Use up the rest of the 16 spare bytes so each frame has the same
+       total size.*/
+    ref_frame_data+=cplane_sz+(16-align);
+    /*Flip the buffer upside down.
+      This allows us to decode Theora's bottom-up frames in their natural
+       order, yet return a top-down buffer with a positive stride to the user.*/
+    oc_ycbcr_buffer_flip(_state->ref_frame_bufs[rfi],
+     _state->ref_frame_bufs[rfi]);
+  }
+  _state->ref_ystride[0]=-yhstride;
+  _state->ref_ystride[1]=_state->ref_ystride[2]=-chstride;
+  /*Initialize the fragment buffer offsets.
+    Each offset is measured from the luma data pointer of reference frame 0,
+     and fragi runs continuously across the three planes.*/
+  ref_frame_data=_state->ref_frame_bufs[0][0].data;
+  fragi=0;
+  for(pli=0;pli<3;pli++){
+    th_img_plane      *iplane;
+    oc_fragment_plane *fplane;
+    unsigned char     *vpix;
+    ptrdiff_t          stride;
+    ptrdiff_t          vfragi_end;
+    int                nhfrags;
+    iplane=_state->ref_frame_bufs[0]+pli;
+    fplane=_state->fplanes+pli;
+    vpix=iplane->data;
+    vfragi_end=fplane->froffset+fplane->nfrags;
+    nhfrags=fplane->nhfrags;
+    stride=iplane->stride;
+    while(fragi<vfragi_end){
+      ptrdiff_t      hfragi_end;
+      unsigned char *hpix;
+      hpix=vpix;
+      for(hfragi_end=fragi+nhfrags;fragi<hfragi_end;fragi++){
+        frag_buf_offs[fragi]=hpix-ref_frame_data;
+        hpix+=8;
+      }
+      /*Advance by one fragment row (8 pixel rows).*/
+      vpix+=stride<<3;
+    }
+  }
+  /*Initialize the reference frame pointers and indices.*/
+  _state->ref_frame_idx[OC_FRAME_GOLD]=
+   _state->ref_frame_idx[OC_FRAME_PREV]=
+   _state->ref_frame_idx[OC_FRAME_GOLD_ORIG]=
+   _state->ref_frame_idx[OC_FRAME_PREV_ORIG]=
+   _state->ref_frame_idx[OC_FRAME_SELF]=
+   _state->ref_frame_idx[OC_FRAME_IO]=-1;
+  _state->ref_frame_data[OC_FRAME_GOLD]=
+   _state->ref_frame_data[OC_FRAME_PREV]=
+   _state->ref_frame_data[OC_FRAME_GOLD_ORIG]=
+   _state->ref_frame_data[OC_FRAME_PREV_ORIG]=
+   _state->ref_frame_data[OC_FRAME_SELF]=
+   _state->ref_frame_data[OC_FRAME_IO]=NULL;
+  return 0;
+}
+
+/*Frees the storage allocated by oc_state_ref_bufs_init().
+  _state: The state whose reference frame storage and fragment buffer offset
+           table are to be released.*/
+static void oc_state_ref_bufs_clear(oc_theora_state *_state){
+  /*The handle is the base pointer of the single block that holds every
+     reference frame.*/
+  oc_aligned_free(_state->ref_frame_handle);
+  _ogg_free(_state->frag_buf_offs);
+}
+
+
+/*Installs the portable C versions of the accelerated routines.
+  This clears cpu_flags, fills in the function table when
+   OC_STATE_USE_VTABLE is defined, and selects OC_FZIG_ZAG as the zig-zag
+   table stored in opt_data.
+  _state: The state to set up.*/
+void oc_state_accel_init_c(oc_theora_state *_state){
+  _state->cpu_flags=0;
+#if defined(OC_STATE_USE_VTABLE)
+  _state->opt_vtable.frag_copy=oc_frag_copy_c;
+  _state->opt_vtable.frag_copy_list=oc_frag_copy_list_c;
+  _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_c;
+  _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_c;
+  _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_c;
+  _state->opt_vtable.idct8x8=oc_idct8x8_c;
+  _state->opt_vtable.state_frag_recon=oc_state_frag_recon_c;
+  _state->opt_vtable.loop_filter_init=oc_loop_filter_init_c;
+  _state->opt_vtable.state_loop_filter_frag_rows=
+   oc_state_loop_filter_frag_rows_c;
+  _state->opt_vtable.restore_fpu=oc_restore_fpu_c;
+#endif
+  _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG;
+}
+
+
+/*Validates the stream parameters and initializes the shared codec state.
+  _state: The state to initialize; it is zeroed once validation succeeds.
+  _info:  The stream parameters; a copy is stored in _state->info with pic_y
+           converted to the bitstream's bottom-up convention.
+  _nrefs: The number of reference buffers to allocate; passed on to
+           oc_state_ref_bufs_init().
+  Return: 0 on success, TH_EFAULT if _info is NULL or an allocation fails,
+   TH_EINVAL if a parameter is out of range, or TH_EIMPL if a buffer size
+   computation overflows.*/
+int oc_state_init(oc_theora_state *_state,const th_info *_info,int _nrefs){
+  int ret;
+  /*First validate the parameters.*/
+  if(_info==NULL)return TH_EFAULT;
+  /*The width and height of the encoded frame must be multiples of 16.
+    They must also, when divided by 16, fit into a 16-bit unsigned integer.
+    The displayable frame offset coordinates must fit into an 8-bit unsigned
+     integer.
+    Note that the offset Y in the API is specified on the opposite side from
+     how it is specified in the bitstream, because the Y axis is flipped in
+     the bitstream.
+    The displayable frame must fit inside the encoded frame.
+    This is tested with subtractions instead of pic_x+pic_width and
+     pic_y+pic_height, because those sums can wrap around for very large
+     unsigned values and make an oversized picture region look valid.
+    The color space must be one known by the encoder.
+    The framerate ratio must not contain a zero value.*/
+  if((_info->frame_width&0xF)||(_info->frame_height&0xF)||
+   _info->frame_width<=0||_info->frame_width>=0x100000||
+   _info->frame_height<=0||_info->frame_height>=0x100000||
+   _info->pic_width>_info->frame_width||
+   _info->pic_x>_info->frame_width-_info->pic_width||
+   _info->pic_height>_info->frame_height||
+   _info->pic_y>_info->frame_height-_info->pic_height||
+   _info->pic_x>255||_info->frame_height-_info->pic_height-_info->pic_y>255||
+   /*Note: the following <0 comparisons may generate spurious warnings on
+      platforms where enums are unsigned.
+     We could cast them to unsigned and just use the following >= comparison,
+      but there are a number of compilers which will mis-optimize this.
+     It's better to live with the spurious warnings.*/
+   _info->colorspace<0||_info->colorspace>=TH_CS_NSPACES||
+   _info->pixel_fmt<0||_info->pixel_fmt>=TH_PF_NFORMATS||
+   _info->fps_numerator<1||_info->fps_denominator<1){
+    return TH_EINVAL;
+  }
+  memset(_state,0,sizeof(*_state));
+  memcpy(&_state->info,_info,sizeof(*_info));
+  /*Invert the sense of pic_y to match Theora's right-handed coordinate
+     system.*/
+  _state->info.pic_y=_info->frame_height-_info->pic_height-_info->pic_y;
+  _state->frame_type=OC_UNKWN_FRAME;
+  oc_state_accel_init(_state);
+  ret=oc_state_frarray_init(_state);
+  if(ret>=0)ret=oc_state_ref_bufs_init(_state,_nrefs);
+  if(ret<0){
+    /*oc_state_ref_bufs_init() frees its own allocations on failure, so only
+       the fragment arrays can still be outstanding here.*/
+    oc_state_frarray_clear(_state);
+    return ret;
+  }
+  /*If the keyframe_granule_shift is out of range, use the maximum allowable
+     value.*/
+  if(_info->keyframe_granule_shift<0||_info->keyframe_granule_shift>31){
+    _state->info.keyframe_granule_shift=31;
+  }
+  _state->keyframe_num=0;
+  _state->curframe_num=-1;
+  /*3.2.0 streams mark the frame index instead of the frame count.
+    This was changed with stream version 3.2.1 to conform to other Ogg
+     codecs.
+    We add an extra bias when computing granule positions for new streams.*/
+  _state->granpos_bias=TH_VERSION_CHECK(_info,3,2,1);
+  return 0;
+}
+
+/*Releases the storage owned by a state: first the reference frame buffers,
+   then the fragment arrays.
+  _state: The state to clear.*/
+void oc_state_clear(oc_theora_state *_state){
+  oc_state_ref_bufs_clear(_state);
+  oc_state_frarray_clear(_state);
+}
+
+
+/*Extends the left-most and right-most pixel of each image row out into the
+   horizontal padding, for use by unrestricted motion vectors.
+  Only the left and right borders of the requested rows are written; the top
+   and bottom borders are handled by oc_state_borders_fill_caps().
+  _state: The context containing the reference buffers.
+  _refi:  The index of the reference buffer to pad.
+  _pli:   The color plane.
+  _y0:    The Y coordinate of the first row to pad.
+  _yend:  The Y coordinate of the row to stop padding at.*/
+void oc_state_borders_fill_rows(oc_theora_state *_state,int _refi,int _pli,
+ int _y0,int _yend){
+  th_img_plane  *iplane;
+  unsigned char *row;
+  unsigned char *row_end;
+  int            stride;
+  int            hpadding;
+  /*Chroma planes that are decimated horizontally get half the padding.*/
+  hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1));
+  iplane=_state->ref_frame_bufs[_refi]+_pli;
+  stride=iplane->stride;
+  row_end=iplane->data+_yend*(ptrdiff_t)stride;
+  /*The loop tests for inequality rather than ordering so that it also works
+     when the stride is negative.*/
+  for(row=iplane->data+_y0*(ptrdiff_t)stride;row!=row_end;row+=stride){
+    /*Replicate the first pixel to the left...*/
+    memset(row-hpadding,row[0],hpadding);
+    /*...and the last pixel to the right.*/
+    memset(row+iplane->width,row[iplane->width-1],hpadding);
+  }
+}
+
+/*Copies the first and last padded image rows out into the vertical padding,
+   for use by unrestricted motion vectors.
+  Whole rows, including their left and right padding, are copied, so this must
+   be called after the left and right borders have been added.
+  _state: The context containing the reference buffers.
+  _refi:  The index of the reference buffer to pad.
+  _pli:   The color plane.*/
+void oc_state_borders_fill_caps(oc_theora_state *_state,int _refi,int _pli){
+  th_img_plane  *iplane;
+  unsigned char *top;
+  unsigned char *top_end;
+  unsigned char *bottom;
+  int            stride;
+  int            hpadding;
+  int            vpadding;
+  int            fullw;
+  /*Chroma planes get half the padding in each decimated direction.*/
+  hpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&1));
+  vpadding=OC_UMV_PADDING>>(_pli!=0&&!(_state->info.pixel_fmt&2));
+  iplane=_state->ref_frame_bufs[_refi]+_pli;
+  stride=iplane->stride;
+  /*The width of a row including the padding on both sides.*/
+  fullw=iplane->width+(hpadding<<1);
+  top=iplane->data-hpadding;
+  bottom=iplane->data+(iplane->height-1)*(ptrdiff_t)stride-hpadding;
+  /*Each pass copies the current outermost rows one step further out, then
+     moves on to the rows just written.*/
+  for(top_end=top-stride*(ptrdiff_t)vpadding;top!=top_end;
+   top-=stride,bottom+=stride){
+    memcpy(top-stride,top,fullw);
+    memcpy(bottom+stride,bottom,fullw);
+  }
+}
+
+/*Pads all three planes of a reference image on every side, for use by
+   unrestricted motion vectors.
+  For each plane the left and right borders of all rows are filled first,
+   followed by the top and bottom borders.
+  _state: The context containing the reference buffers.
+  _refi:  The index of the reference buffer to pad.*/
+void oc_state_borders_fill(oc_theora_state *_state,int _refi){
+  int pli;
+  for(pli=0;pli<3;pli++){
+    int nrows;
+    nrows=_state->ref_frame_bufs[_refi][pli].height;
+    oc_state_borders_fill_rows(_state,_refi,pli,0,nrows);
+    oc_state_borders_fill_caps(_state,_refi,pli);
+  }
+}
+
+/*Determines the offsets in an image buffer to use for motion compensation.
+  Each offset is a row count times _state->ref_ystride[_pli] plus a column
+   count, i.e., it is relative to the position of the fragment being
+   predicted.
+  _state:   The Theora state the offsets are to be computed with.
+  _offsets: Returns the offset for the buffer(s).
+            _offsets[0] is always set.
+            _offsets[1] is set if the motion vector has non-zero fractional
+             components.
+  _pli:     The color plane index.
+  _mv:      The motion vector.
+  Return: The number of offsets returned: 1 or 2.*/
+int oc_state_get_mv_offsets(const oc_theora_state *_state,int _offsets[2],
+ int _pli,oc_mv _mv){
+  /*Here is a brief description of how Theora handles motion vectors:
+    Motion vector components are specified to half-pixel accuracy in
+     undecimated directions of each plane, and quarter-pixel accuracy in
+     decimated directions.
+    Integer parts are extracted by dividing (not shifting) by the
+     appropriate amount, with truncation towards zero.
+    These integer values are used to calculate the first offset.
+
+    If either of the fractional parts are non-zero, then a second offset is
+     computed.
+    No third or fourth offsets are computed, even if both components have
+     non-zero fractional parts.
+    The second offset is computed by dividing (not shifting) by the
+     appropriate amount, always truncating _away_ from zero.*/
+#if 0
+  /*This version of the code doesn't use any tables, but is slower.
+    It is compiled out and kept for reference only.*/
+  int ystride;
+  int xprec;
+  int yprec;
+  int xfrac;
+  int yfrac;
+  int offs;
+  int dx;
+  int dy;
+  ystride=_state->ref_ystride[_pli];
+  /*These two variables decide whether we are in half- or quarter-pixel
+     precision in each component.*/
+  xprec=1+(_pli!=0&&!(_state->info.pixel_fmt&1));
+  yprec=1+(_pli!=0&&!(_state->info.pixel_fmt&2));
+  dx=OC_MV_X(_mv);
+  dy=OC_MV_Y(_mv);
+  /*These two variables are either 0 if all the fractional bits are zero or -1
+     if any of them are non-zero.*/
+  xfrac=OC_SIGNMASK(-(dx&(xprec|1)));
+  yfrac=OC_SIGNMASK(-(dy&(yprec|1)));
+  offs=(dx>>xprec)+(dy>>yprec)*ystride;
+  if(xfrac||yfrac){
+    int xmask;
+    int ymask;
+    xmask=OC_SIGNMASK(dx);
+    ymask=OC_SIGNMASK(dy);
+    yfrac&=ystride;
+    _offsets[0]=offs-(xfrac&xmask)+(yfrac&ymask);
+    _offsets[1]=offs-(xfrac&~xmask)+(yfrac&~ymask);
+    return 2;
+  }
+  else{
+    _offsets[0]=offs;
+    return 1;
+  }
+#else
+  /*Using tables simplifies the code, and there's enough arithmetic to hide the
+     latencies of the memory references.
+    Both tables are indexed first by whether the component has quarter-pixel
+     precision (1) or half-pixel precision (0), and then by the component
+     value plus 31.
+    OC_MVMAP holds the integer part of the component, truncated towards zero.
+    Only 63 values are listed per row, for components -31...31; presumably
+     that is the full range a component can take (TODO confirm).*/
+  static const signed char OC_MVMAP[2][64]={
+    {
+          -15,-15,-14,-14,-13,-13,-12,-12,-11,-11,-10,-10, -9, -9, -8,
+       -8, -7, -7, -6, -6, -5, -5, -4, -4, -3, -3, -2, -2, -1, -1,  0,
+        0,  0,  1,  1,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  7,
+        8,  8,  9,  9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15
+    },
+    {
+           -7, -7, -7, -7, -6, -6, -6, -6, -5, -5, -5, -5, -4, -4, -4,
+       -4, -3, -3, -3, -3, -2, -2, -2, -2, -1, -1, -1, -1,  0,  0,  0,
+        0,  0,  0,  0,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  3,
+        4,  4,  4,  4,  5,  5,  5,  5,  6,  6,  6,  6,  7,  7,  7,  7
+    }
+  };
+  /*OC_MVMAP2 holds the extra step (-1, 0 or 1) that is added to the integer
+     part to reach the second offset; it is 0 exactly when the component has
+     no fractional part.*/
+  static const signed char OC_MVMAP2[2][64]={
+    {
+        -1, 0,-1,  0,-1, 0,-1,  0,-1, 0,-1,  0,-1, 0,-1,
+      0,-1, 0,-1,  0,-1, 0,-1,  0,-1, 0,-1,  0,-1, 0,-1,
+      0, 1, 0, 1,  0, 1, 0, 1,  0, 1, 0, 1,  0, 1, 0, 1,
+      0, 1, 0, 1,  0, 1, 0, 1,  0, 1, 0, 1,  0, 1, 0, 1
+    },
+    {
+        -1,-1,-1,  0,-1,-1,-1,  0,-1,-1,-1,  0,-1,-1,-1,
+      0,-1,-1,-1,  0,-1,-1,-1,  0,-1,-1,-1,  0,-1,-1,-1,
+      0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,
+      0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1,  0, 1, 1, 1
+    }
+  };
+  int ystride;
+  int qpx;
+  int qpy;
+  int mx;
+  int my;
+  int mx2;
+  int my2;
+  int offs;
+  int dx;
+  int dy;
+  ystride=_state->ref_ystride[_pli];
+  /*qpy and qpx are 1 only for a chroma plane that is decimated in that
+     direction.*/
+  qpy=_pli!=0&&!(_state->info.pixel_fmt&2);
+  dx=OC_MV_X(_mv);
+  dy=OC_MV_Y(_mv);
+  my=OC_MVMAP[qpy][dy+31];
+  my2=OC_MVMAP2[qpy][dy+31];
+  qpx=_pli!=0&&!(_state->info.pixel_fmt&1);
+  mx=OC_MVMAP[qpx][dx+31];
+  mx2=OC_MVMAP2[qpx][dx+31];
+  offs=my*ystride+mx;
+  if(mx2||my2){
+    _offsets[1]=offs+my2*ystride+mx2;
+    _offsets[0]=offs;
+    return 2;
+  }
+  _offsets[0]=offs;
+  return 1;
+#endif
+}
+
+/*Reconstructs one fragment into the OC_FRAME_SELF reference buffer.
+  The residue is built in the upper half of _dct_coeffs and is then handed to
+   the intra or inter reconstruction routine, depending on the reference
+   frame index stored for the fragment.
+  _state:      The state holding the fragments and the reference buffers.
+  _fragi:      The index of the fragment to reconstruct.
+  _pli:        The color plane the fragment belongs to.
+  _dct_coeffs: The coefficients, with the DC value in entry 0; entries
+                64...127 are used for the residue.
+  _last_zzi:   A value below 2 selects the DC-only shortcut; otherwise it is
+                passed on to oc_idct8x8().
+  _dc_quant:   The quantizer the DC coefficient is multiplied by.*/
+void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi,
+ int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){
+  ogg_int16_t   *residue;
+  unsigned char *dst;
+  ptrdiff_t      buf_off;
+  int            ystride;
+  int            refi;
+  residue=_dct_coeffs+64;
+  /*Apply the inverse transform.*/
+  if(_last_zzi>=2){
+    /*General case: dequantize the DC coefficient, then run the full iDCT.*/
+    _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant);
+    oc_idct8x8(_state,residue,_dct_coeffs,_last_zzi);
+  }
+  else{
+    /*Only a DC component: the residue is a constant block.
+      This dequant product is rounded (unlike the others) because no iDCT
+       rounding takes place on this path.*/
+    ogg_int16_t dc;
+    int         ci;
+    dc=(ogg_int16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5);
+    /*LOOP VECTORIZES.*/
+    for(ci=0;ci<64;ci++)residue[ci]=dc;
+  }
+  /*Fill in the target buffer.*/
+  buf_off=_state->frag_buf_offs[_fragi];
+  refi=_state->frags[_fragi].refi;
+  ystride=_state->ref_ystride[_pli];
+  dst=_state->ref_frame_data[OC_FRAME_SELF]+buf_off;
+  if(refi!=OC_FRAME_SELF){
+    const unsigned char *ref;
+    int                  mvoffsets[2];
+    int                  noffsets;
+    ref=_state->ref_frame_data[refi]+buf_off;
+    noffsets=oc_state_get_mv_offsets(_state,mvoffsets,_pli,
+     _state->frag_mvs[_fragi]);
+    if(noffsets>1){
+      /*Fractional motion vector: predict from two reference positions.*/
+      oc_frag_recon_inter2(_state,
+       dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,residue);
+    }
+    else{
+      oc_frag_recon_inter(_state,dst,ref+mvoffsets[0],ystride,residue);
+    }
+  }
+  else oc_frag_recon_intra(_state,dst,ystride,residue);
+}
+
+/*Filters across a vertical block edge, one pixel row at a time for 8 rows.
+  For each row, the two pixels on either side of the edge are read and the
+   inner two are adjusted.
+  _pix:     The first pixel to the right of the edge in the first row.
+  _ystride: The offset from one row to the next.
+  _bv:      The bounding values table, indexed by the (possibly negative)
+             scaled filter response.*/
+static void loop_filter_h(unsigned char *_pix,int _ystride,signed char *_bv){
+  unsigned char *row;
+  int            y;
+  /*Start two pixels to the left of the edge.*/
+  row=_pix-2;
+  for(y=0;y<8;y++,row+=_ystride){
+    int delta;
+    delta=row[0]-row[3]+3*(row[2]-row[1]);
+    /*The _bv array is used to compute the function
+      f=OC_CLAMPI(OC_MINI(-_2flimit-f,0),f,OC_MAXI(_2flimit-f,0));
+      where _2flimit=_state->loop_filter_limits[_state->qis[0]]<<1;*/
+    delta=_bv[delta+4>>3];
+    row[1]=OC_CLAMP255(row[1]+delta);
+    row[2]=OC_CLAMP255(row[2]-delta);
+  }
+}
+
+/*Filters across a horizontal block edge, one pixel column at a time for 8
+   columns.
+  For each column, the two pixels on either side of the edge are read and the
+   inner two are adjusted.
+  _pix:     The first pixel of the row just below the edge.
+  _ystride: The offset from one row to the next.
+  _bv:      The bounding values table, indexed by the (possibly negative)
+             scaled filter response.*/
+static void loop_filter_v(unsigned char *_pix,int _ystride,signed char *_bv){
+  unsigned char *r0;
+  unsigned char *r1;
+  unsigned char *r2;
+  unsigned char *r3;
+  int            x;
+  /*The four rows that straddle the edge: two before _pix, then _pix and the
+     row after it.*/
+  r1=_pix-_ystride;
+  r0=r1-_ystride;
+  r2=_pix;
+  r3=_pix+_ystride;
+  for(x=0;x<8;x++){
+    int delta;
+    delta=r0[x]-r3[x]+3*(r2[x]-r1[x]);
+    /*The _bv array is used to compute the function
+      f=OC_CLAMPI(OC_MINI(-_2flimit-f,0),f,OC_MAXI(_2flimit-f,0));
+      where _2flimit=_state->loop_filter_limits[_state->qis[0]]<<1;*/
+    delta=_bv[delta+4>>3];
+    r1[x]=OC_CLAMP255(r1[x]+delta);
+    r2[x]=OC_CLAMP255(r2[x]-delta);
+  }
+}
+
+/*Builds the bounding values table used by the loop filter.
+  The table is centered on entry 127: it is the identity for responses of
+   magnitude below _flimit, ramps back down towards zero over the next
+   _flimit entries, and is zero everywhere else.
+  _bv:     Storage for the array.
+  _flimit: The filter limit as defined in Section 7.10 of the spec.*/
+void oc_loop_filter_init_c(signed char _bv[256],int _flimit){
+  int i;
+  memset(_bv,0,256*sizeof(*_bv));
+  for(i=0;i<_flimit;i++){
+    int lo;
+    int hi;
+    lo=127-i-_flimit;
+    hi=127+i+_flimit;
+    /*The identity section next to the center.*/
+    _bv[127-i]=(signed char)(-i);
+    _bv[127+i]=(signed char)(i);
+    /*The ramps back towards zero, clipped to the bounds of the table.*/
+    if(lo>=0)_bv[lo]=(signed char)(i-_flimit);
+    if(hi<256)_bv[hi]=(signed char)(_flimit-i);
+  }
+}
+
+/*Runs the loop filter over a range of fragment rows in one color plane.
+  Filtering the bottom edge of a row touches pixels in the fragment row below
+   it, so that row must be available as well.
+  _state:     The shared codec state describing the fragment layout.
+  _bv:        The bounding values array.
+  _refi:      The index of the frame buffer to filter.
+  _pli:       The color plane to filter.
+  _fragy0:    The Y coordinate of the first fragment row to filter.
+  _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/
+void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state,
+ signed char *_bv,int _refi,int _pli,int _fragy0,int _fragy_end){
+  const oc_fragment_plane *plane;
+  const oc_fragment       *frags;
+  const ptrdiff_t         *buf_offs;
+  unsigned char           *frame;
+  signed char             *bv;
+  ptrdiff_t                plane_first;
+  ptrdiff_t                plane_end;
+  ptrdiff_t                row_first;
+  ptrdiff_t                rows_end;
+  int                      stride;
+  int                      row_len;
+  /*Center the table so the edge filters can index it with signed values.*/
+  bv=_bv+127;
+  plane=_state->fplanes+_pli;
+  row_len=plane->nhfrags;
+  plane_first=plane->froffset;
+  plane_end=plane_first+plane->nfrags;
+  row_first=plane_first+_fragy0*(ptrdiff_t)row_len;
+  rows_end=plane_first+_fragy_end*(ptrdiff_t)row_len;
+  stride=_state->ref_ystride[_pli];
+  frags=_state->frags;
+  buf_offs=_state->frag_buf_offs;
+  frame=_state->ref_frame_data[_refi];
+  /*The traversal below looks odd, but it is deliberate.
+    An edge is filtered whenever at least one of the two fragments sharing it
+     is coded.
+    The result depends on the order the edges are visited in, and this is the
+     (somewhat strange) order VP3 chose: for each coded fragment, its left
+     edge, then its top edge, then its right and bottom edges if the
+     neighbor there is not coded (and so will not filter them itself).*/
+  for(;row_first<rows_end;row_first+=row_len){
+    ptrdiff_t row_end;
+    ptrdiff_t fragi;
+    row_end=row_first+row_len;
+    for(fragi=row_first;fragi<row_end;fragi++){
+      unsigned char *ref;
+      if(!frags[fragi].coded)continue;
+      ref=frame+buf_offs[fragi];
+      /*Left edge: skipped for the first fragment in a row.*/
+      if(fragi>row_first)loop_filter_h(ref,stride,bv);
+      /*Top edge: skipped for the first row of the plane.*/
+      if(row_first>plane_first)loop_filter_v(ref,stride,bv);
+      /*Right edge: only if there is an uncoded fragment to the right.*/
+      if(fragi+1<row_end&&!frags[fragi+1].coded){
+        loop_filter_h(ref+8,stride,bv);
+      }
+      /*Bottom edge: only if there is an uncoded fragment below.*/
+      if(fragi+row_len<plane_end&&!frags[fragi+row_len].coded){
+        loop_filter_v(ref+(stride<<3),stride,bv);
+      }
+    }
+  }
+}
+
+#if defined(OC_DUMP_IMAGES)
+/*Writes one of the reference frames to a 16-bit RGB PNG file for debugging.
+  The file is named with the zero-padded sum of the keyframe and delta-frame
+   parts of the current granule position, followed by _suf and ".png", and is
+   opened relative to the current directory.
+  _state: The codec state holding the frame buffers and stream info.
+  _frame: The OC_FRAME_* index of the reference frame to dump.
+  _suf:   A short suffix (at most 32 characters) appended to the frame number
+           in the file name.
+  Return: 0 on success, or TH_EFAULT if _suf is NULL or too long, the file
+           cannot be opened, memory cannot be allocated, or libpng reports an
+           error.*/
+int oc_state_dump_frame(const oc_theora_state *_state,int _frame,
+ const char *_suf){
+  /*Dump a PNG of the reconstructed image.*/
+  png_structp    png;
+  png_infop      info;
+  png_bytep     *image;
+  FILE          *fp;
+  char           fname[64];
+  unsigned char *y_row;
+  unsigned char *u_row;
+  unsigned char *v_row;
+  unsigned char *y;
+  unsigned char *u;
+  unsigned char *v;
+  ogg_int64_t    iframe;
+  ogg_int64_t    pframe;
+  int            y_stride;
+  int            u_stride;
+  int            v_stride;
+  int            framei;
+  int            width;
+  int            height;
+  int            imgi;
+  int            imgj;
+  /*fname has to hold the frame number, _suf, ".png" and the terminator.
+    Even a 64-bit int prints in at most 20 characters, so limiting _suf to 32
+     characters bounds the name at 20+32+4+1=57 bytes, inside the buffer.
+    The old 16-byte buffer overflowed for any suffix longer than 3 characters
+     or any frame number longer than 8 digits.*/
+  if(_suf==NULL||strlen(_suf)>32)return TH_EFAULT;
+  width=_state->info.frame_width;
+  height=_state->info.frame_height;
+  iframe=_state->granpos>>_state->info.keyframe_granule_shift;
+  pframe=_state->granpos-(iframe<<_state->info.keyframe_granule_shift);
+  sprintf(fname,"%08i%s.png",(int)(iframe+pframe),_suf);
+  fp=fopen(fname,"wb");
+  if(fp==NULL)return TH_EFAULT;
+  /*Each pixel takes 6 bytes: three 16-bit big-endian samples.*/
+  image=(png_bytep *)oc_malloc_2d(height,6*width,sizeof(**image));
+  if(image==NULL){
+    fclose(fp);
+    return TH_EFAULT;
+  }
+  png=png_create_write_struct(PNG_LIBPNG_VER_STRING,NULL,NULL,NULL);
+  if(png==NULL){
+    oc_free_2d(image);
+    fclose(fp);
+    return TH_EFAULT;
+  }
+  info=png_create_info_struct(png);
+  if(info==NULL){
+    png_destroy_write_struct(&png,NULL);
+    oc_free_2d(image);
+    fclose(fp);
+    return TH_EFAULT;
+  }
+  /*libpng reports errors by jumping back here; release everything and fail.*/
+  if(setjmp(png_jmpbuf(png))){
+    png_destroy_write_struct(&png,&info);
+    oc_free_2d(image);
+    fclose(fp);
+    return TH_EFAULT;
+  }
+  framei=_state->ref_frame_idx[_frame];
+  y_row=_state->ref_frame_bufs[framei][0].data;
+  u_row=_state->ref_frame_bufs[framei][1].data;
+  v_row=_state->ref_frame_bufs[framei][2].data;
+  y_stride=_state->ref_frame_bufs[framei][0].stride;
+  u_stride=_state->ref_frame_bufs[framei][1].stride;
+  v_stride=_state->ref_frame_bufs[framei][2].stride;
+  /*Chroma up-sampling is just done with a box filter.
+    This is very likely what will actually be used in practice on a real
+     display, and also removes one more layer to search in for the source of
+     artifacts.
+    As an added bonus, it's dead simple.*/
+  /*The output rows are filled from the last one to the first while the
+     source rows are read in increasing order.*/
+  for(imgi=height;imgi-->0;){
+    int dc;
+    y=y_row;
+    u=u_row;
+    v=v_row;
+    for(imgj=0;imgj<6*width;){
+      float    yval;
+      float    uval;
+      float    vval;
+      unsigned rval;
+      unsigned gval;
+      unsigned bval;
+      /*This is intentionally slow and very accurate.*/
+      yval=(*y-16)*(1.0F/219);
+      uval=(*u-128)*(2*(1-0.114F)/224);
+      vval=(*v-128)*(2*(1-0.299F)/224);
+      rval=OC_CLAMPI(0,(int)(65535*(yval+vval)+0.5F),65535);
+      gval=OC_CLAMPI(0,(int)(65535*(
+       yval-uval*(0.114F/0.587F)-vval*(0.299F/0.587F))+0.5F),65535);
+      bval=OC_CLAMPI(0,(int)(65535*(yval+uval)+0.5F),65535);
+      image[imgi][imgj++]=(unsigned char)(rval>>8);
+      image[imgi][imgj++]=(unsigned char)(rval&0xFF);
+      image[imgi][imgj++]=(unsigned char)(gval>>8);
+      image[imgi][imgj++]=(unsigned char)(gval&0xFF);
+      image[imgi][imgj++]=(unsigned char)(bval>>8);
+      image[imgi][imgj++]=(unsigned char)(bval&0xFF);
+      /*Advance chroma after every odd luma column, or after every column
+         when bit 0 of the pixel format is set.*/
+      dc=(y-y_row&1)|(_state->info.pixel_fmt&1);
+      y++;
+      u+=dc;
+      v+=dc;
+    }
+    /*dc is an all-ones mask when the chroma rows should advance (after every
+       odd row, or after every row when bit 1 of the pixel format is set) and
+       zero otherwise.*/
+    dc=-((height-1-imgi&1)|_state->info.pixel_fmt>>1);
+    y_row+=y_stride;
+    u_row+=dc&u_stride;
+    v_row+=dc&v_stride;
+  }
+  png_init_io(png,fp);
+  png_set_compression_level(png,Z_BEST_COMPRESSION);
+  png_set_IHDR(png,info,width,height,16,PNG_COLOR_TYPE_RGB,
+   PNG_INTERLACE_NONE,PNG_COMPRESSION_TYPE_DEFAULT,PNG_FILTER_TYPE_DEFAULT);
+  switch(_state->info.colorspace){
+    case TH_CS_ITU_REC_470M:{
+      png_set_gAMA(png,info,2.2);
+      png_set_cHRM_fixed(png,info,31006,31616,
+       67000,32000,21000,71000,14000,8000);
+    }break;
+    case TH_CS_ITU_REC_470BG:{
+      png_set_gAMA(png,info,2.67);
+      png_set_cHRM_fixed(png,info,31271,32902,
+       64000,33000,29000,60000,15000,6000);
+    }break;
+    default:break;
+  }
+  png_set_pHYs(png,info,_state->info.aspect_numerator,
+   _state->info.aspect_denominator,0);
+  png_set_rows(png,info,image);
+  png_write_png(png,info,PNG_TRANSFORM_IDENTITY,NULL);
+  png_write_end(png,info);
+  png_destroy_write_struct(&png,&info);
+  oc_free_2d(image);
+  fclose(fp);
+  return 0;
+}
+#endif
+
+
+
+/*Converts a granule position to a frame index.
+  _encdec:  An encoder or decoder handle; it is accessed as an
+             oc_theora_state.
+  _granpos: The granule position to convert.
+  Return: The index of the corresponding frame, or -1 if _granpos is
+           negative.*/
+ogg_int64_t th_granule_frame(void *_encdec,ogg_int64_t _granpos){
+  oc_theora_state *state;
+  ogg_int64_t      key_part;
+  ogg_int64_t      delta_part;
+  if(_granpos<0)return -1;
+  state=(oc_theora_state *)_encdec;
+  /*The bits above the keyframe shift and the bits below it are summed.*/
+  key_part=_granpos>>state->info.keyframe_granule_shift;
+  delta_part=_granpos-(key_part<<state->info.keyframe_granule_shift);
+  /*3.2.0 streams store the frame index in the granule position.
+    3.2.1 and later store the frame count.
+    The index is what gets returned, so the value is adjusted for 3.2.1 or
+     later streams.*/
+  return key_part+delta_part-TH_VERSION_CHECK(&state->info,3,2,1);
+}
+
+/*Converts a granule position to a time in seconds.
+  The result is one more than the frame index returned by th_granule_frame(),
+   multiplied by the frame duration (fps_denominator/fps_numerator).
+  _encdec:  An encoder or decoder handle; it is accessed as an
+             oc_theora_state.
+  _granpos: The granule position to convert.
+  Return: The computed time, or -1 if _granpos is negative.*/
+double th_granule_time(void *_encdec,ogg_int64_t _granpos){
+  oc_theora_state *state;
+  if(_granpos<0)return -1;
+  state=(oc_theora_state *)_encdec;
+  return (th_granule_frame(_encdec, _granpos)+1)*(
+   (double)state->info.fps_denominator/state->info.fps_numerator);
+}

+ 552 - 0
jni/libtheora-1.2.0alpha1/lib/state.h

@@ -0,0 +1,552 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: internal.h 17337 2010-07-19 16:08:54Z tterribe $
+
+ ********************************************************************/
+#if !defined(_state_H)
+# define _state_H (1)
+# include "internal.h"
+# include "huffman.h"
+# include "quant.h"
+
+
+
+/*A single quadrant of the map from a super block to fragment numbers.
+  Each quadrant holds four fragment numbers.*/
+typedef ptrdiff_t       oc_sb_map_quad[4];
+/*A map from a super block to fragment numbers.
+  Four quadrants of four entries each give 16 fragment numbers per super
+   block.*/
+typedef oc_sb_map_quad  oc_sb_map[4];
+/*A single plane of the map from a macro block to fragment numbers.*/
+typedef ptrdiff_t       oc_mb_map_plane[4];
+/*A map from a macro block to fragment numbers.
+  There is one oc_mb_map_plane for each of the three color planes.*/
+typedef oc_mb_map_plane oc_mb_map[3];
+/*A motion vector.
+  Both components share a single 16-bit integer: use OC_MV() to build one and
+   OC_MV_X()/OC_MV_Y() to read the components back.*/
+typedef ogg_int16_t     oc_mv;
+
+/*Forward declarations of the structures defined later in this header.*/
+typedef struct oc_sb_flags              oc_sb_flags;
+typedef struct oc_border_info           oc_border_info;
+typedef struct oc_fragment              oc_fragment;
+typedef struct oc_fragment_plane        oc_fragment_plane;
+typedef struct oc_base_opt_vtable       oc_base_opt_vtable;
+typedef struct oc_base_opt_data         oc_base_opt_data;
+typedef struct oc_state_dispatch_vtable oc_state_dispatch_vtable;
+typedef struct oc_theora_state          oc_theora_state;
+
+
+
+/*Shared accelerated functions.
+  Every macro below is only defined if it is not defined already, so an
+   earlier definition (presumably from one of the platform headers included
+   here) takes precedence over these fallbacks.
+  When OC_STATE_USE_VTABLE is defined, the fallbacks dispatch through the
+   function pointers in (_state)->opt_vtable.
+  Otherwise they call the pure-C *_c implementations directly; in that case
+   the function-like macros ignore their _state argument and oc_restore_fpu()
+   expands to an empty statement.*/
+# if defined(OC_X86_ASM)
+#  if defined(_MSC_VER)
+#   include "x86_vc/x86int.h"
+#  else
+#   include "x86/x86int.h"
+#  endif
+# endif
+# if defined(OC_ARM_ASM)
+#  include "arm/armint.h"
+# endif
+# if defined(OC_C64X_ASM)
+#  include "c64x/c64xint.h"
+# endif
+
+# if !defined(oc_state_accel_init)
+#  define oc_state_accel_init oc_state_accel_init_c
+# endif
+# if defined(OC_STATE_USE_VTABLE)
+#  if !defined(oc_frag_copy)
+#   define oc_frag_copy(_state,_dst,_src,_ystride) \
+  ((*(_state)->opt_vtable.frag_copy)(_dst,_src,_ystride))
+#  endif
+#  if !defined(oc_frag_copy_list)
+#   define oc_frag_copy_list(_state,_dst_frame,_src_frame,_ystride, \
+ _fragis,_nfragis,_frag_buf_offs) \
+ ((*(_state)->opt_vtable.frag_copy_list)(_dst_frame,_src_frame,_ystride, \
+  _fragis,_nfragis,_frag_buf_offs))
+#  endif
+#  if !defined(oc_frag_recon_intra)
+#   define oc_frag_recon_intra(_state,_dst,_dst_ystride,_residue) \
+  ((*(_state)->opt_vtable.frag_recon_intra)(_dst,_dst_ystride,_residue))
+#  endif
+#  if !defined(oc_frag_recon_inter)
+#   define oc_frag_recon_inter(_state,_dst,_src,_ystride,_residue) \
+  ((*(_state)->opt_vtable.frag_recon_inter)(_dst,_src,_ystride,_residue))
+#  endif
+#  if !defined(oc_frag_recon_inter2)
+#   define oc_frag_recon_inter2(_state,_dst,_src1,_src2,_ystride,_residue) \
+  ((*(_state)->opt_vtable.frag_recon_inter2)(_dst, \
+   _src1,_src2,_ystride,_residue))
+#  endif
+# if !defined(oc_idct8x8)
+#   define oc_idct8x8(_state,_y,_x,_last_zzi) \
+  ((*(_state)->opt_vtable.idct8x8)(_y,_x,_last_zzi))
+#  endif
+#  if !defined(oc_state_frag_recon)
+#   define oc_state_frag_recon(_state,_fragi, \
+ _pli,_dct_coeffs,_last_zzi,_dc_quant) \
+  ((*(_state)->opt_vtable.state_frag_recon)(_state,_fragi, \
+   _pli,_dct_coeffs,_last_zzi,_dc_quant))
+#  endif
+#  if !defined(oc_loop_filter_init)
+#   define oc_loop_filter_init(_state,_bv,_flimit) \
+  ((*(_state)->opt_vtable.loop_filter_init)(_bv,_flimit))
+#  endif
+#  if !defined(oc_state_loop_filter_frag_rows)
+#   define oc_state_loop_filter_frag_rows(_state, \
+ _bv,_refi,_pli,_fragy0,_fragy_end) \
+  ((*(_state)->opt_vtable.state_loop_filter_frag_rows)(_state, \
+   _bv,_refi,_pli,_fragy0,_fragy_end))
+#  endif
+#  if !defined(oc_restore_fpu)
+#   define oc_restore_fpu(_state) \
+  ((*(_state)->opt_vtable.restore_fpu)())
+#  endif
+# else
+#  if !defined(oc_frag_copy)
+#   define oc_frag_copy(_state,_dst,_src,_ystride) \
+  oc_frag_copy_c(_dst,_src,_ystride)
+#  endif
+#  if !defined(oc_frag_copy_list)
+#   define oc_frag_copy_list(_state,_dst_frame,_src_frame,_ystride, \
+ _fragis,_nfragis,_frag_buf_offs) \
+  oc_frag_copy_list_c(_dst_frame,_src_frame,_ystride, \
+  _fragis,_nfragis,_frag_buf_offs)
+#  endif
+#  if !defined(oc_frag_recon_intra)
+#   define oc_frag_recon_intra(_state,_dst,_dst_ystride,_residue) \
+  oc_frag_recon_intra_c(_dst,_dst_ystride,_residue)
+#  endif
+#  if !defined(oc_frag_recon_inter)
+#   define oc_frag_recon_inter(_state,_dst,_src,_ystride,_residue) \
+  oc_frag_recon_inter_c(_dst,_src,_ystride,_residue)
+#  endif
+#  if !defined(oc_frag_recon_inter2)
+#   define oc_frag_recon_inter2(_state,_dst,_src1,_src2,_ystride,_residue) \
+  oc_frag_recon_inter2_c(_dst,_src1,_src2,_ystride,_residue)
+#  endif
+#  if !defined(oc_idct8x8)
+#   define oc_idct8x8(_state,_y,_x,_last_zzi) oc_idct8x8_c(_y,_x,_last_zzi)
+#  endif
+#  if !defined(oc_state_frag_recon)
+#   define oc_state_frag_recon oc_state_frag_recon_c
+#  endif
+#  if !defined(oc_loop_filter_init)
+#   define oc_loop_filter_init(_state,_bv,_flimit) \
+  oc_loop_filter_init_c(_bv,_flimit)
+#  endif
+#  if !defined(oc_state_loop_filter_frag_rows)
+#   define oc_state_loop_filter_frag_rows oc_state_loop_filter_frag_rows_c
+#  endif
+#  if !defined(oc_restore_fpu)
+#   define oc_restore_fpu(_state) do{}while(0)
+#  endif
+# endif
+
+
+
+/*A keyframe.*/
+# define OC_INTRA_FRAME (0)
+/*A predicted frame.*/
+# define OC_INTER_FRAME (1)
+/*A frame of unknown type (frame type decision has not yet been made).*/
+# define OC_UNKWN_FRAME (-1)
+
+/*The amount of padding to add to the reconstructed frame buffers on all
+   sides.
+  This is used to allow unrestricted motion vectors without special casing.
+  This must be a multiple of 2.*/
+# define OC_UMV_PADDING (16)
+
+/*Frame classification indices.
+  OC_FRAME_GOLD through OC_FRAME_NONE are the values that fit in the 2-bit
+   refi field of oc_fragment.
+  The ref_frame_idx array in oc_theora_state has six entries, one for each
+   index from OC_FRAME_GOLD through OC_FRAME_PREV_ORIG.*/
+/*The previous golden frame.*/
+# define OC_FRAME_GOLD      (0)
+/*The previous frame.*/
+# define OC_FRAME_PREV      (1)
+/*The current frame.*/
+# define OC_FRAME_SELF      (2)
+/*Used to mark uncoded fragments (for DC prediction).*/
+# define OC_FRAME_NONE      (3)
+
+/*The input or output buffer.
+  Note that this deliberately shares the value 3 with OC_FRAME_NONE.*/
+# define OC_FRAME_IO        (3)
+/*Uncompressed prev golden frame.*/
+# define OC_FRAME_GOLD_ORIG (4)
+/*Uncompressed previous frame. */
+# define OC_FRAME_PREV_ORIG (5)
+
+/*Macroblock modes.
+  The coded modes 0 through 7 fit in the 3-bit mb_mode field of oc_fragment;
+   OC_MODE_INVALID only appears where a signed value can be stored.*/
+/*Macro block is invalid: It is never coded.*/
+# define OC_MODE_INVALID        (-1)
+/*Encoded difference from the same macro block in the previous frame.*/
+# define OC_MODE_INTER_NOMV     (0)
+/*Encoded with no motion compensated prediction.*/
+# define OC_MODE_INTRA          (1)
+/*Encoded difference from the previous frame offset by the given motion
+   vector.*/
+# define OC_MODE_INTER_MV       (2)
+/*Encoded difference from the previous frame offset by the last coded motion
+   vector.*/
+# define OC_MODE_INTER_MV_LAST  (3)
+/*Encoded difference from the previous frame offset by the second to last
+   coded motion vector.*/
+# define OC_MODE_INTER_MV_LAST2 (4)
+/*Encoded difference from the same macro block in the previous golden
+   frame.*/
+# define OC_MODE_GOLDEN_NOMV    (5)
+/*Encoded difference from the previous golden frame offset by the given motion
+   vector.*/
+# define OC_MODE_GOLDEN_MV      (6)
+/*Encoded difference from the previous frame offset by the individual motion
+   vectors given for each block.*/
+# define OC_MODE_INTER_MV_FOUR  (7)
+/*The number of (coded) modes.*/
+# define OC_NMODES              (8)
+
+/*Determines the reference frame used for a given MB mode.
+  The eight table entries line up with modes 0 through 7 in order: the two
+   golden modes map to OC_FRAME_GOLD, OC_MODE_INTRA maps to OC_FRAME_SELF, and
+   the remaining modes map to OC_FRAME_PREV.
+  NOTE(review): this assumes OC_UNIBBLE_TABLE32 (defined elsewhere) takes its
+   entries lowest index first; confirm against its definition.*/
+# define OC_FRAME_FOR_MODE(_x) \
+ OC_UNIBBLE_TABLE32(OC_FRAME_PREV,OC_FRAME_SELF,OC_FRAME_PREV,OC_FRAME_PREV, \
+  OC_FRAME_PREV,OC_FRAME_GOLD,OC_FRAME_GOLD,OC_FRAME_PREV,(_x))
+
+/*Constants for the packet state machine common between encoder and decoder.*/
+
+/*Next packet to emit/read: Codec info header.*/
+# define OC_PACKET_INFO_HDR    (-3)
+/*Next packet to emit/read: Comment header.*/
+# define OC_PACKET_COMMENT_HDR (-2)
+/*Next packet to emit/read: Codec setup header.*/
+# define OC_PACKET_SETUP_HDR   (-1)
+/*No more packets to emit/read.*/
+# define OC_PACKET_DONE        (INT_MAX)
+
+
+
+/*Helpers for packing two motion vector components into a single oc_mv.
+  OC_MV() keeps the low 8 bits of _x in bits 0...7 and places _y in the bits
+   above them.
+  OC_MV_X() recovers the X component by converting to signed char, and
+   OC_MV_Y() recovers the Y component by shifting right 8 bits.
+  OC_MV_ADD() and OC_MV_SUB() unpack both operands, combine them one
+   component at a time, and re-pack the result.
+  The macros evaluate their arguments more than once in OC_MV_ADD() and
+   OC_MV_SUB(), so arguments should be free of side effects.
+  NOTE(review): for negative components these rely on shifts and narrowing
+   conversions of negative values, which the C standard does not fully
+   define; confirm the behavior when porting to a new toolchain.*/
+#define OC_MV(_x,_y)         ((oc_mv)((_x)&0xFF|(_y)<<8))
+#define OC_MV_X(_mv)         ((signed char)(_mv))
+#define OC_MV_Y(_mv)         ((_mv)>>8)
+#define OC_MV_ADD(_mv1,_mv2) \
+  OC_MV(OC_MV_X(_mv1)+OC_MV_X(_mv2), \
+   OC_MV_Y(_mv1)+OC_MV_Y(_mv2))
+#define OC_MV_SUB(_mv1,_mv2) \
+  OC_MV(OC_MV_X(_mv1)-OC_MV_X(_mv2), \
+   OC_MV_Y(_mv1)-OC_MV_Y(_mv2))
+
+
+
+/*Super blocks are 32x32 segments of pixels in a single color plane indexed
+   in image order.
+  Internally, super blocks are broken up into four quadrants, each of which
+   contains a 2x2 pattern of blocks, each of which is an 8x8 block of pixels.
+  Quadrants, and the blocks within them, are indexed in a special order called
+   a "Hilbert curve" within the super block.
+
+  In order to differentiate between the Hilbert-curve indexing strategy and
+   the regular image order indexing strategy, blocks indexed in image order
+   are called "fragments".
+  Fragments are indexed in image order, left to right, then bottom to top,
+   from Y' plane to Cb plane to Cr plane.
+
+  The co-located fragments in all image planes corresponding to the location
+   of a single quadrant of a luma plane super block form a macro block.
+  Thus there is only a single set of macro blocks for all planes, each of which
+   contains between 6 and 12 fragments, depending on the pixel format.
+  Therefore macro block information is kept in a separate set of arrays from
+   super blocks to avoid unused space in the other planes.
+  The lists are indexed in super block order.
+  That is, the macro block corresponding to the macro block mbi in (luma plane)
+   super block sbi is at index (sbi<<2|mbi).
+  Thus the number of macro blocks in each dimension is always twice the number
+   of super blocks, even when only an odd number fall inside the coded frame.
+  These "extra" macro blocks are just an artifact of our internal data layout,
+   and not part of the coded stream; they are flagged with a negative MB mode.*/
+
+
+
+/*Super block information.
+  The fields are declared as unsigned char bitfields of 1, 1 and 4 bits.
+  NOTE(review): going by the names, coded_fully and coded_partially record
+   how much of the super block is coded, and quad_valid holds one bit for
+   each of the four quadrants; confirm against the code that sets them.*/
+struct oc_sb_flags{
+  unsigned char coded_fully:1;
+  unsigned char coded_partially:1;
+  unsigned char quad_valid:4;
+};
+
+
+
+/*Information about a fragment which intersects the border of the displayable
+   region.
+  This marks which pixels belong to the displayable region.*/
+struct oc_border_info{
+  /*A bit mask marking which pixels are in the displayable region.
+    Pixel (x,y) corresponds to bit (y<<3|x).
+    With x and y both in 0...7 this uses all 64 bits, one for each pixel of
+     the 8x8 fragment.*/
+  ogg_int64_t mask;
+  /*The number of pixels in the displayable region.
+    This is always positive, and always less than 64.*/
+  int         npixels;
+};
+
+
+
+/*Fragment information.
+  The bitfield widths below add up to exactly 32 bits
+   (1+1+4+2+3+5+16).*/
+struct oc_fragment{
+  /*A flag indicating whether or not this fragment is coded.*/
+  unsigned   coded:1;
+  /*A flag indicating that this entire fragment lies outside the displayable
+     region of the frame.
+    Note the contrast with an invalid macro block, which is outside the coded
+     frame, not just the displayable one.
+    There are no fragments outside the coded frame by construction.*/
+  unsigned   invalid:1;
+  /*The index of the quality index used for this fragment's AC coefficients.*/
+  unsigned   qii:4;
+  /*The index of the reference frame this fragment is predicted from.
+    Two bits hold the values 0...3, i.e., OC_FRAME_GOLD through
+     OC_FRAME_NONE.*/
+  unsigned   refi:2;
+  /*The mode of the macroblock this fragment belongs to.
+    Three bits hold the values 0...7, i.e., the OC_NMODES coded modes.*/
+  unsigned   mb_mode:3;
+  /*The index of the associated border information for fragments which lie
+     partially outside the displayable region.
+    For fragments completely inside or outside this region, this is -1.
+    Note that the C standard requires an explicit signed keyword for bitfield
+     types, since some compilers may treat them as unsigned without it.
+    Five signed bits hold -16...15, which covers -1 and every index into the
+     16-entry borders array of oc_theora_state.*/
+  signed int borderi:5;
+  /*The prediction-corrected DC component.
+    Note that the C standard requires an explicit signed keyword for bitfield
+     types, since some compilers may treat them as unsigned without it.*/
+  signed int dc:16;
+};
+
+
+
+/*A description of each fragment plane.
+  oc_theora_state keeps one of these for each of the three color planes.
+  The fragment fields index the state's frags array: fragment row y of a
+   plane starts at froffset+y*nhfrags, and the plane ends just before
+   froffset+nfrags.*/
+struct oc_fragment_plane{
+  /*The number of fragments in the horizontal direction.*/
+  int       nhfrags;
+  /*The number of fragments in the vertical direction.*/
+  int       nvfrags;
+  /*The offset of the first fragment in the plane.*/
+  ptrdiff_t froffset;
+  /*The total number of fragments in the plane.*/
+  ptrdiff_t nfrags;
+  /*The number of super blocks in the horizontal direction.*/
+  unsigned  nhsbs;
+  /*The number of super blocks in the vertical direction.*/
+  unsigned  nvsbs;
+  /*The offset of the first super block in the plane.*/
+  unsigned  sboffset;
+  /*The total number of super blocks in the plane.*/
+  unsigned  nsbs;
+};
+
+
+/*The signature shared by all implementations of the fragment row loop
+   filter; see oc_state_loop_filter_frag_rows_c() for the pure-C version.*/
+typedef void (*oc_state_loop_filter_frag_rows_func)(
+ const oc_theora_state *_state,signed char _bv[256],int _refi,int _pli,
+ int _fragy0,int _fragy_end);
+
+/*The shared (encoder and decoder) functions that have accelerated variants.
+  When OC_STATE_USE_VTABLE is defined, the oc_* dispatch macros in this header
+   call through these pointers.
+  Each entry has the same signature as the pure-C function of the same name
+   with a _c suffix declared later in this header (e.g., frag_copy and
+   oc_frag_copy_c()).*/
+struct oc_base_opt_vtable{
+  void (*frag_copy)(unsigned char *_dst,
+   const unsigned char *_src,int _ystride);
+  void (*frag_copy_list)(unsigned char *_dst_frame,
+   const unsigned char *_src_frame,int _ystride,
+   const ptrdiff_t *_fragis,ptrdiff_t _nfragis,const ptrdiff_t *_frag_buf_offs);
+  void (*frag_recon_intra)(unsigned char *_dst,int _ystride,
+   const ogg_int16_t _residue[64]);
+  void (*frag_recon_inter)(unsigned char *_dst,
+   const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]);
+  void (*frag_recon_inter2)(unsigned char *_dst,const unsigned char *_src1,
+   const unsigned char *_src2,int _ystride,const ogg_int16_t _residue[64]);
+  void (*idct8x8)(ogg_int16_t _y[64],ogg_int16_t _x[64],int _last_zzi);
+  void (*state_frag_recon)(const oc_theora_state *_state,ptrdiff_t _fragi,
+   int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant);
+  void (*loop_filter_init)(signed char _bv[256],int _flimit);
+  oc_state_loop_filter_frag_rows_func state_loop_filter_frag_rows;
+  void (*restore_fpu)(void);
+};
+
+/*The shared (encoder and decoder) tables that vary according to which variants
+   of the above functions are used.
+  NOTE(review): dct_fzig_zag is presumably the forward zig-zag coefficient
+   ordering expected by the selected DCT routines; confirm against the code
+   that assigns it.*/
+struct oc_base_opt_data{
+  const unsigned char *dct_fzig_zag;
+};
+
+
+/*State information common to both the encoder and decoder.*/
+struct oc_theora_state{
+  /*The stream information.*/
+  th_info             info;
+# if defined(OC_STATE_USE_VTABLE)
+  /*Table for shared accelerated functions.*/
+  oc_base_opt_vtable  opt_vtable;
+# endif
+  /*Table for shared data used by accelerated functions.*/
+  oc_base_opt_data    opt_data;
+  /*CPU flags to detect the presence of extended instruction sets.*/
+  ogg_uint32_t        cpu_flags;
+  /*The fragment plane descriptions.*/
+  oc_fragment_plane   fplanes[3];
+  /*The list of fragments, indexed in image order.*/
+  oc_fragment        *frags;
+  /*The offset into the reference frame buffer to the upper-left pixel of
+     each fragment.*/
+  ptrdiff_t          *frag_buf_offs;
+  /*The motion vector for each fragment.*/
+  oc_mv              *frag_mvs;
+  /*The total number of fragments in a single frame.*/
+  ptrdiff_t           nfrags;
+  /*The list of super block maps, indexed in image order.*/
+  oc_sb_map          *sb_maps;
+  /*The list of super block flags, indexed in image order.*/
+  oc_sb_flags        *sb_flags;
+  /*The total number of super blocks in a single frame.*/
+  unsigned            nsbs;
+  /*The fragments from each color plane that belong to each macro block.
+    Fragments are stored in image order (left to right then top to bottom).
+    When chroma components are decimated, the extra fragments have an index of
+     -1.*/
+  oc_mb_map          *mb_maps;
+  /*The list of macro block modes.
+    A negative number indicates the macro block lies entirely outside the
+     coded frame.*/
+  signed char        *mb_modes;
+  /*The number of macro blocks in the X direction.*/
+  unsigned            nhmbs;
+  /*The number of macro blocks in the Y direction.*/
+  unsigned            nvmbs;
+  /*The total number of macro blocks.*/
+  size_t              nmbs;
+  /*The list of coded fragments, in coded order.
+    Uncoded fragments are stored in reverse order from the end of the list.*/
+  ptrdiff_t          *coded_fragis;
+  /*The number of coded fragments in each plane.*/
+  ptrdiff_t           ncoded_fragis[3];
+  /*The total number of coded fragments.*/
+  ptrdiff_t           ntotal_coded_fragis;
+  /*The actual buffers used for the reference frames.*/
+  th_ycbcr_buffer     ref_frame_bufs[6];
+  /*The index of the buffers being used for each OC_FRAME_* reference frame.*/
+  int                 ref_frame_idx[6];
+  /*The storage for the reference frame buffers.
+    This is just ref_frame_bufs[ref_frame_idx[i]][0].data, but is cached here
+     for faster look-up.*/
+  unsigned char      *ref_frame_data[6];
+  /*The handle used to allocate the reference frame buffers.*/
+  unsigned char      *ref_frame_handle;
+  /*The strides for each plane in the reference frames.*/
+  int                 ref_ystride[3];
+  /*The number of unique border patterns.*/
+  int                 nborders;
+  /*The unique border patterns for all border fragments.
+    The borderi field of fragments which straddle the border indexes this
+     list.*/
+  oc_border_info      borders[16];
+  /*The frame number of the last keyframe.*/
+  ogg_int64_t         keyframe_num;
+  /*The frame number of the current frame.*/
+  ogg_int64_t         curframe_num;
+  /*The granpos of the current frame.*/
+  ogg_int64_t         granpos;
+  /*The type of the current frame.
+    This is one of OC_INTRA_FRAME, OC_INTER_FRAME or OC_UNKWN_FRAME.*/
+  signed char         frame_type;
+  /*The bias to add to the frame count when computing granule positions.*/
+  unsigned char       granpos_bias;
+  /*The number of quality indices used in the current frame.*/
+  unsigned char       nqis;
+  /*The quality indices of the current frame.*/
+  unsigned char       qis[3];
+  /*The dequantization tables, stored in zig-zag order, and indexed by
+     qi, pli, qti, and zzi.*/
+  ogg_uint16_t       *dequant_tables[64][3][2];
+  OC_ALIGN16(oc_quant_table      dequant_table_data[64][3][2]);
+  /*Loop filter strength parameters.*/
+  unsigned char       loop_filter_limits[64];
+};
+
+
+
+/*The function type used to fill in the chroma plane motion vectors for a
+   macro block when 4 different motion vectors are specified in the luma
+   plane.
+  _cbmvs: The chroma block-level motion vectors to fill in.
+  _lbmvs: The luma block-level motion vectors.
+  NOTE(review): this comment used to list a third parameter, _lmbmv ("the
+   luma macro-block level motion vector to fill in for use in prediction"),
+   but the signature below only takes _cbmvs and _lbmvs.*/
+typedef void (*oc_set_chroma_mvs_func)(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]);
+
+
+
+/*A table of functions used to fill in the Cb,Cr plane motion vectors for a
+   macro block when 4 different motion vectors are specified in the luma
+   plane.
+  The table has TH_PF_NFORMATS entries, one for each pixel format.*/
+extern const oc_set_chroma_mvs_func OC_SET_CHROMA_MVS_TABLE[TH_PF_NFORMATS];
+
+
+
+/*Functions that operate on the shared codec state.*/
+int oc_state_init(oc_theora_state *_state,const th_info *_info,int _nrefs);
+void oc_state_clear(oc_theora_state *_state);
+void oc_state_accel_init_c(oc_theora_state *_state);
+void oc_state_borders_fill_rows(oc_theora_state *_state,int _refi,int _pli,
+ int _y0,int _yend);
+void oc_state_borders_fill_caps(oc_theora_state *_state,int _refi,int _pli);
+void oc_state_borders_fill(oc_theora_state *_state,int _refi);
+void oc_state_fill_buffer_ptrs(oc_theora_state *_state,int _buf_idx,
+ th_ycbcr_buffer _img);
+int oc_state_mbi_for_pos(oc_theora_state *_state,int _mbx,int _mby);
+int oc_state_get_mv_offsets(const oc_theora_state *_state,int _offsets[2],
+ int _pli,oc_mv _mv);
+
+void oc_loop_filter_init_c(signed char _bv[256],int _flimit);
+void oc_state_loop_filter(oc_theora_state *_state,int _frame);
+# if defined(OC_DUMP_IMAGES)
+int oc_state_dump_frame(const oc_theora_state *_state,int _frame,
+ const char *_suf);
+# endif
+
+/*Default pure-C implementations of shared accelerated functions.
+  These are what the oc_* dispatch macros in this header expand to when
+   OC_STATE_USE_VTABLE is not defined and no earlier definition of the macro
+   exists.
+  The _bv parameter of oc_state_loop_filter_frag_rows_c() is written as an
+   array here and as a pointer in its definition; the two forms declare the
+   same parameter type.*/
+void oc_frag_copy_c(unsigned char *_dst,
+ const unsigned char *_src,int _src_ystride);
+void oc_frag_copy_list_c(unsigned char *_dst_frame,
+ const unsigned char *_src_frame,int _ystride,
+ const ptrdiff_t *_fragis,ptrdiff_t _nfragis,const ptrdiff_t *_frag_buf_offs);
+void oc_frag_recon_intra_c(unsigned char *_dst,int _dst_ystride,
+ const ogg_int16_t _residue[64]);
+void oc_frag_recon_inter_c(unsigned char *_dst,
+ const unsigned char *_src,int _ystride,const ogg_int16_t _residue[64]);
+void oc_frag_recon_inter2_c(unsigned char *_dst,const unsigned char *_src1,
+ const unsigned char *_src2,int _ystride,const ogg_int16_t _residue[64]);
+void oc_idct8x8_c(ogg_int16_t _y[64],ogg_int16_t _x[64],int _last_zzi);
+void oc_state_frag_recon_c(const oc_theora_state *_state,ptrdiff_t _fragi,
+ int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant);
+void oc_state_loop_filter_frag_rows_c(const oc_theora_state *_state,
+ signed char _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end);
+void oc_restore_fpu_c(void);
+
+/*We need a way to call a few encoder functions without introducing a link-time
+   dependency into the decoder, while still allowing the old alpha API which
+   does not distinguish between encoder and decoder objects to be used.
+  We do this by placing a function table at the start of the encoder object
+   which can dispatch into the encoder library.
+  We do a similar thing for the decoder in case we ever decide to split off a
+   common base library.
+  The table has four entry points (clear, control, granule_frame and
+   granule_time), each of which takes the old API's theora_state handle as its
+   first argument.*/
+typedef void (*oc_state_clear_func)(theora_state *_th);
+typedef int (*oc_state_control_func)(theora_state *th,int _req,
+ void *_buf,size_t _buf_sz);
+typedef ogg_int64_t (*oc_state_granule_frame_func)(theora_state *_th,
+ ogg_int64_t _granulepos);
+typedef double (*oc_state_granule_time_func)(theora_state *_th,
+ ogg_int64_t _granulepos);
+
+
+struct oc_state_dispatch_vtable{
+  oc_state_clear_func         clear;
+  oc_state_control_func       control;
+  oc_state_granule_frame_func granule_frame;
+  oc_state_granule_time_func  granule_time;
+};
+
+#endif

+ 1368 - 0
jni/libtheora-1.2.0alpha1/lib/tokenize.c

@@ -0,0 +1,1368 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id$
+
+ ********************************************************************/
+#include <stdlib.h>
+#include <string.h>
+#include "encint.h"
+
+
+
+/*Maps an EOB run length of 1...31 blocks (index is the run length minus 1) to
+   the token that codes it.
+  Read-only lookup table, so it is declared const like the other tables in
+   this file.*/
+static const unsigned char OC_DCT_EOB_TOKEN[31]={
+  /*Runs of 1, 2 and 3 each have their own token.*/
+  0,1,2,
+  /*Runs of 4...7.*/
+  3,3,3,3,
+  /*Runs of 8...15.*/
+  4,4,4,4,4,4,4,4,
+  /*Runs of 16...31.*/
+  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5
+};
+
+/*Returns the token used to code an EOB run of _run_count blocks.
+  Runs shorter than 32 are looked up in OC_DCT_EOB_TOKEN, indexed by
+   _run_count-1, so _run_count must be at least 1.
+  All longer runs use OC_DCT_REPEAT_RUN3_TOKEN.*/
+static int oc_make_eob_token(int _run_count){
+  if(_run_count>=32)return OC_DCT_REPEAT_RUN3_TOKEN;
+  return OC_DCT_EOB_TOKEN[_run_count-1];
+}
+
+/*Maps an EOB run length of 1...31 blocks (index is the run length minus 1) to
+   the extra bits value that accompanies the token from OC_DCT_EOB_TOKEN.
+  Read-only lookup table, so it is declared const like the other tables in
+   this file.*/
+static const unsigned char OC_DCT_EOB_EB[31]={
+  /*Runs of 1, 2 and 3.*/
+  0,0,0,
+  /*Runs of 4...7.*/
+  0,1,2,3,
+  /*Runs of 8...15.*/
+  0,1,2,3,4,5,6,7,
+  /*Runs of 16...31.*/
+  0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+};
+
+/*Returns the token used to code an EOB run of _run_count blocks and stores
+   the matching extra bits value in *_eb.
+  Runs shorter than 32 are looked up in OC_DCT_EOB_TOKEN and OC_DCT_EOB_EB,
+   indexed by _run_count-1, so _run_count must be at least 1.
+  Longer runs use OC_DCT_REPEAT_RUN3_TOKEN with the run length itself as the
+   extra bits value.*/
+static int oc_make_eob_token_full(int _run_count,int *_eb){
+  int idx;
+  if(_run_count>=32){
+    *_eb=_run_count;
+    return OC_DCT_REPEAT_RUN3_TOKEN;
+  }
+  idx=_run_count-1;
+  *_eb=OC_DCT_EOB_EB[idx];
+  return OC_DCT_EOB_TOKEN[idx];
+}
+
+/*Returns the number of blocks ended by an EOB token.
+  The constant packs one 5-bit base run length per token, lowest token first:
+   1, 2, 3, 4, 8, 16 and 0 for tokens 0 through 6.
+  The extra bits value _eb is added to that base.*/
+static int oc_decode_eob_token(int _token,int _eb){
+  unsigned base;
+  base=0x20820C41U>>_token*5;
+  base&=0x1F;
+  return base+_eb;
+}
+
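+/*Some tables for fast construction of value tokens.
+  Each table has 1161 entries, one per coefficient value in [-580,580].
+  They are meant to be indexed with a signed value through the *_PTR macros
+   defined after the tables, which point at the entry for the value 0.*/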
+
+static const unsigned char OC_DCT_VALUE_TOKEN[1161]={
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,21,21,21,21,21,21,21,21,
+  21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,
+  21,21,21,21,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,
+  19,19,19,19,19,19,19,19,18,18,18,18,17,17,16,15,14,13,12,10,
+   7,
+   9,11,13,14,15,16,17,17,18,18,18,18,19,19,19,19,19,19,19,19,
+  20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,21,21,21,21,
+  21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,21,
+  21,21,21,21,21,21,21,21,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,
+  22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22
+};
+
+static const ogg_uint16_t OC_DCT_VALUE_EB[1161]={
+  1023,1022,1021,1020,1019,1018,1017,1016,1015,1014,
+  1013,1012,1011,1010,1009,1008,1007,1006,1005,1004,
+  1003,1002,1001,1000, 999, 998, 997, 996, 995, 994,
+   993, 992, 991, 990, 989, 988, 987, 986, 985, 984,
+   983, 982, 981, 980, 979, 978, 977, 976, 975, 974,
+   973, 972, 971, 970, 969, 968, 967, 966, 965, 964,
+   963, 962, 961, 960, 959, 958, 957, 956, 955, 954,
+   953, 952, 951, 950, 949, 948, 947, 946, 945, 944,
+   943, 942, 941, 940, 939, 938, 937, 936, 935, 934,
+   933, 932, 931, 930, 929, 928, 927, 926, 925, 924,
+   923, 922, 921, 920, 919, 918, 917, 916, 915, 914,
+   913, 912, 911, 910, 909, 908, 907, 906, 905, 904,
+   903, 902, 901, 900, 899, 898, 897, 896, 895, 894,
+   893, 892, 891, 890, 889, 888, 887, 886, 885, 884,
+   883, 882, 881, 880, 879, 878, 877, 876, 875, 874,
+   873, 872, 871, 870, 869, 868, 867, 866, 865, 864,
+   863, 862, 861, 860, 859, 858, 857, 856, 855, 854,
+   853, 852, 851, 850, 849, 848, 847, 846, 845, 844,
+   843, 842, 841, 840, 839, 838, 837, 836, 835, 834,
+   833, 832, 831, 830, 829, 828, 827, 826, 825, 824,
+   823, 822, 821, 820, 819, 818, 817, 816, 815, 814,
+   813, 812, 811, 810, 809, 808, 807, 806, 805, 804,
+   803, 802, 801, 800, 799, 798, 797, 796, 795, 794,
+   793, 792, 791, 790, 789, 788, 787, 786, 785, 784,
+   783, 782, 781, 780, 779, 778, 777, 776, 775, 774,
+   773, 772, 771, 770, 769, 768, 767, 766, 765, 764,
+   763, 762, 761, 760, 759, 758, 757, 756, 755, 754,
+   753, 752, 751, 750, 749, 748, 747, 746, 745, 744,
+   743, 742, 741, 740, 739, 738, 737, 736, 735, 734,
+   733, 732, 731, 730, 729, 728, 727, 726, 725, 724,
+   723, 722, 721, 720, 719, 718, 717, 716, 715, 714,
+   713, 712, 711, 710, 709, 708, 707, 706, 705, 704,
+   703, 702, 701, 700, 699, 698, 697, 696, 695, 694,
+   693, 692, 691, 690, 689, 688, 687, 686, 685, 684,
+   683, 682, 681, 680, 679, 678, 677, 676, 675, 674,
+   673, 672, 671, 670, 669, 668, 667, 666, 665, 664,
+   663, 662, 661, 660, 659, 658, 657, 656, 655, 654,
+   653, 652, 651, 650, 649, 648, 647, 646, 645, 644,
+   643, 642, 641, 640, 639, 638, 637, 636, 635, 634,
+   633, 632, 631, 630, 629, 628, 627, 626, 625, 624,
+   623, 622, 621, 620, 619, 618, 617, 616, 615, 614,
+   613, 612, 611, 610, 609, 608, 607, 606, 605, 604,
+   603, 602, 601, 600, 599, 598, 597, 596, 595, 594,
+   593, 592, 591, 590, 589, 588, 587, 586, 585, 584,
+   583, 582, 581, 580, 579, 578, 577, 576, 575, 574,
+   573, 572, 571, 570, 569, 568, 567, 566, 565, 564,
+   563, 562, 561, 560, 559, 558, 557, 556, 555, 554,
+   553, 552, 551, 550, 549, 548, 547, 546, 545, 544,
+   543, 542, 541, 540, 539, 538, 537, 536, 535, 534,
+   533, 532, 531, 530, 529, 528, 527, 526, 525, 524,
+   523, 522, 521, 520, 519, 518, 517, 516, 515, 514,
+   513, 512,  63,  62,  61,  60,  59,  58,  57,  56,
+    55,  54,  53,  52,  51,  50,  49,  48,  47,  46,
+    45,  44,  43,  42,  41,  40,  39,  38,  37,  36,
+    35,  34,  33,  32,  31,  30,  29,  28,  27,  26,
+    25,  24,  23,  22,  21,  20,  19,  18,  17,  16,
+    15,  14,  13,  12,  11,  10,   9,   8,   7,   6,
+     5,   4,   3,   2,   1,   1,   1,   1,   0,   0,
+     0,
+     0,   0,   0,   0,   0,   0,   0,   1,   0,   1,
+     2,   3,   0,   1,   2,   3,   4,   5,   6,   7,
+     0,   1,   2,   3,   4,   5,   6,   7,   8,   9,
+    10,  11,  12,  13,  14,  15,   0,   1,   2,   3,
+     4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
+    14,  15,  16,  17,  18,  19,  20,  21,  22,  23,
+    24,  25,  26,  27,  28,  29,  30,  31,   0,   1,
+     2,   3,   4,   5,   6,   7,   8,   9,  10,  11,
+    12,  13,  14,  15,  16,  17,  18,  19,  20,  21,
+    22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
+    32,  33,  34,  35,  36,  37,  38,  39,  40,  41,
+    42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
+    52,  53,  54,  55,  56,  57,  58,  59,  60,  61,
+    62,  63,  64,  65,  66,  67,  68,  69,  70,  71,
+    72,  73,  74,  75,  76,  77,  78,  79,  80,  81,
+    82,  83,  84,  85,  86,  87,  88,  89,  90,  91,
+    92,  93,  94,  95,  96,  97,  98,  99, 100, 101,
+   102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
+   112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
+   122, 123, 124, 125, 126, 127, 128, 129, 130, 131,
+   132, 133, 134, 135, 136, 137, 138, 139, 140, 141,
+   142, 143, 144, 145, 146, 147, 148, 149, 150, 151,
+   152, 153, 154, 155, 156, 157, 158, 159, 160, 161,
+   162, 163, 164, 165, 166, 167, 168, 169, 170, 171,
+   172, 173, 174, 175, 176, 177, 178, 179, 180, 181,
+   182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
+   192, 193, 194, 195, 196, 197, 198, 199, 200, 201,
+   202, 203, 204, 205, 206, 207, 208, 209, 210, 211,
+   212, 213, 214, 215, 216, 217, 218, 219, 220, 221,
+   222, 223, 224, 225, 226, 227, 228, 229, 230, 231,
+   232, 233, 234, 235, 236, 237, 238, 239, 240, 241,
+   242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
+   252, 253, 254, 255, 256, 257, 258, 259, 260, 261,
+   262, 263, 264, 265, 266, 267, 268, 269, 270, 271,
+   272, 273, 274, 275, 276, 277, 278, 279, 280, 281,
+   282, 283, 284, 285, 286, 287, 288, 289, 290, 291,
+   292, 293, 294, 295, 296, 297, 298, 299, 300, 301,
+   302, 303, 304, 305, 306, 307, 308, 309, 310, 311,
+   312, 313, 314, 315, 316, 317, 318, 319, 320, 321,
+   322, 323, 324, 325, 326, 327, 328, 329, 330, 331,
+   332, 333, 334, 335, 336, 337, 338, 339, 340, 341,
+   342, 343, 344, 345, 346, 347, 348, 349, 350, 351,
+   352, 353, 354, 355, 356, 357, 358, 359, 360, 361,
+   362, 363, 364, 365, 366, 367, 368, 369, 370, 371,
+   372, 373, 374, 375, 376, 377, 378, 379, 380, 381,
+   382, 383, 384, 385, 386, 387, 388, 389, 390, 391,
+   392, 393, 394, 395, 396, 397, 398, 399, 400, 401,
+   402, 403, 404, 405, 406, 407, 408, 409, 410, 411,
+   412, 413, 414, 415, 416, 417, 418, 419, 420, 421,
+   422, 423, 424, 425, 426, 427, 428, 429, 430, 431,
+   432, 433, 434, 435, 436, 437, 438, 439, 440, 441,
+   442, 443, 444, 445, 446, 447, 448, 449, 450, 451,
+   452, 453, 454, 455, 456, 457, 458, 459, 460, 461,
+   462, 463, 464, 465, 466, 467, 468, 469, 470, 471,
+   472, 473, 474, 475, 476, 477, 478, 479, 480, 481,
+   482, 483, 484, 485, 486, 487, 488, 489, 490, 491,
+   492, 493, 494, 495, 496, 497, 498, 499, 500, 501,
+   502, 503, 504, 505, 506, 507, 508, 509, 510, 511
+};
+
+/*The first DCT coefficient that both has a smaller magnitude and gets coded
+   with a different token.*/
+static const ogg_int16_t OC_DCT_TRELLIS_ALT_VALUE[1161]={
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
+   -68, -68, -36, -36, -36, -36, -36, -36, -36, -36,
+   -36, -36, -36, -36, -36, -36, -36, -36, -36, -36,
+   -36, -36, -36, -36, -36, -36, -36, -36, -36, -36,
+   -36, -36, -36, -36, -20, -20, -20, -20, -20, -20,
+   -20, -20, -20, -20, -20, -20, -20, -20, -20, -20,
+   -12, -12, -12, -12, -12, -12, -12, -12,  -8,  -8,
+    -8,  -8,  -6,  -6,  -5,  -4,  -3,  -2,  -1,   0,
+     0,
+     0,   1,   2,   3,   4,   5,   6,   6,   8,   8,
+     8,   8,  12,  12,  12,  12,  12,  12,  12,  12,
+    20,  20,  20,  20,  20,  20,  20,  20,  20,  20,
+    20,  20,  20,  20,  20,  20,  36,  36,  36,  36,
+    36,  36,  36,  36,  36,  36,  36,  36,  36,  36,
+    36,  36,  36,  36,  36,  36,  36,  36,  36,  36,
+    36,  36,  36,  36,  36,  36,  36,  36,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68,
+    68,  68,  68,  68,  68,  68,  68,  68,  68,  68
+};
+
+#define OC_DCT_VALUE_TOKEN_PTR (OC_DCT_VALUE_TOKEN+580) /*Entry for value 0.*/
+#define OC_DCT_VALUE_EB_PTR (OC_DCT_VALUE_EB+580) /*Entry for value 0.*/
+#define OC_DCT_TRELLIS_ALT_VALUE_PTR (OC_DCT_TRELLIS_ALT_VALUE+580) /*Ditto.*/
+
+/*Some tables for fast construction of combo tokens.*/
+
+/*The combo token for a run of zeros followed by a +/-1, indexed by the run
+   length minus 1.*/
+static const unsigned char OC_DCT_RUN_CAT1_TOKEN[17]={
+  /*Runs of 1...5 each have their own token.*/
+  23,24,25,26,27,
+  /*Runs of 6...9 share a token.*/
+  28,28,28,28,
+  /*Runs of 10...17 share a token.*/
+  29,29,29,29,29,29,29,29
+};
+
+/*The extra bits value that goes with OC_DCT_RUN_CAT1_TOKEN.
+  The first index is the run length minus 1; the second is 0 for a +1 and 1
+   for a -1.*/
+static const unsigned char OC_DCT_RUN_CAT1_EB[17][2]={
+  /*Runs of 1...5.*/
+  {0,1},{0,1},{0,1},{0,1},{0,1},
+  /*Runs of 6...9.*/
+  {0,4},{1,5},{2,6},{3,7},
+  /*Runs of 10...17.*/
+  {0,8},{1,9},{2,10},{3,11},{4,12},{5,13},{6,14},{7,15}
+};
+
+/*The extra bits value for a combo token coding a run of zeros followed by a
+   +/-2 or +/-3.
+  The first index is the run length minus 1, the second is 0 for a positive
+   value and 1 for a negative one, and the third is the magnitude minus 2.*/
+static const unsigned char OC_DCT_RUN_CAT2_EB[3][2][2]={
+  /*Run of 1.*/
+  {{0,1},{2,3}},
+  /*Run of 2.*/
+  {{0,2},{4,6}},
+  /*Run of 3.*/
+  {{1,3},{5,7}}
+};
+
+/*Token logging to allow a few fragments of efficient rollback.
+  Late SKIP analysis is tied up in the tokenization process, so we need to be
+   able to undo a fragment's tokens on a whim.*/
+
+/*Maps a zig-zag index to the offset of the Huffman table group used for
+   tokens at that index; oc_token_bits() adds it to the table index.
+  Index 0 uses offset 0, indices 1...5 use 16, 6...14 use 32, 15...27 use 48,
+   and 28...63 use 64.
+  All 64 entries must be listed explicitly: any that are omitted are
+   zero-initialized, which would select the index-0 group for the highest
+   zig-zag positions.*/
+static const unsigned char OC_ZZI_HUFF_OFFSET[64]={
+   0,16,16,16,16,16,32,32,
+  32,32,32,32,32,32,32,48,
+  48,48,48,48,48,48,48,48,
+  48,48,48,48,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64
+};
+
+/*Returns the number of bits needed to code _token at zig-zag index _zzi: the
+   length of its Huffman code plus its extra bits.
+  The Huffman table is selected by _huffi plus the per-index group offset from
+   OC_ZZI_HUFF_OFFSET.*/
+static int oc_token_bits(oc_enc_ctx *_enc,int _huffi,int _zzi,int _token){
+  int table;
+  table=_huffi+OC_ZZI_HUFF_OFFSET[_zzi];
+  return _enc->huff_codes[table][_token].nbits+OC_DCT_TOKEN_EXTRA_BITS[_token];
+}
+
+/*Records in *_cp the current EOB run and token count of the token list for
+   plane _pli and zig-zag index _zzi, so that oc_enc_tokenlog_rollback() can
+   restore them later.*/
+static void oc_enc_tokenlog_checkpoint(oc_enc_ctx *_enc,
+ oc_token_checkpoint *_cp,int _pli,int _zzi){
+  /*Snapshot the two counters that get restored on rollback.*/
+  _cp->ndct_tokens=_enc->ndct_tokens[_pli][_zzi];
+  _cp->eob_run=_enc->eob_run[_pli][_zzi];
+  /*Remember which token list they belong to.*/
+  _cp->zzi=_zzi;
+  _cp->pli=_pli;
+}
+
+/*Restores the EOB runs and token counts saved in the _n checkpoints of _stack.
+  The checkpoints are applied from the newest to the oldest, so if a token
+   list was checkpointed more than once, its oldest saved state wins.*/
+void oc_enc_tokenlog_rollback(oc_enc_ctx *_enc,
+ const oc_token_checkpoint *_stack,int _n){
+  int remaining;
+  remaining=_n;
+  while(remaining>0){
+    const oc_token_checkpoint *cp;
+    remaining--;
+    cp=_stack+remaining;
+    _enc->eob_run[cp->pli][cp->zzi]=cp->eob_run;
+    _enc->ndct_tokens[cp->pli][cp->zzi]=cp->ndct_tokens;
+  }
+}
+
+/*Appends a token and its extra bits value to the token list for plane _pli
+   and zig-zag index _zzi, then advances that list's token count.*/
+static void oc_enc_token_log(oc_enc_ctx *_enc,
+ int _pli,int _zzi,int _token,int _eb){
+  ptrdiff_t slot;
+  /*The next free slot is the current token count.*/
+  slot=_enc->ndct_tokens[_pli][_zzi];
+  _enc->dct_tokens[_pli][_zzi][slot]=(unsigned char)_token;
+  _enc->extra_bits[_pli][_zzi][slot]=(ogg_uint16_t)_eb;
+  _enc->ndct_tokens[_pli][_zzi]++;
+}
+
+/*Logs the EOB token for a run of _run_count blocks to the token list for
+   plane _pli and zig-zag index _zzi.*/
+static void oc_enc_eob_log(oc_enc_ctx *_enc,
+ int _pli,int _zzi,int _run_count){
+  int extra;
+  int eob_token;
+  /*The token must be built first: it is what fills in the extra bits.*/
+  eob_token=oc_make_eob_token_full(_run_count,&extra);
+  oc_enc_token_log(_enc,_pli,_zzi,eob_token,extra);
+}
+
+
+/*Resets the tokenizer state held in the encoder context: the token counts,
+   pending EOB runs, token offsets and last DC predictors are all zeroed.*/
+void oc_enc_tokenize_start(oc_enc_ctx *_enc){
+  /*Token list bookkeeping.*/
+  memset(_enc->dct_token_offs,0,sizeof(_enc->dct_token_offs));
+  memset(_enc->ndct_tokens,0,sizeof(_enc->ndct_tokens));
+  /*Pending EOB runs.*/
+  memset(_enc->eob_run,0,sizeof(_enc->eob_run));
+  /*DC prediction state.*/
+  memset(_enc->dc_pred_last,0,sizeof(_enc->dc_pred_last));
+}
+
+typedef struct oc_quant_token oc_quant_token;
+
+/*A single node in the Viterbi trellis.
+  We maintain up to 2 of these per coefficient:
+    - A token to code if the value is zero (EOB, zero run, or combo token).
+    - A token to code if the value is not zero (DCT value token).
+  next:  Packed link to the node that follows this token on the best path:
+          the zig-zag index of that node shifted left by 1, plus which of its
+          two nodes to use in the low bit.
+         A zig-zag index of 0 marks the end of the block.
+  token: The token to emit for this node.
+  eb:    The extra bits value to emit with the token.
+  cost:  The squared error plus lambda times the bits, summed from this node
+          to the end of the block along the best path.
+  bits:  The token bits summed from this node to the end of the block along
+          the best path.
+  qc:    The quantized value of the last coefficient this token covers (zero
+          for pure zero runs and EOBs).*/
+struct oc_quant_token{
+  unsigned char next;
+  signed char   token;
+  ogg_int16_t   eb;
+  ogg_uint32_t  cost;
+  int           bits;
+  int           qc;
+};
+
+/*Tokenizes the AC coefficients, possibly adjusting the quantization, and then
+   dequantizes and de-zig-zags the result.
+  The AC coefficients of _idct must be pre-initialized to zero.
+  The first loop scans the coefficients from the highest zig-zag index down to
+   1 and builds a trellis with up to two nodes per index: tokens[zzi][0] is
+   the cheapest way found to code the coefficient as a zero, tokens[zzi][1]
+   the cheapest way found to code it as a non-zero value.
+  Each node's cost is the squared error plus _lambda times the token bits,
+   accumulated from that index to the end of the block.
+  The second loop follows the next links starting at index 1 and logs the
+   chosen tokens, pushing one checkpoint onto *_stack per step so the caller
+   can undo them with oc_enc_tokenlog_rollback().
+  _enc:     The encoder context; the token lists and EOB run counters for
+             plane _pli are updated.
+  _pli:     The plane index (selects eob_run[_pli] and the token lists).
+  _fragi:   Not referenced by this function.
+  _idct:    Output: the dequantized coefficients, stored at
+             dct_fzig_zag[zig-zag index].
+  _qdct:    The initial quantized coefficients, in zig-zag order.
+  _dequant: The dequantization multipliers, in zig-zag order.
+  _dct:     The coefficients the reconstruction error is measured against, in
+             zig-zag order.
+  _zzi:     Upper bound on the zig-zag indices considered; the scan starts at
+             OC_MINI(_zzi,63).
+  _stack:   In/out pointer to the next free checkpoint slot.
+  _lambda:  The rate multiplier in the cost function.
+            It is forced to zero once a non-zero candidate at an index below
+             _acmin is reached, and then stays zero for all lower indices.
+  _acmin:   See _lambda.
+  Return: The number of token bits along the chosen path, excluding the bits
+           of the EOB that terminates it (if any).*/
+int oc_enc_tokenize_ac(oc_enc_ctx *_enc,int _pli,ptrdiff_t _fragi,
+ ogg_int16_t *_idct,const ogg_int16_t *_qdct,
+ const ogg_uint16_t *_dequant,const ogg_int16_t *_dct,
+ int _zzi,oc_token_checkpoint **_stack,int _lambda,int _acmin){
+  oc_token_checkpoint *stack;
+  ogg_int64_t          zflags;     /*Bit i set: index i has a [0] node.*/
+  ogg_int64_t          nzflags;    /*Bit i set: index i has a [1] node.*/
+  ogg_int64_t          best_flags; /*Bit i set: start from the [1] node at i.*/
+  ogg_uint32_t         d2_accum[64]; /*Running squared error of zeroed runs.*/
+  oc_quant_token       tokens[64][2];
+  ogg_uint16_t        *eob_run;
+  const unsigned char *dct_fzig_zag;
+  ogg_uint32_t         cost;
+  int                  bits;
+  int                  eob;
+  int                  token;
+  int                  eb;
+  int                  next;
+  int                  huffi;
+  int                  zzi;
+  int                  ti;
+  int                  zzj;
+  int                  qc;
+  huffi=_enc->huff_idxs[_enc->state.frame_type][1][_pli+1>>1];
+  eob_run=_enc->eob_run[_pli];
+  memset(tokens[0],0,sizeof(tokens[0])); /*Index 0 doubles as the zero-cost
+                                            end-of-block node: indices wrap
+                                            around through &63.*/
+  best_flags=nzflags=0;
+  zflags=1; /*Only the end-of-block node exists so far.*/
+  d2_accum[0]=0;
+  zzj=64; /*Sentinel: no coefficient has been processed yet.*/
+  for(zzi=OC_MINI(_zzi,63);zzi>0;zzi--){
+    ogg_uint32_t best_cost;
+    int          best_bits=best_bits; /*NOTE(review): the self-initializations
+                                         here presumably only silence
+                                         uninitialized-variable warnings;
+                                         confirm.*/
+    int          best_next=best_next;
+    int          best_token=best_token;
+    int          best_eb=best_eb;
+    int          best_qc=best_qc;
+    ogg_uint32_t d2;
+    int          dq;
+    int          qc_m;
+    int          e;
+    int          c;
+    int          s;
+    int          tj;
+    qc=_qdct[zzi];
+    s=-(qc<0); /*0 if qc is non-negative, -1 otherwise.*/
+    qc_m=qc+s^s; /*The magnitude of qc (+ binds tighter than ^).*/
+    c=_dct[zzi];
+    /*The hard case: try a zero run.*/
+    if(qc_m<=1){
+      ogg_uint32_t sum_d2;
+      int          nzeros;
+      int          dc_reserve;
+      if(!qc_m){
+        /*Skip runs that are already quantized to zeros.
+          If we considered each zero coefficient in turn, we might
+           theoretically find a better way to partition long zero runs (e.g.,
+           a run of > 17 zeros followed by a 1 might be better coded as a short
+           zero run followed by a combo token, rather than the longer zero
+           token followed by a 1 value token), but zeros are so common that
+           this becomes very computationally expensive (quadratic instead of
+           linear in the number of coefficients), for a marginal gain.*/
+        while(zzi>1&&!_qdct[zzi-1])zzi--;
+        /*The distortion of coefficients originally quantized to zero is
+           treated as zero (since we'll never quantize them to anything else).*/
+        d2=0;
+      }
+      else{
+        d2=c*(ogg_int32_t)c; /*The cost of zeroing this +/-1.*/
+        c=c+s^s; /*Flip c by the sign of qc so it compares against +dq.*/
+      }
+      eob=eob_run[zzi];
+      nzeros=zzj-zzi;
+      zzj&=63; /*Maps the initial sentinel 64 onto the end-of-block node.*/
+      sum_d2=d2+d2_accum[zzj];
+      d2_accum[zzi]=sum_d2;
+      /*We reserve 1 spot for combo run tokens that start in the 1st AC stack
+         to ensure they can be extended to include the DC coefficient if
+         necessary; this greatly simplifies stack-rewriting later on.*/
+      dc_reserve=zzi+62>>6; /*0 when zzi is 1, 1 for every other index.*/
+      best_cost=0xFFFFFFFF;
+      for(;;){
+        if(nzflags>>zzj&1){
+          int val;
+          int val_s;
+          int zzk;
+          int tk;
+          next=tokens[zzj][1].next;
+          tk=next&1;
+          zzk=next>>1;
+          /*Try a pure zero run to this point.*/
+          token=OC_DCT_SHORT_ZRL_TOKEN+(nzeros+55>>6); /*+1 once nzeros>=9.*/
+          bits=oc_token_bits(_enc,huffi,zzi,token);
+          d2=sum_d2-d2_accum[zzj];
+          cost=d2+_lambda*bits+tokens[zzj][1].cost;
+          if(cost<=best_cost){
+            best_next=(zzj<<1)+1;
+            best_token=token;
+            best_eb=nzeros-1;
+            best_cost=cost;
+            best_bits=bits+tokens[zzj][1].bits;
+            best_qc=0;
+          }
+          if(nzeros<17+dc_reserve){
+            val=_qdct[zzj];
+            val_s=-(val<0);
+            val=val+val_s^val_s; /*The magnitude of the coefficient at zzj.*/
+            if(val<=2){
+              /*Try a +/- 1 combo token.*/
+              token=OC_DCT_RUN_CAT1_TOKEN[nzeros-1];
+              eb=OC_DCT_RUN_CAT1_EB[nzeros-1][-val_s];
+              e=_dct[zzj]-(_dequant[zzj]+val_s^val_s);
+              d2=e*(ogg_int32_t)e+sum_d2-d2_accum[zzj];
+              bits=oc_token_bits(_enc,huffi,zzi,token);
+              cost=d2+_lambda*bits+tokens[zzk][tk].cost;
+              if(cost<=best_cost){
+                best_next=next;
+                best_token=token;
+                best_eb=eb;
+                best_cost=cost;
+                best_bits=bits+tokens[zzk][tk].bits;
+                best_qc=1+val_s^val_s;
+              }
+            }
+            if(nzeros<3+dc_reserve&&2<=val&&val<=4){
+              int sval;
+              /*Try a +/- 2/3 combo token.*/
+              token=OC_DCT_RUN_CAT2A+(nzeros>>1);
+              bits=oc_token_bits(_enc,huffi,zzi,token);
+              val=2+(val>2); /*A magnitude of 2 stays 2; 3 and 4 become 3.*/
+              sval=val+val_s^val_s;
+              e=_dct[zzj]-_dequant[zzj]*sval;
+              d2=e*(ogg_int32_t)e+sum_d2-d2_accum[zzj];
+              cost=d2+_lambda*bits+tokens[zzk][tk].cost;
+              if(cost<=best_cost){
+                best_cost=cost;
+                best_bits=bits+tokens[zzk][tk].bits;
+                best_next=next;
+                best_token=token;
+                best_eb=OC_DCT_RUN_CAT2_EB[nzeros-1][-val_s][val-2];
+                best_qc=sval;
+              }
+            }
+          }
+          /*zzj can't be coded as a zero, so stop trying to extend the run.*/
+          if(!(zflags>>zzj&1))break;
+        }
+        /*We could try to consider _all_ potentially non-zero coefficients, but
+           if we already found a bunch of them not worth coding, it's fairly
+           unlikely they would now be worth coding from this position; skipping
+           them saves a lot of work.
+          The jump below lands on the coefficient that ends the [0] node's
+           token at zzj: next>>1 is the index that follows the token, and a
+           combo token (qc!=0) ends one index before that.*/
+        zzj=(tokens[zzj][0].next>>1)-(tokens[zzj][0].qc!=0)&63;
+        if(zzj==0){
+          /*We made it all the way to the end of the block; try an EOB token.*/
+          if(eob<4095){
+            bits=oc_token_bits(_enc,huffi,zzi,oc_make_eob_token(eob+1))
+             -(eob>0?oc_token_bits(_enc,huffi,zzi,oc_make_eob_token(eob)):0);
+          }
+          else bits=oc_token_bits(_enc,huffi,zzi,OC_DCT_EOB1_TOKEN);
+          cost=sum_d2+bits*_lambda;
+          /*If the best route so far is still a pure zero run to the end of the
+             block, force coding it as an EOB.
+            Even if it's not optimal for this block, it has a good chance of
+             getting combined with an EOB token from subsequent blocks, saving
+             bits overall.*/
+          if(cost<=best_cost||best_token<=OC_DCT_ZRL_TOKEN&&zzi+best_eb==63){
+            best_next=0;
+            /*This token is just a marker; in reality we may not emit any
+               tokens, but update eob_run[] instead.*/
+            best_token=OC_DCT_EOB1_TOKEN;
+            best_eb=0;
+            best_cost=cost;
+            best_bits=bits;
+            best_qc=0;
+          }
+          break;
+        }
+        nzeros=zzj-zzi;
+      }
+      tokens[zzi][0].next=(unsigned char)best_next;
+      tokens[zzi][0].token=(signed char)best_token;
+      tokens[zzi][0].eb=(ogg_int16_t)best_eb;
+      tokens[zzi][0].cost=best_cost;
+      tokens[zzi][0].bits=best_bits;
+      tokens[zzi][0].qc=best_qc;
+      zflags|=(ogg_int64_t)1<<zzi; /*NOTE(review): zzi can be 63 here, which
+                                      shifts into bit 63 of what is presumably
+                                      a signed 64-bit type; confirm.*/
+      if(qc_m){
+        dq=_dequant[zzi];
+        if(zzi<_acmin)_lambda=0; /*Overwrites the parameter, so this sticks.*/
+        e=dq-c;
+        d2=e*(ogg_int32_t)e;
+        token=OC_ONE_TOKEN-s; /*OC_ONE_TOKEN for +1, the next token for -1.*/
+        bits=oc_token_bits(_enc,huffi,zzi,token);
+        zzj=zzi+1&63;
+        tj=best_flags>>zzj&1;
+        next=(zzj<<1)+tj;
+        tokens[zzi][1].next=(unsigned char)next;
+        tokens[zzi][1].token=(signed char)token;
+        tokens[zzi][1].eb=0;
+        tokens[zzi][1].cost=d2+_lambda*bits+tokens[zzj][tj].cost;
+        tokens[zzi][1].bits=bits+tokens[zzj][tj].bits;
+        tokens[zzi][1].qc=1+s^s;
+        nzflags|=(ogg_int64_t)1<<zzi;
+        best_flags|=
+         (ogg_int64_t)(tokens[zzi][1].cost<tokens[zzi][0].cost)<<zzi;
+      }
+    }
+    else{
+      int alt_qc;
+      eob=eob_run[zzi];
+      if(zzi<_acmin)_lambda=0; /*Overwrites the parameter, so this sticks.*/
+      dq=_dequant[zzi];
+      /*No zero run can extend past this point.*/
+      d2_accum[zzi]=0;
+      e=qc*dq-c;
+      d2=e*(ogg_int32_t)e;
+      best_token=*(OC_DCT_VALUE_TOKEN_PTR+qc);
+      best_bits=oc_token_bits(_enc,huffi,zzi,best_token);
+      best_cost=d2+_lambda*best_bits;
+      alt_qc=*(OC_DCT_TRELLIS_ALT_VALUE_PTR+qc); /*The nearest smaller-magnitude
+                                                    value with a different
+                                                    token.*/
+      e=alt_qc*dq-c;
+      d2=e*(ogg_int32_t)e;
+      token=*(OC_DCT_VALUE_TOKEN_PTR+alt_qc);
+      bits=oc_token_bits(_enc,huffi,zzi,token);
+      cost=d2+_lambda*bits;
+      if(cost<best_cost){
+        best_token=token;
+        best_bits=bits;
+        best_cost=cost;
+        qc=alt_qc;
+      }
+      zzj=zzi+1&63;
+      tj=best_flags>>zzj&1;
+      next=(zzj<<1)+tj;
+      tokens[zzi][1].next=(unsigned char)next;
+      tokens[zzi][1].token=(signed char)best_token;
+      tokens[zzi][1].eb=*(OC_DCT_VALUE_EB_PTR+qc);
+      tokens[zzi][1].cost=best_cost+tokens[zzj][tj].cost;
+      tokens[zzi][1].bits=best_bits+tokens[zzj][tj].bits;
+      tokens[zzi][1].qc=qc;
+      nzflags|=(ogg_int64_t)1<<zzi;
+      best_flags|=(ogg_int64_t)1<<zzi;
+    }
+    zzj=zzi;
+  }
+  /*Emit the tokens from the best path through the trellis.*/
+  stack=*_stack;
+  dct_fzig_zag=_enc->state.opt_data.dct_fzig_zag;
+  zzi=1;
+  ti=best_flags>>1&1; /*Which of the two nodes at index 1 starts the path.*/
+  bits=tokens[zzi][ti].bits;
+  do{
+    oc_enc_tokenlog_checkpoint(_enc,stack++,_pli,zzi);
+    eob=eob_run[zzi];
+    if(tokens[zzi][ti].token<OC_NDCT_EOB_TOKEN_MAX){
+      if(++eob>=4095){
+        oc_enc_token_log(_enc,_pli,zzi,OC_DCT_REPEAT_RUN3_TOKEN,eob);
+        eob=0;
+      }
+      eob_run[zzi]=eob;
+      /*We don't include the actual EOB cost for this block in the return value.
+        It is very likely to eventually be spread over several blocks, and
+         including it more harshly penalizes the first few blocks in a long EOB
+         run.
+        Omitting it here gives a small PSNR and SSIM gain.*/
+      bits-=tokens[zzi][ti].bits;
+      zzi=_zzi;
+      break;
+    }
+    /*Emit pending EOB run if any.*/
+    if(eob>0){
+      oc_enc_eob_log(_enc,_pli,zzi,eob);
+      eob_run[zzi]=0;
+    }
+    oc_enc_token_log(_enc,_pli,zzi,tokens[zzi][ti].token,tokens[zzi][ti].eb);
+    next=tokens[zzi][ti].next;
+    qc=tokens[zzi][ti].qc;
+    zzj=(next>>1)-1&63; /*The last zig-zag index this token covers.*/
+    /*TODO: It may be worth saving the dequantized coefficient in the trellis
+       above; we had to compute it to measure the error anyway.*/
+    _idct[dct_fzig_zag[zzj]]=(ogg_int16_t)(qc*(int)_dequant[zzj]);
+    zzi=next>>1;
+    ti=next&1;
+  }
+  while(zzi);
+  *_stack=stack;
+  return bits;
+}
+
+/*Simplistic R/D tokenizer.
+  The AC coefficients of _idct must be pre-initialized to zero.
+  This could be made more accurate by using more sophisticated
+   rate predictions for zeros.
+  It could be made faster by switching from R/D decisions to static
+   lambda-derived rounding biases.
+  For every non-zero entry of _qdct at an index in [1,_zzi), two candidates
+   are compared: the value itself (d0) and the value with its magnitude
+   reduced by one (d1; the reduction is only applied at indices greater than
+   _acmin, otherwise d1 equals d0).
+  Each candidate is charged its squared reconstruction error against _dct
+   plus _lambda times an estimate of the bits needed to code it.
+  Inside the loop, zzi is the index at which the next token will be logged,
+   zzj is the index of the non-zero coefficient being decided, and zzk is the
+   index of the next non-zero coefficient after it (or _zzi if there is none).
+  _enc:     The encoder context; tokens are appended through
+             oc_enc_token_log()/oc_enc_eob_log() and the pending run lengths
+             in _enc->eob_run[_pli] are updated.
+  _pli:     The plane index used to select the token lists, the EOB runs and
+             the Huffman table index.
+  _fragi:   Not referenced by this function.
+  _idct:    Receives the chosen level times _dequant for each coefficient, at
+             position dct_fzig_zag[index]; coefficients dropped from inside a
+             zero run are not written.
+  _qdct:    The quantized coefficients to tokenize.
+  _dequant: The dequantization multipliers, indexed like _qdct.
+  _dct:     The values the dequantized coefficients are compared against when
+             measuring distortion, indexed like _qdct.
+  _zzi:     One past the last index that is examined.
+  _stack:   In/out: oc_enc_tokenlog_checkpoint() is called on successive
+             entries starting at *_stack, and *_stack is advanced past them.
+  _lambda:  The weight applied to bit counts when forming costs.
+  _acmin:   Indices less than or equal to this are never reduced.
+  Return: The sum of the bit estimates for the tokens that were emitted, not
+   including the block's final EOB.
+  Notes on the shift expressions below (the shifts bind more loosely than the
+   additions):
+   _pli+1>>1 is 0 for plane 0 and 1 for planes 1 and 2.
+   zzk-zzj+62>>6 is 0 when zzk==zzj+1 and 1 when zzk-zzj is in 2...65.
+   zzi+62>>6 is 0 when zzi==1 and 1 when zzi is in 2...65.
+   nzeros+55>>6 is 0 when nzeros is at most 8 and 1 when nzeros is in 9...72.
+   d0+sign^sign, with sign equal to 0 or -1, is the absolute value of d0, and
+    applying the same operation to a magnitude restores the sign.*/
+int oc_enc_tokenize_ac_fast(oc_enc_ctx *_enc,int _pli,ptrdiff_t _fragi,
+ ogg_int16_t *_idct,const ogg_int16_t *_qdct,
+ const ogg_uint16_t *_dequant,const ogg_int16_t *_dct,
+ int _zzi,oc_token_checkpoint **_stack,int _lambda,int _acmin){
+  const unsigned char *dct_fzig_zag;
+  ogg_uint16_t        *eob_run;
+  oc_token_checkpoint *stack;
+  int                  huffi;
+  int                  zzi;
+  int                  zzj;
+  int                  zzk;
+  int                  total_bits;
+  int                  zr[4];
+  stack=*_stack;
+  total_bits=0;
+  /*The apparent bit-cost of coding a zero from observing the trellis
+     quantizer is pre-combined with lambda.
+    Four predictive cases are considered: the last optimized value is zero (+2)
+     or non-zero and the non-optimized value is zero (+1) or non-zero.
+    The table is indexed below by next_zero, plus 2 when a zero run is already
+     active (zzj!=zzi).*/
+  zr[0]=3*_lambda>>1;
+  zr[1]=_lambda;
+  zr[2]=4*_lambda;
+  zr[3]=7*_lambda>>1;
+  eob_run=_enc->eob_run[_pli];
+  dct_fzig_zag=_enc->state.opt_data.dct_fzig_zag;
+  huffi=_enc->huff_idxs[_enc->state.frame_type][1][_pli+1>>1];
+  for(zzj=zzi=1;zzj<_zzi&&!_qdct[zzj];zzj++);/*Skip the leading zeros.*/
+  while(zzj<_zzi){
+    int v;
+    int d0;
+    int d1;
+    int sign;
+    int k;
+    int eob;
+    int dq0;
+    int dq1;
+    int dd0;
+    int dd1;
+    int next_zero;
+    int eob_bits;
+    int dct_fzig_zzj;
+    dct_fzig_zzj=dct_fzig_zag[zzj];
+    v=_dct[zzj];
+    d0=_qdct[zzj];
+    eob=eob_run[zzi];
+    for(zzk=zzj+1;zzk<_zzi&&!_qdct[zzk];zzk++);
+    next_zero=zzk-zzj+62>>6;
+    dq0=d0*_dequant[zzj];
+    dd0=dq0-v;
+    dd0*=dd0;
+    sign=-(d0<0);
+    k=d0+sign^sign;
+    d1=(k-(zzj>_acmin))+sign^sign;
+    dq1=d1*_dequant[zzj];
+    dd1=dq1-v;
+    dd1*=dd1;
+    /*The cost of ending an eob run is included when the alternative is to
+       extend this eob run.
+      A per qi/zzi weight would probably be useful.
+      Including it in the overall tokenization cost was not helpful.
+      The same is true at the far end of the zero run plus token case.*/
+    if(eob>0&&d1==0&&zzk==_zzi){
+      eob_bits=oc_token_bits(_enc,huffi,zzi,OC_DCT_EOB1_TOKEN);
+    }
+    else eob_bits=0;
+    if(zzj==zzi){
+      /*No active zero run.*/
+      int best_token;
+      int best_eb;
+      int token;
+      int best_bits;
+      int bits;
+      int cost;
+      best_token=*(OC_DCT_VALUE_TOKEN_PTR+d0);
+      best_bits=oc_token_bits(_enc,huffi,zzi,best_token);
+      if(d1!=0){
+        token=*(OC_DCT_VALUE_TOKEN_PTR+d1);
+        bits=oc_token_bits(_enc,huffi,zzi,token);
+        cost=dd1+(bits+eob_bits)*_lambda;
+      }
+      else{
+        token=bits=0;
+        cost=dd1+zr[next_zero];
+      }
+      if((dd0+(best_bits+eob_bits)*_lambda)>cost){
+        _idct[dct_fzig_zzj]=dq1;
+        if(d1==0){
+          zzj=zzk;/*Dropped; zzi stays put, so a zero run is now active.*/
+          continue;
+        }
+        best_bits=bits;
+        best_token=token;
+        best_eb=*(OC_DCT_VALUE_EB_PTR+d1);
+      }
+      else{
+        best_eb=*(OC_DCT_VALUE_EB_PTR+d0);
+        _idct[dct_fzig_zzj]=dq0;
+      }
+      oc_enc_tokenlog_checkpoint(_enc,stack++,_pli,zzi);
+      if(eob>0){
+        oc_enc_eob_log(_enc,_pli,zzi,eob);
+        eob_run[zzi]=0;
+      }
+      oc_enc_token_log(_enc,_pli,zzi,best_token,best_eb);
+      total_bits+=best_bits;
+    }
+    else{
+      int d;
+      int dc_reserve;
+      int best_token;
+      int best_eb;
+      int best_bits;
+      int best_cost;
+      int best_bits1;
+      int best_token1;
+      int best_eb1;
+      int zr_bits;
+      int eob2;
+      int eob_bits2;
+      int bits;
+      int token;
+      int nzeros;
+      nzeros=zzj-zzi;
+      dc_reserve=zzi+62>>6;
+      /*A zero run, followed by the value alone.
+        The unsuffixed best_* variables track the cheapest coding of d0, and
+         the best_*1 variables the cheapest coding of d1.*/
+      best_token=best_token1=OC_DCT_SHORT_ZRL_TOKEN+(nzeros+55>>6);
+      best_eb=best_eb1=nzeros-1;
+      eob2=eob_run[zzj];
+      eob_bits2=eob2>0?oc_token_bits(_enc,huffi,zzj,OC_DCT_EOB1_TOKEN):0;
+      zr_bits=oc_token_bits(_enc,huffi,zzi,best_token)+eob_bits2;
+      best_bits=zr_bits
+       +oc_token_bits(_enc,huffi,zzj,*(OC_DCT_VALUE_TOKEN_PTR+d0));
+      d=d0;
+      best_bits1=0;
+      if(d1!=0){
+        best_bits1=zr_bits
+         +oc_token_bits(_enc,huffi,zzj,*(OC_DCT_VALUE_TOKEN_PTR+d1));
+      }
+      if(nzeros<17+dc_reserve){
+        if(k<=2){
+          /*+/- 1 combo token.*/
+          token=OC_DCT_RUN_CAT1_TOKEN[nzeros-1];
+          bits=oc_token_bits(_enc,huffi,zzi,token);
+          if(k==2&&bits<=best_bits1){
+            best_bits1=bits;
+            best_token1=token;
+            best_eb1=OC_DCT_RUN_CAT1_EB[nzeros-1][-sign];
+          }
+          if(k==1&&bits<=best_bits){
+            best_bits=bits;
+            best_token=token;
+            best_eb=OC_DCT_RUN_CAT1_EB[nzeros-1][-sign];
+          }
+        }
+        if(nzeros<3+dc_reserve&&2<=k&&k<=4){
+          /*+/- 2/3 combo token.*/
+          token=OC_DCT_RUN_CAT2A+(nzeros>>1);
+          bits=oc_token_bits(_enc,huffi,zzi,token);
+          if(k==4&&bits<=best_bits1){
+            best_bits1=bits;
+            best_token1=token;
+            best_eb1=OC_DCT_RUN_CAT2_EB[nzeros-1][-sign][1];
+          }
+          if(k!=4&&bits<=best_bits){
+            best_bits=bits;
+            best_token=token;
+            best_eb=OC_DCT_RUN_CAT2_EB[nzeros-1][-sign][k-2];
+          }
+        }
+      }
+      best_cost=dd0+(best_bits+eob_bits)*_lambda;
+      if(d1==0&&(dd1+zr[2+next_zero])<=best_cost){
+        zzj=zzk;/*Dropped; the active zero run now extends toward zzk.*/
+        continue;
+      }
+      if(d1!=0&&dd1+(best_bits1+eob_bits)*_lambda<best_cost){
+        best_bits=best_bits1;
+        best_token=best_token1;
+        best_eb=best_eb1;
+        d=d1;
+        _idct[dct_fzig_zzj]=dq1;
+      }
+      else _idct[dct_fzig_zzj]=dq0;
+      oc_enc_tokenlog_checkpoint(_enc,stack++,_pli,zzi);
+      if(eob){
+        oc_enc_eob_log(_enc,_pli,zzi,eob);
+        eob_run[zzi]=0;
+      }
+      oc_enc_token_log(_enc,_pli,zzi,best_token,best_eb);
+      /*If a zero run won vs. the combo token we still need to code this
+         value.*/
+      if(best_token<=OC_DCT_ZRL_TOKEN){
+        oc_enc_tokenlog_checkpoint(_enc,stack++,_pli,zzj);
+        if(eob2){
+          oc_enc_eob_log(_enc,_pli,zzj,eob2);
+          /*The cost of any EOB run we disrupted is ignored because doing so
+             improved PSNR/SSIM by a small amount.*/
+          best_bits-=eob_bits2;
+          eob_run[zzj]=0;
+        }
+        oc_enc_token_log(_enc,_pli,zzj,
+         *(OC_DCT_VALUE_TOKEN_PTR+d),*(OC_DCT_VALUE_EB_PTR+d));
+      }
+      total_bits+=best_bits;
+    }
+    zzi=zzj+1;
+    zzj=zzk;
+  }
+  /*Code an EOB run to complete this block.
+    The cost of the EOB run is not included in the total as explained in a
+     comment in the trellis tokenizer above.*/
+  if(zzi<64){
+    int eob;
+    eob=eob_run[zzi]+1;
+    oc_enc_tokenlog_checkpoint(_enc,stack++,_pli,zzi);
+    if(eob>=4095){/*Log the run once it reaches 4095 and start a new one.*/
+      oc_enc_token_log(_enc,_pli,zzi,OC_DCT_REPEAT_RUN3_TOKEN,eob);
+      eob=0;
+    }
+    eob_run[zzi]=eob;
+  }
+  *_stack=stack;
+  return total_bits;
+}
+
+/*Computes the DC prediction residual of every coded fragment in fragment
+   rows [_fragy0,_frag_yend) of plane _pli and stores it in _enc->frag_dc.
+  The predictor is built from the left, up-left, up and up-right neighbors
+   whose reference index (refi) equals that of the current fragment; when
+   none match, the last DC value recorded for that reference in
+   _enc->dc_pred_last[_pli] is used.
+  An uncoded left neighbor never matches, while the refi of the neighbors in
+   the row above is compared exactly as stored.
+  _enc->dc_pred_last[_pli] is updated after each coded fragment.*/
+void oc_enc_pred_dc_frag_rows(oc_enc_ctx *_enc,
+ int _pli,int _fragy0,int _frag_yend){
+  const oc_fragment_plane *plane;
+  const oc_fragment       *frags;
+  ogg_int16_t             *residuals;
+  int                     *last_dc;
+  ptrdiff_t                fi;
+  int                      width;
+  int                      row;
+  int                      col;
+  plane=_enc->state.fplanes+_pli;
+  frags=_enc->state.frags;
+  residuals=_enc->frag_dc;
+  last_dc=_enc->dc_pred_last[_pli];
+  width=plane->nhfrags;
+  fi=plane->froffset+_fragy0*width;
+  for(row=_fragy0;row<_frag_yend;row++){
+    const oc_fragment *above;
+    int                left_ref;
+    int                upleft_ref;
+    int                up_ref;
+    if(row==0){
+      /*There is no row above the first one, so every case collapses to the
+         running predictor for the fragment's own reference frame.*/
+      for(col=0;col<width;col++,fi++){
+        int ref;
+        if(!frags[fi].coded)continue;
+        ref=frags[fi].refi;
+        residuals[fi]=(ogg_int16_t)(frags[fi].dc-last_dc[ref]);
+        last_dc[ref]=frags[fi].dc;
+      }
+      continue;
+    }
+    /*above[i] is the fragment one row up from frags[i].*/
+    above=frags-width;
+    left_ref=-1;
+    upleft_ref=-1;
+    up_ref=above[fi].refi;
+    for(col=0;col<width;col++,fi++){
+      int upright_ref;
+      if(col+1<width)upright_ref=above[fi+1].refi;
+      else upright_ref=-1;
+      if(!frags[fi].coded)left_ref=-1;
+      else{
+        int ref;
+        int mask;
+        int guess;
+        ref=frags[fi].refi;
+        /*One bit per neighbor that shares our reference frame:
+           bit 0 left, bit 1 up-left, bit 2 up, bit 3 up-right.
+          A single switch on the mask is somewhat faster than a generic
+           weighted sum: it replaces many poorly predicted branches and lets
+           the compiler strength-reduce the multiplications.*/
+        mask=(left_ref==ref)|((upleft_ref==ref)<<1)
+         |((up_ref==ref)<<2)|((upright_ref==ref)<<3);
+        switch(mask){
+          case  1:
+          case  3:guess=frags[fi-1].dc;break;
+          case  2:guess=above[fi-1].dc;break;
+          case  4:
+          case  6:
+          case 12:guess=above[fi].dc;break;
+          case  5:guess=(frags[fi-1].dc+above[fi].dc)/2;break;
+          case  7:
+          case 15:{
+            int left;
+            int upleft;
+            int up;
+            left=frags[fi-1].dc;
+            upleft=above[fi-1].dc;
+            up=above[fi].dc;
+            guess=(29*(left+up)-26*upleft)/32;
+            /*Fall back to a single neighbor if the weighted estimate strays
+               too far from it; up is checked first, then left, then
+               up-left.*/
+            if(abs(guess-up)>128)guess=up;
+            else if(abs(guess-left)>128)guess=left;
+            else if(abs(guess-upleft)>128)guess=upleft;
+          }break;
+          case  8:guess=above[fi+1].dc;break;
+          case  9:
+          case 11:
+          case 13:guess=(75*frags[fi-1].dc+53*above[fi+1].dc)/128;break;
+          case 10:guess=(above[fi-1].dc+above[fi+1].dc)/2;break;
+          case 14:{
+            guess=(3*(above[fi-1].dc+above[fi+1].dc)+10*above[fi].dc)/16;
+          }break;
+          /*No neighbor matches (mask 0).*/
+          default:guess=last_dc[ref];break;
+        }
+        residuals[fi]=(ogg_int16_t)(frags[fi].dc-guess);
+        last_dc[ref]=frags[fi].dc;
+        left_ref=ref;
+      }
+      /*Slide the window of references from the row above one step right.*/
+      upleft_ref=up_ref;
+      up_ref=upright_ref;
+    }
+  }
+}
+
+void oc_enc_tokenize_dc_frag_list(oc_enc_ctx *_enc,int _pli,
+ const ptrdiff_t *_coded_fragis,ptrdiff_t _ncoded_fragis,
+ int _prev_ndct_tokens1,int _prev_eob_run1){/*
+  Tokenizes the DC values of a list of coded fragments into token list 0 of
+   plane _pli, rewriting token list 1 (the first AC coefficient) in place so
+   that zero DC values can be folded into its runs.
+  _enc:               The encoder context; the DC values are read from
+                       _enc->frag_dc, and dct_tokens, extra_bits, ndct_tokens
+                       and eob_run of plane _pli at indices 0 and 1 are read
+                       and updated.
+  _pli:               The plane index.
+  _coded_fragis:      The indices into _enc->frag_dc of the fragments to
+                       tokenize.
+  _ncoded_fragis:     The number of entries in _coded_fragis.
+  _prev_ndct_tokens1: The index in token list 1 at which both reading and
+                       rewriting start.
+  _prev_eob_run1:     If positive, the token at that index is decoded as an
+                       EOB run, and this many of its EOBs are treated as
+                       belonging to earlier fragments.*/
+  const ogg_int16_t *frag_dc;
+  ptrdiff_t          fragii;
+  unsigned char     *dct_tokens0;
+  unsigned char     *dct_tokens1;
+  ogg_uint16_t      *extra_bits0;
+  ogg_uint16_t      *extra_bits1;
+  ptrdiff_t          ti0;/*Write position in token list 0.*/
+  ptrdiff_t          ti1r;/*Read position in token list 1.*/
+  ptrdiff_t          ti1w;/*Write position in token list 1.*/
+  int                eob_run0;/*EOB run pending for list 0.*/
+  int                eob_run1;/*EOB run for list 1 that is not yet written.*/
+  int                neobs1;/*EOBs left in the list 1 run being consumed.*/
+  int                token;
+  int                eb;
+  int                token1=token1;/*Self-initialized; presumably to quiet
+                                      uninitialized-variable warnings.*/
+  int                eb1=eb1;
+  /*Return immediately if there are no coded fragments; otherwise we'd flush
+     any trailing EOB run into the AC 1 list and never read it back out.*/
+  if(_ncoded_fragis<=0)return;
+  frag_dc=_enc->frag_dc;
+  dct_tokens0=_enc->dct_tokens[_pli][0];
+  dct_tokens1=_enc->dct_tokens[_pli][1];
+  extra_bits0=_enc->extra_bits[_pli][0];
+  extra_bits1=_enc->extra_bits[_pli][1];
+  ti0=_enc->ndct_tokens[_pli][0];
+  ti1w=ti1r=_prev_ndct_tokens1;
+  eob_run0=_enc->eob_run[_pli][0];
+  /*Flush any trailing EOB run for the 1st AC coefficient.
+    This is needed to allow us to track tokens to the end of the list.*/
+  eob_run1=_enc->eob_run[_pli][1];
+  if(eob_run1>0)oc_enc_eob_log(_enc,_pli,1,eob_run1);
+  /*If there was an active EOB run at the start of the 1st AC stack, read it
+     in and decode it.*/
+  if(_prev_eob_run1>0){
+    token1=dct_tokens1[ti1r];
+    eb1=extra_bits1[ti1r];
+    ti1r++;
+    eob_run1=oc_decode_eob_token(token1,eb1);
+    /*Consume the portion of the run that came before these fragments.*/
+    neobs1=eob_run1-_prev_eob_run1;
+  }
+  else eob_run1=neobs1=0;
+  for(fragii=0;fragii<_ncoded_fragis;fragii++){
+    int val;
+    /*All tokens in the 1st AC coefficient stack are regenerated as the DC
+       coefficients are produced.
+      This can be done in-place; stack 1 cannot get larger.*/
+    if(!neobs1){
+      /*There's no active EOB run in stack 1; read the next token.*/
+      token1=dct_tokens1[ti1r];
+      eb1=extra_bits1[ti1r];
+      ti1r++;
+      if(token1<OC_NDCT_EOB_TOKEN_MAX){
+        neobs1=oc_decode_eob_token(token1,eb1);
+        /*It's an EOB run; add it to the current (inactive) one.
+          Because we may have moved entries to stack 0, we may have an
+           opportunity to merge two EOB runs in stack 1.*/
+        eob_run1+=neobs1;
+      }
+    }
+    val=frag_dc[_coded_fragis[fragii]];
+    if(val){
+      /*There was a non-zero DC value, so there's no alteration to stack 1
+         for this fragment; just code the stack 0 token.*/
+      /*Flush any pending EOB run.*/
+      if(eob_run0>0){
+        token=oc_make_eob_token_full(eob_run0,&eb);
+        dct_tokens0[ti0]=(unsigned char)token;
+        extra_bits0[ti0]=(ogg_uint16_t)eb;
+        ti0++;
+        eob_run0=0;
+      }
+      dct_tokens0[ti0]=*(OC_DCT_VALUE_TOKEN_PTR+val);
+      extra_bits0[ti0]=*(OC_DCT_VALUE_EB_PTR+val);
+      ti0++;
+    }
+    else{
+      /*Zero DC value; that means the entry in stack 1 might need to be coded
+         from stack 0.
+        This requires a stack 1 fixup.*/
+      if(neobs1>0){
+        /*We're in the middle of an active EOB run in stack 1.
+          Move it to stack 0.*/
+        if(++eob_run0>=4095){
+          dct_tokens0[ti0]=OC_DCT_REPEAT_RUN3_TOKEN;
+          extra_bits0[ti0]=eob_run0;
+          ti0++;
+          eob_run0=0;
+        }
+        eob_run1--;
+      }
+      else{
+        /*No active EOB run in stack 1, so we can't extend one in stack 0.
+          Flush it if we've got it.*/
+        if(eob_run0>0){
+          token=oc_make_eob_token_full(eob_run0,&eb);
+          dct_tokens0[ti0]=(unsigned char)token;
+          extra_bits0[ti0]=(ogg_uint16_t)eb;
+          ti0++;
+          eob_run0=0;
+        }
+        /*Stack 1 token is one of: a pure zero run token, a single
+           coefficient token, or a zero run/coefficient combo token.
+          A zero run token is expanded and moved to token stack 0, and the
+           stack 1 entry dropped.
+          A single coefficient value may be transformed into combo token that
+           is moved to stack 0, or if it cannot be combined, it is left alone
+           and a single length-1 zero run is emitted in stack 0.
+          A combo token is extended and moved to stack 0.
+          During AC coding, we restrict the run lengths on combo tokens for
+           stack 1 to guarantee we can extend them.
+          Every case that moves the token ends in a continue, which skips the
+           write-back to stack 1 at the bottom of the loop body.*/
+        switch(token1){
+          case OC_DCT_SHORT_ZRL_TOKEN:{
+            if(eb1<7){
+              dct_tokens0[ti0]=OC_DCT_SHORT_ZRL_TOKEN;
+              extra_bits0[ti0]=(ogg_uint16_t)(eb1+1);
+              ti0++;
+              /*Don't write the AC coefficient back out.*/
+              continue;
+            }
+            /*Fall through.*/
+          }
+          case OC_DCT_ZRL_TOKEN:{
+            dct_tokens0[ti0]=OC_DCT_ZRL_TOKEN;
+            extra_bits0[ti0]=(ogg_uint16_t)(eb1+1);
+            ti0++;
+            /*Don't write the AC coefficient back out.*/
+          }continue;
+          case OC_ONE_TOKEN:
+          case OC_MINUS_ONE_TOKEN:{
+            dct_tokens0[ti0]=OC_DCT_RUN_CAT1A;
+            extra_bits0[ti0]=(ogg_uint16_t)(token1-OC_ONE_TOKEN);
+            ti0++;
+            /*Don't write the AC coefficient back out.*/
+          }continue;
+          case OC_TWO_TOKEN:
+          case OC_MINUS_TWO_TOKEN:{
+            dct_tokens0[ti0]=OC_DCT_RUN_CAT2A;
+            extra_bits0[ti0]=(ogg_uint16_t)(token1-OC_TWO_TOKEN<<1);
+            ti0++;
+            /*Don't write the AC coefficient back out.*/
+          }continue;
+          case OC_DCT_VAL_CAT2:{
+            dct_tokens0[ti0]=OC_DCT_RUN_CAT2A;
+            extra_bits0[ti0]=(ogg_uint16_t)((eb1<<1)+1);
+            ti0++;
+            /*Don't write the AC coefficient back out.*/
+          }continue;
+          case OC_DCT_RUN_CAT1A:
+          case OC_DCT_RUN_CAT1A+1:
+          case OC_DCT_RUN_CAT1A+2:
+          case OC_DCT_RUN_CAT1A+3:{
+            dct_tokens0[ti0]=(unsigned char)(token1+1);
+            extra_bits0[ti0]=(ogg_uint16_t)eb1;
+            ti0++;
+            /*Don't write the AC coefficient back out.*/
+          }continue;
+          case OC_DCT_RUN_CAT1A+4:{
+            dct_tokens0[ti0]=OC_DCT_RUN_CAT1B;
+            extra_bits0[ti0]=(ogg_uint16_t)(eb1<<2);
+            ti0++;
+            /*Don't write the AC coefficient back out.*/
+          }continue;
+          case OC_DCT_RUN_CAT1B:{
+            if((eb1&3)<3){
+              dct_tokens0[ti0]=OC_DCT_RUN_CAT1B;
+              extra_bits0[ti0]=(ogg_uint16_t)(eb1+1);
+              ti0++;
+              /*Don't write the AC coefficient back out.*/
+              continue;
+            }
+            eb1=((eb1&4)<<1)-1;
+            /*Fall through.*/
+          }
+          case OC_DCT_RUN_CAT1C:{
+            dct_tokens0[ti0]=OC_DCT_RUN_CAT1C;
+            extra_bits0[ti0]=(ogg_uint16_t)(eb1+1);
+            ti0++;
+            /*Don't write the AC coefficient back out.*/
+          }continue;
+          case OC_DCT_RUN_CAT2A:{
+            eb1=(eb1<<1)-1;
+            /*Fall through.*/
+          }
+          case OC_DCT_RUN_CAT2B:{
+            dct_tokens0[ti0]=OC_DCT_RUN_CAT2B;
+            extra_bits0[ti0]=(ogg_uint16_t)(eb1+1);
+            ti0++;
+            /*Don't write the AC coefficient back out.*/
+          }continue;
+        }
+        /*We can't merge tokens, write a short zero run and keep going.*/
+        dct_tokens0[ti0]=OC_DCT_SHORT_ZRL_TOKEN;
+        extra_bits0[ti0]=0;
+        ti0++;
+      }
+    }
+    if(!neobs1){
+      /*Flush any (inactive) EOB run.*/
+      if(eob_run1>0){
+        token=oc_make_eob_token_full(eob_run1,&eb);
+        dct_tokens1[ti1w]=(unsigned char)token;
+        extra_bits1[ti1w]=(ogg_uint16_t)eb;
+        ti1w++;
+        eob_run1=0;
+      }
+      /*There's no active EOB run, so log the current token.*/
+      dct_tokens1[ti1w]=(unsigned char)token1;
+      extra_bits1[ti1w]=(ogg_uint16_t)eb1;
+      ti1w++;
+    }
+    else{
+      /*Otherwise consume one EOB from the current run.*/
+      neobs1--;
+      /*If we have more than 4095 EOBs outstanding in stack1, flush the run.*/
+      if(eob_run1-neobs1>=4095){
+        dct_tokens1[ti1w]=OC_DCT_REPEAT_RUN3_TOKEN;
+        extra_bits1[ti1w]=4095;
+        ti1w++;
+        eob_run1-=4095;
+      }
+    }
+  }
+  /*Save the current state.*/
+  _enc->ndct_tokens[_pli][0]=ti0;
+  _enc->ndct_tokens[_pli][1]=ti1w;
+  _enc->eob_run[_pli][0]=eob_run0;
+  _enc->eob_run[_pli][1]=eob_run1;
+}
+
+/*Final EOB run welding.
+  First logs every EOB run still pending in _enc->eob_run.
+  Then, for each token list that starts with an EOB run, looks backwards for
+   the closest earlier list that holds any tokens; if that list ends with an
+   EOB run and the two runs total fewer than 4096 EOBs, the earlier token is
+   rewritten to carry the combined run and the later list's start offset is
+   advanced past its first token.*/
+void oc_enc_tokenize_finish(oc_enc_ctx *_enc){
+  int plane;
+  int coef;
+  /*Emit final EOB runs.*/
+  for(plane=0;plane<3;plane++){
+    for(coef=0;coef<64;coef++){
+      int pending;
+      pending=_enc->eob_run[plane][coef];
+      if(pending>0)oc_enc_eob_log(_enc,plane,coef,pending);
+    }
+  }
+  /*Merge the final EOB run of one token list with the start of the next, if
+     possible.*/
+  for(coef=0;coef<64;coef++){
+    for(plane=0;plane<3;plane++){
+      int       tail_tok;
+      int       head_tok;
+      int       tail_eb;
+      int       head_eb;
+      int       merged_tok;
+      int       merged_eb;
+      int       total;
+      int       prev_coef;
+      int       prev_plane;
+      /*Only read after the search below has assigned it.*/
+      ptrdiff_t tail_i=tail_i;
+      /*Nothing to merge unless this list has tokens and leads with an EOB
+         run.*/
+      if(_enc->ndct_tokens[plane][coef]<=0)continue;
+      head_tok=_enc->dct_tokens[plane][coef][0];
+      if(head_tok>=OC_NDCT_EOB_TOKEN_MAX)continue;
+      /*Walk backwards through the lists: the lower planes of this
+         coefficient first, then planes 2, 1, 0 of each earlier coefficient,
+         stopping at the first list that has a token at or past its start
+         offset.*/
+      tail_tok=OC_NDCT_EOB_TOKEN_MAX;
+      prev_coef=coef;
+      prev_plane=plane;
+      for(;;){
+        if(--prev_plane<0){
+          if(--prev_coef<0)break;
+          prev_plane=2;
+        }
+        tail_i=_enc->ndct_tokens[prev_plane][prev_coef]-1;
+        if(tail_i>=_enc->dct_token_offs[prev_plane][prev_coef]){
+          tail_tok=_enc->dct_tokens[prev_plane][prev_coef][tail_i];
+          break;
+        }
+      }
+      /*Either no earlier list had tokens, or its last token is not an EOB
+         run.*/
+      if(tail_tok>=OC_NDCT_EOB_TOKEN_MAX)continue;
+      /*Decode both runs, extra bits included, and add them up.*/
+      tail_eb=_enc->extra_bits[prev_plane][prev_coef][tail_i];
+      head_eb=_enc->extra_bits[plane][coef][0];
+      total=oc_decode_eob_token(tail_tok,tail_eb)
+       +oc_decode_eob_token(head_tok,head_eb);
+      /*A total of 4096 or more cannot be combined into one run.
+        A better split might exist, but such pairs are left as they are.*/
+      if(total<4096){
+        merged_tok=oc_make_eob_token_full(total,&merged_eb);
+        _enc->dct_tokens[prev_plane][prev_coef][tail_i]=
+         (unsigned char)merged_tok;
+        _enc->extra_bits[prev_plane][prev_coef][tail_i]=
+         (ogg_uint16_t)merged_eb;
+        _enc->dct_token_offs[plane][coef]++;
+      }
+    }
+  }
+}

+ 29 - 0
jni/libtheora-1.2.0alpha1/ndkenv.sh

@@ -0,0 +1,29 @@
+#
+# This file is a modified version of the android build system of the
+# Adventure Game Studio (AGS) https://github.com/adventuregamestudio/ags
+#
+# Exports the tool and flag variables for cross-compiling with the Android
+# NDK's arm-linux-androideabi GCC 4.8 toolchain (armeabi-v7a, android-9).
+# ANDROID_NDK must hold the NDK root directory before this file is read.
+# The prebuilt toolchain directory is fixed to the linux-x86_64 host.
+#
+
+# Stop here rather than export paths rooted at "/" when the NDK location is
+# missing; in a non-interactive shell this also ends the calling script.
+: "${ANDROID_NDK:?ANDROID_NDK must be set to the Android NDK root directory}"
+
+export NDK_TOOLCHAIN=arm-linux-androideabi-4.8
+
+# Drop inherited settings so that only the values below reach configure.
+unset DEVROOT SDKROOT CFLAGS CC LD CPP CXX AR AS NM CXXCPP RANLIB LDFLAGS CPPFLAGS CXXFLAGS
+
+# Expansions are quoted so an NDK path containing whitespace is not split by
+# shells that treat "export" as an ordinary command.
+export NDK_PLATFORM_ROOT="$ANDROID_NDK/platforms/android-9/arch-arm"
+export NDK_PLATFORM_NAME=armeabi-v7a
+export NDK_HOST_NAME=arm-linux-androideabi
+export NDK_TOOLCHAIN_ROOT="$ANDROID_NDK/toolchains/$NDK_TOOLCHAIN/prebuilt/linux-x86_64"
+export PATH="$NDK_TOOLCHAIN_ROOT/bin:$PATH"
+
+export AR="$NDK_TOOLCHAIN_ROOT/bin/arm-linux-androideabi-ar"
+export AS="$NDK_TOOLCHAIN_ROOT/bin/arm-linux-androideabi-as"
+export ASCPP="$NDK_TOOLCHAIN_ROOT/bin/arm-linux-androideabi-as"
+export CC="$NDK_TOOLCHAIN_ROOT/bin/arm-linux-androideabi-gcc"
+export CPP="$NDK_TOOLCHAIN_ROOT/bin/arm-linux-androideabi-cpp -E --sysroot=$NDK_PLATFORM_ROOT"
+export CXX="$NDK_TOOLCHAIN_ROOT/bin/arm-linux-androideabi-g++"
+export CXXCPP="$NDK_TOOLCHAIN_ROOT/bin/arm-linux-androideabi-g++"
+export LD="$NDK_TOOLCHAIN_ROOT/bin/arm-linux-androideabi-ld"
+export NM="$NDK_TOOLCHAIN_ROOT/bin/arm-linux-androideabi-nm"
+export RANLIB="$NDK_TOOLCHAIN_ROOT/bin/arm-linux-androideabi-ranlib"
+export STRIP="$NDK_TOOLCHAIN_ROOT/bin/arm-linux-androideabi-strip"
+
+# The include path is the same directory as before, now derived from
+# NDK_PLATFORM_ROOT instead of repeating the platform path.
+export NDK_CFLAGS=" -mthumb -march=armv7-a -mfloat-abi=softfp -mfpu=vfpv3-d16 -I$NDK_PLATFORM_ROOT/usr/include/sys"
+export NDK_LDFLAGS=" -march=armv7-a -Wl,--fix-cortex-a8"

+ 5 - 2
jni/love/Android.mk

@@ -30,7 +30,8 @@ LOCAL_C_INCLUDES  :=  \
 	${LOCAL_PATH}/../libmodplug-0.8.8.4/src \
 	${LOCAL_PATH}/../libvorbis-1.3.5/include \
 	${LOCAL_PATH}/../LuaJIT-2.0.1/src \
-	${LOCAL_PATH}/../libogg-1.3.2/include 
+	${LOCAL_PATH}/../libogg-1.3.2/include \
+	${LOCAL_PATH}/../libtheora-1.2.0alpha1/include 
 		
 LOCAL_SRC_FILES := \
 	$(filter-out \
@@ -72,6 +73,8 @@ LOCAL_SRC_FILES := \
  	$(wildcard ${LOCAL_PATH}/src/modules/touch/sdl/*.cpp) \
   $(wildcard ${LOCAL_PATH}/src/modules/timer/*.cpp) \
   $(wildcard ${LOCAL_PATH}/src/modules/timer/sdl/*.cpp) \
+  $(wildcard ${LOCAL_PATH}/src/modules/video/*.cpp) \
+  $(wildcard ${LOCAL_PATH}/src/modules/video/theora/*.cpp) \
   $(wildcard ${LOCAL_PATH}/src/modules/window/*.cpp) \
   $(wildcard ${LOCAL_PATH}/src/modules/window/sdl/*.cpp) \
   $(wildcard ${LOCAL_PATH}/src/libraries/ddsparse/*.cpp) \
@@ -99,7 +102,7 @@ LOCAL_CXXFLAGS := -std=c++0x
 
 LOCAL_SHARED_LIBRARIES := libopenal libmpg123 
 
-LOCAL_STATIC_LIBRARIES := libphysfs libvorbis libogg libmodplug libfreetype libluajit SDL2_static
+LOCAL_STATIC_LIBRARIES := libphysfs libvorbis libogg libtheora libmodplug libfreetype libluajit SDL2_static
 
 # $(info liblove: include dirs $(LOCAL_C_INCLUDES))
 # $(info liblove: src files $(LOCAL_SRC_FILES))